diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2020-04-07 15:57:58 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2020-04-07 15:57:58 -0400 |
commit | 26a944cf29ba67bb49f42656dd2be98fe2485f5f (patch) | |
tree | d234713a62696aeb655b27b3c5756588809fa588 /src/backend/utils/adt/encode.c | |
parent | 9c74ceb20b991f786f71666d4b4d557d2744a567 (diff) | |
download | postgresql-26a944cf29ba67bb49f42656dd2be98fe2485f5f.tar.gz postgresql-26a944cf29ba67bb49f42656dd2be98fe2485f5f.zip |
Adjust bytea get_bit/set_bit to use int8 not int4 for bit numbering.
Since the existing bit number argument can't exceed INT32_MAX, it's
not possible for these functions to manipulate bits beyond the first
256MB of a bytea value. Lift that restriction by redeclaring the
bit number arguments as int8 (which requires a catversion bump,
hence is not back-patchable).
The similarly-named functions for bit/varbit don't really have a
problem because we restrict those types to at most VARBITMAXLEN bits;
hence leave them alone.
While here, extend the encode/decode functions in utils/adt/encode.c
to allow dealing with values wider than 1GB. This is not a live bug
or restriction in current usage, because no input could be more than
1GB, and since none of the encoders can expand a string more than 4X,
the result size couldn't overflow uint32. But it might be desirable
to support more in future, so make the input length values size_t
and the potential-output-length values uint64.
Also add some test cases to improve the miserable code coverage
of these functions.
Movead Li, editorialized some by me; also reviewed by Ashutosh Bapat
Discussion: https://postgr.es/m/20200312115135445367128@highgo.ca
Diffstat (limited to 'src/backend/utils/adt/encode.c')
-rw-r--r-- | src/backend/utils/adt/encode.c | 136 |
1 files changed, 85 insertions, 51 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index b8d9ec7e00a..61d318d93ca 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -16,14 +16,24 @@ #include <ctype.h> #include "utils/builtins.h" +#include "utils/memutils.h" +/* + * Encoding conversion API. + * encode_len() and decode_len() compute the amount of space needed, while + * encode() and decode() perform the actual conversions. It is okay for + * the _len functions to return an overestimate, but not an underestimate. + * (Having said that, large overestimates could cause unnecessary errors, + * so it's better to get it right.) The conversion routines write to the + * buffer at *res and return the true length of their output. + */ struct pg_encoding { - unsigned (*encode_len) (const char *data, unsigned dlen); - unsigned (*decode_len) (const char *data, unsigned dlen); - unsigned (*encode) (const char *data, unsigned dlen, char *res); - unsigned (*decode) (const char *data, unsigned dlen, char *res); + uint64 (*encode_len) (const char *data, size_t dlen); + uint64 (*decode_len) (const char *data, size_t dlen); + uint64 (*encode) (const char *data, size_t dlen, char *res); + uint64 (*decode) (const char *data, size_t dlen, char *res); }; static const struct pg_encoding *pg_find_encoding(const char *name); @@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS) Datum name = PG_GETARG_DATUM(1); text *result; char *namebuf; - int datalen, - resultlen, - res; + char *dataptr; + size_t datalen; + uint64 resultlen; + uint64 res; const struct pg_encoding *enc; - datalen = VARSIZE_ANY_EXHDR(data); - namebuf = TextDatumGetCString(name); enc = pg_find_encoding(namebuf); @@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf))); - resultlen = enc->encode_len(VARDATA_ANY(data), datalen); + dataptr = VARDATA_ANY(data); + datalen = VARSIZE_ANY_EXHDR(data); + + resultlen = enc->encode_len(dataptr, datalen); + + /* + * resultlen possibly overflows uint32, therefore on 32-bit machines it's + * unsafe to rely on palloc's internal check. + */ + if (resultlen > MaxAllocSize - VARHDRSZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("result of encoding conversion is too large"))); + result = palloc(VARHDRSZ + resultlen); - res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result)); + res = enc->encode(dataptr, datalen, VARDATA(result)); /* Make this FATAL 'cause we've trodden on memory ... */ if (res > resultlen) @@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS) Datum name = PG_GETARG_DATUM(1); bytea *result; char *namebuf; - int datalen, - resultlen, - res; + char *dataptr; + size_t datalen; + uint64 resultlen; + uint64 res; const struct pg_encoding *enc; - datalen = VARSIZE_ANY_EXHDR(data); - namebuf = TextDatumGetCString(name); enc = pg_find_encoding(namebuf); @@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf))); - resultlen = enc->decode_len(VARDATA_ANY(data), datalen); + dataptr = VARDATA_ANY(data); + datalen = VARSIZE_ANY_EXHDR(data); + + resultlen = enc->decode_len(dataptr, datalen); + + /* + * resultlen possibly overflows uint32, therefore on 32-bit machines it's + * unsafe to rely on palloc's internal check. + */ + if (resultlen > MaxAllocSize - VARHDRSZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("result of decoding conversion is too large"))); + result = palloc(VARHDRSZ + resultlen); - res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result)); + res = enc->decode(dataptr, datalen, VARDATA(result)); /* Make this FATAL 'cause we've trodden on memory ... */ if (res > resultlen) @@ -122,8 +156,8 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -unsigned -hex_encode(const char *src, unsigned len, char *dst) +uint64 +hex_encode(const char *src, size_t len, char *dst) { const char *end = src + len; @@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst) *dst++ = hextbl[*src & 0xF]; src++; } - return len * 2; + return (uint64) len * 2; } static inline char @@ -152,8 +186,8 @@ get_hex(char c) return (char) res; } -unsigned -hex_decode(const char *src, unsigned len, char *dst) +uint64 +hex_decode(const char *src, size_t len, char *dst) { const char *s, *srcend; @@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst) return p - dst; } -static unsigned -hex_enc_len(const char *src, unsigned srclen) +static uint64 +hex_enc_len(const char *src, size_t srclen) { - return srclen << 1; + return (uint64) srclen << 1; } -static unsigned -hex_dec_len(const char *src, unsigned srclen) +static uint64 +hex_dec_len(const char *src, size_t srclen) { - return srclen >> 1; + return (uint64) srclen >> 1; } /* @@ -214,8 +248,8 @@ static const int8 b64lookup[128] = { 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, }; -static unsigned -pg_base64_encode(const char *src, unsigned len, char *dst) +static uint64 +pg_base64_encode(const char *src, size_t len, char *dst) { char *p, *lend = dst + 76; @@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst) return p - dst; } -static unsigned -pg_base64_decode(const char *src, unsigned len, char *dst) +static uint64 +pg_base64_decode(const char *src, size_t len, char *dst) { const char *srcend = src + len, *s = src; @@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst) } -static unsigned -pg_base64_enc_len(const char *src, unsigned srclen) +static uint64 +pg_base64_enc_len(const char *src, size_t srclen) { /* 3 bytes will be converted to 4, linefeed after 76 chars */ - return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4); + return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4); } -static unsigned -pg_base64_dec_len(const char *src, unsigned srclen) +static uint64 +pg_base64_dec_len(const char *src, size_t srclen) { - return (srclen * 3) >> 2; + return ((uint64) srclen * 3) >> 2; } /* @@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen) #define VAL(CH) ((CH) - '0') #define DIG(VAL) ((VAL) + '0') -static unsigned -esc_encode(const char *src, unsigned srclen, char *dst) +static uint64 +esc_encode(const char *src, size_t srclen, char *dst) { const char *end = src + srclen; char *rp = dst; - int len = 0; + uint64 len = 0; while (src < end) { @@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst) return len; } -static unsigned -esc_decode(const char *src, unsigned srclen, char *dst) +static uint64 +esc_decode(const char *src, size_t srclen, char *dst) { const char *end = src + srclen; char *rp = dst; - int len = 0; + uint64 len = 0; while (src < end) { @@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst) return len; } -static unsigned -esc_enc_len(const char *src, unsigned srclen) +static uint64 +esc_enc_len(const char *src, size_t srclen) { const char *end = src + srclen; - int len = 0; + uint64 len = 0; while (src < end) { @@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen) return len; } -static unsigned -esc_dec_len(const char *src, unsigned srclen) +static uint64 +esc_dec_len(const char *src, size_t srclen) { const char *end = src + srclen; - int len = 0; + uint64 len = 0; while (src < end) { |