diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/encode.c | 136 | ||||
-rw-r--r-- | src/backend/utils/adt/varlena.c | 39 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/include/catalog/pg_proc.dat | 4 | ||||
-rw-r--r-- | src/include/utils/builtins.h | 4 | ||||
-rw-r--r-- | src/test/regress/expected/strings.out | 76 | ||||
-rw-r--r-- | src/test/regress/sql/strings.sql | 23 |
7 files changed, 215 insertions, 69 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index b8d9ec7e00a..61d318d93ca 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -16,14 +16,24 @@ #include <ctype.h> #include "utils/builtins.h" +#include "utils/memutils.h" +/* + * Encoding conversion API. + * encode_len() and decode_len() compute the amount of space needed, while + * encode() and decode() perform the actual conversions. It is okay for + * the _len functions to return an overestimate, but not an underestimate. + * (Having said that, large overestimates could cause unnecessary errors, + * so it's better to get it right.) The conversion routines write to the + * buffer at *res and return the true length of their output. + */ struct pg_encoding { - unsigned (*encode_len) (const char *data, unsigned dlen); - unsigned (*decode_len) (const char *data, unsigned dlen); - unsigned (*encode) (const char *data, unsigned dlen, char *res); - unsigned (*decode) (const char *data, unsigned dlen, char *res); + uint64 (*encode_len) (const char *data, size_t dlen); + uint64 (*decode_len) (const char *data, size_t dlen); + uint64 (*encode) (const char *data, size_t dlen, char *res); + uint64 (*decode) (const char *data, size_t dlen, char *res); }; static const struct pg_encoding *pg_find_encoding(const char *name); @@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS) Datum name = PG_GETARG_DATUM(1); text *result; char *namebuf; - int datalen, - resultlen, - res; + char *dataptr; + size_t datalen; + uint64 resultlen; + uint64 res; const struct pg_encoding *enc; - datalen = VARSIZE_ANY_EXHDR(data); - namebuf = TextDatumGetCString(name); enc = pg_find_encoding(namebuf); @@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf))); - resultlen = enc->encode_len(VARDATA_ANY(data), datalen); + dataptr = VARDATA_ANY(data); + datalen = VARSIZE_ANY_EXHDR(data); + + resultlen = enc->encode_len(dataptr, datalen); + + /* + * resultlen possibly overflows uint32, therefore on 32-bit machines it's + * unsafe to rely on palloc's internal check. + */ + if (resultlen > MaxAllocSize - VARHDRSZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("result of encoding conversion is too large"))); + result = palloc(VARHDRSZ + resultlen); - res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result)); + res = enc->encode(dataptr, datalen, VARDATA(result)); /* Make this FATAL 'cause we've trodden on memory ... */ if (res > resultlen) @@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS) Datum name = PG_GETARG_DATUM(1); bytea *result; char *namebuf; - int datalen, - resultlen, - res; + char *dataptr; + size_t datalen; + uint64 resultlen; + uint64 res; const struct pg_encoding *enc; - datalen = VARSIZE_ANY_EXHDR(data); - namebuf = TextDatumGetCString(name); enc = pg_find_encoding(namebuf); @@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf))); - resultlen = enc->decode_len(VARDATA_ANY(data), datalen); + dataptr = VARDATA_ANY(data); + datalen = VARSIZE_ANY_EXHDR(data); + + resultlen = enc->decode_len(dataptr, datalen); + + /* + * resultlen possibly overflows uint32, therefore on 32-bit machines it's + * unsafe to rely on palloc's internal check. + */ + if (resultlen > MaxAllocSize - VARHDRSZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("result of decoding conversion is too large"))); + result = palloc(VARHDRSZ + resultlen); - res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result)); + res = enc->decode(dataptr, datalen, VARDATA(result)); /* Make this FATAL 'cause we've trodden on memory ... */ if (res > resultlen) @@ -122,8 +156,8 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -unsigned -hex_encode(const char *src, unsigned len, char *dst) +uint64 +hex_encode(const char *src, size_t len, char *dst) { const char *end = src + len; @@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst) *dst++ = hextbl[*src & 0xF]; src++; } - return len * 2; + return (uint64) len * 2; } static inline char @@ -152,8 +186,8 @@ get_hex(char c) return (char) res; } -unsigned -hex_decode(const char *src, unsigned len, char *dst) +uint64 +hex_decode(const char *src, size_t len, char *dst) { const char *s, *srcend; @@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst) return p - dst; } -static unsigned -hex_enc_len(const char *src, unsigned srclen) +static uint64 +hex_enc_len(const char *src, size_t srclen) { - return srclen << 1; + return (uint64) srclen << 1; } -static unsigned -hex_dec_len(const char *src, unsigned srclen) +static uint64 +hex_dec_len(const char *src, size_t srclen) { - return srclen >> 1; + return (uint64) srclen >> 1; } /* @@ -214,8 +248,8 @@ static const int8 b64lookup[128] = { 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, }; -static unsigned -pg_base64_encode(const char *src, unsigned len, char *dst) +static uint64 +pg_base64_encode(const char *src, size_t len, char *dst) { char *p, *lend = dst + 76; @@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst) return p - dst; } -static unsigned -pg_base64_decode(const char *src, unsigned len, char *dst) +static uint64 +pg_base64_decode(const char *src, size_t len, char *dst) { const char *srcend = src + len, *s = src; @@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst) } -static unsigned -pg_base64_enc_len(const char *src, unsigned srclen) +static uint64 +pg_base64_enc_len(const char *src, size_t srclen) { /* 3 bytes will be converted to 4, linefeed after 76 chars */ - return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4); + return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4); } -static unsigned -pg_base64_dec_len(const char *src, unsigned srclen) +static uint64 +pg_base64_dec_len(const char *src, size_t srclen) { - return (srclen * 3) >> 2; + return ((uint64) srclen * 3) >> 2; } /* @@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen) #define VAL(CH) ((CH) - '0') #define DIG(VAL) ((VAL) + '0') -static unsigned -esc_encode(const char *src, unsigned srclen, char *dst) +static uint64 +esc_encode(const char *src, size_t srclen, char *dst) { const char *end = src + srclen; char *rp = dst; - int len = 0; + uint64 len = 0; while (src < end) { @@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst) return len; } -static unsigned -esc_decode(const char *src, unsigned srclen, char *dst) +static uint64 +esc_decode(const char *src, size_t srclen, char *dst) { const char *end = src + srclen; char *rp = dst; - int len = 0; + uint64 len = 0; while (src < end) { @@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst) return len; } -static unsigned -esc_enc_len(const char *src, unsigned srclen) +static uint64 +esc_enc_len(const char *src, size_t srclen) { const char *end = src + srclen; - int len = 0; + uint64 len = 0; while (src < end) { @@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen) return len; } -static unsigned -esc_dec_len(const char *src, unsigned srclen) +static uint64 +esc_dec_len(const char *src, size_t srclen) { const char *end = src + srclen; - int len = 0; + uint64 len = 0; while (src < end) { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 0e464950e15..2eaabd6231d 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -389,7 +389,7 @@ byteaout(PG_FUNCTION_ARGS) { /* Print traditional escaped format */ char *vp; - int len; + uint64 len; int i; len = 1; /* empty string has 1 char */ @@ -403,7 +403,18 @@ byteaout(PG_FUNCTION_ARGS) else len++; } + + /* + * In principle len can't overflow uint32 if the input fit in 1GB, but + * for safety let's check rather than relying on palloc's internal + * check. + */ + if (len > MaxAllocSize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg_internal("result of bytea output conversion is too large"))); rp = result = (char *) palloc(len); + vp = VARDATA_ANY(vlena); for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) { @@ -3456,7 +3467,7 @@ Datum byteaGetBit(PG_FUNCTION_ARGS) { bytea *v = PG_GETARG_BYTEA_PP(0); - int32 n = PG_GETARG_INT32(1); + int64 n = PG_GETARG_INT64(1); int byteNo, bitNo; int len; @@ -3464,14 +3475,15 @@ byteaGetBit(PG_FUNCTION_ARGS) len = VARSIZE_ANY_EXHDR(v); - if (n < 0 || n >= len * 8) + if (n < 0 || n >= (int64) len * 8) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len * 8 - 1))); + errmsg("index %lld out of valid range, 0..%lld", + (long long) n, (long long) len * 8 - 1))); - byteNo = n / 8; - bitNo = n % 8; + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; @@ -3525,7 +3537,7 @@ Datum byteaSetBit(PG_FUNCTION_ARGS) { bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int32 n = PG_GETARG_INT32(1); + int64 n = PG_GETARG_INT64(1); int32 newBit = PG_GETARG_INT32(2); int len; int oldByte, @@ -3535,14 +3547,15 @@ byteaSetBit(PG_FUNCTION_ARGS) len = VARSIZE(res) - VARHDRSZ; - if (n < 0 || n >= len * 8) + if (n < 0 || n >= (int64) len * 8) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len * 8 - 1))); + errmsg("index %lld out of valid range, 0..%lld", + (long long) n, (long long) len * 8 - 1))); - byteNo = n / 8; - bitNo = n % 8; + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); /* * sanity check! diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 498992ff84c..b077c0f0f59 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202004062 +#define CATALOG_VERSION_NO 202004071 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 2d1862a9d8a..c9902fa1234 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1439,10 +1439,10 @@ proname => 'set_byte', prorettype => 'bytea', proargtypes => 'bytea int4 int4', prosrc => 'byteaSetByte' }, { oid => '723', descr => 'get bit', - proname => 'get_bit', prorettype => 'int4', proargtypes => 'bytea int4', + proname => 'get_bit', prorettype => 'int4', proargtypes => 'bytea int8', prosrc => 'byteaGetBit' }, { oid => '724', descr => 'set bit', - proname => 'set_bit', prorettype => 'bytea', proargtypes => 'bytea int4 int4', + proname => 'set_bit', prorettype => 'bytea', proargtypes => 'bytea int8 int4', prosrc => 'byteaSetBit' }, { oid => '749', descr => 'substitute portion of string', proname => 'overlay', prorettype => 'bytea', diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index e2016a8bc2d..a352a8b773c 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -32,8 +32,8 @@ extern int errdatatype(Oid datatypeOid); extern int errdomainconstraint(Oid datatypeOid, const char *conname); /* encode.c */ -extern unsigned hex_encode(const char *src, unsigned len, char *dst); -extern unsigned hex_decode(const char *src, unsigned len, char *dst); +extern uint64 hex_encode(const char *src, size_t len, char *dst); +extern uint64 hex_decode(const char *src, size_t len, char *dst); /* int.c */ extern int2vector *buildint2vector(const int16 *int2s, int n); diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 6c4443afcf1..6e98d183f61 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -1727,6 +1727,82 @@ SELECT sha512('The quick brown fox jumps over the lazy dog.'); (1 row) -- +-- encode/decode +-- +SELECT encode('\x1234567890abcdef00', 'hex'); + encode +-------------------- + 1234567890abcdef00 +(1 row) + +SELECT decode('1234567890abcdef00', 'hex'); + decode +---------------------- + \x1234567890abcdef00 +(1 row) + +SELECT encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, 'base64'); + encode +------------------------------------------------------------------------------ + EjRWeJCrze8AARI0VniQq83vAAESNFZ4kKvN7wABEjRWeJCrze8AARI0VniQq83vAAESNFZ4kKvN+ + 7wABEjRWeJCrze8AAQ== +(1 row) + +SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, + 'base64'), 'base64'); + decode +------------------------------------------------------------------------------------------------------------------------------------------------ + \x1234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef0001 +(1 row) + +SELECT encode('\x1234567890abcdef00', 'escape'); + encode +----------------------------- + \x124Vx\220\253\315\357\000 +(1 row) + +SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape'); + decode +---------------------- + \x1234567890abcdef00 +(1 row) + +-- +-- get_bit/set_bit etc +-- +SELECT get_bit('\x1234567890abcdef00'::bytea, 43); + get_bit +--------- + 1 +(1 row) + +SELECT get_bit('\x1234567890abcdef00'::bytea, 99); -- error +ERROR: index 99 out of valid range, 0..71 +SELECT set_bit('\x1234567890abcdef00'::bytea, 43, 0); + set_bit +---------------------- + \x1234567890a3cdef00 +(1 row) + +SELECT set_bit('\x1234567890abcdef00'::bytea, 99, 0); -- error +ERROR: index 99 out of valid range, 0..71 +SELECT get_byte('\x1234567890abcdef00'::bytea, 3); + get_byte +---------- + 120 +(1 row) + +SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error +ERROR: index 99 out of valid range, 0..8 +SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11); + set_byte +---------------------- + \x1234567890abcd0b00 +(1 row) + +SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error +ERROR: index 99 out of valid range, 0..8 +-- -- test behavior of escape_string_warning and standard_conforming_strings options -- set escape_string_warning = off; diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 3e28cd198f4..3e89159a4fd 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -598,6 +598,29 @@ SELECT sha512(''); SELECT sha512('The quick brown fox jumps over the lazy dog.'); -- +-- encode/decode +-- +SELECT encode('\x1234567890abcdef00', 'hex'); +SELECT decode('1234567890abcdef00', 'hex'); +SELECT encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, 'base64'); +SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, + 'base64'), 'base64'); +SELECT encode('\x1234567890abcdef00', 'escape'); +SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape'); + +-- +-- get_bit/set_bit etc +-- +SELECT get_bit('\x1234567890abcdef00'::bytea, 43); +SELECT get_bit('\x1234567890abcdef00'::bytea, 99); -- error +SELECT set_bit('\x1234567890abcdef00'::bytea, 43, 0); +SELECT set_bit('\x1234567890abcdef00'::bytea, 99, 0); -- error +SELECT get_byte('\x1234567890abcdef00'::bytea, 3); +SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error +SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11); +SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error + +-- -- test behavior of escape_string_warning and standard_conforming_strings options -- set escape_string_warning = off; |