aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/encode.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2020-04-07 15:57:58 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2020-04-07 15:57:58 -0400
commit: 26a944cf29ba67bb49f42656dd2be98fe2485f5f (patch)
tree: d234713a62696aeb655b27b3c5756588809fa588 /src/backend/utils/adt/encode.c
parent: 9c74ceb20b991f786f71666d4b4d557d2744a567 (diff)
download: postgresql-26a944cf29ba67bb49f42656dd2be98fe2485f5f.tar.gz
download: postgresql-26a944cf29ba67bb49f42656dd2be98fe2485f5f.zip
Adjust bytea get_bit/set_bit to use int8 not int4 for bit numbering.

Since the existing bit number argument can't exceed INT32_MAX, it's not possible for these functions to manipulate bits beyond the first 256MB of a bytea value. Lift that restriction by redeclaring the bit number arguments as int8 (which requires a catversion bump, hence is not back-patchable).

The similarly-named functions for bit/varbit don't really have a problem because we restrict those types to at most VARBITMAXLEN bits; hence leave them alone.

While here, extend the encode/decode functions in utils/adt/encode.c to allow dealing with values wider than 1GB. This is not a live bug or restriction in current usage, because no input could be more than 1GB, and since none of the encoders can expand a string more than 4X, the result size couldn't overflow uint32. But it might be desirable to support more in future, so make the input length values size_t and the potential-output-length values uint64.

Also add some test cases to improve the miserable code coverage of these functions.

Movead Li, editorialized some by me; also reviewed by Ashutosh Bapat

Discussion: https://postgr.es/m/20200312115135445367128@highgo.ca

Diffstat (limited to 'src/backend/utils/adt/encode.c')
-rw-r--r-- src/backend/utils/adt/encode.c | 136
1 files changed, 85 insertions, 51 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index b8d9ec7e00a..61d318d93ca 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -16,14 +16,24 @@
#include <ctype.h>
#include "utils/builtins.h"
+#include "utils/memutils.h"
+/*
+ * Encoding conversion API.
+ * encode_len() and decode_len() compute the amount of space needed, while
+ * encode() and decode() perform the actual conversions. It is okay for
+ * the _len functions to return an overestimate, but not an underestimate.
+ * (Having said that, large overestimates could cause unnecessary errors,
+ * so it's better to get it right.) The conversion routines write to the
+ * buffer at *res and return the true length of their output.
+ */
struct pg_encoding
{
- unsigned (*encode_len) (const char *data, unsigned dlen);
- unsigned (*decode_len) (const char *data, unsigned dlen);
- unsigned (*encode) (const char *data, unsigned dlen, char *res);
- unsigned (*decode) (const char *data, unsigned dlen, char *res);
+ uint64 (*encode_len) (const char *data, size_t dlen);
+ uint64 (*decode_len) (const char *data, size_t dlen);
+ uint64 (*encode) (const char *data, size_t dlen, char *res);
+ uint64 (*decode) (const char *data, size_t dlen, char *res);
};
static const struct pg_encoding *pg_find_encoding(const char *name);
@@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS)
Datum name = PG_GETARG_DATUM(1);
text *result;
char *namebuf;
- int datalen,
- resultlen,
- res;
+ char *dataptr;
+ size_t datalen;
+ uint64 resultlen;
+ uint64 res;
const struct pg_encoding *enc;
- datalen = VARSIZE_ANY_EXHDR(data);
-
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
@@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized encoding: \"%s\"", namebuf)));
- resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
+ dataptr = VARDATA_ANY(data);
+ datalen = VARSIZE_ANY_EXHDR(data);
+
+ resultlen = enc->encode_len(dataptr, datalen);
+
+ /*
+ * resultlen possibly overflows uint32, therefore on 32-bit machines it's
+ * unsafe to rely on palloc's internal check.
+ */
+ if (resultlen > MaxAllocSize - VARHDRSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("result of encoding conversion is too large")));
+
result = palloc(VARHDRSZ + resultlen);
- res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
+ res = enc->encode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
@@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS)
Datum name = PG_GETARG_DATUM(1);
bytea *result;
char *namebuf;
- int datalen,
- resultlen,
- res;
+ char *dataptr;
+ size_t datalen;
+ uint64 resultlen;
+ uint64 res;
const struct pg_encoding *enc;
- datalen = VARSIZE_ANY_EXHDR(data);
-
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
@@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized encoding: \"%s\"", namebuf)));
- resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
+ dataptr = VARDATA_ANY(data);
+ datalen = VARSIZE_ANY_EXHDR(data);
+
+ resultlen = enc->decode_len(dataptr, datalen);
+
+ /*
+ * resultlen possibly overflows uint32, therefore on 32-bit machines it's
+ * unsafe to rely on palloc's internal check.
+ */
+ if (resultlen > MaxAllocSize - VARHDRSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("result of decoding conversion is too large")));
+
result = palloc(VARHDRSZ + resultlen);
- res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
+ res = enc->decode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
@@ -122,8 +156,8 @@ static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
-unsigned
-hex_encode(const char *src, unsigned len, char *dst)
+uint64
+hex_encode(const char *src, size_t len, char *dst)
{
const char *end = src + len;
@@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst)
*dst++ = hextbl[*src & 0xF];
src++;
}
- return len * 2;
+ return (uint64) len * 2;
}
static inline char
@@ -152,8 +186,8 @@ get_hex(char c)
return (char) res;
}
-unsigned
-hex_decode(const char *src, unsigned len, char *dst)
+uint64
+hex_decode(const char *src, size_t len, char *dst)
{
const char *s,
*srcend;
@@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst)
return p - dst;
}
-static unsigned
-hex_enc_len(const char *src, unsigned srclen)
+static uint64
+hex_enc_len(const char *src, size_t srclen)
{
- return srclen << 1;
+ return (uint64) srclen << 1;
}
-static unsigned
-hex_dec_len(const char *src, unsigned srclen)
+static uint64
+hex_dec_len(const char *src, size_t srclen)
{
- return srclen >> 1;
+ return (uint64) srclen >> 1;
}
/*
@@ -214,8 +248,8 @@ static const int8 b64lookup[128] = {
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
-static unsigned
-pg_base64_encode(const char *src, unsigned len, char *dst)
+static uint64
+pg_base64_encode(const char *src, size_t len, char *dst)
{
char *p,
*lend = dst + 76;
@@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst)
return p - dst;
}
-static unsigned
-pg_base64_decode(const char *src, unsigned len, char *dst)
+static uint64
+pg_base64_decode(const char *src, size_t len, char *dst)
{
const char *srcend = src + len,
*s = src;
@@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst)
}
-static unsigned
-pg_base64_enc_len(const char *src, unsigned srclen)
+static uint64
+pg_base64_enc_len(const char *src, size_t srclen)
{
/* 3 bytes will be converted to 4, linefeed after 76 chars */
- return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
+ return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4);
}
-static unsigned
-pg_base64_dec_len(const char *src, unsigned srclen)
+static uint64
+pg_base64_dec_len(const char *src, size_t srclen)
{
- return (srclen * 3) >> 2;
+ return ((uint64) srclen * 3) >> 2;
}
/*
@@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen)
#define VAL(CH) ((CH) - '0')
#define DIG(VAL) ((VAL) + '0')
-static unsigned
-esc_encode(const char *src, unsigned srclen, char *dst)
+static uint64
+esc_encode(const char *src, size_t srclen, char *dst)
{
const char *end = src + srclen;
char *rp = dst;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
@@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst)
return len;
}
-static unsigned
-esc_decode(const char *src, unsigned srclen, char *dst)
+static uint64
+esc_decode(const char *src, size_t srclen, char *dst)
{
const char *end = src + srclen;
char *rp = dst;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
@@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst)
return len;
}
-static unsigned
-esc_enc_len(const char *src, unsigned srclen)
+static uint64
+esc_enc_len(const char *src, size_t srclen)
{
const char *end = src + srclen;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
@@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen)
return len;
}
-static unsigned
-esc_dec_len(const char *src, unsigned srclen)
+static uint64
+esc_dec_len(const char *src, size_t srclen)
{
const char *end = src + srclen;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{