about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/encode.c | 136
-rw-r--r-- src/backend/utils/adt/varlena.c | 39
-rw-r--r-- src/include/catalog/catversion.h | 2
-rw-r--r-- src/include/catalog/pg_proc.dat | 4
-rw-r--r-- src/include/utils/builtins.h | 4
-rw-r--r-- src/test/regress/expected/strings.out | 76
-rw-r--r-- src/test/regress/sql/strings.sql | 23
7 files changed, 215 insertions, 69 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index b8d9ec7e00a..61d318d93ca 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -16,14 +16,24 @@
#include <ctype.h>
#include "utils/builtins.h"
+#include "utils/memutils.h"
+/*
+ * Encoding conversion API.
+ * encode_len() and decode_len() compute the amount of space needed, while
+ * encode() and decode() perform the actual conversions. It is okay for
+ * the _len functions to return an overestimate, but not an underestimate.
+ * (Having said that, large overestimates could cause unnecessary errors,
+ * so it's better to get it right.) The conversion routines write to the
+ * buffer at *res and return the true length of their output.
+ */
struct pg_encoding
{
- unsigned (*encode_len) (const char *data, unsigned dlen);
- unsigned (*decode_len) (const char *data, unsigned dlen);
- unsigned (*encode) (const char *data, unsigned dlen, char *res);
- unsigned (*decode) (const char *data, unsigned dlen, char *res);
+ uint64 (*encode_len) (const char *data, size_t dlen);
+ uint64 (*decode_len) (const char *data, size_t dlen);
+ uint64 (*encode) (const char *data, size_t dlen, char *res);
+ uint64 (*decode) (const char *data, size_t dlen, char *res);
};
static const struct pg_encoding *pg_find_encoding(const char *name);
@@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS)
Datum name = PG_GETARG_DATUM(1);
text *result;
char *namebuf;
- int datalen,
- resultlen,
- res;
+ char *dataptr;
+ size_t datalen;
+ uint64 resultlen;
+ uint64 res;
const struct pg_encoding *enc;
- datalen = VARSIZE_ANY_EXHDR(data);
-
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
@@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized encoding: \"%s\"", namebuf)));
- resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
+ dataptr = VARDATA_ANY(data);
+ datalen = VARSIZE_ANY_EXHDR(data);
+
+ resultlen = enc->encode_len(dataptr, datalen);
+
+ /*
+ * resultlen possibly overflows uint32, therefore on 32-bit machines it's
+ * unsafe to rely on palloc's internal check.
+ */
+ if (resultlen > MaxAllocSize - VARHDRSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("result of encoding conversion is too large")));
+
result = palloc(VARHDRSZ + resultlen);
- res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
+ res = enc->encode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
@@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS)
Datum name = PG_GETARG_DATUM(1);
bytea *result;
char *namebuf;
- int datalen,
- resultlen,
- res;
+ char *dataptr;
+ size_t datalen;
+ uint64 resultlen;
+ uint64 res;
const struct pg_encoding *enc;
- datalen = VARSIZE_ANY_EXHDR(data);
-
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
@@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized encoding: \"%s\"", namebuf)));
- resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
+ dataptr = VARDATA_ANY(data);
+ datalen = VARSIZE_ANY_EXHDR(data);
+
+ resultlen = enc->decode_len(dataptr, datalen);
+
+ /*
+ * resultlen possibly overflows uint32, therefore on 32-bit machines it's
+ * unsafe to rely on palloc's internal check.
+ */
+ if (resultlen > MaxAllocSize - VARHDRSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("result of decoding conversion is too large")));
+
result = palloc(VARHDRSZ + resultlen);
- res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
+ res = enc->decode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
@@ -122,8 +156,8 @@ static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
-unsigned
-hex_encode(const char *src, unsigned len, char *dst)
+uint64
+hex_encode(const char *src, size_t len, char *dst)
{
const char *end = src + len;
@@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst)
*dst++ = hextbl[*src & 0xF];
src++;
}
- return len * 2;
+ return (uint64) len * 2;
}
static inline char
@@ -152,8 +186,8 @@ get_hex(char c)
return (char) res;
}
-unsigned
-hex_decode(const char *src, unsigned len, char *dst)
+uint64
+hex_decode(const char *src, size_t len, char *dst)
{
const char *s,
*srcend;
@@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst)
return p - dst;
}
-static unsigned
-hex_enc_len(const char *src, unsigned srclen)
+static uint64
+hex_enc_len(const char *src, size_t srclen)
{
- return srclen << 1;
+ return (uint64) srclen << 1;
}
-static unsigned
-hex_dec_len(const char *src, unsigned srclen)
+static uint64
+hex_dec_len(const char *src, size_t srclen)
{
- return srclen >> 1;
+ return (uint64) srclen >> 1;
}
/*
@@ -214,8 +248,8 @@ static const int8 b64lookup[128] = {
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
-static unsigned
-pg_base64_encode(const char *src, unsigned len, char *dst)
+static uint64
+pg_base64_encode(const char *src, size_t len, char *dst)
{
char *p,
*lend = dst + 76;
@@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst)
return p - dst;
}
-static unsigned
-pg_base64_decode(const char *src, unsigned len, char *dst)
+static uint64
+pg_base64_decode(const char *src, size_t len, char *dst)
{
const char *srcend = src + len,
*s = src;
@@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst)
}
-static unsigned
-pg_base64_enc_len(const char *src, unsigned srclen)
+static uint64
+pg_base64_enc_len(const char *src, size_t srclen)
{
/* 3 bytes will be converted to 4, linefeed after 76 chars */
- return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
+ return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4);
}
-static unsigned
-pg_base64_dec_len(const char *src, unsigned srclen)
+static uint64
+pg_base64_dec_len(const char *src, size_t srclen)
{
- return (srclen * 3) >> 2;
+ return ((uint64) srclen * 3) >> 2;
}
/*
@@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen)
#define VAL(CH) ((CH) - '0')
#define DIG(VAL) ((VAL) + '0')
-static unsigned
-esc_encode(const char *src, unsigned srclen, char *dst)
+static uint64
+esc_encode(const char *src, size_t srclen, char *dst)
{
const char *end = src + srclen;
char *rp = dst;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
@@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst)
return len;
}
-static unsigned
-esc_decode(const char *src, unsigned srclen, char *dst)
+static uint64
+esc_decode(const char *src, size_t srclen, char *dst)
{
const char *end = src + srclen;
char *rp = dst;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
@@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst)
return len;
}
-static unsigned
-esc_enc_len(const char *src, unsigned srclen)
+static uint64
+esc_enc_len(const char *src, size_t srclen)
{
const char *end = src + srclen;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
@@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen)
return len;
}
-static unsigned
-esc_dec_len(const char *src, unsigned srclen)
+static uint64
+esc_dec_len(const char *src, size_t srclen)
{
const char *end = src + srclen;
- int len = 0;
+ uint64 len = 0;
while (src < end)
{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0e464950e15..2eaabd6231d 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -389,7 +389,7 @@ byteaout(PG_FUNCTION_ARGS)
{
/* Print traditional escaped format */
char *vp;
- int len;
+ uint64 len;
int i;
len = 1; /* empty string has 1 char */
@@ -403,7 +403,18 @@ byteaout(PG_FUNCTION_ARGS)
else
len++;
}
+
+ /*
+ * In principle len can't overflow uint32 if the input fit in 1GB, but
+ * for safety let's check rather than relying on palloc's internal
+ * check.
+ */
+ if (len > MaxAllocSize)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg_internal("result of bytea output conversion is too large")));
rp = result = (char *) palloc(len);
+
vp = VARDATA_ANY(vlena);
for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
{
@@ -3456,7 +3467,7 @@ Datum
byteaGetBit(PG_FUNCTION_ARGS)
{
bytea *v = PG_GETARG_BYTEA_PP(0);
- int32 n = PG_GETARG_INT32(1);
+ int64 n = PG_GETARG_INT64(1);
int byteNo,
bitNo;
int len;
@@ -3464,14 +3475,15 @@ byteaGetBit(PG_FUNCTION_ARGS)
len = VARSIZE_ANY_EXHDR(v);
- if (n < 0 || n >= len * 8)
+ if (n < 0 || n >= (int64) len * 8)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %d out of valid range, 0..%d",
- n, len * 8 - 1)));
+ errmsg("index %lld out of valid range, 0..%lld",
+ (long long) n, (long long) len * 8 - 1)));
- byteNo = n / 8;
- bitNo = n % 8;
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
@@ -3525,7 +3537,7 @@ Datum
byteaSetBit(PG_FUNCTION_ARGS)
{
bytea *res = PG_GETARG_BYTEA_P_COPY(0);
- int32 n = PG_GETARG_INT32(1);
+ int64 n = PG_GETARG_INT64(1);
int32 newBit = PG_GETARG_INT32(2);
int len;
int oldByte,
@@ -3535,14 +3547,15 @@ byteaSetBit(PG_FUNCTION_ARGS)
len = VARSIZE(res) - VARHDRSZ;
- if (n < 0 || n >= len * 8)
+ if (n < 0 || n >= (int64) len * 8)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %d out of valid range, 0..%d",
- n, len * 8 - 1)));
+ errmsg("index %lld out of valid range, 0..%lld",
+ (long long) n, (long long) len * 8 - 1)));
- byteNo = n / 8;
- bitNo = n % 8;
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
/*
* sanity check!
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 498992ff84c..b077c0f0f59 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202004062
+#define CATALOG_VERSION_NO 202004071
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 2d1862a9d8a..c9902fa1234 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1439,10 +1439,10 @@
proname => 'set_byte', prorettype => 'bytea',
proargtypes => 'bytea int4 int4', prosrc => 'byteaSetByte' },
{ oid => '723', descr => 'get bit',
- proname => 'get_bit', prorettype => 'int4', proargtypes => 'bytea int4',
+ proname => 'get_bit', prorettype => 'int4', proargtypes => 'bytea int8',
prosrc => 'byteaGetBit' },
{ oid => '724', descr => 'set bit',
- proname => 'set_bit', prorettype => 'bytea', proargtypes => 'bytea int4 int4',
+ proname => 'set_bit', prorettype => 'bytea', proargtypes => 'bytea int8 int4',
prosrc => 'byteaSetBit' },
{ oid => '749', descr => 'substitute portion of string',
proname => 'overlay', prorettype => 'bytea',
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index e2016a8bc2d..a352a8b773c 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -32,8 +32,8 @@ extern int errdatatype(Oid datatypeOid);
extern int errdomainconstraint(Oid datatypeOid, const char *conname);
/* encode.c */
-extern unsigned hex_encode(const char *src, unsigned len, char *dst);
-extern unsigned hex_decode(const char *src, unsigned len, char *dst);
+extern uint64 hex_encode(const char *src, size_t len, char *dst);
+extern uint64 hex_decode(const char *src, size_t len, char *dst);
/* int.c */
extern int2vector *buildint2vector(const int16 *int2s, int n);
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 6c4443afcf1..6e98d183f61 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -1727,6 +1727,82 @@ SELECT sha512('The quick brown fox jumps over the lazy dog.');
(1 row)
--
+-- encode/decode
+--
+SELECT encode('\x1234567890abcdef00', 'hex');
+ encode
+--------------------
+ 1234567890abcdef00
+(1 row)
+
+SELECT decode('1234567890abcdef00', 'hex');
+ decode
+----------------------
+ \x1234567890abcdef00
+(1 row)
+
+SELECT encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, 'base64');
+ encode
+------------------------------------------------------------------------------
+ EjRWeJCrze8AARI0VniQq83vAAESNFZ4kKvN7wABEjRWeJCrze8AARI0VniQq83vAAESNFZ4kKvN+
+ 7wABEjRWeJCrze8AAQ==
+(1 row)
+
+SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
+ 'base64'), 'base64');
+ decode
+------------------------------------------------------------------------------------------------------------------------------------------------
+ \x1234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef0001
+(1 row)
+
+SELECT encode('\x1234567890abcdef00', 'escape');
+ encode
+-----------------------------
+ \x124Vx\220\253\315\357\000
+(1 row)
+
+SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
+ decode
+----------------------
+ \x1234567890abcdef00
+(1 row)
+
+--
+-- get_bit/set_bit etc
+--
+SELECT get_bit('\x1234567890abcdef00'::bytea, 43);
+ get_bit
+---------
+ 1
+(1 row)
+
+SELECT get_bit('\x1234567890abcdef00'::bytea, 99); -- error
+ERROR: index 99 out of valid range, 0..71
+SELECT set_bit('\x1234567890abcdef00'::bytea, 43, 0);
+ set_bit
+----------------------
+ \x1234567890a3cdef00
+(1 row)
+
+SELECT set_bit('\x1234567890abcdef00'::bytea, 99, 0); -- error
+ERROR: index 99 out of valid range, 0..71
+SELECT get_byte('\x1234567890abcdef00'::bytea, 3);
+ get_byte
+----------
+ 120
+(1 row)
+
+SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error
+ERROR: index 99 out of valid range, 0..8
+SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11);
+ set_byte
+----------------------
+ \x1234567890abcd0b00
+(1 row)
+
+SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error
+ERROR: index 99 out of valid range, 0..8
+--
-- test behavior of escape_string_warning and standard_conforming_strings options
--
set escape_string_warning = off;
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 3e28cd198f4..3e89159a4fd 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -598,6 +598,29 @@ SELECT sha512('');
SELECT sha512('The quick brown fox jumps over the lazy dog.');
--
+-- encode/decode
+--
+SELECT encode('\x1234567890abcdef00', 'hex');
+SELECT decode('1234567890abcdef00', 'hex');
+SELECT encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, 'base64');
+SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
+ 'base64'), 'base64');
+SELECT encode('\x1234567890abcdef00', 'escape');
+SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
+
+--
+-- get_bit/set_bit etc
+--
+SELECT get_bit('\x1234567890abcdef00'::bytea, 43);
+SELECT get_bit('\x1234567890abcdef00'::bytea, 99); -- error
+SELECT set_bit('\x1234567890abcdef00'::bytea, 43, 0);
+SELECT set_bit('\x1234567890abcdef00'::bytea, 99, 0); -- error
+SELECT get_byte('\x1234567890abcdef00'::bytea, 3);
+SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error
+SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11);
+SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error
+
+--
-- test behavior of escape_string_warning and standard_conforming_strings options
--
set escape_string_warning = off;