diff options
author | Bruce Momjian <bruce@momjian.us> | 2001-09-14 17:46:40 +0000 |
---|---|---|
committer | Bruce Momjian <bruce@momjian.us> | 2001-09-14 17:46:40 +0000 |
commit | c1fbf06654aca97a109a2346d182158e8f14ab01 (patch) | |
tree | c0e4e87bb80d3eca6a8dd5e6194dac791b630a5d /src/backend/utils/adt/encode.c | |
parent | e8d5b8d2901ddf1b4518ec0eb450028678f2082e (diff) | |
download | postgresql-c1fbf06654aca97a109a2346d182158e8f14ab01.tar.gz postgresql-c1fbf06654aca97a109a2346d182158e8f14ab01.zip |
> Here's a revised patch. Changes:
>
> 1. Now outputs '\\' instead of '\134' when using encode(bytea, 'escape')
> Note that I ended up leaving \0 as \000 so that there are no ambiguities
> when decoding something like, for example, \0123.
>
> 2. Fixed bug in byteain which allowed input values which were not valid
> octals (e.g. \789), to be parsed as if they were octals.
>
> Joe
>
Here's rev 2 of the bytea string support patch. Changes:
1. Added missing declaration for MatchBytea function
2. Added PQescapeBytea to fe-exec.c
3. Applies cleanly on cvs tip from this afternoon
I'm hoping that someone can review/approve/apply this before beta starts, so
I guess I'd vote (not that it counts for much) to delay beta a few days :-)
Joe Conway
Diffstat (limited to 'src/backend/utils/adt/encode.c')
-rw-r--r-- | src/backend/utils/adt/encode.c | 189 |
1 files changed, 180 insertions, 9 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 89ba357aa57..53825851847 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/encode.c,v 1.1 2001/07/12 14:05:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/encode.c,v 1.2 2001/09/14 17:46:40 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -19,8 +19,8 @@ struct pg_encoding { - unsigned (*encode_len) (unsigned dlen); - unsigned (*decode_len) (unsigned dlen); + unsigned (*encode_len) (const uint8 *data, unsigned dlen); + unsigned (*decode_len) (const uint8 *data, unsigned dlen); unsigned (*encode) (const uint8 *data, unsigned dlen, uint8 *res); unsigned (*decode) (const uint8 *data, unsigned dlen, uint8 *res); }; @@ -50,7 +50,7 @@ binary_encode(PG_FUNCTION_ARGS) if (enc == NULL) elog(ERROR, "No such encoding"); - resultlen = enc->encode_len(datalen); + resultlen = enc->encode_len(VARDATA(data), datalen); result = palloc(VARHDRSZ + resultlen); res = enc->encode(VARDATA(data), datalen, VARDATA(result)); @@ -81,7 +81,7 @@ binary_decode(PG_FUNCTION_ARGS) if (enc == NULL) elog(ERROR, "No such encoding"); - resultlen = enc->decode_len(datalen); + resultlen = enc->decode_len(VARDATA(data), datalen); result = palloc(VARHDRSZ + resultlen); res = enc->decode(VARDATA(data), datalen, VARDATA(result)); @@ -169,13 +169,13 @@ hex_decode(const uint8 * src, unsigned len, uint8 * dst) } static unsigned -hex_enc_len(unsigned srclen) +hex_enc_len(const uint8 * src, unsigned srclen) { return srclen << 1; } static unsigned -hex_dec_len(unsigned srclen) +hex_dec_len(const uint8 * src, unsigned srclen) { return srclen >> 1; } @@ -308,19 +308,189 @@ b64_decode(const uint8 * src, unsigned len, uint8 * dst) static unsigned -b64_enc_len(unsigned srclen) +b64_enc_len(const uint8 * src, unsigned srclen) { /* 3 bytes will be converted to 4, linefeed after 76 chars */ return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4); } static unsigned -b64_dec_len(unsigned srclen) +b64_dec_len(const uint8 * src, unsigned srclen) { return (srclen * 3) >> 2; } /* + * Escape + * Minimally escape bytea to text. + * De-escape text to bytea. + * + * Only two characters are escaped: + * \0 (null) and \\ (backslash) + * + * De-escapes \\ and any \### octal + */ + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +static unsigned +esc_encode(const uint8 *src, unsigned srclen, uint8 *dst) +{ + const uint8 *end = src + srclen; + uint8 *rp = dst; + int val; + int len = 0; + + while (src < end) + { + if (*src == '\0') + { + val = *src; + rp[0] = '\\'; + rp[1] = '0'; + rp[2] = '0'; + rp[3] = '0'; + rp += 4; + len += 4; + } + else if (*src == '\\') + { + val = *src; + rp[0] = '\\'; + rp[1] = '\\'; + rp += 2; + len += 2; + } + else + { + *rp++ = *src; + len++; + } + + src++; + } + *rp = '\0'; + + return len; +} + +static unsigned +esc_decode(const uint8 *src, unsigned srclen, uint8 *dst) +{ + const uint8 *end = src + srclen; + uint8 *rp = dst; + int val; + int len = 0; + + while (src < end) + { + if (src[0] != '\\') + { + *rp++ = *src++; + } + else if ( (src[0] == '\\') && + (src[1] >= '0' && src[1] <= '3') && + (src[2] >= '0' && src[2] <= '7') && + (src[3] >= '0' && src[3] <= '7') ) + { + val = VAL(src[1]); + val <<= 3; + val += VAL(src[2]); + val <<= 3; + *rp++ = val + VAL(src[3]); + src += 4; + } + else if ( (src[0] == '\\') && + (src[1] == '\\') ) + { + *rp++ = '\\'; + src += 2; + } + else + { + /* + * One backslash, not followed by ### valid octal. + * Should never get here, since esc_dec_len does same check. + */ + elog(ERROR, "decode: Bad input string for type bytea"); + } + + len++; + } + return len; +} + +static unsigned +esc_enc_len(const uint8 *src, unsigned srclen) +{ + const uint8 *end = src + srclen; + int len = 0; + + while (src < end) + { + if (*src == '\0') + len += 4; + else if (*src == '\\') + len += 2; + else + len++; + + src++; + } + + /* + * Allow for null terminator + */ + len++; + + return len; +} + +static unsigned +esc_dec_len(const uint8 *src, unsigned srclen) +{ + const uint8 *end = src + srclen; + int len = 0; + + while (src < end) + { + if (src[0] != '\\') + { + src++; + } + else if ( (src[0] == '\\') && + (src[1] >= '0' && src[1] <= '3') && + (src[2] >= '0' && src[2] <= '7') && + (src[3] >= '0' && src[3] <= '7') ) + { + /* + * backslash + valid octal + */ + src += 4; + } + else if ( (src[0] == '\\') && + (src[1] == '\\') ) + { + /* + * two backslashes = backslash + */ + src += 2; + } + else + { + /* + * one backslash, not followed by ### valid octal + */ + elog(ERROR, "decode: Bad input string for type bytea"); + } + + len++; + } + return len; +} + +/* * Common */ @@ -330,6 +500,7 @@ static struct { } enclist[] = { {"hex", { hex_enc_len, hex_dec_len, hex_encode, hex_decode }}, {"base64", { b64_enc_len, b64_dec_len, b64_encode, b64_decode }}, + {"escape", { esc_enc_len, esc_dec_len, esc_encode, esc_decode }}, {NULL, { NULL, NULL, NULL, NULL } } }; |