diff options
Diffstat (limited to 'src/backend/utils/adt/oracle_compat.c')
-rw-r--r-- | src/backend/utils/adt/oracle_compat.c | 144 |
1 files changed, 137 insertions, 7 deletions
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index 9fcd5ae747a..d62315d0f61 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.70 2007/02/27 23:48:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.71 2007/09/18 17:41:17 adunstan Exp $ * *------------------------------------------------------------------------- */ @@ -1246,6 +1246,13 @@ translate(PG_FUNCTION_ARGS) * * Returns the decimal representation of the first character from * string. + * If the string is empty we return 0. + * If the database encoding is UTF8, we return the Unicode codepoint. + * If the database encoding is any other multi-byte encoding, we + * return the value of the first byte if it is an ASCII character + * (range 1 .. 127), or raise an error. + * For all other encodings we return the value of the first byte, + * (range 1..255). * ********************************************************************/ @@ -1253,11 +1260,57 @@ Datum ascii(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); + int encoding = GetDatabaseEncoding(); + unsigned char *data; if (VARSIZE(string) <= VARHDRSZ) PG_RETURN_INT32(0); - PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string))); + data = (unsigned char *) VARDATA(string); + + if (encoding == PG_UTF8 && *data > 127) + { + /* return the code point for Unicode */ + + int result = 0, tbytes = 0, i; + + if (*data >= 0xF0) + { + result = *data & 0x07; + tbytes = 3; + } + else if (*data >= 0xE0) + { + result = *data & 0x0F; + tbytes = 2; + } + else + { + Assert (*data > 0xC0); + result = *data & 0x1f; + tbytes = 1; + } + + Assert (tbytes > 0); + + for (i = 1; i <= tbytes; i++) + { + Assert ((data[i] & 0xC0) == 0x80); + result = (result << 6) + (data[i] & 0x3f); + } + + PG_RETURN_INT32(result); + } + else + { + if (pg_encoding_max_length(encoding) > 1 && *data > 127) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large"))); + + + PG_RETURN_INT32((int32) *data); + } } /******************************************************************** @@ -1270,19 +1323,96 @@ ascii(PG_FUNCTION_ARGS) * * Purpose: * - * Returns the character having the binary equivalent to val + * Returns the character having the binary equivalent to val. + * + * For UTF8 we treat the argumwent as a Unicode code point. + * For other multi-byte encodings we raise an error for arguments + * outside the strict ASCII range (1..127). + * + * It's important that we don't ever return a value that is not valid + * in the database encoding, so that this doesn't become a way for + * invalid data to enter the database. * ********************************************************************/ Datum chr(PG_FUNCTION_ARGS) { - int32 cvalue = PG_GETARG_INT32(0); + uint32 cvalue = PG_GETARG_UINT32(0); text *result; + int encoding = GetDatabaseEncoding(); + + if (encoding == PG_UTF8 && cvalue > 127) + { + /* for Unicode we treat the argument as a code point */ + int bytes ; + char *wch; - result = (text *) palloc(VARHDRSZ + 1); - SET_VARSIZE(result, VARHDRSZ + 1); - *VARDATA(result) = (char) cvalue; + /* We only allow valid Unicode code points */ + if (cvalue > 0x001fffff) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large for encoding: %d", + cvalue))); + + if (cvalue > 0xffff) + bytes = 4; + else if (cvalue > 0x07ff) + bytes = 3; + else + bytes = 2; + + result = (text *) palloc(VARHDRSZ + bytes); + SET_VARSIZE(result, VARHDRSZ + bytes); + wch = VARDATA(result); + + if (bytes == 2) + { + wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F); + wch[1] = 0x80 | (cvalue & 0x3F);; + } + else if (bytes == 3) + { + wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F); + wch[1] = 0x80 | ((cvalue >> 6) & 0x3F); + wch[2] = 0x80 | (cvalue & 0x3F); + } + else + { + wch[0] = 0xF0 | ((cvalue >> 18) & 0x07); + wch[1] = 0x80 | ((cvalue >> 12) & 0x3F); + wch[2] = 0x80 | ((cvalue >> 6) & 0x3F); + wch[3] = 0x80 | (cvalue & 0x3F); + } + + } + + else + { + bool is_mb; + + /* Error out on arguments that make no sense or that we + * can't validly represent in the encoding. + */ + + if (cvalue == 0) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("null character not permitted"))); + + is_mb = pg_encoding_max_length(encoding) > 1; + + if ((is_mb && (cvalue > 255)) || (! is_mb && (cvalue > 127))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large for encoding: %d", + cvalue))); + + + result = (text *) palloc(VARHDRSZ + 1); + SET_VARSIZE(result, VARHDRSZ + 1); + *VARDATA(result) = (char) cvalue; + } PG_RETURN_TEXT_P(result); } |