aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/oracle_compat.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/oracle_compat.c')
-rw-r--r--src/backend/utils/adt/oracle_compat.c144
1 files changed, 137 insertions, 7 deletions
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 9fcd5ae747a..d62315d0f61 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.70 2007/02/27 23:48:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.71 2007/09/18 17:41:17 adunstan Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1246,6 +1246,13 @@ translate(PG_FUNCTION_ARGS)
*
* Returns the decimal representation of the first character from
* string.
+ * If the string is empty we return 0.
+ * If the database encoding is UTF8, we return the Unicode codepoint.
+ * If the database encoding is any other multi-byte encoding, we
+ * return the value of the first byte if it is an ASCII character
+ * (range 1 .. 127), or raise an error.
+ * For all other encodings we return the value of the first byte,
+ * (range 1..255).
*
********************************************************************/
@@ -1253,11 +1260,57 @@ Datum
ascii(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
+ int encoding = GetDatabaseEncoding();
+ unsigned char *data;
if (VARSIZE(string) <= VARHDRSZ)
PG_RETURN_INT32(0);
- PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string)));
+ data = (unsigned char *) VARDATA(string);
+
+ if (encoding == PG_UTF8 && *data > 127)
+ {
+ /* return the code point for Unicode */
+
+ int result = 0, tbytes = 0, i;
+
+ if (*data >= 0xF0)
+ {
+ result = *data & 0x07;
+ tbytes = 3;
+ }
+ else if (*data >= 0xE0)
+ {
+ result = *data & 0x0F;
+ tbytes = 2;
+ }
+ else
+ {
+ Assert (*data > 0xC0);
+ result = *data & 0x1f;
+ tbytes = 1;
+ }
+
+ Assert (tbytes > 0);
+
+ for (i = 1; i <= tbytes; i++)
+ {
+ Assert ((data[i] & 0xC0) == 0x80);
+ result = (result << 6) + (data[i] & 0x3f);
+ }
+
+ PG_RETURN_INT32(result);
+ }
+ else
+ {
+ if (pg_encoding_max_length(encoding) > 1 && *data > 127)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character too large")));
+
+
+ PG_RETURN_INT32((int32) *data);
+ }
}
/********************************************************************
@@ -1270,19 +1323,96 @@ ascii(PG_FUNCTION_ARGS)
*
* Purpose:
*
- * Returns the character having the binary equivalent to val
+ * Returns the character having the binary equivalent to val.
+ *
+ * For UTF8 we treat the argumwent as a Unicode code point.
+ * For other multi-byte encodings we raise an error for arguments
+ * outside the strict ASCII range (1..127).
+ *
+ * It's important that we don't ever return a value that is not valid
+ * in the database encoding, so that this doesn't become a way for
+ * invalid data to enter the database.
*
********************************************************************/
Datum
chr(PG_FUNCTION_ARGS)
{
- int32 cvalue = PG_GETARG_INT32(0);
+ uint32 cvalue = PG_GETARG_UINT32(0);
text *result;
+ int encoding = GetDatabaseEncoding();
+
+ if (encoding == PG_UTF8 && cvalue > 127)
+ {
+ /* for Unicode we treat the argument as a code point */
+ int bytes ;
+ char *wch;
- result = (text *) palloc(VARHDRSZ + 1);
- SET_VARSIZE(result, VARHDRSZ + 1);
- *VARDATA(result) = (char) cvalue;
+ /* We only allow valid Unicode code points */
+ if (cvalue > 0x001fffff)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character too large for encoding: %d",
+ cvalue)));
+
+ if (cvalue > 0xffff)
+ bytes = 4;
+ else if (cvalue > 0x07ff)
+ bytes = 3;
+ else
+ bytes = 2;
+
+ result = (text *) palloc(VARHDRSZ + bytes);
+ SET_VARSIZE(result, VARHDRSZ + bytes);
+ wch = VARDATA(result);
+
+ if (bytes == 2)
+ {
+ wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
+ wch[1] = 0x80 | (cvalue & 0x3F);;
+ }
+ else if (bytes == 3)
+ {
+ wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
+ wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
+ wch[2] = 0x80 | (cvalue & 0x3F);
+ }
+ else
+ {
+ wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
+ wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
+ wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
+ wch[3] = 0x80 | (cvalue & 0x3F);
+ }
+
+ }
+
+ else
+ {
+ bool is_mb;
+
+ /* Error out on arguments that make no sense or that we
+ * can't validly represent in the encoding.
+ */
+
+ if (cvalue == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("null character not permitted")));
+
+ is_mb = pg_encoding_max_length(encoding) > 1;
+
+ if ((is_mb && (cvalue > 255)) || (! is_mb && (cvalue > 127)))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character too large for encoding: %d",
+ cvalue)));
+
+
+ result = (text *) palloc(VARHDRSZ + 1);
+ SET_VARSIZE(result, VARHDRSZ + 1);
+ *VARDATA(result) = (char) cvalue;
+ }
PG_RETURN_TEXT_P(result);
}