aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
authorPeter Eisentraut <peter_e@gmx.net>2008-10-29 08:04:54 +0000
committerPeter Eisentraut <peter_e@gmx.net>2008-10-29 08:04:54 +0000
commit06735e32566ca2250afdc371b8b2521ee07ad922 (patch)
tree0ce96005b5ea63fcb9295f21a595dd376ed3b74d /src/backend/utils
parent05bba3d176e0adc1a032d5c8c6ea2a7622c7dd0d (diff)
downloadpostgresql-06735e32566ca2250afdc371b8b2521ee07ad922.tar.gz
postgresql-06735e32566ca2250afdc371b8b2521ee07ad922.zip
Unicode escapes in strings and identifiers
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/adt/xml.c25
-rw-r--r--src/backend/utils/mb/wchar.c37
2 files changed, 38 insertions, 24 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index e728e1254f5..c346299caa8 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.79 2008/10/14 17:12:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.80 2008/10/29 08:04:53 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1497,28 +1497,7 @@ unicode_to_sqlchar(pg_wchar c)
{
static unsigned char utf8string[5]; /* need trailing zero */
- if (c <= 0x7F)
- {
- utf8string[0] = c;
- }
- else if (c <= 0x7FF)
- {
- utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
- utf8string[1] = 0x80 | (c & 0x3F);
- }
- else if (c <= 0xFFFF)
- {
- utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
- utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
- utf8string[2] = 0x80 | (c & 0x3F);
- }
- else
- {
- utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
- utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
- utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
- utf8string[3] = 0x80 | (c & 0x3F);
- }
+ unicode_to_utf8(c, utf8string);
return (char *) pg_do_encoding_conversion(utf8string,
pg_mblen((char *) utf8string),
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 2f11b3aa9b0..2c6c3f3ff1c 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.67 2008/10/27 19:37:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.68 2008/10/29 08:04:53 petere Exp $
*
*/
/* can be used in either frontend or backend */
@@ -419,6 +419,41 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
return cnt;
}
+
+/*
+ * Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
+ * space allocated.
+ */
+unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+ if (c <= 0x7F)
+ {
+ utf8string[0] = c;
+ }
+ else if (c <= 0x7FF)
+ {
+ utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
+ utf8string[1] = 0x80 | (c & 0x3F);
+ }
+ else if (c <= 0xFFFF)
+ {
+ utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
+ utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
+ utf8string[2] = 0x80 | (c & 0x3F);
+ }
+ else
+ {
+ utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
+ utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
+ utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
+ utf8string[3] = 0x80 | (c & 0x3F);
+ }
+
+ return utf8string;
+}
+
+
/*
* Return the byte length of a UTF8 character pointed to by s
*