aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/jsonpath_scan.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/jsonpath_scan.l')
-rw-r--r--src/backend/utils/adt/jsonpath_scan.l45
1 files changed, 9 insertions, 36 deletions
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 70681b789d3..be0a2cfa2f7 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -486,13 +486,6 @@ hexval(char c)
static void
addUnicodeChar(int ch)
{
- /*
- * For UTF8, replace the escape sequence by the actual
- * utf8 character in lex->strval. Do this also for other
- * encodings if the escape designates an ASCII character,
- * otherwise raise an error.
- */
-
if (ch == 0)
{
/* We can't allow this, since our TEXT type doesn't */
@@ -501,40 +494,20 @@ addUnicodeChar(int ch)
errmsg("unsupported Unicode escape sequence"),
errdetail("\\u0000 cannot be converted to text.")));
}
- else if (GetDatabaseEncoding() == PG_UTF8)
- {
- char utf8str[5];
- int utf8len;
-
- unicode_to_utf8(ch, (unsigned char *) utf8str);
- utf8len = pg_utf_mblen((unsigned char *) utf8str);
- addstring(false, utf8str, utf8len);
- }
- else if (ch <= 0x007f)
- {
- /*
- * This is the only way to designate things like a
- * form feed character in JSON, so it's useful in all
- * encodings.
- */
- addchar(false, (char) ch);
- }
else
{
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s", "jsonpath"),
- errdetail("Unicode escape values cannot be used for code "
- "point values above 007F when the server encoding "
- "is not UTF8.")));
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ addstring(false, cbuf, strlen(cbuf));
}
}
-/* Add unicode character and process its hi surrogate */
+/* Add unicode character, processing any surrogate pairs */
static void
addUnicode(int ch, int *hi_surrogate)
{
- if (ch >= 0xd800 && ch <= 0xdbff)
+ if (is_utf16_surrogate_first(ch))
{
if (*hi_surrogate != -1)
ereport(ERROR,
@@ -542,10 +515,10 @@ addUnicode(int ch, int *hi_surrogate)
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode high surrogate must not follow "
"a high surrogate.")));
- *hi_surrogate = (ch & 0x3ff) << 10;
+ *hi_surrogate = ch;
return;
}
- else if (ch >= 0xdc00 && ch <= 0xdfff)
+ else if (is_utf16_surrogate_second(ch))
{
if (*hi_surrogate == -1)
ereport(ERROR,
@@ -553,7 +526,7 @@ addUnicode(int ch, int *hi_surrogate)
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high "
"surrogate.")));
- ch = 0x10000 + *hi_surrogate + (ch & 0x3ff);
+ ch = surrogate_pair_to_codepoint(*hi_surrogate, ch);
*hi_surrogate = -1;
}
else if (*hi_surrogate != -1)