about | summary | refs | log | tree | commit | diff
path: root/src/common/jsonapi.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/jsonapi.c')
-rw-r--r-- src/common/jsonapi.c 58
1 files changed, 38 insertions, 20 deletions
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index f08a03c1690..7df231c3851 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -744,21 +744,21 @@ json_lex_string(JsonLexContext *lex)
}
if (lex->strval != NULL)
{
- char utf8str[5];
- int utf8len;
-
- if (ch >= 0xd800 && ch <= 0xdbff)
+ /*
+ * Combine surrogate pairs.
+ */
+ if (is_utf16_surrogate_first(ch))
{
if (hi_surrogate != -1)
return JSON_UNICODE_HIGH_SURROGATE;
- hi_surrogate = (ch & 0x3ff) << 10;
+ hi_surrogate = ch;
continue;
}
- else if (ch >= 0xdc00 && ch <= 0xdfff)
+ else if (is_utf16_surrogate_second(ch))
{
if (hi_surrogate == -1)
return JSON_UNICODE_LOW_SURROGATE;
- ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
+ ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
hi_surrogate = -1;
}
@@ -766,35 +766,52 @@ json_lex_string(JsonLexContext *lex)
return JSON_UNICODE_LOW_SURROGATE;
/*
- * For UTF8, replace the escape sequence by the actual
- * utf8 character in lex->strval. Do this also for other
- * encodings if the escape designates an ASCII character,
- * otherwise raise an error.
+ * Reject invalid cases. We can't have a value above
+ * 0xFFFF here (since we only accepted 4 hex digits
+ * above), so no need to test for out-of-range chars.
*/
-
if (ch == 0)
{
/* We can't allow this, since our TEXT type doesn't */
return JSON_UNICODE_CODE_POINT_ZERO;
}
- else if (lex->input_encoding == PG_UTF8)
+
+ /*
+ * Add the represented character to lex->strval. In the
+ * backend, we can let pg_unicode_to_server() handle any
+ * required character set conversion; in frontend, we can
+ * only deal with trivial conversions.
+ *
+ * Note: pg_unicode_to_server() will throw an error for a
+ * conversion failure, rather than returning a failure
+ * indication. That seems OK.
+ */
+#ifndef FRONTEND
+ {
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ appendStringInfoString(lex->strval, cbuf);
+ }
+#else
+ if (lex->input_encoding == PG_UTF8)
{
+ /* OK, we can map the code point to UTF8 easily */
+ char utf8str[5];
+ int utf8len;
+
unicode_to_utf8(ch, (unsigned char *) utf8str);
utf8len = pg_utf_mblen((unsigned char *) utf8str);
appendBinaryStringInfo(lex->strval, utf8str, utf8len);
}
else if (ch <= 0x007f)
{
- /*
- * This is the only way to designate things like a
- * form feed character in JSON, so it's useful in all
- * encodings.
- */
+ /* The ASCII range is the same in all encodings */
appendStringInfoChar(lex->strval, (char) ch);
}
else
return JSON_UNICODE_HIGH_ESCAPE;
-
+#endif /* FRONTEND */
}
}
else if (lex->strval != NULL)
@@ -1083,7 +1100,8 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
case JSON_UNICODE_ESCAPE_FORMAT:
return _("\"\\u\" must be followed by four hexadecimal digits.");
case JSON_UNICODE_HIGH_ESCAPE:
- return _("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.");
+ /* note: this case is only reachable in frontend not backend */
+ return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
case JSON_UNICODE_HIGH_SURROGATE:
return _("Unicode high surrogate must not follow a high surrogate.");
case JSON_UNICODE_LOW_SURROGATE: