diff options
Diffstat (limited to 'src/common/jsonapi.c')
-rw-r--r-- | src/common/jsonapi.c | 58 |
1 files changed, 38 insertions, 20 deletions
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index f08a03c1690..7df231c3851 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -744,21 +744,21 @@ json_lex_string(JsonLexContext *lex) } if (lex->strval != NULL) { - char utf8str[5]; - int utf8len; - - if (ch >= 0xd800 && ch <= 0xdbff) + /* + * Combine surrogate pairs. + */ + if (is_utf16_surrogate_first(ch)) { if (hi_surrogate != -1) return JSON_UNICODE_HIGH_SURROGATE; - hi_surrogate = (ch & 0x3ff) << 10; + hi_surrogate = ch; continue; } - else if (ch >= 0xdc00 && ch <= 0xdfff) + else if (is_utf16_surrogate_second(ch)) { if (hi_surrogate == -1) return JSON_UNICODE_LOW_SURROGATE; - ch = 0x10000 + hi_surrogate + (ch & 0x3ff); + ch = surrogate_pair_to_codepoint(hi_surrogate, ch); hi_surrogate = -1; } @@ -766,35 +766,52 @@ json_lex_string(JsonLexContext *lex) return JSON_UNICODE_LOW_SURROGATE; /* - * For UTF8, replace the escape sequence by the actual - * utf8 character in lex->strval. Do this also for other - * encodings if the escape designates an ASCII character, - * otherwise raise an error. + * Reject invalid cases. We can't have a value above + * 0xFFFF here (since we only accepted 4 hex digits + * above), so no need to test for out-of-range chars. */ - if (ch == 0) { /* We can't allow this, since our TEXT type doesn't */ return JSON_UNICODE_CODE_POINT_ZERO; } - else if (lex->input_encoding == PG_UTF8) + + /* + * Add the represented character to lex->strval. In the + * backend, we can let pg_unicode_to_server() handle any + * required character set conversion; in frontend, we can + * only deal with trivial conversions. + * + * Note: pg_unicode_to_server() will throw an error for a + * conversion failure, rather than returning a failure + * indication. That seems OK. + */ +#ifndef FRONTEND + { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + + pg_unicode_to_server(ch, (unsigned char *) cbuf); + appendStringInfoString(lex->strval, cbuf); + } +#else + if (lex->input_encoding == PG_UTF8) { + /* OK, we can map the code point to UTF8 easily */ + char utf8str[5]; + int utf8len; + unicode_to_utf8(ch, (unsigned char *) utf8str); utf8len = pg_utf_mblen((unsigned char *) utf8str); appendBinaryStringInfo(lex->strval, utf8str, utf8len); } else if (ch <= 0x007f) { - /* - * This is the only way to designate things like a - * form feed character in JSON, so it's useful in all - * encodings. - */ + /* The ASCII range is the same in all encodings */ appendStringInfoChar(lex->strval, (char) ch); } else return JSON_UNICODE_HIGH_ESCAPE; - +#endif /* FRONTEND */ } } else if (lex->strval != NULL) @@ -1083,7 +1100,8 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex) case JSON_UNICODE_ESCAPE_FORMAT: return _("\"\\u\" must be followed by four hexadecimal digits."); case JSON_UNICODE_HIGH_ESCAPE: - return _("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."); + /* note: this case is only reachable in frontend not backend */ + return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8."); case JSON_UNICODE_HIGH_SURROGATE: return _("Unicode high surrogate must not follow a high surrogate."); case JSON_UNICODE_LOW_SURROGATE: |