diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/json.c | 52 |
-rw-r--r-- | src/test/regress/expected/json.out | 23 |
-rw-r--r-- | src/test/regress/sql/json.sql | 8 |
3 files changed, 83 insertions, 0 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index aaf99bddf27..d8046c5b54d 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -646,6 +646,7 @@ json_lex_string(JsonLexContext *lex) { char *s; int len; + int hi_surrogate = -1; if (lex->strval != NULL) resetStringInfo(lex->strval); @@ -718,6 +719,36 @@ json_lex_string(JsonLexContext *lex) int utf8len; char *converted; + if (ch >= 0xd800 && ch <= 0xdbff) + { + if (hi_surrogate != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("high order surrogate must not follow a high order surrogate."), + report_json_context(lex))); + hi_surrogate = (ch & 0x3ff) << 10; + continue; + } + else if (ch >= 0xdc00 && ch <= 0xdfff) + { + if (hi_surrogate == -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("low order surrogate must follow a high order surrogate."), + report_json_context(lex))); + ch = 0x10000 + hi_surrogate + (ch & 0x3ff); + hi_surrogate = -1; + } + + if (hi_surrogate != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("low order surrogate must follow a high order surrogate."), + report_json_context(lex))); + unicode_to_utf8(ch, (unsigned char *) utf8str); utf8len = pg_utf_mblen((unsigned char *) utf8str); utf8str[utf8len] = '\0'; @@ -730,6 +761,13 @@ json_lex_string(JsonLexContext *lex) } else if (lex->strval != NULL) { + if (hi_surrogate != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("low order surrogate must follow a high order surrogate."), + report_json_context(lex))); + switch (*s) { case '"': @@ -784,11 +822,25 @@ json_lex_string(JsonLexContext *lex) } else if (lex->strval != NULL) { + if (hi_surrogate != -1) + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("low order surrogate must follow a high order surrogate."), + report_json_context(lex))); + appendStringInfoChar(lex->strval, *s); } } + if (hi_surrogate != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("low order surrogate must follow a high order surrogate."), + report_json_context(lex))); + /* Hooray, we found the end of the string! */ lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 1d7cf5ff2f3..293c7429627 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -920,3 +920,26 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3 ERROR: cannot call json_populate_recordset on a nested object select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; ERROR: cannot call json_populate_recordset on a nested object +-- handling of unicode surrogate pairs +select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct; + correct +---------------------------- + "\ud83d\ude04\ud83d\udc36" +(1 row) + +select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row +ERROR: invalid input syntax for type json +DETAIL: high order surrogate must not follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order +ERROR: invalid input syntax for type json +DETAIL: low order surrogate must follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... 
+select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate +ERROR: invalid input syntax for type json +DETAIL: low order surrogate must follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate +ERROR: invalid input syntax for type json +DETAIL: low order surrogate must follow a high order surrogate. +CONTEXT: JSON data, line 1: { "a":... diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 8a136d7a273..5b6bc36517e 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -296,3 +296,11 @@ select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b": select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q; select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q; + +-- handling of unicode surrogate pairs + +select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct; +select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row +select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order +select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate +select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate |