about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/json.c | 52
-rw-r--r-- src/test/regress/expected/json.out | 23
-rw-r--r-- src/test/regress/sql/json.sql | 8
3 files changed, 83 insertions, 0 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index aaf99bddf27..d8046c5b54d 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -646,6 +646,7 @@ json_lex_string(JsonLexContext *lex)
{
char *s;
int len;
+ int hi_surrogate = -1;
if (lex->strval != NULL)
resetStringInfo(lex->strval);
@@ -718,6 +719,36 @@ json_lex_string(JsonLexContext *lex)
int utf8len;
char *converted;
+ if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("high order surrogate must not follow a high order surrogate."),
+ report_json_context(lex)));
+ hi_surrogate = (ch & 0x3ff) << 10;
+ continue;
+ }
+ else if (ch >= 0xdc00 && ch <= 0xdfff)
+ {
+ if (hi_surrogate == -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+ ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
+ hi_surrogate = -1;
+ }
+
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
unicode_to_utf8(ch, (unsigned char *) utf8str);
utf8len = pg_utf_mblen((unsigned char *) utf8str);
utf8str[utf8len] = '\0';
@@ -730,6 +761,13 @@ json_lex_string(JsonLexContext *lex)
}
else if (lex->strval != NULL)
{
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
switch (*s)
{
case '"':
@@ -784,11 +822,25 @@ json_lex_string(JsonLexContext *lex)
}
else if (lex->strval != NULL)
{
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
appendStringInfoChar(lex->strval, *s);
}
}
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
/* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator;
lex->token_terminator = s + 1;
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 1d7cf5ff2f3..293c7429627 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -920,3 +920,26 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
ERROR: cannot call json_populate_recordset on a nested object
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
ERROR: cannot call json_populate_recordset on a nested object
+-- handling of unicode surrogate pairs
+select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
+ correct
+----------------------------
+ "\ud83d\ude04\ud83d\udc36"
+(1 row)
+
+select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
+ERROR: invalid input syntax for type json
+DETAIL: high order surrogate must not follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
+ERROR: invalid input syntax for type json
+DETAIL: low order surrogate must follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
+ERROR: invalid input syntax for type json
+DETAIL: low order surrogate must follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
+ERROR: invalid input syntax for type json
+DETAIL: low order surrogate must follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 8a136d7a273..5b6bc36517e 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -296,3 +296,11 @@ select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q;
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
+
+-- handling of unicode surrogate pairs
+
+select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
+select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
+select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
+select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
+select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate