git.kaiwu.me - njs.git/commitdiff
Improved processing of invalid surrogate pairs in JSON strings.
author Alexander Borisov <alexander.borisov@nginx.com>
Tue, 28 May 2019 17:51:24 +0000 (20:51 +0300)
committer Alexander Borisov <alexander.borisov@nginx.com>
Tue, 28 May 2019 17:51:24 +0000 (20:51 +0300)
Previously, an exception was thrown on invalid surrogate pairs.
Now, all such pairs are converted to the replacement character.

njs/njs_json.c
njs/test/njs_unit_test.c

index aceffbd735519111faaba4c561a33b5303c28548..216c092311695e31039d5cc6de20cb1512363de3 100644 (file)
@@ -806,8 +806,8 @@ njs_json_parse_string(njs_json_parse_ctx_t *ctx, njs_value_t *value,
                 /* Surrogate pair. */
 
                 if (utf > 0xdbff || p[0] != '\\' || p[1] != 'u') {
-                    njs_json_parse_exception(ctx, "Invalid Unicode char", p);
-                    return NULL;
+                    s = nxt_utf8_encode(s, NXT_UTF8_REPLACEMENT);
+                    continue;
                 }
 
                 p += 2;
@@ -815,12 +815,17 @@ njs_json_parse_string(njs_json_parse_ctx_t *ctx, njs_value_t *value,
                 utf_low = njs_json_unicode(p);
                 p += 4;
 
-                if (nxt_slow_path(utf_low < 0xdc00 || utf_low > 0xdfff)) {
-                    njs_json_parse_exception(ctx, "Invalid surrogate pair", p);
-                    return NULL;
-                }
+                if (nxt_fast_path(utf_low >= 0xdc00 && utf_low <= 0xdfff)) {
+                    utf = njs_string_surrogate_pair(utf, utf_low);
 
-                utf = njs_string_surrogate_pair(utf, utf_low);
+                } else if (utf_low >= 0xd800 && utf_low <= 0xdbff) {
+                    utf = NXT_UTF8_REPLACEMENT;
+                    s = nxt_utf8_encode(s, NXT_UTF8_REPLACEMENT);
+
+                } else {
+                    utf = utf_low;
+                    s = nxt_utf8_encode(s, NXT_UTF8_REPLACEMENT);
+                }
             }
 
             s = nxt_utf8_encode(s, utf);
index 921e98a32efae861e0e6d8621906256f1c45adab..f68dcc1edcf7661dcd2a5b9c63c8769e07f1e33f 100644 (file)
@@ -11799,10 +11799,19 @@ static njs_unit_test_t  njs_test[] =
       nxt_string("SyntaxError: Unknown escape char at position 2") },
 
     { nxt_string("JSON.parse('\"\\\\uDC01\"')"),
-      nxt_string("SyntaxError: Invalid Unicode char at position 7") },
+      nxt_string("�") },
 
     { nxt_string("JSON.parse('\"\\\\uD801\\\\uE000\"')"),
-      nxt_string("SyntaxError: Invalid surrogate pair at position 13") },
+      nxt_string("�") },
+
+    { nxt_string("JSON.parse('\"\\\\uD83D\"')"),
+      nxt_string("�") },
+
+    { nxt_string("JSON.parse('\"\\\\uD800\\\\uDB00\"')"),
+      nxt_string("��") },
+
+    { nxt_string("JSON.parse('\"\\\\ud800[\"')"),
+      nxt_string("�[") },
 
     { nxt_string("JSON.parse('{')"),
       nxt_string("SyntaxError: Unexpected end of input at position 1") },