improved compatibility of std.parseExtJSON() with JSON5

author: Fabrice Bellard <fabrice@bellard.org> 2025-05-24 10:16:10 +0200
committer: Fabrice Bellard <fabrice@bellard.org> 2025-05-24 10:16:10 +0200
commit: 1dfaa616801a8f559eb7abf232f008a27ff5958a (patch)
tree: 44780bf9c77384a05404c7b0728466d8486a8e42
parent: 8b2a1249a46e85271aa7da1738875eb1cbec06d0 (diff)
download: quickjs-1dfaa616801a8f559eb7abf232f008a27ff5958a.tar.gz
quickjs-1dfaa616801a8f559eb7abf232f008a27ff5958a.zip
3 files changed, 64 insertions, 16 deletions
diff --git a/doc/quickjs.texi b/doc/quickjs.texi
index eef00b7..9130b47 100644
--- a/doc/quickjs.texi
+++ b/doc/quickjs.texi
@@ -449,17 +449,20 @@ optional properties:
 
 @item parseExtJSON(str)
 
-  Parse @code{str} using a superset of @code{JSON.parse}. The
-  following extensions are accepted:
+  Parse @code{str} using a superset of @code{JSON.parse}. The superset
+  is very close to the JSON5 specification. The following extensions
+  are accepted:
 
   @itemize
   @item Single line and multiline comments
   @item unquoted properties (ASCII-only Javascript identifiers)
   @item trailing comma in array and object definitions
   @item single quoted strings
+  @item @code{\v} escape in strings, and multi-line strings (a line ending with a trailing @code{\} continues on the next line)
   @item @code{\f} and @code{\v} are accepted as space characters
-  @item leading plus in numbers
-  @item octal (@code{0o} prefix) and hexadecimal (@code{0x} prefix) numbers
+  @item leading plus or decimal point in numbers
+  @item hexadecimal (@code{0x} prefix), octal (@code{0o} prefix) and binary (@code{0b} prefix) integers
+  @item @code{NaN} and @code{Infinity} (with an optional leading sign) are accepted as numbers
   @end itemize
 @end table
 
diff --git a/quickjs.c b/quickjs.c
index 2c545d9..e56adb8 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -21824,6 +21824,7 @@ static __exception int next_token(JSParseState *s)
 }
 
 /* 'c' is the first character. Return JS_ATOM_NULL in case of error */
+/* XXX: accept Unicode identifiers as JSON5 does? */
 static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
 {
     const uint8_t *p;
@@ -21899,11 +21900,22 @@ static int json_parse_string(JSParseState *s, const uint8_t **pp, int sep)
                     c = (c << 4) | h;
                 }
                 break;
+            case '\n':
+                if (s->ext_json)
+                    continue;
+                goto bad_escape;
+            case 'v':
+                if (s->ext_json) {
+                    c = '\v';
+                    break;
+                }
+                goto bad_escape;
             default:
                 if (c == sep)
                     break;
                 if (p > s->buf_end)
                     goto end_of_input;
+            bad_escape:
                 js_parse_error_pos(s, p - 1, "Bad escaped character");
                 goto fail;
             }
@@ -21943,8 +21955,23 @@ static int json_parse_number(JSParseState *s, const uint8_t **pp)
     if (*p == '+' || *p == '-')
         p++;
 
-    if (!is_digit(*p))
-        return js_parse_error_pos(s, p, "Unexpected token '%c'", *p_start);
+    if (!is_digit(*p)) {
+        if (s->ext_json) {
+            if (strstart((const char *)p, "Infinity", (const char **)&p)) {
+                d = 1.0 / 0.0;
+                if (*p_start == '-')
+                    d = -d;
+                goto done;
+            } else if (strstart((const char *)p, "NaN", (const char **)&p)) {
+                d = NAN;
+                goto done;
+            } else if (*p != '.') {
+                goto unexpected_token;
+            }
+        } else {
+            goto unexpected_token;
+        }
+    }
 
     if (p[0] == '0') {
         if (s->ext_json) {
@@ -21962,8 +21989,10 @@ static int json_parse_number(JSParseState *s, const uint8_t **pp)
             }
             if (radix != 10) {
                 /* prefix is present */
-                if (to_digit(*p) >= radix)
+                if (to_digit(*p) >= radix) {
+                unexpected_token:
                     return js_parse_error_pos(s, p, "Unexpected token '%c'", *p);
+                }
                 d = js_atod((const char *)p_start, (const char **)&p, 0,
                             JS_ATOD_INT_ONLY | JS_ATOD_ACCEPT_BIN_OCT, &atod_mem);
                 goto done;
@@ -22122,7 +22151,6 @@ static __exception int json_next_token(JSParseState *s)
     case 'Y': case 'Z':
     case '_':
     case '$':
-        /* identifier : only pure ascii characters are accepted */
         p++;
         atom = json_parse_ident(s, &p, c);
         if (atom == JS_ATOM_NULL)
@@ -22133,17 +22161,16 @@ static __exception int json_next_token(JSParseState *s)
         s->token.val = TOK_IDENT;
         break;
     case '+':
-        if (!s->ext_json || !is_digit(p[1]))
+        if (!s->ext_json)
             goto def_token;
         goto parse_number;
-    case '0':
-        if (is_digit(p[1]))
+    case '.':
+        if (s->ext_json && is_digit(p[1]))
+            goto parse_number;
+        else
             goto def_token;
-        goto parse_number;
     case '-':
-        if (!is_digit(p[1]))
-            goto def_token;
-        goto parse_number;
+    case '0':
     case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8':
     case '9':
@@ -46187,6 +46214,12 @@ static JSValue json_parse_value(JSParseState *s)
             val = JS_NewBool(ctx, s->token.u.ident.atom == JS_ATOM_true);
         } else if (s->token.u.ident.atom == JS_ATOM_null) {
             val = JS_NULL;
+        } else if (s->token.u.ident.atom == JS_ATOM_NaN && s->ext_json) {
+            /* Note: JSON5 identifier handling is ambiguous, e.g. is
+               '{ NaN: 1 }' a valid JSON5 production? */
+            val = JS_NewFloat64(s->ctx, NAN);
+        } else if (s->token.u.ident.atom == JS_ATOM_Infinity && s->ext_json) {
+            val = JS_NewFloat64(s->ctx, INFINITY);
         } else {
             goto def_token;
         }
diff --git a/tests/test_std.js b/tests/test_std.js
index 0bbd9e7..3debe40 100644
--- a/tests/test_std.js
+++ b/tests/test_std.js
@@ -129,15 +129,27 @@ function test_popen()
 function test_ext_json()
 {
     var expected, input, obj;
-    expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"s":"str"}';
+    expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"b":"abc\\u000bd","s":"str"}';
     input = `{ "x":false, /*comments are allowed */
                "y":true,  // also a comment
                z2:null, // unquoted property names
                "a":[+1,0o10,0xa0,], // plus prefix, octal, hexadecimal
+               "b": "ab\\
+c\\vd", // multi-line strings, '\\v' escape
                "s":'str',} // trailing comma in objects and arrays, single quoted string
             `;
     obj = std.parseExtJSON(input);
     assert(JSON.stringify(obj), expected);
+
+    obj = std.parseExtJSON('[Infinity, +Infinity, -Infinity, NaN, +NaN, -NaN, .1, -.2]');
+    assert(obj[0], Infinity);
+    assert(obj[1], Infinity);
+    assert(obj[2], -Infinity);
+    assert(obj[3], NaN);
+    assert(obj[4], NaN);
+    assert(obj[5], NaN);
+    assert(obj[6], 0.1);
+    assert(obj[7], -0.2);
 }
 
 function test_os()
author	Fabrice Bellard <fabrice@bellard.org>	2025-05-24 10:16:10 +0200
committer	Fabrice Bellard <fabrice@bellard.org>	2025-05-24 10:16:10 +0200
commit	1dfaa616801a8f559eb7abf232f008a27ff5958a (patch)
tree	44780bf9c77384a05404c7b0728466d8486a8e42
parent	8b2a1249a46e85271aa7da1738875eb1cbec06d0 (diff)
download	quickjs-1dfaa616801a8f559eb7abf232f008a27ff5958a.tar.gz quickjs-1dfaa616801a8f559eb7abf232f008a27ff5958a.zip