summary refs log tree commit diff
diff options
context:
space:
mode:
author Fabrice Bellard <fabrice@bellard.org> 2025-05-24 10:16:10 +0200
committer Fabrice Bellard <fabrice@bellard.org> 2025-05-24 10:16:10 +0200
commit 1dfaa616801a8f559eb7abf232f008a27ff5958a (patch)
tree 44780bf9c77384a05404c7b0728466d8486a8e42
parent 8b2a1249a46e85271aa7da1738875eb1cbec06d0 (diff)
download quickjs-1dfaa616801a8f559eb7abf232f008a27ff5958a.tar.gz
quickjs-1dfaa616801a8f559eb7abf232f008a27ff5958a.zip
improved compatibility of std.parseExtJSON() with JSON5
-rw-r--r-- doc/quickjs.texi 11
-rw-r--r-- quickjs.c 55
-rw-r--r-- tests/test_std.js 14
3 files changed, 64 insertions, 16 deletions
diff --git a/doc/quickjs.texi b/doc/quickjs.texi
index eef00b7..9130b47 100644
--- a/doc/quickjs.texi
+++ b/doc/quickjs.texi
@@ -449,17 +449,20 @@ optional properties:
@item parseExtJSON(str)
- Parse @code{str} using a superset of @code{JSON.parse}. The
- following extensions are accepted:
+ Parse @code{str} using a superset of @code{JSON.parse}. The superset
+ is very close to the JSON5 specification. The following extensions
+ are accepted:
@itemize
@item Single line and multiline comments
@item unquoted properties (ASCII-only Javascript identifiers)
@item trailing comma in array and object definitions
@item single quoted strings
+ @item @code{\v} escape and multi-line strings with trailing @code{\}
@item @code{\f} and @code{\v} are accepted as space characters
- @item leading plus in numbers
- @item octal (@code{0o} prefix) and hexadecimal (@code{0x} prefix) numbers
+ @item leading plus or decimal point in numbers
+ @item hexadecimal (@code{0x} prefix), octal (@code{0o} prefix) and binary (@code{0b} prefix) integers
+ @item @code{NaN} and @code{Infinity} are accepted as numbers
@end itemize
@end table
diff --git a/quickjs.c b/quickjs.c
index 2c545d9..e56adb8 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -21824,6 +21824,7 @@ static __exception int next_token(JSParseState *s)
}
/* 'c' is the first character. Return JS_ATOM_NULL in case of error */
+/* XXX: accept unicode identifiers as JSON5 ? */
static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
{
const uint8_t *p;
@@ -21899,11 +21900,22 @@ static int json_parse_string(JSParseState *s, const uint8_t **pp, int sep)
c = (c << 4) | h;
}
break;
+ case '\n':
+ if (s->ext_json)
+ continue;
+ goto bad_escape;
+ case 'v':
+ if (s->ext_json) {
+ c = '\v';
+ break;
+ }
+ goto bad_escape;
default:
if (c == sep)
break;
if (p > s->buf_end)
goto end_of_input;
+ bad_escape:
js_parse_error_pos(s, p - 1, "Bad escaped character");
goto fail;
}
@@ -21943,8 +21955,23 @@ static int json_parse_number(JSParseState *s, const uint8_t **pp)
if (*p == '+' || *p == '-')
p++;
- if (!is_digit(*p))
- return js_parse_error_pos(s, p, "Unexpected token '%c'", *p_start);
+ if (!is_digit(*p)) {
+ if (s->ext_json) {
+ if (strstart((const char *)p, "Infinity", (const char **)&p)) {
+ d = 1.0 / 0.0;
+ if (*p_start == '-')
+ d = -d;
+ goto done;
+ } else if (strstart((const char *)p, "NaN", (const char **)&p)) {
+ d = NAN;
+ goto done;
+ } else if (*p != '.') {
+ goto unexpected_token;
+ }
+ } else {
+ goto unexpected_token;
+ }
+ }
if (p[0] == '0') {
if (s->ext_json) {
@@ -21962,8 +21989,10 @@ static int json_parse_number(JSParseState *s, const uint8_t **pp)
}
if (radix != 10) {
/* prefix is present */
- if (to_digit(*p) >= radix)
+ if (to_digit(*p) >= radix) {
+ unexpected_token:
return js_parse_error_pos(s, p, "Unexpected token '%c'", *p);
+ }
d = js_atod((const char *)p_start, (const char **)&p, 0,
JS_ATOD_INT_ONLY | JS_ATOD_ACCEPT_BIN_OCT, &atod_mem);
goto done;
@@ -22122,7 +22151,6 @@ static __exception int json_next_token(JSParseState *s)
case 'Y': case 'Z':
case '_':
case '$':
- /* identifier : only pure ascii characters are accepted */
p++;
atom = json_parse_ident(s, &p, c);
if (atom == JS_ATOM_NULL)
@@ -22133,17 +22161,16 @@ static __exception int json_next_token(JSParseState *s)
s->token.val = TOK_IDENT;
break;
case '+':
- if (!s->ext_json || !is_digit(p[1]))
+ if (!s->ext_json)
goto def_token;
goto parse_number;
- case '0':
- if (is_digit(p[1]))
+ case '.':
+ if (s->ext_json && is_digit(p[1]))
+ goto parse_number;
+ else
goto def_token;
- goto parse_number;
case '-':
- if (!is_digit(p[1]))
- goto def_token;
- goto parse_number;
+ case '0':
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8':
case '9':
@@ -46187,6 +46214,12 @@ static JSValue json_parse_value(JSParseState *s)
val = JS_NewBool(ctx, s->token.u.ident.atom == JS_ATOM_true);
} else if (s->token.u.ident.atom == JS_ATOM_null) {
val = JS_NULL;
+ } else if (s->token.u.ident.atom == JS_ATOM_NaN && s->ext_json) {
+ /* Note: json5 identifier handling is ambiguous e.g. is
+ '{ NaN: 1 }' a valid JSON5 production ? */
+ val = JS_NewFloat64(s->ctx, NAN);
+ } else if (s->token.u.ident.atom == JS_ATOM_Infinity && s->ext_json) {
+ val = JS_NewFloat64(s->ctx, INFINITY);
} else {
goto def_token;
}
diff --git a/tests/test_std.js b/tests/test_std.js
index 0bbd9e7..3debe40 100644
--- a/tests/test_std.js
+++ b/tests/test_std.js
@@ -129,15 +129,27 @@ function test_popen()
function test_ext_json()
{
var expected, input, obj;
- expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"s":"str"}';
+ expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"b":"abc\\u000bd","s":"str"}';
input = `{ "x":false, /*comments are allowed */
"y":true, // also a comment
z2:null, // unquoted property names
"a":[+1,0o10,0xa0,], // plus prefix, octal, hexadecimal
+ "b": "ab\
+c\\vd", // multi-line strings, '\v' escape
"s":'str',} // trailing comma in objects and arrays, single quoted string
`;
obj = std.parseExtJSON(input);
assert(JSON.stringify(obj), expected);
+
+ obj = std.parseExtJSON('[Infinity, +Infinity, -Infinity, NaN, +NaN, -NaN, .1, -.2]');
+ assert(obj[0], Infinity);
+ assert(obj[1], Infinity);
+ assert(obj[2], -Infinity);
+ assert(obj[3], NaN);
+ assert(obj[4], NaN);
+ assert(obj[5], NaN);
+ assert(obj[6], 0.1);
+ assert(obj[7], -0.2);
}
function test_os()