From 65df72fd01dd98f8dfafc84e420bf7cd078fa994 Mon Sep 17 00:00:00 2001 From: Valentin Bartenev Date: Mon, 17 Aug 2020 19:55:46 +0300 Subject: [PATCH] Added support for numeric separators (ES12). --- src/njs_json.c | 2 +- src/njs_lexer.c | 10 ++-- src/njs_number.c | 42 +++++++++++----- src/njs_number.h | 6 ++- src/njs_parser.c | 4 +- src/njs_string.c | 6 +-- src/njs_strtod.c | 36 ++++++++++++-- src/njs_strtod.h | 3 +- src/test/njs_unit_test.c | 101 +++++++++++++++++++++++++++++++++++---- 9 files changed, 173 insertions(+), 37 deletions(-) diff --git a/src/njs_json.c b/src/njs_json.c index 07767f91..24a18e2a 100644 --- a/src/njs_json.c +++ b/src/njs_json.c @@ -796,7 +796,7 @@ njs_json_parse_number(njs_json_parse_ctx_t *ctx, njs_value_t *value, } start = p; - num = njs_number_dec_parse(&p, ctx->end); + num = njs_number_dec_parse(&p, ctx->end, 0); if (p != start) { njs_set_number(value, sign * num); return p; diff --git a/src/njs_lexer.c b/src/njs_lexer.c index 5f445ce7..4a040226 100644 --- a/src/njs_lexer.c +++ b/src/njs_lexer.c @@ -787,7 +787,7 @@ njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token) goto illegal_token; } - token->number = njs_number_hex_parse(&p, lexer->end); + token->number = njs_number_hex_parse(&p, lexer->end, 1); goto done; } @@ -830,16 +830,20 @@ njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token) /* Legacy Octal literals are deprecated. */ - if (*p >= '0' && *p <= '9') { + if ((*p >= '0' && *p <= '9') || *p == '_') { goto illegal_trailer; } } p--; - token->number = njs_number_dec_parse(&p, lexer->end); + token->number = njs_number_dec_parse(&p, lexer->end, 1); done: + if (p[-1] == '_') { + p--; + } + lexer->start = (u_char *) p; token->text.length = p - token->text.start; diff --git a/src/njs_number.c b/src/njs_number.c index 8acffe82..276b95b2 100644 --- a/src/njs_number.c +++ b/src/njs_number.c @@ -54,9 +54,10 @@ njs_key_to_index(const njs_value_t *value) double -njs_number_dec_parse(const u_char **start, const u_char *end) +njs_number_dec_parse(const u_char **start, const u_char *end, + njs_bool_t literal) { - return njs_strtod(start, end); + return njs_strtod(start, end, literal); } @@ -65,22 +66,27 @@ njs_number_oct_parse(const u_char **start, const u_char *end) { u_char c; uint64_t num; - const u_char *p; + const u_char *p, *_; p = *start; num = 0; + _ = p - 1; - while (p < end) { + for (; p < end; p++) { /* Values less than '0' become >= 208. */ c = *p - '0'; if (njs_slow_path(c > 7)) { + if (*p == '_' && (p - _) > 1) { + _ = p; + continue; + } + break; } num = num * 8 + c; - p++; } *start = p; @@ -94,22 +100,27 @@ njs_number_bin_parse(const u_char **start, const u_char *end) { u_char c; uint64_t num; - const u_char *p; + const u_char *p, *_; p = *start; num = 0; + _ = p - 1; - while (p < end) { + for (; p < end; p++) { /* Values less than '0' become >= 208. */ c = *p - '0'; if (njs_slow_path(c > 1)) { + if (*p == '_' && (p - _) > 1) { + _ = p; + continue; + } + break; } num = num * 2 + c; - p++; } *start = p; @@ -119,24 +130,31 @@ njs_number_bin_parse(const u_char **start, const u_char *end) uint64_t -njs_number_hex_parse(const u_char **start, const u_char *end) +njs_number_hex_parse(const u_char **start, const u_char *end, + njs_bool_t literal) { uint64_t num; njs_int_t n; - const u_char *p; + const u_char *p, *_; p = *start; num = 0; + _ = p - 1; - while (p < end) { + for (; p < end; p++) { n = njs_char_to_hex(*p); + if (njs_slow_path(n < 0)) { + if (literal && *p == '_' && (p - _) > 1) { + _ = p; + continue; + } + break; } num = num * 16 + n; - p++; } *start = p; diff --git a/src/njs_number.h b/src/njs_number.h index 6e8d8c42..77d85e33 100644 --- a/src/njs_number.h +++ b/src/njs_number.h @@ -12,10 +12,12 @@ double njs_key_to_index(const njs_value_t *value); -double njs_number_dec_parse(const u_char **start, const u_char *end); +double njs_number_dec_parse(const u_char **start, const u_char *end, + njs_bool_t literal); uint64_t njs_number_oct_parse(const u_char **start, const u_char *end); uint64_t njs_number_bin_parse(const u_char **start, const u_char *end); -uint64_t njs_number_hex_parse(const u_char **start, const u_char *end); +uint64_t njs_number_hex_parse(const u_char **start, const u_char *end, + njs_bool_t literal); int64_t njs_number_radix_parse(const u_char **start, const u_char *end, uint8_t radix); njs_int_t njs_number_to_string(njs_vm_t *vm, njs_value_t *string, diff --git a/src/njs_parser.c b/src/njs_parser.c index f93f3043..168ae985 100644 --- a/src/njs_parser.c +++ b/src/njs_parser.c @@ -8078,7 +8078,7 @@ njs_parser_escape_string_create(njs_parser_t *parser, njs_lexer_token_t *token, hex_end = src + hex_length; hex: - cp = njs_number_hex_parse(&src, hex_end); + cp = njs_number_hex_parse(&src, hex_end, 0); /* Skip '}' character. */ @@ -8219,7 +8219,7 @@ njs_parser_escape_string_calc_length(njs_parser_t *parser, hex: ptr = src; - cp = njs_number_hex_parse(&src, hex_end); + cp = njs_number_hex_parse(&src, hex_end, 0); if (hex_length != 0) { if (src != hex_end) { diff --git a/src/njs_string.c b/src/njs_string.c index 206d4884..bbd96224 100644 --- a/src/njs_string.c +++ b/src/njs_string.c @@ -3718,11 +3718,11 @@ njs_string_to_number(const njs_value_t *value, njs_bool_t parse_float) && p + 2 < end && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { p += 2; - num = njs_number_hex_parse(&p, end); + num = njs_number_hex_parse(&p, end, 0); } else { start = p; - num = njs_number_dec_parse(&p, end); + num = njs_number_dec_parse(&p, end, 0); if (p == start) { if (p + infinity > end || memcmp(p, "Infinity", infinity) != 0) { @@ -3821,7 +3821,7 @@ njs_string_to_index(const njs_value_t *value) } } - num = njs_strtod(&p, end); + num = njs_strtod(&p, end, 0); if (p != end) { return NAN; } diff --git a/src/njs_strtod.c b/src/njs_strtod.c index 1efd9162..d3e48c38 100644 --- a/src/njs_strtod.c +++ b/src/njs_strtod.c @@ -307,12 +307,12 @@ njs_strtod_internal(const u_char *start, size_t length, int exp) double -njs_strtod(const u_char **start, const u_char *end) +njs_strtod(const u_char **start, const u_char *end, njs_bool_t literal) { int exponent, exp, insignf; u_char c, *pos; njs_bool_t minus; - const u_char *e, *p, *last; + const u_char *e, *p, *last, *_; u_char data[128]; exponent = 0; @@ -321,11 +321,25 @@ njs_strtod(const u_char **start, const u_char *end) pos = data; last = data + sizeof(data); - for (p = *start; p < end; p++) { + p = *start; + _ = p - 2; + + for (; p < end; p++) { /* Values less than '0' become >= 208. */ c = *p - '0'; if (njs_slow_path(c > 9)) { + if (literal) { + if ((p - _) == 1) { + goto done; + } + + if (*p == '_') { + _ = p; + continue; + } + } + break; } @@ -339,12 +353,18 @@ njs_strtod(const u_char **start, const u_char *end) /* Do not emit a '.', but adjust the exponent instead. */ if (p < end && *p == '.') { + _ = p; for (p++; p < end; p++) { /* Values less than '0' become >= 208. */ c = *p - '0'; if (njs_slow_path(c > 9)) { + if (literal && *p == '_' && (p - _) > 1) { + _ = p; + continue; + } + break; } @@ -388,6 +408,11 @@ njs_strtod(const u_char **start, const u_char *end) c = *p - '0'; if (njs_slow_path(c > 9)) { + if (literal && *p == '_' && (p - _) > 1) { + _ = p; + continue; + } + break; } @@ -397,9 +422,14 @@ njs_strtod(const u_char **start, const u_char *end) } exponent += minus ? -exp : exp; + + } else if (literal && *e == '_') { + p = e; } } +done: + *start = p; exponent += insignf; diff --git a/src/njs_strtod.h b/src/njs_strtod.h index 42e3e652..581fdbb0 100644 --- a/src/njs_strtod.h +++ b/src/njs_strtod.h @@ -7,6 +7,7 @@ #ifndef _NJS_STRTOD_H_INCLUDED_ #define _NJS_STRTOD_H_INCLUDED_ -NJS_EXPORT double njs_strtod(const u_char **start, const u_char *end); +NJS_EXPORT double njs_strtod(const u_char **start, const u_char *end, + njs_bool_t literal); #endif /* _NJS_STRTOD_H_INCLUDED_ */ diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index 70e6b613..f191bf82 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -171,13 +171,13 @@ static njs_unit_test_t njs_test[] = { njs_str(".9"), njs_str("0.9") }, - { njs_str("-.01"), + { njs_str("-.0_1"), njs_str("-0.01") }, - { njs_str("0.000001"), + { njs_str("0.000_001"), njs_str("0.000001") }, - { njs_str("0.00000123456"), + { njs_str("0.00000_123456"), njs_str("0.00000123456") }, { njs_str("0.0000001"), @@ -186,10 +186,13 @@ static njs_unit_test_t njs_test[] = { njs_str("1.1000000"), njs_str("1.1") }, - { njs_str("99999999999999999999"), + { njs_str("1_0"), + njs_str("10") }, + + { njs_str("99_999_999_999_999_999_999"), njs_str("100000000000000000000") }, - { njs_str("99999999999999999999.111"), + { njs_str("9999999999999999999_9.1_1_1"), njs_str("100000000000000000000") }, { njs_str("999999999999999999999"), @@ -201,7 +204,7 @@ static njs_unit_test_t njs_test[] = { njs_str("18446744073709551616"), njs_str("18446744073709552000") }, - { njs_str("1.7976931348623157E+308"), + { njs_str("1.79769313_48623157E+3_0_8"), njs_str("1.7976931348623157e+308") }, { njs_str("+1"), @@ -213,6 +216,48 @@ static njs_unit_test_t njs_test[] = { njs_str("."), njs_str("SyntaxError: Unexpected token \".\" in 1") }, + { njs_str("0_1"), + njs_str("SyntaxError: Unexpected token \"0_\" in 1") }, + + { njs_str("1_"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + + { njs_str("1__0"), + njs_str("SyntaxError: Unexpected token \"__0\" in 1") }, + + { njs_str("._1"), + njs_str("SyntaxError: Unexpected token \".\" in 1") }, + + { njs_str(".1_"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + + { njs_str("1_.1"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + + { njs_str(".0__1"), + njs_str("SyntaxError: Unexpected token \"__1\" in 1") }, + + { njs_str("1e_1"), + njs_str("SyntaxError: Unexpected token \"_1\" in 1") }, + + { njs_str("1e-_1"), + njs_str("SyntaxError: Unexpected token \"_1\" in 1") }, + + { njs_str("1E1__0"), + njs_str("SyntaxError: Unexpected token \"__0\" in 1") }, + + { njs_str("1_e1"), + njs_str("SyntaxError: Unexpected token \"_e1\" in 1") }, + + { njs_str("1e1_"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + + { njs_str("-_1"), + njs_str("ReferenceError: \"_1\" is not defined in 1") }, + + { njs_str("_1"), + njs_str("ReferenceError: \"_1\" is not defined in 1") }, + /* Octal Numbers. */ { njs_str("0o0"), @@ -224,7 +269,7 @@ static njs_unit_test_t njs_test[] = { njs_str("0o011"), njs_str("9") }, - { njs_str("-0O777"), + { njs_str("-0O7_7_7"), njs_str("-511") }, { njs_str("0o"), @@ -233,6 +278,15 @@ static njs_unit_test_t njs_test[] = { njs_str("0O778"), njs_str("SyntaxError: Unexpected token \"0O778\" in 1") }, + { njs_str("0O_7"), + njs_str("SyntaxError: Unexpected token \"_7\" in 1") }, + + { njs_str("0o7_"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + + { njs_str("0o7__7"), + njs_str("SyntaxError: Unexpected token \"__7\" in 1") }, + /* Legacy Octal Numbers are deprecated. */ { njs_str("00"), @@ -247,6 +301,15 @@ static njs_unit_test_t njs_test[] = { njs_str("0011"), njs_str("SyntaxError: Unexpected token \"00\" in 1") }, + { njs_str("0_"), + njs_str("SyntaxError: Unexpected token \"0_\" in 1") }, + + { njs_str("0_1"), + njs_str("SyntaxError: Unexpected token \"0_\" in 1") }, + + { njs_str("00_1"), + njs_str("SyntaxError: Unexpected token \"00\" in 1") }, + /* Binary Numbers. */ { njs_str("0b0"), @@ -255,10 +318,10 @@ static njs_unit_test_t njs_test[] = { njs_str("0B10"), njs_str("2") }, - { njs_str("0b0101"), + { njs_str("0b0_1_0_1"), njs_str("5") }, - { njs_str("-0B11111111"), + { njs_str("-0B1111_1111"), njs_str("-255") }, { njs_str("0b"), @@ -267,6 +330,15 @@ static njs_unit_test_t njs_test[] = { njs_str("0B12"), njs_str("SyntaxError: Unexpected token \"0B12\" in 1") }, + { njs_str("0b_11"), + njs_str("SyntaxError: Unexpected token \"_11\" in 1") }, + + { njs_str("0B1__1"), + njs_str("SyntaxError: Unexpected token \"__1\" in 1") }, + + { njs_str("0b11_"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + /* Hex Numbers. */ { njs_str("0x0"), @@ -278,7 +350,7 @@ static njs_unit_test_t njs_test[] = { njs_str("0xffFF"), njs_str("65535") }, - { njs_str("0X0000BEEF"), + { njs_str("0X00_00_BE_EF"), njs_str("48879") }, { njs_str("0x"), @@ -290,6 +362,15 @@ static njs_unit_test_t njs_test[] = { njs_str("0x12g"), njs_str("SyntaxError: Unexpected token \"g\" in 1") }, + { njs_str("0X_ff"), + njs_str("SyntaxError: Unexpected token \"_ff\" in 1") }, + + { njs_str("0xff_"), + njs_str("SyntaxError: Unexpected token \"_\" in 1") }, + + { njs_str("0Xf__f"), + njs_str("SyntaxError: Unexpected token \"__f\" in 1") }, + { njs_str(""), njs_str("undefined") }, -- 2.47.3