From: Dmitry Volyntsev Date: Thu, 30 May 2019 17:05:14 +0000 (+0300) Subject: Escaping lone closing square brackets in RegExp() constructor. X-Git-Tag: 0.3.3~19 X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=10c7ffb6d3cc8e9f6c0bb4bbc0fe730fe41ed1b2;p=njs.git Escaping lone closing square brackets in RegExp() constructor. This correctly fixes #157. As in 88263426432d this was done only for regexp literals. --- diff --git a/njs/njs_regexp.c b/njs/njs_regexp.c index 4ffd0ba8..03a3003a 100644 --- a/njs/njs_regexp.c +++ b/njs/njs_regexp.c @@ -206,56 +206,93 @@ njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start, } +/* + * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with + * lone closing square brackets as invalid. Whereas according + * to ES6: 11.8.5 it is a valid regexp expression. + * + * Escaping it here as a workaround. + */ + nxt_inline njs_ret_t -njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count) +njs_regexp_escape(njs_vm_t *vm, nxt_str_t *text) { - size_t length, diff; - u_char *p, *dst, *start, *end; - - length = text->length + count; - - dst = nxt_mp_alloc(vm->mem_pool, length); - if (nxt_slow_path(dst == NULL)) { - njs_memory_error(vm); - return NJS_ERROR; - } + size_t brackets; + u_char *p, *dst, *start, *end; + nxt_bool_t in; start = text->start; end = text->start + text->length; + in = 0; + brackets = 0; + for (p = start; p < end; p++) { switch (*p) { case '[': - while (++p < end && *p != ']') { - if (*p == '\\') { - p++; - } - } - + in = 1; break; case ']': - diff = p - start; - dst = nxt_cpymem(dst, start, diff); - dst = nxt_cpymem(dst, "\\]", 2); + if (!in) { + brackets++; + } - start = p + 1; + in = 0; break; case '\\': p++; + } + } + + if (!brackets) { + return NXT_OK; + } + + text->length = text->length + brackets; + + text->start = nxt_mp_alloc(vm->mem_pool, text->length); + if (nxt_slow_path(text->start == NULL)) { + njs_memory_error(vm); + return NXT_ERROR; + } + + in = 0; + dst = text->start; + + for (p = start; p < end; p++) { + + switch (*p) { + case '[': + in = 1; break; + + case ']': + if (!in) { + *dst++ = '\\'; + } + + in = 0; + break; + + case '\\': + *dst++ = *p++; + + if (p == end) { + goto done; + } } + + *dst++ = *p; } - diff = p - start; - memcpy(dst, start, diff); +done: - text->start = dst - (length - diff); - text->length = length; + text->length = dst - text->start; - return NJS_OK; + return NXT_OK; } @@ -263,14 +300,11 @@ njs_token_t njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) { u_char *p; - size_t closing_brackets; nxt_str_t text; - njs_ret_t ret; njs_lexer_t *lexer; njs_regexp_flags_t flags; njs_regexp_pattern_t *pattern; - closing_brackets = 0; lexer = parser->lexer; for (p = lexer->start; p < lexer->end; p++) { @@ -298,10 +332,6 @@ njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) break; - case ']': - closing_brackets++; - break; - case '\\': if (++p < lexer->end && (*p == '\n' || *p == '\r')) { goto failed; @@ -327,28 +357,9 @@ njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) lexer->start = p; - if (closing_brackets != 0) { - /* - * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with - * lone closing square brackets as invalid. Whereas according - * to ES6: 11.8.5 it is a valid regexp expression. - * - * Escaping it here as a workaround. - */ - - ret = njs_regexp_escape_bracket(vm, &text, closing_brackets); - if (nxt_slow_path(ret != NXT_OK)) { - return NJS_TOKEN_ILLEGAL; - } - } - pattern = njs_regexp_pattern_create(vm, text.start, text.length, flags); - if (closing_brackets != 0) { - nxt_mp_free(vm->mem_pool, text.start); - } - if (nxt_slow_path(pattern == NULL)) { return NJS_TOKEN_ILLEGAL; } @@ -440,6 +451,7 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, int options, ret; u_char *p, *end; size_t size; + nxt_str_t text; nxt_uint_t n; nxt_regex_t *regex; njs_regexp_group_t *group; @@ -450,8 +462,16 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0); size += ((flags & NJS_REGEXP_MULTILINE) != 0); + text.start = start; + text.length = length; + + ret = njs_regexp_escape(vm, &text); + if (nxt_slow_path(ret != NXT_OK)) { + return NULL; + } + pattern = nxt_mp_zalloc(vm->mem_pool, sizeof(njs_regexp_pattern_t) + 1 - + length + size + 1); + + text.length + size + 1); if (nxt_slow_path(pattern == NULL)) { njs_memory_error(vm); return NULL; @@ -463,8 +483,8 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, pattern->source = p; *p++ = '/'; - p = memcpy(p, start, length); - p += length; + p = memcpy(p, text.start, text.length); + p += text.length; end = p; *p++ = '\0'; diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index 92aab421..204281cb 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -5540,9 +5540,21 @@ static njs_unit_test_t njs_test[] = { nxt_string("/]/"), nxt_string("/\\]/") }, + { nxt_string("RegExp(']')"), + nxt_string("/\\]/") }, + + { nxt_string("RegExp('[\\\\\\\\]]')"), + nxt_string("/[\\\\]\\]/") }, + + { nxt_string("/[\\\\]]/"), + nxt_string("/[\\\\]\\]/") }, + { nxt_string("/\\]/"), nxt_string("/\\]/") }, + { nxt_string("RegExp('\\]')"), + nxt_string("/\\]/") }, + { nxt_string("/ab]cd/"), nxt_string("/ab\\]cd/") }, @@ -7441,6 +7453,9 @@ static njs_unit_test_t njs_test[] = { nxt_string("new RegExp('[')"), nxt_string("SyntaxError: pcre_compile(\"[\") failed: missing terminating ] for character class") }, + { nxt_string("new RegExp('\\\\')"), + nxt_string("SyntaxError: pcre_compile(\"\\\") failed: \\ at end of pattern") }, + { nxt_string("[0].map(RegExp().toString)"), nxt_string("TypeError: \"this\" argument is not a regexp") },