From: Alexander Borisov Date: Fri, 17 May 2019 14:01:10 +0000 (+0300) Subject: Added escaping to lone closing square brackets in a regexp. X-Git-Tag: 0.3.2~3 X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=0bcb290ae8e1d49b95986ae18a4a93bc84e700b8;p=njs.git Added escaping to lone closing square brackets in a regexp. PCRE with the PCRE_JAVASCRIPT_COMPAT flag rejects regexps with lone closing square brackets as invalid, whereas according to ES6 (section 11.8.5) such a regexp is valid. As a workaround, those lone brackets are escaped. This closes issue #157 on GitHub. --- diff --git a/njs/njs_regexp.c b/njs/njs_regexp.c index 19c7ae75..72854efb 100644 --- a/njs/njs_regexp.c +++ b/njs/njs_regexp.c @@ -206,15 +206,71 @@ njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start, } +nxt_inline njs_ret_t +njs_regexp_escape_bracket(njs_vm_t *vm, nxt_str_t *text, size_t count) +{ + size_t length, diff; + u_char *p, *dst, *start, *end; + + length = text->length + count; + + dst = nxt_mp_alloc(vm->mem_pool, length); + if (nxt_slow_path(dst == NULL)) { + njs_memory_error(vm); + return NJS_ERROR; + } + + start = text->start; + end = text->start + text->length; + + for (p = start; p < end; p++) { + + switch (*p) { + case '[': + while (++p < end && *p != ']') { + if (*p == '\\') { + p++; + } + } + + break; + + case ']': + diff = p - start; + dst = nxt_cpymem(dst, start, diff); + dst = nxt_cpymem(dst, "\\]", 2); + + start = p + 1; + break; + + case '\\': + p++; + break; + } + } + + diff = p - start; + memcpy(dst, start, diff); + + text->start = dst - (length - diff); + text->length = length; + + return NJS_OK; +} + + njs_token_t njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) { u_char *p; + size_t closing_brackets; nxt_str_t text; + njs_ret_t ret; njs_lexer_t *lexer; njs_regexp_flags_t flags; njs_regexp_pattern_t *pattern; + closing_brackets = 0; lexer = parser->lexer; for (p = lexer->start; p < lexer->end; p++) { 
@@ -242,6 +298,10 @@ njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) break; + case ']': + closing_brackets++; + break; + case '\\': if (++p < lexer->end && (*p == '\n' || *p == '\r')) { goto failed; @@ -267,8 +327,28 @@ njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) lexer->start = p; + if (closing_brackets != 0) { + /* + * PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with + * lone closing square brackets as invalid. Whereas according + * to ES6: 11.8.5 it is a valid regexp expression. + * + * Escaping it here as a workaround. + */ + + ret = njs_regexp_escape_bracket(vm, &text, closing_brackets); + if (nxt_slow_path(ret != NXT_OK)) { + return NJS_TOKEN_ILLEGAL; + } + } + pattern = njs_regexp_pattern_create(vm, text.start, text.length, flags); + + if (closing_brackets != 0) { + nxt_mp_free(vm->mem_pool, text.start); + } + if (nxt_slow_path(pattern == NULL)) { return NJS_TOKEN_ILLEGAL; } diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index 72ef4ac8..41093c26 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -5496,6 +5496,36 @@ static njs_unit_test_t njs_test[] = { nxt_string("'12345'.replace(3, () => ({toString: () => 'aaaa'}))"), nxt_string("12aaaa45") }, + { nxt_string("/]/"), + nxt_string("/\\]/") }, + + { nxt_string("/\\]/"), + nxt_string("/\\]/") }, + + { nxt_string("/ab]cd/"), + nxt_string("/ab\\]cd/") }, + + { nxt_string("/ab]/"), + nxt_string("/ab\\]/") }, + + { nxt_string("/]cd/"), + nxt_string("/\\]cd/") }, + + { nxt_string("']'.match(/]/)"), + nxt_string("]") }, + + { nxt_string("'ab]cd'.match(/]/)"), + nxt_string("]") }, + + { nxt_string("'ab]'.match(/]/)"), + nxt_string("]") }, + + { nxt_string("']cd'.match(/]/)"), + nxt_string("]") }, + + { nxt_string("'ab]cd'.match(/\\]/)"), + nxt_string("]") }, + { nxt_string("'abc'.match(/a*/g)"), nxt_string("a,,,") },