From e63843c4b1cd530e73e0ce780e047284f540b4b4 Mon Sep 17 00:00:00 2001 From: Dmitry Volyntsev Date: Fri, 25 Jun 2021 17:00:12 +0000 Subject: [PATCH] Fixed RegExpBuiltinExec() with UTF-8 only regexps. The original issue was introduced in f9082cd59ba6 (0.4.2) while adding RegExpBuiltinExec(), but after de64420d0f2b (0.6.0) it started to affect RegExp.prototype.test() as it was rewritten according to spec. --- src/njs_regexp.c | 24 ++++++++++++++---------- src/test/njs_unit_test.c | 13 +++++++++++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/njs_regexp.c b/src/njs_regexp.c index ec29c06e..d0554595 100644 --- a/src/njs_regexp.c +++ b/src/njs_regexp.c @@ -26,8 +26,7 @@ static u_char *njs_regexp_compile_trace_handler(njs_trace_t *trace, static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, u_char *start); static njs_array_t *njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, - njs_regexp_utf8_t type, njs_string_prop_t *string, - njs_regex_match_data_t *data); + njs_utf8_t utf8, njs_string_prop_t *string, njs_regex_match_data_t *data); static njs_int_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start, uint32_t size, int32_t length); @@ -946,6 +945,7 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, size_t length, offset; int64_t last_index; njs_int_t ret; + njs_utf8_t utf8; njs_value_t value; njs_array_t *result; njs_regexp_t *regexp; @@ -979,11 +979,15 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, goto not_found; } + utf8 = NJS_STRING_BYTE; type = NJS_REGEXP_BYTE; - if (length != string.size) { - /* UTF-8 string. */ + if (string.length != 0) { type = NJS_REGEXP_UTF8; + + if (string.length != string.size) { + utf8 = NJS_STRING_UTF8; + } } pattern = regexp->pattern; @@ -998,7 +1002,7 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, return NJS_ERROR; } - if (type != NJS_REGEXP_UTF8) { + if (utf8 != NJS_STRING_UTF8) { offset = last_index; } else { @@ -1010,7 +1014,7 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, string.size, match_data); if (ret >= 0) { - result = njs_regexp_exec_result(vm, r, type, &string, match_data); + result = njs_regexp_exec_result(vm, r, utf8, &string, match_data); if (njs_slow_path(result == NULL)) { return NJS_ERROR; } @@ -1043,7 +1047,7 @@ not_found: static njs_array_t * -njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_regexp_utf8_t type, +njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_utf8_t utf8, njs_string_prop_t *string, njs_regex_match_data_t *match_data) { int *captures; @@ -1081,7 +1085,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_regexp_utf8_t type, start = &string->start[captures[n]]; size = captures[n + 1] - captures[n]; - if (type == NJS_REGEXP_UTF8) { + if (utf8 == NJS_STRING_UTF8) { length = njs_max(njs_utf8_length(start, size), 0); } else { @@ -1105,7 +1109,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_regexp_utf8_t type, goto fail; } - if (type == NJS_REGEXP_UTF8) { + if (utf8 == NJS_STRING_UTF8) { index = njs_string_index(string, captures[0]); } else { @@ -1115,7 +1119,7 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_regexp_utf8_t type, njs_set_number(&prop->value, index); if (pattern->global || pattern->sticky) { - if (type == NJS_REGEXP_UTF8) { + if (utf8 == NJS_STRING_UTF8) { index = njs_string_index(string, captures[1]); } else { diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index ec920dee..d9def1f0 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -10763,6 +10763,12 @@ static njs_unit_test_t njs_test[] = { njs_str("/α/.test('\\u00CE\\u00B1'.toBytes())"), njs_str("true") }, + { njs_str("/[A-Za-z]/.test('S')"), + njs_str("true") }, + + { njs_str("/[A-Za-z]/.test('ø')"), + njs_str("false") }, + { njs_str("var r = /abc/y; r.test('abc'); r.lastIndex"), njs_str("3") }, @@ -21004,6 +21010,13 @@ static njs_unit_test_t njs_regexp_test[] = { njs_str("RegExp('[\0]').test('\0')"), njs_str("true") }, + + { njs_str("/[A-Za-z\\u00F8-\\u02FF]/.test('S')"), + njs_str("true") }, + + { njs_str("/[A-Za-z\\u00F8-\\u02FF]/.test('ø')"), + njs_str("true") }, + }; -- 2.47.3