From ea2adb73cdd60ff51b79b3f129c561aa5377df9c Mon Sep 17 00:00:00 2001 From: Dmitry Volyntsev Date: Tue, 17 Oct 2023 17:51:39 -0700 Subject: [PATCH] Fixed RegExp.prototype.exec() with global regexp and unicode input. Previously, when a Unicode string of exactly 32 characters was provided and the "lastIndex" value of the "this" regexp was also equal to 32, njs_string_utf8_offset() was called with an invalid index argument (larger than the size of the string). As a result, njs_string_utf8_offset() returned garbage values. This manifested in the following ways: 1) InternalError: pcre2_match() failed: bad offset value 2) Very slow replace calls with global regexps, for example in expressions like: str.replace(/<regexp>/g). This fixes #677 on GitHub. --- src/njs_regexp.c | 11 ++++++++--- src/test/njs_unit_test.c | 6 ++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/njs_regexp.c b/src/njs_regexp.c index 970041d6..e61bf54c 100644 --- a/src/njs_regexp.c +++ b/src/njs_regexp.c @@ -936,9 +936,14 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, offset = last_index; } else { - offset = njs_string_utf8_offset(string.start, - string.start + string.size, last_index) - - string.start; + if ((size_t) last_index < string.length) { + offset = njs_string_utf8_offset(string.start, + string.start + string.size, + last_index) + - string.start; + } else { + offset = string.size; + } } ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index fbe9043e..bec1f53f 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -9261,6 +9261,12 @@ static njs_unit_test_t njs_test[] = { njs_str("'abc'.replaceAll(/^/g, '|$&|')"), njs_str("||abc") }, + { njs_str("('α'.repeat(30) + 'aa').replace(/a/g, '#')"), + njs_str("αααααααααααααααααααααααααααααα##") }, + + { njs_str("('α'.repeat(30) + 'aa').replaceAll(/a/g, '#')"), + njs_str("αααααααααααααααααααααααααααααα##") }, + { 
njs_str("var uri ='/u/v1/Aa/bB?type=m3u8&mt=42';" "uri.replace(/^\\/u\\/v1\\/[^/]*\\/([^\?]*)\\?.*(mt=[^&]*).*$/, '$1|$2')"), njs_str("bB|mt=42") }, -- 2.47.3