From: Dmitry Volyntsev Date: Wed, 18 Oct 2023 00:51:39 +0000 (-0700) Subject: Fixed RegExp.prototype.exec() with global regexp and unicode input. X-Git-Tag: 0.8.2~4 X-Git-Url: http://www.kaiwu.me/postgresql/commit/static/gitweb.js?a=commitdiff_plain;h=ea2adb73cdd60ff51b79b3f129c561aa5377df9c;p=njs.git Fixed RegExp.prototype.exec() with global regexp and unicode input. Previously, when a Unicode string of exactly 32 characters was provided and the "lastIndex" value of "this" regexp was also equal to 32, njs_string_utf8_offset() was called with an invalid index argument (an index beyond the end of the string). As a result, njs_string_utf8_offset() returned garbage values. This manifested itself in the following ways: 1) InternalError: pcre2_match() failed: bad offset value 2) Very slow replace calls with global regexps, for example in expressions like: str.replace(//g). This fixes #677 on GitHub. --- diff --git a/src/njs_regexp.c b/src/njs_regexp.c index 970041d6..e61bf54c 100644 --- a/src/njs_regexp.c +++ b/src/njs_regexp.c @@ -936,9 +936,14 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, offset = last_index; } else { - offset = njs_string_utf8_offset(string.start, - string.start + string.size, last_index) - - string.start; + if ((size_t) last_index < string.length) { + offset = njs_string_utf8_offset(string.start, + string.start + string.size, + last_index) + - string.start; + } else { + offset = string.size; + } } ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index fbe9043e..bec1f53f 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -9261,6 +9261,12 @@ static njs_unit_test_t njs_test[] = { njs_str("'abc'.replaceAll(/^/g, '|$&|')"), njs_str("||abc") }, + { njs_str("('α'.repeat(30) + 'aa').replace(/a/g, '#')"), + njs_str("αααααααααααααααααααααααααααααα##") }, + + { njs_str("('α'.repeat(30) + 'aa').replaceAll(/a/g, '#')"), 
njs_str("αααααααααααααααααααααααααααααα##") }, + { njs_str("var uri ='/u/v1/Aa/bB?type=m3u8&mt=42';" "uri.replace(/^\\/u\\/v1\\/[^/]*\\/([^\?]*)\\?.*(mt=[^&]*).*$/, '$1|$2')"), njs_str("bB|mt=42") },