From a5a519f8cc99b8e285d4e6b4a78cc0de4bf3ff52 Mon Sep 17 00:00:00 2001 From: Alexander Borisov Date: Wed, 8 May 2019 17:05:05 +0300 Subject: [PATCH] Fixed processing * quantifier for String.prototype.replace(). This closes #73 issue on GitHub. --- njs/njs_string.c | 132 ++++++++++++++++++++++++++++++++++----- njs/test/njs_unit_test.c | 33 ++++++++++ 2 files changed, 150 insertions(+), 15 deletions(-) diff --git a/njs/njs_string.c b/njs/njs_string.c index 3d4ea80d..2fa93f30 100644 --- a/njs/njs_string.c +++ b/njs/njs_string.c @@ -3061,11 +3061,16 @@ njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args, njs_string_replace_t *r) { int *captures; + u_char *p, *start; njs_ret_t ret; + const u_char *end; njs_regexp_pattern_t *pattern; - njs_string_replace_part_t *part; + njs_string_replace_part_t replace; pattern = args[1].data.u.regexp->pattern; + end = r->part[0].start + r->part[0].size; + + replace = r->part[1]; do { ret = njs_regexp_match(vm, &pattern->regex[r->type], @@ -3102,25 +3107,50 @@ njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args, r->part -= 2; } - r->part[2].start = r->part[0].start + captures[1]; - r->part[2].size = r->part[0].size - captures[1]; - njs_set_invalid(&r->part[2].value); + if (captures[1] == 0) { - if (r->function != NULL) { - return njs_string_replace_regexp_function(vm, args, r, - captures, ret); - } + /* Empty match. */ + + start = r->part[0].start; + + if (start < end) { + p = (u_char *) nxt_utf8_next(start, end); + + r->part[1].start = start; + r->part[1].size = p - start; - r->part[0].size = captures[0]; + r->part[2].start = p; + r->part[2].size = end - p; + + } else { + r->part[1].size = 0; + r->part[2].size = 0; + + /* To exit the loop. */ + r->part[2].start = start + 1; + } + + r->part[0] = replace; + + } else { + r->part[2].start = r->part[0].start + captures[1]; + r->part[2].size = r->part[0].size - captures[1]; + njs_set_invalid(&r->part[2].value); + + if (r->function != NULL) { + return njs_string_replace_regexp_function(vm, args, r, + captures, ret); + } + + r->part[0].size = captures[0]; + + r->part[1] = replace; + } if (!pattern->global) { return njs_string_replace_regexp_join(vm, r); } - /* A literal replacement is stored in the second part. */ - part = r->parts.start; - r->part[1] = part[1]; - r->part += 2; } @@ -3131,7 +3161,7 @@ njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args, return NXT_ERROR; } - } while (r->part[0].size > 0); + } while (r->part[0].start <= end); if (r->part != r->parts.start) { return njs_string_replace_regexp_join(vm, r); @@ -3447,11 +3477,15 @@ static njs_ret_t njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r, int *captures) { - uint32_t i, n, last; + uint32_t i, n, last, index; + const u_char *end; njs_string_subst_t *s; njs_string_replace_part_t *part, *subject; + index = 0; + last = r->substitutions->items; + end = r->part[0].start + r->part[0].size; part = nxt_array_add_multiple(&r->parts, &njs_array_mem_proto, vm->mem_pool, last + 1); @@ -3462,6 +3496,22 @@ njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r, r->part = &part[-1]; part[last].start = r->part[0].start + captures[1]; + + if (captures[1] == 0) { + + /* Empty match. */ + + if (r->part[0].start < end) { + captures[1] = nxt_utf8_next(r->part[0].start, end) + - r->part[0].start; + part[last].start = r->part[0].start + captures[1]; + + } else { + /* To exit the loop. */ + part[last].start = r->part[0].start + 1; + } + } + part[last].size = r->part[0].size - captures[1]; njs_set_invalid(&part[last].value); @@ -3498,6 +3548,44 @@ njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r, * "$&" is the same as "$0", the "$0" however is not supported. */ default: + if (captures[n] == captures[n + 1]) { + + /* Empty match. */ + + if (captures[n - 1] == captures[n]) { + + /* + * Consecutive empty matches as in + * 'ab'.replace(/(z*)(h*)/g, 'x') + */ + + part->size = 0; + break; + } + + index = n; + continue; + } + + if (index != 0) { + + /* + * Inserting a single character after a series of + * (possibly several) empty matches. + */ + + if (part->start < end) { + part->start = r->part[0].start + captures[index]; + part->size = nxt_utf8_next(part->start, end) - part->start; + + } else { + part->size = 0; + } + + index = 0; + break; + } + part->start = r->part[0].start + captures[n]; part->size = captures[n + 1] - captures[n]; break; @@ -3507,6 +3595,20 @@ njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r, part++; } + if (index != 0) { + part->start = r->part[0].start + captures[index]; + + if (part->start < end) { + part->size = nxt_utf8_next(part->start, end) - part->start; + + } else { + part->size = 0; + } + + njs_set_invalid(&part->value); + part++; + } + r->part = part; return NXT_OK; diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index 4f02b13e..b5743e02 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -5451,6 +5451,39 @@ static njs_unit_test_t njs_test[] = { nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"), nxt_string("βα") }, + { nxt_string("'abc'.replace(/(h*)(z*)(g*)/g, '$1nn$2zz$3')"), + nxt_string("nnzzannzzbnnzzcnnzz") }, + + { nxt_string("'abc'.replace(/(h*)(z*)/g, '$1nn$2zz$3yy')"), + nxt_string("nnzz$3yyannzz$3yybnnzz$3yycnnzz$3yy") }, + + { nxt_string("'ъ'.replace(/(h*)/g, '$1ЮЙ')"), + nxt_string("ЮЙъЮЙ") }, + + { nxt_string("'ъg'.replace(/(h*)/g, '$1ЮЙ')"), + nxt_string("ЮЙъЮЙgЮЙ") }, + + { nxt_string("'ъg'.replace(/(ъ*)/g, '$1ЮЙ')"), + nxt_string("ъЮЙЮЙgЮЙ") }, + + { nxt_string("'ъg'.replace(/(h*)/g, 'fg$1ЮЙ')"), + nxt_string("fgЮЙъfgЮЙgfgЮЙ") }, + + { nxt_string("'юgёfя'.replace(/(gё)/g, 'n$1i')"), + nxt_string("юngёifя") }, + + { nxt_string("'aabbccaa'.replace(/a*/g, '')"), + nxt_string("bbcc") }, + + { nxt_string("'aabbccaab'.replace(/z*/g, '')"), + nxt_string("aabbccaab") }, + + { nxt_string("'αβγ'.replace(/z*/g, '|')"), + nxt_string("|α|β|γ|") }, + + { nxt_string("''.replace(/a*/g, '')"), + nxt_string("") }, + { nxt_string("'abc'.match(/a*/g)"), nxt_string("a,,,") }, -- 2.47.3