From: Igor Sysoev
Date: Thu, 27 Oct 2016 07:56:54 +0000 (+0300)
Subject: String.prototype.lastIndexOf() method fixes and optimizations.
X-Git-Tag: 0.1.5~32
X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=934abf2ee5252eaa6a1e041450292c2bc0e81061;p=njs.git

String.prototype.lastIndexOf() method fixes and optimizations.

In collaboration with Valentin Bartenev.
---

Review amendments to the patch as originally posted:
 - lastIndexOf() returns -1 up front when the search string occupies
   more bytes than the string, instead of comparing memory located
   before the string;
 - the backward scans stop at index 0 before moving the pointer, so
   nxt_utf8_prev() never reads before the start of the string;
 - NaN and out-of-range positions are no longer converted to ssize_t;
 - unit tests for the cases above.

diff --git a/njs/njs_string.c b/njs/njs_string.c
index 7a11ff64..236f7d62 100644
--- a/njs/njs_string.c
+++ b/njs/njs_string.c
@@ -83,8 +83,6 @@ static nxt_noinline void njs_string_slice_args(njs_slice_prop_t *slice,
     njs_value_t *args, nxt_uint_t nargs);
 static njs_ret_t njs_string_from_char_code(njs_vm_t *vm,
     njs_value_t *args, nxt_uint_t nargs, njs_index_t unused);
-static nxt_noinline ssize_t njs_string_index_of(njs_vm_t *vm,
-    njs_value_t *src, njs_value_t *search_string, size_t index);
 static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
     njs_regexp_pattern_t *pattern);
 static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
@@ -1288,91 +1286,105 @@ static njs_ret_t
 njs_string_prototype_last_index_of(njs_vm_t *vm, njs_value_t *args,
     nxt_uint_t nargs, njs_index_t unused)
 {
-    ssize_t  ret, index, last;
-
-    index = -1;
+    double             pos;
+    ssize_t            index, start, length, search_length;
+    const u_char       *p, *end;
+    njs_string_prop_t  string, search;
 
     if (nargs > 1) {
-        last = NJS_STRING_MAX_LENGTH;
-
-        if (nargs > 2) {
-            last = args[2].data.u.number;
+        length = njs_string_prop(&string, &args[0]);
+        search_length = njs_string_prop(&search, &args[1]);
 
-            if (last < 0) {
-                last = 0;
-            }
+        /*
+         * The search string cannot match if it has more characters
+         * or occupies more bytes than the string.  The size test also
+         * keeps "start" and "end" below inside the string buffer.
+         */
+        if (length < search_length || string.size < search.size) {
+            goto small;
         }
 
-        ret = 0;
+        index = length;
 
-        for ( ;; ) {
-            ret = njs_string_index_of(vm, &args[0], &args[1], ret);
+        if (nargs > 2) {
+            pos = args[2].data.u.number;
 
-            if (ret < 0 || ret >= last) {
-                break;
+            /*
+             * NaN and positions not less than the string length keep
+             * the default "search from the end"; they are never cast
+             * to ssize_t since an out-of-range conversion is undefined.
+             */
+            if (pos < 0) {
+                index = 0;
+
+            } else if (pos < length) {
+                index = pos;
             }
-
-            index = ret++;
         }
-    }
-
-    njs_number_set(&vm->retval, index);
 
-    return NXT_OK;
-}
-
-
-static nxt_noinline ssize_t
-njs_string_index_of(njs_vm_t *vm, njs_value_t *src, njs_value_t *search_string,
-    size_t index)
-{
-    size_t             length;
-    const u_char       *p, *end;
-    njs_string_prop_t  string, search;
-
-    (void) njs_string_prop(&search, search_string);
+        if (string.size == (size_t) length) {
+            /* Byte or ASCII string. */
 
-    length = njs_string_prop(&string, src);
+            start = length - search.size;
 
-    if (index < length) {
+            if (index > start) {
+                index = start;
+            }
 
-        if (string.size == length) {
-            /* Byte or ASCII string. */
             p = string.start + index;
-            end = (string.start + string.size) - (search.size - 1);
 
-            while (p < end) {
+            for ( ;; ) {
                 if (memcmp(p, search.start, search.size) == 0) {
-                    return index;
+                    goto done;
                 }
 
-                index++;
-                p++;
+                /* Stop before stepping in front of the string. */
+                if (index == 0) {
+                    break;
+                }
+
+                p--;
+                index--;
             }
 
         } else {
             /* UTF-8 string. */
-            end = string.start + string.size;
 
+            end = string.start + string.size;
             p = njs_string_offset(string.start, end, index);
+            end -= search.size;
 
-            end -= search.size - 1;
+            /* Step back to the last position where the search fits. */
+            while (p > end) {
+                index--;
+                p = nxt_utf8_prev(p);
+            }
 
-            while (p < end) {
+            for ( ;; ) {
                 if (memcmp(p, search.start, search.size) == 0) {
-                    return index;
+                    goto done;
                 }
 
-                index++;
-                p = nxt_utf8_next(p, end);
+                /* nxt_utf8_prev() must not read before the string. */
+                if (index == 0) {
+                    break;
+                }
+
+                p = nxt_utf8_prev(p);
+                index--;
             }
         }
-
-    } else if (search.size == 0) {
-        return length;
     }
 
-    return -1;
+small:
+
+    index = -1;
+
+done:
+
+    njs_number_set(&vm->retval, index);
+
+    return NXT_OK;
 }
 
 
diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c
index dc8edb92..a2ddbad1 100644
--- a/njs/test/njs_unit_test.c
+++ b/njs/test/njs_unit_test.c
@@ -3224,6 +3224,9 @@ static njs_unit_test_t njs_test[] =
     { nxt_string("''.indexOf.call(12345, 45, '0')"),
       nxt_string("3") },
 
+    { nxt_string("'abc'.lastIndexOf('abcdef')"),
+      nxt_string("-1") },
+
     { nxt_string("'abc abc abc abc'.lastIndexOf('abc')"),
       nxt_string("12") },
 
@@ -3231,7 +3234,55 @@ static njs_unit_test_t njs_test[] =
       nxt_string("8") },
 
     { nxt_string("'abc abc abc abc'.lastIndexOf('abc', 0)"),
-      nxt_string("-1") },
+      nxt_string("0") },
+
+    { nxt_string("'abc abc abc abc'.lastIndexOf('', 0)"),
+      nxt_string("0") },
+
+    { nxt_string("'abc abc abc abc'.lastIndexOf('', 5)"),
+      nxt_string("5") },
+
+    { nxt_string("'abc abc абвгд abc'.lastIndexOf('абвгд')"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд')"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 11)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 12)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 13)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 14)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 15)"),
+      nxt_string("8") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('')"),
+      nxt_string("16") },
+
+    { nxt_string("'abc abc абвгдежз'.lastIndexOf('', 12)"),
+      nxt_string("12") },
+
+    { nxt_string("''.lastIndexOf('')"),
+      nxt_string("0") },
+
+    { nxt_string("'abc'.lastIndexOf('где')"),
+      nxt_string("-1") },
+
+    { nxt_string("'абв'.lastIndexOf('x')"),
+      nxt_string("-1") },
+
+    { nxt_string("'abc abc'.lastIndexOf('abc', 0/0)"),
+      nxt_string("4") },
+
+    { nxt_string("'abc abc'.lastIndexOf('abc', 1/0)"),
+      nxt_string("4") },
 
     { nxt_string("'ABC'.toLowerCase()"),
       nxt_string("abc") },
diff --git a/nxt/nxt_utf8.h b/nxt/nxt_utf8.h
index 2b1ced7b..4a744ed5 100644
--- a/nxt/nxt_utf8.h
+++ b/nxt/nxt_utf8.h
@@ -29,7 +29,14 @@ NXT_EXPORT ssize_t nxt_utf8_length(const u_char *p, size_t len);
 NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len);
 
 
-/* nxt_utf8_next() expects a valid UTF-8 string. */
+/*
+ * nxt_utf8_next() and nxt_utf8_prev() expect a valid UTF-8 string.
+ * nxt_utf8_prev() reads the bytes before "p", so it must not be
+ * called with a pointer to the start of the string.
+ *
+ * The leading UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
+ * The continuation UTF-8 bytes are 10xxxxxx.
+ */
 
 nxt_inline const u_char *
 nxt_utf8_next(const u_char *p, const u_char *end)
@@ -41,10 +48,6 @@ nxt_utf8_next(const u_char *p, const u_char *end)
     if ((c & 0x80) != 0) {
 
         do {
-            /*
-             * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
-             * The next UTF-8 bytes are 10xxxxxx.
-             */
             c = *p;
 
             if ((c & 0xC0) != 0x80) {
@@ -60,6 +63,22 @@ nxt_utf8_next(const u_char *p, const u_char *end)
 }
 
 
+nxt_inline const u_char *
+nxt_utf8_prev(const u_char *p)
+{
+    u_char  c;
+
+    do {
+        p--;
+        c = *p;
+
+    } while ((c & 0xC0) == 0x80);
+
+    return p;
+}
+
+
+
 #define nxt_utf8_size(u)                                                      \
     ((u < 0x80) ? 1 : ((u < 0x0800) ? 2 : ((u < 0x10000) ? 3 : 4)))
 