From: Dmitry Volyntsev Date: Wed, 8 Mar 2023 04:35:00 +0000 (-0800) Subject: Introduced njs_string_offset() which support any string. X-Git-Tag: 0.7.11~3 X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=ea0a09cd4400ab8073e48cc490c51939019f5305;p=njs.git Introduced njs_string_offset() which support any string. --- diff --git a/src/njs_iterator.c b/src/njs_iterator.c index 7ce0abf2..4845b628 100644 --- a/src/njs_iterator.c +++ b/src/njs_iterator.c @@ -558,7 +558,7 @@ njs_object_iterate_reverse(njs_vm_t *vm, njs_iterator_args_t *args, i = from + 1; if (i > to) { - p = njs_string_offset(string_prop.start, end, from); + p = njs_string_utf8_offset(string_prop.start, end, from); p = njs_utf8_next(p, end); } diff --git a/src/njs_json.c b/src/njs_json.c index e71afdf1..5d0fbffd 100644 --- a/src/njs_json.c +++ b/src/njs_json.c @@ -233,12 +233,7 @@ njs_json_stringify(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, return NJS_ERROR; } - if (length > 10) { - p = njs_string_offset(prop.start, prop.start + prop.size, 10); - - } else { - p = prop.start + prop.size; - } + p = njs_string_offset(&prop, njs_min(length, 10)); stringify->space.start = prop.start; stringify->space.length = p - prop.start; diff --git a/src/njs_parser.c b/src/njs_parser.c index 61f33d11..754aa784 100644 --- a/src/njs_parser.c +++ b/src/njs_parser.c @@ -8589,7 +8589,8 @@ njs_parser_string_create(njs_vm_t *vm, njs_lexer_token_t *token, njs_decode_utf8(&dst, &token->text); if (length > NJS_STRING_MAP_STRIDE && dst.length != length) { - njs_string_offset_map_init(value->long_string.data->start, dst.length); + njs_string_utf8_offset_map_init(value->long_string.data->start, + dst.length); } return NJS_OK; @@ -8833,7 +8834,7 @@ next_char: } if (length > NJS_STRING_MAP_STRIDE && length != size) { - njs_string_offset_map_init(start, size); + njs_string_utf8_offset_map_init(start, size); } return NJS_TOKEN_STRING; diff --git a/src/njs_regexp.c b/src/njs_regexp.c index 4d975517..ec8bd810 100644 --- a/src/njs_regexp.c +++ b/src/njs_regexp.c @@ -891,9 +891,9 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, offset = last_index; } else { - /* UTF-8 string. */ - offset = njs_string_offset(string.start, string.start + string.size, - last_index) - string.start; + offset = njs_string_utf8_offset(string.start, + string.start + string.size, last_index) + - string.start; } ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, @@ -1360,7 +1360,8 @@ njs_regexp_prototype_symbol_replace(njs_vm_t *vm, njs_value_t *args, if ((size_t) length != s.size) { /* UTF-8 string. */ - pos = njs_string_offset(s.start, s.start + s.size, pos) - s.start; + pos = njs_string_utf8_offset(s.start, s.start + s.size, pos) + - s.start; } pos = njs_max(njs_min(pos, (int64_t) s.size), 0); @@ -1643,8 +1644,8 @@ njs_regexp_prototype_symbol_split(njs_vm_t *vm, njs_value_t *args, } if (utf8 == NJS_STRING_UTF8) { - start = njs_string_offset(s.start, s.start + s.size, p); - end = njs_string_offset(s.start, s.start + s.size, q); + start = njs_string_utf8_offset(s.start, s.start + s.size, p); + end = njs_string_utf8_offset(s.start, s.start + s.size, q); } else { start = &s.start[p]; @@ -1691,7 +1692,8 @@ njs_regexp_prototype_symbol_split(njs_vm_t *vm, njs_value_t *args, end = &s.start[s.size]; if (utf8 == NJS_STRING_UTF8) { - start = (p < length) ? njs_string_offset(s.start, s.start + s.size, p) + start = (p < length) ? njs_string_utf8_offset(s.start, s.start + s.size, + p) : end; } else { diff --git a/src/njs_string.c b/src/njs_string.c index 93dfb0b9..7b125f94 100644 --- a/src/njs_string.c +++ b/src/njs_string.c @@ -1146,7 +1146,7 @@ njs_string_prototype_to_bytes(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, /* UTF-8 string. */ end = string.start + string.size; - s = njs_string_offset(string.start, end, slice.start); + s = njs_string_utf8_offset(string.start, end, slice.start); length = slice.length; @@ -1503,7 +1503,7 @@ njs_string_slice_string_prop(njs_string_prop_t *dst, end = start + string->size; if (slice->start < slice->string_length) { - start = njs_string_offset(start, end, slice->start); + start = njs_string_utf8_offset(start, end, slice->start); /* Evaluate size of the slice in bytes and adjust length. */ p = start; @@ -1584,9 +1584,8 @@ njs_string_prototype_char_code_at(njs_vm_t *vm, njs_value_t *args, } else { njs_utf8_decode_init(&ctx); - /* UTF-8 string. */ end = string.start + string.size; - start = njs_string_offset(string.start, end, index); + start = njs_string_utf8_offset(string.start, end, index); code = njs_utf8_decode(&ctx, &start, end); } @@ -2151,7 +2150,7 @@ njs_string_index_of(njs_string_prop_t *string, njs_string_prop_t *search, } else { /* UTF-8 string. */ - p = njs_string_offset(string->start, end, index); + p = njs_string_utf8_offset(string->start, end, index); end -= search->size - 1; while (p < end) { @@ -2296,7 +2295,7 @@ njs_string_prototype_last_index_of(njs_vm_t *vm, njs_value_t *args, goto done; } - p = njs_string_offset(string.start, end, index); + p = njs_string_utf8_offset(string.start, end, index); for (; p >= string.start; p = njs_utf8_prev(p)) { if ((p + s.size) <= end && memcmp(p, s.start, s.size) == 0) { @@ -2376,15 +2375,7 @@ njs_string_prototype_includes(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, if (length - index >= search_length) { end = string.start + string.size; - - if (string.size == (size_t) length) { - /* Byte or ASCII string. */ - p = string.start + index; - - } else { - /* UTF-8 string. */ - p = njs_string_offset(string.start, end, index); - } + p = njs_string_offset(&string, index); end -= search.size - 1; @@ -2482,15 +2473,7 @@ njs_string_prototype_starts_or_ends_with(njs_vm_t *vm, njs_value_t *args, } end = string.start + string.size; - - if (string.size == (size_t) length) { - /* Byte or ASCII string. */ - p = string.start + index; - - } else { - /* UTF-8 string. */ - p = njs_string_offset(string.start, end, index); - } + p = njs_string_offset(&string, index); if ((size_t) (end - p) >= search.size && memcmp(p, search.start, search.size) == 0) @@ -2512,11 +2495,11 @@ done: /* - * njs_string_offset() assumes that index is correct. + * njs_string_utf8_offset() assumes that index is correct. */ const u_char * -njs_string_offset(const u_char *start, const u_char *end, size_t index) +njs_string_utf8_offset(const u_char *start, const u_char *end, size_t index) { uint32_t *map; njs_uint_t skip; @@ -2525,7 +2508,7 @@ njs_string_offset(const u_char *start, const u_char *end, size_t index) map = njs_string_map_start(end); if (map[0] == 0) { - njs_string_offset_map_init(start, end - start); + njs_string_utf8_offset_map_init(start, end - start); } start += map[index / NJS_STRING_MAP_STRIDE - 1]; @@ -2562,7 +2545,7 @@ njs_string_index(njs_string_prop_t *string, uint32_t offset) map = njs_string_map_start(end); if (map[0] == 0) { - njs_string_offset_map_init(string->start, string->size); + njs_string_utf8_offset_map_init(string->start, string->size); } while (index + NJS_STRING_MAP_STRIDE < string->length @@ -2587,7 +2570,7 @@ njs_string_index(njs_string_prop_t *string, uint32_t offset) void -njs_string_offset_map_init(const u_char *start, size_t size) +njs_string_utf8_offset_map_init(const u_char *start, size_t size) { size_t offset; uint32_t *map; @@ -3055,7 +3038,7 @@ njs_string_prototype_pad(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, if (pad_string.size != (size_t) pad_length) { /* UTF-8 string. */ end = pad_string.start + pad_string.size; - end = njs_string_offset(pad_string.start, end, trunc); + end = njs_string_utf8_offset(pad_string.start, end, trunc); trunc = end - pad_string.start; padding = pad_string.size * n + trunc; @@ -3799,14 +3782,7 @@ njs_string_prototype_replace(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, } } - if (njs_is_byte_or_ascii_string(&string)) { - p = string.start + pos; - - } else { - /* UTF-8 string. */ - p = njs_string_offset(string.start, string.start + string.size, - pos); - } + p = njs_string_offset(&string, pos); (void) njs_string_prop(&ret_string, &retval); @@ -3867,9 +3843,8 @@ njs_string_prototype_replace(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, p = string.start + pos; } else { - /* UTF-8 string. */ - p = njs_string_offset(string.start, string.start + string.size, - pos); + p = njs_string_utf8_offset(string.start, string.start + string.size, + pos); } (void) njs_string_prop(&ret_string, &retval); diff --git a/src/njs_string.h b/src/njs_string.h index e9629385..4df4e581 100644 --- a/src/njs_string.h +++ b/src/njs_string.h @@ -243,10 +243,10 @@ void njs_string_slice_string_prop(njs_string_prop_t *dst, const njs_string_prop_t *string, const njs_slice_prop_t *slice); njs_int_t njs_string_slice(njs_vm_t *vm, njs_value_t *dst, const njs_string_prop_t *string, const njs_slice_prop_t *slice); -const u_char *njs_string_offset(const u_char *start, const u_char *end, +const u_char *njs_string_utf8_offset(const u_char *start, const u_char *end, size_t index); uint32_t njs_string_index(njs_string_prop_t *string, uint32_t offset); -void njs_string_offset_map_init(const u_char *start, size_t size); +void njs_string_utf8_offset_map_init(const u_char *start, size_t size); double njs_string_to_index(const njs_value_t *value); njs_int_t njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_index_t component); @@ -266,6 +266,20 @@ njs_int_t njs_string_get_substitution(njs_vm_t *vm, njs_value_t *matched, njs_value_t *groups, njs_value_t *replacement, njs_value_t *retval); +njs_inline const u_char * +njs_string_offset(njs_string_prop_t *string, int64_t index) +{ + if (njs_is_byte_or_ascii_string(string)) { + return string->start + index; + } + + /* UTF-8 string. */ + + return njs_string_utf8_offset(string->start, string->start + string->size, + index); +} + + extern const njs_object_init_t njs_string_instance_init; extern const njs_object_type_init_t njs_string_type_init;