From a87b06149477d980cda7c8b5786e23cf3de0ffda Mon Sep 17 00:00:00 2001 From: Valentin Bartenev Date: Thu, 10 Nov 2016 18:54:28 +0300 Subject: [PATCH] On-demand initialization of UTF-8 strings offset map. --- njs/njs_string.c | 70 +++++++++++++++++++----------------------------- njs/njs_string.h | 6 ++--- njs/njs_vm.c | 4 --- 3 files changed, 31 insertions(+), 49 deletions(-) diff --git a/njs/njs_string.c b/njs/njs_string.c index 439cbede..4c6361fb 100644 --- a/njs/njs_string.c +++ b/njs/njs_string.c @@ -178,11 +178,6 @@ njs_string_new(njs_vm_t *vm, njs_value_t *value, const u_char *start, if (nxt_fast_path(p != NULL)) { memcpy(p, start, size); - - if (size != length && length >= NJS_STRING_MAP_STRIDE) { - njs_string_offset_map_init(p, size); - } - return NXT_OK; } @@ -194,7 +189,7 @@ nxt_noinline u_char * njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, uint32_t length) { - uint32_t total; + uint32_t total, map_offset, *map; njs_string_t *string; value->type = NJS_STRING; @@ -217,9 +212,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, value->data.string_size = size; if (size != length && length > NJS_STRING_MAP_STRIDE) { - total = njs_string_map_offset(size) + njs_string_map_size(length); + map_offset = njs_string_map_offset(size); + total = map_offset + njs_string_map_size(length); } else { + map_offset = 0; total = size; } @@ -233,6 +230,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, string->length = length; string->retain = 1; + if (map_offset != 0) { + map = (uint32_t *) (string->start + map_offset); + map[0] = 0; + } + return string->start; } @@ -251,15 +253,16 @@ njs_string_copy(njs_value_t *dst, njs_value_t *src) /* * njs_string_validate() validates an UTF-8 string, evaluates its length, - * sets njs_string_prop_t struct, and initializes offset map if it is required. + * sets njs_string_prop_t struct. 
*/ nxt_noinline njs_ret_t njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string, njs_value_t *value) { - u_char *start; - size_t new_size; - ssize_t size, length; + u_char *start; + size_t new_size, map_offset; + ssize_t size, length; + uint32_t *map; size = value->short_string.size; @@ -297,8 +300,8 @@ njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string, njs_value_t *value) * Reallocate the long string with offset map * after the string. */ - new_size = njs_string_map_offset(size) - + njs_string_map_size(length); + map_offset = njs_string_map_offset(size); + new_size = map_offset + njs_string_map_size(length); start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size); if (nxt_slow_path(start == NULL)) { @@ -309,7 +312,8 @@ njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string, njs_value_t *value) string->start = start; value->data.u.string->start = start; - njs_string_offset_map_init(start, size); + map = (uint32_t *) (start + map_offset); + map[0] = 0; } } @@ -649,10 +653,6 @@ njs_string_prototype_concat(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs, p += string.size; } - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(start, size); - } - return NXT_OK; } @@ -766,10 +766,6 @@ njs_string_prototype_from_bytes(njs_vm_t *vm, njs_value_t *args, for (p = string.start; p < end; p++) { s = nxt_utf8_encode(s, *p); } - - if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) { - njs_string_offset_map_init(start, size); - } } return NXT_OK; @@ -1518,8 +1514,7 @@ done: /* - * njs_string_offset() assumes that index is correct - * and the optional offset map has been initialized. + * njs_string_offset() assumes that index is correct. 
*/ nxt_noinline const u_char * @@ -1531,6 +1526,10 @@ njs_string_offset(const u_char *start, const u_char *end, size_t index) if (index >= NJS_STRING_MAP_STRIDE) { map = njs_string_map_start(end); + if (map[0] == 0) { + njs_string_offset_map_init(start, end - start); + } + start += map[index / NJS_STRING_MAP_STRIDE - 1]; } @@ -1543,8 +1542,7 @@ njs_string_offset(const u_char *start, const u_char *end, size_t index) /* - * njs_string_index() assumes that offset is correct - * and the optional offset map has been initialized. + * njs_string_index() assumes that offset is correct. */ nxt_noinline uint32_t @@ -1565,6 +1563,10 @@ njs_string_index(njs_string_prop_t *string, uint32_t offset) end = string->start + string->size; map = njs_string_map_start(end); + if (map[0] == 0) { + njs_string_offset_map_init(string->start, string->size); + } + while (index + NJS_STRING_MAP_STRIDE < string->length && *map <= offset) { @@ -1628,10 +1630,6 @@ njs_string_prototype_to_lower_case(njs_vm_t *vm, njs_value_t *args, p = nxt_utf8_encode(p, nxt_utf8_lower_case(&s, end)); size--; } - - if (string.length >= NJS_STRING_MAP_STRIDE) { - njs_string_offset_map_init(start, string.size); - } } return NXT_OK; @@ -1680,10 +1678,6 @@ njs_string_prototype_to_upper_case(njs_vm_t *vm, njs_value_t *args, p = nxt_utf8_encode(p, nxt_utf8_upper_case(&s, end)); size--; } - - if (string.length >= NJS_STRING_MAP_STRIDE) { - njs_string_offset_map_init(start, string.size); - } } return NXT_OK; @@ -1865,10 +1859,6 @@ njs_string_prototype_repeat(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs, n--; } - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(start, size); - } - return NXT_OK; } @@ -2882,10 +2872,6 @@ njs_string_replace_join(njs_vm_t *vm, njs_string_replace_t *r) /* GC: release valid values. 
*/ } - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(string, size); - } - nxt_array_destroy(&r->parts, &njs_array_mem_proto, vm->mem_cache_pool); return NXT_OK; diff --git a/njs/njs_string.h b/njs/njs_string.h index 35e6bf0f..597c752e 100644 --- a/njs/njs_string.h +++ b/njs/njs_string.h @@ -53,9 +53,9 @@ * To speed up this search a map of offsets is stored after the UTF-8 string. * The map is aligned to uint32_t and contains byte positions of each * NJS_STRING_MAP_STRIDE UTF-8 character except zero position. The map - * can be allocated and updated on demand. If a string come outside - * JavaScript as byte sequnece just to be concatenated or to be used in - * regular expressions the offset map is not required. + * can be initialized on demand. If a string comes from outside JavaScript + * as a byte sequence just to be concatenated or to be used in regular + * expressions the offset map is not required. * * The map is not allocated: * 1) if the length is zero hence it is a byte string; diff --git a/njs/njs_vm.c b/njs/njs_vm.c index 8e67a2b3..cff396b7 100644 --- a/njs/njs_vm.c +++ b/njs/njs_vm.c @@ -1556,10 +1556,6 @@ njs_vmcode_addition(njs_vm_t *vm, njs_value_t *val1, njs_value_t *val2) (void) memcpy(start, string1.start, string1.size); (void) memcpy(start + string1.size, string2.start, string2.size); - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(start, size); - } - return sizeof(njs_vmcode_3addr_t); } -- 2.47.3