From: Igor Sysoev Date: Wed, 18 Nov 2015 12:45:52 +0000 (+0300) Subject: Changes in byte string operations. X-Git-Tag: 0.1.0~121 X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=efbefbfe033c9a03f80a8c2b0658a9972472f92d;p=njs.git Changes in byte string operations. String.bytes property is changed to String.toBytes() method. The method serializes a Unicode string to a byte string. It returns null if a character larger than 255 is encountered in the Unicode string. String.fromBytes() method converts a byte string to a Unicode string. String.utf8 property is converted to String.fromUTF8() method. The method converts a UTF-8 encoded byte string to a Unicode string. String.toUTF8() serializes a Unicode string to a UTF-8 encoded byte string. --- diff --git a/njs/njs_string.c b/njs/njs_string.c index 5f41fb74..8a5cfc6c 100644 --- a/njs/njs_string.c +++ b/njs/njs_string.c @@ -24,6 +24,10 @@ #include +static nxt_noinline void njs_string_slice_prop(njs_param_t *param, + njs_string_prop_t *string, njs_slice_prop_t *slice); +static nxt_noinline void njs_string_slice_params(njs_param_t *param, + njs_slice_prop_t *slice); static nxt_noinline ssize_t njs_string_index_of(njs_vm_t *vm, njs_value_t *src, njs_value_t *search_string, size_t index); @@ -341,80 +345,6 @@ njs_string_prototype_length(njs_vm_t *vm, njs_value_t *value) } -static njs_ret_t -njs_string_prototype_bytes(njs_vm_t *vm, njs_value_t *value) -{ - u_char *p; - uintptr_t size; - const u_char *s, *end; - njs_string_prop_t string; - - size = njs_string_prop(&string, value); - - p = njs_string_alloc(vm, &vm->retval, size, 0); - - if (nxt_fast_path(p != NULL)) { - - if (string.length == 0) { - memcpy(p, string.start, size); - - } else { - s = string.start; - end = s + string.size; - - while (s < end) { - *p++ = (u_char) nxt_utf8_decode(&s, end); - } - } - - njs_release(vm, value); - return NXT_OK; - } - - return NXT_ERROR; -} - - -static njs_ret_t -njs_string_prototype_utf8(njs_vm_t *vm, 
njs_value_t *value) -{ - u_char *p; - ssize_t length; - njs_string_prop_t string; - - (void) njs_string_prop(&string, value); - - length = nxt_utf8_length(string.start, string.size); - - if (length < 0) { - vm->retval = njs_value_null; - njs_release(vm, value); - return NXT_OK; - } - - if ((size_t) length == string.size) { - return njs_string_create(vm, &vm->retval, string.start, length, length); - } - - /* length != string.size */ - - p = njs_string_alloc(vm, &vm->retval, string.size, length); - - if (nxt_fast_path(p != NULL)) { - memcpy(p, string.start, string.size); - - if (length >= NJS_STRING_MAP_OFFSET) { - njs_string_offset_map_init(p, string.size); - } - - njs_release(vm, value); - return NXT_OK; - } - - return NXT_ERROR; -} - - nxt_noinline void njs_string_offset_map_init(const u_char *start, size_t size) { @@ -591,66 +521,208 @@ njs_string_prototype_concat(njs_vm_t *vm, njs_param_t *param) static njs_ret_t -njs_string_prototype_slice(njs_vm_t *vm, njs_param_t *param) +njs_string_prototype_from_utf8(njs_vm_t *vm, njs_param_t *param) { - ssize_t start, end; - uintptr_t nargs; - njs_ret_t length, string_length; - njs_value_t *args; + u_char *p; + ssize_t length; + njs_slice_prop_t slice; njs_string_prop_t string; - string_length = njs_string_prop(&string, param->object); + njs_string_slice_prop(param, &string, &slice); - length = string_length; - start = 0; - nargs = param->nargs; + if (string.length != 0) { + /* ASCII or UTF8 string. */ + return njs_string_slice(vm, &vm->retval, &string, &slice); + } - if (nargs != 0) { - args = param->args; + string.start += slice.start; - start = njs_value_to_number(&args[0]); + length = nxt_utf8_length(string.start, slice.length); - if (start < 0) { - start += length; + if (length >= 0) { - if (start < 0) { - start = 0; - } + if (length < NJS_STRING_MAP_OFFSET || (size_t) length == slice.length) { + /* ASCII or short UTF-8 string. 
*/ + return njs_string_create(vm, &vm->retval, string.start, + slice.length, length); } - if (nargs > 1) { - end = njs_value_to_number(&args[1]); + /* Long UTF-8 string. */ - if (end < 0) { - end += length; + p = njs_string_alloc(vm, &vm->retval, slice.length, length); + + if (nxt_fast_path(p != NULL)) { + memcpy(p, string.start, slice.length); + njs_string_offset_map_init(p, slice.length); + + return NXT_OK; + } + + return NXT_ERROR; + } + + vm->retval = njs_value_null; + + return NXT_OK; +} + + +static njs_ret_t +njs_string_prototype_to_utf8(njs_vm_t *vm, njs_param_t *param) +{ + njs_slice_prop_t slice; + njs_string_prop_t string; + + (void) njs_string_prop(&string, param->object); + + string.length = 0; + slice.string_length = string.size; + + njs_string_slice_params(param, &slice); + + return njs_string_slice(vm, &vm->retval, &string, &slice); +} + + +static njs_ret_t +njs_string_prototype_from_bytes(njs_vm_t *vm, njs_param_t *param) +{ + u_char *p, *s, *start, *end; + size_t size; + njs_slice_prop_t slice; + njs_string_prop_t string; + + njs_string_slice_prop(param, &string, &slice); + + if (string.length != 0) { + /* ASCII or UTF8 string. */ + return njs_string_slice(vm, &vm->retval, &string, &slice); + } + + size = 0; + string.start += slice.start; + end = string.start + slice.length; + + for (p = string.start; p < end; p++) { + size += (*p < 0x80) ? 
1 : 2; + } + + start = njs_string_alloc(vm, &vm->retval, size, slice.length); + + if (nxt_fast_path(start != NULL)) { + + if (size == slice.length) { + memcpy(start, string.start, size); + + } else { + s = start; + end = string.start + slice.length; + + for (p = string.start; p < end; p++) { + s = nxt_utf8_encode(s, *p); } - length = end - start; + if (slice.length >= NJS_STRING_MAP_OFFSET || size != slice.length) { + njs_string_offset_map_init(start, size); + } + } - if (length < 0) { - start = 0; - length = 0; + return NXT_OK; + } + + return NXT_ERROR; +} + + +static njs_ret_t +njs_string_prototype_to_bytes(njs_vm_t *vm, njs_param_t *param) +{ + u_char *p; + size_t length; + uint32_t byte; + const u_char *s, *end; + njs_slice_prop_t slice; + njs_string_prop_t string; + + njs_string_slice_prop(param, &string, &slice); + + if (string.length == 0) { + /* Byte string. */ + return njs_string_slice(vm, &vm->retval, &string, &slice); + } + + p = njs_string_alloc(vm, &vm->retval, slice.length, 0); + + if (nxt_fast_path(p != NULL)) { + + if (string.length != 0) { + /* UTF-8 string. */ + end = string.start + string.size; + + s = njs_string_offset(string.start, end, slice.start); + + length = slice.length; + + while (length != 0 && s < end) { + byte = nxt_utf8_decode(&s, end); + + if (nxt_slow_path(byte > 0xFF)) { + njs_release(vm, &vm->retval); + vm->retval = njs_value_null; + + return NXT_OK; + } + + *p++ = (u_char) byte; + length--; } + + } else { + /* ASCII string. */ + memcpy(p, string.start + slice.start, slice.length); } + + return NXT_OK; } - return njs_string_slice(vm, &vm->retval, &string, string_length, - start, length); + return NXT_ERROR; +} + + +/* + * String.slice(start[, end]). + * JavaScript 1.2, ECMAScript 3. 
+ */ + +static nxt_noinline njs_ret_t +njs_string_prototype_slice(njs_vm_t *vm, njs_param_t *param) +{ + njs_slice_prop_t slice; + njs_string_prop_t string; + + njs_string_slice_prop(param, &string, &slice); + + return njs_string_slice(vm, &vm->retval, &string, &slice); } +/* + * String.substring(start[, end]). + * JavaScript 1.0, ECMAScript 1. + */ + static njs_ret_t njs_string_prototype_substring(njs_vm_t *vm, njs_param_t *param) { - ssize_t start, end; + ssize_t start, end, length; uintptr_t nargs; - njs_ret_t length, string_length; njs_value_t *args; + njs_slice_prop_t slice; njs_string_prop_t string; - string_length = njs_string_prop(&string, param->object); + length = njs_string_prop(&string, param->object); - length = string_length; + slice.string_length = length; start = 0; nargs = param->nargs; @@ -679,23 +751,30 @@ njs_string_prototype_substring(njs_vm_t *vm, njs_param_t *param) } } - return njs_string_slice(vm, &vm->retval, &string, string_length, - start, length); + slice.start = start; + slice.length = length; + + return njs_string_slice(vm, &vm->retval, &string, &slice); } +/* + * String.substr(start[, length]). + * JavaScript 1.0, ECMAScript 3. 
+ */ + static njs_ret_t njs_string_prototype_substr(njs_vm_t *vm, njs_param_t *param) { - ssize_t start; + ssize_t start, length; uintptr_t nargs; - njs_ret_t length, string_length; njs_value_t *args; + njs_slice_prop_t slice; njs_string_prop_t string; - string_length = njs_string_prop(&string, param->object); + length = njs_string_prop(&string, param->object); - length = string_length; + slice.string_length = length; start = 0; nargs = param->nargs; @@ -717,19 +796,21 @@ njs_string_prototype_substr(njs_vm_t *vm, njs_param_t *param) } } - return njs_string_slice(vm, &vm->retval, &string, string_length, - start, length); + slice.start = start; + slice.length = length; + + return njs_string_slice(vm, &vm->retval, &string, &slice); } static njs_ret_t njs_string_prototype_char_at(njs_vm_t *vm, njs_param_t *param) { - ssize_t start; - njs_ret_t length, string_length; + ssize_t start, length; + njs_slice_prop_t slice; njs_string_prop_t string; - string_length = njs_string_prop(&string, param->object); + slice.string_length = njs_string_prop(&string, param->object); start = 0; length = 1; @@ -742,29 +823,89 @@ njs_string_prototype_char_at(njs_vm_t *vm, njs_param_t *param) } } - return njs_string_slice(vm, &vm->retval, &string, string_length, - start, length); + slice.start = start; + slice.length = length; + + return njs_string_slice(vm, &vm->retval, &string, &slice); +} + + +static nxt_noinline void +njs_string_slice_prop(njs_param_t *param, njs_string_prop_t *string, + njs_slice_prop_t *slice) +{ + slice->string_length = njs_string_prop(string, param->object); + + njs_string_slice_params(param, slice); +} + + +static nxt_noinline void +njs_string_slice_params(njs_param_t *param, njs_slice_prop_t *slice) +{ + ssize_t start, end, length; + uintptr_t nargs; + njs_value_t *args; + + length = slice->string_length; + start = 0; + nargs = param->nargs; + + if (nargs != 0) { + args = param->args; + + start = njs_value_to_number(&args[0]); + + if (start < 0) { + start += length; 
+ + if (start < 0) { + start = 0; + } + } + + end = length; + + if (nargs > 1) { + end = njs_value_to_number(&args[1]); + + if (end < 0) { + end += length; + } + } + + length = end - start; + + if (length < 0) { + start = 0; + length = 0; + } + } + + slice->start = start; + slice->length = length; } nxt_noinline njs_ret_t njs_string_slice(njs_vm_t *vm, njs_value_t *dst, - const njs_string_prop_t *string, size_t string_length, size_t index, - size_t length) + const njs_string_prop_t *string, njs_slice_prop_t *slice) { - u_char *slice; - size_t size, n; + u_char *s; + size_t size, n, length; ssize_t excess; const u_char *p, *start, *end; - if (length > 0 && index < string_length) { + length = slice->length; + + if (length > 0 && slice->start < slice->string_length) { start = string->start; end = start + string->size; - if (string->size == string_length) { + if (string->size == slice->string_length) { /* Byte or ASCII string. */ - start += index; + start += slice->start; excess = (start + length) - end; if (excess > 0) { @@ -779,7 +920,7 @@ njs_string_slice(njs_vm_t *vm, njs_value_t *dst, } else { /* UTF-8 string. */ - start = njs_string_offset(start, end, index); + start = njs_string_offset(start, end, slice->start); /* Evaluate size of the slice in bytes and ajdust length. 
*/ p = start; @@ -795,16 +936,16 @@ njs_string_slice(njs_vm_t *vm, njs_value_t *dst, } if (nxt_fast_path(size != 0)) { - slice = njs_string_alloc(vm, &vm->retval, size, length); + s = njs_string_alloc(vm, &vm->retval, size, length); - if (nxt_slow_path(slice == NULL)) { + if (nxt_slow_path(s == NULL)) { return NXT_ERROR; } - memcpy(slice, start, size); + memcpy(s, start, size); if (length >= NJS_STRING_MAP_OFFSET && size != length) { - njs_string_offset_map_init(slice, size); + njs_string_offset_map_init(s, size); } return NXT_OK; @@ -1414,18 +1555,26 @@ static const njs_object_prop_t njs_string_prototype_properties[] = njs_string("length"), NJS_NATIVE_GETTER, 0, 0, 0, }, - { njs_getter(njs_string_prototype_bytes), - njs_string("bytes"), - NJS_NATIVE_GETTER, 0, 0, 0, }, - - { njs_getter(njs_string_prototype_utf8), - njs_string("utf8"), - NJS_NATIVE_GETTER, 0, 0, 0, }, - { njs_native_function(njs_string_prototype_concat, 0), njs_string("concat"), NJS_METHOD, 0, 0, 0, }, + { njs_native_function(njs_string_prototype_from_utf8, 0), + njs_string("fromUTF8"), + NJS_METHOD, 0, 0, 0, }, + + { njs_native_function(njs_string_prototype_to_utf8, 0), + njs_string("toUTF8"), + NJS_METHOD, 0, 0, 0, }, + + { njs_native_function(njs_string_prototype_from_bytes, 0), + njs_string("fromBytes"), + NJS_METHOD, 0, 0, 0, }, + + { njs_native_function(njs_string_prototype_to_bytes, 0), + njs_string("toBytes"), + NJS_METHOD, 0, 0, 0, }, + { njs_native_function(njs_string_prototype_slice, 0), njs_string("slice"), NJS_METHOD, 0, 0, 0, }, diff --git a/njs/njs_string.h b/njs/njs_string.h index e134bdaf..d26b147f 100644 --- a/njs/njs_string.h +++ b/njs/njs_string.h @@ -73,6 +73,13 @@ typedef struct { } njs_string_prop_t; +typedef struct { + size_t start; + size_t length; + size_t string_length; +} njs_slice_prop_t; + + u_char *njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, uint32_t length) NXT_MALLOC_LIKE; @@ -87,8 +94,7 @@ void njs_string_offset_map_init(const u_char *start, 
size_t size); nxt_bool_t njs_string_eq(const njs_value_t *val1, const njs_value_t *val2); nxt_int_t njs_string_cmp(const njs_value_t *val1, const njs_value_t *val2); njs_ret_t njs_string_slice(njs_vm_t *vm, njs_value_t *dst, - const njs_string_prop_t *string, size_t string_length, size_t index, - size_t length); + const njs_string_prop_t *string, njs_slice_prop_t *slice); const u_char *njs_string_offset(const u_char *start, const u_char *end, size_t index); nxt_noinline uint32_t njs_string_index(njs_string_prop_t *string, diff --git a/njs/njs_vm.c b/njs/njs_vm.c index a9b2f7dc..74628a9c 100644 --- a/njs/njs_vm.c +++ b/njs/njs_vm.c @@ -445,15 +445,15 @@ njs_ret_t njs_vmcode_property_get(njs_vm_t *vm, njs_value_t *object, njs_value_t *property) { - size_t length; double num; int32_t index; uintptr_t data; njs_ret_t ret; njs_value_t *val; njs_extern_t *ext; - njs_object_prop_t *prop; + njs_slice_prop_t slice; njs_string_prop_t string; + njs_object_prop_t *prop; const njs_value_t *retval; njs_property_query_t pq; @@ -547,11 +547,12 @@ njs_vmcode_property_get(njs_vm_t *vm, njs_value_t *object, index = (int32_t) num; if (index >= 0 && index == num) { - length = njs_string_prop(&string, object); + slice.start = index; + slice.length = 1; + slice.string_length = njs_string_prop(&string, object); /* A single codepoint string fits in vm->retval cannot fail. */ - (void) njs_string_slice(vm, &vm->retval, &string, length, - index, 1); + (void) njs_string_slice(vm, &vm->retval, &string, &slice); if (nxt_fast_path(vm->retval.data.truth != 0)) { /* Non-empty string. 
*/ @@ -1889,6 +1890,9 @@ njs_vmcode_strict_not_equal(njs_vm_t *vm, njs_value_t *val1, njs_value_t *val2) static nxt_noinline nxt_bool_t njs_values_strict_equal(njs_value_t *val1, njs_value_t *val2) { + size_t size; + const u_char *start1, *start2; + if (val1->type != val2->type) { return 0; } @@ -1899,7 +1903,36 @@ njs_values_strict_equal(njs_value_t *val1, njs_value_t *val2) } if (njs_is_string(val1)) { - return njs_string_eq(val1, val2); + size = val1->short_string.size; + + if (size != val2->short_string.size) { + return 0; + } + + if (size != NJS_STRING_LONG) { + if (val1->short_string.length != val2->short_string.length) { + return 0; + } + + start1 = val1->short_string.start; + start2 = val2->short_string.start; + + } else { + size = val1->data.string_size; + + if (size != val2->data.string_size) { + return 0; + } + + if (val1->data.u.string->length != val2->data.u.string->length) { + return 0; + } + + start1 = val1->data.u.string->start; + start2 = val2->data.u.string->start; + } + + return (memcmp(start1, start2, size) == 0); } return (val1->data.u.object == val2->data.u.object); diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index eaeafd82..43723f79 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -1744,15 +1744,39 @@ static njs_unit_test_t njs_test[] = { nxt_string("'abc'.length"), nxt_string("3") }, + { nxt_string("'abc'.toUTF8().length"), + nxt_string("3") }, + + { nxt_string("'абв'.length"), + nxt_string("3") }, + + { nxt_string("'абв'.toUTF8().length"), + nxt_string("6") }, + + { nxt_string("'αβγ'.length"), + nxt_string("3") }, + + { nxt_string("'αβγ'.toUTF8().length"), + nxt_string("6") }, + { nxt_string("'絵文字'.length"), nxt_string("3") }, + { nxt_string("'絵文字'.toUTF8().length"), + nxt_string("9") }, + { nxt_string("'えもじ'.length"), nxt_string("3") }, + { nxt_string("'えもじ'.toUTF8().length"), + nxt_string("9") }, + { nxt_string("'囲碁織'.length"), nxt_string("3") }, + { nxt_string("'囲碁織'.toUTF8().length"), + 
nxt_string("9") }, + { nxt_string("a = 'abc'; a.length"), nxt_string("3") }, @@ -1768,21 +1792,58 @@ static njs_unit_test_t njs_test[] = { nxt_string("a = 'abc' + 1 + 'абв'; a +' '+ a.length"), nxt_string("abc1абв 7") }, - /* TODO: '\u00C2\u00B6'.bytes */ - - { nxt_string("a = '\xC3\x82\xC2\xB6'.bytes; u = a.utf8;" - "a.length +' '+ a +' '+ u.length +' '+ u"), - nxt_string("2 \xC2\xB6 1 \xC2\xB6") }, - { nxt_string("a = 1; a.length"), nxt_string("undefined") }, { nxt_string("a = 'abc'; a.concat('абв', 123)"), nxt_string("abcабв123") }, - { nxt_string("a = $r.uri; s = a.utf8; s.length +' '+ s"), + { nxt_string("'\\u00CE\\u00B1'.toBytes() == 'α'"), + nxt_string("true") }, + + { nxt_string("'\\u00CE\\u00B1'.toBytes() === 'α'"), + nxt_string("false") }, + + { nxt_string("b = '\\u00C2\\u00B6'.toBytes(); u = b.fromUTF8();" + "b.length +' '+ b +' '+ u.length +' '+ u"), + nxt_string("2 ¶ 1 ¶") }, + + { nxt_string("'α'.toBytes()"), + nxt_string("null") }, + + { nxt_string("'α'.toUTF8()[0]"), + nxt_string("\xCE") }, + + { nxt_string("a = 'a'.toBytes() + 'α'; a + a.length"), + nxt_string("aα3") }, + + { nxt_string("a = 'µ§±®'.toBytes(); a"), + nxt_string("\xB5\xA7\xB1\xAE") }, + + { nxt_string("a = 'µ§±®'.toBytes(2); a"), + nxt_string("\xB1\xAE") }, + + { nxt_string("a = 'µ§±®'.toBytes(1,3); a"), + nxt_string("\xA7\xB1") }, + + { nxt_string("a = '\\xB5\\xA7\\xB1\\xAE'.toBytes(); a.fromBytes()"), + nxt_string("µ§±®") }, + + { nxt_string("a = '\\xB5\\xA7\\xB1\\xAE'.toBytes(); a.fromBytes(2)"), + nxt_string("±®") }, + + { nxt_string("a = '\\xB5\\xA7\\xB1\\xAE'.toBytes(); a.fromBytes(1, 3)"), + nxt_string("§±") }, + + { nxt_string("a = $r.uri; s = a.fromUTF8(); s.length +' '+ s"), nxt_string("3 АБВ") }, + { nxt_string("a = $r.uri; s = a.fromUTF8(2); s.length +' '+ s"), + nxt_string("2 БВ") }, + + { nxt_string("a = $r.uri; s = a.fromUTF8(2, 4); s.length +' '+ s"), + nxt_string("1 Б") }, + { nxt_string("a = $r.uri; a +' '+ a.length +' '+ a"), nxt_string("АБВ 6 АБВ") }, @@ -2225,12 
+2286,10 @@ static njs_unit_test_t njs_test[] = { nxt_string("/абв/i.test('АБВ')"), nxt_string("true") }, - /* TODO: '\u00C2\u00B6".bytes */ - - { nxt_string("/\xC2\xB6/.test('\xC3\x82\xC2\xB6'.bytes)"), + { nxt_string("/\\xC2\\xB6/.test('\\u00C2\\u00B6'.toBytes())"), nxt_string("true") }, - { nxt_string("/\\x80/.test('\x80'.bytes)"), + { nxt_string("/\\x80/.test('\\u0080'.toBytes())"), nxt_string("true") }, { nxt_string("var a = /^$/.exec(''); a.length +' '+ a"), @@ -2239,7 +2298,8 @@ static njs_unit_test_t njs_test[] = { nxt_string("var r = /бв/ig; var a = r.exec('АБВ'); r.lastIndex +' '+ a"), nxt_string("3 БВ") }, - { nxt_string("var r = /\\x80/g; r.exec('\x81\x80'.bytes); r.lastIndex"), + { nxt_string("var r = /\\x80/g; r.exec('\\u0081\\u0080'.toBytes());" + "r.lastIndex"), nxt_string("1") }, /* @@ -3050,9 +3110,9 @@ main(int argc, char **argv) "function fibo(n) {" " if (n > 1)" " return fibo(n - 1) + fibo(n - 2)" - " return '\xC3\x8E\xC2\xB1'.bytes" + " return '\\x80'.toBytes()" "}" - "fibo(32).utf8.length"); + "fibo(32).length"); nxt_str_t fibo_utf8 = nxt_string( "function fibo(n) {"