From: Alexander Borisov Date: Mon, 22 Apr 2019 13:23:50 +0000 (+0300) Subject: Fixed parsing of surrogate pairs presented as UTF-16 escape sequences. X-Git-Tag: 0.3.2~52 X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=578976674bd94f8717f53c6b5722b1e4f298505d;p=njs.git Fixed parsing of surrogate pairs presented as UTF-16 escape sequences. This closes issue #96 on GitHub. --- diff --git a/njs/njs_parser_terminal.c b/njs/njs_parser_terminal.c index ad3ef97b..77ade529 100644 --- a/njs/njs_parser_terminal.c +++ b/njs/njs_parser_terminal.c @@ -926,7 +926,7 @@ njs_parser_escape_string_create(njs_vm_t *vm, njs_parser_t *parser, { u_char c, *start, *dst; size_t size, length, hex_length; - uint64_t cp; + uint64_t cp, cp_pair; njs_ret_t ret; nxt_str_t *string; const u_char *src, *end, *hex_end; @@ -942,6 +942,7 @@ njs_parser_escape_string_create(njs_vm_t *vm, njs_parser_t *parser, } dst = start; + cp_pair = 0; string = njs_parser_text(parser); src = string->start; @@ -1041,6 +1042,23 @@ njs_parser_escape_string_create(njs_vm_t *vm, njs_parser_t *parser, hex: cp = njs_number_hex_parse(&src, hex_end); + /* Skip '}' character. */ + + if (hex_length == 0) { + src++; + } + + /* Surrogate pair. 
*/ + + if (cp_pair != 0) { + cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00); + cp_pair = 0; + + } else if (cp >= 0xd800 && cp <= 0xdfff) { + cp_pair = cp; + continue; + } + dst = nxt_utf8_encode(dst, (uint32_t) cp); if (nxt_slow_path(dst == NULL)) { njs_parser_syntax_error(vm, parser, @@ -1049,12 +1067,6 @@ njs_parser_escape_string_create(njs_vm_t *vm, njs_parser_t *parser, return NJS_TOKEN_ILLEGAL; } - - /* Skip '}' character */ - - if (hex_length == 0) { - src++; - } } if (length > NJS_STRING_MAP_STRIDE && length != size) { @@ -1070,12 +1082,13 @@ njs_parser_escape_string_calc_length(njs_vm_t *vm, njs_parser_t *parser, size_t *out_size, size_t *out_length) { size_t size, length, hex_length; - uint64_t cp; + uint64_t cp, cp_pair; nxt_str_t *string; const u_char *ptr, *src, *end, *hex_end; size = 0; length = 0; + cp_pair = 0; string = njs_parser_text(parser); src = string->start; @@ -1171,6 +1184,25 @@ njs_parser_escape_string_calc_length(njs_vm_t *vm, njs_parser_t *parser, } } + /* Surrogate pair. 
*/ + + if (cp_pair != 0) { + if (nxt_slow_path(cp < 0xdc00 || cp > 0xdfff)) { + goto invalid_pair; + } + + cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00); + cp_pair = 0; + + } else if (cp >= 0xd800 && cp <= 0xdfff) { + if (nxt_slow_path(cp > 0xdbff || src[0] != '\\' || src[1] != 'u')) { + goto invalid_pair; + } + + cp_pair = cp; + continue; + } + size += nxt_utf8_size(cp); length++; } @@ -1186,4 +1218,11 @@ invalid: njs_parser_text(parser)); return NJS_ERROR; + +invalid_pair: + + njs_parser_syntax_error(vm, parser, "Invalid surrogate pair \"%V\"", + njs_parser_text(parser)); + + return NJS_ERROR; } diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index 4c502688..4832b254 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -4387,6 +4387,23 @@ static njs_unit_test_t njs_test[] = { nxt_string("'привет\\n\\u{61}\\u{3B1}\\u{20AC}'.length"), nxt_string("10") }, + { nxt_string("'\\ud83d\\udc4d'"), + nxt_string("\xf0\x9f\x91\x8d") }, + + { nxt_string("'\\ud83d\\udc4d'.length"), + nxt_string("1") }, + + { nxt_string("'\\ud83d abc \\udc4d'"), + nxt_string("SyntaxError: Invalid surrogate pair " + "\"\\ud83d abc \\udc4d\" in 1") }, + + { nxt_string("'\\ud83d'"), + nxt_string("SyntaxError: Invalid surrogate pair \"\\ud83d\" in 1") }, + + { nxt_string("'\\ud83d\\uabcd'"), + nxt_string("SyntaxError: Invalid surrogate pair " + "\"\\ud83d\\uabcd\" in 1") }, + { nxt_string("''.hasOwnProperty('length')"), nxt_string("true") },