From: Dmitry Volyntsev Date: Wed, 17 Apr 2019 15:43:13 +0000 (+0300) Subject: Fixed handling of unicode only regexp expressions. X-Git-Url: http://www.kaiwu.me/postgresql/commit/?a=commitdiff_plain;h=c3eef40fe0458dda0f7647f7614f90a91710e093;p=njs.git Fixed handling of unicode only regexp expressions. This fixes #125 issue on Github. --- diff --git a/njs/njs_regexp.c b/njs/njs_regexp.c index 2725daa9..2a9fbe41 100644 --- a/njs/njs_regexp.c +++ b/njs/njs_regexp.c @@ -315,30 +315,43 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, ret = njs_regexp_pattern_compile(vm, &pattern->regex[0], &pattern->source[1], options); - if (nxt_slow_path(ret < 0)) { - return NULL; - } - pattern->ncaptures = ret; + if (nxt_fast_path(ret >= 0)) { + pattern->ncaptures = ret; + + } else if (ret < 0 && ret != NXT_DECLINED) { + goto fail; + } ret = njs_regexp_pattern_compile(vm, &pattern->regex[1], &pattern->source[1], options | PCRE_UTF8); if (nxt_fast_path(ret >= 0)) { - if (nxt_slow_path((u_int) ret != pattern->ncaptures)) { + if (nxt_slow_path(nxt_regex_is_valid(&pattern->regex[0]) + && (u_int) ret != pattern->ncaptures)) + { njs_internal_error(vm, "regexp pattern compile failed"); - nxt_mp_free(vm->mem_pool, pattern); - return NULL; + goto fail; } } else if (ret != NXT_DECLINED) { - nxt_mp_free(vm->mem_pool, pattern); - return NULL; + goto fail; + } + + if (!nxt_regex_is_valid(&pattern->regex[0]) + && !nxt_regex_is_valid(&pattern->regex[1])) + { + goto fail; } *end = '/'; return pattern; + +fail: + + nxt_mp_free(vm->mem_pool, pattern); + return NULL; } diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index b14998cc..669dfc36 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -4451,6 +4452,12 @@ static njs_unit_test_t njs_test[] = { nxt_string("'α'.toUTF8()[0]"), nxt_string("\xCE") }, + { nxt_string("var r = /^\\x80$/; r.source + r.source.length"), + nxt_string("^\\x80$6") }, + + { nxt_string("var r = /^\\\\x80$/; r.source + r.source.length"), + nxt_string("^\\\\x80$7") }, + { nxt_string("/^\\x80$/.test('\\x80'.toBytes())"), nxt_string("true") }, @@ -11957,6 +11964,25 @@ static njs_unit_test_t njs_tz_test[] = }; +static njs_unit_test_t njs_regexp_test[] = +{ + { nxt_string("/[\\\\u02E0-\\\\u02E4]/"), + nxt_string("/[\\\\u02E0-\\\\u02E4]/") }, + + { nxt_string("/[\\u02E0-\\u02E4]/"), + nxt_string("/[\\u02E0-\\u02E4]/") }, + + { nxt_string("RegExp('[\\\\u02E0-\\\\u02E4]')"), + nxt_string("/[\\u02E0-\\u02E4]/") }, + + { nxt_string("/[\\u0430-\\u044f]+/.test('тест')"), + nxt_string("true") }, + + { nxt_string("RegExp('[\\\\u0430-\\\\u044f]+').test('тест')"), + nxt_string("true") }, +}; + + typedef struct { nxt_lvlhsh_t hash; const njs_extern_t *proto; @@ -12714,6 +12740,85 @@ done: } +static nxt_int_t +njs_timezone_optional_test(nxt_bool_t disassemble, nxt_bool_t verbose) +{ + size_t size; + u_char buf[16]; + time_t clock; + struct tm tm; + nxt_int_t ret; + + /* + * Chatham Islands NZ-CHAT time zone. + * Standard time: UTC+12:45, Daylight Saving time: UTC+13:45. + */ + (void) putenv((char *) "TZ=Pacific/Chatham"); + tzset(); + + clock = 0; + localtime_r(&clock, &tm); + + size = strftime((char *) buf, sizeof(buf), "%z", &tm); + + if (memcmp(buf, "+1245", size) == 0) { + ret = njs_unit_test(njs_tz_test, nxt_nitems(njs_tz_test), disassemble, + verbose); + if (ret != NXT_OK) { + return ret; + } + + nxt_printf("njs timezone tests passed\n"); + + } else { + nxt_printf("njs timezone tests skipped, timezone is unavailable\n"); + } + + return NXT_OK; +} + +static nxt_int_t +njs_regexp_optional_test(nxt_bool_t disassemble, nxt_bool_t verbose) +{ + int erroff; + pcre *re1, *re2; + njs_ret_t ret; + const char *errstr; + + /* + * pcre-8.21 crashes when it compiles unicode escape codes inside + * square brackets when PCRE_UTF8 option is provided. + * Catching it in runtime by compiling it without PCRE_UTF8. Normally it + * should return NULL and "character value in \u.... sequence is too large" + * error string. + */ + re1 = pcre_compile("/[\\u0410]/", PCRE_JAVASCRIPT_COMPAT, &errstr, &erroff, + NULL); + + /* + * pcre-7.8 fails to compile unicode escape codes inside square brackets + * even when PCRE_UTF8 option is provided. + */ + re2 = pcre_compile("/[\\u0410]/", PCRE_JAVASCRIPT_COMPAT | PCRE_UTF8, + &errstr, &erroff, NULL); + + if (re1 == NULL && re2 != NULL) { + ret = njs_unit_test(njs_regexp_test, nxt_nitems(njs_regexp_test), + disassemble, verbose); + if (ret != NXT_OK) { + return ret; + } + + nxt_printf("njs unicode regexp tests passed\n"); + + } else { + nxt_printf("njs unicode regexp tests skipped, libpcre fails\n"); + } + + return NXT_OK; +} + + static nxt_int_t njs_vm_json_test(nxt_bool_t disassemble, nxt_bool_t verbose) { @@ -13025,10 +13130,6 @@ done: int nxt_cdecl main(int argc, char **argv) { - size_t size; - u_char buf[16]; - time_t clock; - struct tm tm; nxt_int_t ret; nxt_bool_t disassemble, verbose; @@ -13059,33 +13160,18 @@ main(int argc, char **argv) return ret; } - nxt_printf("njs unit tests passed\n"); - - /* - * Chatham Islands NZ-CHAT time zone. - * Standard time: UTC+12:45, Daylight Saving time: UTC+13:45. - */ - (void) putenv((char *) "TZ=Pacific/Chatham"); - tzset(); - - clock = 0; - localtime_r(&clock, &tm); - - size = strftime((char *) buf, sizeof(buf), "%z", &tm); - - if (memcmp(buf, "+1245", size) == 0) { - ret = njs_unit_test(njs_tz_test, nxt_nitems(njs_tz_test), disassemble, - verbose); - if (ret != NXT_OK) { - return ret; - } - - nxt_printf("njs timezone tests passed\n"); + ret = njs_timezone_optional_test(disassemble, verbose); + if (ret != NXT_OK) { + return ret; + } - } else { - nxt_printf("njs timezone tests skipped, timezone is unavailable\n"); + ret = njs_regexp_optional_test(disassemble, verbose); + if (ret != NXT_OK) { + return ret; } + nxt_printf("njs unit tests passed\n"); + ret = njs_vm_json_test(disassemble, verbose); if (ret != NXT_OK) { return ret; diff --git a/nxt/nxt_pcre.c b/nxt/nxt_pcre.c index be5f7e1f..503d574b 100644 --- a/nxt/nxt_pcre.c +++ b/nxt/nxt_pcre.c @@ -92,6 +92,8 @@ nxt_regex_compile(nxt_regex_t *regex, u_char *source, size_t len, "pcre_compile(\"%s\") failed: %s", pattern, errstr); } + ret = NXT_DECLINED; + goto done; }