From 6feebca8094fc47ae08340bbaf1cbd9c1fd88b2e Mon Sep 17 00:00:00 2001 From: Igor Sysoev Date: Wed, 18 Nov 2015 15:46:02 +0300 Subject: [PATCH] RegExp updates. --- njs/njs_extern.c | 1 - njs/njs_generator.c | 1 - njs/njs_lexer.c | 62 ------- njs/njs_object.c | 1 - njs/njs_parser.c | 29 ++-- njs/njs_parser.h | 1 - njs/njs_parser_expression.c | 1 - njs/njs_regexp.c | 320 +++++++++++++++++++++++++++++------- njs/njs_regexp.h | 18 +- njs/njs_regexp_pattern.h | 34 ++-- njs/njs_shared.c | 2 +- njs/njs_string.c | 50 +++++- njs/njs_variable.c | 1 - njs/njs_vm.h | 6 + njs/njscript.c | 2 + njs/test/njs_unit_test.c | 44 ++++- 16 files changed, 390 insertions(+), 183 deletions(-) diff --git a/njs/njs_extern.c b/njs/njs_extern.c index 5b1fa3b4..ea32fb93 100644 --- a/njs/njs_extern.c +++ b/njs/njs_extern.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/njs/njs_generator.c b/njs/njs_generator.c index b3ad32d9..0dd5b657 100644 --- a/njs/njs_generator.c +++ b/njs/njs_generator.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/njs/njs_lexer.c b/njs/njs_lexer.c index 583a320c..3cf5ed4b 100644 --- a/njs/njs_lexer.c +++ b/njs/njs_lexer.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -648,64 +647,3 @@ njs_lexer_division(njs_lexer_t *lexer, njs_token_t token) return token; } - - -njs_token_t -njs_lexer_regexp(njs_lexer_t *lexer, njs_regexp_flags_t *flags) -{ - u_char *p; - njs_regexp_flags_t _flags, flag; - - for (p = lexer->start; p < lexer->end; p++) { - - if (*p == '\\') { - p++; - continue; - } - - if (*p == '/') { - - lexer->text.data = lexer->start; - lexer->text.len = p - lexer->text.data; - p++; - - _flags = 0; - - while (p < lexer->end) { - switch (*p) { - - case 'i': - flag = NJS_REGEXP_IGNORE_CASE; - break; - - case 'g': - flag = NJS_REGEXP_GLOBAL; - break; - - case 'm': - flag = NJS_REGEXP_MULTILINE; - break; - - default: - goto done; - } - - if (nxt_slow_path((_flags & flag) != 0)) { - return NJS_TOKEN_ILLEGAL; - } - - _flags |= flag; - p++; - } - - done: - - *flags = _flags; - lexer->start = p; - - return NJS_TOKEN_REGEXP_LITERAL; - } - } - - return NJS_TOKEN_ILLEGAL; -} diff --git a/njs/njs_object.c b/njs/njs_object.c index cd424dc1..14ab44c1 100644 --- a/njs/njs_object.c +++ b/njs/njs_object.c @@ -12,7 +12,6 @@ #include #include #include -#include #include diff --git a/njs/njs_parser.c b/njs/njs_parser.c index dca82cbc..89688bbe 100644 --- a/njs/njs_parser.c +++ b/njs/njs_parser.c @@ -1226,14 +1226,12 @@ njs_token_t njs_parser_terminal(njs_vm_t *vm, njs_parser_t *parser, njs_token_t token) { - double num; - nxt_int_t ret; - nxt_uint_t level; - njs_extern_t *ext; - njs_variable_t *var; - njs_parser_node_t *node; - njs_regexp_flags_t flags; - njs_regexp_pattern_t *pattern; + double num; + nxt_int_t ret; + nxt_uint_t level; + njs_extern_t *ext; + njs_variable_t *var; + njs_parser_node_t *node; if (token == NJS_TOKEN_OPEN_PARENTHESIS) { @@ -1346,21 +1344,14 @@ njs_parser_terminal(njs_vm_t *vm, njs_parser_t *parser, return token; case NJS_TOKEN_DIVISION: - token = njs_lexer_regexp(parser->lexer, &flags); - if (nxt_slow_path(token <= NJS_TOKEN_ILLEGAL)) { - return token; + ret = njs_regexp_literal(vm, parser, &node->u.value); + if (nxt_slow_path(ret != NXT_OK)) { + return NJS_TOKEN_ILLEGAL; } - node->token = token; - nxt_thread_log_debug("REGEX: '%V'", &parser->lexer->text); - pattern = njs_regexp_pattern_create(vm, &parser->lexer->text, flags); - if (nxt_slow_path(pattern == NULL)) { - return NJS_TOKEN_ERROR; - } - - node->u.value.data.u.data = pattern; + node->token = NJS_TOKEN_REGEXP_LITERAL; parser->code_size += sizeof(njs_vmcode_regexp_t); break; diff --git a/njs/njs_parser.h b/njs/njs_parser.h index 992f30ae..9c8e38ad 100644 --- a/njs/njs_parser.h +++ b/njs/njs_parser.h @@ -273,7 +273,6 @@ struct njs_parser_s { njs_token_t njs_lexer_token(njs_lexer_t *lexer); -njs_token_t njs_lexer_regexp(njs_lexer_t *lexer, njs_regexp_flags_t *flags); nxt_int_t njs_lexer_keywords_init(nxt_mem_cache_pool_t *mcp, nxt_lvlhsh_t *hash); njs_token_t njs_lexer_keyword(njs_lexer_t *lexer); diff --git a/njs/njs_parser_expression.c b/njs/njs_parser_expression.c index 6d8252c0..6cd5cbb2 100644 --- a/njs/njs_parser_expression.c +++ b/njs/njs_parser_expression.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/njs/njs_regexp.c b/njs/njs_regexp.c index 047a6264..a534c162 100644 --- a/njs/njs_regexp.c +++ b/njs/njs_regexp.c @@ -19,61 +19,202 @@ #include #include #include +#include +#include #include +static njs_regexp_flags_t njs_regexp_flags(u_char **start, u_char *end, + nxt_bool_t bound); static int njs_regexp_pattern_compile(pcre **code, pcre_extra **extra, u_char *source, int options); static njs_ret_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, u_char *string, int *captures, nxt_uint_t utf8); -njs_regexp_t * -njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern) +njs_ret_t +njs_regexp_function(njs_vm_t *vm, njs_param_t *param) { - njs_regexp_t *regexp; + size_t length; + njs_regexp_t *regexp; + njs_string_prop_t string; + njs_regexp_flags_t flags; + njs_regexp_pattern_t *pattern; - regexp = nxt_mem_cache_align(vm->mem_cache_pool, sizeof(njs_value_t), - sizeof(njs_regexp_t)); + flags = 0; - if (nxt_fast_path(regexp != NULL)) { - nxt_lvlhsh_init(®exp->object.hash); - nxt_lvlhsh_init(®exp->object.shared_hash); - regexp->object.__proto__ = &vm->prototypes[NJS_PROTOTYPE_REGEXP]; - regexp->last_index = 0; - regexp->pattern = pattern; + switch (param->nargs) { + + default: + length = njs_string_prop(&string, ¶m->args[1]); + + flags = njs_regexp_flags(&string.start, string.start + length, 1); + if (nxt_slow_path(flags < 0)) { + return NXT_ERROR; + } + + /* Fall through. */ + + case 1: + string.length = njs_string_prop(&string, ¶m->args[0]); + break; + + case 0: + string.start = (u_char *) "(?:)"; + string.length = sizeof("(?:)") - 1; + break; } - return regexp; + pattern = njs_regexp_pattern_create(vm, string.start, string.length, flags); + + if (nxt_fast_path(pattern != NULL)) { + + regexp = njs_regexp_alloc(vm, pattern); + + if (nxt_fast_path(regexp != NULL)) { + vm->retval.data.u.regexp = regexp; + vm->retval.type = NJS_REGEXP; + vm->retval.data.truth = 1; + + return NXT_OK; + } + } + + return NXT_ERROR; +} + + +nxt_int_t +njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value) +{ + u_char *p; + njs_lexer_t *lexer; + njs_regexp_flags_t flags; + njs_regexp_pattern_t *pattern; + + lexer = parser->lexer; + + for (p = lexer->start; p < lexer->end; p++) { + + if (*p == '\\') { + p++; + continue; + } + + if (*p == '/') { + lexer->text.data = lexer->start; + lexer->text.len = p - lexer->text.data; + p++; + + flags = njs_regexp_flags(&p, lexer->end, 0); + + if (nxt_slow_path(flags < 0)) { + return NXT_ERROR; + } + + lexer->start = p; + + pattern = njs_regexp_pattern_create(vm, lexer->text.data, + lexer->text.len, flags); + if (nxt_slow_path(pattern == NULL)) { + return NXT_ERROR; + } + + value->data.u.data = pattern; + + return NXT_OK; + } + } + + return NXT_ERROR; +} + + +static njs_regexp_flags_t +njs_regexp_flags(u_char **start, u_char *end, nxt_bool_t bound) +{ + u_char *p; + njs_regexp_flags_t flags, flag; + + flags = 0; + + for (p = *start; p < end; p++) { + + switch (*p) { + + case 'g': + flag = NJS_REGEXP_GLOBAL; + break; + + case 'i': + flag = NJS_REGEXP_IGNORE_CASE; + break; + + case 'm': + flag = NJS_REGEXP_MULTILINE; + break; + + default: + if (bound) { + return NJS_REGEXP_INVALID_FLAG; + } + + goto done; + } + + if (nxt_slow_path((flags & flag) != 0)) { + return NJS_REGEXP_INVALID_FLAG; + } + + flags |= flag; + } + +done: + + *start = p; + + return flags; } njs_regexp_pattern_t * -njs_regexp_pattern_create(njs_vm_t *vm, nxt_str_t *source, +njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, njs_regexp_flags_t flags) { int options, ret; - u_char *p; + u_char *p, *end; + size_t size; njs_regexp_pattern_t *pattern; - /* TODO: pcre_malloc */ + size = 1; /* A trailing "/". */ + size += ((flags & NJS_REGEXP_GLOBAL) != 0); + size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0); + size += ((flags & NJS_REGEXP_MULTILINE) != 0); pattern = nxt_mem_cache_alloc(vm->mem_cache_pool, - sizeof(njs_regexp_pattern_t) + source->len + 1); + sizeof(njs_regexp_pattern_t) + + 1 + length + size + 1); if (nxt_slow_path(pattern == NULL)) { return NULL; } p = (u_char *) pattern + sizeof(njs_regexp_pattern_t); pattern->source = p; + pattern->next = NULL; + pattern->flags = size; - p = memcpy(p, source->data, source->len); - p += source->len; - *p = '\0'; + *p++ = '/'; + p = memcpy(p, start, length); + p += length; + end = p; + *p++ = '\0'; pattern->ncaptures = 0; pattern->global = ((flags & NJS_REGEXP_GLOBAL) != 0); + if (pattern->global) { + *p++ = 'g'; + } #ifdef PCRE_JAVASCRIPT_COMPAT /* JavaScript compatibility has been introduced in PCRE-7.7. */ @@ -82,18 +223,22 @@ njs_regexp_pattern_create(njs_vm_t *vm, nxt_str_t *source, options = 0; #endif - if ((flags & NJS_REGEXP_IGNORE_CASE) != 0) { - pattern->ignore_case = 1; + pattern->ignore_case = ((flags & NJS_REGEXP_IGNORE_CASE) != 0); + if (pattern->ignore_case) { + *p++ = 'i'; options |= PCRE_CASELESS; } - if ((flags & NJS_REGEXP_MULTILINE) != 0) { - pattern->multiline = 1; + pattern->multiline = ((flags & NJS_REGEXP_MULTILINE) != 0); + if (pattern->multiline) { + *p++ = 'm'; options |= PCRE_MULTILINE; } + *p++ = '\0'; + ret = njs_regexp_pattern_compile(&pattern->code[0], &pattern->extra[0], - pattern->source, options); + &pattern->source[1], options); if (nxt_slow_path(ret < 0)) { return NULL; @@ -102,26 +247,30 @@ njs_regexp_pattern_create(njs_vm_t *vm, nxt_str_t *source, pattern->ncaptures = ret; ret = njs_regexp_pattern_compile(&pattern->code[1], &pattern->extra[1], - pattern->source, options | PCRE_UTF8); + &pattern->source[1], options | PCRE_UTF8); - if (nxt_slow_path(ret < 0)) { + if (nxt_fast_path(ret >= 0)) { + + if (nxt_slow_path((unsigned) ret != pattern->ncaptures)) { + nxt_thread_log_error(NXT_LOG_ERR, "numbers of captures in byte " + "and UTF-8 versions of RegExp \"%s\" vary: %d vs %d", + &pattern->source[1], pattern->ncaptures, ret); - if (ret == NXT_DECLINED) { - return pattern; + njs_regexp_pattern_free(pattern); + return NULL; } + } else if (ret != NXT_DECLINED) { + njs_regexp_pattern_free(pattern); return NULL; } - if (nxt_fast_path((unsigned) ret == pattern->ncaptures)) { - return pattern; - } + *end = '/'; - nxt_thread_log_error(NXT_LOG_ERR, "numbers of byte and UTF-8 captures " - "in RegExp \"%s\" vary: %d vs %d", - pattern->source, pattern->ncaptures, ret); + pattern->next = vm->pattern; + vm->pattern = pattern; - return NULL; + return pattern; } @@ -179,10 +328,23 @@ njs_regexp_pattern_compile(pcre **code, pcre_extra **extra, u_char *source, } -njs_ret_t -njs_regexp_function(njs_vm_t *vm, njs_param_t *param) +njs_regexp_t * +njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern) { - return NXT_ERROR; + njs_regexp_t *regexp; + + regexp = nxt_mem_cache_align(vm->mem_cache_pool, sizeof(njs_value_t), + sizeof(njs_regexp_t)); + + if (nxt_fast_path(regexp != NULL)) { + nxt_lvlhsh_init(®exp->object.hash); + nxt_lvlhsh_init(®exp->object.shared_hash); + regexp->object.__proto__ = &vm->prototypes[NJS_PROTOTYPE_REGEXP]; + regexp->last_index = 0; + regexp->pattern = pattern; + } + + return regexp; } @@ -207,63 +369,76 @@ njs_regexp_prototype_last_index(njs_vm_t *vm, njs_value_t *value) static njs_ret_t -njs_regexp_prototype_ignore_case(njs_vm_t *vm, njs_value_t *regexp) +njs_regexp_prototype_global(njs_vm_t *vm, njs_value_t *value) { njs_regexp_pattern_t *pattern; - pattern = regexp->data.u.regexp->pattern; - vm->retval = pattern->ignore_case ? njs_value_true : njs_value_false; - njs_release(vm, regexp); + pattern = value->data.u.regexp->pattern; + vm->retval = pattern->global ? njs_value_true : njs_value_false; + njs_release(vm, value); return NXT_OK; } static njs_ret_t -njs_regexp_prototype_global(njs_vm_t *vm, njs_value_t *regexp) +njs_regexp_prototype_ignore_case(njs_vm_t *vm, njs_value_t *value) { njs_regexp_pattern_t *pattern; - pattern = regexp->data.u.regexp->pattern; - vm->retval = pattern->global ? njs_value_true : njs_value_false; - njs_release(vm, regexp); + pattern = value->data.u.regexp->pattern; + vm->retval = pattern->ignore_case ? njs_value_true : njs_value_false; + njs_release(vm, value); return NXT_OK; } static njs_ret_t -njs_regexp_prototype_multiline(njs_vm_t *vm, njs_value_t *regexp) +njs_regexp_prototype_multiline(njs_vm_t *vm, njs_value_t *value) { njs_regexp_pattern_t *pattern; - pattern = regexp->data.u.regexp->pattern; + pattern = value->data.u.regexp->pattern; vm->retval = pattern->multiline ? njs_value_true : njs_value_false; - njs_release(vm, regexp); + njs_release(vm, value); return NXT_OK; } static njs_ret_t -njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *regexp) +njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *value) { - size_t length; u_char *source; + size_t length, size; njs_regexp_pattern_t *pattern; - pattern = regexp->data.u.regexp->pattern; + pattern = value->data.u.regexp->pattern; + /* Skip starting "/". */ + source = pattern->source + 1; + + size = strlen((char *) source) - pattern->flags; + length = nxt_utf8_length(source, size); + + return njs_string_create(vm, &vm->retval, source, size, length); +} + + +static njs_ret_t +njs_regexp_prototype_to_string(njs_vm_t *vm, njs_param_t *param) +{ + u_char *source; + size_t length, size; + njs_regexp_pattern_t *pattern; - /* - * The pattern source is stored not as value but as C string even - * without length, because retrieving it is very seldom operation. - */ + pattern = param->object->data.u.regexp->pattern; source = pattern->source; - /* TODO: can regexp string be UTF-8? */ - length = strlen((char *) source); + size = strlen((char *) source); + length = nxt_utf8_length(source, size); - return njs_string_create(vm, &vm->retval, source, length, length); + return njs_string_create(vm, &vm->retval, source, size, length); } @@ -514,14 +689,14 @@ static const njs_object_prop_t njs_regexp_prototype_properties[] = njs_string("lastIndex"), NJS_NATIVE_GETTER, 0, 0, 0, }, - { njs_getter(njs_regexp_prototype_ignore_case), - njs_string("ignoreCase"), - NJS_NATIVE_GETTER, 0, 0, 0, }, - { njs_getter(njs_regexp_prototype_global), njs_string("global"), NJS_NATIVE_GETTER, 0, 0, 0, }, + { njs_getter(njs_regexp_prototype_ignore_case), + njs_string("ignoreCase"), + NJS_NATIVE_GETTER, 0, 0, 0, }, + { njs_getter(njs_regexp_prototype_multiline), njs_string("multiline"), NJS_NATIVE_GETTER, 0, 0, 0, }, @@ -530,6 +705,10 @@ static const njs_object_prop_t njs_regexp_prototype_properties[] = njs_string("source"), NJS_NATIVE_GETTER, 0, 0, 0, }, + { njs_native_function(njs_regexp_prototype_to_string, 0), + njs_string("toString"), + NJS_METHOD, 0, 0, 0, }, + { njs_native_function(njs_regexp_prototype_test, 0), njs_string("test"), NJS_METHOD, 0, 0, 0, }, @@ -546,3 +725,18 @@ njs_regexp_prototype_hash(njs_vm_t *vm, nxt_lvlhsh_t *hash) return njs_object_hash_create(vm, hash, njs_regexp_prototype_properties, nxt_nitems(njs_regexp_prototype_properties)); } + + +void +njs_regexp_pattern_free(njs_regexp_pattern_t *pattern) +{ + while (pattern != NULL) { + pcre_free_study(pattern->extra[0]); + pcre_free(pattern->code[0]); + + pcre_free_study(pattern->extra[1]); + pcre_free(pattern->code[1]); + + pattern = pattern->next; + } +} diff --git a/njs/njs_regexp.h b/njs/njs_regexp.h index 8ca7134a..62361337 100644 --- a/njs/njs_regexp.h +++ b/njs/njs_regexp.h @@ -9,9 +9,10 @@ typedef enum { - NJS_REGEXP_IGNORE_CASE = 1, - NJS_REGEXP_GLOBAL = 2, - NJS_REGEXP_MULTILINE = 4, + NJS_REGEXP_INVALID_FLAG = -1, + NJS_REGEXP_GLOBAL = 1, + NJS_REGEXP_IGNORE_CASE = 2, + NJS_REGEXP_MULTILINE = 4, } njs_regexp_flags_t; @@ -24,20 +25,23 @@ struct njs_regexp_s { njs_regexp_pattern_t *pattern; /* - * This string value can be not aligned since + * This string value can be unaligned since * it never used in nJSVM operations. */ njs_value_t string; }; -njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern); -njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm, - nxt_str_t *source, njs_regexp_flags_t flags); njs_ret_t njs_regexp_function(njs_vm_t *vm, njs_param_t *param); +nxt_int_t njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, + njs_value_t *value); +njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm, + u_char *string, size_t length, njs_regexp_flags_t flags); +njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern); njs_ret_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_param_t *param); nxt_int_t njs_regexp_function_hash(njs_vm_t *vm, nxt_lvlhsh_t *hash); nxt_int_t njs_regexp_prototype_hash(njs_vm_t *vm, nxt_lvlhsh_t *hash); +void njs_regexp_pattern_free(njs_regexp_pattern_t *pattern); #endif /* _NJS_REGEXP_H_INCLUDED_ */ diff --git a/njs/njs_regexp_pattern.h b/njs/njs_regexp_pattern.h index b9b438bb..ee4f47e8 100644 --- a/njs/njs_regexp_pattern.h +++ b/njs/njs_regexp_pattern.h @@ -11,20 +11,32 @@ struct njs_regexp_pattern_s { - pcre *code[2]; - pcre_extra *extra[2]; - u_char *source; + pcre *code[2]; + pcre_extra *extra[2]; + /* + * A pattern source is used by RegExp.toString() method and + * RegExp.source property. So it is is stored in form "/pattern/flags" + * and as zero-terminated C string but not as value, because retrieving + * it is very seldom operation. To get just a pattern string for + * RegExp.source property a length of flags part "/flags" is stored + * in flags field. + */ + u_char *source; + njs_regexp_pattern_t *next; #if (NXT_64BIT) - uint32_t ncaptures; - uint8_t global; /* 1 bit */ - uint8_t ignore_case; /* 1 bit */ - uint8_t multiline; /* 1 bit */ + uint32_t ncaptures; + uint8_t flags; /* 2 bits */ + + uint8_t global; /* 1 bit */ + uint8_t ignore_case; /* 1 bit */ + uint8_t multiline; /* 1 bit */ #else - uint16_t ncaptures; - uint8_t global; /* 1 bit */ - uint8_t ignore_case:1; - uint8_t multiline:1; + uint16_t ncaptures; + uint8_t flags; /* 2 bits */ + uint8_t global:1; + uint8_t ignore_case:1; + uint8_t multiline:1; #endif }; diff --git a/njs/njs_shared.c b/njs/njs_shared.c index 416e4371..38dcdaa9 100644 --- a/njs/njs_shared.c +++ b/njs/njs_shared.c @@ -70,7 +70,7 @@ njs_shared_objects_create(njs_vm_t *vm) njs_number_function, njs_string_ctor_function, njs_stub_function, - njs_stub_function, + njs_regexp_function, njs_stub_function, }; diff --git a/njs/njs_string.c b/njs/njs_string.c index 8a5cfc6c..dd77e23e 100644 --- a/njs/njs_string.c +++ b/njs/njs_string.c @@ -1191,6 +1191,7 @@ njs_string_prototype_search(njs_vm_t *vm, njs_param_t *param) int ret; nxt_int_t index; nxt_uint_t n; + njs_value_t *args; njs_string_prop_t string; njs_regexp_pattern_t *pattern; int captures[3]; @@ -1200,14 +1201,45 @@ njs_string_prototype_search(njs_vm_t *vm, njs_param_t *param) index = 0; if (param->nargs != 0) { - /* - * TODO: convert args[0] to RegExp: - * RegExp > RegExp - * String > RegExp - * undefined > // - * otherwise > String > RegExp - */ - pattern = param->args[0].data.u.regexp->pattern; + + args = param->args; + + switch (args[0].type) { + + case NJS_REGEXP: + pattern = args[0].data.u.regexp->pattern; + break; + + case NJS_STRING: + (void) njs_string_prop(&string, &args[0]); + + pattern = njs_regexp_pattern_create(vm, string.start, + string.length, 0); + if (nxt_slow_path(pattern == NULL)) { + return NXT_ERROR; + } + + break; + + case NJS_VOID: + /* STUB: precompiled "/(?:)/" pattern. */ + string.start = (u_char *) "(?:)"; + string.length = sizeof("(?:)") - 1; + + pattern = njs_regexp_pattern_create(vm, string.start, + string.length, 0); + if (nxt_slow_path(pattern == NULL)) { + return NXT_ERROR; + } + + break; + + default: + /* STUB: convert args[0] to String, then to RegExp. */ + vm->exception = &njs_exception_type_error; + + return NXT_ERROR; + } index = -1; @@ -1290,8 +1322,10 @@ njs_string_prototype_match(njs_vm_t *vm, njs_param_t *param) if (n != 0) { utf8 = 2; + } else if (string.length != 0) { utf8 = 1; + } else { utf8 = 1; } diff --git a/njs/njs_variable.c b/njs/njs_variable.c index 62e51f02..bc984478 100644 --- a/njs/njs_variable.c +++ b/njs/njs_variable.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/njs/njs_vm.h b/njs/njs_vm.h index 1a5c8e38..dc54fc32 100644 --- a/njs/njs_vm.h +++ b/njs/njs_vm.h @@ -315,6 +315,11 @@ njs_is_function(value) \ ((value)->type == NJS_FUNCTION) +#define \ +njs_is_regexp(value) \ + ((value)->type == NJS_REGEXP) + + #define \ njs_is_native(value) \ ((value)->type == NJS_NATIVE) @@ -688,6 +693,7 @@ struct njs_vm_s { njs_vm_shared_t *shared; njs_parser_t *parser; + njs_regexp_pattern_t *pattern; }; diff --git a/njs/njscript.c b/njs/njscript.c index 0f3c6236..ee5d9fff 100644 --- a/njs/njscript.c +++ b/njs/njscript.c @@ -142,6 +142,8 @@ njs_vm_create(nxt_mem_cache_pool_t *mcp, njs_vm_shared_t **shared, void njs_vm_destroy(njs_vm_t *vm) { + njs_regexp_pattern_free(vm->pattern); + nxt_mem_cache_pool_destroy(vm->mem_cache_pool); } diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c index 9a3fb2e6..f4319254 100644 --- a/njs/test/njs_unit_test.c +++ b/njs/test/njs_unit_test.c @@ -2114,9 +2114,18 @@ static njs_unit_test_t njs_test[] = { nxt_string("'abcdefgh'.search()"), nxt_string("0") }, + { nxt_string("'abcdefgh'.search('')"), + nxt_string("0") }, + + { nxt_string("'abcdefgh'.search(undefined)"), + nxt_string("0") }, + { nxt_string("'abcdefgh'.search(/def/)"), nxt_string("3") }, + { nxt_string("'abcdefgh'.search('def')"), + nxt_string("3") }, + { nxt_string("''.match(/^$/) +''"), nxt_string("") }, @@ -2357,6 +2366,8 @@ static njs_unit_test_t njs_test[] = "b = a(); b(2)"), nxt_string("3") }, + /* RegExp. */ + { nxt_string("/^$/.test('')"), nxt_string("true") }, @@ -2387,12 +2398,15 @@ static njs_unit_test_t njs_test[] = { nxt_string("var a = /^$/.exec(''); a.length +' '+ a"), nxt_string("1 ") }, - { nxt_string("var r = /бв/ig; var a = r.exec('АБВ'); r.lastIndex +' '+ a"), - nxt_string("3 БВ") }, + { nxt_string("var r = /бв/ig;" + "var a = r.exec('АБВ');" + "r.lastIndex +' '+ a +' '+ " + "r.source +' '+ r.source.length +' '+ r"), + nxt_string("3 БВ бв 2 /бв/gi") }, { nxt_string("var r = /\\x80/g; r.exec('\\u0081\\u0080'.toBytes());" - "r.lastIndex"), - nxt_string("1") }, + "r.lastIndex +' '+ r.source +' '+ r.source.length +' '+ r"), + nxt_string("1 \\x80 4 /\\x80/g") }, /* * It seems that "/стоп/ig" fails on early PCRE versions. @@ -2401,8 +2415,8 @@ static njs_unit_test_t njs_test[] = { nxt_string("var r = /Стоп/ig;" "var a = r.exec('АБВДЕЁЖЗИКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯСТОП');" - "r.lastIndex +' '+ a"), - nxt_string("35 СТОП") }, + "r.lastIndex +' '+ a +' '+ r.source +' '+ r"), + nxt_string("35 СТОП Стоп /Стоп/gi") }, { nxt_string("var r = /quick\\s(brown).+?(jumps)/ig;" "var a = r.exec('The Quick Brown Fox Jumps Over The Lazy Dog')" @@ -2417,6 +2431,24 @@ static njs_unit_test_t njs_test[] = { nxt_string("var r = /LS/i.exec(false); r[0]"), nxt_string("ls") }, + { nxt_string("var r = /./; r"), + nxt_string("/./") }, + + { nxt_string("var r = new RegExp(); r"), + nxt_string("/(?:)/") }, + + { nxt_string("var r = new RegExp('.'); r"), + nxt_string("/./") }, + + { nxt_string("var r = new RegExp('.', 'ig'); r"), + nxt_string("/./gi") }, + + { nxt_string("var r = new RegExp('abc'); r.test('00abc11')"), + nxt_string("true") }, + + { nxt_string("var r = new RegExp('abc', 'i'); r.test('00ABC11')"), + nxt_string("true") }, + /* Non-standard ECMA-262 features. */ /* 0x10400 is not a surrogate pair of 0xD801 and 0xDC00. */ -- 2.47.3