git.kaiwu.me - njs.git/commitdiff
RegExp updates.
author Igor Sysoev <igor@sysoev.ru>
Wed, 18 Nov 2015 12:46:02 +0000 (15:46 +0300)
committer Igor Sysoev <igor@sysoev.ru>
Wed, 18 Nov 2015 12:46:02 +0000 (15:46 +0300)
16 files changed:
njs/njs_extern.c
njs/njs_generator.c
njs/njs_lexer.c
njs/njs_object.c
njs/njs_parser.c
njs/njs_parser.h
njs/njs_parser_expression.c
njs/njs_regexp.c
njs/njs_regexp.h
njs/njs_regexp_pattern.h
njs/njs_shared.c
njs/njs_string.c
njs/njs_variable.c
njs/njs_vm.h
njs/njscript.c
njs/test/njs_unit_test.c

index 5b1fa3b4917167e94105bc41196dfbe143707d70..ea32fb930b3faf69bef94bc95804b84312e6eb5c 100644 (file)
@@ -15,7 +15,6 @@
 #include <nxt_mem_cache_pool.h>
 #include <njscript.h>
 #include <njs_vm.h>
-#include <njs_regexp.h>
 #include <njs_variable.h>
 #include <njs_parser.h>
 #include <string.h>
index b3ad32d9a2094a9a3ef9c1705d6ccf2a606be88a..0dd5b657677df87b8789516ebb6fc1dc74310743 100644 (file)
@@ -12,7 +12,6 @@
 #include <nxt_mem_cache_pool.h>
 #include <njscript.h>
 #include <njs_vm.h>
-#include <njs_regexp.h>
 #include <njs_variable.h>
 #include <njs_parser.h>
 #include <string.h>
index 583a320c757e64dde28a4cea6458d92224dbaa6b..3cf5ed4b1141909e1e7a15b724864d23259dff01 100644 (file)
@@ -13,7 +13,6 @@
 #include <nxt_mem_cache_pool.h>
 #include <njscript.h>
 #include <njs_vm.h>
-#include <njs_regexp.h>
 #include <njs_variable.h>
 #include <njs_parser.h>
 #include <string.h>
@@ -648,64 +647,3 @@ njs_lexer_division(njs_lexer_t *lexer, njs_token_t token)
 
     return token;
 }
-
-
-njs_token_t
-njs_lexer_regexp(njs_lexer_t *lexer, njs_regexp_flags_t *flags)
-{
-    u_char              *p;
-    njs_regexp_flags_t  _flags, flag;
-
-    for (p = lexer->start; p < lexer->end; p++) {
-
-        if (*p == '\\') {
-            p++;
-            continue;
-        }
-
-        if (*p == '/') {
-
-            lexer->text.data = lexer->start;
-            lexer->text.len = p - lexer->text.data;
-            p++;
-
-            _flags = 0;
-
-            while (p < lexer->end) {
-                switch (*p) {
-
-                case 'i':
-                   flag = NJS_REGEXP_IGNORE_CASE;
-                   break;
-
-                case 'g':
-                   flag = NJS_REGEXP_GLOBAL;
-                   break;
-
-                case 'm':
-                   flag = NJS_REGEXP_MULTILINE;
-                   break;
-
-                default:
-                   goto done;
-                }
-
-                if (nxt_slow_path((_flags & flag) != 0)) {
-                    return NJS_TOKEN_ILLEGAL;
-                }
-
-                _flags |= flag;
-                p++;
-            }
-
-        done:
-
-            *flags = _flags;
-            lexer->start = p;
-
-            return NJS_TOKEN_REGEXP_LITERAL;
-        }
-    }
-
-    return NJS_TOKEN_ILLEGAL;
-}
index cd424dc1335b852ae3b72df8be148063ccf5ecf7..14ab44c1f1aeb599b80b62f21b6d58b02a5ce14d 100644 (file)
@@ -12,7 +12,6 @@
 #include <nxt_mem_cache_pool.h>
 #include <njscript.h>
 #include <njs_vm.h>
-#include <njs_regexp.h>
 #include <string.h>
 
 
index dca82cbc6dd3647cb77d82761388c4c7bd359fe0..89688bbe9ad65a7c58b85c06feb1c5ec2f195b15 100644 (file)
@@ -1226,14 +1226,12 @@ njs_token_t
 njs_parser_terminal(njs_vm_t *vm, njs_parser_t *parser,
     njs_token_t token)
 {
-    double                num;
-    nxt_int_t             ret;
-    nxt_uint_t            level;
-    njs_extern_t          *ext;
-    njs_variable_t        *var;
-    njs_parser_node_t     *node;
-    njs_regexp_flags_t    flags;
-    njs_regexp_pattern_t  *pattern;
+    double             num;
+    nxt_int_t          ret;
+    nxt_uint_t         level;
+    njs_extern_t       *ext;
+    njs_variable_t     *var;
+    njs_parser_node_t  *node;
 
     if (token == NJS_TOKEN_OPEN_PARENTHESIS) {
 
@@ -1346,21 +1344,14 @@ njs_parser_terminal(njs_vm_t *vm, njs_parser_t *parser,
         return token;
 
     case NJS_TOKEN_DIVISION:
-        token = njs_lexer_regexp(parser->lexer, &flags);
-        if (nxt_slow_path(token <= NJS_TOKEN_ILLEGAL)) {
-            return token;
+        ret = njs_regexp_literal(vm, parser, &node->u.value);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            return NJS_TOKEN_ILLEGAL;
         }
 
-        node->token = token;
-
         nxt_thread_log_debug("REGEX: '%V'", &parser->lexer->text);
 
-        pattern = njs_regexp_pattern_create(vm, &parser->lexer->text, flags);
-        if (nxt_slow_path(pattern == NULL)) {
-            return NJS_TOKEN_ERROR;
-        }
-
-        node->u.value.data.u.data = pattern;
+        node->token = NJS_TOKEN_REGEXP_LITERAL;
         parser->code_size += sizeof(njs_vmcode_regexp_t);
 
         break;
index 992f30ae6667c07ca747f9a12e8ae021efcea9f3..9c8e38adfc353ec02646144d43946cbbcdeb80fe 100644 (file)
@@ -273,7 +273,6 @@ struct njs_parser_s {
 
 
 njs_token_t njs_lexer_token(njs_lexer_t *lexer);
-njs_token_t njs_lexer_regexp(njs_lexer_t *lexer, njs_regexp_flags_t *flags);
 nxt_int_t njs_lexer_keywords_init(nxt_mem_cache_pool_t *mcp,
     nxt_lvlhsh_t *hash);
 njs_token_t njs_lexer_keyword(njs_lexer_t *lexer);
index 6d8252c0dc412b85cb7534213e9ff661358dc5bf..6cd5cbb28380bb19c119fe681e2e710f42654abc 100644 (file)
@@ -14,7 +14,6 @@
 #include <nxt_mem_cache_pool.h>
 #include <njscript.h>
 #include <njs_vm.h>
-#include <njs_regexp.h>
 #include <njs_variable.h>
 #include <njs_parser.h>
 #include <string.h>
index 047a6264a8a5e0fae883dfc28c46dce4e91f8a4d..a534c162973967030e26a76857919132dcec3fea 100644 (file)
 #include <njs_vm.h>
 #include <njs_regexp.h>
 #include <njs_regexp_pattern.h>
+#include <njs_variable.h>
+#include <njs_parser.h>
 #include <string.h>
 
 
+static njs_regexp_flags_t njs_regexp_flags(u_char **start, u_char *end,
+    nxt_bool_t bound);
 static int njs_regexp_pattern_compile(pcre **code, pcre_extra **extra,
     u_char *source, int options);
 static njs_ret_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp,
     u_char *string, int *captures, nxt_uint_t utf8);
 
 
-njs_regexp_t *
-njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern)
+njs_ret_t
+njs_regexp_function(njs_vm_t *vm, njs_param_t *param)
 {
-    njs_regexp_t  *regexp;
+    size_t                length;
+    njs_regexp_t          *regexp;
+    njs_string_prop_t     string;
+    njs_regexp_flags_t    flags;
+    njs_regexp_pattern_t  *pattern;
 
-    regexp = nxt_mem_cache_align(vm->mem_cache_pool, sizeof(njs_value_t),
-                                 sizeof(njs_regexp_t));
+    flags = 0;
 
-    if (nxt_fast_path(regexp != NULL)) {
-        nxt_lvlhsh_init(&regexp->object.hash);
-        nxt_lvlhsh_init(&regexp->object.shared_hash);
-        regexp->object.__proto__ = &vm->prototypes[NJS_PROTOTYPE_REGEXP];
-        regexp->last_index = 0;
-        regexp->pattern = pattern;
+    switch (param->nargs) {
+
+    default:
+        length = njs_string_prop(&string, &param->args[1]);
+
+        flags = njs_regexp_flags(&string.start, string.start + length, 1);
+        if (nxt_slow_path(flags < 0)) {
+            return NXT_ERROR;
+        }
+
+        /* Fall through. */
+
+    case 1:
+        string.length = njs_string_prop(&string, &param->args[0]);
+        break;
+
+    case 0:
+        string.start = (u_char *) "(?:)";
+        string.length = sizeof("(?:)") - 1;
+        break;
     }
 
-    return regexp;
+    pattern = njs_regexp_pattern_create(vm, string.start, string.length, flags);
+
+    if (nxt_fast_path(pattern != NULL)) {
+
+        regexp = njs_regexp_alloc(vm, pattern);
+
+        if (nxt_fast_path(regexp != NULL)) {
+            vm->retval.data.u.regexp = regexp;
+            vm->retval.type = NJS_REGEXP;
+            vm->retval.data.truth = 1;
+
+            return NXT_OK;
+        }
+    }
+
+    return NXT_ERROR;
+}
+
+
+/*
+ * Scans the body of a regexp literal, compiles it and stores the resulting
+ * pattern in value->data.u.data.
+ *
+ * Scanning starts at lexer->start, which is expected to point just past the
+ * opening "/".  On success lexer->text covers the pattern body (without the
+ * delimiters) and lexer->start is advanced past the closing "/" and flags.
+ *
+ * A backslash escapes the byte that follows it.  A "/" inside a character
+ * class "[...]" is part of the pattern and does not terminate the literal,
+ * as required by the ECMA-262 RegularExpressionClass production.
+ *
+ * Returns NXT_OK on success, or NXT_ERROR if no closing "/" is found,
+ * a flag is repeated, or the pattern cannot be created.
+ */
+nxt_int_t
+njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser, njs_value_t *value)
+{
+    u_char                *p;
+    nxt_bool_t            in_class;
+    njs_lexer_t           *lexer;
+    njs_regexp_flags_t    flags;
+    njs_regexp_pattern_t  *pattern;
+
+    lexer = parser->lexer;
+    in_class = 0;
+
+    for (p = lexer->start; p < lexer->end; p++) {
+
+        switch (*p) {
+
+        case '\\':
+            /* Skip the escaped byte, the loop increment steps over it. */
+            p++;
+            continue;
+
+        case '[':
+            in_class = 1;
+            continue;
+
+        case ']':
+            in_class = 0;
+            continue;
+
+        case '/':
+            if (in_class) {
+                /* "/" inside "[...]" is an ordinary pattern character. */
+                continue;
+            }
+
+            break;
+
+        default:
+            continue;
+        }
+
+        /* An unescaped "/" outside of a character class ends the body. */
+
+        lexer->text.data = lexer->start;
+        lexer->text.len = p - lexer->text.data;
+        p++;
+
+        /* Unbound mode: parsing stops at the first non-flag character. */
+        flags = njs_regexp_flags(&p, lexer->end, 0);
+
+        if (nxt_slow_path(flags < 0)) {
+            return NXT_ERROR;
+        }
+
+        lexer->start = p;
+
+        pattern = njs_regexp_pattern_create(vm, lexer->text.data,
+                                            lexer->text.len, flags);
+        if (nxt_slow_path(pattern == NULL)) {
+            return NXT_ERROR;
+        }
+
+        value->data.u.data = pattern;
+
+        return NXT_OK;
+    }
+
+    /* The end of input has been reached without a closing "/". */
+
+    return NXT_ERROR;
+}
+
+
+/*
+ * Parses RegExp flag characters ("g", "i", "m") in the range [*start, end).
+ *
+ * If "bound" is set, every byte of the range must be a flag and any other
+ * character yields NJS_REGEXP_INVALID_FLAG.  Otherwise parsing stops at the
+ * first non-flag character.  A repeated flag is always invalid.
+ *
+ * On success the accumulated flag mask is returned and *start is moved to
+ * the first unparsed byte.  On failure *start is left untouched.
+ */
+static njs_regexp_flags_t
+njs_regexp_flags(u_char **start, u_char *end, nxt_bool_t bound)
+{
+    u_char              *cur;
+    njs_regexp_flags_t  seen, bit;
+
+    seen = 0;
+    cur = *start;
+
+    while (cur < end) {
+
+        if (*cur == 'g') {
+            bit = NJS_REGEXP_GLOBAL;
+
+        } else if (*cur == 'i') {
+            bit = NJS_REGEXP_IGNORE_CASE;
+
+        } else if (*cur == 'm') {
+            bit = NJS_REGEXP_MULTILINE;
+
+        } else if (bound) {
+            /* The whole range must consist of flags only. */
+            return NJS_REGEXP_INVALID_FLAG;
+
+        } else {
+            /* The flags are followed by other source text. */
+            break;
+        }
+
+        if (nxt_slow_path((seen & bit) != 0)) {
+            return NJS_REGEXP_INVALID_FLAG;
+        }
+
+        seen |= bit;
+        cur++;
+    }
+
+    *start = cur;
+
+    return seen;
 }
 
 
 njs_regexp_pattern_t *
-njs_regexp_pattern_create(njs_vm_t *vm, nxt_str_t *source,
+njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length,
     njs_regexp_flags_t flags)
 {
     int                   options, ret;
-    u_char                *p;
+    u_char                *p, *end;
+    size_t                size;
     njs_regexp_pattern_t  *pattern;
 
-    /* TODO: pcre_malloc */
+    size = 1;  /* A trailing "/". */
+    size += ((flags & NJS_REGEXP_GLOBAL) != 0);
+    size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
+    size += ((flags & NJS_REGEXP_MULTILINE) != 0);
 
     pattern = nxt_mem_cache_alloc(vm->mem_cache_pool,
-                            sizeof(njs_regexp_pattern_t) + source->len + 1);
+                                  sizeof(njs_regexp_pattern_t)
+                                  + 1 + length + size + 1);
     if (nxt_slow_path(pattern == NULL)) {
         return NULL;
     }
 
     p = (u_char *) pattern + sizeof(njs_regexp_pattern_t);
     pattern->source = p;
+    pattern->next = NULL;
+    pattern->flags = size;
 
-    p = memcpy(p, source->data, source->len);
-    p += source->len;
-    *p = '\0';
+    *p++ = '/';
+    p = memcpy(p, start, length);
+    p += length;
+    end = p;
+    *p++ = '\0';
 
     pattern->ncaptures = 0;
 
     pattern->global = ((flags & NJS_REGEXP_GLOBAL) != 0);
+    if (pattern->global) {
+        *p++ = 'g';
+    }
 
 #ifdef PCRE_JAVASCRIPT_COMPAT
     /* JavaScript compatibility has been introduced in PCRE-7.7. */
@@ -82,18 +223,22 @@ njs_regexp_pattern_create(njs_vm_t *vm, nxt_str_t *source,
     options = 0;
 #endif
 
-    if ((flags & NJS_REGEXP_IGNORE_CASE) != 0) {
-         pattern->ignore_case = 1;
+    pattern->ignore_case = ((flags & NJS_REGEXP_IGNORE_CASE) != 0);
+    if (pattern->ignore_case) {
+        *p++ = 'i';
          options |= PCRE_CASELESS;
     }
 
-    if ((flags & NJS_REGEXP_MULTILINE) != 0) {
-         pattern->multiline = 1;
+    pattern->multiline = ((flags & NJS_REGEXP_MULTILINE) != 0);
+    if (pattern->multiline) {
+        *p++ = 'm';
          options |= PCRE_MULTILINE;
     }
 
+    *p++ = '\0';
+
     ret = njs_regexp_pattern_compile(&pattern->code[0], &pattern->extra[0],
-                                     pattern->source, options);
+                                     &pattern->source[1], options);
 
     if (nxt_slow_path(ret < 0)) {
         return NULL;
@@ -102,26 +247,30 @@ njs_regexp_pattern_create(njs_vm_t *vm, nxt_str_t *source,
     pattern->ncaptures = ret;
 
     ret = njs_regexp_pattern_compile(&pattern->code[1], &pattern->extra[1],
-                                     pattern->source, options | PCRE_UTF8);
+                                     &pattern->source[1], options | PCRE_UTF8);
 
-    if (nxt_slow_path(ret < 0)) {
+    if (nxt_fast_path(ret >= 0)) {
+
+        if (nxt_slow_path((unsigned) ret != pattern->ncaptures)) {
+            nxt_thread_log_error(NXT_LOG_ERR, "numbers of captures in byte "
+                           "and UTF-8 versions of RegExp \"%s\" vary: %d vs %d",
+                           &pattern->source[1], pattern->ncaptures, ret);
 
-        if (ret == NXT_DECLINED) {
-            return pattern;
+            njs_regexp_pattern_free(pattern);
+            return NULL;
         }
 
+    } else if (ret != NXT_DECLINED) {
+        njs_regexp_pattern_free(pattern);
         return NULL;
     }
 
-    if (nxt_fast_path((unsigned) ret == pattern->ncaptures)) {
-        return pattern;
-    }
+    *end = '/';
 
-    nxt_thread_log_error(NXT_LOG_ERR, "numbers of byte and UTF-8 captures "
-                         "in RegExp \"%s\" vary: %d vs %d",
-                         pattern->source, pattern->ncaptures, ret);
+    pattern->next = vm->pattern;
+    vm->pattern = pattern;
 
-    return NULL;
+    return pattern;
 }
 
 
@@ -179,10 +328,23 @@ njs_regexp_pattern_compile(pcre **code, pcre_extra **extra, u_char *source,
 }
 
 
-njs_ret_t
-njs_regexp_function(njs_vm_t *vm, njs_param_t *param)
+njs_regexp_t *
+njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern)
 {
-    return NXT_ERROR;
+    njs_regexp_t  *regexp;
+
+    regexp = nxt_mem_cache_align(vm->mem_cache_pool, sizeof(njs_value_t),
+                                 sizeof(njs_regexp_t));
+
+    if (nxt_fast_path(regexp != NULL)) {
+        nxt_lvlhsh_init(&regexp->object.hash);
+        nxt_lvlhsh_init(&regexp->object.shared_hash);
+        regexp->object.__proto__ = &vm->prototypes[NJS_PROTOTYPE_REGEXP];
+        regexp->last_index = 0;
+        regexp->pattern = pattern;
+    }
+
+    return regexp;
 }
 
 
@@ -207,63 +369,76 @@ njs_regexp_prototype_last_index(njs_vm_t *vm, njs_value_t *value)
 
 
 static njs_ret_t
-njs_regexp_prototype_ignore_case(njs_vm_t *vm, njs_value_t *regexp)
+njs_regexp_prototype_global(njs_vm_t *vm, njs_value_t *value)
 {
     njs_regexp_pattern_t  *pattern;
 
-    pattern = regexp->data.u.regexp->pattern;
-    vm->retval = pattern->ignore_case ? njs_value_true : njs_value_false;
-    njs_release(vm, regexp);
+    pattern = value->data.u.regexp->pattern;
+    vm->retval = pattern->global ? njs_value_true : njs_value_false;
+    njs_release(vm, value);
 
     return NXT_OK;
 }
 
 
 static njs_ret_t
-njs_regexp_prototype_global(njs_vm_t *vm, njs_value_t *regexp)
+njs_regexp_prototype_ignore_case(njs_vm_t *vm, njs_value_t *value)
 {
     njs_regexp_pattern_t  *pattern;
 
-    pattern = regexp->data.u.regexp->pattern;
-    vm->retval = pattern->global ? njs_value_true : njs_value_false;
-    njs_release(vm, regexp);
+    pattern = value->data.u.regexp->pattern;
+    vm->retval = pattern->ignore_case ? njs_value_true : njs_value_false;
+    njs_release(vm, value);
 
     return NXT_OK;
 }
 
 
 static njs_ret_t
-njs_regexp_prototype_multiline(njs_vm_t *vm, njs_value_t *regexp)
+njs_regexp_prototype_multiline(njs_vm_t *vm, njs_value_t *value)
 {
     njs_regexp_pattern_t  *pattern;
 
-    pattern = regexp->data.u.regexp->pattern;
+    pattern = value->data.u.regexp->pattern;
     vm->retval = pattern->multiline ? njs_value_true : njs_value_false;
-    njs_release(vm, regexp);
+    njs_release(vm, value);
 
     return NXT_OK;
 }
 
 
 static njs_ret_t
-njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *regexp)
+njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *value)
 {
-    size_t                length;
     u_char                *source;
+    size_t                length, size;
     njs_regexp_pattern_t  *pattern;
 
-    pattern = regexp->data.u.regexp->pattern;
+    pattern = value->data.u.regexp->pattern;
+    /* Skip starting "/". */
+    source = pattern->source + 1;
+
+    size = strlen((char *) source) - pattern->flags;
+    length = nxt_utf8_length(source, size);
+
+    return njs_string_create(vm, &vm->retval, source, size, length);
+}
+
+
+static njs_ret_t
+njs_regexp_prototype_to_string(njs_vm_t *vm, njs_param_t *param)
+{
+    u_char                *source;
+    size_t                length, size;
+    njs_regexp_pattern_t  *pattern;
 
-    /*
-     * The pattern source is stored not as value but as C string even
-     * without length, because retrieving it is very seldom operation.
-     */
+    pattern = param->object->data.u.regexp->pattern;
     source = pattern->source;
 
-    /* TODO: can regexp string be UTF-8? */
-    length = strlen((char *) source);
+    size = strlen((char *) source);
+    length = nxt_utf8_length(source, size);
 
-    return njs_string_create(vm, &vm->retval, source, length, length);
+    return njs_string_create(vm, &vm->retval, source, size, length);
 }
 
 
@@ -514,14 +689,14 @@ static const njs_object_prop_t  njs_regexp_prototype_properties[] =
       njs_string("lastIndex"),
       NJS_NATIVE_GETTER, 0, 0, 0, },
 
-    { njs_getter(njs_regexp_prototype_ignore_case),
-      njs_string("ignoreCase"),
-      NJS_NATIVE_GETTER, 0, 0, 0, },
-
     { njs_getter(njs_regexp_prototype_global),
       njs_string("global"),
       NJS_NATIVE_GETTER, 0, 0, 0, },
 
+    { njs_getter(njs_regexp_prototype_ignore_case),
+      njs_string("ignoreCase"),
+      NJS_NATIVE_GETTER, 0, 0, 0, },
+
     { njs_getter(njs_regexp_prototype_multiline),
       njs_string("multiline"),
       NJS_NATIVE_GETTER, 0, 0, 0, },
@@ -530,6 +705,10 @@ static const njs_object_prop_t  njs_regexp_prototype_properties[] =
       njs_string("source"),
       NJS_NATIVE_GETTER, 0, 0, 0, },
 
+    { njs_native_function(njs_regexp_prototype_to_string, 0),
+      njs_string("toString"),
+      NJS_METHOD, 0, 0, 0, },
+
     { njs_native_function(njs_regexp_prototype_test, 0),
       njs_string("test"),
       NJS_METHOD, 0, 0, 0, },
@@ -546,3 +725,18 @@ njs_regexp_prototype_hash(njs_vm_t *vm, nxt_lvlhsh_t *hash)
     return njs_object_hash_create(vm, hash, njs_regexp_prototype_properties,
                                   nxt_nitems(njs_regexp_prototype_properties));
 }
+
+
+/*
+ * Walks the list linked through pattern->next and releases the PCRE study
+ * data and compiled code of both variants (indices 0 and 1) of every
+ * pattern.  The pattern structures themselves are not freed here.
+ */
+void
+njs_regexp_pattern_free(njs_regexp_pattern_t *pattern)
+{
+    nxt_uint_t  n;
+
+    for ( /* void */ ; pattern != NULL; pattern = pattern->next) {
+
+        for (n = 0; n < 2; n++) {
+            pcre_free_study(pattern->extra[n]);
+            pcre_free(pattern->code[n]);
+        }
+    }
+}
index 8ca7134a05ef0ae9bceae26fcb2777a8f05753da..62361337367d19287f0274658653959ebed0f42c 100644 (file)
@@ -9,9 +9,10 @@
 
 
 typedef enum {
-    NJS_REGEXP_IGNORE_CASE = 1,
-    NJS_REGEXP_GLOBAL      = 2,
-    NJS_REGEXP_MULTILINE   = 4,
+    NJS_REGEXP_INVALID_FLAG = -1,
+    NJS_REGEXP_GLOBAL       =  1,
+    NJS_REGEXP_IGNORE_CASE  =  2,
+    NJS_REGEXP_MULTILINE    =  4,
 } njs_regexp_flags_t;
 
 
@@ -24,20 +25,23 @@ struct njs_regexp_s {
     njs_regexp_pattern_t  *pattern;
 
     /*
-     * This string value can be not aligned since
+     * This string value can be unaligned since
      * it never used in nJSVM operations.
      */
     njs_value_t           string;
 };
 
 
-njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern);
-njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm,
-    nxt_str_t *source, njs_regexp_flags_t flags);
 njs_ret_t njs_regexp_function(njs_vm_t *vm, njs_param_t *param);
+nxt_int_t njs_regexp_literal(njs_vm_t *vm, njs_parser_t *parser,
+    njs_value_t *value);
+njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm,
+    u_char *string, size_t length, njs_regexp_flags_t flags);
+njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern);
 njs_ret_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_param_t *param);
 nxt_int_t njs_regexp_function_hash(njs_vm_t *vm, nxt_lvlhsh_t *hash);
 nxt_int_t njs_regexp_prototype_hash(njs_vm_t *vm, nxt_lvlhsh_t *hash);
+void njs_regexp_pattern_free(njs_regexp_pattern_t *pattern);
 
 
 #endif /* _NJS_REGEXP_H_INCLUDED_ */
index b9b438bb80e970487ee733388c92deb692ba7abd..ee4f47e87d750d6c73b509894c288be85fea0ee9 100644 (file)
 
 
 struct njs_regexp_pattern_s {
-    pcre        *code[2];
-    pcre_extra  *extra[2];
-    u_char      *source;
+    pcre                  *code[2];
+    pcre_extra            *extra[2];
+    /*
+     * A pattern source is used by the RegExp.toString() method and the
+     * RegExp.source property, so it is stored in the form "/pattern/flags"
+     * as a zero-terminated C string rather than as a value, because it is
+     * retrieved very seldom.  To get just the pattern string for the
+     * RegExp.source property, the length of the "/flags" part is stored
+     * in the flags field.
+     */
+    u_char                *source;
+    njs_regexp_pattern_t  *next;
 
 #if (NXT_64BIT)
-    uint32_t    ncaptures;
-    uint8_t     global;       /* 1 bit */
-    uint8_t     ignore_case;  /* 1 bit */
-    uint8_t     multiline;    /* 1 bit */
+    uint32_t              ncaptures;
+    uint8_t               flags;        /* 3 bits */
+
+    uint8_t               global;       /* 1 bit */
+    uint8_t               ignore_case;  /* 1 bit */
+    uint8_t               multiline;    /* 1 bit */
 #else
-    uint16_t    ncaptures;
-    uint8_t     global;       /* 1 bit */
-    uint8_t     ignore_case:1;
-    uint8_t     multiline:1;
+    uint16_t              ncaptures;
+    uint8_t               flags;        /* 3 bits */
+    uint8_t               global:1;
+    uint8_t               ignore_case:1;
+    uint8_t               multiline:1;
 #endif
 };
 
index 416e4371ba3b15785e8c110cec521e99b3a1b66e..38dcdaa9896cb3d845efa85408e5f637703f54da 100644 (file)
@@ -70,7 +70,7 @@ njs_shared_objects_create(njs_vm_t *vm)
         njs_number_function,
         njs_string_ctor_function,
         njs_stub_function,
-        njs_stub_function,
+        njs_regexp_function,
         njs_stub_function,
     };
 
index 8a5cfc6caa7e55d44b511c109eb08870c3fd9b16..dd77e23e4f725c3bb1c7ec57640c48c627f14c20 100644 (file)
@@ -1191,6 +1191,7 @@ njs_string_prototype_search(njs_vm_t *vm, njs_param_t *param)
     int                   ret;
     nxt_int_t             index;
     nxt_uint_t            n;
+    njs_value_t           *args;
     njs_string_prop_t     string;
     njs_regexp_pattern_t  *pattern;
     int                   captures[3];
@@ -1200,14 +1201,45 @@ njs_string_prototype_search(njs_vm_t *vm, njs_param_t *param)
     index = 0;
 
     if (param->nargs != 0) {
-        /*
-         * TODO: convert args[0] to RegExp:
-         *       RegExp    > RegExp
-         *       String    > RegExp
-         *       undefined > //
-         *       otherwise > String > RegExp
-         */
-        pattern = param->args[0].data.u.regexp->pattern;
+
+        args = param->args;
+
+        switch (args[0].type) {
+
+        case NJS_REGEXP:
+            pattern = args[0].data.u.regexp->pattern;
+            break;
+
+        case NJS_STRING:
+            (void) njs_string_prop(&string, &args[0]);
+
+            pattern = njs_regexp_pattern_create(vm, string.start,
+                                                string.length, 0);
+            if (nxt_slow_path(pattern == NULL)) {
+                return NXT_ERROR;
+            }
+
+            break;
+
+        case NJS_VOID:
+            /* STUB: precompiled "/(?:)/" pattern. */
+            string.start = (u_char *) "(?:)";
+            string.length = sizeof("(?:)") - 1;
+
+            pattern = njs_regexp_pattern_create(vm, string.start,
+                                                string.length, 0);
+            if (nxt_slow_path(pattern == NULL)) {
+                return NXT_ERROR;
+            }
+
+            break;
+
+        default:
+            /* STUB: convert args[0] to String, then to RegExp. */
+            vm->exception = &njs_exception_type_error;
+
+            return NXT_ERROR;
+        }
 
         index = -1;
 
@@ -1290,8 +1322,10 @@ njs_string_prototype_match(njs_vm_t *vm, njs_param_t *param)
 
         if (n != 0) {
             utf8 = 2;
+
         } else if (string.length != 0) {
             utf8 = 1;
+
         } else {
             utf8 = 1;
         }
index 62e51f025f11549db5de3fc8dad53353c10405bd..bc984478bc80ab9762557f1519904d456c31710c 100644 (file)
@@ -15,7 +15,6 @@
 #include <nxt_mem_cache_pool.h>
 #include <njscript.h>
 #include <njs_vm.h>
-#include <njs_regexp.h>
 #include <njs_variable.h>
 #include <njs_parser.h>
 #include <string.h>
index 1a5c8e38bf6a181c87dd54c4a634ed3eb26fd0b0..dc54fc320f82a031affc2aec34c0819e23530d66 100644 (file)
@@ -315,6 +315,11 @@ njs_is_function(value)                                                        \
     ((value)->type == NJS_FUNCTION)
 
 
+#define                                                                       \
+njs_is_regexp(value)                                                          \
+    ((value)->type == NJS_REGEXP)
+
+
 #define                                                                       \
 njs_is_native(value)                                                          \
     ((value)->type == NJS_NATIVE)
@@ -688,6 +693,7 @@ struct njs_vm_s {
 
     njs_vm_shared_t          *shared;
     njs_parser_t             *parser;
+    njs_regexp_pattern_t     *pattern;
 };
 
 
index 0f3c62365f890be44b113865df5c37650c011ef0..ee5d9fffafa17118951046b72979668cb106aa31 100644 (file)
@@ -142,6 +142,8 @@ njs_vm_create(nxt_mem_cache_pool_t *mcp, njs_vm_shared_t **shared,
 void
 njs_vm_destroy(njs_vm_t *vm)
 {
+    njs_regexp_pattern_free(vm->pattern);
+
     nxt_mem_cache_pool_destroy(vm->mem_cache_pool);
 }
 
index 9a3fb2e6fc9b825c136590ea402e9abd44e6f127..f431925478bb9f08ec91860ea035b3ec99ceaee7 100644 (file)
@@ -2114,9 +2114,18 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("'abcdefgh'.search()"),
       nxt_string("0") },
 
+    { nxt_string("'abcdefgh'.search('')"),
+      nxt_string("0") },
+
+    { nxt_string("'abcdefgh'.search(undefined)"),
+      nxt_string("0") },
+
     { nxt_string("'abcdefgh'.search(/def/)"),
       nxt_string("3") },
 
+    { nxt_string("'abcdefgh'.search('def')"),
+      nxt_string("3") },
+
     { nxt_string("''.match(/^$/) +''"),
       nxt_string("") },
 
@@ -2357,6 +2366,8 @@ static njs_unit_test_t  njs_test[] =
                  "b = a(); b(2)"),
       nxt_string("3") },
 
+    /* RegExp. */
+
     { nxt_string("/^$/.test('')"),
       nxt_string("true") },
 
@@ -2387,12 +2398,15 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("var a = /^$/.exec(''); a.length +' '+ a"),
       nxt_string("1 ") },
 
-    { nxt_string("var r = /бв/ig; var a = r.exec('АБВ'); r.lastIndex +' '+ a"),
-      nxt_string("3 БВ") },
+    { nxt_string("var r = /бв/ig;"
+                 "var a = r.exec('АБВ');"
+                 "r.lastIndex +' '+ a +' '+ "
+                 "r.source +' '+ r.source.length +' '+ r"),
+      nxt_string("3 БВ бв 2 /бв/gi") },
 
     { nxt_string("var r = /\\x80/g; r.exec('\\u0081\\u0080'.toBytes());"
-                 "r.lastIndex"),
-      nxt_string("1") },
+                 "r.lastIndex +' '+ r.source +' '+ r.source.length +' '+ r"),
+      nxt_string("1 \\x80 4 /\\x80/g") },
 
     /*
      * It seems that "/стоп/ig" fails on early PCRE versions.
@@ -2401,8 +2415,8 @@ static njs_unit_test_t  njs_test[] =
 
     { nxt_string("var r = /Стоп/ig;"
                  "var a = r.exec('АБВДЕЁЖЗИКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯСТОП');"
-                 "r.lastIndex +' '+ a"),
-      nxt_string("35 СТОП") },
+                 "r.lastIndex +' '+ a +' '+ r.source +' '+ r"),
+      nxt_string("35 СТОП Стоп /Стоп/gi") },
 
     { nxt_string("var r = /quick\\s(brown).+?(jumps)/ig;"
                  "var a = r.exec('The Quick Brown Fox Jumps Over The Lazy Dog')"
@@ -2417,6 +2431,24 @@ static njs_unit_test_t  njs_test[] =
     { nxt_string("var r = /LS/i.exec(false); r[0]"),
       nxt_string("ls") },
 
+    { nxt_string("var r = /./; r"),
+      nxt_string("/./") },
+
+    { nxt_string("var r = new RegExp(); r"),
+      nxt_string("/(?:)/") },
+
+    { nxt_string("var r = new RegExp('.'); r"),
+      nxt_string("/./") },
+
+    { nxt_string("var r = new RegExp('.', 'ig'); r"),
+      nxt_string("/./gi") },
+
+    { nxt_string("var r = new RegExp('abc'); r.test('00abc11')"),
+      nxt_string("true") },
+
+    { nxt_string("var r = new RegExp('abc', 'i'); r.test('00ABC11')"),
+      nxt_string("true") },
+
     /* Non-standard ECMA-262 features. */
 
     /* 0x10400 is not a surrogate pair of 0xD801 and 0xDC00. */