git.kaiwu.me - njs.git/commitdiff
String.replace() function.
author Igor Sysoev <igor@sysoev.ru>
Fri, 23 Sep 2016 08:59:48 +0000 (11:59 +0300)
committer Igor Sysoev <igor@sysoev.ru>
Fri, 23 Sep 2016 08:59:48 +0000 (11:59 +0300)
13 files changed:
njs/njs_function.h
njs/njs_regexp.c
njs/njs_regexp_pattern.h
njs/njs_string.c
njs/njs_string.h
njs/njs_vm.c
njs/test/njs_unit_test.c
nxt/auto/clang
nxt/nxt_array.c
nxt/nxt_array.h
nxt/nxt_pcre.c
nxt/nxt_pcre.h
nxt/nxt_regex.h

index 2fb163fb4ad8ed59e96cb4ce8f2e1a47bc591dbd..0d2bd0881baf4a74fa03c54682152d2984f4420d 100644 (file)
 #define NJS_SKIP_ARG               1
 #define NJS_NUMBER_ARG             2
 #define NJS_INTEGER_ARG            3
-#define NJS_STRING_OBJECT_ARG      4
-#define NJS_STRING_ARG             5
-#define NJS_OBJECT_ARG             6
-#define NJS_REGEXP_ARG             7
-#define NJS_DATE_ARG               8
+#define NJS_STRING_ARG             4
+#define NJS_OBJECT_ARG             5
+#define NJS_STRING_OBJECT_ARG      6
+#define NJS_FUNCTION_ARG           7
+#define NJS_REGEXP_ARG             8
+#define NJS_DATE_ARG               9
 
 
 struct njs_function_lambda_s {
index 157d454bb830e3f9f625454c2b9e611d02aad3db..4d51c1290094fe50d749b82fa7eb2466edd0789f 100644 (file)
@@ -609,7 +609,7 @@ njs_regexp_prototype_test(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
         if (ret >= 0) {
             retval = &njs_value_true;
 
-        } else if (ret != NGX_REGEX_NOMATCH) {
+        } else if (ret != NXT_REGEX_NOMATCH) {
             return NXT_ERROR;
         }
     }
@@ -683,7 +683,7 @@ njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
                                           utf8);
         }
 
-        if (nxt_slow_path(ret != NGX_REGEX_NOMATCH)) {
+        if (nxt_slow_path(ret != NXT_REGEX_NOMATCH)) {
             nxt_regex_match_data_free(match_data, vm->regex_context);
 
             return NXT_ERROR;
index 99bd302e1f9e1d6f9d55ab658ad1052c971070b2..4e16d89f80a68d17e234e2cf2bbbb42bc90f9aab 100644 (file)
 #include <pcre.h>
 
 
+/*
+ * Index into the regex[2] pair of struct njs_regexp_pattern_s:
+ * it selects which of the two compiled variants is used.
+ */
+typedef enum {
+    NJS_REGEXP_BYTE = 0,
+    NJS_REGEXP_UTF8,
+} njs_regexp_utf8_t;
+
+
 struct njs_regexp_pattern_s {
     nxt_regex_t           regex[2];
 
index b5f2a573b3ff4fcd56d89f4c698c0017c2a6258c..9905c7b02dfffa3519613b4042b5f3ec448ac95d 100644 (file)
 #include <string.h>
 
 
+/*
+ * One fragment of the string built by String.replace().
+ *
+ * "start" and "size" describe the bytes of the fragment.  A NULL "start"
+ * means that the bytes are in value.short_string.start; it is resolved
+ * by njs_string_replace_join().  "value" holds a copy of the replacement
+ * string value, or is set invalid for fragments which point into
+ * another string.
+ */
+typedef struct {
+    u_char                     *start;
+    size_t                     size;
+    njs_value_t                value;
+} njs_string_replace_part_t;
+
+
+/*
+ * Special njs_string_subst_t.type codes.  Any other type value is
+ * an index in the captures vector, i.e. a capture number multiplied
+ * by two (see njs_string_replace_parse()).
+ */
+#define NJS_SUBST_COPY        255
+#define NJS_SUBST_PRECEDING   254
+#define NJS_SUBST_FOLLOWING   253
+
+
+/*
+ * One item of a parsed replacement template.  The "size" and "start"
+ * fields are set only for NJS_SUBST_COPY items and describe literal
+ * text inside the replacement string.
+ */
+typedef struct {
+     uint32_t  type;
+     uint32_t  size;
+     u_char    *start;
+} njs_string_subst_t;
+
+
+/*
+ * The String.replace() state.  It is obtained with
+ * njs_continuation(vm->frame), so it is available both to the method
+ * itself and to the continuations called after a replacement function.
+ */
+typedef struct {
+    union {
+        njs_continuation_t     cont;
+        u_char                 padding[NJS_CONTINUATION_SIZE];
+    } u;
+    /*
+     * This retval value must be aligned so the continuation
+     * is padded to aligned size.
+     */
+    njs_value_t                retval;
+
+    /*
+     * The result fragments.  The "parts" array initially uses the embedded
+     * "array" storage: the subject, the replacement, and the subject tail.
+     * The "part" field points to the current subject fragment in "parts".
+     */
+    nxt_array_t                parts;
+    njs_string_replace_part_t  array[3];
+    njs_string_replace_part_t  *part;
+
+    /* A parsed replacement template, NULL if the replacement has no '$'. */
+    nxt_array_t                *substitutions;
+    /* A replacement function, NULL if the replacement is a string. */
+    njs_function_t             *function;
+
+    /* Allocated only when the pattern is a regexp. */
+    nxt_regex_match_data_t     *match_data;
+
+    /* The subject string kind and the regexp variant selected by it. */
+    njs_utf8_t                 utf8:8;
+    njs_regexp_utf8_t          type:8;
+} njs_string_replace_t;
+
+
 static nxt_noinline void njs_string_slice_prop(njs_string_prop_t *string,
     njs_slice_prop_t *slice, njs_value_t *args, nxt_uint_t nargs);
 static nxt_noinline void njs_string_slice_args(njs_slice_prop_t *slice,
@@ -45,6 +89,28 @@ static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
     njs_regexp_pattern_t *pattern);
 static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
     u_char *start, size_t size, nxt_uint_t utf8);
+static njs_ret_t njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args,
+    njs_string_replace_t *r);
+static njs_ret_t njs_string_replace_regexp_function(njs_vm_t *vm,
+    njs_value_t *args, njs_string_replace_t *r, int *captures, nxt_uint_t n);
+static njs_ret_t njs_string_replace_regexp_continuation(njs_vm_t *vm,
+    njs_value_t *args, nxt_uint_t nargs, njs_index_t unused);
+static njs_ret_t njs_string_replace_regexp_join(njs_vm_t *vm,
+    njs_string_replace_t *r);
+static njs_ret_t njs_string_replace_search(njs_vm_t *vm, njs_value_t *args,
+    njs_string_replace_t *r);
+static njs_ret_t njs_string_replace_search_function(njs_vm_t *vm,
+    njs_value_t *args, njs_string_replace_t *r);
+static njs_ret_t njs_string_replace_search_continuation(njs_vm_t *vm,
+    njs_value_t *args, nxt_uint_t nargs, njs_index_t unused);
+static njs_ret_t njs_string_replace_parse(njs_vm_t *vm,
+    njs_string_replace_t *r, u_char *p, u_char *end, size_t size,
+    nxt_uint_t ncaptures);
+static njs_ret_t njs_string_replace_substitute(njs_vm_t *vm,
+    njs_string_replace_t *r, int *captures);
+static njs_ret_t njs_string_replace_join(njs_vm_t *vm, njs_string_replace_t *r);
+static void njs_string_replacement_copy(njs_string_replace_part_t *string,
+    const njs_value_t *value);
 static njs_ret_t njs_string_encode(njs_vm_t *vm, njs_value_t *value,
     const uint32_t *escape);
 static njs_ret_t njs_string_decode(njs_vm_t *vm, njs_value_t *value,
@@ -1471,7 +1537,7 @@ njs_string_prototype_search(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
                 captures = nxt_regex_captures(vm->single_match_data);
                 index = njs_string_index(&string, captures[0]);
 
-            } else if (ret != NGX_REGEX_NOMATCH) {
+            } else if (ret != NXT_REGEX_NOMATCH) {
                 return NXT_ERROR;
             }
         }
@@ -1629,7 +1695,7 @@ njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
 
                 array->length++;
 
-            } else if (ret == NGX_REGEX_NOMATCH) {
+            } else if (ret == NXT_REGEX_NOMATCH) {
                 break;
 
             } else {
@@ -1760,7 +1826,7 @@ njs_string_prototype_split(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
                     p = start + captures[0];
                     next = start + captures[1];
 
-                } else if (ret == NGX_REGEX_NOMATCH) {
+                } else if (ret == NXT_REGEX_NOMATCH) {
                     p = (u_char *) end;
                     next = (u_char *) end + 1;
 
@@ -1837,6 +1903,665 @@ njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
 }
 
 
+/*
+ * String.replace([regexp|string[, string|function]])
+ *
+ * The entry point of the native method.  It prepares the
+ * njs_string_replace_t state stored in the frame continuation and
+ * passes control to njs_string_replace_regexp() or to
+ * njs_string_replace_search().  The subject string is returned as is
+ * if there is no pattern argument, if the subject is empty, or if the
+ * regexp variant required for the subject is not valid.
+ */
+
+static njs_ret_t
+njs_string_prototype_replace(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs,
+    njs_index_t unused)
+{
+    u_char                *p, *start, *end;
+    njs_ret_t             ret;
+    nxt_uint_t            ncaptures;
+    nxt_regex_t           *regex;
+    njs_string_prop_t     string;
+    njs_string_replace_t  *r;
+
+    if (nargs == 1) {
+        goto original;
+    }
+
+    (void) njs_string_prop(&string, &args[0]);
+
+    if (string.size == 0) {
+        goto original;
+    }
+
+    r = njs_continuation(vm->frame);
+
+    /*
+     * Start with the byte string kind; a non-zero length
+     * upgrades it to ASCII or UTF-8 below.
+     */
+    r->utf8 = NJS_STRING_BYTE;
+    r->type = NJS_REGEXP_BYTE;
+
+    if (string.length != 0) {
+        r->utf8 = NJS_STRING_ASCII;
+        r->type = NJS_REGEXP_UTF8;
+
+        if (string.length != string.size) {
+            r->utf8 = NJS_STRING_UTF8;
+        }
+    }
+
+    if (njs_is_regexp(&args[1])) {
+        regex = &args[1].data.u.regexp->pattern->regex[r->type];
+
+        if (!nxt_regex_is_valid(regex)) {
+            goto original;
+        }
+
+        ncaptures = nxt_regex_ncaptures(regex);
+
+    } else {
+        /* A string pattern: "$n" is accepted by the parser only for n < 1. */
+        regex = NULL;
+        ncaptures = 1;
+    }
+
+    /* This cannot fail. */
+    r->part = nxt_array_init(&r->parts, &r->array,
+                             3, sizeof(njs_string_replace_part_t),
+                             &njs_array_mem_proto, vm->mem_cache_pool);
+
+    r->substitutions = NULL;
+    r->function = NULL;
+
+    /* A literal replacement is stored in the second part. */
+
+    if (nargs == 2) {
+        njs_string_replacement_copy(&r->part[1], &njs_string_void);
+
+    } else if (njs_is_string(&args[2])) {
+        njs_string_replacement_copy(&r->part[1], &args[2]);
+
+        start = r->part[1].start;
+
+        if (start == NULL) {
+            start = r->part[1].value.short_string.start;
+        }
+
+        end = start + r->part[1].size;
+
+        /* The first '$' turns the replacement into a parsed template. */
+        for (p = start; p < end; p++) {
+            if (*p == '$') {
+                ret = njs_string_replace_parse(vm, r, p, end, p - start,
+                                               ncaptures);
+                if (nxt_slow_path(ret != NXT_OK)) {
+                    return ret;
+                }
+
+                /* Reset parts array to the subject string only. */
+                r->parts.items = 1;
+
+                break;
+            }
+        }
+
+    } else {
+        r->function = args[2].data.u.function;
+    }
+
+    /* The first part is the subject string itself. */
+    r->part[0].start = string.start;
+    r->part[0].size = string.size;
+    njs_set_invalid(&r->part[0].value);
+
+    if (regex != NULL) {
+        r->match_data = nxt_regex_match_data(regex, vm->regex_context);
+        if (nxt_slow_path(r->match_data == NULL)) {
+            return NXT_ERROR;
+        }
+
+        return njs_string_replace_regexp(vm, args, r);
+    }
+
+    return njs_string_replace_search(vm, args, r);
+
+original:
+
+    njs_string_copy(&vm->retval, &args[0]);
+
+    return NXT_OK;
+}
+
+
+/*
+ * Matches the regexp args[1] against the current subject fragment
+ * r->part[0] and records the result fragments in r->parts.  For a global
+ * pattern the loop goes on with the text following each match.  The
+ * function is also called from njs_string_replace_regexp_continuation()
+ * after a replacement function has returned.
+ */
+static njs_ret_t
+njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args,
+    njs_string_replace_t *r)
+{
+    int                        *captures;
+    njs_ret_t                  ret;
+    njs_regexp_pattern_t       *pattern;
+    njs_string_replace_part_t  *part;
+
+    pattern = args[1].data.u.regexp->pattern;
+
+    do {
+        ret = njs_regexp_match(vm, &pattern->regex[r->type],
+                               r->part[0].start, r->part[0].size,
+                               r->match_data);
+
+        if (ret >= 0) {
+            captures = nxt_regex_captures(r->match_data);
+
+            if (r->substitutions != NULL) {
+                /* A template replacement: the parts are added there. */
+                ret = njs_string_replace_substitute(vm, r, captures);
+                if (nxt_slow_path(ret != NXT_OK)) {
+                    return ret;
+                }
+
+                if (!pattern->global) {
+                    return njs_string_replace_regexp_join(vm, r);
+                }
+
+            } else {
+                /*
+                 * Not the first match: two more parts are added for the
+                 * replacement and the tail.  The array may be reallocated,
+                 * so r->part is set again from the last added item.
+                 */
+                if (r->part != r->parts.start) {
+                    r->part = nxt_array_add(&r->parts, &njs_array_mem_proto,
+                                            vm->mem_cache_pool);
+                    if (nxt_slow_path(r->part == NULL)) {
+                        return NXT_ERROR;
+                    }
+
+                    r->part = nxt_array_add(&r->parts, &njs_array_mem_proto,
+                                            vm->mem_cache_pool);
+                    if (nxt_slow_path(r->part == NULL)) {
+                        return NXT_ERROR;
+                    }
+
+                    r->part -= 2;
+                }
+
+                /* The third part is the text following the match. */
+                r->part[2].start = r->part[0].start + captures[1];
+                r->part[2].size = r->part[0].size - captures[1];
+                njs_set_invalid(&r->part[2].value);
+
+                if (r->function != NULL) {
+                    return njs_string_replace_regexp_function(vm, args, r,
+                                                              captures, ret);
+                }
+
+                /* The first part is cut to the text preceding the match. */
+                r->part[0].size = captures[0];
+
+                if (!pattern->global) {
+                    return njs_string_replace_regexp_join(vm, r);
+                }
+
+                /* A literal replacement is stored in the second part. */
+                part = r->parts.start;
+                r->part[1] = part[1];
+
+                r->part += 2;
+            }
+
+        } else if (ret == NXT_REGEX_NOMATCH) {
+            break;
+
+        } else {
+            return NXT_ERROR;
+        }
+
+    /*
+     * NOTE(review): if a global pattern matches an empty string at offset 0
+     * (captures[1] == 0), the new r->part[0] has the same start and size as
+     * the previous one, so this condition alone does not seem to guarantee
+     * progress -- confirm.
+     */
+    } while (r->part[0].size > 0);
+
+    /* At least one match has been processed. */
+    if (r->part != r->parts.start) {
+        return njs_string_replace_regexp_join(vm, r);
+    }
+
+    nxt_regex_match_data_free(r->match_data, vm->regex_context);
+
+    nxt_array_destroy(&r->parts, &njs_array_mem_proto, vm->mem_cache_pool);
+
+    njs_string_copy(&vm->retval, &args[0]);
+
+    return NXT_OK;
+}
+
+
+/*
+ * Calls the replacement function for one regexp match.  "n" is the value
+ * returned by njs_regexp_match() and is used as the number of capture
+ * pairs.  The function arguments are: void "this", the matched substring
+ * and the submatches, the match offset, and the examined string.  The
+ * result is requested in r->retval and is then handled by
+ * njs_string_replace_regexp_continuation().
+ *
+ * NOTE(review): "arguments" is not released in this function, including
+ * the error returns -- confirm who owns it.
+ */
+static njs_ret_t
+njs_string_replace_regexp_function(njs_vm_t *vm, njs_value_t *args,
+    njs_string_replace_t *r, int *captures, nxt_uint_t n)
+{
+    u_char       *start;
+    size_t       size, length;
+    njs_ret_t    ret;
+    nxt_uint_t   i, k;
+    njs_value_t  *arguments;
+
+    r->u.cont.function = njs_string_replace_regexp_continuation;
+    njs_set_invalid(&r->retval);
+
+    arguments = nxt_mem_cache_alloc(vm->mem_cache_pool,
+                                    (n + 3) * sizeof(njs_value_t));
+    if (nxt_slow_path(arguments == NULL)) {
+        return NXT_ERROR;
+    }
+
+    arguments[0] = njs_value_void;
+
+    /* Matched substring and parenthesized submatch strings. */
+    for (k = 0, i = 1; i <= n; i++) {
+
+        start = r->part[0].start + captures[k];
+        size = captures[k + 1] - captures[k];
+        k += 2;
+
+        length = njs_string_length(start, size, r->utf8);
+
+        ret = njs_string_create(vm, &arguments[i], start, size, length);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            return NXT_ERROR;
+        }
+    }
+
+    /*
+     * The offset of the matched substring.
+     * NOTE(review): it is relative to r->part[0], which for a global
+     * pattern is the not yet processed remainder -- confirm intended.
+     */
+    njs_number_set(&arguments[n + 1], captures[0]);
+
+    /* The whole string being examined. */
+    length = njs_string_length(r->part[0].start, r->part[0].size, r->utf8);
+
+    ret = njs_string_create(vm, &arguments[n + 2], r->part[0].start,
+                            r->part[0].size, length);
+
+    if (nxt_slow_path(ret != NXT_OK)) {
+        return NXT_ERROR;
+    }
+
+    /* The first part is cut to the text preceding the match. */
+    r->part[0].size = captures[0];
+
+    return njs_function_apply(vm, r->function, arguments, n + 3,
+                              (njs_index_t) &r->retval);
+}
+
+
+/*
+ * Continuation called after a replacement function of a regexp
+ * replace.  A string result becomes the replacement part; then either
+ * the next match is searched (global pattern) or the result is joined.
+ * Any other result raises the type error exception.
+ */
+static njs_ret_t
+njs_string_replace_regexp_continuation(njs_vm_t *vm, njs_value_t *args,
+    nxt_uint_t nargs, njs_index_t unused)
+{
+    njs_string_replace_t  *replace;
+
+    replace = njs_continuation(vm->frame);
+
+    if (!njs_is_string(&replace->retval)) {
+        nxt_regex_match_data_free(replace->match_data, vm->regex_context);
+
+        vm->exception = &njs_exception_type_error;
+
+        return NXT_ERROR;
+    }
+
+    njs_string_replacement_copy(&replace->part[1], &replace->retval);
+
+    if (!args[1].data.u.regexp->pattern->global) {
+        return njs_string_replace_regexp_join(vm, replace);
+    }
+
+    /* Step over the replacement to the tail and look for the next match. */
+    replace->part += 2;
+
+    return njs_string_replace_regexp(vm, args, replace);
+}
+
+
+/*
+ * Releases the regexp match data and then joins the collected parts.
+ */
+static njs_ret_t
+njs_string_replace_regexp_join(njs_vm_t *vm, njs_string_replace_t *r)
+{
+    njs_ret_t  ret;
+
+    nxt_regex_match_data_free(r->match_data, vm->regex_context);
+
+    ret = njs_string_replace_join(vm, r);
+
+    return ret;
+}
+
+
+/*
+ * Replaces the first occurrence of the search string args[1] in the
+ * subject r->part[0].  A template replacement is expanded by
+ * njs_string_replace_substitute(), a replacement function is called via
+ * njs_string_replace_search_function(), and a literal replacement is
+ * already stored in the second part.  If the search string is not found,
+ * the subject args[0] is returned as is.
+ */
+static njs_ret_t
+njs_string_replace_search(njs_vm_t *vm, njs_value_t *args,
+    njs_string_replace_t *r)
+{
+    int                captures[2];
+    u_char             *p, *end;
+    size_t             size;
+    njs_ret_t          ret;
+    njs_string_prop_t  search;
+
+    (void) njs_string_prop(&search, &args[1]);
+
+    /*
+     * A search string longer than the subject cannot match, and
+     * comparing it would read past the end of the subject.
+     */
+    if (nxt_slow_path(search.size > r->part[0].size)) {
+        goto original;
+    }
+
+    p = r->part[0].start;
+
+    /* The first position where the search string does not fit any more. */
+    end = p + (r->part[0].size - search.size) + 1;
+
+    do {
+        if (memcmp(p, search.start, search.size) == 0) {
+
+            if (r->substitutions != NULL) {
+                /* Emulate the captures vector of a regexp match. */
+                captures[0] = p - r->part[0].start;
+                captures[1] = captures[0] + search.size;
+
+                ret = njs_string_replace_substitute(vm, r, captures);
+                if (nxt_slow_path(ret != NXT_OK)) {
+                    return ret;
+                }
+
+            } else {
+                /* Split the subject to the head and the tail parts. */
+                r->part[2].start = p + search.size;
+                size = p - r->part[0].start;
+                r->part[2].size = r->part[0].size - size - search.size;
+                r->part[0].size = size;
+                njs_set_invalid(&r->part[2].value);
+
+                if (r->function != NULL) {
+                    return njs_string_replace_search_function(vm, args, r);
+                }
+            }
+
+            return njs_string_replace_join(vm, r);
+        }
+
+        if (r->utf8 < NJS_STRING_UTF8) {
+            p++;
+
+        } else {
+            p = (u_char *) nxt_utf8_next(p, end);
+        }
+
+    } while (p < end);
+
+original:
+
+    njs_string_copy(&vm->retval, &args[0]);
+
+    return NXT_OK;
+}
+
+
+/*
+ * Calls the replacement function for a string search match.  The
+ * function arguments are: void "this", the matched substring, the match
+ * offset, and the examined string.  The result is requested in r->retval
+ * and is then handled by njs_string_replace_search_continuation().
+ */
+static njs_ret_t
+njs_string_replace_search_function(njs_vm_t *vm, njs_value_t *args,
+    njs_string_replace_t *r)
+{
+    njs_value_t  arguments[4];
+
+    r->u.cont.function = njs_string_replace_search_continuation;
+
+    /*
+     * The continuation tests r->retval with njs_is_string(), so the
+     * value is invalidated before the call in the same way as
+     * njs_string_replace_regexp_function() does.
+     */
+    njs_set_invalid(&r->retval);
+
+    arguments[0] = njs_value_void;
+
+    /* GC, args[0], args[1] */
+
+    /* Matched substring, it is the same as the args[1]. */
+    arguments[1] = args[1];
+
+    /* The offset of the matched substring. */
+    njs_number_set(&arguments[2], r->part[0].size);
+
+    /* The whole string being examined. */
+    arguments[3] = args[0];
+
+    return njs_function_apply(vm, r->function, arguments, 4,
+                              (njs_index_t) &r->retval);
+}
+
+
+/*
+ * Continuation called after a replacement function of a string search
+ * replace.  A string result becomes the replacement part and the parts
+ * are joined; any other result raises the type error exception.
+ */
+static njs_ret_t
+njs_string_replace_search_continuation(njs_vm_t *vm, njs_value_t *args,
+    nxt_uint_t nargs, njs_index_t unused)
+{
+    njs_string_replace_t  *replace;
+
+    replace = njs_continuation(vm->frame);
+
+    if (!njs_is_string(&replace->retval)) {
+        vm->exception = &njs_exception_type_error;
+
+        return NXT_ERROR;
+    }
+
+    njs_string_replacement_copy(&replace->part[1], &replace->retval);
+
+    return njs_string_replace_join(vm, replace);
+}
+
+
+/*
+ * Parses a replacement template into the r->substitutions array.
+ * "p" points to the first '$' of the template, "size" is the length of
+ * the literal text preceding it, and "ncaptures" limits the "$n" numbers
+ * which are accepted.  Adjacent literal text is merged into a single
+ * NJS_SUBST_COPY item.  Returns NXT_OK or NXT_ERROR on allocation failure.
+ */
+static njs_ret_t
+njs_string_replace_parse(njs_vm_t *vm, njs_string_replace_t *r, u_char *p,
+    u_char *end, size_t size, nxt_uint_t ncaptures)
+{
+    u_char              c;
+    uint32_t            type;
+    njs_string_subst_t  *s;
+
+    r->substitutions = nxt_array_create(4, sizeof(njs_string_subst_t),
+                                     &njs_array_mem_proto, vm->mem_cache_pool);
+
+    if (nxt_slow_path(r->substitutions == NULL)) {
+        return NXT_ERROR;
+    }
+
+    /* "s" is the current literal item, or NULL if a new one is required. */
+    s = NULL;
+
+    if (size == 0) {
+        goto skip;
+    }
+
+copy:
+
+    /*
+     * Add the "size" bytes preceding "p" as literal text: either
+     * as a new item or by extending the current literal item.
+     */
+    if (s == NULL) {
+        s = nxt_array_add(r->substitutions, &njs_array_mem_proto,
+                          vm->mem_cache_pool);
+        if (nxt_slow_path(s == NULL)) {
+            return NXT_ERROR;
+        }
+
+        s->type = NJS_SUBST_COPY;
+        s->size = size;
+        s->start = p - size;
+
+    } else {
+        s->size += size;
+    }
+
+skip:
+
+    while (p < end) {
+        size = 1;
+        c = *p++;
+
+        /* An ordinary character or a '$' at the template end is literal. */
+        if (c != '$' || p == end) {
+            goto copy;
+        }
+
+        c = *p++;
+
+        /* "$$": a new literal item starts at the second '$'. */
+        if (c == '$') {
+            s = NULL;
+            goto copy;
+        }
+
+        size = 2;
+
+        if (c >= '0' && c <= '9') {
+            type = c - '0';
+
+            /* An optional second digit of "$nn". */
+            if (p < end) {
+                c = *p;
+
+                if (c >= '0' && c <= '9') {
+                    type = type * 10 + (c - '0');
+                    p++;
+                    size = 3;
+                }
+            }
+
+            /* An out of range "$n" is copied as literal text. */
+            if (type >= ncaptures) {
+                goto copy;
+            }
+
+            /* The captures vector keeps a pair of offsets per capture. */
+            type *= 2;
+
+        } else if (c == '&') {
+            type = 0;
+
+        } else if (c == '`') {
+            type = NJS_SUBST_PRECEDING;
+
+        } else if (c == '\'') {
+            type = NJS_SUBST_FOLLOWING;
+
+        } else {
+            goto copy;
+        }
+
+        s = nxt_array_add(r->substitutions, &njs_array_mem_proto,
+                          vm->mem_cache_pool);
+        if (nxt_slow_path(s == NULL)) {
+            return NXT_ERROR;
+        }
+
+        /* The next literal text must start a new item. */
+        s->type = type;
+        s = NULL;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Expands the parsed template for one match.  One part per substitution
+ * item and one part for the text following the match are appended to
+ * r->parts, and the current subject part is cut to the text preceding
+ * the match.  "captures" holds offsets relative to the current subject
+ * part.  On return r->part points to the part following the match.
+ */
+static njs_ret_t
+njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r,
+    int *captures)
+{
+    uint32_t                   i, n, last;
+    njs_string_subst_t         *s;
+    njs_string_replace_part_t  *part, *subject;
+
+    last = r->substitutions->items;
+
+    part = nxt_array_add_multiple(&r->parts, &njs_array_mem_proto,
+                                  vm->mem_cache_pool, last + 1);
+    if (nxt_slow_path(part == NULL)) {
+        return NXT_ERROR;
+    }
+
+    /* The current subject part is the item just before the added ones. */
+    r->part = &part[-1];
+
+    /* The last added part is the text following the match. */
+    part[last].start = r->part[0].start + captures[1];
+    part[last].size = r->part[0].size - captures[1];
+    njs_set_invalid(&part[last].value);
+
+    r->part[0].size = captures[0];
+
+    s = r->substitutions->start;
+
+    for (i = 0; i < last; i++) {
+        n = s[i].type;
+
+        switch (n) {
+
+        /* Literal text, "$$", and out of range "$n" substitutions. */
+        case NJS_SUBST_COPY:
+            part->start = s[i].start;
+            part->size = s[i].size;
+            break;
+
+        /* "$`" substitution. */
+        case NJS_SUBST_PRECEDING:
+            subject = r->parts.start;
+            part->start = subject->start;
+            part->size = (r->part[0].start - subject->start) + r->part[0].size;
+            break;
+
+        /* "$'" substitution. */
+        case NJS_SUBST_FOLLOWING:
+            part->start = r->part[last + 1].start;
+            part->size = r->part[last + 1].size;
+            break;
+
+        /*
+         * "$n" substitutions.
+         * "$&" is the same as "$0", the "$0" however is not supported.
+         * NOTE(review): njs_string_replace_parse() seems to accept "$0"
+         * and to produce the same type 0 as for "$&" -- confirm.
+         */
+        default:
+            part->start = r->part[0].start + captures[n];
+            part->size = captures[n + 1] - captures[n];
+            break;
+        }
+
+        njs_set_invalid(&part->value);
+        part++;
+    }
+
+    r->part = part;
+
+    return NXT_OK;
+}
+
+
+/*
+ * Concatenates all parts of r->parts into a new string allocated in
+ * vm->retval and destroys the parts array.  The length of the result is
+ * the sum of the nxt_utf8_length() values of the parts, or zero if
+ * nxt_utf8_length() has returned a negative value for any part.
+ */
+static njs_ret_t
+njs_string_replace_join(njs_vm_t *vm, njs_string_replace_t *r)
+{
+    u_char                     *p, *string;
+    size_t                     size, length, mask;
+    ssize_t                    len;
+    nxt_uint_t                 i, n;
+    njs_string_replace_part_t  *part;
+
+    size = 0;
+    length = 0;
+    mask = -1;
+
+    part = r->parts.start;
+    n = r->parts.items;
+
+    for (i = 0; i < n; i++) {
+        /* A NULL start means that the bytes are in the short string value. */
+        if (part[i].start == NULL) {
+            part[i].start = part[i].value.short_string.start;
+        }
+
+        size += part[i].size;
+
+        len = nxt_utf8_length(part[i].start, part[i].size);
+
+        if (len >= 0) {
+            length += len;
+
+        } else {
+            mask = 0;
+        }
+    }
+
+    /* The mask is either all ones or zero, see the loop above. */
+    length &= mask;
+
+    string = njs_string_alloc(vm, &vm->retval, size, length);
+    if (nxt_slow_path(string == NULL)) {
+        return NXT_ERROR;
+    }
+
+    p = string;
+
+    for (i = 0; i < n; i++) {
+        p = memcpy(p, part[i].start, part[i].size);
+        p += part[i].size;
+
+        /* GC: release valid values. */
+    }
+
+    if (length >= NJS_STRING_MAP_OFFSET && size != length) {
+        njs_string_offset_map_init(string, size);
+    }
+
+    nxt_array_destroy(&r->parts, &njs_array_mem_proto, vm->mem_cache_pool);
+
+    return NXT_OK;
+}
+
+
+/*
+ * Stores a copy of the string "value" in the part and sets the part
+ * bounds.  For a long string "start" points to the string data; otherwise
+ * "start" is NULL and the bytes stay in the copied short string value.
+ */
+static void
+njs_string_replacement_copy(njs_string_replace_part_t *string,
+    const njs_value_t *value)
+{
+    string->value = *value;
+
+    if (value->short_string.size == NJS_STRING_LONG) {
+        string->start = value->data.u.string->start;
+        string->size = value->data.string_size;
+
+        return;
+    }
+
+    string->start = NULL;
+    string->size = value->short_string.size;
+}
+
+
 njs_ret_t
 njs_primitive_value_to_string(njs_vm_t *vm, njs_value_t *dst,
     const njs_value_t *src)
@@ -2100,6 +2825,14 @@ static const njs_object_prop_t  njs_string_prototype_properties[] =
         .value = njs_native_function(njs_string_prototype_split, 0,
                      NJS_STRING_OBJECT_ARG, NJS_REGEXP_ARG, NJS_INTEGER_ARG),
     },
+
+    {
+        .type = NJS_METHOD,
+        .name = njs_string("replace"),
+        .value = njs_native_function(njs_string_prototype_replace,
+                     njs_continuation_size(njs_string_replace_t),
+                     NJS_STRING_OBJECT_ARG, NJS_REGEXP_ARG, NJS_FUNCTION_ARG),
+    },
 };
 
 
index 30f2515c3f56367958552a76f02d3c932d82744c..83eb290e48b573595b3965cbcb82a06a8700e484 100644 (file)
@@ -7,6 +7,7 @@
 #ifndef _NJS_STRING_H_INCLUDED_
 #define _NJS_STRING_H_INCLUDED_
 
+#include <nxt_utf8.h>
 
 /*
  * nJSVM supports two string variants:
@@ -80,6 +81,34 @@ typedef struct {
 } njs_slice_prop_t;
 
 
+/*
+ * A string kind as used by njs_string_length(): the length of a byte
+ * string is reported as 0, the length of an ASCII string is equal to
+ * its size, and the length of a UTF-8 string has to be counted.
+ */
+typedef enum {
+    NJS_STRING_BYTE = 0,
+    NJS_STRING_ASCII,
+    NJS_STRING_UTF8,
+} njs_utf8_t;
+
+
+/*
+ * Returns the length in characters of the "size" bytes at "start"
+ * according to the string kind "utf8": 0 for a byte string, "size" for
+ * an ASCII string, and the nxt_utf8_length() result otherwise, where
+ * a negative result is reported as 0.
+ */
+nxt_inline uint32_t
+njs_string_length(u_char *start, size_t size, njs_utf8_t utf8)
+{
+    ssize_t  chars;
+
+    if (utf8 == NJS_STRING_BYTE) {
+        return 0;
+    }
+
+    if (utf8 == NJS_STRING_ASCII) {
+        return size;
+    }
+
+    /* NJS_STRING_UTF8 */
+    chars = nxt_utf8_length(start, size);
+
+    if (chars < 0) {
+        return 0;
+    }
+
+    return chars;
+}
+
+
 njs_ret_t njs_string_new(njs_vm_t *vm, njs_value_t *value, const u_char *start,
     uint32_t size, uint32_t length);
 u_char *njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size,
index f85b40a067a40e1330bcbc78656b738ed1fad371..481da3afaf4cc01d8002d902a0ce294bae92d2f8 100644 (file)
@@ -2452,6 +2452,20 @@ njs_normalize_args(njs_vm_t *vm, njs_value_t *args, uint8_t *args_types,
             trap = NJS_TRAP_NUMBER_ARG;
             goto trap;
 
+        case NJS_FUNCTION_ARG:
+
+            switch (args->type) {
+            case NJS_STRING:
+            case NJS_FUNCTION:
+                break;
+
+            default:
+                trap = NJS_TRAP_STRING_ARG;
+                goto trap;
+            }
+
+            break;
+
         case NJS_REGEXP_ARG:
 
             switch (args->type) {
index 0601e401a3d140f447a86eca360cf1687722a509..3fcbaa9aa4c26c3a36a9f32f20583551d51bca3f 100644 (file)
@@ -3176,6 +3176,96 @@ static njs_unit_test_t  njs_test[] =
                  "'123456'.search(r)"),
       nxt_string("3") },
 
+    { nxt_string("'abcdefgh'.replace()"),
+      nxt_string("abcdefgh") },
+
+    { nxt_string("'abcdefgh'.replace('d')"),
+      nxt_string("abcundefinedefgh") },
+
+    { nxt_string("'abcdefgh'.replace('d', undefined)"),
+      nxt_string("abcundefinedefgh") },
+
+    { nxt_string("'abcdefgh'.replace('d', null)"),
+      nxt_string("abcnullefgh") },
+
+    { nxt_string("'abcdefgh'.replace('d', 1)"),
+      nxt_string("abc1efgh") },
+
+    { nxt_string("'abcdefghdijklm'.replace('d', 'X')"),
+      nxt_string("abcXefghdijklm") },
+
+    { nxt_string("'абвгдежгийклм'.replace('г', 'Г')"),
+      nxt_string("абвГдежгийклм") },
+
+    { nxt_string("'abcdefghdijklm'.replace('d',"
+                 "   function(m, o, s) { return '|'+s+'|'+o+'|'+m+'|' })"),
+      nxt_string("abc|abcdefghdijklm|3|d|efghdijklm") },
+
+    { nxt_string("'abcdefgh'.replace('', 'X')"),
+      nxt_string("Xabcdefgh") },
+
+    { nxt_string("'abcdefghdijklm'.replace(/d/, 'X')"),
+      nxt_string("abcXefghdijklm") },
+
+    { nxt_string("'abcdefghdijklm'.replace(/d/,"
+                 "   function(m, o, s) { return '|'+s+'|'+o+'|'+m+'|' })"),
+      nxt_string("abc|abcdefghdijklm|3|d|efghdijklm") },
+
+    { nxt_string("'abcdefghdijklm'.replace(/(d)/,"
+                 "   function(m, p, o, s)"
+                       "{ return '|'+s+'|'+o+'|'+m+'|'+p+'|' })"),
+      nxt_string("abc|abcdefghdijklm|3|d|d|efghdijklm") },
+
+    { nxt_string("'abcdefghdijklm'.replace(/x/, 'X')"),
+      nxt_string("abcdefghdijklm") },
+
+    { nxt_string("'abcdefghdijklm'.replace(/x/,"
+                 "   function(m, o, s) { return '|'+s+'|'+o+'|'+m+'|' })"),
+      nxt_string("abcdefghdijklm") },
+
+    { nxt_string("'абвгдежгийклм'.replace(/г/, 'Г')"),
+      nxt_string("абвГдежгийклм") },
+
+    { nxt_string("'abcdefghdijklm'.replace(/d/g, 'X')"),
+      nxt_string("abcXefghXijklm") },
+
+    { nxt_string("'абвгдежгийклм'.replace(/г/g, 'Г')"),
+      nxt_string("абвГдежГийклм") },
+
+    { nxt_string("'abc12345#$*%'.replace(/([^\\d]*)(\\d*)([^\\w]*)/,"
+                 "   function(match, p1, p2, p3) {"
+                 "     return [p1, p2, p3].join('-')})"),
+      nxt_string("abc-12345-#$*%") },
+
+    { nxt_string("'ABCDEFGHDIJKLM'.replace(/[A-Z]/g,"
+                 "   function(match) { return '-' + match.toLowerCase() })"),
+      nxt_string("-a-b-c-d-e-f-g-h-d-i-j-k-l-m") },
+
+    { nxt_string("'abcdbe'.replace(/(b)/g, '$')"),
+      nxt_string("a$cd$e") },
+
+    { nxt_string("'abcdbe'.replace(/(b)/g, '$2$23')"),
+      nxt_string("a$2$23cd$2$23e") },
+
+    { nxt_string("'abcdbe'.replace(/(b)/g, '$2$23X$$Y')"),
+      nxt_string("a$2$23X$Ycd$2$23X$Ye") },
+
+    { nxt_string("'abcdbe'.replace('b', '|$`X$\\'|')"),
+      nxt_string("a|aXcdbe|cdbe") },
+
+    { nxt_string("'abcdbe'.replace(/b/, '|$`X$\\'|')"),
+      nxt_string("a|aXcdbe|cdbe") },
+
+    { nxt_string("'abcdbefbgh'.replace(/b/g, '|$`X$\\'|')"),
+      nxt_string("a|aXcdbefbgh|cd|abcdXefbgh|ef|abcdbefXgh|gh") },
+
+    { nxt_string("'abc12345#$*%'.replace(/([^\\d]*)(\\d*)([^\\w]*)/,"
+                 "                       '$1-$2-$3')"),
+      nxt_string("abc-12345-#$*%") },
+
+    { nxt_string("'$1,$2'.replace(/(\\$(\\d))/g, '$$1-$1$2')"),
+      nxt_string("$1-$11,$1-$22") },
+
     { nxt_string("'abcdefgh'.match()"),
       nxt_string("") },
 
index 00ea35ca652be8f8503f33dbbe54cc20ee27dbde..122981483aa0221904f9974146f5756b3f7bf8ff 100644 (file)
@@ -160,7 +160,7 @@ set -e
 cat << END >> $NXT_MAKEFILE_CONF
 
 NXT_CC =       ${CC}
-NXT_CFLAGS =   ${CFLAGS} ${NXT_CFLAGS}
+NXT_CFLAGS =   ${NXT_CFLAGS} ${CFLAGS}
 END
 
 
index 222cd0e3274a2677ae109a842da76b906edd256e..9f1da8afd04e9f9f97bd9c76f3cfd8c73a7ff9cd 100644 (file)
@@ -12,7 +12,7 @@
 
 
 nxt_array_t *
-nxt_array_create(nxt_uint_t items, size_t item_size,
+nxt_array_create(uint32_t items, uint32_t item_size,
     const nxt_mem_proto_t *proto, void *pool)
 {
     nxt_array_t  *array;
@@ -24,7 +24,8 @@ nxt_array_create(nxt_uint_t items, size_t item_size,
         array->items = 0;
         array->item_size = item_size;
         array->avalaible = items;
-        array->type = NXT_ARRAY_EMBEDDED;
+        array->pointer = 1;
+        array->separate = 1;
     }
 
     return array;
@@ -32,16 +33,21 @@ nxt_array_create(nxt_uint_t items, size_t item_size,
 
 
 void *
-nxt_array_init(nxt_array_t *array, nxt_uint_t items, size_t item_size,
-    const nxt_mem_proto_t *proto, void *pool)
+nxt_array_init(nxt_array_t *array, void *start, uint32_t items,
+    uint32_t item_size, const nxt_mem_proto_t *proto, void *pool)
 {
-    array->start = proto->alloc(pool, items * item_size);
-
-    if (nxt_fast_path(array->start != NULL)) {
+    array->start = start;
+    array->items = items;
+    array->item_size = item_size;
+    array->avalaible = items;
+    array->pointer = 0;
+    array->separate = 0;
+
+    if (array->start == NULL) {
+        array->separate = 1;
         array->items = 0;
-        array->item_size = item_size;
-        array->avalaible = items;
-        array->type = NXT_ARRAY_INITED;
+
+        array->start = proto->alloc(pool, items * item_size);
     }
 
     return array->start;
@@ -51,39 +57,39 @@ nxt_array_init(nxt_array_t *array, nxt_uint_t items, size_t item_size,
 void
 nxt_array_destroy(nxt_array_t *array, const nxt_mem_proto_t *proto, void *pool)
 {
-    switch (array->type) {
-
-    case NXT_ARRAY_INITED:
+    if (array->separate) {
         proto->free(pool, array->start);
 #if (NXT_DEBUG)
         array->start = NULL;
         array->items = 0;
         array->avalaible = 0;
 #endif
-        break;
-
-    case NXT_ARRAY_DESCRETE:
-        proto->free(pool, array->start);
-
-        /* Fall through. */
+    }
 
-    case NXT_ARRAY_EMBEDDED:
+    if (array->pointer) {
         proto->free(pool, array);
-        break;
     }
 }
 
 
 void *
 nxt_array_add(nxt_array_t *array, const nxt_mem_proto_t *proto, void *pool)
+{
+    return nxt_array_add_multiple(array, proto, pool, 1);
+}
+
+
+void *
+nxt_array_add_multiple(nxt_array_t *array, const nxt_mem_proto_t *proto,
+    void *pool, uint32_t items)
 {
     void      *item, *start, *old;
-    size_t    size;
     uint32_t  n;
 
     n = array->avalaible;
+    items += array->items;
 
-    if (n == array->items) {
+    if (items >= n) {
 
         if (n < 16) {
             /* Allocate new array twice as much as current. */
@@ -94,9 +100,11 @@ nxt_array_add(nxt_array_t *array, const nxt_mem_proto_t *proto, void *pool)
             n += n / 2;
         }
 
-        size = n * array->item_size;
+        if (n < items) {
+            n = items;
+        }
 
-        start = proto->alloc(pool, size);
+        start = proto->alloc(pool, n * array->item_size);
         if (nxt_slow_path(start == NULL)) {
             return NULL;
         }
@@ -105,19 +113,19 @@ nxt_array_add(nxt_array_t *array, const nxt_mem_proto_t *proto, void *pool)
         old = array->start;
         array->start = start;
 
-        memcpy(start, old, size);
+        memcpy(start, old, array->items * array->item_size);
 
-        if (array->type == NXT_ARRAY_EMBEDDED) {
-            array->type = NXT_ARRAY_DESCRETE;
+        if (array->separate == 0) {
+            array->separate = 1;
 
         } else {
             proto->free(pool, old);
         }
     }
 
-    item = (char *) array->start + array->item_size * array->items;
+    item = (char *) array->start + array->items * array->item_size;
 
-    array->items++;
+    array->items = items;
 
     return item;
 }
index 5685f110178dbc82e18362a5e541a0f6397ca96f..b1c4cdf8f5a8c86f7424e1ac1c6b0f0f340083f4 100644 (file)
@@ -8,13 +8,6 @@
 #define _NXT_ARRAY_H_INCLUDED_
 
 
-typedef enum {
-    NXT_ARRAY_INITED = 0,
-    NXT_ARRAY_DESCRETE,
-    NXT_ARRAY_EMBEDDED,
-} nxt_array_type_t;
-
-
 typedef struct {
     void              *start;
     /*
@@ -24,18 +17,23 @@ typedef struct {
     uint16_t          items;
     uint16_t          avalaible;
     uint16_t          item_size;
-    nxt_array_type_t  type:8;
+
+    uint8_t           pointer;
+    uint8_t           separate;
 } nxt_array_t;
 
 
-NXT_EXPORT nxt_array_t *nxt_array_create(nxt_uint_t items, size_t item_size,
+NXT_EXPORT nxt_array_t *nxt_array_create(uint32_t items, uint32_t item_size,
     const nxt_mem_proto_t *proto, void *pool);
-NXT_EXPORT void *nxt_array_init(nxt_array_t *array, nxt_uint_t items,
-    size_t item_size, const nxt_mem_proto_t *proto, void *pool);
+NXT_EXPORT void *nxt_array_init(nxt_array_t *array, void *start,
+    uint32_t items, uint32_t item_size, const nxt_mem_proto_t *proto,
+    void *pool);
 NXT_EXPORT void nxt_array_destroy(nxt_array_t *array,
     const nxt_mem_proto_t *proto, void *pool);
 NXT_EXPORT void *nxt_array_add(nxt_array_t *array, const nxt_mem_proto_t *proto,
     void *pool);
+NXT_EXPORT void *nxt_array_add_multiple(nxt_array_t *array,
+    const nxt_mem_proto_t *proto, void *pool, uint32_t items);
 NXT_EXPORT void *nxt_array_zero_add(nxt_array_t *array,
     const nxt_mem_proto_t *proto, void *pool);
 NXT_EXPORT void nxt_array_remove(nxt_array_t *array, void *item);
index 45eef09a3fa3e9d6caf18af2b4e7ec1c2d75c451..9f13f993b47771cc340d0f71b21d1c08793c0105 100644 (file)
@@ -129,6 +129,20 @@ done:
 }
 
 
+nxt_bool_t
+nxt_regex_is_valid(nxt_regex_t *regex)
+{
+    return (regex->code != NULL);
+}
+
+
+nxt_uint_t
+nxt_regex_ncaptures(nxt_regex_t *regex)
+{
+    return regex->ncaptures;
+}
+
+
 nxt_regex_match_data_t *
 nxt_regex_match_data(nxt_regex_t *regex, nxt_regex_context_t *ctx)
 {
@@ -193,13 +207,6 @@ nxt_pcre_default_free(void *p, void *memory_data)
 }
 
 
-nxt_bool_t
-nxt_regex_is_valid(nxt_regex_t *regex)
-{
-    return (regex->code != NULL);
-}
-
-
 nxt_int_t
 nxt_regex_match(nxt_regex_t *regex, u_char *subject, size_t len,
     nxt_regex_match_data_t *match_data, nxt_regex_context_t *ctx)
@@ -222,5 +229,5 @@ nxt_regex_match(nxt_regex_t *regex, u_char *subject, size_t len,
 int *
 nxt_regex_captures(nxt_regex_match_data_t *match_data)
 {
-     return match_data->captures;
+    return match_data->captures;
 }
index ba9fbcfb4b8907824d741f4d1f2c4636bd36682a..12235d210fc3787f1c394384228baf51a6ecbde4 100644 (file)
@@ -11,7 +11,7 @@
 #include <pcre.h>
 
 
-#define NGX_REGEX_NOMATCH  PCRE_ERROR_NOMATCH
+#define NXT_REGEX_NOMATCH  PCRE_ERROR_NOMATCH
 
 
 struct nxt_regex_s {
@@ -25,6 +25,8 @@ struct nxt_regex_match_data_s {
     int         ncaptures;
     /*
      * Each capture is stored in 3 "int" vector elements.
+     * The start and end of capture N are in elements [N * 2] and [N * 2 + 1].
+     * The 3rd bookkeeping elements are at the end of the vector.
      * The first vector is for the "$0" capture and it is always allocated.
      */
     int         captures[3];
index 674f1e22d1a41cbc7fcc43477deed1f0b877c89d..4178a03e8dae55b6e1518a5c8a024561112b03d3 100644 (file)
@@ -30,6 +30,7 @@ NXT_EXPORT nxt_regex_context_t *
 NXT_EXPORT nxt_int_t nxt_regex_compile(nxt_regex_t *regex, u_char *source,
     size_t len, nxt_uint_t options, nxt_regex_context_t *ctx);
 NXT_EXPORT nxt_bool_t nxt_regex_is_valid(nxt_regex_t *regex);
+NXT_EXPORT nxt_uint_t nxt_regex_ncaptures(nxt_regex_t *regex);
 NXT_EXPORT nxt_regex_match_data_t *nxt_regex_match_data(nxt_regex_t *regex,
     nxt_regex_context_t *ctx);
 NXT_EXPORT void nxt_regex_match_data_free(nxt_regex_match_data_t *match_data,