From ad652679ea3e2972ffcd355bac199502c00e5d5d Mon Sep 17 00:00:00 2001 From: Dmitry Volyntsev Date: Thu, 11 Nov 2021 14:26:30 +0000 Subject: [PATCH] RegExp: incapsulating PCRE API. --- src/njs_main.h | 1 - src/njs_parser.c | 2 +- src/njs_pcre.c | 214 ++++++++++++++++++++++++++------ src/njs_pcre.h | 40 ------ src/njs_regex.h | 65 ++++++++-- src/njs_regexp.c | 262 +++++++++++---------------------------- src/njs_regexp.h | 16 +-- src/njs_string.c | 25 ++-- src/njs_vm.h | 3 +- src/test/njs_unit_test.c | 2 + 10 files changed, 323 insertions(+), 307 deletions(-) delete mode 100644 src/njs_pcre.h diff --git a/src/njs_main.h b/src/njs_main.h index 4e8722ff..1f505304 100644 --- a/src/njs_main.h +++ b/src/njs_main.h @@ -38,7 +38,6 @@ #include #include -#include #include #include diff --git a/src/njs_parser.c b/src/njs_parser.c index cc366320..4f2aa307 100644 --- a/src/njs_parser.c +++ b/src/njs_parser.c @@ -1198,7 +1198,7 @@ njs_parser_regexp_literal(njs_parser_t *parser, njs_lexer_token_t *token, njs_int_t ret; njs_lexer_t *lexer; njs_value_t *value, retval; - njs_regexp_flags_t flags; + njs_regex_flags_t flags; njs_regexp_pattern_t *pattern; static const njs_value_t string_message = njs_string("message"); diff --git a/src/njs_pcre.c b/src/njs_pcre.c index d28c2069..c9e6e9ec 100644 --- a/src/njs_pcre.c +++ b/src/njs_pcre.c @@ -7,21 +7,23 @@ #include +#include + static void *njs_pcre_malloc(size_t size); static void njs_pcre_free(void *p); -static njs_regex_context_t *regex_context; +static njs_regex_generic_ctx_t *regex_context; -njs_regex_context_t * -njs_regex_context_create(njs_pcre_malloc_t private_malloc, +njs_regex_generic_ctx_t * +njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc, njs_pcre_free_t private_free, void *memory_data) { - njs_regex_context_t *ctx; + njs_regex_generic_ctx_t *ctx; - ctx = private_malloc(sizeof(njs_regex_context_t), memory_data); + ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data); if (njs_fast_path(ctx != NULL)) 
{ ctx->private_malloc = private_malloc; @@ -33,15 +35,138 @@ njs_regex_context_create(njs_pcre_malloc_t private_malloc, } +njs_regex_compile_ctx_t * +njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx) +{ + return ctx; +} + + +/* + * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with + * lone closing square brackets as invalid. Whereas according + * to ES6: 11.8.5 it is a valid regexp expression. + * + * 2) escaping zero byte characters as "\u0000". + * + * Escaping it here as a workaround. + */ + +njs_int_t +njs_regex_escape(njs_mp_t *mp, njs_str_t *text) +{ + size_t brackets, zeros; + u_char *p, *dst, *start, *end; + njs_bool_t in; + + start = text->start; + end = text->start + text->length; + + in = 0; + zeros = 0; + brackets = 0; + + for (p = start; p < end; p++) { + + switch (*p) { + case '[': + in = 1; + break; + + case ']': + if (!in) { + brackets++; + } + + in = 0; + break; + + case '\\': + p++; + + if (p == end || *p != '\0') { + break; + } + + /* Fall through. */ + + case '\0': + zeros++; + break; + } + } + + if (!brackets && !zeros) { + return NJS_OK; + } + + text->length = text->length + brackets + zeros * njs_length("\\u0000"); + + text->start = njs_mp_alloc(mp, text->length); + if (njs_slow_path(text->start == NULL)) { + return NJS_ERROR; + } + + in = 0; + dst = text->start; + + for (p = start; p < end; p++) { + + switch (*p) { + case '[': + in = 1; + break; + + case ']': + if (!in) { + *dst++ = '\\'; + } + + in = 0; + break; + + case '\\': + *dst++ = *p++; + + if (p == end) { + goto done; + } + + if (*p != '\0') { + break; + } + + /* Fall through. 
*/ + + case '\0': + dst = njs_cpymem(dst, "\\u0000", 6); + continue; + } + + *dst++ = *p; + } + +done: + + text->length = dst - text->start; + + return NJS_OK; +} + + njs_int_t njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, - njs_uint_t options, njs_regex_context_t *ctx) + njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace) { - int ret, err, erroff; - char *pattern, *error; - void *(*saved_malloc)(size_t size); - void (*saved_free)(void *p); - const char *errstr; + int ret, err, erroff; + char *pattern, *error; + void *(*saved_malloc)(size_t size); + void (*saved_free)(void *p); + njs_uint_t options; + const char *errstr; + njs_regex_generic_ctx_t *ctx; + + ctx = cctx; ret = NJS_ERROR; @@ -51,31 +176,43 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, pcre_free = njs_pcre_free; regex_context = ctx; - if (len == 0) { - pattern = (char *) source; +#ifdef PCRE_JAVASCRIPT_COMPAT + /* JavaScript compatibility has been introduced in PCRE-7.7. 
*/ + options = PCRE_JAVASCRIPT_COMPAT; +#else + options = 0; +#endif - } else { - pattern = ctx->private_malloc(len + 1, ctx->memory_data); - if (njs_slow_path(pattern == NULL)) { - goto done; - } + if ((flags & NJS_REGEX_IGNORE_CASE)) { + options |= PCRE_CASELESS; + } + + if ((flags & NJS_REGEX_MULTILINE)) { + options |= PCRE_MULTILINE; + } + + if ((flags & NJS_REGEX_STICKY)) { + options |= PCRE_ANCHORED; + } - memcpy(pattern, source, len); - pattern[len] = '\0'; + if ((flags & NJS_REGEX_UTF8)) { + options |= PCRE_UTF8; } + pattern = (char *) source; + regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL); if (njs_slow_path(regex->code == NULL)) { error = pattern + erroff; if (*error != '\0') { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_compile(\"%s\") failed: %s at \"%s\"", pattern, errstr, error); } else { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_compile(\"%s\") failed: %s", pattern, errstr); } @@ -87,7 +224,7 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, regex->extra = pcre_study(regex->code, 0, &errstr); if (njs_slow_path(errstr != NULL)) { - njs_alert(ctx->trace, NJS_LEVEL_WARN, + njs_alert(trace, NJS_LEVEL_WARN, "pcre_study(\"%s\") failed: %s", pattern, errstr); } @@ -95,7 +232,7 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, &regex->ncaptures); if (njs_slow_path(err < 0)) { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d", pattern, err); @@ -106,7 +243,7 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, &regex->backrefmax); if (njs_slow_path(err < 0)) { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d", pattern, err); @@ -121,7 +258,7 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, &regex->nentries); if 
(njs_slow_path(err < 0)) { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d", pattern, err); @@ -133,7 +270,7 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, &regex->entry_size); if (njs_slow_path(err < 0)) { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err); goto done; @@ -143,7 +280,7 @@ njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, &regex->entries); if (njs_slow_path(err < 0)) { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " "PCRE_INFO_NAMETABLE) failed: %d", pattern, err); goto done; @@ -193,7 +330,7 @@ njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n) njs_regex_match_data_t * -njs_regex_match_data(njs_regex_t *regex, njs_regex_context_t *ctx) +njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx) { size_t size; njs_uint_t ncaptures; @@ -222,7 +359,7 @@ njs_regex_match_data(njs_regex_t *regex, njs_regex_context_t *ctx) void njs_regex_match_data_free(njs_regex_match_data_t *match_data, - njs_regex_context_t *ctx) + njs_regex_generic_ctx_t *ctx) { ctx->private_free(match_data, ctx->memory_data); } @@ -244,25 +381,28 @@ njs_pcre_free(void *p) njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off, - size_t len, njs_regex_match_data_t *match_data, njs_regex_context_t *ctx) + size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace) { int ret; ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len, off, 0, match_data->captures, match_data->ncaptures); - /* PCRE_ERROR_NOMATCH is -1. 
*/ + if (ret <= PCRE_ERROR_NOMATCH) { + if (ret == PCRE_ERROR_NOMATCH) { + return NJS_DECLINED; + } - if (njs_slow_path(ret < PCRE_ERROR_NOMATCH)) { - njs_alert(ctx->trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret); + njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret); + return NJS_ERROR; } return ret; } -int * -njs_regex_captures(njs_regex_match_data_t *match_data) +size_t +njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n) { - return match_data->captures; + return match_data->captures[n]; } diff --git a/src/njs_pcre.h b/src/njs_pcre.h deleted file mode 100644 index 0f65ae2f..00000000 --- a/src/njs_pcre.h +++ /dev/null @@ -1,40 +0,0 @@ - -/* - * Copyright (C) Igor Sysoev - * Copyright (C) NGINX, Inc. - */ - -#ifndef _NJS_PCRE_H_INCLUDED_ -#define _NJS_PCRE_H_INCLUDED_ - - -#include - - -#define NJS_REGEX_NOMATCH PCRE_ERROR_NOMATCH - - -struct njs_regex_s { - pcre *code; - pcre_extra *extra; - int ncaptures; - int backrefmax; - int nentries; - int entry_size; - char *entries; -}; - - -struct njs_regex_match_data_s { - int ncaptures; - /* - * Each capture is stored in 3 "int" vector elements. - * The N capture positions are stored in [n * 2] and [n * 2 + 1] elements. - * The 3rd bookkeeping elements are at the end of the vector. - * The first vector is for the "$0" capture and it is always allocated. 
- */ - int captures[3]; -}; - - -#endif /* _NJS_PCRE_H_INCLUDED_ */ diff --git a/src/njs_regex.h b/src/njs_regex.h index eabfb066..ee08fe1f 100644 --- a/src/njs_regex.h +++ b/src/njs_regex.h @@ -7,39 +7,78 @@ #ifndef _NJS_REGEX_H_INCLUDED_ #define _NJS_REGEX_H_INCLUDED_ +#define NJS_REGEX_UNSET (size_t) (-1) -typedef void *(*njs_pcre_malloc_t)(size_t size, void *memory_data); -typedef void (*njs_pcre_free_t)(void *p, void *memory_data); +typedef enum { + NJS_REGEX_INVALID_FLAG = -1, + NJS_REGEX_NO_FLAGS = 0, + NJS_REGEX_GLOBAL = 1, + NJS_REGEX_IGNORE_CASE = 2, + NJS_REGEX_MULTILINE = 4, + NJS_REGEX_STICKY = 8, + NJS_REGEX_UTF8 = 16, +} njs_regex_flags_t; -typedef struct njs_regex_s njs_regex_t; -typedef struct njs_regex_match_data_s njs_regex_match_data_t; + +typedef void *(*njs_pcre_malloc_t)(size_t size, void *memory_data); +typedef void (*njs_pcre_free_t)(void *p, void *memory_data); typedef struct { njs_pcre_malloc_t private_malloc; njs_pcre_free_t private_free; void *memory_data; - njs_trace_t *trace; -} njs_regex_context_t; +} njs_regex_generic_ctx_t; + + +#define njs_regex_compile_ctx_t void + + +typedef struct { + void *code; + void *extra; + int ncaptures; + int backrefmax; + int nentries; + int entry_size; + char *entries; +} njs_regex_t; + + +typedef struct { + int ncaptures; + /* + * Each capture is stored in 3 "int" vector elements. + * The N capture positions are stored in [n * 2] and [n * 2 + 1] elements. + * The 3rd bookkeeping elements are at the end of the vector. + * The first vector is for the "$0" capture and it is always allocated. 
+ */ + int captures[3]; +} njs_regex_match_data_t; -NJS_EXPORT njs_regex_context_t * - njs_regex_context_create(njs_pcre_malloc_t private_malloc, +NJS_EXPORT njs_regex_generic_ctx_t * + njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc, njs_pcre_free_t private_free, void *memory_data); +NJS_EXPORT njs_regex_compile_ctx_t *njs_regex_compile_ctx_create( + njs_regex_generic_ctx_t *ctx); +NJS_EXPORT njs_int_t njs_regex_escape(njs_mp_t *mp, njs_str_t *text); NJS_EXPORT njs_int_t njs_regex_compile(njs_regex_t *regex, u_char *source, - size_t len, njs_uint_t options, njs_regex_context_t *ctx); + size_t len, njs_regex_flags_t flags, njs_regex_compile_ctx_t *ctx, + njs_trace_t *trace); NJS_EXPORT njs_bool_t njs_regex_is_valid(njs_regex_t *regex); NJS_EXPORT njs_int_t njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n); NJS_EXPORT njs_regex_match_data_t *njs_regex_match_data(njs_regex_t *regex, - njs_regex_context_t *ctx); + njs_regex_generic_ctx_t *ctx); NJS_EXPORT void njs_regex_match_data_free(njs_regex_match_data_t *match_data, - njs_regex_context_t *ctx); + njs_regex_generic_ctx_t *ctx); NJS_EXPORT njs_int_t njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off, size_t len, njs_regex_match_data_t *match_data, - njs_regex_context_t *ctx); -NJS_EXPORT int *njs_regex_captures(njs_regex_match_data_t *match_data); + njs_trace_t *trace); +NJS_EXPORT size_t njs_regex_capture(njs_regex_match_data_t *match_data, + njs_uint_t n); #endif /* _NJS_REGEX_H_INCLUDED_ */ diff --git a/src/njs_regexp.c b/src/njs_regexp.c index d0554595..84c9bea2 100644 --- a/src/njs_regexp.c +++ b/src/njs_regexp.c @@ -20,7 +20,7 @@ static void njs_regexp_free(void *p, void *memory_data); static njs_int_t njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_index_t unused); static int njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, - u_char *source, int options); + u_char *source, size_t len, njs_regex_flags_t flags); 
static u_char *njs_regexp_compile_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, u_char *start); static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace, @@ -37,20 +37,25 @@ const njs_value_t njs_string_lindex = njs_string("lastIndex"); njs_int_t njs_regexp_init(njs_vm_t *vm) { - vm->regex_context = njs_regex_context_create(njs_regexp_malloc, - njs_regexp_free, vm->mem_pool); - if (njs_slow_path(vm->regex_context == NULL)) { + vm->regex_generic_ctx = njs_regex_generic_ctx_create(njs_regexp_malloc, + njs_regexp_free, + vm->mem_pool); + if (njs_slow_path(vm->regex_generic_ctx == NULL)) { njs_memory_error(vm); return NJS_ERROR; } - vm->single_match_data = njs_regex_match_data(NULL, vm->regex_context); - if (njs_slow_path(vm->single_match_data == NULL)) { + vm->regex_compile_ctx = njs_regex_compile_ctx_create(vm->regex_generic_ctx); + if (njs_slow_path(vm->regex_compile_ctx == NULL)) { njs_memory_error(vm); return NJS_ERROR; } - vm->regex_context->trace = &vm->trace; + vm->single_match_data = njs_regex_match_data(NULL, vm->regex_generic_ctx); + if (njs_slow_path(vm->single_match_data == NULL)) { + njs_memory_error(vm); + return NJS_ERROR; + } return NJS_OK; } @@ -70,10 +75,10 @@ njs_regexp_free(void *p, void *memory_data) } -static njs_regexp_flags_t +static njs_regex_flags_t njs_regexp_value_flags(njs_vm_t *vm, const njs_value_t *regexp) { - njs_regexp_flags_t flags; + njs_regex_flags_t flags; njs_regexp_pattern_t *pattern; flags = 0; @@ -81,19 +86,19 @@ njs_regexp_value_flags(njs_vm_t *vm, const njs_value_t *regexp) pattern = njs_regexp_pattern(regexp); if (pattern->global) { - flags |= NJS_REGEXP_GLOBAL; + flags |= NJS_REGEX_GLOBAL; } if (pattern->ignore_case) { - flags |= NJS_REGEXP_IGNORE_CASE; + flags |= NJS_REGEX_IGNORE_CASE; } if (pattern->multiline) { - flags |= NJS_REGEXP_MULTILINE; + flags |= NJS_REGEX_MULTILINE; } if (pattern->sticky) { - flags |= NJS_REGEXP_STICKY; + flags |= NJS_REGEX_STICKY; } return flags; @@ -108,7 +113,7 @@ 
njs_regexp_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_int_t ret; njs_str_t string; njs_value_t source, *pattern, *flags; - njs_regexp_flags_t re_flags; + njs_regex_flags_t re_flags; pattern = njs_arg(args, nargs, 1); @@ -168,7 +173,7 @@ njs_regexp_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_int_t njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start, - size_t length, njs_regexp_flags_t flags) + size_t length, njs_regex_flags_t flags) { njs_regexp_t *regexp; njs_regexp_pattern_t *pattern; @@ -200,143 +205,30 @@ njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start, } -/* - * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with - * lone closing square brackets as invalid. Whereas according - * to ES6: 11.8.5 it is a valid regexp expression. - * - * 2) escaping zero byte characters as "\u0000". - * - * Escaping it here as a workaround. - */ - -njs_inline njs_int_t -njs_regexp_escape(njs_vm_t *vm, njs_str_t *text) -{ - size_t brackets, zeros; - u_char *p, *dst, *start, *end; - njs_bool_t in; - - start = text->start; - end = text->start + text->length; - - in = 0; - zeros = 0; - brackets = 0; - - for (p = start; p < end; p++) { - - switch (*p) { - case '[': - in = 1; - break; - - case ']': - if (!in) { - brackets++; - } - - in = 0; - break; - - case '\\': - p++; - - if (p == end || *p != '\0') { - break; - } - - /* Fall through. 
*/ - - case '\0': - zeros++; - break; - } - } - - if (!brackets && !zeros) { - return NJS_OK; - } - - text->length = text->length + brackets + zeros * njs_length("\\u0000"); - - text->start = njs_mp_alloc(vm->mem_pool, text->length); - if (njs_slow_path(text->start == NULL)) { - njs_memory_error(vm); - return NJS_ERROR; - } - - in = 0; - dst = text->start; - - for (p = start; p < end; p++) { - - switch (*p) { - case '[': - in = 1; - break; - - case ']': - if (!in) { - *dst++ = '\\'; - } - - in = 0; - break; - - case '\\': - *dst++ = *p++; - - if (p == end) { - goto done; - } - - if (*p != '\0') { - break; - } - - /* Fall through. */ - - case '\0': - dst = njs_cpymem(dst, "\\u0000", 6); - continue; - } - - *dst++ = *p; - } - -done: - - text->length = dst - text->start; - - return NJS_OK; -} - - -njs_regexp_flags_t +njs_regex_flags_t njs_regexp_flags(u_char **start, u_char *end) { - u_char *p; - njs_regexp_flags_t flags, flag; + u_char *p; + njs_regex_flags_t flags, flag; - flags = NJS_REGEXP_NO_FLAGS; + flags = NJS_REGEX_NO_FLAGS; for (p = *start; p < end; p++) { switch (*p) { case 'g': - flag = NJS_REGEXP_GLOBAL; + flag = NJS_REGEX_GLOBAL; break; case 'i': - flag = NJS_REGEXP_IGNORE_CASE; + flag = NJS_REGEX_IGNORE_CASE; break; case 'm': - flag = NJS_REGEXP_MULTILINE; + flag = NJS_REGEX_MULTILINE; break; case 'y': - flag = NJS_REGEXP_STICKY; + flag = NJS_REGEX_STICKY; break; default: @@ -364,15 +256,15 @@ invalid: *start = p + 1; - return NJS_REGEXP_INVALID_FLAG; + return NJS_REGEX_INVALID_FLAG; } njs_regexp_pattern_t * njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, - njs_regexp_flags_t flags) + njs_regex_flags_t flags) { - int options, ret; + int ret; u_char *p, *end; size_t size; njs_str_t text; @@ -382,15 +274,16 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, njs_regexp_pattern_t *pattern; size = 1; /* A trailing "/". 
*/ - size += ((flags & NJS_REGEXP_GLOBAL) != 0); - size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0); - size += ((flags & NJS_REGEXP_MULTILINE) != 0); + size += ((flags & NJS_REGEX_GLOBAL) != 0); + size += ((flags & NJS_REGEX_IGNORE_CASE) != 0); + size += ((flags & NJS_REGEX_MULTILINE) != 0); text.start = start; text.length = length; - ret = njs_regexp_escape(vm, &text); + ret = njs_regex_escape(vm->mem_pool, &text); if (njs_slow_path(ret != NJS_OK)) { + njs_memory_error(vm); return NULL; } @@ -412,39 +305,27 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, end = p; *p++ = '\0'; - pattern->global = ((flags & NJS_REGEXP_GLOBAL) != 0); + pattern->global = ((flags & NJS_REGEX_GLOBAL) != 0); if (pattern->global) { *p++ = 'g'; } -#ifdef PCRE_JAVASCRIPT_COMPAT - /* JavaScript compatibility has been introduced in PCRE-7.7. */ - options = PCRE_JAVASCRIPT_COMPAT; -#else - options = 0; -#endif - - pattern->ignore_case = ((flags & NJS_REGEXP_IGNORE_CASE) != 0); + pattern->ignore_case = ((flags & NJS_REGEX_IGNORE_CASE) != 0); if (pattern->ignore_case) { *p++ = 'i'; - options |= PCRE_CASELESS; } - pattern->multiline = ((flags & NJS_REGEXP_MULTILINE) != 0); + pattern->multiline = ((flags & NJS_REGEX_MULTILINE) != 0); if (pattern->multiline) { *p++ = 'm'; - options |= PCRE_MULTILINE; } - pattern->sticky = ((flags & NJS_REGEXP_STICKY) != 0); - if (pattern->sticky) { - options |= PCRE_ANCHORED; - } + pattern->sticky = ((flags & NJS_REGEX_STICKY) != 0); *p++ = '\0'; ret = njs_regexp_pattern_compile(vm, &pattern->regex[0], - &pattern->source[1], options); + &pattern->source[1], text.length, flags); if (njs_fast_path(ret >= 0)) { pattern->ncaptures = ret; @@ -454,7 +335,8 @@ njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, } ret = njs_regexp_pattern_compile(vm, &pattern->regex[1], - &pattern->source[1], options | PCRE_UTF8); + &pattern->source[1], text.length, + flags | NJS_REGEX_UTF8); if (njs_fast_path(ret >= 0)) { if 
(njs_slow_path(njs_regex_is_valid(&pattern->regex[0]) @@ -519,7 +401,7 @@ fail: static int njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, u_char *source, - int options) + size_t len, njs_regex_flags_t flags) { njs_int_t ret; njs_trace_handler_t handler; @@ -527,8 +409,8 @@ njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, u_char *source, handler = vm->trace.handler; vm->trace.handler = njs_regexp_compile_trace_handler; - /* Zero length means a zero-terminated string. */ - ret = njs_regex_compile(regex, source, 0, options, vm->regex_context); + ret = njs_regex_compile(regex, source, len, flags, vm->regex_compile_ctx, + &vm->trace); vm->trace.handler = handler; @@ -568,8 +450,7 @@ njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject, handler = vm->trace.handler; vm->trace.handler = njs_regexp_match_trace_handler; - ret = njs_regex_match(regex, subject, off, len, match_data, - vm->regex_context); + ret = njs_regex_match(regex, subject, off, len, match_data, &vm->trace); vm->trace.handler = handler; @@ -742,19 +623,19 @@ njs_regexp_prototype_flag(njs_vm_t *vm, njs_value_t *args, pattern = njs_regexp_pattern(this); switch (flag) { - case NJS_REGEXP_GLOBAL: + case NJS_REGEX_GLOBAL: yn = pattern->global; break; - case NJS_REGEXP_IGNORE_CASE: + case NJS_REGEX_IGNORE_CASE: yn = pattern->ignore_case; break; - case NJS_REGEXP_MULTILINE: + case NJS_REGEX_MULTILINE: yn = pattern->multiline; break; - case NJS_REGEXP_STICKY: + case NJS_REGEX_STICKY: default: yn = pattern->sticky; break; @@ -996,7 +877,8 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, goto not_found; } - match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context); + match_data = njs_regex_match_data(&pattern->regex[type], + vm->regex_generic_ctx); if (njs_slow_path(match_data == NULL)) { njs_memory_error(vm); return NJS_ERROR; @@ -1023,9 +905,8 @@ njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, return 
NJS_OK; } - if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) { - njs_regex_match_data_free(match_data, vm->regex_context); - + if (njs_slow_path(ret == NJS_ERROR)) { + njs_regex_match_data_free(match_data, vm->regex_generic_ctx); return NJS_ERROR; } @@ -1050,8 +931,8 @@ static njs_array_t * njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_utf8_t utf8, njs_string_prop_t *string, njs_regex_match_data_t *match_data) { - int *captures; u_char *start; + size_t c; int32_t size, length; uint32_t index; njs_int_t ret; @@ -1076,14 +957,13 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_utf8_t utf8, goto fail; } - captures = njs_regex_captures(match_data); - for (i = 0; i < pattern->ncaptures; i++) { n = 2 * i; + c = njs_regex_capture(match_data, n); - if (captures[n] != -1) { - start = &string->start[captures[n]]; - size = captures[n + 1] - captures[n]; + if (c != NJS_REGEX_UNSET) { + start = &string->start[c]; + size = njs_regex_capture(match_data, n + 1) - c; if (utf8 == NJS_STRING_UTF8) { length = njs_max(njs_utf8_length(start, size), 0); @@ -1109,21 +989,25 @@ njs_regexp_exec_result(njs_vm_t *vm, njs_value_t *r, njs_utf8_t utf8, goto fail; } + c = njs_regex_capture(match_data, 0); + if (utf8 == NJS_STRING_UTF8) { - index = njs_string_index(string, captures[0]); + index = njs_string_index(string, c); } else { - index = captures[0]; + index = c; } njs_set_number(&prop->value, index); if (pattern->global || pattern->sticky) { + c = njs_regex_capture(match_data, 1); + if (utf8 == NJS_STRING_UTF8) { - index = njs_string_index(string, captures[1]); + index = njs_string_index(string, c); } else { - index = captures[1]; + index = c; } njs_set_number(&value, index); @@ -1226,7 +1110,7 @@ fail: done: - njs_regex_match_data_free(match_data, vm->regex_context); + njs_regex_match_data_free(match_data, vm->regex_generic_ctx); return (ret == NJS_OK) ? 
array : NULL; } @@ -1919,7 +1803,7 @@ static const njs_object_prop_t njs_regexp_prototype_properties[] = .name = njs_string("global"), .value = njs_value(NJS_INVALID, 1, NAN), .getter = njs_native_function2(njs_regexp_prototype_flag, 0, - NJS_REGEXP_GLOBAL), + NJS_REGEX_GLOBAL), .setter = njs_value(NJS_UNDEFINED, 0, NAN), .writable = NJS_ATTRIBUTE_UNSET, .configurable = 1, @@ -1931,7 +1815,7 @@ static const njs_object_prop_t njs_regexp_prototype_properties[] = .name = njs_string("ignoreCase"), .value = njs_value(NJS_INVALID, 1, NAN), .getter = njs_native_function2(njs_regexp_prototype_flag, 0, - NJS_REGEXP_IGNORE_CASE), + NJS_REGEX_IGNORE_CASE), .setter = njs_value(NJS_UNDEFINED, 0, NAN), .writable = NJS_ATTRIBUTE_UNSET, .configurable = 1, @@ -1943,7 +1827,7 @@ static const njs_object_prop_t njs_regexp_prototype_properties[] = .name = njs_string("multiline"), .value = njs_value(NJS_INVALID, 1, NAN), .getter = njs_native_function2(njs_regexp_prototype_flag, 0, - NJS_REGEXP_MULTILINE), + NJS_REGEX_MULTILINE), .setter = njs_value(NJS_UNDEFINED, 0, NAN), .writable = NJS_ATTRIBUTE_UNSET, .configurable = 1, @@ -1966,7 +1850,7 @@ static const njs_object_prop_t njs_regexp_prototype_properties[] = .name = njs_string("sticky"), .value = njs_value(NJS_INVALID, 1, NAN), .getter = njs_native_function2(njs_regexp_prototype_flag, 0, - NJS_REGEXP_STICKY), + NJS_REGEX_STICKY), .setter = njs_value(NJS_UNDEFINED, 0, NAN), .writable = NJS_ATTRIBUTE_UNSET, .configurable = 1, diff --git a/src/njs_regexp.h b/src/njs_regexp.h index 202b65b7..8eaa3ede 100644 --- a/src/njs_regexp.h +++ b/src/njs_regexp.h @@ -8,22 +8,12 @@ #define _NJS_REGEXP_H_INCLUDED_ -typedef enum { - NJS_REGEXP_INVALID_FLAG = -1, - NJS_REGEXP_NO_FLAGS = 0, - NJS_REGEXP_GLOBAL = 1, - NJS_REGEXP_IGNORE_CASE = 2, - NJS_REGEXP_MULTILINE = 4, - NJS_REGEXP_STICKY = 8, -} njs_regexp_flags_t; - - njs_int_t njs_regexp_init(njs_vm_t *vm); njs_int_t njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start, - size_t 
length, njs_regexp_flags_t flags); -njs_regexp_flags_t njs_regexp_flags(u_char **start, u_char *end); + size_t length, njs_regex_flags_t flags); +njs_regex_flags_t njs_regexp_flags(u_char **start, u_char *end); njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm, - u_char *string, size_t length, njs_regexp_flags_t flags); + u_char *string, size_t length, njs_regex_flags_t flags); njs_int_t njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject, size_t off, size_t len, njs_regex_match_data_t *d); njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern); diff --git a/src/njs_string.c b/src/njs_string.c index a3e8da67..c135c43c 100644 --- a/src/njs_string.c +++ b/src/njs_string.c @@ -3086,7 +3086,7 @@ static njs_int_t njs_string_prototype_search(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_index_t unused) { - int *captures; + size_t c; njs_int_t ret, index; njs_uint_t n; njs_value_t *value; @@ -3145,10 +3145,10 @@ njs_string_prototype_search(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, ret = njs_regexp_match(vm, &pattern->regex[n], string.start, 0, string.size, vm->single_match_data); if (ret >= 0) { - captures = njs_regex_captures(vm->single_match_data); - index = njs_string_index(&string, captures[0]); + c = njs_regex_capture(vm->single_match_data, 0); + index = njs_string_index(&string, c); - } else if (ret != NJS_REGEX_NOMATCH) { + } else if (ret == NJS_ERROR) { return NJS_ERROR; } } @@ -3231,7 +3231,7 @@ static njs_int_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, njs_regexp_pattern_t *pattern) { - int *captures; + size_t c0, c1; int32_t size, length; njs_int_t ret; njs_utf8_t utf8; @@ -3271,7 +3271,7 @@ njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, ret = njs_regexp_match(vm, &pattern->regex[type], p, 0, string.size, vm->single_match_data); if (ret < 0) { - if (njs_fast_path(ret == NJS_REGEX_NOMATCH)) { + if (njs_fast_path(ret == NJS_DECLINED)) { break; } @@ -3285,10 
+3285,11 @@ njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, return ret; } - captures = njs_regex_captures(vm->single_match_data); - start = p + captures[0]; + c0 = njs_regex_capture(vm->single_match_data, 0); + c1 = njs_regex_capture(vm->single_match_data, 1); + start = p + c0; - if (captures[1] == 0) { + if (c1 == 0) { if (start < end) { p = (utf8 != NJS_STRING_BYTE) ? njs_utf8_next(start, end) : start + 1; @@ -3303,10 +3304,10 @@ njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, length = 0; } else { - p += captures[1]; - string.size -= captures[1]; + p += c1; + string.size -= c1; - size = captures[1] - captures[0]; + size = c1 - c0; length = njs_string_calc_length(utf8, start, size); } diff --git a/src/njs_vm.h b/src/njs_vm.h index 48c22e76..66104e6f 100644 --- a/src/njs_vm.h +++ b/src/njs_vm.h @@ -178,7 +178,8 @@ struct njs_vm_s { njs_vm_shared_t *shared; - njs_regex_context_t *regex_context; + njs_regex_generic_ctx_t *regex_generic_ctx; + njs_regex_compile_ctx_t *regex_compile_ctx; njs_regex_match_data_t *single_match_data; njs_array_t *promise_reason; diff --git a/src/test/njs_unit_test.c b/src/test/njs_unit_test.c index 13b334ea..11193dea 100644 --- a/src/test/njs_unit_test.c +++ b/src/test/njs_unit_test.c @@ -6,6 +6,8 @@ #include +#include + #include "njs_externals_test.h" -- 2.47.3