diff options
author | Bruce Momjian <bruce@momjian.us> | 2005-07-10 04:54:33 +0000 |
---|---|---|
committer | Bruce Momjian <bruce@momjian.us> | 2005-07-10 04:54:33 +0000 |
commit | 75a64eeb4b9dc4ad790ddc87f1f8357c2049576a (patch) | |
tree | 1a59ab33e18a4f136942520a932e12979cfba11c /src/backend/utils/adt/regexp.c | |
parent | 73e2431817fec3d251a517ac185d210fda0ffcd6 (diff) | |
download | postgresql-75a64eeb4b9dc4ad790ddc87f1f8357c2049576a.tar.gz postgresql-75a64eeb4b9dc4ad790ddc87f1f8357c2049576a.zip |
I made the patch that implements regexp_replace again.
The specification of this function is as follows.
regexp_replace(source text, pattern text, replacement text, [flags
text])
returns text
Replace string that matches to regular expression in source text to
replacement text.
- pattern is regular expression pattern.
- replacement is replace string that can use '\1'-'\9', and '\&'.
'\1'-'\9': back reference to the n'th subexpression.
'\&' : entire matched string.
- flags can use the following values:
g: global (replace all)
i: ignore case
When the flags is not specified, case sensitive, replace the first
instance only.
Atsushi Ogawa
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- | src/backend/utils/adt/regexp.c | 165 |
1 files changed, 121 insertions, 44 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index d8b038a09a8..b8112a2cb95 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.56 2004/12/31 22:01:22 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.57 2005/07/10 04:54:30 momjian Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -81,38 +81,27 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ /* - * RE_compile_and_execute - compile and execute a RE, caching if possible + * RE_compile_and_cache - compile a RE, caching if possible * - * Returns TRUE on match, FALSE on no match + * Returns regex_t * - * text_re --- the pattern, expressed as an *untoasted* TEXT object - * dat --- the data to match against (need not be null-terminated) - * dat_len --- the length of the data string - * cflags --- compile options for the pattern - * nmatch, pmatch --- optional return area for match details + * text_re --- the pattern, expressed as an *untoasted* TEXT object + * cflags --- compile options for the pattern * - * Both pattern and data are given in the database encoding. We internally - * convert to array of pg_wchar which is what Spencer's regex package wants. + * Pattern is given in the database encoding. We internally convert to + * array of pg_wchar which is what Spencer's regex package wants. */ -static bool -RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len, - int cflags, int nmatch, regmatch_t *pmatch) +static regex_t +RE_compile_and_cache(text *text_re, int cflags) { int text_re_len = VARSIZE(text_re); - pg_wchar *data; - size_t data_len; pg_wchar *pattern; size_t pattern_len; int i; int regcomp_result; - int regexec_result; cached_re_str re_temp; char errMsg[100]; - /* Convert data string to wide characters */ - data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(dat, data, dat_len); - /* * Look for a match among previously compiled REs. Since the data * structure is self-organizing with most-used entries at the front, @@ -134,28 +123,7 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len, re_array[0] = re_temp; } - /* Perform RE match and return result */ - regexec_result = pg_regexec(&re_array[0].cre_re, - data, - data_len, - NULL, /* no details */ - nmatch, - pmatch, - 0); - - pfree(data); - - if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) - { - /* re failed??? */ - pg_regerror(regexec_result, &re_array[0].cre_re, - errMsg, sizeof(errMsg)); - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("regular expression failed: %s", errMsg))); - } - - return (regexec_result == REG_OKAY); + return re_array[0].cre_re; } } @@ -220,10 +188,45 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len, re_array[0] = re_temp; num_res++; + return re_array[0].cre_re; +} + +/* + * RE_compile_and_execute - compile and execute a RE + * + * Returns TRUE on match, FALSE on no match + * + * text_re --- the pattern, expressed as an *untoasted* TEXT object + * dat --- the data to match against (need not be null-terminated) + * dat_len --- the length of the data string + * cflags --- compile options for the pattern + * nmatch, pmatch --- optional return area for match details + * + * Both pattern and data are given in the database encoding. We internally + * convert to array of pg_wchar which is what Spencer's regex package wants. + */ +static bool +RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len, + int cflags, int nmatch, regmatch_t *pmatch) +{ + pg_wchar *data; + size_t data_len; + int regexec_result; + regex_t re; + char errMsg[100]; + + /* Convert data string to wide characters */ + data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(dat, data, dat_len); + + /* Compile RE */ + re = RE_compile_and_cache(text_re, cflags); + /* Perform RE match and return result */ regexec_result = pg_regexec(&re_array[0].cre_re, data, data_len, + 0, NULL, /* no details */ nmatch, pmatch, @@ -428,15 +431,89 @@ textregexsubstr(PG_FUNCTION_ARGS) eo = pmatch[0].rm_eo; } - return (DirectFunctionCall3(text_substr, + return DirectFunctionCall3(text_substr, PointerGetDatum(s), Int32GetDatum(so + 1), - Int32GetDatum(eo - so))); + Int32GetDatum(eo - so)); } PG_RETURN_NULL(); } +/* + * textregexreplace_noopt() + * Return a replace string matched by a regular expression. + * This function is a version that doesn't specify the option of + * textregexreplace. This is case sensitive, replace the first + * instance only. + */ +Datum +textregexreplace_noopt(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_P(0); + text *p = PG_GETARG_TEXT_P(1); + text *r = PG_GETARG_TEXT_P(2); + regex_t re; + + re = RE_compile_and_cache(p, regex_flavor); + + return DirectFunctionCall4(replace_text_regexp, + PointerGetDatum(s), + PointerGetDatum(&re), + PointerGetDatum(r), + BoolGetDatum(false)); +} + +/* + * textregexreplace() + * Return a replace string matched by a regular expression. + */ +Datum +textregexreplace(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_P(0); + text *p = PG_GETARG_TEXT_P(1); + text *r = PG_GETARG_TEXT_P(2); + text *opt = PG_GETARG_TEXT_P(3); + char *opt_p = VARDATA(opt); + int opt_len = (VARSIZE(opt) - VARHDRSZ); + int i; + bool global = false; + bool ignorecase = false; + regex_t re; + + /* parse options */ + for (i = 0; i < opt_len; i++) + { + switch (opt_p[i]) + { + case 'i': + ignorecase = true; + break; + case 'g': + global = true; + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid option of regexp_replace: %c", + opt_p[i]))); + break; + } + } + + if (ignorecase) + re = RE_compile_and_cache(p, regex_flavor | REG_ICASE); + else + re = RE_compile_and_cache(p, regex_flavor); + + return DirectFunctionCall4(replace_text_regexp, + PointerGetDatum(s), + PointerGetDatum(&re), + PointerGetDatum(r), + BoolGetDatum(global)); +} + /* similar_escape() * Convert a SQL99 regexp pattern to POSIX style, so it can be used by * our regexp engine. |