diff options
author | Bruce Momjian <bruce@momjian.us> | 2005-07-10 04:54:33 +0000 |
---|---|---|
committer | Bruce Momjian <bruce@momjian.us> | 2005-07-10 04:54:33 +0000 |
commit | 75a64eeb4b9dc4ad790ddc87f1f8357c2049576a (patch) | |
tree | 1a59ab33e18a4f136942520a932e12979cfba11c /src/backend/utils/adt/varlena.c | |
parent | 73e2431817fec3d251a517ac185d210fda0ffcd6 (diff) | |
download | postgresql-75a64eeb4b9dc4ad790ddc87f1f8357c2049576a.tar.gz postgresql-75a64eeb4b9dc4ad790ddc87f1f8357c2049576a.zip |
I have prepared the patch that implements regexp_replace again.
The specification of this function is as follows.
regexp_replace(source text, pattern text, replacement text [, flags text])
returns text
Replaces the substring of the source text that matches the regular
expression with the replacement text.
- pattern is a regular expression pattern.
- replacement is the replacement string, which can contain '\1'-'\9' and '\&'.
'\1'-'\9': back reference to the n'th subexpression.
'\&' : entire matched string.
- flags can use the following values:
g: global (replace all)
i: ignore case
When flags is not specified, matching is case-sensitive and only the
first instance is replaced.
Atsushi Ogawa
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r-- | src/backend/utils/adt/varlena.c | 222 |
1 files changed, 221 insertions, 1 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 01cd5d2b7a3..ba40747df41 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.126 2005/07/07 04:36:08 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.127 2005/07/10 04:54:30 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -28,6 +28,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +#include "regex/regex.h" typedef struct varlena unknown; @@ -1994,6 +1995,225 @@ replace_text(PG_FUNCTION_ARGS) } /* + * check_replace_text_has_escape_char + * check whether replace_text has escape char. + */ +static bool +check_replace_text_has_escape_char(const text *replace_text) +{ + const char *p = VARDATA(replace_text); + const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ); + + if (pg_database_encoding_max_length() == 1) + { + for (; p < p_end; p++) + if (*p == '\\') return true; + } + else + { + for (; p < p_end; p += pg_mblen(p)) + if (*p == '\\') return true; + } + + return false; +} + +/* + * appendStringInfoRegexpSubstr + * append string by using back references of regexp. + */ +static void +appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, + regmatch_t *pmatch, text *src_text) +{ + const char *p = VARDATA(replace_text); + const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ); + + int eml = pg_database_encoding_max_length(); + + int substr_start = 1; + int ch_cnt; + + int so; + int eo; + + while (1) + { + /* Find escape char. */ + ch_cnt = 0; + if (eml == 1) + { + for (; p < p_end && *p != '\\'; p++) + ch_cnt++; + } + else + { + for (; p < p_end && *p != '\\'; p += pg_mblen(p)) + ch_cnt++; + } + + /* + * Copy the text when there is a text in the left of escape char + * or escape char is not found. 
+ */ + if (ch_cnt) + { + text *append_text = text_substring(PointerGetDatum(replace_text), + substr_start, ch_cnt, false); + appendStringInfoString(str, PG_TEXT_GET_STR(append_text)); + pfree(append_text); + } + substr_start += ch_cnt + 1; + + if (p >= p_end) /* When escape char is not found. */ + break; + + /* See the next character of escape char. */ + p++; + so = eo = -1; + + if (*p >= '1' && *p <= '9') + { + /* Use the back reference of regexp. */ + int idx = *p - '0'; + so = pmatch[idx].rm_so; + eo = pmatch[idx].rm_eo; + p++; + substr_start++; + } + else if (*p == '&') + { + /* Use the entire matched string. */ + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + p++; + substr_start++; + } + + if (so != -1 && eo != -1) + { + /* Copy the text that is back reference of regexp. */ + text *append_text = text_substring(PointerGetDatum(src_text), + so + 1, (eo - so), false); + appendStringInfoString(str, PG_TEXT_GET_STR(append_text)); + pfree(append_text); + } + } +} + +#define REGEXP_REPLACE_BACKREF_CNT 10 + +/* + * replace_text_regexp + * replace text that matches to regexp in src_text to replace_text. + */ +Datum +replace_text_regexp(PG_FUNCTION_ARGS) +{ + text *ret_text; + text *src_text = PG_GETARG_TEXT_P(0); + int src_text_len = VARSIZE(src_text) - VARHDRSZ; + regex_t *re = (regex_t *)PG_GETARG_POINTER(1); + text *replace_text = PG_GETARG_TEXT_P(2); + bool global = PG_GETARG_BOOL(3); + StringInfo str = makeStringInfo(); + int regexec_result; + regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT]; + pg_wchar *data; + size_t data_len; + int search_start; + int data_pos; + bool have_escape; + + /* Convert data string to wide characters. */ + data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len); + + /* Check whether replace_text has escape char. 
*/ + have_escape = check_replace_text_has_escape_char(replace_text); + + for (search_start = data_pos = 0; search_start <= data_len;) + { + regexec_result = pg_regexec(re, + data, + data_len, + search_start, + NULL, /* no details */ + REGEXP_REPLACE_BACKREF_CNT, + pmatch, + 0); + + if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) + { + char errMsg[100]; + + /* re failed??? */ + pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("regular expression failed: %s", errMsg))); + } + + if (regexec_result == REG_NOMATCH) + break; + + /* + * Copy the text when there is a text in the left of matched position. + */ + if (pmatch[0].rm_so - data_pos > 0) + { + text *left_text = text_substring(PointerGetDatum(src_text), + data_pos + 1, + pmatch[0].rm_so - data_pos, false); + appendStringInfoString(str, PG_TEXT_GET_STR(left_text)); + pfree(left_text); + } + + /* + * Copy the replace_text. Process back references when the + * replace_text has escape characters. + */ + if (have_escape) + appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text); + else + appendStringInfoString(str, PG_TEXT_GET_STR(replace_text)); + + search_start = data_pos = pmatch[0].rm_eo; + + /* + * When global option is off, replace the first instance only. + */ + if (!global) + break; + + /* + * Search from next character when the matching text is zero width. + */ + if (pmatch[0].rm_so == pmatch[0].rm_eo) + search_start++; + } + + /* + * Copy the text when there is a text at the right of last matched + * or regexp is not matched. 
+ */ + if (data_pos < data_len) + { + text *right_text = text_substring(PointerGetDatum(src_text), + data_pos + 1, -1, true); + appendStringInfoString(str, PG_TEXT_GET_STR(right_text)); + pfree(right_text); + } + + ret_text = PG_STR_GET_TEXT(str->data); + pfree(str->data); + pfree(str); + pfree(data); + + PG_RETURN_TEXT_P(ret_text); +} + +/* * split_text * parse input string * return ord item (1 based) |