about summary refs log tree commit diff
path: root/src/backend/utils/adt/varlena.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r-- src/backend/utils/adt/varlena.c 101
1 files changed, 53 insertions, 48 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 348b5566de4..acb87417341 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -4359,34 +4359,36 @@ replace_text(PG_FUNCTION_ARGS)
}
/*
- * check_replace_text_has_escape_char
+ * check_replace_text_has_escape
*
- * check whether replace_text contains escape char.
+ * Returns 0 if text contains no backslashes that need processing.
+ * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
+ * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
*/
-static bool
-check_replace_text_has_escape_char(const text *replace_text)
+static int
+check_replace_text_has_escape(const text *replace_text)
{
+ int result = 0;
const char *p = VARDATA_ANY(replace_text);
const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
- if (pg_database_encoding_max_length() == 1)
- {
- for (; p < p_end; p++)
- {
- if (*p == '\\')
- return true;
- }
- }
- else
+ while (p < p_end)
{
- for (; p < p_end; p += pg_mblen(p))
+ /* Find next escape char, if any. */
+ p = memchr(p, '\\', p_end - p);
+ if (p == NULL)
+ break;
+ p++;
+ /* Note: a backslash at the end doesn't require extra processing. */
+ if (p < p_end)
{
- if (*p == '\\')
- return true;
+ if (*p >= '1' && *p <= '9')
+ return 2; /* Found a submatch specifier, so done */
+ result = 1; /* Found some other sequence, keep looking */
+ p++;
}
}
-
- return false;
+ return result;
}
/*
@@ -4403,25 +4405,17 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
{
const char *p = VARDATA_ANY(replace_text);
const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
- int eml = pg_database_encoding_max_length();
- for (;;)
+ while (p < p_end)
{
const char *chunk_start = p;
int so;
int eo;
- /* Find next escape char. */
- if (eml == 1)
- {
- for (; p < p_end && *p != '\\'; p++)
- /* nothing */ ;
- }
- else
- {
- for (; p < p_end && *p != '\\'; p += pg_mblen(p))
- /* nothing */ ;
- }
+ /* Find next escape char, if any. */
+ p = memchr(p, '\\', p_end - p);
+ if (p == NULL)
+ p = p_end;
/* Copy the text we just scanned over, if any. */
if (p > chunk_start)
@@ -4473,7 +4467,7 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
continue;
}
- if (so != -1 && eo != -1)
+ if (so >= 0 && eo >= 0)
{
/*
* Copy the text that is back reference of regexp. Note so and eo
@@ -4491,36 +4485,37 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
}
}
-#define REGEXP_REPLACE_BACKREF_CNT 10
-
/*
* replace_text_regexp
*
- * replace substring(s) in src_text that match regexp with replace_text.
+ * replace substring(s) in src_text that match pattern with replace_text.
+ * The replace_text can contain backslash markers to substitute
+ * (parts of) the matched text.
*
+ * cflags: regexp compile flags.
+ * collation: collation to use.
* search_start: the character (not byte) offset in src_text at which to
* begin searching.
* n: if 0, replace all matches; if > 0, replace only the N'th match.
- *
- * Note: to avoid having to include regex.h in builtins.h, we declare
- * the regexp argument as void *, but really it's regex_t *.
*/
text *
-replace_text_regexp(text *src_text, void *regexp,
+replace_text_regexp(text *src_text, text *pattern_text,
text *replace_text,
+ int cflags, Oid collation,
int search_start, int n)
{
text *ret_text;
- regex_t *re = (regex_t *) regexp;
+ regex_t *re;
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
int nmatches = 0;
StringInfoData buf;
- regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
+ regmatch_t pmatch[10]; /* main match, plus \1 to \9 */
+ int nmatch = lengthof(pmatch);
pg_wchar *data;
size_t data_len;
int data_pos;
char *start_ptr;
- bool have_escape;
+ int escape_status;
initStringInfo(&buf);
@@ -4528,8 +4523,19 @@ replace_text_regexp(text *src_text, void *regexp,
data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
- /* Check whether replace_text has escape char. */
- have_escape = check_replace_text_has_escape_char(replace_text);
+ /* Check whether replace_text has escapes, especially regexp submatches. */
+ escape_status = check_replace_text_has_escape(replace_text);
+
+ /* If no regexp submatches, we can use REG_NOSUB. */
+ if (escape_status < 2)
+ {
+ cflags |= REG_NOSUB;
+ /* Also tell pg_regexec we only want the whole-match location. */
+ nmatch = 1;
+ }
+
+ /* Prepare the regexp. */
+ re = RE_compile_and_cache(pattern_text, cflags, collation);
/* start_ptr points to the data_pos'th character of src_text */
start_ptr = (char *) VARDATA_ANY(src_text);
@@ -4546,7 +4552,7 @@ replace_text_regexp(text *src_text, void *regexp,
data_len,
search_start,
NULL, /* no details */
- REGEXP_REPLACE_BACKREF_CNT,
+ nmatch,
pmatch,
0);
@@ -4602,10 +4608,9 @@ replace_text_regexp(text *src_text, void *regexp,
}
/*
- * Copy the replace_text. Process back references when the
- * replace_text has escape characters.
+ * Copy the replace_text, processing escapes if any are present.
*/
- if (have_escape)
+ if (escape_status > 0)
appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
start_ptr, data_pos);
else