aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- src/backend/utils/adt/regexp.c 73
1 files changed, 65 insertions, 8 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index da13a875eb0..ab44846e0b3 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -708,20 +708,42 @@ similar_escape(PG_FUNCTION_ARGS)
* We surround the transformed input string with
* ^(?: ... )$
* which requires some explanation. We need "^" and "$" to force
- * the pattern to match the entire input string as per SQL99 spec.
+ * the pattern to match the entire input string as per the SQL spec.
* The "(?:" and ")" are a non-capturing set of parens; we have to have
* parens in case the string contains "|", else the "^" and "$" will
* be bound into the first and last alternatives which is not what we
* want, and the parens must be non-capturing because we don't want them
* to count when selecting output for SUBSTRING.
+ *
+ * When the pattern is divided into three parts by escape-double-quotes,
+ * what we emit is
+ * ^(?:part1){1,1}?(part2){1,1}(?:part3)$
+ * which requires even more explanation. The "{1,1}?" on part1 makes it
+ * non-greedy so that it will match the smallest possible amount of text,
+ * not the largest, as required by SQL. The plain parens around part2
+ * are capturing parens so that that part is what controls the result of
+ * SUBSTRING. The "{1,1}" forces part2 to be greedy, so that it matches
+ * the largest possible amount of text; hence part3 must match the
+ * smallest amount of text, as required by SQL. We don't need an explicit
+ * greediness marker on part3. Note that this also confines the effects
+ * of any "|" characters to the respective part, which is what we want.
+ *
+ * The SQL spec says that SUBSTRING's pattern must contain exactly two
+ * escape-double-quotes, but we only complain if there are more than two.
+ * With none, we act as though part1 and part3 are empty; with one, we
+ * act as though part3 is empty. Both behaviors fall out of omitting
+ * the relevant part separators in the above expansion. If the result
+ * of this function is used in a plain regexp match (SIMILAR TO), the
+ * escape-double-quotes have no effect on the match behavior.
*----------
*/
/*
- * We need room for the prefix/postfix plus as many as 3 output bytes per
- * input byte; since the input is at most 1GB this can't overflow
+ * We need room for the prefix/postfix and part separators, plus as many
+ * as 3 output bytes per input byte; since the input is at most 1GB this
+ * can't overflow size_t.
*/
- result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
+ result = (text *) palloc(VARHDRSZ + 23 + 3 * (size_t) plen);
r = VARDATA(result);
*r++ = '^';
@@ -760,7 +782,7 @@ similar_escape(PG_FUNCTION_ARGS)
}
else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
{
- /* SQL99 escape character; do not send to output */
+ /* SQL escape character; do not send to output */
afterescape = true;
}
else
@@ -784,10 +806,45 @@ similar_escape(PG_FUNCTION_ARGS)
/* fast path */
if (afterescape)
{
- if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
- *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
+ if (pchar == '"' && !incharclass) /* escape-double-quote? */
+ {
+ /* emit appropriate part separator, per notes above */
+ if (nquotes == 0)
+ {
+ *r++ = ')';
+ *r++ = '{';
+ *r++ = '1';
+ *r++ = ',';
+ *r++ = '1';
+ *r++ = '}';
+ *r++ = '?';
+ *r++ = '(';
+ }
+ else if (nquotes == 1)
+ {
+ *r++ = ')';
+ *r++ = '{';
+ *r++ = '1';
+ *r++ = ',';
+ *r++ = '1';
+ *r++ = '}';
+ *r++ = '(';
+ *r++ = '?';
+ *r++ = ':';
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
+ errmsg("SQL regular expression may not contain more than two escape-double-quote separators")));
+ nquotes++;
+ }
else
{
+ /*
+ * We allow any character at all to be escaped; notably, this
+ * allows access to POSIX character-class escapes such as
+ * "\d". The SQL spec is considerably more restrictive.
+ */
*r++ = '\\';
*r++ = pchar;
}
@@ -795,7 +852,7 @@ similar_escape(PG_FUNCTION_ARGS)
}
else if (e && pchar == *e)
{
- /* SQL99 escape character; do not send to output */
+ /* SQL escape character; do not send to output */
afterescape = true;
}
else if (incharclass)