about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/regexp.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2006-04-13 18:01:31 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2006-04-13 18:01:31 +0000
commit: cc39aca7d4a317a66db51a2180f7fee6e76084bd (patch)
tree: 55d7fe6adec711809d548505b03703610903ef6e /src/backend/utils/adt/regexp.c
parent: dcdf738abb013082294b9a8afb15b8405e042d47 (diff)
download: postgresql-cc39aca7d4a317a66db51a2180f7fee6e76084bd.tar.gz
postgresql-cc39aca7d4a317a66db51a2180f7fee6e76084bd.zip
Fix similar_escape() so that SIMILAR TO works properly for patterns involving
alternatives ("|" symbol). The original coding allowed the added ^ and $ constraints to be absorbed into the first and last alternatives, producing a pattern that would match more than it should. Per report from Eric Noriega. I also changed the pattern to add an ARE director ("***:"), ensuring that SIMILAR TO patterns do not change behavior if regex_flavor is changed. This is necessary to make the non-capturing parentheses work, and seems like a good idea on general principles. Back-patched as far as 7.4. 7.3 also has the bug, but a fix seems impractical because that version's regex engine doesn't have non-capturing parens.
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- src/backend/utils/adt/regexp.c 32
1 files changed, 29 insertions, 3 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 3db5ca9b6cf..c9d61de4180 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.62 2006/03/05 15:58:43 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.63 2006/04/13 18:01:31 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -549,11 +549,36 @@ similar_escape(PG_FUNCTION_ARGS)
errhint("Escape string must be empty or one character.")));
}
- /* We need room for ^, $, and up to 2 output bytes per input byte */
- result = (text *) palloc(VARHDRSZ + 2 + 2 * plen);
+ /*----------
+ * We surround the transformed input string with
+ * ***:^(?: ... )$
+ * which is bizarre enough to require some explanation. "***:" is a
+ * director prefix to force the regex to be treated as an ARE regardless
+ * of the current regex_flavor setting. We need "^" and "$" to force
+ * the pattern to match the entire input string as per SQL99 spec. The
+ * "(?:" and ")" are a non-capturing set of parens; we have to have
+ * parens in case the string contains "|", else the "^" and "$" will
+ * be bound into the first and last alternatives which is not what we
+ * want, and the parens must be non capturing because we don't want them
+ * to count when selecting output for SUBSTRING.
+ *----------
+ */
+
+ /*
+ * We need room for the prefix/postfix plus as many as 2 output bytes per
+ * input byte
+ */
+ result = (text *) palloc(VARHDRSZ + 10 + 2 * plen);
r = VARDATA(result);
+ *r++ = '*';
+ *r++ = '*';
+ *r++ = '*';
+ *r++ = ':';
*r++ = '^';
+ *r++ = '(';
+ *r++ = '?';
+ *r++ = ':';
while (plen > 0)
{
@@ -593,6 +618,7 @@ similar_escape(PG_FUNCTION_ARGS)
p++, plen--;
}
+ *r++ = ')';
*r++ = '$';
VARATT_SIZEP(result) = r - ((char *) result);