aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/regexp.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2008-03-19 02:40:37 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2008-03-19 02:40:37 +0000
commit965a2a191acdc6fc1cbdd2736a178f4d900e3765 (patch)
tree5b5c5df4ebde1ccf11528891d0a8afdd70ec36c5 /src/backend/utils/adt/regexp.c
parent8436f9a0364a6a16dbf7b250597986a97c9e6f09 (diff)
downloadpostgresql-965a2a191acdc6fc1cbdd2736a178f4d900e3765.tar.gz
postgresql-965a2a191acdc6fc1cbdd2736a178f4d900e3765.zip
Fix regexp substring matching (substring(string from pattern)) for the corner
case where there is a match to the pattern overall but the user has specified a parenthesized subexpression and that subexpression hasn't got a match. An example is substring('foo' from 'foo(bar)?'). This should return NULL, since (bar) isn't matched, but it was mistakenly returning the whole-pattern match instead (ie, 'foo'). Per bug #4044 from Rui Martins. This has been broken since the beginning; patch in all supported versions. The old behavior was sufficiently inconsistent that it's impossible to believe anyone is depending on it.
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r--src/backend/utils/adt/regexp.c58
1 files changed, 33 insertions, 25 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 359fd33ceea..4a96a1bb70c 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.78 2008/01/01 19:45:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.79 2008/03/19 02:40:37 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -576,8 +576,13 @@ textregexsubstr(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
- bool match;
+ regex_t *re;
regmatch_t pmatch[2];
+ int so,
+ eo;
+
+ /* Compile RE */
+ re = RE_compile_and_cache(p, regex_flavor);
/*
* We pass two regmatch_t structs to get info about the overall match and
@@ -585,34 +590,37 @@ textregexsubstr(PG_FUNCTION_ARGS)
* is a parenthesized subexpression, we return what it matched; else
* return what the whole regexp matched.
*/
- match = RE_compile_and_execute(p,
- VARDATA_ANY(s),
- VARSIZE_ANY_EXHDR(s),
- regex_flavor,
- 2, pmatch);
-
- /* match? then return the substring matching the pattern */
- if (match)
- {
- int so,
- eo;
+ if (!RE_execute(re,
+ VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
+ 2, pmatch))
+ PG_RETURN_NULL(); /* definitely no match */
+ if (re->re_nsub > 0)
+ {
+ /* has parenthesized subexpressions, use the first one */
so = pmatch[1].rm_so;
eo = pmatch[1].rm_eo;
- if (so < 0 || eo < 0)
- {
- /* no parenthesized subexpression */
- so = pmatch[0].rm_so;
- eo = pmatch[0].rm_eo;
- }
-
- return DirectFunctionCall3(text_substr,
- PointerGetDatum(s),
- Int32GetDatum(so + 1),
- Int32GetDatum(eo - so));
}
+ else
+ {
+ /* no parenthesized subexpression, use whole match */
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+ }
+
+ /*
+ * It is possible to have a match to the whole pattern but no match
+ * for a subexpression; for example 'foo(bar)?' is considered to match
+ * 'foo' but there is no subexpression match. So this extra test for
+ * match failure is not redundant.
+ */
+ if (so < 0 || eo < 0)
+ PG_RETURN_NULL();
- PG_RETURN_NULL();
+ return DirectFunctionCall3(text_substr,
+ PointerGetDatum(s),
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so));
}
/*