aboutsummaryrefslogtreecommitdiff
path: root/src/backend/regex/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/regex/regexec.c')
-rw-r--r-- src/backend/regex/regexec.c 48
1 file changed, 35 insertions, 13 deletions
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index 694a03965c0..d18672c7c7c 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -448,6 +448,7 @@ cfindloop(struct vars * v,
close = v->search_start;
do
{
+ /* Search with the search RE for match range at/beyond "close" */
MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
if (ISERR())
@@ -456,10 +457,11 @@ cfindloop(struct vars * v,
return v->err;
}
if (close == NULL)
- break; /* NOTE BREAK */
+ break; /* no more possible match anywhere */
assert(cold != NULL);
open = cold;
cold = NULL;
+ /* Search for matches starting between "open" and "close" inclusive */
MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
for (begin = open; begin <= close; begin++)
{
@@ -468,6 +470,7 @@ cfindloop(struct vars * v,
estop = v->stop;
for (;;)
{
+ /* Here we use the top node's detailed RE */
if (shorter)
end = shortest(v, d, begin, estart,
estop, (chr **) NULL, &hitend);
@@ -482,8 +485,9 @@ cfindloop(struct vars * v,
if (hitend && cold == NULL)
cold = begin;
if (end == NULL)
- break; /* NOTE BREAK OUT */
+ break; /* no match with this begin point, try next */
MDEBUG(("tentative end %ld\n", LOFF(end)));
+ /* Dissect the potential match to see if it really matches */
zapallsubs(v->pmatch, v->nmatch);
er = cdissect(v, v->g->tree, begin, end);
if (er == REG_OKAY)
@@ -502,21 +506,28 @@ cfindloop(struct vars * v,
*coldp = cold;
return er;
}
- /* try next shorter/longer match with same begin point */
+ /* Try next longer/shorter match with same begin point */
if (shorter)
{
if (end == estop)
- break; /* NOTE BREAK OUT */
+ break; /* no more, so try next begin point */
estart = end + 1;
}
else
{
if (end == begin)
- break; /* NOTE BREAK OUT */
+ break; /* no more, so try next begin point */
estop = end - 1;
}
} /* end loop over endpoint positions */
} /* end loop over beginning positions */
+
+ /*
+ * If we get here, there is no possible match starting at or before
+ * "close", so consider matches beyond that. We'll do a fresh search
+ * with the search RE to find a new promising match range.
+ */
+ close++;
} while (close < v->stop);
*coldp = cold;
@@ -963,17 +974,17 @@ citerdissect(struct vars * v,
assert(begin <= end);
/*
- * If zero matches are allowed, and target string is empty, just declare
- * victory. OTOH, if target string isn't empty, zero matches can't work
- * so we pretend the min is 1.
+ * For the moment, assume the minimum number of matches is 1. If zero
+ * matches are allowed, and the target string is empty, we are allowed to
+ * match regardless of the contents of the iter node --- but we would
+ * prefer to match once, so that capturing parens get set. (An example of
+ * the concern here is a pattern like "()*\1", which historically this
+ * code has allowed to succeed.) Therefore, we deal with the zero-matches
+ * case at the bottom, after failing to find any other way to match.
*/
min_matches = t->min;
if (min_matches <= 0)
- {
- if (begin == end)
- return REG_OKAY;
min_matches = 1;
- }
/*
* We need workspace to track the endpoints of each sub-match. Normally
@@ -1123,8 +1134,19 @@ backtrack:
}
/* all possibilities exhausted */
- MDEBUG(("%d failed\n", t->id));
FREE(endpts);
+
+ /*
+ * Now consider the possibility that we can match to a zero-length string
+ * by using zero repetitions.
+ */
+ if (t->min == 0 && begin == end)
+ {
+ MDEBUG(("%d allowing zero matches\n", t->id));
+ return REG_OKAY;
+ }
+
+ MDEBUG(("%d failed\n", t->id));
return REG_NOMATCH;
}