aboutsummaryrefslogtreecommitdiff
path: root/src/backend/regex/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/regex/regexec.c')
-rw-r--r--src/backend/regex/regexec.c48
1 files changed, 36 insertions, 12 deletions
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index aa5ba855149..2411e6561d7 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -216,14 +216,14 @@ pg_regexec(regex_t *re,
if (backref && nmatch <= v->g->nsub)
{
/* need larger work area */
- if (v->g->nsub + 1 <= LOCALMAT)
+ v->nmatch = v->g->nsub + 1;
+ if (v->nmatch <= LOCALMAT)
v->pmatch = mat;
else
- v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
- sizeof(regmatch_t));
+ v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
if (v->pmatch == NULL)
return REG_ESPACE;
- v->nmatch = v->g->nsub + 1;
+ zapallsubs(v->pmatch, v->nmatch);
}
else
{
@@ -488,7 +488,6 @@ find(struct vars *v,
return REG_OKAY;
/* find submatches */
- zapallsubs(v->pmatch, v->nmatch);
return cdissect(v, v->g->tree, begin, end);
}
@@ -599,7 +598,6 @@ cfindloop(struct vars *v,
break; /* no match with this begin point, try next */
MDEBUG(("tentative end %ld\n", LOFF(end)));
/* Dissect the potential match to see if it really matches */
- zapallsubs(v->pmatch, v->nmatch);
er = cdissect(v, v->g->tree, begin, end);
if (er == REG_OKAY)
{
@@ -647,6 +645,8 @@ cfindloop(struct vars *v,
/*
* zapallsubs - initialize all subexpression matches to "no match"
+ *
+ * Note that p[0], the overall-match location, is not touched.
*/
static void
zapallsubs(regmatch_t *p,
@@ -716,8 +716,30 @@ subset(struct vars *v,
* DFA and found that the proposed substring satisfies the DFA. (We make
* the caller do that because in concatenation and iteration nodes, it's
* much faster to check all the substrings against the child DFAs before we
- * recurse.) Also, caller must have cleared subexpression match data via
- * zaptreesubs (or zapallsubs at the top level).
+ * recurse.)
+ *
+ * A side-effect of a successful match is to save match locations for
+ * capturing subexpressions in v->pmatch[]. This is a little bit tricky,
+ * so we make the following rules:
+ * 1. Before initial entry to cdissect, all match data must have been
+ * cleared (this is seen to by zapallsubs).
+ * 2. Before any recursive entry to cdissect, the match data for that
+ * subexpression tree must be guaranteed clear (see zaptreesubs).
+ * 3. When returning REG_OKAY, each level of cdissect will have saved
+ * any relevant match locations.
+ * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
+ * that its subexpression match locations are again clear.
+ * 5. No guarantees are made for error cases (i.e., other result codes).
+ * 6. When a level of cdissect abandons a successful sub-match, it will
+ * clear that subtree's match locations with zaptreesubs before trying
+ * any new DFA match or cdissect call for that subtree or any subtree
+ * to its right (that is, any subtree that could have a backref into the
+ * abandoned match).
+ * This may seem overly complicated, but it's difficult to simplify it
+ * because of the provision that match locations must be reset before
+ * any fresh DFA match (a rule that is needed to make dfa_backref safe).
+ * That means it won't work to just reset relevant match locations at the
+ * start of each cdissect level.
*/
static int /* regexec return code */
cdissect(struct vars *v,
@@ -841,6 +863,8 @@ ccondissect(struct vars *v,
MDEBUG(("%d: successful\n", t->id));
return REG_OKAY;
}
+ /* Reset left's matches (right should have done so itself) */
+ zaptreesubs(v, left);
}
if (er != REG_NOMATCH)
return er;
@@ -863,8 +887,6 @@ ccondissect(struct vars *v,
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
- zaptreesubs(v, left);
- zaptreesubs(v, right);
}
/* can't get here */
@@ -922,6 +944,8 @@ crevcondissect(struct vars *v,
MDEBUG(("%d: successful\n", t->id));
return REG_OKAY;
}
+ /* Reset left's matches (right should have done so itself) */
+ zaptreesubs(v, left);
}
if (er != REG_NOMATCH)
return er;
@@ -944,8 +968,6 @@ crevcondissect(struct vars *v,
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
- zaptreesubs(v, left);
- zaptreesubs(v, right);
}
/* can't get here */
@@ -1214,6 +1236,7 @@ citerdissect(struct vars *v,
for (i = nverified + 1; i <= k; i++)
{
+ /* zap any match data from a non-last iteration */
zaptreesubs(v, t->child);
er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
@@ -1426,6 +1449,7 @@ creviterdissect(struct vars *v,
for (i = nverified + 1; i <= k; i++)
{
+ /* zap any match data from a non-last iteration */
zaptreesubs(v, t->child);
er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)