diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2015-10-02 13:45:39 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2015-10-02 13:45:39 -0400 |
commit | da8ff292026482d81a4b5068c90b508b5f96475d (patch) | |
tree | d18719344ecda4a9cc6a389fff07c224b37b0f1c /src/backend/regex/regexec.c | |
parent | 3b0c1d9573d47b3dad208dc055e0d0bd45d54f16 (diff) | |
download | postgresql-da8ff292026482d81a4b5068c90b508b5f96475d.tar.gz postgresql-da8ff292026482d81a4b5068c90b508b5f96475d.zip |
Add some more query-cancel checks to regular expression matching.

Commit 9662143f0c35d64d7042fbeaf879df8f0b54be32 added infrastructure to
allow regular-expression operations to be terminated early in the event
of SIGINT etc. However, fuzz testing by Greg Stark disclosed that there
are still cases where regex compilation could run for a long time without
noticing a cancel request. Specifically, the fixempties() phase never
adds new states, only new arcs, so it doesn't hit the cancel check I'd put
in newstate(). Add one to newarc() as well to cover that.

Some experimentation of my own found that regex execution could also run
for a long time despite a pending cancel. We'd put a high-level cancel
check into cdissect(), but there was none inside the core text-matching
routines longest() and shortest(). Ordinarily those inner loops are very,
very fast ... but in the presence of lookahead constraints, not so much.
As a compromise, stick a cancel check into the stateset cache-miss
function, which is enough to guarantee a cancel check at least once per
lookahead constraint test.

Making this work required more attention to error handling throughout the
regex executor. Henry Spencer had apparently originally intended longest()
and shortest() to be incapable of incurring errors while running, so
neither they nor their subroutines had well-defined error reporting
behaviors. However, that was already broken by the lookahead constraint
feature, since lacon() can surely suffer an out-of-memory failure ---
which, in the code as it stood, might never be reported to the user at all,
but just silently be treated as a non-match of the lookahead constraint.
Normalize all that by inserting explicit error tests as needed. I took the
opportunity to add some more comments to the code, too.

Back-patch to all supported branches, like the previous patch.
Diffstat (limited to 'src/backend/regex/regexec.c')
-rw-r--r-- | src/backend/regex/regexec.c | 37 |
1 file changed, 31 insertions, 6 deletions
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 7b98b18da7e..ee16a5ef412 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -428,6 +428,11 @@ cfindloop(struct vars * v, { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); + if (ISERR()) + { + *coldp = cold; + return v->err; + } if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); @@ -447,6 +452,11 @@ cfindloop(struct vars * v, else end = longest(v, d, begin, estop, &hitend); + if (ISERR()) + { + *coldp = cold; + return v->err; + } if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -626,15 +636,22 @@ condissect(struct vars * v, mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) { + ERR(REG_ASSERT); /* if no other error reported already */ freedfa(d); freedfa(d2); - return REG_ASSERT; + return v->err; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ while (longest(v, d2, mid, end, (int *) NULL) != end) { + if (ISERR()) + { + freedfa(d); + freedfa(d2); + return v->err; + } /* that midpoint didn't work, find a new one */ if (mid == stop) { @@ -653,9 +670,10 @@ condissect(struct vars * v, { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); + ERR(REG_ASSERT); /* if no other error reported already */ freedfa(d); freedfa(d2); - return REG_ASSERT; + return v->err; } MDEBUG(("new midpoint %ld\n", LOFF(mid))); } @@ -690,8 +708,7 @@ altdissect(struct vars * v, MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - if (ISERR()) - return v->err; + NOERR(); if (longest(v, d, begin, end, (int *) NULL) == end) { MDEBUG(("success\n")); @@ -699,6 +716,7 @@ altdissect(struct vars * v, return dissect(v, t->left, begin, end); } freedfa(d); + NOERR(); } return REG_ASSERT; /* none of them matched?!? 
*/ } @@ -804,6 +822,7 @@ ccondissect(struct vars * v, { freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); @@ -843,12 +862,13 @@ ccondissect(struct vars * v, } /* that midpoint didn't work, find a new one */ - if (mid == begin) + if (ISERR() || mid == begin) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } mid = longest(v, d, begin, mid - 1, (int *) NULL); @@ -858,6 +878,7 @@ ccondissect(struct vars * v, MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); @@ -911,6 +932,7 @@ crevdissect(struct vars * v, { freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); @@ -950,12 +972,13 @@ crevdissect(struct vars * v, } /* that midpoint didn't work, find a new one */ - if (mid == end) + if (ISERR() || mid == end) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); @@ -965,6 +988,7 @@ crevdissect(struct vars * v, MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); @@ -1096,6 +1120,7 @@ caltdissect(struct vars * v, if (longest(v, d, begin, end, (int *) NULL) != end) { freedfa(d); + NOERR(); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); } |