diff options
Diffstat (limited to 'src/backend/regex/regexec.c')
-rw-r--r-- | src/backend/regex/regexec.c | 48 |
1 files changed, 35 insertions, 13 deletions
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 694a03965c0..d18672c7c7c 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -448,6 +448,7 @@ cfindloop(struct vars * v, close = v->search_start; do { + /* Search with the search RE for match range at/beyond "close" */ MDEBUG(("\ncsearch at %ld\n", LOFF(close))); close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); if (ISERR()) @@ -456,10 +457,11 @@ cfindloop(struct vars * v, return v->err; } if (close == NULL) - break; /* NOTE BREAK */ + break; /* no more possible match anywhere */ assert(cold != NULL); open = cold; cold = NULL; + /* Search for matches starting between "open" and "close" inclusive */ MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); for (begin = open; begin <= close; begin++) { @@ -468,6 +470,7 @@ cfindloop(struct vars * v, estop = v->stop; for (;;) { + /* Here we use the top node's detailed RE */ if (shorter) end = shortest(v, d, begin, estart, estop, (chr **) NULL, &hitend); @@ -482,8 +485,9 @@ cfindloop(struct vars * v, if (hitend && cold == NULL) cold = begin; if (end == NULL) - break; /* NOTE BREAK OUT */ + break; /* no match with this begin point, try next */ MDEBUG(("tentative end %ld\n", LOFF(end))); + /* Dissect the potential match to see if it really matches */ zapallsubs(v->pmatch, v->nmatch); er = cdissect(v, v->g->tree, begin, end); if (er == REG_OKAY) @@ -502,21 +506,28 @@ cfindloop(struct vars * v, *coldp = cold; return er; } - /* try next shorter/longer match with same begin point */ + /* Try next longer/shorter match with same begin point */ if (shorter) { if (end == estop) - break; /* NOTE BREAK OUT */ + break; /* no more, so try next begin point */ estart = end + 1; } else { if (end == begin) - break; /* NOTE BREAK OUT */ + break; /* no more, so try next begin point */ estop = end - 1; } } /* end loop over endpoint positions */ } /* end loop over beginning positions */ + + /* + * If we get here, there is no possible match starting at or before + * "close", so consider matches beyond that. We'll do a fresh search + * with the search RE to find a new promising match range. + */ + close++; } while (close < v->stop); *coldp = cold; @@ -963,17 +974,17 @@ citerdissect(struct vars * v, assert(begin <= end); /* - * If zero matches are allowed, and target string is empty, just declare - * victory. OTOH, if target string isn't empty, zero matches can't work - * so we pretend the min is 1. + * For the moment, assume the minimum number of matches is 1. If zero + * matches are allowed, and the target string is empty, we are allowed to + * match regardless of the contents of the iter node --- but we would + * prefer to match once, so that capturing parens get set. (An example of + * the concern here is a pattern like "()*\1", which historically this + * code has allowed to succeed.) Therefore, we deal with the zero-matches + * case at the bottom, after failing to find any other way to match. */ min_matches = t->min; if (min_matches <= 0) - { - if (begin == end) - return REG_OKAY; min_matches = 1; - } /* * We need workspace to track the endpoints of each sub-match. Normally @@ -1123,8 +1134,19 @@ backtrack: } /* all possibilities exhausted */ - MDEBUG(("%d failed\n", t->id)); FREE(endpts); + + /* + * Now consider the possibility that we can match to a zero-length string + * by using zero repetitions. + */ + if (t->min == 0 && begin == end) + { + MDEBUG(("%d allowing zero matches\n", t->id)); + return REG_OKAY; + } + + MDEBUG(("%d failed\n", t->id)); return REG_NOMATCH; } |