diff options
Diffstat (limited to 'src/backend/regex')
-rw-r--r-- | src/backend/regex/regc_lex.c | 5 | ||||
-rw-r--r-- | src/backend/regex/regcomp.c | 68 | ||||
-rw-r--r-- | src/backend/regex/regexec.c | 36 |
3 files changed, 80 insertions, 29 deletions
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 7673dab76f4..45727ffa01f 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -528,10 +528,7 @@ next(struct vars *v) } assert(NOTREACHED); } - if (v->cflags & REG_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); + RETV('(', 1); break; case CHR(')'): if (LASTTYPE('(')) diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 60a220c57ab..ae3a7b6a38c 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -65,7 +65,7 @@ static struct subre *subre(struct vars *, int, int, struct state *, struct state static void freesubre(struct vars *, struct subre *); static void freesubreandsiblings(struct vars *, struct subre *); static void freesrnode(struct vars *, struct subre *); -static void optst(struct vars *, struct subre *); +static void removecaptures(struct vars *, struct subre *); static int numst(struct subre *, int); static void markst(struct subre *); static void cleanst(struct vars *); @@ -431,7 +431,8 @@ pg_regcomp(regex_t *re, dumpst(v->tree, debug, 1); } #endif - optst(v, v->tree); + if (v->cflags & REG_NOSUB) + removecaptures(v, v->tree); v->ntree = numst(v->tree, 1); markst(v->tree); cleanst(v); @@ -1013,14 +1014,16 @@ parseqatom(struct vars *v, break; case BACKREF: /* the Feature From The Black Lagoon */ INSIST(type != LACON, REG_ESUBREG); - INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); - INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); + subno = v->nextvalue; + assert(subno > 0); + INSIST(subno < v->nsubs, REG_ESUBREG); + NOERRN(); + INSIST(v->subs[subno] != NULL, REG_ESUBREG); NOERRN(); - assert(v->nextvalue > 0); atom = subre(v, 'b', BACKR, lp, rp); NOERRN(); - subno = v->nextvalue; atom->backno = subno; + v->subs[subno]->flags |= BRUSE; EMPTYARC(lp, rp); /* temporarily, so there's something */ NEXT(); break; @@ -2141,19 +2144,54 @@ freesrnode(struct vars *v, /* might be NULL */ } /* - * optst - optimize a subRE subtree + * removecaptures - remove unnecessary capture subREs + * + * If the caller said that it doesn't care about subexpression match data, + * we may delete the "capture" markers on subREs that are not referenced + * by any backrefs, and then simplify anything that's become non-messy. + * Call this only if REG_NOSUB flag is set. */ static void -optst(struct vars *v, - struct subre *t) +removecaptures(struct vars *v, + struct subre *t) { + struct subre *t2; + + assert(t != NULL); + + /* + * If this isn't itself a backref target, clear capno and tentatively + * clear CAP flag. + */ + if (!(t->flags & BRUSE)) + { + t->capno = 0; + t->flags &= ~CAP; + } + + /* Now recurse to children */ + for (t2 = t->child; t2 != NULL; t2 = t2->sibling) + { + removecaptures(v, t2); + /* Propagate child CAP flag back up, if it's still set */ + if (t2->flags & CAP) + t->flags |= CAP; + } + /* - * DGP (2007-11-13): I assume it was the programmer's intent to eventually - * come back and add code to optimize subRE trees, but the routine coded - * just spends effort traversing the tree and doing nothing. We can do - * nothing with less effort. + * If t now contains neither captures nor backrefs, there's no longer any + * need to care where its sub-match boundaries are, so we can reduce it to + * a simple DFA node. (Note in particular that MIXED child greediness is + * not a hindrance here, so we don't use the MESSY() macro.) */ - return; + if ((t->flags & (CAP | BACKR)) == 0) + { + if (t->child) + freesubreandsiblings(v, t->child); + t->child = NULL; + t->op = '='; + t->flags &= ~MIXED; + } } /* @@ -2501,6 +2539,8 @@ stdump(struct subre *t, fprintf(f, " hascapture"); if (t->flags & BACKR) fprintf(f, " hasbackref"); + if (t->flags & BRUSE) + fprintf(f, " isreferenced"); if (!(t->flags & INUSE)) fprintf(f, " UNUSED"); if (t->latype != (char) -1) diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 5b9a0878203..db54ebfba40 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -213,12 +213,9 @@ pg_regexec(regex_t *re, return REG_NOMATCH; backref = (v->g->info & REG_UBACKREF) ? 1 : 0; v->eflags = flags; - if (v->g->cflags & REG_NOSUB) - nmatch = 0; /* override client */ - v->nmatch = nmatch; - if (backref) + if (backref && nmatch <= v->g->nsub) { - /* need work area */ + /* need larger work area */ if (v->g->nsub + 1 <= LOCALMAT) v->pmatch = mat; else @@ -229,7 +226,17 @@ pg_regexec(regex_t *re, v->nmatch = v->g->nsub + 1; } else + { + /* we can store results directly in caller's array */ v->pmatch = pmatch; + /* ensure any extra entries in caller's array are filled with -1 */ + if (nmatch > 0) + zapallsubs(pmatch, nmatch); + /* then forget about extra entries, to avoid useless work in find() */ + if (nmatch > v->g->nsub + 1) + nmatch = v->g->nsub + 1; + v->nmatch = nmatch; + } v->details = details; v->start = (chr *) string; v->search_start = (chr *) string + search_start; @@ -290,12 +297,20 @@ pg_regexec(regex_t *re, else st = find(v, &v->g->tree->cnfa, &v->g->cmap); - /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) + /* on success, ensure caller's match vector is filled correctly */ + if (st == REG_OKAY && nmatch > 0) { - zapallsubs(pmatch, nmatch); - n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); + if (v->pmatch != pmatch) + { + /* copy portion of match vector over from (larger) work area */ + assert(nmatch <= v->nmatch); + memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t)); + } + if (v->g->cflags & REG_NOSUB) + { + /* don't expose possibly-partial sub-match results to caller */ + zapallsubs(pmatch, nmatch); + } } /* clean up */ @@ -752,7 +767,6 @@ cdissect(struct vars *v, break; case '(': /* no-op capture node */ assert(t->child != NULL); - assert(t->capno > 0); er = cdissect(v, t->child, begin, end); break; default: |