aboutsummaryrefslogtreecommitdiff
path: root/src/backend/regex/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/regex/regcomp.c')
-rw-r--r--src/backend/regex/regcomp.c152
1 files changed, 126 insertions, 26 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index b733bc7824e..aa759c26486 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -57,6 +57,8 @@ static const chr *scanplain(struct vars *);
static void onechr(struct vars *, chr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
static void wordchrs(struct vars *);
+static void processlacon(struct vars *, struct state *, struct state *, int,
+ struct state *, struct state *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
static void freesrnode(struct vars *, struct subre *);
@@ -65,7 +67,7 @@ static int numst(struct subre *, int);
static void markst(struct subre *);
static void cleanst(struct vars *);
static long nfatree(struct vars *, struct subre *, FILE *);
-static long nfanode(struct vars *, struct subre *, FILE *);
+static long nfanode(struct vars *, struct subre *, int, FILE *);
static int newlacon(struct vars *, struct state *, struct state *, int);
static void freelacons(struct subre *, int);
static void rfree(regex_t *);
@@ -146,6 +148,7 @@ static void deltraverse(struct nfa *, struct state *, struct state *);
static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
static void duptraverse(struct nfa *, struct state *, struct state *);
static void cleartraverse(struct nfa *, struct state *);
+static struct state *single_color_transition(struct state *, struct state *);
static void specialcolors(struct nfa *);
static long optimize(struct nfa *, FILE *);
static void pullback(struct nfa *, FILE *);
@@ -245,8 +248,9 @@ struct vars
int ntree; /* number of tree nodes, plus one */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
- struct subre *lacons; /* lookahead-constraint vector */
- int nlacons; /* size of lacons */
+ struct subre *lacons; /* lookaround-constraint vector */
+ int nlacons; /* size of lacons[]; note that only slots
+ * numbered 1 .. nlacons-1 are used */
size_t spaceused; /* approx. space used for compilation */
};
@@ -277,7 +281,7 @@ struct vars
#define CCLASS 'C' /* start of [: */
#define END 'X' /* end of [. [= [: */
#define RANGE 'R' /* - within [] which might be range delim. */
-#define LACON 'L' /* lookahead constraint subRE */
+#define LACON 'L' /* lookaround constraint subRE */
#define AHEAD 'a' /* color-lookahead arc */
#define BEHIND 'r' /* color-lookbehind arc */
#define WBDRY 'w' /* word boundary constraint */
@@ -432,11 +436,15 @@ pg_regcomp(regex_t *re,
assert(v->nlacons == 0 || v->lacons != NULL);
for (i = 1; i < v->nlacons; i++)
{
+ struct subre *lasub = &v->lacons[i];
+
#ifdef REG_DEBUG
if (debug != NULL)
fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
#endif
- nfanode(v, &v->lacons[i], debug);
+
+ /* Prepend .* to pattern if it's a lookbehind LACON */
+ nfanode(v, lasub, !LATYPE_IS_AHEAD(lasub->subno), debug);
}
CNOERR();
if (v->tree->flags & SHORTER)
@@ -640,7 +648,7 @@ makesearch(struct vars * v,
static struct subre *
parse(struct vars * v,
int stopper, /* EOS or ')' */
- int type, /* LACON (lookahead subRE) or PLAIN */
+ int type, /* LACON (lookaround subRE) or PLAIN */
struct state * init, /* initial state */
struct state * final) /* final state */
{
@@ -719,7 +727,7 @@ parse(struct vars * v,
static struct subre *
parsebranch(struct vars * v,
int stopper, /* EOS or ')' */
- int type, /* LACON (lookahead subRE) or PLAIN */
+ int type, /* LACON (lookaround subRE) or PLAIN */
struct state * left, /* leftmost state */
struct state * right, /* rightmost state */
int partial) /* is this only part of a branch? */
@@ -768,7 +776,7 @@ parsebranch(struct vars * v,
static void
parseqatom(struct vars * v,
int stopper, /* EOS or ')' */
- int type, /* LACON (lookahead subRE) or PLAIN */
+ int type, /* LACON (lookaround subRE) or PLAIN */
struct state * lp, /* left state to hang it on */
struct state * rp, /* right state to hang it on */
struct subre * top) /* subtree top */
@@ -782,7 +790,7 @@ parseqatom(struct vars * v,
struct subre *atom; /* atom's subtree */
struct subre *t;
int cap; /* capturing parens? */
- int pos; /* positive lookahead? */
+ int latype; /* lookaround constraint type */
int subno; /* capturing-parens or backref number */
int atomtype;
int qprefer; /* quantifier short/long preference */
@@ -866,19 +874,18 @@ parseqatom(struct vars * v,
nonword(v, AHEAD, s, rp);
return;
break;
- case LACON: /* lookahead constraint */
- pos = v->nextvalue;
+ case LACON: /* lookaround constraint */
+ latype = v->nextvalue;
NEXT();
s = newstate(v->nfa);
s2 = newstate(v->nfa);
NOERR();
t = parse(v, ')', LACON, s, s2);
freesubre(v, t); /* internal structure irrelevant */
- assert(SEE(')') || ISERR());
- NEXT();
- n = newlacon(v, s, s2, pos);
NOERR();
- ARCV(LACON, n);
+ assert(SEE(')'));
+ NEXT();
+ processlacon(v, s, s2, latype, lp, rp);
return;
break;
/* then errors, to get them out of the way */
@@ -1634,6 +1641,75 @@ wordchrs(struct vars * v)
}
/*
+ * processlacon - generate the NFA representation of a LACON
+ *
+ * In the general case this is just newlacon() + newarc(), but some cases
+ * can be optimized.
+ */
+static void
+processlacon(struct vars * v,
+ struct state * begin, /* start of parsed LACON sub-re */
+ struct state * end, /* end of parsed LACON sub-re */
+ int latype,
+ struct state * lp, /* left state to hang it on */
+ struct state * rp) /* right state to hang it on */
+{
+ struct state *s1;
+ int n;
+
+ /*
+ * Check for lookaround RE consisting of a single plain color arc (or set
+ * of arcs); this would typically be a simple chr or a bracket expression.
+ */
+ s1 = single_color_transition(begin, end);
+ switch (latype)
+ {
+ case LATYPE_AHEAD_POS:
+ /* If lookahead RE is just colorset C, convert to AHEAD(C) */
+ if (s1 != NULL)
+ {
+ cloneouts(v->nfa, s1, lp, rp, AHEAD);
+ return;
+ }
+ break;
+ case LATYPE_AHEAD_NEG:
+ /* If lookahead RE is just colorset C, convert to AHEAD(^C)|$ */
+ if (s1 != NULL)
+ {
+ colorcomplement(v->nfa, v->cm, AHEAD, s1, lp, rp);
+ newarc(v->nfa, '$', 1, lp, rp);
+ newarc(v->nfa, '$', 0, lp, rp);
+ return;
+ }
+ break;
+ case LATYPE_BEHIND_POS:
+ /* If lookbehind RE is just colorset C, convert to BEHIND(C) */
+ if (s1 != NULL)
+ {
+ cloneouts(v->nfa, s1, lp, rp, BEHIND);
+ return;
+ }
+ break;
+ case LATYPE_BEHIND_NEG:
+ /* If lookbehind RE is just colorset C, convert to BEHIND(^C)|^ */
+ if (s1 != NULL)
+ {
+ colorcomplement(v->nfa, v->cm, BEHIND, s1, lp, rp);
+ newarc(v->nfa, '^', 1, lp, rp);
+ newarc(v->nfa, '^', 0, lp, rp);
+ return;
+ }
+ break;
+ default:
+ assert(NOTREACHED);
+ }
+
+ /* General case: we need a LACON subre and arc */
+ n = newlacon(v, begin, end, latype);
+ newarc(v->nfa, LACON, n, lp, rp);
+}
+
+/*
* subre - allocate a subre
*/
static struct subre *
@@ -1826,15 +1902,18 @@ nfatree(struct vars * v,
if (t->right != NULL)
(DISCARD) nfatree(v, t->right, f);
- return nfanode(v, t, f);
+ return nfanode(v, t, 0, f);
}
/*
- * nfanode - do one NFA for nfatree
+ * nfanode - do one NFA for nfatree or lacons
+ *
+ * If converttosearch is true, apply makesearch() to the NFA.
*/
static long /* optimize results */
nfanode(struct vars * v,
struct subre * t,
+ int converttosearch,
FILE *f) /* for debug output */
{
struct nfa *nfa;
@@ -1855,10 +1934,11 @@ nfanode(struct vars * v,
NOERRZ();
dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
if (!ISERR())
- {
specialcolors(nfa);
+ if (!ISERR())
ret = optimize(nfa, f);
- }
+ if (converttosearch && !ISERR())
+ makesearch(v, nfa);
if (!ISERR())
compact(nfa, &t->cnfa);
@@ -1867,13 +1947,13 @@ nfanode(struct vars * v,
}
/*
- * newlacon - allocate a lookahead-constraint subRE
+ * newlacon - allocate a lookaround-constraint subRE
*/
static int /* lacon number */
newlacon(struct vars * v,
struct state * begin,
struct state * end,
- int pos)
+ int latype)
{
int n;
struct subre *newlacons;
@@ -1900,13 +1980,13 @@ newlacon(struct vars * v,
sub = &v->lacons[n];
sub->begin = begin;
sub->end = end;
- sub->subno = pos;
+ sub->subno = latype;
ZAPCNFA(sub->cnfa);
return n;
}
/*
- * freelacons - free lookahead-constraint subRE vector
+ * freelacons - free lookaround-constraint subRE vector
*/
static void
freelacons(struct subre * subs,
@@ -2020,9 +2100,29 @@ dump(regex_t *re,
}
for (i = 1; i < g->nlacons; i++)
{
- fprintf(f, "\nla%d (%s):\n", i,
- (g->lacons[i].subno) ? "positive" : "negative");
- dumpcnfa(&g->lacons[i].cnfa, f);
+ struct subre *lasub = &g->lacons[i];
+ const char *latype;
+
+ switch (lasub->subno)
+ {
+ case LATYPE_AHEAD_POS:
+ latype = "positive lookahead";
+ break;
+ case LATYPE_AHEAD_NEG:
+ latype = "negative lookahead";
+ break;
+ case LATYPE_BEHIND_POS:
+ latype = "positive lookbehind";
+ break;
+ case LATYPE_BEHIND_NEG:
+ latype = "negative lookbehind";
+ break;
+ default:
+ latype = "???";
+ break;
+ }
+ fprintf(f, "\nla%d (%s):\n", i, latype);
+ dumpcnfa(&lasub->cnfa, f);
}
fprintf(f, "\n");
dumpst(g->tree, f, 0);