diff options
author | Bruce Momjian <bruce@momjian.us> | 2003-08-04 00:43:34 +0000 |
---|---|---|
committer | Bruce Momjian <bruce@momjian.us> | 2003-08-04 00:43:34 +0000 |
commit | 089003fb462fcce46c02bf47322b429f73c33c50 (patch) | |
tree | 77d78bc3a149df06f5603f60200a6ab363336624 /src/backend/regex | |
parent | 63354a0228a1dbc4a0d5ddc8ecdd8326349d2100 (diff) | |
download | postgresql-089003fb462fcce46c02bf47322b429f73c33c50.tar.gz postgresql-089003fb462fcce46c02bf47322b429f73c33c50.zip |
pgindent run.
Diffstat (limited to 'src/backend/regex')
-rw-r--r-- | src/backend/regex/regc_color.c | 412 | ||||
-rw-r--r-- | src/backend/regex/regc_cvec.c | 197 | ||||
-rw-r--r-- | src/backend/regex/regc_lex.c | 1492 | ||||
-rw-r--r-- | src/backend/regex/regc_locale.c | 982 | ||||
-rw-r--r-- | src/backend/regex/regc_nfa.c | 858 | ||||
-rw-r--r-- | src/backend/regex/regcomp.c | 1836 | ||||
-rw-r--r-- | src/backend/regex/rege_dfa.c | 416 | ||||
-rw-r--r-- | src/backend/regex/regerror.c | 118 | ||||
-rw-r--r-- | src/backend/regex/regexec.c | 776 | ||||
-rw-r--r-- | src/backend/regex/regfree.c | 16 |
10 files changed, 3897 insertions, 3206 deletions
diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index eb250556822..da2b79b5f09 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -2,21 +2,21 @@ * colorings of characters * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header: /cvsroot/pgsql/src/backend/regex/regc_color.c,v 1.1 2003/02/05 17:41:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regc_color.c,v 1.2 2003/08/04 00:43:21 momjian Exp $ * * * Note that there are some incestuous relationships between this code and @@ -37,8 +37,8 @@ -#define CISERR() VISERR(cm->v) -#define CERR(e) VERR(cm->v, (e)) +#define CISERR() VISERR(cm->v) +#define CERR(e) VERR(cm->v, (e)) @@ -46,11 +46,11 @@ * initcm - set up new colormap */ static void -initcm(struct vars *v, - struct colormap *cm) +initcm(struct vars * v, + struct colormap * cm) { - int i; - int j; + int i; + int j; union tree *t; union tree *nextt; struct colordesc *cd; @@ -63,21 +63,22 @@ initcm(struct vars *v, cm->max = 0; cm->free = 0; - cd = cm->cd; /* cm->cd[WHITE] */ + cd = cm->cd; /* cm->cd[WHITE] */ cd->sub = NOSUB; cd->arcs = NULL; cd->flags = 0; cd->nchrs = CHR_MAX - CHR_MIN + 1; /* upper levels of tree */ - for (t = &cm->tree[0], j = NBYTS-1; j > 0; t = nextt, j--) { + for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--) + { nextt = t + 1; - for (i = BYTTAB-1; i >= 0; i--) + for (i = BYTTAB - 1; i >= 0; i--) t->tptr[i] = nextt; } /* bottom level is solid white */ - t = &cm->tree[NBYTS-1]; - for (i = BYTTAB-1; i >= 0; i--) + t = &cm->tree[NBYTS - 1]; + for (i = BYTTAB - 1; i >= 0; i--) t->tcolor[i] = WHITE; cd->block = t; } @@ -86,16 +87,17 @@ initcm(struct vars *v, * freecm - free dynamically-allocated things in a colormap */ static void -freecm(struct colormap *cm) +freecm(struct colormap * cm) { - size_t i; + size_t i; union tree *cb; cm->magic = 0; if (NBYTS > 1) cmtreefree(cm, cm->tree, 0); for (i = 1; i <= cm->max; i++) /* skip WHITE */ - if (!UNUSEDCOLOR(&cm->cd[i])) { + if (!UNUSEDCOLOR(&cm->cd[i])) + { cb = cm->cd[i].block; if (cb != NULL) FREE(cb); @@ -108,24 +110,29 @@ freecm(struct colormap *cm) * cmtreefree - free a non-terminal part of a colormap tree */ static void -cmtreefree(struct colormap *cm, - union tree *tree, 
+cmtreefree(struct colormap * cm, + union tree * tree, int level) /* level number (top == 0) of this block */ { - int i; + int i; union tree *t; - union tree *fillt = &cm->tree[level+1]; + union tree *fillt = &cm->tree[level + 1]; union tree *cb; - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { + assert(level < NBYTS - 1); /* this level has pointers */ + for (i = BYTTAB - 1; i >= 0; i--) + { t = tree->tptr[i]; assert(t != NULL); - if (t != fillt) { - if (level < NBYTS-2) { /* more pointer blocks below */ - cmtreefree(cm, t, level+1); + if (t != fillt) + { + if (level < NBYTS - 2) + { /* more pointer blocks below */ + cmtreefree(cm, t, level + 1); FREE(t); - } else { /* color block below */ + } + else + { /* color block below */ cb = cm->cd[t->tcolor[0]].block; if (t != cb) /* not a solid block */ FREE(t); @@ -137,22 +144,22 @@ cmtreefree(struct colormap *cm, /* * setcolor - set the color of a character in a colormap */ -static color /* previous color */ -setcolor(struct colormap *cm, +static color /* previous color */ +setcolor(struct colormap * cm, chr c, pcolor co) { - uchr uc = c; - int shift; - int level; - int b; - int bottom; + uchr uc = c; + int shift; + int level; + int b; + int bottom; union tree *t; union tree *newt; union tree *fillt; union tree *lastt; union tree *cb; - color prev; + color prev; assert(cm->magic == CMMAGIC); if (CISERR() || co == COLORLESS) @@ -160,27 +167,30 @@ setcolor(struct colormap *cm, t = cm->tree; for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) { + level++, shift -= BYTBITS) + { b = (uc >> shift) & BYTMASK; lastt = t; t = lastt->tptr[b]; assert(t != NULL); - fillt = &cm->tree[level+1]; + fillt = &cm->tree[level + 1]; bottom = (shift <= BYTBITS) ? 1 : 0; cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt; - if (t == fillt || t == cb) { /* must allocate a new block */ - newt = (union tree *)MALLOC((bottom) ? 
- sizeof(struct colors) : sizeof(struct ptrs)); - if (newt == NULL) { + if (t == fillt || t == cb) + { /* must allocate a new block */ + newt = (union tree *) MALLOC((bottom) ? + sizeof(struct colors) : sizeof(struct ptrs)); + if (newt == NULL) + { CERR(REG_ESPACE); return COLORLESS; } if (bottom) memcpy(VS(newt->tcolor), VS(t->tcolor), - BYTTAB*sizeof(color)); + BYTTAB * sizeof(color)); else memcpy(VS(newt->tptr), VS(t->tptr), - BYTTAB*sizeof(union tree *)); + BYTTAB * sizeof(union tree *)); t = newt; lastt->tptr[b] = t; } @@ -188,7 +198,7 @@ setcolor(struct colormap *cm, b = uc & BYTMASK; prev = t->tcolor[b]; - t->tcolor[b] = (color)co; + t->tcolor[b] = (color) co; return prev; } @@ -196,51 +206,59 @@ setcolor(struct colormap *cm, * maxcolor - report largest color number in use */ static color -maxcolor(struct colormap *cm) +maxcolor(struct colormap * cm) { if (CISERR()) return COLORLESS; - return (color)cm->max; + return (color) cm->max; } /* * newcolor - find a new color (must be subject of setcolor at once) - * Beware: may relocate the colordescs. + * Beware: may relocate the colordescs. 
*/ -static color /* COLORLESS for error */ -newcolor(struct colormap *cm) +static color /* COLORLESS for error */ +newcolor(struct colormap * cm) { struct colordesc *cd; struct colordesc *new; - size_t n; + size_t n; if (CISERR()) return COLORLESS; - if (cm->free != 0) { + if (cm->free != 0) + { assert(cm->free > 0); - assert((size_t)cm->free < cm->ncds); + assert((size_t) cm->free < cm->ncds); cd = &cm->cd[cm->free]; assert(UNUSEDCOLOR(cd)); assert(cd->arcs == NULL); cm->free = cd->sub; - } else if (cm->max < cm->ncds - 1) { + } + else if (cm->max < cm->ncds - 1) + { cm->max++; cd = &cm->cd[cm->max]; - } else { + } + else + { /* oops, must allocate more */ n = cm->ncds * 2; - if (cm->cd == cm->cdspace) { - new = (struct colordesc *)MALLOC(n * - sizeof(struct colordesc)); + if (cm->cd == cm->cdspace) + { + new = (struct colordesc *) MALLOC(n * + sizeof(struct colordesc)); if (new != NULL) memcpy(VS(new), VS(cm->cdspace), cm->ncds * - sizeof(struct colordesc)); - } else - new = (struct colordesc *)REALLOC(cm->cd, - n * sizeof(struct colordesc)); - if (new == NULL) { + sizeof(struct colordesc)); + } + else + new = (struct colordesc *) REALLOC(cm->cd, + n * sizeof(struct colordesc)); + if (new == NULL) + { CERR(REG_ESPACE); return COLORLESS; } @@ -257,18 +275,19 @@ newcolor(struct colormap *cm) cd->flags = 0; cd->block = NULL; - return (color)(cd - cm->cd); + return (color) (cd - cm->cd); } /* * freecolor - free a color (must have no arcs or subcolor) */ static void -freecolor(struct colormap *cm, +freecolor(struct colormap * cm, pcolor co) { struct colordesc *cd = &cm->cd[co]; - color pco, nco; /* for freelist scan */ + color pco, + nco; /* for freelist scan */ assert(co >= 0); if (co == WHITE) @@ -278,35 +297,43 @@ freecolor(struct colormap *cm, assert(cd->sub == NOSUB); assert(cd->nchrs == 0); cd->flags = FREECOL; - if (cd->block != NULL) { + if (cd->block != NULL) + { FREE(cd->block); - cd->block = NULL; /* just paranoia */ + cd->block = NULL; /* just paranoia */ 
} - if ((size_t)co == cm->max) { + if ((size_t) co == cm->max) + { while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) cm->max--; assert(cm->free >= 0); - while ((size_t)cm->free > cm->max) + while ((size_t) cm->free > cm->max) cm->free = cm->cd[cm->free].sub; - if (cm->free > 0) { + if (cm->free > 0) + { assert(cm->free < cm->max); pco = cm->free; nco = cm->cd[pco].sub; while (nco > 0) - if ((size_t)nco > cm->max) { + if ((size_t) nco > cm->max) + { /* take this one out of freelist */ nco = cm->cd[nco].sub; cm->cd[pco].sub = nco; - } else { + } + else + { assert(nco < cm->max); pco = nco; nco = cm->cd[pco].sub; } } - } else { + } + else + { cd->sub = cm->free; - cm->free = (color)(cd - cm->cd); + cm->free = (color) (cd - cm->cd); } } @@ -314,9 +341,9 @@ freecolor(struct colormap *cm, * pseudocolor - allocate a false color, to be managed by other means */ static color -pseudocolor(struct colormap *cm) +pseudocolor(struct colormap * cm) { - color co; + color co; co = newcolor(cm); if (CISERR()) @@ -330,10 +357,10 @@ pseudocolor(struct colormap *cm) * subcolor - allocate a new subcolor (if necessary) to this chr */ static color -subcolor(struct colormap *cm, chr c) +subcolor(struct colormap * cm, chr c) { - color co; /* current color of c */ - color sco; /* new subcolor */ + color co; /* current color of c */ + color sco; /* new subcolor */ co = GETCOLOR(cm, c); sco = newsub(cm, co); @@ -341,8 +368,8 @@ subcolor(struct colormap *cm, chr c) return COLORLESS; assert(sco != COLORLESS); - if (co == sco) /* already in an open subcolor */ - return co; /* rest is redundant */ + if (co == sco) /* already in an open subcolor */ + return co; /* rest is redundant */ cm->cd[co].nchrs--; cm->cd[sco].nchrs++; setcolor(cm, c, sco); @@ -353,17 +380,19 @@ subcolor(struct colormap *cm, chr c) * newsub - allocate a new subcolor (if necessary) for a color */ static color -newsub(struct colormap *cm, +newsub(struct colormap * cm, pcolor co) { - color sco; /* new subcolor */ + color 
sco; /* new subcolor */ sco = cm->cd[co].sub; - if (sco == NOSUB) { /* color has no open subcolor */ - if (cm->cd[co].nchrs == 1) /* optimization */ + if (sco == NOSUB) + { /* color has no open subcolor */ + if (cm->cd[co].nchrs == 1) /* optimization */ return co; - sco = newcolor(cm); /* must create subcolor */ - if (sco == COLORLESS) { + sco = newcolor(cm); /* must create subcolor */ + if (sco == COLORLESS) + { assert(CISERR()); return COLORLESS; } @@ -379,23 +408,23 @@ newsub(struct colormap *cm, * subrange - allocate new subcolors to this range of chrs, fill in arcs */ static void -subrange(struct vars *v, +subrange(struct vars * v, chr from, chr to, - struct state *lp, - struct state *rp) + struct state * lp, + struct state * rp) { - uchr uf; - int i; + uchr uf; + int i; assert(from <= to); /* first, align "from" on a tree-block boundary */ - uf = (uchr)from; - i = (int)( ((uf + BYTTAB-1) & (uchr)~BYTMASK) - uf ); + uf = (uchr) from; + i = (int) (((uf + BYTTAB - 1) & (uchr) ~ BYTMASK) - uf); for (; from <= to && i > 0; i--, from++) newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); - if (from > to) /* didn't reach a boundary */ + if (from > to) /* didn't reach a boundary */ return; /* deal with whole blocks */ @@ -411,25 +440,25 @@ subrange(struct vars *v, * subblock - allocate new subcolors for one tree block of chrs, fill in arcs */ static void -subblock(struct vars *v, +subblock(struct vars * v, chr start, /* first of BYTTAB chrs */ - struct state *lp, - struct state *rp) + struct state * lp, + struct state * rp) { - uchr uc = start; + uchr uc = start; struct colormap *cm = v->cm; - int shift; - int level; - int i; - int b; + int shift; + int level; + int i; + int b; union tree *t; union tree *cb; union tree *fillt; union tree *lastt; - int previ; - int ndone; - color co; - color sco; + int previ; + int ndone; + color co; + color sco; assert((uc % BYTTAB) == 0); @@ -437,20 +466,23 @@ subblock(struct vars *v, t = cm->tree; fillt = NULL; for (level = 0, 
shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) { + level++, shift -= BYTBITS) + { b = (uc >> shift) & BYTMASK; lastt = t; t = lastt->tptr[b]; assert(t != NULL); - fillt = &cm->tree[level+1]; - if (t == fillt && shift > BYTBITS) { /* need new ptr block */ - t = (union tree *)MALLOC(sizeof(struct ptrs)); - if (t == NULL) { + fillt = &cm->tree[level + 1]; + if (t == fillt && shift > BYTBITS) + { /* need new ptr block */ + t = (union tree *) MALLOC(sizeof(struct ptrs)); + if (t == NULL) + { CERR(REG_ESPACE); return; } memcpy(VS(t->tptr), VS(fillt->tptr), - BYTTAB*sizeof(union tree *)); + BYTTAB * sizeof(union tree *)); lastt->tptr[b] = t; } } @@ -458,13 +490,16 @@ subblock(struct vars *v, /* special cases: fill block or solid block */ co = t->tcolor[0]; cb = cm->cd[co].block; - if (t == fillt || t == cb) { + if (t == fillt || t == cb) + { /* either way, we want a subcolor solid block */ sco = newsub(cm, co); t = cm->cd[sco].block; - if (t == NULL) { /* must set it up */ - t = (union tree *)MALLOC(sizeof(struct colors)); - if (t == NULL) { + if (t == NULL) + { /* must set it up */ + t = (union tree *) MALLOC(sizeof(struct colors)); + if (t == NULL) + { CERR(REG_ESPACE); return; } @@ -482,12 +517,14 @@ subblock(struct vars *v, /* general case, a mixed block to be altered */ i = 0; - while (i < BYTTAB) { + while (i < BYTTAB) + { co = t->tcolor[i]; sco = newsub(cm, co); newarc(v->nfa, PLAIN, sco, lp, rp); previ = i; - do { + do + { t->tcolor[i++] = sco; } while (i < BYTTAB && t->tcolor[i] == co); ndone = i - previ; @@ -500,30 +537,37 @@ subblock(struct vars *v, * okcolors - promote subcolors to full colors */ static void -okcolors(struct nfa *nfa, - struct colormap *cm) +okcolors(struct nfa * nfa, + struct colormap * cm) { struct colordesc *cd; struct colordesc *end = CDEND(cm); struct colordesc *scd; struct arc *a; - color co; - color sco; + color co; + color sco; - for (cd = cm->cd, co = 0; cd < end; cd++, co++) { + for (cd = cm->cd, co = 0; cd < 
end; cd++, co++) + { sco = cd->sub; - if (UNUSEDCOLOR(cd) || sco == NOSUB) { + if (UNUSEDCOLOR(cd) || sco == NOSUB) + { /* has no subcolor, no further action */ - } else if (sco == co) { + } + else if (sco == co) + { /* is subcolor, let parent deal with it */ - } else if (cd->nchrs == 0) { + } + else if (cd->nchrs == 0) + { /* parent empty, its arcs change color to subcolor */ cd->sub = NOSUB; scd = &cm->cd[sco]; assert(scd->nchrs > 0); assert(scd->sub == sco); scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) { + while ((a = cd->arcs) != NULL) + { assert(a->co == co); /* uncolorchain(cm, a); */ cd->arcs = a->colorchain; @@ -533,14 +577,17 @@ okcolors(struct nfa *nfa, scd->arcs = a; } freecolor(cm, co); - } else { + } + else + { /* parent's arcs must gain parallel subcolor arcs */ cd->sub = NOSUB; scd = &cm->cd[sco]; assert(scd->nchrs > 0); assert(scd->sub == sco); scd->sub = NOSUB; - for (a = cd->arcs; a != NULL; a = a->colorchain) { + for (a = cd->arcs; a != NULL; a = a->colorchain) + { assert(a->co == co); newarc(nfa, a->type, sco, a->from, a->to); } @@ -552,8 +599,8 @@ okcolors(struct nfa *nfa, * colorchain - add this arc to the color chain of its color */ static void -colorchain(struct colormap *cm, - struct arc *a) +colorchain(struct colormap * cm, + struct arc * a) { struct colordesc *cd = &cm->cd[a->co]; @@ -565,32 +612,33 @@ colorchain(struct colormap *cm, * uncolorchain - delete this arc from the color chain of its color */ static void -uncolorchain(struct colormap *cm, - struct arc *a) +uncolorchain(struct colormap * cm, + struct arc * a) { struct colordesc *cd = &cm->cd[a->co]; struct arc *aa; aa = cd->arcs; - if (aa == a) /* easy case */ + if (aa == a) /* easy case */ cd->arcs = a->colorchain; - else { + else + { for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) continue; assert(aa != NULL); aa->colorchain = a->colorchain; } - a->colorchain = NULL; /* paranoia */ + a->colorchain = NULL; /* paranoia */ } /* * singleton - is this character 
in its own color? */ -static int /* predicate */ -singleton(struct colormap *cm, +static int /* predicate */ +singleton(struct colormap * cm, chr c) { - color co; /* color of c */ + color co; /* color of c */ co = GETCOLOR(cm, c); if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) @@ -602,20 +650,20 @@ singleton(struct colormap *cm, * rainbow - add arcs of all full colors (but one) between specified states */ static void -rainbow(struct nfa *nfa, - struct colormap *cm, +rainbow(struct nfa * nfa, + struct colormap * cm, int type, pcolor but, /* COLORLESS if no exceptions */ - struct state *from, - struct state *to) + struct state * from, + struct state * to) { struct colordesc *cd; struct colordesc *end = CDEND(cm); - color co; + color co; for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but && - !(cd->flags&PSEUDO)) + !(cd->flags & PSEUDO)) newarc(nfa, type, co, from, to); } @@ -625,20 +673,21 @@ rainbow(struct nfa *nfa, * The calling sequence ought to be reconciled with cloneouts(). 
*/ static void -colorcomplement(struct nfa *nfa, - struct colormap *cm, +colorcomplement(struct nfa * nfa, + struct colormap * cm, int type, - struct state *of, /* complements of this guy's PLAIN outarcs */ - struct state *from, - struct state *to) + struct state * of, /* complements of this guy's PLAIN + * outarcs */ + struct state * from, + struct state * to) { struct colordesc *cd; struct colordesc *end = CDEND(cm); - color co; + color co; assert(of != from); for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) + if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO)) if (findarc(of, PLAIN, co) == NULL) newarc(nfa, type, co, from, to); } @@ -650,28 +699,29 @@ colorcomplement(struct nfa *nfa, * dumpcolors - debugging output */ static void -dumpcolors(struct colormap *cm, +dumpcolors(struct colormap * cm, FILE *f) { struct colordesc *cd; struct colordesc *end; - color co; - chr c; - char *has; + color co; + chr c; + char *has; - fprintf(f, "max %ld\n", (long)cm->max); + fprintf(f, "max %ld\n", (long) cm->max); if (NBYTS > 1) fillcheck(cm, cm->tree, 0, f); end = CDEND(cm); - for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ - if (!UNUSEDCOLOR(cd)) { + for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ + if (!UNUSEDCOLOR(cd)) + { assert(cd->nchrs > 0); has = (cd->block != NULL) ? 
"#" : ""; - if (cd->flags&PSEUDO) - fprintf(f, "#%2ld%s(ps): ", (long)co, has); + if (cd->flags & PSEUDO) + fprintf(f, "#%2ld%s(ps): ", (long) co, has); else - fprintf(f, "#%2ld%s(%2d): ", (long)co, - has, cd->nchrs); + fprintf(f, "#%2ld%s(%2d): ", (long) co, + has, cd->nchrs); /* it's hard to do this more efficiently */ for (c = CHR_MIN; c < CHR_MAX; c++) if (GETCOLOR(cm, c) == co) @@ -687,24 +737,26 @@ dumpcolors(struct colormap *cm, * fillcheck - check proper filling of a tree */ static void -fillcheck(struct colormap *cm, - union tree *tree, +fillcheck(struct colormap * cm, + union tree * tree, int level, /* level number (top == 0) of this block */ FILE *f) { - int i; + int i; union tree *t; - union tree *fillt = &cm->tree[level+1]; + union tree *fillt = &cm->tree[level + 1]; - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { + assert(level < NBYTS - 1); /* this level has pointers */ + for (i = BYTTAB - 1; i >= 0; i--) + { t = tree->tptr[i]; if (t == NULL) fprintf(f, "NULL found in filled tree!\n"); else if (t == fillt) - {} - else if (level < NBYTS-2) /* more pointer blocks below */ - fillcheck(cm, t, level+1, f); + { + } + else if (level < NBYTS - 2) /* more pointer blocks below */ + fillcheck(cm, t, level + 1, f); } } @@ -720,9 +772,9 @@ dumpchr(chr c, if (c == '\\') fprintf(f, "\\\\"); else if (c > ' ' && c <= '~') - putc((char)c, f); + putc((char) c, f); else - fprintf(f, "\\u%04lx", (long)c); + fprintf(f, "\\u%04lx", (long) c); } -#endif /* REG_DEBUG */ +#endif /* REG_DEBUG */ diff --git a/src/backend/regex/regc_cvec.c b/src/backend/regex/regc_cvec.c index 3b4e6ddb61b..502bbeeca7e 100644 --- a/src/backend/regex/regc_cvec.c +++ b/src/backend/regex/regc_cvec.c @@ -2,21 +2,21 @@ * Utility functions for handling cvecs * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 
+ * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /cvsroot/pgsql/src/backend/regex/regc_cvec.c,v 1.1 2003/02/05 17:41:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regc_cvec.c,v 1.2 2003/08/04 00:43:21 momjian Exp $ * */ @@ -40,23 +40,24 @@ newcvec(int nchrs, /* to hold this many chrs... */ int nranges, /* ... and this many ranges... */ int nmcces) /* ... 
and this many MCCEs */ { - size_t n; - size_t nc; - struct cvec *cv; - - nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; - n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) - + nc*sizeof(chr); - cv = (struct cvec *)MALLOC(n); - if (cv == NULL) { - return NULL; - } - cv->chrspace = nchrs; - cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ - cv->mccespace = nmcces; - cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); - cv->rangespace = nranges; - return clearcvec(cv); + size_t n; + size_t nc; + struct cvec *cv; + + nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2; + + n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *) + + nc * sizeof(chr); + cv = (struct cvec *) MALLOC(n); + if (cv == NULL) + return NULL; + cv->chrspace = nchrs; + cv->chrs = (chr *) & cv->mcces[nmcces]; /* chrs just after MCCE + * ptrs */ + cv->mccespace = nmcces; + cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1); + cv->rangespace = nranges; + return clearcvec(cv); } /* @@ -64,131 +65,125 @@ newcvec(int nchrs, /* to hold this many chrs... */ * Returns pointer as convenience. 
*/ static struct cvec * -clearcvec(struct cvec *cv) +clearcvec(struct cvec * cv) { - int i; - - assert(cv != NULL); - cv->nchrs = 0; - assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); - cv->nmcces = 0; - cv->nmccechrs = 0; - cv->nranges = 0; - for (i = 0; i < cv->mccespace; i++) { - cv->mcces[i] = NULL; - } - - return cv; + int i; + + assert(cv != NULL); + cv->nchrs = 0; + assert(cv->chrs == (chr *) & cv->mcces[cv->mccespace]); + cv->nmcces = 0; + cv->nmccechrs = 0; + cv->nranges = 0; + for (i = 0; i < cv->mccespace; i++) + cv->mcces[i] = NULL; + + return cv; } /* * addchr - add a chr to a cvec */ static void -addchr(struct cvec *cv, /* character vector */ - chr c) /* character to add */ +addchr(struct cvec * cv, /* character vector */ + chr c) /* character to add */ { - assert(cv->nchrs < cv->chrspace - cv->nmccechrs); - cv->chrs[cv->nchrs++] = (chr)c; + assert(cv->nchrs < cv->chrspace - cv->nmccechrs); + cv->chrs[cv->nchrs++] = (chr) c; } /* * addrange - add a range to a cvec */ static void -addrange(struct cvec *cv, /* character vector */ +addrange(struct cvec * cv, /* character vector */ chr from, /* first character of range */ chr to) /* last character of range */ { - assert(cv->nranges < cv->rangespace); - cv->ranges[cv->nranges*2] = (chr)from; - cv->ranges[cv->nranges*2 + 1] = (chr)to; - cv->nranges++; + assert(cv->nranges < cv->rangespace); + cv->ranges[cv->nranges * 2] = (chr) from; + cv->ranges[cv->nranges * 2 + 1] = (chr) to; + cv->nranges++; } /* * addmcce - add an MCCE to a cvec */ static void -addmcce(struct cvec *cv, /* character vector */ - chr *startp, /* beginning of text */ - chr *endp) /* just past end of text */ +addmcce(struct cvec * cv, /* character vector */ + chr * startp, /* beginning of text */ + chr * endp) /* just past end of text */ { - int len; - int i; - chr *s; - chr *d; - - if (startp == NULL && endp == NULL) { - return; - } - len = endp - startp; - assert(len > 0); - assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); - 
assert(cv->nmcces < cv->mccespace); - d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; - cv->mcces[cv->nmcces++] = d; - for (s = startp, i = len; i > 0; s++, i--) { - *d++ = *s; - } - *d++ = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); - cv->nmccechrs += len + 1; + int len; + int i; + chr *s; + chr *d; + + if (startp == NULL && endp == NULL) + return; + len = endp - startp; + assert(len > 0); + assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); + assert(cv->nmcces < cv->mccespace); + d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; + cv->mcces[cv->nmcces++] = d; + for (s = startp, i = len; i > 0; s++, i--) + *d++ = *s; + *d++ = 0; /* endmarker */ + assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); + cv->nmccechrs += len + 1; } /* * haschr - does a cvec contain this chr? */ -static int /* predicate */ -haschr(struct cvec *cv, /* character vector */ - chr c) /* character to test for */ +static int /* predicate */ +haschr(struct cvec * cv, /* character vector */ + chr c) /* character to test for */ { - int i; - chr *p; + int i; + chr *p; - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { - if (*p == c) { - return 1; + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) + { + if (*p == c) + return 1; } - } - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { - if ((*p <= c) && (c <= *(p+1))) { - return 1; + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) + { + if ((*p <= c) && (c <= *(p + 1))) + return 1; } - } - return 0; + return 0; } /* * getcvec - get a cvec, remembering it as v->cv */ static struct cvec * -getcvec(struct vars *v, /* context */ +getcvec(struct vars * v, /* context */ int nchrs, /* to hold this many chrs... */ int nranges, /* ... and this many ranges... */ int nmcces) /* ... 
and this many MCCEs */ { - if (v->cv != NULL && nchrs <= v->cv->chrspace && - nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) { - return clearcvec(v->cv); - } - - if (v->cv != NULL) { - freecvec(v->cv); - } - v->cv = newcvec(nchrs, nranges, nmcces); - if (v->cv == NULL) { - ERR(REG_ESPACE); - } - - return v->cv; + if (v->cv != NULL && nchrs <= v->cv->chrspace && + nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) + return clearcvec(v->cv); + + if (v->cv != NULL) + freecvec(v->cv); + v->cv = newcvec(nchrs, nranges, nmcces); + if (v->cv == NULL) + ERR(REG_ESPACE); + + return v->cv; } /* * freecvec - free a cvec */ static void -freecvec(struct cvec *cv) +freecvec(struct cvec * cv) { - FREE(cv); + FREE(cv); } diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 2f1a5840ff2..2407e06ef47 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -2,21 +2,21 @@ * lexical analyzer * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. 
- * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,159 +28,168 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /cvsroot/pgsql/src/backend/regex/regc_lex.c,v 1.1 2003/02/05 17:41:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regc_lex.c,v 1.2 2003/08/04 00:43:21 momjian Exp $ * */ /* scanning macros (know about v) */ -#define ATEOS() (v->now >= v->stop) -#define HAVE(n) (v->stop - v->now >= (n)) -#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) -#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) -#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ +#define ATEOS() (v->now >= v->stop) +#define HAVE(n) (v->stop - v->now >= (n)) +#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) +#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) +#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ *(v->now+1) == CHR(b) && \ *(v->now+2) == CHR(c)) -#define SET(c) (v->nexttype = (c)) -#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) -#define RET(c) return (SET(c), 1) -#define RETV(c, n) return (SETV(c, n), 1) -#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ -#define LASTTYPE(t) (v->lasttype == (t)) +#define SET(c) (v->nexttype = (c)) +#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) +#define RET(c) return (SET(c), 1) +#define RETV(c, n) return (SETV(c, n), 1) +#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ +#define LASTTYPE(t) (v->lasttype == (t)) /* lexical contexts */ -#define L_ERE 1 /* mainline ERE/ARE */ -#define L_BRE 2 /* mainline BRE */ -#define L_Q 3 /* REG_QUOTE */ -#define L_EBND 4 /* ERE/ARE bound */ -#define L_BBND 5 /* BRE bound */ -#define L_BRACK 6 /* brackets */ -#define L_CEL 7 /* 
collating element */ -#define L_ECL 8 /* equivalence class */ -#define L_CCL 9 /* character class */ -#define INTOCON(c) (v->lexcon = (c)) -#define INCON(con) (v->lexcon == (con)) +#define L_ERE 1 /* mainline ERE/ARE */ +#define L_BRE 2 /* mainline BRE */ +#define L_Q 3 /* REG_QUOTE */ +#define L_EBND 4 /* ERE/ARE bound */ +#define L_BBND 5 /* BRE bound */ +#define L_BRACK 6 /* brackets */ +#define L_CEL 7 /* collating element */ +#define L_ECL 8 /* equivalence class */ +#define L_CCL 9 /* character class */ +#define INTOCON(c) (v->lexcon = (c)) +#define INCON(con) (v->lexcon == (con)) /* construct pointer past end of chr array */ -#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) +#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) /* * lexstart - set up lexical stuff, scan leading options */ static void -lexstart(struct vars *v) +lexstart(struct vars * v) { - prefixes(v); /* may turn on new type bits etc. */ + prefixes(v); /* may turn on new type bits etc. */ NOERR(); - if (v->cflags®_QUOTE) { - assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); + if (v->cflags & REG_QUOTE) + { + assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))); INTOCON(L_Q); - } else if (v->cflags®_EXTENDED) { - assert(!(v->cflags®_QUOTE)); + } + else if (v->cflags & REG_EXTENDED) + { + assert(!(v->cflags & REG_QUOTE)); INTOCON(L_ERE); - } else { - assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); + } + else + { + assert(!(v->cflags & (REG_QUOTE | REG_ADVF))); INTOCON(L_BRE); } v->nexttype = EMPTY; /* remember we were at the start */ - next(v); /* set up the first token */ + next(v); /* set up the first token */ } /* * prefixes - implement various special prefixes */ static void -prefixes(struct vars *v) +prefixes(struct vars * v) { /* literal string doesn't get any of this stuff */ - if (v->cflags®_QUOTE) + if (v->cflags & REG_QUOTE) return; - /* initial "***" gets special things */ + /* initial "***" gets special things */ if (HAVE(4) && NEXT3('*', '*', 
'*')) - switch (*(v->now + 3)) { - case CHR('?'): /* "***?" error, msg shows version */ - ERR(REG_BADPAT); - return; /* proceed no further */ - break; - case CHR('='): /* "***=" shifts to literal string */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_QUOTE; - v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); - v->now += 4; - return; /* and there can be no more prefixes */ - break; - case CHR(':'): /* "***:" shifts to AREs */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_ADVANCED; - v->now += 4; - break; - default: /* otherwise *** is just an error */ - ERR(REG_BADRPT); - return; - break; + switch (*(v->now + 3)) + { + case CHR('?'): /* "***?" error, msg shows version */ + ERR(REG_BADPAT); + return; /* proceed no further */ + break; + case CHR('='): /* "***=" shifts to literal string */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_QUOTE; + v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE); + v->now += 4; + return; /* and there can be no more prefixes */ + break; + case CHR(':'): /* "***:" shifts to AREs */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_ADVANCED; + v->now += 4; + break; + default: /* otherwise *** is just an error */ + ERR(REG_BADRPT); + return; + break; } /* BREs and EREs don't get embedded options */ - if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) + if ((v->cflags & REG_ADVANCED) != REG_ADVANCED) return; /* embedded options (AREs only) */ - if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { + if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) + { NOTE(REG_UNONPOSIX); v->now += 2; for (; !ATEOS() && iscalpha(*v->now); v->now++) - switch (*v->now) { - case CHR('b'): /* BREs (but why???) 
*/ - v->cflags &= ~(REG_ADVANCED|REG_QUOTE); - break; - case CHR('c'): /* case sensitive */ - v->cflags &= ~REG_ICASE; - break; - case CHR('e'): /* plain EREs */ - v->cflags |= REG_EXTENDED; - v->cflags &= ~(REG_ADVF|REG_QUOTE); - break; - case CHR('i'): /* case insensitive */ - v->cflags |= REG_ICASE; - break; - case CHR('m'): /* Perloid synonym for n */ - case CHR('n'): /* \n affects ^ $ . [^ */ - v->cflags |= REG_NEWLINE; - break; - case CHR('p'): /* ~Perl, \n affects . [^ */ - v->cflags |= REG_NLSTOP; - v->cflags &= ~REG_NLANCH; - break; - case CHR('q'): /* literal string */ - v->cflags |= REG_QUOTE; - v->cflags &= ~REG_ADVANCED; - break; - case CHR('s'): /* single line, \n ordinary */ - v->cflags &= ~REG_NEWLINE; - break; - case CHR('t'): /* tight syntax */ - v->cflags &= ~REG_EXPANDED; - break; - case CHR('w'): /* weird, \n affects ^ $ only */ - v->cflags &= ~REG_NLSTOP; - v->cflags |= REG_NLANCH; - break; - case CHR('x'): /* expanded syntax */ - v->cflags |= REG_EXPANDED; - break; - default: - ERR(REG_BADOPT); - return; + switch (*v->now) + { + case CHR('b'): /* BREs (but why???) */ + v->cflags &= ~(REG_ADVANCED | REG_QUOTE); + break; + case CHR('c'): /* case sensitive */ + v->cflags &= ~REG_ICASE; + break; + case CHR('e'): /* plain EREs */ + v->cflags |= REG_EXTENDED; + v->cflags &= ~(REG_ADVF | REG_QUOTE); + break; + case CHR('i'): /* case insensitive */ + v->cflags |= REG_ICASE; + break; + case CHR('m'): /* Perloid synonym for n */ + case CHR('n'): /* \n affects ^ $ . [^ */ + v->cflags |= REG_NEWLINE; + break; + case CHR('p'): /* ~Perl, \n affects . 
[^ */ + v->cflags |= REG_NLSTOP; + v->cflags &= ~REG_NLANCH; + break; + case CHR('q'): /* literal string */ + v->cflags |= REG_QUOTE; + v->cflags &= ~REG_ADVANCED; + break; + case CHR('s'): /* single line, \n ordinary */ + v->cflags &= ~REG_NEWLINE; + break; + case CHR('t'): /* tight syntax */ + v->cflags &= ~REG_EXPANDED; + break; + case CHR('w'): /* weird, \n affects ^ $ only */ + v->cflags &= ~REG_NLSTOP; + v->cflags |= REG_NLANCH; + break; + case CHR('x'): /* expanded syntax */ + v->cflags |= REG_EXPANDED; + break; + default: + ERR(REG_BADOPT); + return; } - if (!NEXT1(')')) { + if (!NEXT1(')')) + { ERR(REG_BADOPT); return; } v->now++; - if (v->cflags&REG_QUOTE) - v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); + if (v->cflags & REG_QUOTE) + v->cflags &= ~(REG_EXPANDED | REG_NEWLINE); } } @@ -191,11 +200,11 @@ prefixes(struct vars *v) * implicit assumptions about what sorts of strings can be subroutines. */ static void -lexnest(struct vars *v, - chr *beginp, /* start of interpolation */ - chr *endp) /* one past end of interpolation */ +lexnest(struct vars * v, + chr * beginp, /* start of interpolation */ + chr * endp) /* one past end of interpolation */ { - assert(v->savenow == NULL); /* only one level of nesting */ + assert(v->savenow == NULL); /* only one level of nesting */ v->savenow = v->now; v->savestop = v->stop; v->now = beginp; @@ -205,47 +214,47 @@ lexnest(struct vars *v, /* * string constants to interpolate as expansions of things like \d */ -static chr backd[] = { /* \d */ +static chr backd[] = { /* \d */ CHR('['), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr backD[] = { /* \D */ +static chr backD[] = { /* \D */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbackd[] = { /* \d within brackets */ +static chr brbackd[] = { /* \d within brackets */ CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), 
CHR('t'), CHR(':'), CHR(']') }; -static chr backs[] = { /* \s */ +static chr backs[] = { /* \s */ CHR('['), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr backS[] = { /* \S */ +static chr backS[] = { /* \S */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbacks[] = { /* \s within brackets */ +static chr brbacks[] = { /* \s within brackets */ CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']') }; -static chr backw[] = { /* \w */ +static chr backw[] = { /* \w */ CHR('['), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr backW[] = { /* \W */ +static chr backW[] = { /* \W */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr brbackw[] = { /* \w within brackets */ +static chr brbackw[] = { /* \w within brackets */ CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_') @@ -256,7 +265,7 @@ static chr brbackw[] = { /* \w within brackets */ * Possibly ought to inquire whether there is a "word" character class. 
*/ static void -lexword(struct vars *v) +lexword(struct vars * v) { lexnest(v, backw, ENDOF(backw)); } @@ -264,60 +273,65 @@ lexword(struct vars *v) /* * next - get next token */ -static int /* 1 normal, 0 failure */ -next(struct vars *v) +static int /* 1 normal, 0 failure */ +next(struct vars * v) { - chr c; + chr c; /* errors yield an infinite sequence of failures */ if (ISERR()) - return 0; /* the error has set nexttype to EOS */ + return 0; /* the error has set nexttype to EOS */ /* remember flavor of last token */ v->lasttype = v->nexttype; /* REG_BOSONLY */ - if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) { + if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY)) + { /* at start of a REG_BOSONLY RE */ RETV(SBEGIN, 0); /* same as \A */ } /* if we're nested and we've hit end, return to outer level */ - if (v->savenow != NULL && ATEOS()) { + if (v->savenow != NULL && ATEOS()) + { v->now = v->savenow; v->stop = v->savestop; v->savenow = v->savestop = NULL; } /* skip white space etc. 
if appropriate (not in literal or []) */ - if (v->cflags®_EXPANDED) - switch (v->lexcon) { - case L_ERE: - case L_BRE: - case L_EBND: - case L_BBND: - skip(v); - break; + if (v->cflags & REG_EXPANDED) + switch (v->lexcon) + { + case L_ERE: + case L_BRE: + case L_EBND: + case L_BBND: + skip(v); + break; } /* handle EOS, depending on context */ - if (ATEOS()) { - switch (v->lexcon) { - case L_ERE: - case L_BRE: - case L_Q: - RET(EOS); - break; - case L_EBND: - case L_BBND: - FAILW(REG_EBRACE); - break; - case L_BRACK: - case L_CEL: - case L_ECL: - case L_CCL: - FAILW(REG_EBRACK); - break; + if (ATEOS()) + { + switch (v->lexcon) + { + case L_ERE: + case L_BRE: + case L_Q: + RET(EOS); + break; + case L_EBND: + case L_BBND: + FAILW(REG_EBRACE); + break; + case L_BRACK: + case L_CEL: + case L_ECL: + case L_CCL: + FAILW(REG_EBRACK); + break; } assert(NOTREACHED); } @@ -326,314 +340,365 @@ next(struct vars *v) c = *v->now++; /* deal with the easy contexts, punt EREs to code below */ - switch (v->lexcon) { - case L_BRE: /* punt BREs to separate function */ - return brenext(v, c); - break; - case L_ERE: /* see below */ - break; - case L_Q: /* literal strings are easy */ - RETV(PLAIN, c); - break; - case L_BBND: /* bounds are fairly simple */ - case L_EBND: - switch (c) { - case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): - case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): - case CHR('8'): case CHR('9'): - RETV(DIGIT, (chr)DIGITVAL(c)); + switch (v->lexcon) + { + case L_BRE: /* punt BREs to separate function */ + return brenext(v, c); break; - case CHR(','): - RET(','); + case L_ERE: /* see below */ break; - case CHR('}'): /* ERE bound ends with } */ - if (INCON(L_EBND)) { - INTOCON(L_ERE); - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('}', 0); - } - RETV('}', 1); - } else - FAILW(REG_BADBR); - break; - case CHR('\\'): /* BRE bound ends with \} */ - if (INCON(L_BBND) && NEXT1('}')) { - v->now++; - INTOCON(L_BRE); - 
RET('}'); - } else - FAILW(REG_BADBR); - break; - default: - FAILW(REG_BADBR); + case L_Q: /* literal strings are easy */ + RETV(PLAIN, c); break; - } - assert(NOTREACHED); - break; - case L_BRACK: /* brackets are not too hard */ - switch (c) { - case CHR(']'): - if (LASTTYPE('[')) - RETV(PLAIN, c); - else { - INTOCON((v->cflags®_EXTENDED) ? - L_ERE : L_BRE); - RET(']'); + case L_BBND: /* bounds are fairly simple */ + case L_EBND: + switch (c) + { + case CHR('0'): + case CHR('1'): + case CHR('2'): + case CHR('3'): + case CHR('4'): + case CHR('5'): + case CHR('6'): + case CHR('7'): + case CHR('8'): + case CHR('9'): + RETV(DIGIT, (chr) DIGITVAL(c)); + break; + case CHR(','): + RET(','); + break; + case CHR('}'): /* ERE bound ends with } */ + if (INCON(L_EBND)) + { + INTOCON(L_ERE); + if ((v->cflags & REG_ADVF) && NEXT1('?')) + { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('}', 0); + } + RETV('}', 1); + } + else + FAILW(REG_BADBR); + break; + case CHR('\\'): /* BRE bound ends with \} */ + if (INCON(L_BBND) && NEXT1('}')) + { + v->now++; + INTOCON(L_BRE); + RET('}'); + } + else + FAILW(REG_BADBR); + break; + default: + FAILW(REG_BADBR); + break; } + assert(NOTREACHED); break; - case CHR('\\'): - NOTE(REG_UBBS); - if (!(v->cflags®_ADVF)) - RETV(PLAIN, c); - NOTE(REG_UNONPOSIX); - if (ATEOS()) - FAILW(REG_EESCAPE); - (DISCARD)lexescape(v); - switch (v->nexttype) { /* not all escapes okay here */ - case PLAIN: - return 1; - break; - case CCLASS: - switch (v->nextvalue) { - case 'd': - lexnest(v, brbackd, ENDOF(brbackd)); + case L_BRACK: /* brackets are not too hard */ + switch (c) + { + case CHR(']'): + if (LASTTYPE('[')) + RETV(PLAIN, c); + else + { + INTOCON((v->cflags & REG_EXTENDED) ? 
+ L_ERE : L_BRE); + RET(']'); + } + break; + case CHR('\\'): + NOTE(REG_UBBS); + if (!(v->cflags & REG_ADVF)) + RETV(PLAIN, c); + NOTE(REG_UNONPOSIX); + if (ATEOS()) + FAILW(REG_EESCAPE); + (DISCARD) lexescape(v); + switch (v->nexttype) + { /* not all escapes okay here */ + case PLAIN: + return 1; + break; + case CCLASS: + switch (v->nextvalue) + { + case 'd': + lexnest(v, brbackd, ENDOF(brbackd)); + break; + case 's': + lexnest(v, brbacks, ENDOF(brbacks)); + break; + case 'w': + lexnest(v, brbackw, ENDOF(brbackw)); + break; + default: + FAILW(REG_EESCAPE); + break; + } + /* lexnest done, back up and try again */ + v->nexttype = v->lasttype; + return next(v); + break; + } + /* not one of the acceptable escapes */ + FAILW(REG_EESCAPE); break; - case 's': - lexnest(v, brbacks, ENDOF(brbacks)); + case CHR('-'): + if (LASTTYPE('[') || NEXT1(']')) + RETV(PLAIN, c); + else + RETV(RANGE, c); break; - case 'w': - lexnest(v, brbackw, ENDOF(brbackw)); + case CHR('['): + if (ATEOS()) + FAILW(REG_EBRACK); + switch (*v->now++) + { + case CHR('.'): + INTOCON(L_CEL); + /* might or might not be locale-specific */ + RET(COLLEL); + break; + case CHR('='): + INTOCON(L_ECL); + NOTE(REG_ULOCALE); + RET(ECLASS); + break; + case CHR(':'): + INTOCON(L_CCL); + NOTE(REG_ULOCALE); + RET(CCLASS); + break; + default: /* oops */ + v->now--; + RETV(PLAIN, c); + break; + } + assert(NOTREACHED); break; default: - FAILW(REG_EESCAPE); + RETV(PLAIN, c); break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - break; } - /* not one of the acceptable escapes */ - FAILW(REG_EESCAPE); + assert(NOTREACHED); break; - case CHR('-'): - if (LASTTYPE('[') || NEXT1(']')) - RETV(PLAIN, c); + case L_CEL: /* collating elements are easy */ + if (c == CHR('.') && NEXT1(']')) + { + v->now++; + INTOCON(L_BRACK); + RETV(END, '.'); + } else - RETV(RANGE, c); + RETV(PLAIN, c); break; - case CHR('['): - if (ATEOS()) - FAILW(REG_EBRACK); - switch (*v->now++) { - case CHR('.'): 
- INTOCON(L_CEL); - /* might or might not be locale-specific */ - RET(COLLEL); - break; - case CHR('='): - INTOCON(L_ECL); - NOTE(REG_ULOCALE); - RET(ECLASS); - break; - case CHR(':'): - INTOCON(L_CCL); - NOTE(REG_ULOCALE); - RET(CCLASS); - break; - default: /* oops */ - v->now--; + case L_ECL: /* ditto equivalence classes */ + if (c == CHR('=') && NEXT1(']')) + { + v->now++; + INTOCON(L_BRACK); + RETV(END, '='); + } + else RETV(PLAIN, c); - break; + break; + case L_CCL: /* ditto character classes */ + if (c == CHR(':') && NEXT1(']')) + { + v->now++; + INTOCON(L_BRACK); + RETV(END, ':'); } - assert(NOTREACHED); + else + RETV(PLAIN, c); break; default: - RETV(PLAIN, c); + assert(NOTREACHED); break; - } - assert(NOTREACHED); - break; - case L_CEL: /* collating elements are easy */ - if (c == CHR('.') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, '.'); - } else - RETV(PLAIN, c); - break; - case L_ECL: /* ditto equivalence classes */ - if (c == CHR('=') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, '='); - } else - RETV(PLAIN, c); - break; - case L_CCL: /* ditto character classes */ - if (c == CHR(':') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, ':'); - } else - RETV(PLAIN, c); - break; - default: - assert(NOTREACHED); - break; } /* that got rid of everything except EREs and AREs */ assert(INCON(L_ERE)); /* deal with EREs and AREs, except for backslashes */ - switch (c) { - case CHR('|'): - RET('|'); - break; - case CHR('*'): - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('*', 0); - } - RETV('*', 1); - break; - case CHR('+'): - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('+', 0); - } - RETV('+', 1); - break; - case CHR('?'): - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('?', 0); - } - RETV('?', 1); - break; - case CHR('{'): /* bounds start or plain character */ - if (v->cflags®_EXPANDED) - skip(v); - if (ATEOS() || 
!iscdigit(*v->now)) { - NOTE(REG_UBRACES); - NOTE(REG_UUNSPEC); - RETV(PLAIN, c); - } else { - NOTE(REG_UBOUNDS); - INTOCON(L_EBND); - RET('{'); - } - assert(NOTREACHED); - break; - case CHR('('): /* parenthesis, or advanced extension */ - if ((v->cflags®_ADVF) && NEXT1('?')) { - NOTE(REG_UNONPOSIX); - v->now++; - switch (*v->now++) { - case CHR(':'): /* non-capturing paren */ - RETV('(', 0); - break; - case CHR('#'): /* comment */ - while (!ATEOS() && *v->now != CHR(')')) - v->now++; - if (!ATEOS()) - v->now++; - assert(v->nexttype == v->lasttype); - return next(v); - break; - case CHR('='): /* positive lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 1); - break; - case CHR('!'): /* negative lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 0); - break; - default: - FAILW(REG_BADRPT); - break; + switch (c) + { + case CHR('|'): + RET('|'); + break; + case CHR('*'): + if ((v->cflags & REG_ADVF) && NEXT1('?')) + { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('*', 0); + } + RETV('*', 1); + break; + case CHR('+'): + if ((v->cflags & REG_ADVF) && NEXT1('?')) + { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('+', 0); + } + RETV('+', 1); + break; + case CHR('?'): + if ((v->cflags & REG_ADVF) && NEXT1('?')) + { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('?', 0); + } + RETV('?', 1); + break; + case CHR('{'): /* bounds start or plain character */ + if (v->cflags & REG_EXPANDED) + skip(v); + if (ATEOS() || !iscdigit(*v->now)) + { + NOTE(REG_UBRACES); + NOTE(REG_UUNSPEC); + RETV(PLAIN, c); + } + else + { + NOTE(REG_UBOUNDS); + INTOCON(L_EBND); + RET('{'); } assert(NOTREACHED); - } - if (v->cflags®_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); - break; - case CHR(')'): - if (LASTTYPE('(')) { - NOTE(REG_UUNSPEC); - } - RETV(')', c); - break; - case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ - if (HAVE(6) && *(v->now+0) == CHR('[') && - *(v->now+1) == CHR(':') && - (*(v->now+2) == CHR('<') || - *(v->now+2) == CHR('>')) && - *(v->now+3) == 
CHR(':') && - *(v->now+4) == CHR(']') && - *(v->now+5) == CHR(']')) { - c = *(v->now+2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - RET('^'); - break; - case CHR('$'): - RET('$'); - break; - case CHR('\\'): /* mostly punt backslashes to code below */ - if (ATEOS()) - FAILW(REG_EESCAPE); - break; - default: /* ordinary character */ - RETV(PLAIN, c); - break; + break; + case CHR('('): /* parenthesis, or advanced extension */ + if ((v->cflags & REG_ADVF) && NEXT1('?')) + { + NOTE(REG_UNONPOSIX); + v->now++; + switch (*v->now++) + { + case CHR(':'): /* non-capturing paren */ + RETV('(', 0); + break; + case CHR('#'): /* comment */ + while (!ATEOS() && *v->now != CHR(')')) + v->now++; + if (!ATEOS()) + v->now++; + assert(v->nexttype == v->lasttype); + return next(v); + break; + case CHR('='): /* positive lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 1); + break; + case CHR('!'): /* negative lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 0); + break; + default: + FAILW(REG_BADRPT); + break; + } + assert(NOTREACHED); + } + if (v->cflags & REG_NOSUB) + RETV('(', 0); /* all parens non-capturing */ + else + RETV('(', 1); + break; + case CHR(')'): + if (LASTTYPE('(')) + NOTE(REG_UUNSPEC); + RETV(')', c); + break; + case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ + if (HAVE(6) && *(v->now + 0) == CHR('[') && + *(v->now + 1) == CHR(':') && + (*(v->now + 2) == CHR('<') || + *(v->now + 2) == CHR('>')) && + *(v->now + 3) == CHR(':') && + *(v->now + 4) == CHR(']') && + *(v->now + 5) == CHR(']')) + { + c = *(v->now + 2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? 
'<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) + { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + RET('^'); + break; + case CHR('$'): + RET('$'); + break; + case CHR('\\'): /* mostly punt backslashes to code below */ + if (ATEOS()) + FAILW(REG_EESCAPE); + break; + default: /* ordinary character */ + RETV(PLAIN, c); + break; } /* ERE/ARE backslash handling; backslash already eaten */ assert(!ATEOS()); - if (!(v->cflags®_ADVF)) { /* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) { + if (!(v->cflags & REG_ADVF)) + { /* only AREs have non-trivial escapes */ + if (iscalnum(*v->now)) + { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, *v->now++); } - (DISCARD)lexescape(v); + (DISCARD) lexescape(v); if (ISERR()) FAILW(REG_EESCAPE); - if (v->nexttype == CCLASS) { /* fudge at lexical level */ - switch (v->nextvalue) { - case 'd': lexnest(v, backd, ENDOF(backd)); break; - case 'D': lexnest(v, backD, ENDOF(backD)); break; - case 's': lexnest(v, backs, ENDOF(backs)); break; - case 'S': lexnest(v, backS, ENDOF(backS)); break; - case 'w': lexnest(v, backw, ENDOF(backw)); break; - case 'W': lexnest(v, backW, ENDOF(backW)); break; - default: - assert(NOTREACHED); - FAILW(REG_ASSERT); - break; + if (v->nexttype == CCLASS) + { /* fudge at lexical level */ + switch (v->nextvalue) + { + case 'd': + lexnest(v, backd, ENDOF(backd)); + break; + case 'D': + lexnest(v, backD, ENDOF(backD)); + break; + case 's': + lexnest(v, backs, ENDOF(backs)); + break; + case 'S': + lexnest(v, backS, ENDOF(backS)); + break; + case 'w': + lexnest(v, backw, ENDOF(backw)); + break; + case 'W': + lexnest(v, backW, ENDOF(backW)); + break; + default: + assert(NOTREACHED); + FAILW(REG_ASSERT); + break; } /* lexnest done, back up and try again */ v->nexttype = v->lasttype; @@ -647,19 +712,20 @@ next(struct vars *v) * lexescape - parse an ARE backslash escape (backslash already eaten) * Note slightly nonstandard use of 
the CCLASS type code. */ -static int /* not actually used, but convenient for RETV */ -lexescape(struct vars *v) +static int /* not actually used, but convenient for + * RETV */ +lexescape(struct vars * v) { - chr c; - static chr alert[] = { + chr c; + static chr alert[] = { CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; - static chr esc[] = { + static chr esc[] = { CHR('E'), CHR('S'), CHR('C') }; - chr *save; + chr *save; - assert(v->cflags®_ADVF); + assert(v->cflags & REG_ADVF); assert(!ATEOS()); c = *v->now++; @@ -667,132 +733,142 @@ lexescape(struct vars *v) RETV(PLAIN, c); NOTE(REG_UNONPOSIX); - switch (c) { - case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); - break; - case CHR('A'): - RETV(SBEGIN, 0); - break; - case CHR('b'): - RETV(PLAIN, CHR('\b')); - break; - case CHR('B'): - RETV(PLAIN, CHR('\\')); - break; - case CHR('c'): - NOTE(REG_UUNPORT); - if (ATEOS()) - FAILW(REG_EESCAPE); - RETV(PLAIN, (chr)(*v->now++ & 037)); - break; - case CHR('d'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'd'); - break; - case CHR('D'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'D'); - break; - case CHR('e'): - NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); - break; - case CHR('f'): - RETV(PLAIN, CHR('\f')); - break; - case CHR('m'): - RET('<'); - break; - case CHR('M'): - RET('>'); - break; - case CHR('n'): - RETV(PLAIN, CHR('\n')); - break; - case CHR('r'): - RETV(PLAIN, CHR('\r')); - break; - case CHR('s'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 's'); - break; - case CHR('S'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'S'); - break; - case CHR('t'): - RETV(PLAIN, CHR('\t')); - break; - case CHR('u'): - c = lexdigits(v, 16, 4, 4); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('U'): - c = lexdigits(v, 16, 8, 8); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('v'): - RETV(PLAIN, CHR('\v')); - break; - case CHR('w'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'w'); - break; - case 
CHR('W'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'W'); - break; - case CHR('x'): - NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('y'): - NOTE(REG_ULOCALE); - RETV(WBDRY, 0); - break; - case CHR('Y'): - NOTE(REG_ULOCALE); - RETV(NWBDRY, 0); - break; - case CHR('Z'): - RETV(SEND, 0); - break; - case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): - case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): - case CHR('9'): - save = v->now; - v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int)c <= v->nsubexp) { - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr)c); - } - /* oops, doesn't look like it's a backref after all... */ - v->now = save; - /* and fall through into octal number */ - case CHR('0'): - NOTE(REG_UUNPORT); - v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ - break; + switch (c) + { + case CHR('a'): + RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); + break; + case CHR('A'): + RETV(SBEGIN, 0); + break; + case CHR('b'): + RETV(PLAIN, CHR('\b')); + break; + case CHR('B'): + RETV(PLAIN, CHR('\\')); + break; + case CHR('c'): + NOTE(REG_UUNPORT); + if (ATEOS()) + FAILW(REG_EESCAPE); + RETV(PLAIN, (chr) (*v->now++ & 037)); + break; + case CHR('d'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'd'); + break; + case CHR('D'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'D'); + break; + case CHR('e'): + NOTE(REG_UUNPORT); + RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); + break; + case CHR('f'): + RETV(PLAIN, CHR('\f')); + break; + case CHR('m'): + RET('<'); + break; + case CHR('M'): + RET('>'); + break; + 
case CHR('n'): + RETV(PLAIN, CHR('\n')); + break; + case CHR('r'): + RETV(PLAIN, CHR('\r')); + break; + case CHR('s'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 's'); + break; + case CHR('S'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'S'); + break; + case CHR('t'): + RETV(PLAIN, CHR('\t')); + break; + case CHR('u'): + c = lexdigits(v, 16, 4, 4); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('U'): + c = lexdigits(v, 16, 8, 8); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('v'): + RETV(PLAIN, CHR('\v')); + break; + case CHR('w'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'w'); + break; + case CHR('W'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'W'); + break; + case CHR('x'): + NOTE(REG_UUNPORT); + c = lexdigits(v, 16, 1, 255); /* REs >255 long outside + * spec */ + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('y'): + NOTE(REG_ULOCALE); + RETV(WBDRY, 0); + break; + case CHR('Y'): + NOTE(REG_ULOCALE); + RETV(NWBDRY, 0); + break; + case CHR('Z'): + RETV(SEND, 0); + break; + case CHR('1'): + case CHR('2'): + case CHR('3'): + case CHR('4'): + case CHR('5'): + case CHR('6'): + case CHR('7'): + case CHR('8'): + case CHR('9'): + save = v->now; + v->now--; /* put first digit back */ + c = lexdigits(v, 10, 1, 255); /* REs >255 long outside + * spec */ + if (ISERR()) + FAILW(REG_EESCAPE); + /* ugly heuristic (first test is "exactly 1 digit?") */ + if (v->now - save == 0 || (int) c <= v->nsubexp) + { + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr) c); + } + /* oops, doesn't look like it's a backref after all... 
*/ + v->now = save; + /* and fall through into octal number */ + case CHR('0'): + NOTE(REG_UUNPORT); + v->now--; /* put first digit back */ + c = lexdigits(v, 8, 1, 3); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + default: + assert(iscalpha(c)); + FAILW(REG_EESCAPE); /* unknown alphabetic escape */ + break; } assert(NOTREACHED); } @@ -800,51 +876,79 @@ lexescape(struct vars *v) /* * lexdigits - slurp up digits and return chr value */ -static chr /* chr value; errors signalled via ERR */ -lexdigits(struct vars *v, +static chr /* chr value; errors signalled via ERR */ +lexdigits(struct vars * v, int base, int minlen, int maxlen) { - uchr n; /* unsigned to avoid overflow misbehavior */ - int len; - chr c; - int d; - const uchr ub = (uchr) base; + uchr n; /* unsigned to avoid overflow misbehavior */ + int len; + chr c; + int d; + const uchr ub = (uchr) base; n = 0; - for (len = 0; len < maxlen && !ATEOS(); len++) { + for (len = 0; len < maxlen && !ATEOS(); len++) + { c = *v->now++; - switch (c) { - case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): - case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): - case CHR('8'): case CHR('9'): - d = DIGITVAL(c); - break; - case CHR('a'): case CHR('A'): d = 10; break; - case CHR('b'): case CHR('B'): d = 11; break; - case CHR('c'): case CHR('C'): d = 12; break; - case CHR('d'): case CHR('D'): d = 13; break; - case CHR('e'): case CHR('E'): d = 14; break; - case CHR('f'): case CHR('F'): d = 15; break; - default: - v->now--; /* oops, not a digit at all */ - d = -1; - break; + switch (c) + { + case CHR('0'): + case CHR('1'): + case CHR('2'): + case CHR('3'): + case CHR('4'): + case CHR('5'): + case CHR('6'): + case CHR('7'): + case CHR('8'): + case CHR('9'): + d = DIGITVAL(c); + break; + case CHR('a'): + case CHR('A'): + d = 10; + break; + case CHR('b'): + case CHR('B'): + d = 11; + break; + case CHR('c'): + case CHR('C'): + d = 12; + break; + case CHR('d'): + case CHR('D'): + d = 13; + break; + 
case CHR('e'): + case CHR('E'): + d = 14; + break; + case CHR('f'): + case CHR('F'): + d = 15; + break; + default: + v->now--; /* oops, not a digit at all */ + d = -1; + break; } - if (d >= base) { /* not a plausible digit */ + if (d >= base) + { /* not a plausible digit */ v->now--; d = -1; } if (d < 0) - break; /* NOTE BREAK OUT */ - n = n*ub + (uchr)d; + break; /* NOTE BREAK OUT */ + n = n * ub + (uchr) d; } if (len < minlen) ERR(REG_EESCAPE); - return (chr)n; + return (chr) n; } /* @@ -853,66 +957,71 @@ lexdigits(struct vars *v, * This is much like EREs except for all the stupid backslashes and the * context-dependency of some things. */ -static int /* 1 normal, 0 failure */ -brenext(struct vars *v, +static int /* 1 normal, 0 failure */ +brenext(struct vars * v, chr pc) { - chr c = (chr)pc; + chr c = (chr) pc; - switch (c) { - case CHR('*'): - if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) + switch (c) + { + case CHR('*'): + if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) + RETV(PLAIN, c); + RET('*'); + break; + case CHR('['): + if (HAVE(6) && *(v->now + 0) == CHR('[') && + *(v->now + 1) == CHR(':') && + (*(v->now + 2) == CHR('<') || + *(v->now + 2) == CHR('>')) && + *(v->now + 3) == CHR(':') && + *(v->now + 4) == CHR(']') && + *(v->now + 5) == CHR(']')) + { + c = *(v->now + 2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? 
'<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) + { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + if (LASTTYPE(EMPTY)) + RET('^'); + if (LASTTYPE('(')) + { + NOTE(REG_UUNSPEC); + RET('^'); + } RETV(PLAIN, c); - RET('*'); - break; - case CHR('['): - if (HAVE(6) && *(v->now+0) == CHR('[') && - *(v->now+1) == CHR(':') && - (*(v->now+2) == CHR('<') || - *(v->now+2) == CHR('>')) && - *(v->now+3) == CHR(':') && - *(v->now+4) == CHR(']') && - *(v->now+5) == CHR(']')) { - c = *(v->now+2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - if (LASTTYPE(EMPTY)) - RET('^'); - if (LASTTYPE('(')) { - NOTE(REG_UUNSPEC); - RET('^'); - } - RETV(PLAIN, c); - break; - case CHR('$'): - if (v->cflags&REG_EXPANDED) - skip(v); - if (ATEOS()) - RET('$'); - if (NEXT2('\\', ')')) { - NOTE(REG_UUNSPEC); - RET('$'); - } - RETV(PLAIN, c); - break; - case CHR('\\'): - break; /* see below */ - default: - RETV(PLAIN, c); - break; + break; + case CHR('$'): + if (v->cflags & REG_EXPANDED) + skip(v); + if (ATEOS()) + RET('$'); + if (NEXT2('\\', ')')) + { + NOTE(REG_UUNSPEC); + RET('$'); + } + RETV(PLAIN, c); + break; + case CHR('\\'): + break; /* see below */ + default: + RETV(PLAIN, c); + break; } assert(c == CHR('\\')); @@ -921,39 +1030,47 @@ brenext(struct vars *v, FAILW(REG_EESCAPE); c = *v->now++; - switch (c) { - case CHR('{'): - INTOCON(L_BBND); - NOTE(REG_UBOUNDS); - RET('{'); - break; - case CHR('('): - RETV('(', 1); - break; - case CHR(')'): - RETV(')', c); - break; - case CHR('<'): - NOTE(REG_UNONPOSIX); - RET('<'); - break; - case CHR('>'): - NOTE(REG_UNONPOSIX); - RET('>'); - break; - case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): - case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): - case CHR('9'): - 
NOTE(REG_UBACKREF); - RETV(BACKREF, (chr)DIGITVAL(c)); - break; - default: - if (iscalnum(c)) { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, c); - break; + switch (c) + { + case CHR('{'): + INTOCON(L_BBND); + NOTE(REG_UBOUNDS); + RET('{'); + break; + case CHR('('): + RETV('(', 1); + break; + case CHR(')'): + RETV(')', c); + break; + case CHR('<'): + NOTE(REG_UNONPOSIX); + RET('<'); + break; + case CHR('>'): + NOTE(REG_UNONPOSIX); + RET('>'); + break; + case CHR('1'): + case CHR('2'): + case CHR('3'): + case CHR('4'): + case CHR('5'): + case CHR('6'): + case CHR('7'): + case CHR('8'): + case CHR('9'): + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr) DIGITVAL(c)); + break; + default: + if (iscalnum(c)) + { + NOTE(REG_UBSALNUM); + NOTE(REG_UUNSPEC); + } + RETV(PLAIN, c); + break; } assert(NOTREACHED); @@ -963,13 +1080,14 @@ brenext(struct vars *v, * skip - skip white space and comments in expanded form */ static void -skip(struct vars *v) +skip(struct vars * v) { - chr *start = v->now; + chr *start = v->now; - assert(v->cflags&REG_EXPANDED); + assert(v->cflags & REG_EXPANDED); - for (;;) { + for (;;) + { while (!ATEOS() && iscspace(*v->now)) v->now++; if (ATEOS() || *v->now != CHR('#')) @@ -1002,14 +1120,14 @@ newline(void) * use that it hardly matters. 
*/ static chr -chrnamed(struct vars *v, - chr *startp, /* start of name */ - chr *endp, /* just past end of name */ +chrnamed(struct vars * v, + chr * startp, /* start of name */ + chr * endp, /* just past end of name */ chr lastresort) /* what to return if name lookup fails */ { - celt c; - int errsave; - int e; + celt c; + int errsave; + int e; struct cvec *cv; errsave = v->err; @@ -1019,10 +1137,10 @@ chrnamed(struct vars *v, v->err = errsave; if (e != 0) - return (chr)lastresort; + return (chr) lastresort; cv = range(v, c, c, 0); if (cv->nchrs == 0) - return (chr)lastresort; + return (chr) lastresort; return cv->chrs[0]; } diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c index 41ea9fe1f29..69c7fd7214a 100644 --- a/src/backend/regex/regc_locale.c +++ b/src/backend/regex/regc_locale.c @@ -1,4 +1,4 @@ -/* +/* * regc_locale.c -- * * This file contains locale-specific regexp routines. @@ -11,7 +11,7 @@ * Corporation and other parties. The following terms apply to all files * associated with the software unless explicitly disclaimed in * individual files. - * + * * The authors hereby grant permission to use, copy, modify, distribute, * and license this software and its documentation for any purpose, provided * that existing copyright notices are retained in all copies and that this @@ -21,137 +21,332 @@ * and need not follow the licensing terms described here, provided that * the new terms are clearly indicated on the first page of each file where * they apply. - * + * * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * + * * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE + * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR * MODIFICATIONS. - * + * * GOVERNMENT USE: If you are acquiring this software on behalf of the * U.S. government, the Government shall have only "Restricted Rights" - * in the software and related documentation as defined in the Federal - * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you + * in the software and related documentation as defined in the Federal + * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you * are acquiring the software on behalf of the Department of Defense, the * software shall be classified as "Commercial Computer Software" and the * Government shall have only "Restricted Rights" as defined in Clause * 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the * authors grant the U.S. Government and others acting in its behalf * permission to use and distribute the software in accordance with the - * terms specified in this license. + * terms specified in this license. 
* - * $Header: /cvsroot/pgsql/src/backend/regex/regc_locale.c,v 1.1 2003/02/05 17:41:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regc_locale.c,v 1.2 2003/08/04 00:43:21 momjian Exp $ */ /* ASCII character-name table */ -static struct cname { - char *name; - char code; -} cnames[] = { - {"NUL", '\0'}, - {"SOH", '\001'}, - {"STX", '\002'}, - {"ETX", '\003'}, - {"EOT", '\004'}, - {"ENQ", '\005'}, - {"ACK", '\006'}, - {"BEL", '\007'}, - {"alert", '\007'}, - {"BS", '\010'}, - {"backspace", '\b'}, - {"HT", '\011'}, - {"tab", '\t'}, - {"LF", '\012'}, - {"newline", '\n'}, - {"VT", '\013'}, - {"vertical-tab", '\v'}, - {"FF", '\014'}, - {"form-feed", '\f'}, - {"CR", '\015'}, - {"carriage-return", '\r'}, - {"SO", '\016'}, - {"SI", '\017'}, - {"DLE", '\020'}, - {"DC1", '\021'}, - {"DC2", '\022'}, - {"DC3", '\023'}, - {"DC4", '\024'}, - {"NAK", '\025'}, - {"SYN", '\026'}, - {"ETB", '\027'}, - {"CAN", '\030'}, - {"EM", '\031'}, - {"SUB", '\032'}, - {"ESC", '\033'}, - {"IS4", '\034'}, - {"FS", '\034'}, - {"IS3", '\035'}, - {"GS", '\035'}, - {"IS2", '\036'}, - {"RS", '\036'}, - {"IS1", '\037'}, - {"US", '\037'}, - {"space", ' '}, - {"exclamation-mark",'!'}, - {"quotation-mark", '"'}, - {"number-sign", '#'}, - {"dollar-sign", '$'}, - {"percent-sign", '%'}, - {"ampersand", '&'}, - {"apostrophe", '\''}, - {"left-parenthesis",'('}, - {"right-parenthesis", ')'}, - {"asterisk", '*'}, - {"plus-sign", '+'}, - {"comma", ','}, - {"hyphen", '-'}, - {"hyphen-minus", '-'}, - {"period", '.'}, - {"full-stop", '.'}, - {"slash", '/'}, - {"solidus", '/'}, - {"zero", '0'}, - {"one", '1'}, - {"two", '2'}, - {"three", '3'}, - {"four", '4'}, - {"five", '5'}, - {"six", '6'}, - {"seven", '7'}, - {"eight", '8'}, - {"nine", '9'}, - {"colon", ':'}, - {"semicolon", ';'}, - {"less-than-sign", '<'}, - {"equals-sign", '='}, - {"greater-than-sign", '>'}, - {"question-mark", '?'}, - {"commercial-at", '@'}, - {"left-square-bracket", '['}, - {"backslash", '\\'}, - {"reverse-solidus", '\\'}, - 
{"right-square-bracket", ']'}, - {"circumflex", '^'}, - {"circumflex-accent", '^'}, - {"underscore", '_'}, - {"low-line", '_'}, - {"grave-accent", '`'}, - {"left-brace", '{'}, - {"left-curly-bracket", '{'}, - {"vertical-line", '|'}, - {"right-brace", '}'}, - {"right-curly-bracket", '}'}, - {"tilde", '~'}, - {"DEL", '\177'}, - {NULL, 0} +static struct cname +{ + char *name; + char code; +} cnames[] = + +{ + { + "NUL", '\0' + }, + { + "SOH", '\001' + }, + { + "STX", '\002' + }, + { + "ETX", '\003' + }, + { + "EOT", '\004' + }, + { + "ENQ", '\005' + }, + { + "ACK", '\006' + }, + { + "BEL", '\007' + }, + { + "alert", '\007' + }, + { + "BS", '\010' + }, + { + "backspace", '\b' + }, + { + "HT", '\011' + }, + { + "tab", '\t' + }, + { + "LF", '\012' + }, + { + "newline", '\n' + }, + { + "VT", '\013' + }, + { + "vertical-tab", '\v' + }, + { + "FF", '\014' + }, + { + "form-feed", '\f' + }, + { + "CR", '\015' + }, + { + "carriage-return", '\r' + }, + { + "SO", '\016' + }, + { + "SI", '\017' + }, + { + "DLE", '\020' + }, + { + "DC1", '\021' + }, + { + "DC2", '\022' + }, + { + "DC3", '\023' + }, + { + "DC4", '\024' + }, + { + "NAK", '\025' + }, + { + "SYN", '\026' + }, + { + "ETB", '\027' + }, + { + "CAN", '\030' + }, + { + "EM", '\031' + }, + { + "SUB", '\032' + }, + { + "ESC", '\033' + }, + { + "IS4", '\034' + }, + { + "FS", '\034' + }, + { + "IS3", '\035' + }, + { + "GS", '\035' + }, + { + "IS2", '\036' + }, + { + "RS", '\036' + }, + { + "IS1", '\037' + }, + { + "US", '\037' + }, + { + "space", ' ' + }, + { + "exclamation-mark", '!' + }, + { + "quotation-mark", '"' + }, + { + "number-sign", '#' + }, + { + "dollar-sign", '$' + }, + { + "percent-sign", '%' + }, + { + "ampersand", '&' + }, + { + "apostrophe", '\'' + }, + { + "left-parenthesis", '(' + }, + { + "right-parenthesis", ')' + }, + { + "asterisk", '*' + }, + { + "plus-sign", '+' + }, + { + "comma", ',' + }, + { + "hyphen", '-' + }, + { + "hyphen-minus", '-' + }, + { + "period", '.' + }, + { + "full-stop", '.' 
+ }, + { + "slash", '/' + }, + { + "solidus", '/' + }, + { + "zero", '0' + }, + { + "one", '1' + }, + { + "two", '2' + }, + { + "three", '3' + }, + { + "four", '4' + }, + { + "five", '5' + }, + { + "six", '6' + }, + { + "seven", '7' + }, + { + "eight", '8' + }, + { + "nine", '9' + }, + { + "colon", ':' + }, + { + "semicolon", ';' + }, + { + "less-than-sign", '<' + }, + { + "equals-sign", '=' + }, + { + "greater-than-sign", '>' + }, + { + "question-mark", '?' + }, + { + "commercial-at", '@' + }, + { + "left-square-bracket", '[' + }, + { + "backslash", '\\' + }, + { + "reverse-solidus", '\\' + }, + { + "right-square-bracket", ']' + }, + { + "circumflex", '^' + }, + { + "circumflex-accent", '^' + }, + { + "underscore", '_' + }, + { + "low-line", '_' + }, + { + "grave-accent", '`' + }, + { + "left-brace", '{' + }, + { + "left-curly-bracket", '{' + }, + { + "vertical-line", '|' + }, + { + "right-brace", '}' + }, + { + "right-curly-bracket", '}' + }, + { + "tilde", '~' + }, + { + "DEL", '\177' + }, + { + NULL, 0 + } }; /* @@ -226,132 +421,134 @@ pg_tolower(pg_wchar c) * nmcces - how many distinct MCCEs are there? */ static int -nmcces(struct vars *v) +nmcces(struct vars * v) { - /* - * No multi-character collating elements defined at the moment. - */ - return 0; + /* + * No multi-character collating elements defined at the moment. + */ + return 0; } /* * nleaders - how many chrs can be first chrs of MCCEs? 
*/ static int -nleaders(struct vars *v) +nleaders(struct vars * v) { - return 0; + return 0; } /* * allmcces - return a cvec with all the MCCEs of the locale */ static struct cvec * -allmcces(struct vars *v, /* context */ - struct cvec *cv) /* this is supposed to have enough room */ +allmcces(struct vars * v, /* context */ + struct cvec * cv) /* this is supposed to have enough room */ { - return clearcvec(cv); + return clearcvec(cv); } /* * element - map collating-element name to celt */ static celt -element(struct vars *v, /* context */ - chr *startp, /* points to start of name */ - chr *endp) /* points just past end of name */ +element(struct vars * v, /* context */ + chr * startp, /* points to start of name */ + chr * endp) /* points just past end of name */ { - struct cname *cn; - size_t len; - - /* generic: one-chr names stand for themselves */ - assert(startp < endp); - len = endp - startp; - if (len == 1) { - return *startp; - } - - NOTE(REG_ULOCALE); - - /* search table */ - for (cn=cnames; cn->name!=NULL; cn++) { - if (strlen(cn->name)==len && - pg_char_and_wchar_strncmp(cn->name, startp, len)==0) { - break; /* NOTE BREAK OUT */ + struct cname *cn; + size_t len; + + /* generic: one-chr names stand for themselves */ + assert(startp < endp); + len = endp - startp; + if (len == 1) + return *startp; + + NOTE(REG_ULOCALE); + + /* search table */ + for (cn = cnames; cn->name != NULL; cn++) + { + if (strlen(cn->name) == len && + pg_char_and_wchar_strncmp(cn->name, startp, len) == 0) + { + break; /* NOTE BREAK OUT */ + } } - } - if (cn->name != NULL) { - return CHR(cn->code); - } - - /* couldn't find it */ - ERR(REG_ECOLLATE); - return 0; + if (cn->name != NULL) + return CHR(cn->code); + + /* couldn't find it */ + ERR(REG_ECOLLATE); + return 0; } /* * range - supply cvec for a range, including legality check */ static struct cvec * -range(struct vars *v, /* context */ - celt a, /* range start */ - celt b, /* range end, might equal a */ +range(struct vars * v, /* 
context */ + celt a, /* range start */ + celt b, /* range end, might equal a */ int cases) /* case-independent? */ { - int nchrs; - struct cvec *cv; - celt c, lc, uc; - - if (a != b && !before(a, b)) { - ERR(REG_ERANGE); - return NULL; - } + int nchrs; + struct cvec *cv; + celt c, + lc, + uc; + + if (a != b && !before(a, b)) + { + ERR(REG_ERANGE); + return NULL; + } - if (!cases) { /* easy version */ - cv = getcvec(v, 0, 1, 0); - NOERRN(); - addrange(cv, a, b); - return cv; - } + if (!cases) + { /* easy version */ + cv = getcvec(v, 0, 1, 0); + NOERRN(); + addrange(cv, a, b); + return cv; + } - /* - * When case-independent, it's hard to decide when cvec ranges are - * usable, so for now at least, we won't try. We allocate enough - * space for two case variants plus a little extra for the two - * title case variants. - */ + /* + * When case-independent, it's hard to decide when cvec ranges are + * usable, so for now at least, we won't try. We allocate enough + * space for two case variants plus a little extra for the two title + * case variants. + */ - nchrs = (b - a + 1)*2 + 4; + nchrs = (b - a + 1) * 2 + 4; - cv = getcvec(v, nchrs, 0, 0); - NOERRN(); + cv = getcvec(v, nchrs, 0, 0); + NOERRN(); - for (c=a; c<=b; c++) { - addchr(cv, c); - lc = pg_tolower((chr)c); - if (c != lc) { - addchr(cv, lc); - } - uc = pg_toupper((chr)c); - if (c != uc) { - addchr(cv, uc); + for (c = a; c <= b; c++) + { + addchr(cv, c); + lc = pg_tolower((chr) c); + if (c != lc) + addchr(cv, lc); + uc = pg_toupper((chr) c); + if (c != uc) + addchr(cv, uc); } - } - return cv; + return cv; } /* * before - is celt x before celt y, for purposes of range legality? */ -static int /* predicate */ +static int /* predicate */ before(celt x, celt y) { - /* trivial because no MCCEs */ - if (x < y) { - return 1; - } - return 0; + /* trivial because no MCCEs */ + if (x < y) + return 1; + return 0; } /* @@ -359,33 +556,34 @@ before(celt x, celt y) * Must include case counterparts on request. 
*/ static struct cvec * -eclass(struct vars *v, /* context */ - celt c, /* Collating element representing - * the equivalence class. */ +eclass(struct vars * v, /* context */ + celt c, /* Collating element representing the + * equivalence class. */ int cases) /* all cases? */ { - struct cvec *cv; - - /* crude fake equivalence class for testing */ - if ((v->cflags&REG_FAKE) && c == 'x') { - cv = getcvec(v, 4, 0, 0); - addchr(cv, (chr)'x'); - addchr(cv, (chr)'y'); - if (cases) { - addchr(cv, (chr)'X'); - addchr(cv, (chr)'Y'); + struct cvec *cv; + + /* crude fake equivalence class for testing */ + if ((v->cflags & REG_FAKE) && c == 'x') + { + cv = getcvec(v, 4, 0, 0); + addchr(cv, (chr) 'x'); + addchr(cv, (chr) 'y'); + if (cases) + { + addchr(cv, (chr) 'X'); + addchr(cv, (chr) 'Y'); + } + return cv; } + + /* otherwise, none */ + if (cases) + return allcases(v, c); + cv = getcvec(v, 1, 0, 0); + assert(cv != NULL); + addchr(cv, (chr) c); return cv; - } - - /* otherwise, none */ - if (cases) { - return allcases(v, c); - } - cv = getcvec(v, 1, 0, 0); - assert(cv != NULL); - addchr(cv, (chr)c); - return cv; } /* @@ -394,164 +592,182 @@ eclass(struct vars *v, /* context */ * Must include case counterparts on request. */ static struct cvec * -cclass(struct vars *v, /* context */ - chr *startp, /* where the name starts */ - chr *endp, /* just past the end of the name */ +cclass(struct vars * v, /* context */ + chr * startp, /* where the name starts */ + chr * endp, /* just past the end of the name */ int cases) /* case-independent? */ { - size_t len; - struct cvec *cv = NULL; - char **namePtr; - int i, index; - - /* - * The following arrays define the valid character class names. 
- */ - - static char *classNames[] = { - "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", - "lower", "print", "punct", "space", "upper", "xdigit", NULL - }; - - enum classes { - CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, - CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT - }; - - /* - * Map the name to the corresponding enumerated value. - */ - len = endp - startp; - index = -1; - for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { - if (strlen(*namePtr) == len && - pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0) { - index = i; - break; + size_t len; + struct cvec *cv = NULL; + char **namePtr; + int i, + index; + + /* + * The following arrays define the valid character class names. + */ + + static char *classNames[] = { + "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", + "lower", "print", "punct", "space", "upper", "xdigit", NULL + }; + + enum classes + { + CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, + CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT + }; + + /* + * Map the name to the corresponding enumerated value. + */ + len = endp - startp; + index = -1; + for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) + { + if (strlen(*namePtr) == len && + pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0) + { + index = i; + break; + } + } + if (index == -1) + { + ERR(REG_ECTYPE); + return NULL; } - } - if (index == -1) { - ERR(REG_ECTYPE); - return NULL; - } - /* - * Remap lower and upper to alpha if the match is case insensitive. - */ + /* + * Remap lower and upper to alpha if the match is case insensitive. + */ - if (cases && + if (cases && ((enum classes) index == CC_LOWER || (enum classes) index == CC_UPPER)) index = (int) CC_ALPHA; - - /* - * Now compute the character class contents. + + /* + * Now compute the character class contents. * * For the moment, assume that only char codes < 256 can be in these * classes. 
- */ - - switch((enum classes) index) { - case CC_PRINT: - case CC_ALNUM: - cv = getcvec(v, UCHAR_MAX, 1, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_isalpha((chr) i)) - addchr(cv, (chr) i); - } - addrange(cv, (chr) '0', (chr) '9'); - } - break; - case CC_ALPHA: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_isalpha((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_ASCII: - cv = getcvec(v, 0, 1, 0); - if (cv) { - addrange(cv, 0, 0x7f); - } - break; - case CC_BLANK: - cv = getcvec(v, 2, 0, 0); - addchr(cv, '\t'); - addchr(cv, ' '); - break; - case CC_CNTRL: - cv = getcvec(v, 0, 2, 0); - addrange(cv, 0x0, 0x1f); - addrange(cv, 0x7f, 0x9f); - break; - case CC_DIGIT: - cv = getcvec(v, 0, 1, 0); - if (cv) { - addrange(cv, (chr) '0', (chr) '9'); + */ + + switch ((enum classes) index) + { + case CC_PRINT: + case CC_ALNUM: + cv = getcvec(v, UCHAR_MAX, 1, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_isalpha((chr) i)) + addchr(cv, (chr) i); + } + addrange(cv, (chr) '0', (chr) '9'); + } + break; + case CC_ALPHA: + cv = getcvec(v, UCHAR_MAX, 0, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_isalpha((chr) i)) + addchr(cv, (chr) i); + } + } + break; + case CC_ASCII: + cv = getcvec(v, 0, 1, 0); + if (cv) + addrange(cv, 0, 0x7f); + break; + case CC_BLANK: + cv = getcvec(v, 2, 0, 0); + addchr(cv, '\t'); + addchr(cv, ' '); + break; + case CC_CNTRL: + cv = getcvec(v, 0, 2, 0); + addrange(cv, 0x0, 0x1f); + addrange(cv, 0x7f, 0x9f); + break; + case CC_DIGIT: + cv = getcvec(v, 0, 1, 0); + if (cv) + addrange(cv, (chr) '0', (chr) '9'); + break; + case CC_PUNCT: + cv = getcvec(v, UCHAR_MAX, 0, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_ispunct((chr) i)) + addchr(cv, (chr) i); + } + } + break; + case CC_XDIGIT: + cv = getcvec(v, 0, 3, 0); + if (cv) + { + addrange(cv, '0', '9'); + addrange(cv, 'a', 'f'); + addrange(cv, 'A', 'F'); + } + break; + case 
CC_SPACE: + cv = getcvec(v, UCHAR_MAX, 0, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_isspace((chr) i)) + addchr(cv, (chr) i); + } + } + break; + case CC_LOWER: + cv = getcvec(v, UCHAR_MAX, 0, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_islower((chr) i)) + addchr(cv, (chr) i); + } + } + break; + case CC_UPPER: + cv = getcvec(v, UCHAR_MAX, 0, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_isupper((chr) i)) + addchr(cv, (chr) i); + } + } + break; + case CC_GRAPH: + cv = getcvec(v, UCHAR_MAX, 0, 0); + if (cv) + { + for (i = 0; i <= UCHAR_MAX; i++) + { + if (pg_isgraph((chr) i)) + addchr(cv, (chr) i); + } + } + break; } - break; - case CC_PUNCT: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_ispunct((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_XDIGIT: - cv = getcvec(v, 0, 3, 0); - if (cv) { - addrange(cv, '0', '9'); - addrange(cv, 'a', 'f'); - addrange(cv, 'A', 'F'); - } - break; - case CC_SPACE: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_isspace((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_LOWER: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_islower((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_UPPER: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_isupper((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_GRAPH: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) { - for (i=0 ; i<= UCHAR_MAX ; i++) { - if (pg_isgraph((chr) i)) - addchr(cv, (chr) i); - } - } - break; - } - if (cv == NULL) { - ERR(REG_ESPACE); - } - return cv; + if (cv == NULL) + ERR(REG_ESPACE); + return cv; } /* @@ -561,37 +777,37 @@ cclass(struct vars *v, /* context */ * messy cases are done via range(). 
*/ static struct cvec * -allcases(struct vars *v, /* context */ +allcases(struct vars * v, /* context */ chr pc) /* character to get case equivs of */ { - struct cvec *cv; - chr c = (chr)pc; - chr lc, uc; + struct cvec *cv; + chr c = (chr) pc; + chr lc, + uc; - lc = pg_tolower((chr)c); - uc = pg_toupper((chr)c); + lc = pg_tolower((chr) c); + uc = pg_toupper((chr) c); cv = getcvec(v, 2, 0, 0); - addchr(cv, lc); - if (lc != uc) { - addchr(cv, uc); - } - return cv; + addchr(cv, lc); + if (lc != uc) + addchr(cv, uc); + return cv; } /* * cmp - chr-substring compare * - * Backrefs need this. It should preferably be efficient. + * Backrefs need this. It should preferably be efficient. * Note that it does not need to report anything except equal/unequal. * Note also that the length is exact, and the comparison should not * stop at embedded NULs! */ -static int /* 0 for equal, nonzero for unequal */ -cmp(const chr *x, const chr *y, /* strings to compare */ - size_t len) /* exact length of comparison */ +static int /* 0 for equal, nonzero for unequal */ +cmp(const chr * x, const chr * y, /* strings to compare */ + size_t len) /* exact length of comparison */ { - return memcmp(VS(x), VS(y), len*sizeof(chr)); + return memcmp(VS(x), VS(y), len * sizeof(chr)); } /* @@ -602,14 +818,14 @@ cmp(const chr *x, const chr *y, /* strings to compare */ * Note also that the length is exact, and the comparison should not * stop at embedded NULs! 
*/ -static int /* 0 for equal, nonzero for unequal */ -casecmp(const chr *x, const chr *y, /* strings to compare */ +static int /* 0 for equal, nonzero for unequal */ +casecmp(const chr * x, const chr * y, /* strings to compare */ size_t len) /* exact length of comparison */ { - for (; len > 0; len--, x++, y++) { - if ((*x!=*y) && (pg_tolower(*x) != pg_tolower(*y))) { - return 1; + for (; len > 0; len--, x++, y++) + { + if ((*x != *y) && (pg_tolower(*x) != pg_tolower(*y))) + return 1; } - } - return 0; + return 0; } diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 43e01ebe92b..51fd8bfb859 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -2,21 +2,21 @@ * NFA utilities. * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header: /cvsroot/pgsql/src/backend/regex/regc_nfa.c,v 1.1 2003/02/05 17:41:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regc_nfa.c,v 1.2 2003/08/04 00:43:21 momjian Exp $ * * * One or two things that technically ought to be in here @@ -36,21 +36,21 @@ * the color chains. */ -#define NISERR() VISERR(nfa->v) -#define NERR(e) VERR(nfa->v, (e)) +#define NISERR() VISERR(nfa->v) +#define NERR(e) VERR(nfa->v, (e)) /* * newnfa - set up an NFA */ -static struct nfa * /* the NFA, or NULL */ -newnfa(struct vars *v, - struct colormap *cm, - struct nfa *parent) /* NULL if primary NFA */ +static struct nfa * /* the NFA, or NULL */ +newnfa(struct vars * v, + struct colormap * cm, + struct nfa * parent) /* NULL if primary NFA */ { struct nfa *nfa; - nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); + nfa = (struct nfa *) MALLOC(sizeof(struct nfa)); if (nfa == NULL) return NULL; @@ -66,9 +66,10 @@ newnfa(struct vars *v, nfa->pre = newfstate(nfa, '>'); /* number 1 */ nfa->parent = parent; - nfa->init = newstate(nfa); /* may become invalid later */ + nfa->init = newstate(nfa); /* may become invalid later */ nfa->final = newstate(nfa); - if (ISERR()) { + if (ISERR()) + { freenfa(nfa); return NULL; } @@ -79,7 +80,8 @@ newnfa(struct vars *v, newarc(nfa, '$', 1, nfa->final, nfa->post); newarc(nfa, '$', 0, nfa->final, nfa->post); - if (ISERR()) { + if (ISERR()) + { freenfa(nfa); return NULL; } @@ -90,15 +92,17 @@ newnfa(struct vars *v, * freenfa - free an entire NFA */ static void -freenfa(struct nfa *nfa) +freenfa(struct nfa * nfa) { struct state *s; - while ((s = nfa->states) != NULL) { - s->nins = s->nouts = 0; /* don't worry about arcs */ + while ((s = nfa->states) != NULL) + { + s->nins = s->nouts = 0; /* don't worry about arcs */ freestate(nfa, s); } - while ((s = nfa->free) != NULL) { + while ((s = nfa->free) != NULL) + { nfa->free = s->next; destroystate(nfa, s); } @@ -113,17 +117,21 @@ freenfa(struct nfa *nfa) /* * newstate - allocate an NFA state, with zero 
flag value */ -static struct state * /* NULL on error */ -newstate(struct nfa *nfa) +static struct state * /* NULL on error */ +newstate(struct nfa * nfa) { struct state *s; - if (nfa->free != NULL) { + if (nfa->free != NULL) + { s = nfa->free; nfa->free = s->next; - } else { - s = (struct state *)MALLOC(sizeof(struct state)); - if (s == NULL) { + } + else + { + s = (struct state *) MALLOC(sizeof(struct state)); + if (s == NULL) + { NERR(REG_ESPACE); return NULL; } @@ -143,7 +151,8 @@ newstate(struct nfa *nfa) s->outs = NULL; s->tmp = NULL; s->next = NULL; - if (nfa->slast != NULL) { + if (nfa->slast != NULL) + { assert(nfa->slast->next == NULL); nfa->slast->next = s; } @@ -155,14 +164,14 @@ newstate(struct nfa *nfa) /* * newfstate - allocate an NFA state with a specified flag value */ -static struct state * /* NULL on error */ -newfstate(struct nfa *nfa, int flag) +static struct state * /* NULL on error */ +newfstate(struct nfa * nfa, int flag) { struct state *s; s = newstate(nfa); if (s != NULL) - s->flag = (char)flag; + s->flag = (char) flag; return s; } @@ -170,8 +179,8 @@ newfstate(struct nfa *nfa, int flag) * dropstate - delete a state's inarcs and outarcs and free it */ static void -dropstate(struct nfa *nfa, - struct state *s) +dropstate(struct nfa * nfa, + struct state * s) { struct arc *a; @@ -186,8 +195,8 @@ dropstate(struct nfa *nfa, * freestate - free a state, which has no in-arcs or out-arcs */ static void -freestate(struct nfa *nfa, - struct state *s) +freestate(struct nfa * nfa, + struct state * s) { assert(s != NULL); assert(s->nins == 0 && s->nouts == 0); @@ -196,18 +205,21 @@ freestate(struct nfa *nfa, s->flag = 0; if (s->next != NULL) s->next->prev = s->prev; - else { + else + { assert(s == nfa->slast); nfa->slast = s->prev; } if (s->prev != NULL) s->prev->next = s->next; - else { + else + { assert(s == nfa->states); nfa->states = s->next; } s->prev = NULL; - s->next = nfa->free; /* don't delete it, put it on the free list */ + s->next = 
nfa->free; /* don't delete it, put it on the free + * list */ nfa->free = s; } @@ -215,14 +227,15 @@ freestate(struct nfa *nfa, * destroystate - really get rid of an already-freed state */ static void -destroystate(struct nfa *nfa, - struct state *s) +destroystate(struct nfa * nfa, + struct state * s) { struct arcbatch *ab; struct arcbatch *abnext; assert(s->no == FREESTATE); - for (ab = s->oas.next; ab != NULL; ab = abnext) { + for (ab = s->oas.next; ab != NULL; ab = abnext) + { abnext = ab->next; FREE(ab); } @@ -236,11 +249,11 @@ destroystate(struct nfa *nfa, * newarc - set up a new arc within an NFA */ static void -newarc(struct nfa *nfa, +newarc(struct nfa * nfa, int t, pcolor co, - struct state *from, - struct state *to) + struct state * from, + struct state * to) { struct arc *a; @@ -257,13 +270,13 @@ newarc(struct nfa *nfa, assert(a != NULL); a->type = t; - a->co = (color)co; + a->co = (color) co; a->to = to; a->from = from; /* - * Put the new arc on the beginning, not the end, of the chains. - * Not only is this easier, it has the very useful side effect that + * Put the new arc on the beginning, not the end, of the chains. Not + * only is this easier, it has the very useful side effect that * deleting the most-recently-added arc is the cheapest case rather * than the most expensive one. 
*/ @@ -284,36 +297,40 @@ newarc(struct nfa *nfa, /* * allocarc - allocate a new out-arc within a state */ -static struct arc * /* NULL for failure */ -allocarc(struct nfa *nfa, - struct state *s) +static struct arc * /* NULL for failure */ +allocarc(struct nfa * nfa, + struct state * s) { struct arc *a; struct arcbatch *new; - int i; + int i; /* shortcut */ - if (s->free == NULL && s->noas < ABSIZE) { + if (s->free == NULL && s->noas < ABSIZE) + { a = &s->oas.a[s->noas]; s->noas++; return a; } /* if none at hand, get more */ - if (s->free == NULL) { - new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); - if (new == NULL) { + if (s->free == NULL) + { + new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); + if (new == NULL) + { NERR(REG_ESPACE); return NULL; } new->next = s->oas.next; s->oas.next = new; - for (i = 0; i < ABSIZE; i++) { + for (i = 0; i < ABSIZE; i++) + { new->a[i].type = 0; - new->a[i].freechain = &new->a[i+1]; + new->a[i].freechain = &new->a[i + 1]; } - new->a[ABSIZE-1].freechain = NULL; + new->a[ABSIZE - 1].freechain = NULL; s->free = &new->a[0]; } assert(s->free != NULL); @@ -327,8 +344,8 @@ allocarc(struct nfa *nfa, * freearc - free an arc */ static void -freearc(struct nfa *nfa, - struct arc *victim) +freearc(struct nfa * nfa, + struct arc * victim) { struct state *from = victim->from; struct state *to = victim->to; @@ -344,9 +361,10 @@ freearc(struct nfa *nfa, assert(from != NULL); assert(from->outs != NULL); a = from->outs; - if (a == victim) /* simple case: first in chain */ + if (a == victim) /* simple case: first in chain */ from->outs = victim->outchain; - else { + else + { for (; a != NULL && a->outchain != victim; a = a->outchain) continue; assert(a != NULL); @@ -358,9 +376,10 @@ freearc(struct nfa *nfa, assert(to != NULL); assert(to->ins != NULL); a = to->ins; - if (a == victim) /* simple case: first in chain */ + if (a == victim) /* simple case: first in chain */ to->ins = victim->inchain; - else { + else + { for (; a != 
NULL && a->inchain != victim; a = a->inchain) continue; assert(a != NULL); @@ -383,7 +402,7 @@ freearc(struct nfa *nfa, * If there is more than one such arc, the result is random. */ static struct arc * -findarc(struct state *s, +findarc(struct state * s, int type, pcolor co) { @@ -399,10 +418,10 @@ findarc(struct state *s, * cparc - allocate a new arc within an NFA, copying details from old one */ static void -cparc(struct nfa *nfa, - struct arc *oa, - struct state *from, - struct state *to) +cparc(struct nfa * nfa, + struct arc * oa, + struct state * from, + struct state * to) { newarc(nfa, oa->type, oa->co, from, to); } @@ -416,15 +435,16 @@ cparc(struct nfa *nfa, * ones to exploit the suppression built into newarc. */ static void -moveins(struct nfa *nfa, - struct state *old, - struct state *new) +moveins(struct nfa * nfa, + struct state * old, + struct state * new) { struct arc *a; assert(old != new); - while ((a = old->ins) != NULL) { + while ((a = old->ins) != NULL) + { cparc(nfa, a, a->from, new); freearc(nfa, a); } @@ -436,9 +456,9 @@ moveins(struct nfa *nfa, * copyins - copy all in arcs of a state to another state */ static void -copyins(struct nfa *nfa, - struct state *old, - struct state *new) +copyins(struct nfa * nfa, + struct state * old, + struct state * new) { struct arc *a; @@ -452,15 +472,16 @@ copyins(struct nfa *nfa, * moveouts - move all out arcs of a state to another state */ static void -moveouts(struct nfa *nfa, - struct state *old, - struct state *new) +moveouts(struct nfa * nfa, + struct state * old, + struct state * new) { struct arc *a; assert(old != new); - while ((a = old->outs) != NULL) { + while ((a = old->outs) != NULL) + { cparc(nfa, a, new, a->to); freearc(nfa, a); } @@ -470,9 +491,9 @@ moveouts(struct nfa *nfa, * copyouts - copy all out arcs of a state to another state */ static void -copyouts(struct nfa *nfa, - struct state *old, - struct state *new) +copyouts(struct nfa * nfa, + struct state * old, + struct state * new) { 
struct arc *a; @@ -486,10 +507,10 @@ copyouts(struct nfa *nfa, * cloneouts - copy out arcs of a state to another state pair, modifying type */ static void -cloneouts(struct nfa *nfa, - struct state *old, - struct state *from, - struct state *to, +cloneouts(struct nfa * nfa, + struct state * old, + struct state * from, + struct state * to, int type) { struct arc *a; @@ -507,20 +528,20 @@ cloneouts(struct nfa *nfa, * states using their tmp pointer. */ static void -delsub(struct nfa *nfa, - struct state *lp, /* the sub-NFA goes from here... */ - struct state *rp) /* ...to here, *not* inclusive */ +delsub(struct nfa * nfa, + struct state * lp, /* the sub-NFA goes from here... */ + struct state * rp) /* ...to here, *not* inclusive */ { assert(lp != rp); - rp->tmp = rp; /* mark end */ + rp->tmp = rp; /* mark end */ deltraverse(nfa, lp, lp); assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ - assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ + assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ - rp->tmp = NULL; /* unmark end */ - lp->tmp = NULL; /* and begin, marked by deltraverse */ + rp->tmp = NULL; /* unmark end */ + lp->tmp = NULL; /* and begin, marked by deltraverse */ } /* @@ -528,36 +549,38 @@ delsub(struct nfa *nfa, * This routine's basic job is to destroy all out-arcs of the state. 
*/ static void -deltraverse(struct nfa *nfa, - struct state *leftend, - struct state *s) +deltraverse(struct nfa * nfa, + struct state * leftend, + struct state * s) { struct arc *a; struct state *to; if (s->nouts == 0) - return; /* nothing to do */ + return; /* nothing to do */ if (s->tmp != NULL) - return; /* already in progress */ + return; /* already in progress */ - s->tmp = s; /* mark as in progress */ + s->tmp = s; /* mark as in progress */ - while ((a = s->outs) != NULL) { + while ((a = s->outs) != NULL) + { to = a->to; deltraverse(nfa, leftend, to); assert(to->nouts == 0 || to->tmp != NULL); freearc(nfa, a); - if (to->nins == 0 && to->tmp == NULL) { + if (to->nins == 0 && to->tmp == NULL) + { assert(to->nouts == 0); freestate(nfa, to); } } - assert(s->no != FREESTATE); /* we're still here */ - assert(s == leftend || s->nins != 0); /* and still reachable */ + assert(s->no != FREESTATE); /* we're still here */ + assert(s == leftend || s->nins != 0); /* and still reachable */ assert(s->nouts == 0); /* but have no outarcs */ - s->tmp = NULL; /* we're done here */ + s->tmp = NULL; /* we're done here */ } /* @@ -568,13 +591,14 @@ deltraverse(struct nfa *nfa, * it's a state pointer, didn't you? 
:-)) */ static void -dupnfa(struct nfa *nfa, - struct state *start, /* duplicate of subNFA starting here */ - struct state *stop, /* and stopping here */ - struct state *from, /* stringing duplicate from here */ - struct state *to) /* to here */ +dupnfa(struct nfa * nfa, + struct state * start, /* duplicate of subNFA starting here */ + struct state * stop, /* and stopping here */ + struct state * from, /* stringing duplicate from here */ + struct state * to) /* to here */ { - if (start == stop) { + if (start == stop) + { newarc(nfa, EMPTY, 0, from, to); return; } @@ -591,23 +615,25 @@ dupnfa(struct nfa *nfa, * duptraverse - recursive heart of dupnfa */ static void -duptraverse(struct nfa *nfa, - struct state *s, - struct state *stmp) /* s's duplicate, or NULL */ +duptraverse(struct nfa * nfa, + struct state * s, + struct state * stmp) /* s's duplicate, or NULL */ { struct arc *a; if (s->tmp != NULL) - return; /* already done */ + return; /* already done */ s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; - if (s->tmp == NULL) { + if (s->tmp == NULL) + { assert(NISERR()); return; } - for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { - duptraverse(nfa, a->to, (struct state *)NULL); + for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) + { + duptraverse(nfa, a->to, (struct state *) NULL); assert(a->to->tmp != NULL); cparc(nfa, a, s->tmp, a->to->tmp); } @@ -617,8 +643,8 @@ duptraverse(struct nfa *nfa, * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set */ static void -cleartraverse(struct nfa *nfa, - struct state *s) +cleartraverse(struct nfa * nfa, + struct state * s) { struct arc *a; @@ -634,15 +660,18 @@ cleartraverse(struct nfa *nfa, * specialcolors - fill in special colors for an NFA */ static void -specialcolors(struct nfa *nfa) +specialcolors(struct nfa * nfa) { /* false colors for BOS, BOL, EOS, EOL */ - if (nfa->parent == NULL) { + if (nfa->parent == NULL) + { nfa->bos[0] = pseudocolor(nfa->cm); nfa->bos[1] = 
pseudocolor(nfa->cm); nfa->eos[0] = pseudocolor(nfa->cm); nfa->eos[1] = pseudocolor(nfa->cm); - } else { + } + else + { assert(nfa->parent->bos[0] != COLORLESS); nfa->bos[0] = nfa->parent->bos[0]; assert(nfa->parent->bos[1] != COLORLESS); @@ -657,57 +686,60 @@ specialcolors(struct nfa *nfa) /* * optimize - optimize an NFA */ -static long /* re_info bits */ -optimize(struct nfa *nfa, +static long /* re_info bits */ +optimize(struct nfa * nfa, FILE *f) /* for debug output; NULL none */ { #ifdef REG_DEBUG - int verbose = (f != NULL) ? 1 : 0; + int verbose = (f != NULL) ? 1 : 0; if (verbose) fprintf(f, "\ninitial cleanup:\n"); #endif - cleanup(nfa); /* may simplify situation */ + cleanup(nfa); /* may simplify situation */ #ifdef REG_DEBUG if (verbose) dumpnfa(nfa, f); if (verbose) fprintf(f, "\nempties:\n"); #endif - fixempties(nfa, f); /* get rid of EMPTY arcs */ + fixempties(nfa, f); /* get rid of EMPTY arcs */ #ifdef REG_DEBUG if (verbose) fprintf(f, "\nconstraints:\n"); #endif - pullback(nfa, f); /* pull back constraints backward */ - pushfwd(nfa, f); /* push fwd constraints forward */ + pullback(nfa, f); /* pull back constraints backward */ + pushfwd(nfa, f); /* push fwd constraints forward */ #ifdef REG_DEBUG if (verbose) fprintf(f, "\nfinal cleanup:\n"); #endif - cleanup(nfa); /* final tidying */ - return analyze(nfa); /* and analysis */ + cleanup(nfa); /* final tidying */ + return analyze(nfa); /* and analysis */ } /* * pullback - pull back constraints backward to (with luck) eliminate them */ static void -pullback(struct nfa *nfa, - FILE *f) /* for debug output; NULL none */ +pullback(struct nfa * nfa, + FILE *f) /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; + int progress; /* find and pull until there are no more */ - do { + do + { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) + { nexts = 
s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { + for (a = s->outs; a != NULL && !NISERR(); a = nexta) + { nexta = a->outchain; if (a->type == '^' || a->type == BEHIND) if (pull(nfa, a)) @@ -721,9 +753,11 @@ pullback(struct nfa *nfa, if (NISERR()) return; - for (a = nfa->pre->outs; a != NULL; a = nexta) { + for (a = nfa->pre->outs; a != NULL; a = nexta) + { nexta = a->outchain; - if (a->type == '^') { + if (a->type == '^') + { assert(a->co == 0 || a->co == 1); newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); freearc(nfa, a); @@ -737,9 +771,9 @@ pullback(struct nfa *nfa, * one state -- the constraint's from state -- and only if the constraint * was that state's last outarc. */ -static int /* 0 couldn't, 1 could */ -pull(struct nfa *nfa, - struct arc *con) +static int /* 0 couldn't, 1 could */ +pull(struct nfa * nfa, + struct arc * con) { struct state *from = con->from; struct state *to = con->to; @@ -747,25 +781,28 @@ pull(struct nfa *nfa, struct arc *nexta; struct state *s; - if (from == to) { /* circular constraint is pointless */ + if (from == to) + { /* circular constraint is pointless */ freearc(nfa, con); return 1; } - if (from->flag) /* can't pull back beyond start */ + if (from->flag) /* can't pull back beyond start */ return 0; - if (from->nins == 0) { /* unreachable */ + if (from->nins == 0) + { /* unreachable */ freearc(nfa, con); return 1; } /* first, clone from state if necessary to avoid other outarcs */ - if (from->nouts > 1) { + if (from->nouts > 1) + { s = newstate(nfa); if (NISERR()) return 0; assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ + copyins(nfa, from, s); /* duplicate inarcs */ + cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); from = s; con = from->outs; @@ -773,27 +810,29 @@ pull(struct nfa *nfa, assert(from->nouts == 1); /* propagate the constraint into the from state's inarcs */ - for (a = 
from->ins; a != NULL; a = nexta) { + for (a = from->ins; a != NULL; a = nexta) + { nexta = a->inchain; - switch (combine(con, a)) { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ - cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; + switch (combine(con, a)) + { + case INCOMPATIBLE: /* destroy the arc */ + freearc(nfa, a); + break; + case SATISFIED: /* no action needed */ + break; + case COMPATIBLE: /* swap the two arcs, more or less */ + s = newstate(nfa); + if (NISERR()) + return 0; + cparc(nfa, a, s, to); /* anticipate move */ + cparc(nfa, con, a->from, s); + if (NISERR()) + return 0; + freearc(nfa, a); + break; + default: + assert(NOTREACHED); + break; } } @@ -807,21 +846,24 @@ pull(struct nfa *nfa, * pushfwd - push forward constraints forward to (with luck) eliminate them */ static void -pushfwd(struct nfa *nfa, - FILE *f) /* for debug output; NULL none */ +pushfwd(struct nfa * nfa, + FILE *f) /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; + int progress; /* find and push until there are no more */ - do { + do + { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) + { nexts = s->next; - for (a = s->ins; a != NULL && !NISERR(); a = nexta) { + for (a = s->ins; a != NULL && !NISERR(); a = nexta) + { nexta = a->inchain; if (a->type == '$' || a->type == AHEAD) if (push(nfa, a)) @@ -835,9 +877,11 @@ pushfwd(struct nfa *nfa, if (NISERR()) return; - for (a = nfa->post->ins; a != NULL; a = nexta) { + for (a = nfa->post->ins; a != NULL; a = nexta) + { nexta = a->inchain; - if (a->type == '$') { + if (a->type == '$') 
+ { assert(a->co == 0 || a->co == 1); newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); freearc(nfa, a); @@ -851,9 +895,9 @@ pushfwd(struct nfa *nfa, * one state -- the constraint's to state -- and only if the constraint * was that state's last inarc. */ -static int /* 0 couldn't, 1 could */ -push(struct nfa *nfa, - struct arc *con) +static int /* 0 couldn't, 1 could */ +push(struct nfa * nfa, + struct arc * con) { struct state *from = con->from; struct state *to = con->to; @@ -861,24 +905,27 @@ push(struct nfa *nfa, struct arc *nexta; struct state *s; - if (to == from) { /* circular constraint is pointless */ + if (to == from) + { /* circular constraint is pointless */ freearc(nfa, con); return 1; } - if (to->flag) /* can't push forward beyond end */ + if (to->flag) /* can't push forward beyond end */ return 0; - if (to->nouts == 0) { /* dead end */ + if (to->nouts == 0) + { /* dead end */ freearc(nfa, con); return 1; } /* first, clone to state if necessary to avoid other inarcs */ - if (to->nins > 1) { + if (to->nins > 1) + { s = newstate(nfa); if (NISERR()) return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ + copyouts(nfa, to, s); /* duplicate outarcs */ + cparc(nfa, con, from, s); /* move constraint */ freearc(nfa, con); to = s; con = to->ins; @@ -886,88 +933,91 @@ push(struct nfa *nfa, assert(to->nins == 1); /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) { + for (a = to->outs; a != NULL; a = nexta) + { nexta = a->outchain; - switch (combine(con, a)) { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ - cparc(nfa, a, from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; + 
switch (combine(con, a)) + { + case INCOMPATIBLE: /* destroy the arc */ + freearc(nfa, a); + break; + case SATISFIED: /* no action needed */ + break; + case COMPATIBLE: /* swap the two arcs, more or less */ + s = newstate(nfa); + if (NISERR()) + return 0; + cparc(nfa, con, s, a->to); /* anticipate move */ + cparc(nfa, a, from, s); + if (NISERR()) + return 0; + freearc(nfa, a); + break; + default: + assert(NOTREACHED); + break; } } /* remaining outarcs, if any, incorporate the constraint */ moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ + dropstate(nfa, to); /* will free the constraint */ return 1; } /* * combine - constraint lands on an arc, what happens? * - * #def INCOMPATIBLE 1 // destroys arc - * #def SATISFIED 2 // constraint satisfied - * #def COMPATIBLE 3 // compatible but not satisfied yet + * #def INCOMPATIBLE 1 // destroys arc + * #def SATISFIED 2 // constraint satisfied + * #def COMPATIBLE 3 // compatible but not satisfied yet */ static int -combine(struct arc *con, - struct arc *a) +combine(struct arc * con, + struct arc * a) { -# define CA(ct,at) (((ct)<<CHAR_BIT) | (at)) - - switch (CA(con->type, a->type)) { - case CA('^', PLAIN): /* newlines are handled separately */ - case CA('$', PLAIN): - return INCOMPATIBLE; - break; - case CA(AHEAD, PLAIN): /* color constraints meet colors */ - case CA(BEHIND, PLAIN): - if (con->co == a->co) - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', '^'): /* collision, similar constraints */ - case CA('$', '$'): - case CA(AHEAD, AHEAD): - case CA(BEHIND, BEHIND): - if (con->co == a->co) /* true duplication */ - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', BEHIND): /* collision, dissimilar constraints */ - case CA(BEHIND, '^'): - case CA('$', AHEAD): - case CA(AHEAD, '$'): - return INCOMPATIBLE; - break; - case CA('^', '$'): /* constraints passing each other */ - case CA('^', AHEAD): - case CA(BEHIND, '$'): - case CA(BEHIND, AHEAD): - case CA('$', '^'): - 
case CA('$', BEHIND): - case CA(AHEAD, '^'): - case CA(AHEAD, BEHIND): - case CA('^', LACON): - case CA(BEHIND, LACON): - case CA('$', LACON): - case CA(AHEAD, LACON): - return COMPATIBLE; - break; +#define CA(ct,at) (((ct)<<CHAR_BIT) | (at)) + + switch (CA(con->type, a->type)) + { + case CA('^', PLAIN): /* newlines are handled separately */ + case CA('$', PLAIN): + return INCOMPATIBLE; + break; + case CA(AHEAD, PLAIN): /* color constraints meet colors */ + case CA(BEHIND, PLAIN): + if (con->co == a->co) + return SATISFIED; + return INCOMPATIBLE; + break; + case CA('^', '^'): /* collision, similar constraints */ + case CA('$', '$'): + case CA(AHEAD, AHEAD): + case CA(BEHIND, BEHIND): + if (con->co == a->co) /* true duplication */ + return SATISFIED; + return INCOMPATIBLE; + break; + case CA('^', BEHIND): /* collision, dissimilar constraints */ + case CA(BEHIND, '^'): + case CA('$', AHEAD): + case CA(AHEAD, '$'): + return INCOMPATIBLE; + break; + case CA('^', '$'): /* constraints passing each other */ + case CA('^', AHEAD): + case CA(BEHIND, '$'): + case CA(BEHIND, AHEAD): + case CA('$', '^'): + case CA('$', BEHIND): + case CA(AHEAD, '^'): + case CA(AHEAD, BEHIND): + case CA('^', LACON): + case CA(BEHIND, LACON): + case CA('$', LACON): + case CA(AHEAD, LACON): + return COMPATIBLE; + break; } assert(NOTREACHED); return INCOMPATIBLE; /* for benefit of blind compilers */ @@ -977,21 +1027,24 @@ combine(struct arc *con, * fixempties - get rid of EMPTY arcs */ static void -fixempties(struct nfa *nfa, - FILE *f) /* for debug output; NULL none */ +fixempties(struct nfa * nfa, + FILE *f) /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; + int progress; /* find and eliminate empties until there are no more */ - do { + do + { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) + { nexts = s->next; - for (a = s->outs; a != 
NULL && !NISERR(); a = nexta) { + for (a = s->outs; a != NULL && !NISERR(); a = nexta) + { nexta = a->outchain; if (a->type == EMPTY && unempty(nfa, a)) progress = 1; @@ -1009,46 +1062,55 @@ fixempties(struct nfa *nfa, * Actually, as it stands this function always succeeds, but the return * value is kept with an eye on possible future changes. */ -static int /* 0 couldn't, 1 could */ -unempty(struct nfa *nfa, - struct arc *a) +static int /* 0 couldn't, 1 could */ +unempty(struct nfa * nfa, + struct arc * a) { struct state *from = a->from; struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ + int usefrom; /* work on from, as opposed to to? */ assert(a->type == EMPTY); assert(from != nfa->pre && to != nfa->post); - if (from == to) { /* vacuous loop */ + if (from == to) + { /* vacuous loop */ freearc(nfa, a); return 1; } /* decide which end to work on */ - usefrom = 1; /* default: attack from */ + usefrom = 1; /* default: attack from */ if (from->nouts > to->nins) usefrom = 0; - else if (from->nouts == to->nins) { + else if (from->nouts == to->nins) + { /* decide on secondary issue: move/copy fewest arcs */ if (from->nins > to->nouts) usefrom = 0; } - + freearc(nfa, a); - if (usefrom) { - if (from->nouts == 0) { + if (usefrom) + { + if (from->nouts == 0) + { /* was the state's only outarc */ moveins(nfa, from, to); freestate(nfa, from); - } else + } + else copyins(nfa, from, to); - } else { - if (to->nins == 0) { + } + else + { + if (to->nins == 0) + { /* was the state's only inarc */ moveouts(nfa, to, from); freestate(nfa, to); - } else + } + else copyouts(nfa, to, from); } @@ -1059,17 +1121,18 @@ unempty(struct nfa *nfa, * cleanup - clean up NFA after optimizations */ static void -cleanup(struct nfa *nfa) +cleanup(struct nfa * nfa) { struct state *s; struct state *nexts; - int n; + int n; /* clear out unreachable or dead-end states */ /* use pre to mark reachable, then post to mark can-reach-post */ - markreachable(nfa, nfa->pre, (struct 
state *)NULL, nfa->pre); + markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre); markcanreach(nfa, nfa->post, nfa->pre, nfa->post); - for (s = nfa->states; s != NULL; s = nexts) { + for (s = nfa->states; s != NULL; s = nexts) + { nexts = s->next; if (s->tmp != nfa->post && !s->flag) dropstate(nfa, s); @@ -1090,10 +1153,11 @@ cleanup(struct nfa *nfa) * markreachable - recursive marking of reachable states */ static void -markreachable(struct nfa *nfa, - struct state *s, - struct state *okay, /* consider only states with this mark */ - struct state *mark) /* the value to mark with */ +markreachable(struct nfa * nfa, + struct state * s, + struct state * okay, /* consider only states with this + * mark */ + struct state * mark) /* the value to mark with */ { struct arc *a; @@ -1109,10 +1173,11 @@ markreachable(struct nfa *nfa, * markcanreach - recursive marking of states which can reach here */ static void -markcanreach(struct nfa *nfa, - struct state *s, - struct state *okay, /* consider only states with this mark */ - struct state *mark) /* the value to mark with */ +markcanreach(struct nfa * nfa, + struct state * s, + struct state * okay, /* consider only states with this + * mark */ + struct state * mark) /* the value to mark with */ { struct arc *a; @@ -1127,8 +1192,8 @@ markcanreach(struct nfa *nfa, /* * analyze - ascertain potentially-useful facts about an optimized NFA */ -static long /* re_info bits to be ORed in */ -analyze(struct nfa *nfa) +static long /* re_info bits to be ORed in */ +analyze(struct nfa * nfa) { struct arc *a; struct arc *aa; @@ -1146,29 +1211,31 @@ analyze(struct nfa *nfa) * compact - compact an NFA */ static void -compact(struct nfa *nfa, - struct cnfa *cnfa) +compact(struct nfa * nfa, + struct cnfa * cnfa) { struct state *s; struct arc *a; - size_t nstates; - size_t narcs; + size_t nstates; + size_t narcs; struct carc *ca; struct carc *first; - assert (!NISERR()); + assert(!NISERR()); nstates = 0; narcs = 0; - for (s = 
nfa->states; s != NULL; s = s->next) { + for (s = nfa->states; s != NULL; s = s->next) + { nstates++; narcs += 1 + s->nouts + 1; /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ } - cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); - cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) { + cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); + cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); + if (cnfa->states == NULL || cnfa->arcs == NULL) + { if (cnfa->states != NULL) FREE(cnfa->states); if (cnfa->arcs != NULL) @@ -1187,31 +1254,33 @@ compact(struct nfa *nfa, cnfa->flags = 0; ca = cnfa->arcs; - for (s = nfa->states; s != NULL; s = s->next) { - assert((size_t)s->no < nstates); + for (s = nfa->states; s != NULL; s = s->next) + { + assert((size_t) s->no < nstates); cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ + ca->co = 0; /* clear and skip flags "arc" */ ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) - switch (a->type) { - case PLAIN: - ca->co = a->co; - ca->to = a->to->no; - ca++; - break; - case LACON: - assert(s->no != cnfa->pre); - ca->co = (color)(cnfa->ncolors + a->co); - ca->to = a->to->no; - ca++; - cnfa->flags |= HASLACONS; - break; - default: - assert(NOTREACHED); - break; + switch (a->type) + { + case PLAIN: + ca->co = a->co; + ca->to = a->to->no; + ca++; + break; + case LACON: + assert(s->no != cnfa->pre); + ca->co = (color) (cnfa->ncolors + a->co); + ca->to = a->to->no; + ca++; + cnfa->flags |= HASLACONS; + break; + default: + assert(NOTREACHED); + break; } - carcsort(first, ca-1); + carcsort(first, ca - 1); ca->co = COLORLESS; ca->to = 0; ca++; @@ -1232,8 +1301,8 @@ compact(struct nfa *nfa, * you're in real trouble anyway. 
*/ static void -carcsort(struct carc *first, - struct carc *last) +carcsort(struct carc * first, + struct carc * last) { struct carc *p; struct carc *q; @@ -1245,7 +1314,8 @@ carcsort(struct carc *first, for (p = first; p <= last; p++) for (q = p; q <= last; q++) if (p->co > q->co || - (p->co == q->co && p->to > q->to)) { + (p->co == q->co && p->to > q->to)) + { assert(p != q); tmp = *p; *p = *q; @@ -1257,9 +1327,9 @@ carcsort(struct carc *first, * freecnfa - free a compacted NFA */ static void -freecnfa(struct cnfa *cnfa) +freecnfa(struct cnfa * cnfa) { - assert(cnfa->nstates != 0); /* not empty already */ + assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; FREE(cnfa->states); FREE(cnfa->arcs); @@ -1269,7 +1339,7 @@ freecnfa(struct cnfa *cnfa) * dumpnfa - dump an NFA in human-readable form */ static void -dumpnfa(struct nfa *nfa, +dumpnfa(struct nfa * nfa, FILE *f) { #ifdef REG_DEBUG @@ -1277,13 +1347,13 @@ dumpnfa(struct nfa *nfa, fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); if (nfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); + fprintf(f, ", bos [%ld]", (long) nfa->bos[0]); if (nfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); + fprintf(f, ", bol [%ld]", (long) nfa->bos[1]); if (nfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); + fprintf(f, ", eos [%ld]", (long) nfa->eos[0]); if (nfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); + fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); fprintf(f, "\n"); for (s = nfa->states; s != NULL; s = s->next) dumpstate(s, f); @@ -1293,19 +1363,19 @@ dumpnfa(struct nfa *nfa, #endif } -#ifdef REG_DEBUG /* subordinates of dumpnfa */ +#ifdef REG_DEBUG /* subordinates of dumpnfa */ /* * dumpstate - dump an NFA state in human-readable form */ static void -dumpstate(struct state *s, +dumpstate(struct state * s, FILE *f) { struct arc *a; fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", - (s->flag) ? 
s->flag : '.'); + (s->flag) ? s->flag : '.'); if (s->prev != NULL && s->prev->next != s) fprintf(f, "\tstate chain bad\n"); if (s->nouts == 0) @@ -1313,7 +1383,8 @@ dumpstate(struct state *s, else dumparcs(s, f); fflush(f); - for (a = s->ins; a != NULL; a = a->inchain) { + for (a = s->ins; a != NULL; a = a->inchain) + { if (a->to != s) fprintf(f, "\tlink from %d to %d on %d's in-chain\n", a->from->no, a->to->no, s->no); @@ -1324,10 +1395,10 @@ dumpstate(struct state *s, * dumparcs - dump out-arcs in human-readable form */ static void -dumparcs(struct state *s, +dumparcs(struct state * s, FILE *f) { - int pos; + int pos; assert(s->nouts > 0); /* printing arcs in reverse order is usually clearer */ @@ -1339,19 +1410,21 @@ dumparcs(struct state *s, /* * dumprarcs - dump remaining outarcs, recursively, in reverse order */ -static int /* resulting print position */ -dumprarcs(struct arc *a, - struct state *s, +static int /* resulting print position */ +dumprarcs(struct arc * a, + struct state * s, FILE *f, - int pos) /* initial print position */ + int pos) /* initial print position */ { if (a->outchain != NULL) pos = dumprarcs(a->outchain, s, f, pos); dumparc(a, s, f); - if (pos == 5) { + if (pos == 5) + { fprintf(f, "\n"); pos = 1; - } else + } + else pos++; return pos; } @@ -1360,83 +1433,85 @@ dumprarcs(struct arc *a, * dumparc - dump one outarc in readable form, including prefixing tab */ static void -dumparc(struct arc *a, - struct state *s, +dumparc(struct arc * a, + struct state * s, FILE *f) { struct arc *aa; struct arcbatch *ab; fprintf(f, "\t"); - switch (a->type) { - case PLAIN: - fprintf(f, "[%ld]", (long)a->co); - break; - case AHEAD: - fprintf(f, ">%ld>", (long)a->co); - break; - case BEHIND: - fprintf(f, "<%ld<", (long)a->co); - break; - case LACON: - fprintf(f, ":%ld:", (long)a->co); - break; - case '^': - case '$': - fprintf(f, "%c%d", a->type, (int)a->co); - break; - case EMPTY: - break; - default: - fprintf(f, "0x%x/0%lo", a->type, (long)a->co); - 
break; + switch (a->type) + { + case PLAIN: + fprintf(f, "[%ld]", (long) a->co); + break; + case AHEAD: + fprintf(f, ">%ld>", (long) a->co); + break; + case BEHIND: + fprintf(f, "<%ld<", (long) a->co); + break; + case LACON: + fprintf(f, ":%ld:", (long) a->co); + break; + case '^': + case '$': + fprintf(f, "%c%d", a->type, (int) a->co); + break; + case EMPTY: + break; + default: + fprintf(f, "0x%x/0%lo", a->type, (long) a->co); + break; } if (a->from != s) fprintf(f, "?%d?", a->from->no); - for (ab = &a->from->oas; ab != NULL; ab = ab->next) { + for (ab = &a->from->oas; ab != NULL; ab = ab->next) + { for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) if (aa == a) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ if (aa < &ab->a[ABSIZE]) /* propagate break */ - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } if (ab == NULL) - fprintf(f, "?!?"); /* not in allocated space */ + fprintf(f, "?!?"); /* not in allocated space */ fprintf(f, "->"); - if (a->to == NULL) { + if (a->to == NULL) + { fprintf(f, "NULL"); return; } fprintf(f, "%d", a->to->no); for (aa = a->to->ins; aa != NULL; aa = aa->inchain) if (aa == a) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ if (aa == NULL) - fprintf(f, "?!?"); /* missing from in-chain */ + fprintf(f, "?!?"); /* missing from in-chain */ } - -#endif /* REG_DEBUG */ +#endif /* REG_DEBUG */ /* * dumpcnfa - dump a compacted NFA in human-readable form */ #ifdef REG_DEBUG static void -dumpcnfa(struct cnfa *cnfa, +dumpcnfa(struct cnfa * cnfa, FILE *f) { - int st; + int st; fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); if (cnfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); + fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]); if (cnfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); + fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]); if (cnfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); + fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]); if 
(cnfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); - if (cnfa->flags&HASLACONS) + fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]); + if (cnfa->flags & HASLACONS) fprintf(f, ", haslacons"); fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) @@ -1445,32 +1520,35 @@ dumpcnfa(struct cnfa *cnfa, } #endif -#ifdef REG_DEBUG /* subordinates of dumpcnfa */ +#ifdef REG_DEBUG /* subordinates of dumpcnfa */ /* * dumpcstate - dump a compacted-NFA state in human-readable form */ static void dumpcstate(int st, - struct carc *ca, - struct cnfa *cnfa, + struct carc * ca, + struct cnfa * cnfa, FILE *f) { - int i; - int pos; + int i; + int pos; fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) { + for (i = 1; ca[i].co != COLORLESS; i++) + { if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); + fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); else - fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, - ca[i].to); - if (pos == 5) { + fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors, + ca[i].to); + if (pos == 5) + { fprintf(f, "\n"); pos = 1; - } else + } + else pos++; } if (i == 1 || pos != 1) @@ -1478,4 +1556,4 @@ dumpcstate(int st, fflush(f); } -#endif /* REG_DEBUG */ +#endif /* REG_DEBUG */ diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 099a1872a8d..58af64539d8 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -2,21 +2,21 @@ * re_*comp and friends - compile REs * This file #includes several others (see the bottom). * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. 
- * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /cvsroot/pgsql/src/backend/regex/regcomp.c,v 1.36 2003/02/05 17:41:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regcomp.c,v 1.37 2003/08/04 00:43:21 momjian Exp $ * */ @@ -38,243 +38,252 @@ * forward declarations, up here so forward datatypes etc. 
are defined early */ /* === regcomp.c === */ -static void moresubs (struct vars *, int); -static int freev (struct vars *, int); -static void makesearch (struct vars *, struct nfa *); -static struct subre *parse (struct vars *, int, int, struct state *, struct state *); -static struct subre *parsebranch (struct vars *, int, int, struct state *, struct state *, int); -static void parseqatom (struct vars *, int, int, struct state *, struct state *, struct subre *); -static void nonword (struct vars *, int, struct state *, struct state *); -static void word (struct vars *, int, struct state *, struct state *); -static int scannum (struct vars *); -static void repeat (struct vars *, struct state *, struct state *, int, int); -static void bracket (struct vars *, struct state *, struct state *); -static void cbracket (struct vars *, struct state *, struct state *); -static void brackpart (struct vars *, struct state *, struct state *); -static chr *scanplain (struct vars *); -static void leaders (struct vars *, struct cvec *); -static void onechr (struct vars *, chr, struct state *, struct state *); -static void dovec (struct vars *, struct cvec *, struct state *, struct state *); -static celt nextleader (struct vars *, chr, chr); -static void wordchrs (struct vars *); -static struct subre *subre (struct vars *, int, int, struct state *, struct state *); -static void freesubre (struct vars *, struct subre *); -static void freesrnode (struct vars *, struct subre *); -static void optst (struct vars *, struct subre *); -static int numst (struct subre *, int); -static void markst (struct subre *); -static void cleanst (struct vars *); -static long nfatree (struct vars *, struct subre *, FILE *); -static long nfanode (struct vars *, struct subre *, FILE *); -static int newlacon (struct vars *, struct state *, struct state *, int); -static void freelacons (struct subre *, int); -static void rfree (regex_t *); +static void moresubs(struct vars *, int); +static int freev(struct 
vars *, int); +static void makesearch(struct vars *, struct nfa *); +static struct subre *parse(struct vars *, int, int, struct state *, struct state *); +static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); +static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); +static void nonword(struct vars *, int, struct state *, struct state *); +static void word(struct vars *, int, struct state *, struct state *); +static int scannum(struct vars *); +static void repeat(struct vars *, struct state *, struct state *, int, int); +static void bracket(struct vars *, struct state *, struct state *); +static void cbracket(struct vars *, struct state *, struct state *); +static void brackpart(struct vars *, struct state *, struct state *); +static chr *scanplain(struct vars *); +static void leaders(struct vars *, struct cvec *); +static void onechr(struct vars *, chr, struct state *, struct state *); +static void dovec(struct vars *, struct cvec *, struct state *, struct state *); +static celt nextleader(struct vars *, chr, chr); +static void wordchrs(struct vars *); +static struct subre *subre(struct vars *, int, int, struct state *, struct state *); +static void freesubre(struct vars *, struct subre *); +static void freesrnode(struct vars *, struct subre *); +static void optst(struct vars *, struct subre *); +static int numst(struct subre *, int); +static void markst(struct subre *); +static void cleanst(struct vars *); +static long nfatree(struct vars *, struct subre *, FILE *); +static long nfanode(struct vars *, struct subre *, FILE *); +static int newlacon(struct vars *, struct state *, struct state *, int); +static void freelacons(struct subre *, int); +static void rfree(regex_t *); + #ifdef REG_DEBUG -static void dump (regex_t *, FILE *); -static void dumpst (struct subre *, FILE *, int); -static void stdump (struct subre *, FILE *, int); -static char *stid (struct subre *, char *, size_t); 
+static void dump(regex_t *, FILE *); +static void dumpst(struct subre *, FILE *, int); +static void stdump(struct subre *, FILE *, int); +static char *stid(struct subre *, char *, size_t); #endif /* === regc_lex.c === */ -static void lexstart (struct vars *); -static void prefixes (struct vars *); -static void lexnest (struct vars *, chr *, chr *); -static void lexword (struct vars *); -static int next (struct vars *); -static int lexescape (struct vars *); -static chr lexdigits (struct vars *, int, int, int); -static int brenext (struct vars *, chr); -static void skip (struct vars *); -static chr newline (void); -static chr chrnamed (struct vars *, chr *, chr *, chr); +static void lexstart(struct vars *); +static void prefixes(struct vars *); +static void lexnest(struct vars *, chr *, chr *); +static void lexword(struct vars *); +static int next(struct vars *); +static int lexescape(struct vars *); +static chr lexdigits(struct vars *, int, int, int); +static int brenext(struct vars *, chr); +static void skip(struct vars *); +static chr newline(void); +static chr chrnamed(struct vars *, chr *, chr *, chr); + /* === regc_color.c === */ -static void initcm (struct vars *, struct colormap *); -static void freecm (struct colormap *); -static void cmtreefree (struct colormap *, union tree *, int); -static color setcolor (struct colormap *, chr, pcolor); -static color maxcolor (struct colormap *); -static color newcolor (struct colormap *); -static void freecolor (struct colormap *, pcolor); -static color pseudocolor (struct colormap *); -static color subcolor (struct colormap *, chr c); -static color newsub (struct colormap *, pcolor); -static void subrange (struct vars *, chr, chr, struct state *, struct state *); -static void subblock (struct vars *, chr, struct state *, struct state *); -static void okcolors (struct nfa *, struct colormap *); -static void colorchain (struct colormap *, struct arc *); -static void uncolorchain (struct colormap *, struct arc *); 
-static int singleton (struct colormap *, chr c); -static void rainbow (struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); -static void colorcomplement (struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); +static void initcm(struct vars *, struct colormap *); +static void freecm(struct colormap *); +static void cmtreefree(struct colormap *, union tree *, int); +static color setcolor(struct colormap *, chr, pcolor); +static color maxcolor(struct colormap *); +static color newcolor(struct colormap *); +static void freecolor(struct colormap *, pcolor); +static color pseudocolor(struct colormap *); +static color subcolor(struct colormap *, chr c); +static color newsub(struct colormap *, pcolor); +static void subrange(struct vars *, chr, chr, struct state *, struct state *); +static void subblock(struct vars *, chr, struct state *, struct state *); +static void okcolors(struct nfa *, struct colormap *); +static void colorchain(struct colormap *, struct arc *); +static void uncolorchain(struct colormap *, struct arc *); +static int singleton(struct colormap *, chr c); +static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); +static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); + #ifdef REG_DEBUG -static void dumpcolors (struct colormap *, FILE *); -static void fillcheck (struct colormap *, union tree *, int, FILE *); -static void dumpchr (chr, FILE *); +static void dumpcolors(struct colormap *, FILE *); +static void fillcheck(struct colormap *, union tree *, int, FILE *); +static void dumpchr(chr, FILE *); #endif /* === regc_nfa.c === */ -static struct nfa *newnfa (struct vars *, struct colormap *, struct nfa *); -static void freenfa (struct nfa *); -static struct state *newstate (struct nfa *); -static struct state *newfstate (struct nfa *, int flag); -static void dropstate (struct nfa *, struct state *); 
-static void freestate (struct nfa *, struct state *); -static void destroystate (struct nfa *, struct state *); -static void newarc (struct nfa *, int, pcolor, struct state *, struct state *); -static struct arc *allocarc (struct nfa *, struct state *); -static void freearc (struct nfa *, struct arc *); -static struct arc *findarc (struct state *, int, pcolor); -static void cparc (struct nfa *, struct arc *, struct state *, struct state *); -static void moveins (struct nfa *, struct state *, struct state *); -static void copyins (struct nfa *, struct state *, struct state *); -static void moveouts (struct nfa *, struct state *, struct state *); -static void copyouts (struct nfa *, struct state *, struct state *); -static void cloneouts (struct nfa *, struct state *, struct state *, struct state *, int); -static void delsub (struct nfa *, struct state *, struct state *); -static void deltraverse (struct nfa *, struct state *, struct state *); -static void dupnfa (struct nfa *, struct state *, struct state *, struct state *, struct state *); -static void duptraverse (struct nfa *, struct state *, struct state *); -static void cleartraverse (struct nfa *, struct state *); -static void specialcolors (struct nfa *); -static long optimize (struct nfa *, FILE *); -static void pullback (struct nfa *, FILE *); -static int pull (struct nfa *, struct arc *); -static void pushfwd (struct nfa *, FILE *); -static int push (struct nfa *, struct arc *); -#define INCOMPATIBLE 1 /* destroys arc */ -#define SATISFIED 2 /* constraint satisfied */ -#define COMPATIBLE 3 /* compatible but not satisfied yet */ -static int combine (struct arc *, struct arc *); -static void fixempties (struct nfa *, FILE *); -static int unempty (struct nfa *, struct arc *); -static void cleanup (struct nfa *); -static void markreachable (struct nfa *, struct state *, struct state *, struct state *); -static void markcanreach (struct nfa *, struct state *, struct state *, struct state *); -static long 
analyze (struct nfa *); -static void compact (struct nfa *, struct cnfa *); -static void carcsort (struct carc *, struct carc *); -static void freecnfa (struct cnfa *); -static void dumpnfa (struct nfa *, FILE *); +static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); +static void freenfa(struct nfa *); +static struct state *newstate(struct nfa *); +static struct state *newfstate(struct nfa *, int flag); +static void dropstate(struct nfa *, struct state *); +static void freestate(struct nfa *, struct state *); +static void destroystate(struct nfa *, struct state *); +static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); +static struct arc *allocarc(struct nfa *, struct state *); +static void freearc(struct nfa *, struct arc *); +static struct arc *findarc(struct state *, int, pcolor); +static void cparc(struct nfa *, struct arc *, struct state *, struct state *); +static void moveins(struct nfa *, struct state *, struct state *); +static void copyins(struct nfa *, struct state *, struct state *); +static void moveouts(struct nfa *, struct state *, struct state *); +static void copyouts(struct nfa *, struct state *, struct state *); +static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); +static void delsub(struct nfa *, struct state *, struct state *); +static void deltraverse(struct nfa *, struct state *, struct state *); +static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *); +static void duptraverse(struct nfa *, struct state *, struct state *); +static void cleartraverse(struct nfa *, struct state *); +static void specialcolors(struct nfa *); +static long optimize(struct nfa *, FILE *); +static void pullback(struct nfa *, FILE *); +static int pull(struct nfa *, struct arc *); +static void pushfwd(struct nfa *, FILE *); +static int push(struct nfa *, struct arc *); + +#define INCOMPATIBLE 1 /* destroys arc */ +#define SATISFIED 2 /* 
constraint satisfied */ +#define COMPATIBLE 3 /* compatible but not satisfied yet */ +static int combine(struct arc *, struct arc *); +static void fixempties(struct nfa *, FILE *); +static int unempty(struct nfa *, struct arc *); +static void cleanup(struct nfa *); +static void markreachable(struct nfa *, struct state *, struct state *, struct state *); +static void markcanreach(struct nfa *, struct state *, struct state *, struct state *); +static long analyze(struct nfa *); +static void compact(struct nfa *, struct cnfa *); +static void carcsort(struct carc *, struct carc *); +static void freecnfa(struct cnfa *); +static void dumpnfa(struct nfa *, FILE *); + #ifdef REG_DEBUG -static void dumpstate (struct state *, FILE *); -static void dumparcs (struct state *, FILE *); -static int dumprarcs (struct arc *, struct state *, FILE *, int); -static void dumparc (struct arc *, struct state *, FILE *); -static void dumpcnfa (struct cnfa *, FILE *); -static void dumpcstate (int, struct carc *, struct cnfa *, FILE *); +static void dumpstate(struct state *, FILE *); +static void dumparcs(struct state *, FILE *); +static int dumprarcs(struct arc *, struct state *, FILE *, int); +static void dumparc(struct arc *, struct state *, FILE *); +static void dumpcnfa(struct cnfa *, FILE *); +static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); #endif /* === regc_cvec.c === */ -static struct cvec *newcvec (int, int, int); -static struct cvec *clearcvec (struct cvec *); -static void addchr (struct cvec *, chr); -static void addrange (struct cvec *, chr, chr); -static void addmcce (struct cvec *, chr *, chr *); -static int haschr (struct cvec *, chr); -static struct cvec *getcvec (struct vars *, int, int, int); -static void freecvec (struct cvec *); +static struct cvec *newcvec(int, int, int); +static struct cvec *clearcvec(struct cvec *); +static void addchr(struct cvec *, chr); +static void addrange(struct cvec *, chr, chr); +static void addmcce(struct cvec *, chr *, 
chr *); +static int haschr(struct cvec *, chr); +static struct cvec *getcvec(struct vars *, int, int, int); +static void freecvec(struct cvec *); + /* === regc_locale.c === */ -static int pg_isdigit(pg_wchar c); -static int pg_isalpha(pg_wchar c); -static int pg_isalnum(pg_wchar c); -static int pg_isupper(pg_wchar c); -static int pg_islower(pg_wchar c); -static int pg_isgraph(pg_wchar c); -static int pg_ispunct(pg_wchar c); -static int pg_isspace(pg_wchar c); +static int pg_isdigit(pg_wchar c); +static int pg_isalpha(pg_wchar c); +static int pg_isalnum(pg_wchar c); +static int pg_isupper(pg_wchar c); +static int pg_islower(pg_wchar c); +static int pg_isgraph(pg_wchar c); +static int pg_ispunct(pg_wchar c); +static int pg_isspace(pg_wchar c); static pg_wchar pg_toupper(pg_wchar c); static pg_wchar pg_tolower(pg_wchar c); -static int nmcces (struct vars *); -static int nleaders (struct vars *); -static struct cvec *allmcces (struct vars *, struct cvec *); -static celt element (struct vars *, chr *, chr *); -static struct cvec *range (struct vars *, celt, celt, int); -static int before (celt, celt); -static struct cvec *eclass (struct vars *, celt, int); -static struct cvec *cclass (struct vars *, chr *, chr *, int); -static struct cvec *allcases (struct vars *, chr); -static int cmp (const chr *, const chr *, size_t); -static int casecmp (const chr *, const chr *, size_t); +static int nmcces(struct vars *); +static int nleaders(struct vars *); +static struct cvec *allmcces(struct vars *, struct cvec *); +static celt element(struct vars *, chr *, chr *); +static struct cvec *range(struct vars *, celt, celt, int); +static int before(celt, celt); +static struct cvec *eclass(struct vars *, celt, int); +static struct cvec *cclass(struct vars *, chr *, chr *, int); +static struct cvec *allcases(struct vars *, chr); +static int cmp(const chr *, const chr *, size_t); +static int casecmp(const chr *, const chr *, size_t); /* internal variables, bundled for easy passing around 
*/ -struct vars { - regex_t *re; - chr *now; /* scan pointer into string */ - chr *stop; /* end of string */ - chr *savenow; /* saved now and stop for "subroutine call" */ - chr *savestop; - int err; /* error code (0 if none) */ - int cflags; /* copy of compile flags */ - int lasttype; /* type of previous token */ - int nexttype; /* type of next token */ - chr nextvalue; /* value (if any) of next token */ - int lexcon; /* lexical context type (see lex.c) */ - int nsubexp; /* subexpression count */ - struct subre **subs; /* subRE pointer vector */ - size_t nsubs; /* length of vector */ +struct vars +{ + regex_t *re; + chr *now; /* scan pointer into string */ + chr *stop; /* end of string */ + chr *savenow; /* saved now and stop for "subroutine + * call" */ + chr *savestop; + int err; /* error code (0 if none) */ + int cflags; /* copy of compile flags */ + int lasttype; /* type of previous token */ + int nexttype; /* type of next token */ + chr nextvalue; /* value (if any) of next token */ + int lexcon; /* lexical context type (see lex.c) */ + int nsubexp; /* subexpression count */ + struct subre **subs; /* subRE pointer vector */ + size_t nsubs; /* length of vector */ struct subre *sub10[10]; /* initial vector, enough for most */ - struct nfa *nfa; /* the NFA */ - struct colormap *cm; /* character color map */ - color nlcolor; /* color of newline */ - struct state *wordchrs; /* state in nfa holding word-char outarcs */ - struct subre *tree; /* subexpression tree */ + struct nfa *nfa; /* the NFA */ + struct colormap *cm; /* character color map */ + color nlcolor; /* color of newline */ + struct state *wordchrs; /* state in nfa holding word-char outarcs */ + struct subre *tree; /* subexpression tree */ struct subre *treechain; /* all tree nodes allocated */ struct subre *treefree; /* any free tree nodes */ - int ntree; /* number of tree nodes */ - struct cvec *cv; /* interface cvec */ - struct cvec *cv2; /* utility cvec */ - struct cvec *mcces; /* collating-element 
information */ -# define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) + int ntree; /* number of tree nodes */ + struct cvec *cv; /* interface cvec */ + struct cvec *cv2; /* utility cvec */ + struct cvec *mcces; /* collating-element information */ +#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ + struct state *mccepend; /* in nfa, end of MCCE prototypes */ + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ }; /* parsing macros; most know that `v' is the struct vars pointer */ -#define NEXT() (next(v)) /* advance by one token */ -#define SEE(t) (v->nexttype == (t)) /* is next token this? */ -#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ +#define NEXT() (next(v)) /* advance by one token */ +#define SEE(t) (v->nexttype == (t)) /* is next token this? */ +#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return;} /* if error seen, return */ -#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ -#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ -#define INSIST(c, e) ((c) ? 
0 : ERR(e)) /* if condition false, error */ -#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ -#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return;} /* if error seen, return */ +#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ +#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ +#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, + * error */ +#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ +#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) /* token type codes, some also used as NFA arc types */ -#define EMPTY 'n' /* no token present */ -#define EOS 'e' /* end of string */ -#define PLAIN 'p' /* ordinary character */ -#define DIGIT 'd' /* digit (in bound) */ -#define BACKREF 'b' /* back reference */ -#define COLLEL 'I' /* start of [. */ -#define ECLASS 'E' /* start of [= */ -#define CCLASS 'C' /* start of [: */ -#define END 'X' /* end of [. [= [: */ -#define RANGE 'R' /* - within [] which might be range delim. */ -#define LACON 'L' /* lookahead constraint subRE */ -#define AHEAD 'a' /* color-lookahead arc */ -#define BEHIND 'r' /* color-lookbehind arc */ -#define WBDRY 'w' /* word boundary constraint */ -#define NWBDRY 'W' /* non-word-boundary constraint */ -#define SBEGIN 'A' /* beginning of string (even if not BOL) */ -#define SEND 'Z' /* end of string (even if not EOL) */ -#define PREFER 'P' /* length preference */ +#define EMPTY 'n' /* no token present */ +#define EOS 'e' /* end of string */ +#define PLAIN 'p' /* ordinary character */ +#define DIGIT 'd' /* digit (in bound) */ +#define BACKREF 'b' /* back reference */ +#define COLLEL 'I' /* start of [. */ +#define ECLASS 'E' /* start of [= */ +#define CCLASS 'C' /* start of [: */ +#define END 'X' /* end of [. [= [: */ +#define RANGE 'R' /* - within [] which might be range delim. 
*/ +#define LACON 'L' /* lookahead constraint subRE */ +#define AHEAD 'a' /* color-lookahead arc */ +#define BEHIND 'r' /* color-lookbehind arc */ +#define WBDRY 'w' /* word boundary constraint */ +#define NWBDRY 'W' /* non-word-boundary constraint */ +#define SBEGIN 'A' /* beginning of string (even if not BOL) */ +#define SEND 'Z' /* end of string (even if not EOL) */ +#define PREFER 'P' /* length preference */ /* is an arc colored, and hence on a color chain? */ -#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ +#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ (a)->type == BEHIND) /* static function list */ static struct fns functions = { - rfree, /* regfree insides */ + rfree, /* regfree insides */ }; @@ -284,36 +293,38 @@ static struct fns functions = { */ int pg_regcomp(regex_t *re, - const chr *string, + const chr * string, size_t len, int flags) { struct vars var; struct vars *v = &var; struct guts *g; - int i; - size_t j; + int i; + size_t j; + #ifdef REG_DEBUG - FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL; + FILE *debug = (flags & REG_PROGRESS) ? 
stdout : (FILE *) NULL; + #else - FILE *debug = (FILE *) NULL; + FILE *debug = (FILE *) NULL; #endif -# define CNOERR() { if (ISERR()) return freev(v, v->err); } +#define CNOERR() { if (ISERR()) return freev(v, v->err); } /* sanity checks */ if (re == NULL || string == NULL) return REG_INVARG; - if ((flags&REG_QUOTE) && - (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) + if ((flags & REG_QUOTE) && + (flags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))) return REG_INVARG; - if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) + if (!(flags & REG_EXTENDED) && (flags & REG_ADVF)) return REG_INVARG; /* initial setup (after which freev() is callable) */ v->re = re; - v->now = (chr *)string; + v->now = (chr *) string; v->stop = v->now + len; v->savenow = v->savestop = NULL; v->err = 0; @@ -336,7 +347,7 @@ pg_regcomp(regex_t *re, v->lacons = NULL; v->nlacons = 0; re->re_magic = REMAGIC; - re->re_info = 0; /* bits get set during parse */ + re->re_info = 0; /* bits get set during parse */ re->re_csize = sizeof(chr); re->re_guts = NULL; re->re_fns = VS(&functions); @@ -345,38 +356,40 @@ pg_regcomp(regex_t *re, re->re_guts = VS(MALLOC(sizeof(struct guts))); if (re->re_guts == NULL) return freev(v, REG_ESPACE); - g = (struct guts *)re->re_guts; + g = (struct guts *) re->re_guts; g->tree = NULL; initcm(v, &g->cmap); v->cm = &g->cmap; g->lacons = NULL; g->nlacons = 0; ZAPCNFA(g->search); - v->nfa = newnfa(v, v->cm, (struct nfa *)NULL); + v->nfa = newnfa(v, v->cm, (struct nfa *) NULL); CNOERR(); v->cv = newcvec(100, 20, 10); if (v->cv == NULL) return freev(v, REG_ESPACE); i = nmcces(v); - if (i > 0) { + if (i > 0) + { v->mcces = newcvec(nleaders(v), 0, i); CNOERR(); v->mcces = allmcces(v, v->mcces); leaders(v, v->mcces); - addmcce(v->mcces, (chr *)NULL, (chr *)NULL); /* dummy */ + addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */ } CNOERR(); /* parsing */ - lexstart(v); /* also handles prefixes */ - if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) { + lexstart(v); /* 
also handles 
prefixes */ + if ((v->cflags & REG_NLSTOP) || (v->cflags & REG_NLANCH)) + { /* assign newline a unique color */ v->nlcolor = subcolor(v->cm, newline()); okcolors(v->nfa, v->cm); } CNOERR(); v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); - assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ + assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ CNOERR(); assert(v->tree != NULL); @@ -384,7 +397,8 @@ pg_regcomp(regex_t *re, specialcolors(v->nfa); CNOERR(); #ifdef REG_DEBUG - if (debug != NULL) { + if (debug != NULL) + { fprintf(debug, "\n\n\n========= RAW ==========\n"); dumpnfa(v->nfa, debug); dumpst(v->tree, debug, 1); @@ -395,7 +409,8 @@ pg_regcomp(regex_t *re, markst(v->tree); cleanst(v); #ifdef REG_DEBUG - if (debug != NULL) { + if (debug != NULL) + { fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); dumpst(v->tree, debug, 1); } @@ -405,7 +420,8 @@ pg_regcomp(regex_t *re, re->re_info |= nfatree(v, v->tree, debug); CNOERR(); assert(v->nlacons == 0 || v->lacons != NULL); - for (i = 1; i < v->nlacons; i++) { + for (i = 1; i < v->nlacons; i++) + { #ifdef REG_DEBUG if (debug != NULL) fprintf(debug, "\n\n\n========= LA%d ==========\n", i); @@ -413,7 +429,7 @@ pg_regcomp(regex_t *re, nfanode(v, &v->lacons[i], debug); } CNOERR(); - if (v->tree->flags&SHORTER) + if (v->tree->flags & SHORTER) NOTE(REG_USHORTEST); /* build compacted NFAs for tree, lacons, fast search */ @@ -422,7 +438,7 @@ pg_regcomp(regex_t *re, fprintf(debug, "\n\n\n========= SEARCH ==========\n"); #endif /* can sacrifice main NFA now, so use it as work area */ - (DISCARD)optimize(v->nfa, debug); + (DISCARD) optimize(v->nfa, debug); CNOERR(); makesearch(v, v->nfa); CNOERR(); @@ -431,7 +447,7 @@ pg_regcomp(regex_t *re, /* looks okay, package it up */ re->re_nsub = v->nsubexp; - v->re = NULL; /* freev no longer frees re */ + v->re = NULL; /* freev no longer frees re */ g->magic = GUTSMAGIC; g->cflags = v->cflags; g->info = re->re_info; @@ -439,13 +455,13 @@ 
pg_regcomp(regex_t *re, g->tree = v->tree; v->tree = NULL; g->ntree = v->ntree; - g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp; + g->compare = (v->cflags & REG_ICASE) ? casecmp : cmp; g->lacons = v->lacons; v->lacons = NULL; g->nlacons = v->nlacons; #ifdef REG_DEBUG - if (flags&REG_DUMP) + if (flags & REG_DUMP) dump(re, stdout); #endif @@ -457,22 +473,26 @@ pg_regcomp(regex_t *re, * moresubs - enlarge subRE vector */ static void -moresubs(struct vars *v, +moresubs(struct vars * v, int wanted) /* want enough room for this one */ { struct subre **p; - size_t n; + size_t n; - assert(wanted > 0 && (size_t)wanted >= v->nsubs); - n = (size_t)wanted * 3 / 2 + 1; - if (v->subs == v->sub10) { - p = (struct subre **)MALLOC(n * sizeof(struct subre *)); + assert(wanted > 0 && (size_t) wanted >= v->nsubs); + n = (size_t) wanted *3 / 2 + 1; + + if (v->subs == v->sub10) + { + p = (struct subre **) MALLOC(n * sizeof(struct subre *)); if (p != NULL) memcpy(VS(p), VS(v->subs), - v->nsubs * sizeof(struct subre *)); - } else - p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); - if (p == NULL) { + v->nsubs * sizeof(struct subre *)); + } + else + p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *)); + if (p == NULL) + { ERR(REG_ESPACE); return; } @@ -480,7 +500,7 @@ moresubs(struct vars *v, for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) *p = NULL; assert(v->nsubs == n); - assert((size_t)wanted < v->nsubs); + assert((size_t) wanted < v->nsubs); } /* @@ -490,7 +510,7 @@ moresubs(struct vars *v, * (if any), to make error-handling code terser. */ static int -freev(struct vars *v, +freev(struct vars * v, int err) { if (v->re != NULL) @@ -511,7 +531,7 @@ freev(struct vars *v, freecvec(v->mcces); if (v->lacons != NULL) freelacons(v->lacons, v->nlacons); - ERR(err); /* nop if err==0 */ + ERR(err); /* nop if err==0 */ return v->err; } @@ -521,8 +541,8 @@ freev(struct vars *v, * NFA must have been optimize()d already. 
*/ static void -makesearch(struct vars *v, - struct nfa *nfa) +makesearch(struct vars * v, + struct nfa * nfa) { struct arc *a; struct arc *b; @@ -532,12 +552,14 @@ makesearch(struct vars *v, struct state *slist; /* no loops are needed if it's anchored */ - for (a = pre->outs; a != NULL; a = a->outchain) { + for (a = pre->outs; a != NULL; a = a->outchain) + { assert(a->type == PLAIN); if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) break; } - if (a != NULL) { + if (a != NULL) + { /* add implicit .* in front */ rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); @@ -548,40 +570,45 @@ makesearch(struct vars *v, /* * Now here's the subtle part. Because many REs have no lookback - * constraints, often knowing when you were in the pre state tells - * you little; it's the next state(s) that are informative. But - * some of them may have other inarcs, i.e. it may be possible to - * make actual progress and then return to one of them. We must - * de-optimize such cases, splitting each such state into progress - * and no-progress states. + * constraints, often knowing when you were in the pre state tells you + * little; it's the next state(s) that are informative. But some of + * them may have other inarcs, i.e. it may be possible to make actual + * progress and then return to one of them. We must de-optimize such + * cases, splitting each such state into progress and no-progress + * states. 
*/ /* first, make a list of the states */ slist = NULL; - for (a = pre->outs; a != NULL; a = a->outchain) { + for (a = pre->outs; a != NULL; a = a->outchain) + { s = a->to; for (b = s->ins; b != NULL; b = b->inchain) if (b->from != pre) break; - if (b != NULL) { /* must be split */ + if (b != NULL) + { /* must be split */ s->tmp = slist; slist = s; } } /* do the splits */ - for (s = slist; s != NULL; s = s2) { + for (s = slist; s != NULL; s = s2) + { s2 = newstate(nfa); copyouts(nfa, s, s2); - for (a = s->ins; a != NULL; a = b) { + for (a = s->ins; a != NULL; a = b) + { b = a->inchain; - if (a->from != pre) { + if (a->from != pre) + { cparc(nfa, a, a->from, s2); freearc(nfa, a); } } s2 = s->tmp; - s->tmp = NULL; /* clean up while we're at it */ + s->tmp = NULL; /* clean up while we're at it */ } } @@ -589,22 +616,22 @@ makesearch(struct vars *v, * parse - parse an RE * * This is actually just the top level, which parses a bunch of branches - * tied together with '|'. They appear in the tree as the left children + * tied together with '|'. They appear in the tree as the left children * of a chain of '|' subres. */ static struct subre * -parse(struct vars *v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state *init, /* initial state */ - struct state *final) /* final state */ +parse(struct vars * v, + int stopper, /* EOS or ')' */ + int type, /* LACON (lookahead subRE) or PLAIN */ + struct state * init, /* initial state */ + struct state * final) /* final state */ { - struct state *left; /* scaffolding for branch */ + struct state *left; /* scaffolding for branch */ struct state *right; - struct subre *branches; /* top level */ - struct subre *branch; /* current branch */ - struct subre *t; /* temporary */ - int firstbranch; /* is this the first branch? 
*/ + struct subre *branches; /* top level */ + struct subre *branch; /* current branch */ + struct subre *t; /* temporary */ + int firstbranch; /* is this the first branch? */ assert(stopper == ')' || stopper == EOS); @@ -612,8 +639,10 @@ parse(struct vars *v, NOERRN(); branch = branches; firstbranch = 1; - do { /* a branch */ - if (!firstbranch) { + do + { /* a branch */ + if (!firstbranch) + { /* need a place to hang it */ branch->right = subre(v, '|', LONGER, init, final); NOERRN(); @@ -629,25 +658,29 @@ parse(struct vars *v, branch->left = parsebranch(v, stopper, type, left, right, 0); NOERRN(); branch->flags |= UP(branch->flags | branch->left->flags); - if ((branch->flags &~ branches->flags) != 0) /* new flags */ + if ((branch->flags & ~branches->flags) != 0) /* new flags */ for (t = branches; t != branch; t = t->right) t->flags |= branch->flags; } while (EAT('|')); assert(SEE(stopper) || SEE(EOS)); - if (!SEE(stopper)) { + if (!SEE(stopper)) + { assert(stopper == ')' && SEE(EOS)); ERR(REG_EPAREN); } /* optimize out simple cases */ - if (branch == branches) { /* only one branch */ + if (branch == branches) + { /* only one branch */ assert(branch->right == NULL); t = branch->left; branch->left = NULL; freesubre(v, branches); branches = t; - } else if (!MESSY(branches->flags)) { /* no interesting innards */ + } + else if (!MESSY(branches->flags)) + { /* no interesting innards */ freesubre(v, branches->left); branches->left = NULL; freesubre(v, branches->right); @@ -666,23 +699,25 @@ parse(struct vars *v, * ',' nodes introduced only when necessary due to substructure. */ static struct subre * -parsebranch(struct vars *v, - int stopper, /* EOS or ')' */ +parsebranch(struct vars * v, + int stopper, /* EOS or ')' */ int type, /* LACON (lookahead subRE) or PLAIN */ - struct state *left, /* leftmost state */ - struct state *right, /* rightmost state */ - int partial) /* is this only part of a branch? 
*/ + struct state * left, /* leftmost state */ + struct state * right, /* rightmost state */ + int partial) /* is this only part of a branch? */ { - struct state *lp; /* left end of current construct */ - int seencontent; /* is there anything in this branch yet? */ + struct state *lp; /* left end of current construct */ + int seencontent; /* is there anything in this branch yet? */ struct subre *t; lp = left; seencontent = 0; t = subre(v, '=', 0, left, right); /* op '=' is tentative */ NOERRN(); - while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { - if (seencontent) { /* implicit concat operator */ + while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) + { + if (seencontent) + { /* implicit concat operator */ lp = newstate(v->nfa); NOERRN(); moveins(v->nfa, right, lp); @@ -693,7 +728,8 @@ parsebranch(struct vars *v, parseqatom(v, stopper, type, lp, right, t); } - if (!seencontent) { /* empty branch */ + if (!seencontent) + { /* empty branch */ if (!partial) NOTE(REG_UUNSPEC); assert(lp == left); @@ -711,259 +747,273 @@ parsebranch(struct vars *v, * of the branch, making this function's name somewhat inaccurate. */ static void -parseqatom(struct vars *v, +parseqatom(struct vars * v, int stopper, /* EOS or ')' */ int type, /* LACON (lookahead subRE) or PLAIN */ - struct state *lp, /* left state to hang it on */ - struct state *rp, /* right state to hang it on */ - struct subre *top) /* subtree top */ + struct state * lp, /* left state to hang it on */ + struct state * rp, /* right state to hang it on */ + struct subre * top) /* subtree top */ { - struct state *s; /* temporaries for new states */ + struct state *s; /* temporaries for new states */ struct state *s2; -# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) - int m, n; - struct subre *atom; /* atom's subtree */ + +#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) + int m, + n; + struct subre *atom; /* atom's subtree */ struct subre *t; - int cap; /* capturing parens? */ - int pos; /* positive lookahead? 
*/ - int subno; /* capturing-parens or backref number */ - int atomtype; - int qprefer; /* quantifier short/long preference */ - int f; - struct subre **atomp; /* where the pointer to atom is */ + int cap; /* capturing parens? */ + int pos; /* positive lookahead? */ + int subno; /* capturing-parens or backref number */ + int atomtype; + int qprefer; /* quantifier short/long preference */ + int f; + struct subre **atomp; /* where the pointer to atom is */ /* initial bookkeeping */ atom = NULL; - assert(lp->nouts == 0); /* must string new code */ - assert(rp->nins == 0); /* between lp and rp */ - subno = 0; /* just to shut lint up */ + assert(lp->nouts == 0); /* must string new code */ + assert(rp->nins == 0); /* between lp and rp */ + subno = 0; /* just to shut lint up */ /* an atom or constraint... */ atomtype = v->nexttype; - switch (atomtype) { - /* first, constraints, which end by returning */ - case '^': - ARCV('^', 1); - if (v->cflags&REG_NLANCH) - ARCV(BEHIND, v->nlcolor); - NEXT(); - return; - break; - case '$': - ARCV('$', 1); - if (v->cflags&REG_NLANCH) - ARCV(AHEAD, v->nlcolor); - NEXT(); - return; - break; - case SBEGIN: - ARCV('^', 1); /* BOL */ - ARCV('^', 0); /* or BOS */ - NEXT(); - return; - break; - case SEND: - ARCV('$', 1); /* EOL */ - ARCV('$', 0); /* or EOS */ - NEXT(); - return; - break; - case '<': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - return; - break; - case '>': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case WBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case NWBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - word(v, 
AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case LACON: /* lookahead constraint */ - pos = v->nextvalue; - NEXT(); - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - t = parse(v, ')', LACON, s, s2); - freesubre(v, t); /* internal structure irrelevant */ - assert(SEE(')') || ISERR()); - NEXT(); - n = newlacon(v, s, s2, pos); - NOERR(); - ARCV(LACON, n); - return; - break; - /* then errors, to get them out of the way */ - case '*': - case '+': - case '?': - case '{': - ERR(REG_BADRPT); - return; - break; - default: - ERR(REG_ASSERT); - return; - break; - /* then plain characters, and minor variants on that theme */ - case ')': /* unbalanced paren */ - if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) { - ERR(REG_EPAREN); + switch (atomtype) + { + /* first, constraints, which end by returning */ + case '^': + ARCV('^', 1); + if (v->cflags & REG_NLANCH) + ARCV(BEHIND, v->nlcolor); + NEXT(); return; - } - /* legal in EREs due to specification botch */ - NOTE(REG_UPBOTCH); - /* fallthrough into case PLAIN */ - case PLAIN: - onechr(v, v->nextvalue, lp, rp); - okcolors(v->nfa, v->cm); - NOERR(); - NEXT(); - break; - case '[': - if (v->nextvalue == 1) - bracket(v, lp, rp); - else - cbracket(v, lp, rp); - assert(SEE(']') || ISERR()); - NEXT(); - break; - case '.': - rainbow(v->nfa, v->cm, PLAIN, - (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, - lp, rp); - NEXT(); - break; - /* and finally the ugly stuff */ - case '(': /* value flags as capturing or non */ - cap = (type == LACON) ? 
0 : v->nextvalue; - if (cap) { - v->nsubexp++; - subno = v->nsubexp; - if ((size_t)subno >= v->nsubs) - moresubs(v, subno); - assert((size_t)subno < v->nsubs); - } else - atomtype = PLAIN; /* something that's not '(' */ - NEXT(); - /* need new endpoints because tree will contain pointers */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - NOERR(); - atom = parse(v, ')', PLAIN, s, s2); - assert(SEE(')') || ISERR()); - NEXT(); - NOERR(); - if (cap) { - v->subs[subno] = atom; - t = subre(v, '(', atom->flags|CAP, lp, rp); + break; + case '$': + ARCV('$', 1); + if (v->cflags & REG_NLANCH) + ARCV(AHEAD, v->nlcolor); + NEXT(); + return; + break; + case SBEGIN: + ARCV('^', 1); /* BOL */ + ARCV('^', 0); /* or BOS */ + NEXT(); + return; + break; + case SEND: + ARCV('$', 1); /* EOL */ + ARCV('$', 0); /* or EOS */ + NEXT(); + return; + break; + case '<': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); NOERR(); - t->subno = subno; - t->left = atom; - atom = t; - } - /* postpone everything else pending possible {0} */ - break; - case BACKREF: /* the Feature From The Black Lagoon */ - INSIST(type != LACON, REG_ESUBREG); - INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); - INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); - NOERR(); - assert(v->nextvalue > 0); - atom = subre(v, 'b', BACKR, lp, rp); - subno = v->nextvalue; - atom->subno = subno; - EMPTYARC(lp, rp); /* temporarily, so there's something */ - NEXT(); - break; + nonword(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + return; + break; + case '>': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case WBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case NWBDRY: + wordchrs(v); /* does 
NEXT() */ + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case LACON: /* lookahead constraint */ + pos = v->nextvalue; + NEXT(); + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + t = parse(v, ')', LACON, s, s2); + freesubre(v, t); /* internal structure irrelevant */ + assert(SEE(')') || ISERR()); + NEXT(); + n = newlacon(v, s, s2, pos); + NOERR(); + ARCV(LACON, n); + return; + break; + /* then errors, to get them out of the way */ + case '*': + case '+': + case '?': + case '{': + ERR(REG_BADRPT); + return; + break; + default: + ERR(REG_ASSERT); + return; + break; + /* then plain characters, and minor variants on that theme */ + case ')': /* unbalanced paren */ + if ((v->cflags & REG_ADVANCED) != REG_EXTENDED) + { + ERR(REG_EPAREN); + return; + } + /* legal in EREs due to specification botch */ + NOTE(REG_UPBOTCH); + /* fallthrough into case PLAIN */ + case PLAIN: + onechr(v, v->nextvalue, lp, rp); + okcolors(v->nfa, v->cm); + NOERR(); + NEXT(); + break; + case '[': + if (v->nextvalue == 1) + bracket(v, lp, rp); + else + cbracket(v, lp, rp); + assert(SEE(']') || ISERR()); + NEXT(); + break; + case '.': + rainbow(v->nfa, v->cm, PLAIN, + (v->cflags & REG_NLSTOP) ? v->nlcolor : COLORLESS, + lp, rp); + NEXT(); + break; + /* and finally the ugly stuff */ + case '(': /* value flags as capturing or non */ + cap = (type == LACON) ? 
0 : v->nextvalue; + if (cap) + { + v->nsubexp++; + subno = v->nsubexp; + if ((size_t) subno >= v->nsubs) + moresubs(v, subno); + assert((size_t) subno < v->nsubs); + } + else + atomtype = PLAIN; /* something that's not '(' */ + NEXT(); + /* need new endpoints because tree will contain pointers */ + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); + NOERR(); + atom = parse(v, ')', PLAIN, s, s2); + assert(SEE(')') || ISERR()); + NEXT(); + NOERR(); + if (cap) + { + v->subs[subno] = atom; + t = subre(v, '(', atom->flags | CAP, lp, rp); + NOERR(); + t->subno = subno; + t->left = atom; + atom = t; + } + /* postpone everything else pending possible {0} */ + break; + case BACKREF: /* the Feature From The Black Lagoon */ + INSIST(type != LACON, REG_ESUBREG); + INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); + INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); + NOERR(); + assert(v->nextvalue > 0); + atom = subre(v, 'b', BACKR, lp, rp); + subno = v->nextvalue; + atom->subno = subno; + EMPTYARC(lp, rp); /* temporarily, so there's something */ + NEXT(); + break; } /* ...and an atom may be followed by a quantifier */ - switch (v->nexttype) { - case '*': - m = 0; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '+': - m = 1; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '?': - m = 0; - n = 1; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '{': - NEXT(); - m = scannum(v); - if (EAT(',')) { - if (SEE(DIGIT)) - n = scannum(v); + switch (v->nexttype) + { + case '*': + m = 0; + n = INFINITY; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '+': + m = 1; + n = INFINITY; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '?': + m = 0; + n = 1; + qprefer = (v->nextvalue) ? 
LONGER : SHORTER; + NEXT(); + break; + case '{': + NEXT(); + m = scannum(v); + if (EAT(',')) + { + if (SEE(DIGIT)) + n = scannum(v); + else + n = INFINITY; + if (m > n) + { + ERR(REG_BADBR); + return; + } + /* {m,n} exercises preference, even if it's {m,m} */ + qprefer = (v->nextvalue) ? LONGER : SHORTER; + } else - n = INFINITY; - if (m > n) { + { + n = m; + /* {m} passes operand's preference through */ + qprefer = 0; + } + if (!SEE('}')) + { /* catches errors too */ ERR(REG_BADBR); return; } - /* {m,n} exercises preference, even if it's {m,m} */ - qprefer = (v->nextvalue) ? LONGER : SHORTER; - } else { - n = m; - /* {m} passes operand's preference through */ + NEXT(); + break; + default: /* no quantifier */ + m = n = 1; qprefer = 0; - } - if (!SEE('}')) { /* catches errors too */ - ERR(REG_BADBR); - return; - } - NEXT(); - break; - default: /* no quantifier */ - m = n = 1; - qprefer = 0; - break; + break; } /* annoying special case: {0} or {0,0} cancels everything */ - if (m == 0 && n == 0) { + if (m == 0 && n == 0) + { if (atom != NULL) freesubre(v, atom); if (atomtype == '(') @@ -976,7 +1026,8 @@ parseqatom(struct vars *v, /* if not a messy case, avoid hard part */ assert(!MESSY(top->flags)); f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); - if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) { + if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) + { if (!(m == 1 && n == 1)) repeat(v, lp, rp, m, n); if (atom != NULL) @@ -986,13 +1037,14 @@ parseqatom(struct vars *v, } /* - * hard part: something messy - * That is, capturing parens, back reference, short/long clash, or - * an atom with substructure containing one of those. + * hard part: something messy That is, capturing parens, back + * reference, short/long clash, or an atom with substructure + * containing one of those. 
*/ /* now we'll need a subre for the contents even if they're boring */ - if (atom == NULL) { + if (atom == NULL) + { atom = subre(v, '=', 0, lp, rp); NOERR(); } @@ -1000,9 +1052,8 @@ parseqatom(struct vars *v, /* * prepare a general-purpose state skeleton * - * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] - * / / - * [lp] ----> [s2] ----bypass--------------------- + * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] / + * / [lp] ----> [s2] ----bypass--------------------- * * where bypass is an empty, and prefix is some repetitions of atom */ @@ -1034,21 +1085,23 @@ parseqatom(struct vars *v, top->right = t; /* if it's a backref, now is the time to replicate the subNFA */ - if (atomtype == BACKREF) { - assert(atom->begin->nouts == 1); /* just the EMPTY */ + if (atomtype == BACKREF) + { + assert(atom->begin->nouts == 1); /* just the EMPTY */ delsub(v->nfa, atom->begin, atom->end); assert(v->subs[subno] != NULL); /* and here's why the recursion got postponed: it must */ /* wait until the skeleton is filled in, because it may */ /* hit a backref that wants to copy the filled-in skeleton */ dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, - atom->begin, atom->end); + atom->begin, atom->end); NOERR(); } /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ - if (m == 0) { - EMPTYARC(s2, atom->end); /* the bypass */ + if (m == 0) + { + EMPTYARC(s2, atom->end); /* the bypass */ assert(PREF(qprefer) != 0); f = COMBINE(qprefer, atom->flags); t = subre(v, '|', f, lp, atom->end); @@ -1064,25 +1117,30 @@ parseqatom(struct vars *v, } /* deal with the rest of the quantifier */ - if (atomtype == BACKREF) { + if (atomtype == BACKREF) + { /* special case: backrefs have internal quantifiers */ - EMPTYARC(s, atom->begin); /* empty prefix */ + EMPTYARC(s, atom->begin); /* empty prefix */ /* just stuff everything into atom */ repeat(v, atom->begin, atom->end, m, n); - atom->min = (short)m; - atom->max = (short)n; + 
atom->min = (short) m; + atom->max = (short) n; atom->flags |= COMBINE(qprefer, atom->flags); - } else if (m == 1 && n == 1) { + } + else if (m == 1 && n == 1) + { /* no/vacuous quantifier: done */ - EMPTYARC(s, atom->begin); /* empty prefix */ - } else { + EMPTYARC(s, atom->begin); /* empty prefix */ + } + else + { /* turn x{m,n} into x{m-1,n-1}x, with capturing */ - /* parens in only second x */ + /* parens in only second x */ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); assert(m >= 1 && m != INFINITY && n >= 1); - repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); + repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1); f = COMBINE(qprefer, atom->flags); - t = subre(v, '.', f, s, atom->end); /* prefix and atom */ + t = subre(v, '.', f, s, atom->end); /* prefix and atom */ NOERR(); t->left = subre(v, '=', PREF(f), s, atom->begin); NOERR(); @@ -1094,7 +1152,8 @@ parseqatom(struct vars *v, t = top->right; if (!(SEE('|') || SEE(stopper) || SEE(EOS))) t->right = parsebranch(v, stopper, type, atom->end, rp, 1); - else { + else + { EMPTYARC(atom->end, rp); t->right = subre(v, '=', 0, atom->end, rp); } @@ -1107,12 +1166,12 @@ parseqatom(struct vars *v, * nonword - generate arcs for non-word-character ahead or behind */ static void -nonword(struct vars *v, - int dir, /* AHEAD or BEHIND */ - struct state *lp, - struct state *rp) +nonword(struct vars * v, + int dir, /* AHEAD or BEHIND */ + struct state * lp, + struct state * rp) { - int anchor = (dir == AHEAD) ? '$' : '^'; + int anchor = (dir == AHEAD) ? 
'$' : '^'; assert(dir == AHEAD || dir == BEHIND); newarc(v->nfa, anchor, 1, lp, rp); @@ -1125,10 +1184,10 @@ nonword(struct vars *v, * word - generate arcs for word character ahead or behind */ static void -word(struct vars *v, - int dir, /* AHEAD or BEHIND */ - struct state *lp, - struct state *rp) +word(struct vars * v, + int dir, /* AHEAD or BEHIND */ + struct state * lp, + struct state * rp) { assert(dir == AHEAD || dir == BEHIND); cloneouts(v->nfa, v->wordchrs, lp, rp, dir); @@ -1138,16 +1197,18 @@ word(struct vars *v, /* * scannum - scan a number */ -static int /* value, <= DUPMAX */ -scannum(struct vars *v) +static int /* value, <= DUPMAX */ +scannum(struct vars * v) { - int n = 0; + int n = 0; - while (SEE(DIGIT) && n < DUPMAX) { - n = n*10 + v->nextvalue; + while (SEE(DIGIT) && n < DUPMAX) + { + n = n * 10 + v->nextvalue; NEXT(); } - if (SEE(DIGIT) || n > DUPMAX) { + if (SEE(DIGIT) || n > DUPMAX) + { ERR(REG_BADBR); return 0; } @@ -1165,83 +1226,84 @@ scannum(struct vars *v) * code in parse(), and when this is called, it doesn't matter any more. */ static void -repeat(struct vars *v, - struct state *lp, - struct state *rp, +repeat(struct vars * v, + struct state * lp, + struct state * rp, int m, int n) { -# define SOME 2 -# define INF 3 -# define PAIR(x, y) ((x)*4 + (y)) -# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) - const int rm = REDUCE(m); - const int rn = REDUCE(n); +#define SOME 2 +#define INF 3 +#define PAIR(x, y) ((x)*4 + (y)) +#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? 
SOME : (x)) ) + const int rm = REDUCE(m); + const int rn = REDUCE(n); struct state *s; struct state *s2; - switch (PAIR(rm, rn)) { - case PAIR(0, 0): /* empty string */ - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - break; - case PAIR(0, 1): /* do as x| */ - EMPTYARC(lp, rp); - break; - case PAIR(0, SOME): /* do as x{1,n}| */ - repeat(v, lp, rp, 1, n); - NOERR(); - EMPTYARC(lp, rp); - break; - case PAIR(0, INF): /* loop x around */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s); - EMPTYARC(lp, s); - EMPTYARC(s, rp); - break; - case PAIR(1, 1): /* no action required */ - break; - case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, 1, n-1); - NOERR(); - EMPTYARC(lp, s); - break; - case PAIR(1, INF): /* add loopback arc */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - EMPTYARC(s2, s); - break; - case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m-1, n-1); - break; - case PAIR(SOME, INF): /* do as x{m-1,}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m-1, n); - break; - default: - ERR(REG_ASSERT); - break; + switch (PAIR(rm, rn)) + { + case PAIR(0, 0): /* empty string */ + delsub(v->nfa, lp, rp); + EMPTYARC(lp, rp); + break; + case PAIR(0, 1): /* do as x| */ + EMPTYARC(lp, rp); + break; + case PAIR(0, SOME): /* do as x{1,n}| */ + repeat(v, lp, rp, 1, n); + NOERR(); + EMPTYARC(lp, rp); + break; + case PAIR(0, INF): /* loop x around */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s); + EMPTYARC(lp, s); + EMPTYARC(s, rp); + break; + case PAIR(1, 1): /* no 
action required */ + break; + case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, 1, n - 1); + NOERR(); + EMPTYARC(lp, s); + break; + case PAIR(1, INF): /* add loopback arc */ + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s2); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); + EMPTYARC(s2, s); + break; + case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m - 1, n - 1); + break; + case PAIR(SOME, INF): /* do as x{m-1,}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m - 1, n); + break; + default: + ERR(REG_ASSERT); + break; } } @@ -1250,9 +1312,9 @@ repeat(struct vars *v, * Also called from cbracket for complemented bracket expressions. */ static void -bracket(struct vars *v, - struct state *lp, - struct state *rp) +bracket(struct vars * v, + struct state * lp, + struct state * rp) { assert(SEE('[')); NEXT(); @@ -1265,27 +1327,27 @@ bracket(struct vars *v, /* * cbracket - handle complemented bracket expression * We do it by calling bracket() with dummy endpoints, and then complementing - * the result. The alternative would be to invoke rainbow(), and then delete + * the result. The alternative would be to invoke rainbow(), and then delete * arcs as the b.e. is seen... but that gets messy. 
*/ static void -cbracket(struct vars *v, - struct state *lp, - struct state *rp) +cbracket(struct vars * v, + struct state * lp, + struct state * rp) { struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - chr *p; - int i; + struct arc *a; /* arc from lp */ + struct arc *ba; /* arc from left, from bracket() */ + struct arc *pa; /* MCCE-prototype arc */ + color co; + chr *p; + int i; NOERR(); bracket(v, left, right); - if (v->cflags&REG_NLSTOP) + if (v->cflags & REG_NLSTOP) newarc(v->nfa, PLAIN, v->nlcolor, left, right); NOERR(); @@ -1294,7 +1356,8 @@ cbracket(struct vars *v, /* easy part of complementing */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (v->mcces == NULL) { /* no MCCEs -- we're done */ + if (v->mcces == NULL) + { /* no MCCEs -- we're done */ dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); @@ -1303,33 +1366,39 @@ cbracket(struct vars *v, /* but complementing gets messy in the presence of MCCEs... 
*/ NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { + for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) + { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); ba = findarc(left, PLAIN, co); - if (ba == NULL) { + if (ba == NULL) + { assert(a != NULL); freearc(v->nfa, a); - } else { - assert(a == NULL); } + else + assert(a == NULL); s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); - if (ba == NULL) { /* easy case, need all of them */ + if (ba == NULL) + { /* easy case, need all of them */ cloneouts(v->nfa, pa->to, s, rp, PLAIN); newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } else { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) { + } + else + { /* must be selective */ + if (findarc(ba->to, '$', 1) == NULL) + { newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, - s, rp); + s, rp); } for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) if (findarc(ba->to, PLAIN, pa->co) == NULL) @@ -1346,83 +1415,39 @@ cbracket(struct vars *v, assert(right->nins == 0); freestate(v->nfa, right); } - + /* * brackpart - handle one item (or range) within a bracket expression */ static void -brackpart(struct vars *v, - struct state *lp, - struct state *rp) +brackpart(struct vars * v, + struct state * lp, + struct state * rp) { - celt startc; - celt endc; + celt startc; + celt endc; struct cvec *cv; - chr *startp; - chr *endp; - chr c[1]; + chr *startp; + chr *endp; + chr c[1]; /* parse something, get rid of special cases, take shortcuts */ - switch (v->nexttype) { - case RANGE: /* a-b-c or other botch */ - ERR(REG_ERANGE); - return; - break; - case PLAIN: - c[0] = v->nextvalue; - NEXT(); - /* shortcut for ordinary chr (not range, not MCCE leader) */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { - onechr(v, 
c[0], lp, rp); + switch (v->nexttype) + { + case RANGE: /* a-b-c or other botch */ + ERR(REG_ERANGE); return; - } - startc = element(v, c, c+1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - break; - case ECLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - cv = eclass(v, startc, (v->cflags®_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - case CCLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECTYPE); - NOERR(); - cv = cclass(v, startp, endp, (v->cflags®_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - default: - ERR(REG_ASSERT); - return; - break; - } - - if (SEE(RANGE)) { - NEXT(); - switch (v->nexttype) { + break; case PLAIN: - case RANGE: c[0] = v->nextvalue; NEXT(); - endc = element(v, c, c+1); + /* shortcut for ordinary chr (not range, not MCCE leader) */ + if (!SEE(RANGE) && !ISCELEADER(v, c[0])) + { + onechr(v, c[0], lp, rp); + return; + } + startc = element(v, c, c + 1); NOERR(); break; case COLLEL: @@ -1430,25 +1455,74 @@ brackpart(struct vars *v, endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); - endc = element(v, startp, endp); + startc = element(v, startp, endp); + NOERR(); + break; + case ECLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + cv = eclass(v, startc, (v->cflags & REG_ICASE)); NOERR(); + dovec(v, cv, lp, rp); + return; + break; + case CCLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECTYPE); + NOERR(); + cv = cclass(v, startp, endp, (v->cflags & REG_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); + return; break; default: - ERR(REG_ERANGE); + ERR(REG_ASSERT); return; break; + } + + if (SEE(RANGE)) + { + NEXT(); + 
switch (v->nexttype) + { + case PLAIN: + case RANGE: + c[0] = v->nextvalue; + NEXT(); + endc = element(v, c, c + 1); + NOERR(); + break; + case COLLEL: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + endc = element(v, startp, endp); + NOERR(); + break; + default: + ERR(REG_ERANGE); + return; + break; } - } else + } + else endc = startc; /* - * Ranges are unportable. Actually, standard C does - * guarantee that digits are contiguous, but making - * that an exception is just too complicated. + * Ranges are unportable. Actually, standard C does guarantee that + * digits are contiguous, but making that an exception is just too + * complicated. */ if (startc != endc) NOTE(REG_UUNPORT); - cv = range(v, startc, endc, (v->cflags&REG_ICASE)); + cv = range(v, startc, endc, (v->cflags & REG_ICASE)); NOERR(); dovec(v, cv, lp, rp); } @@ -1459,16 +1533,17 @@ brackpart(struct vars *v, * Certain bits of trickery in lex.c know that this code does not try * to look past the final bracket of the [. etc. */ -static chr * /* just after end of sequence */ -scanplain(struct vars *v) +static chr * /* just after end of sequence */ +scanplain(struct vars * v) { - chr *endp; + chr *endp; assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); NEXT(); endp = v->now; - while (SEE(PLAIN)) { + while (SEE(PLAIN)) + { endp = v->now; NEXT(); } @@ -1485,12 +1560,12 @@ scanplain(struct vars *v) * certainly necessary, and sets up little disconnected subNFA. 
*/ static void -leaders(struct vars *v, - struct cvec *cv) +leaders(struct vars * v, + struct cvec * cv) { - int mcce; - chr *p; - chr leader; + int mcce; + chr *p; + chr leader; struct state *s; struct arc *a; @@ -1498,16 +1573,20 @@ leaders(struct vars *v, v->mccepend = newstate(v->nfa); NOERR(); - for (mcce = 0; mcce < cv->nmcces; mcce++) { + for (mcce = 0; mcce < cv->nmcces; mcce++) + { p = cv->mcces[mcce]; leader = *p; - if (!haschr(cv, leader)) { + if (!haschr(cv, leader)) + { addchr(cv, leader); s = newstate(v->nfa); newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->mccepbegin, s); + v->mccepbegin, s); okcolors(v->nfa, v->cm); - } else { + } + else + { a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); assert(a != NULL); @@ -1515,7 +1594,8 @@ leaders(struct vars *v, assert(s != v->mccepend); } p++; - assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ + assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for + * now */ newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); okcolors(v->nfa, v->cm); } @@ -1526,12 +1606,13 @@ leaders(struct vars *v, * This is mostly a shortcut for efficient handling of the common case. */ static void -onechr(struct vars *v, +onechr(struct vars * v, chr c, - struct state *lp, - struct state *rp) + struct state * lp, + struct state * rp) { - if (!(v->cflags&REG_ICASE)) { + if (!(v->cflags & REG_ICASE)) + { newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); return; } @@ -1545,42 +1626,50 @@ onechr(struct vars *v, * This one has to handle the messy cases, like MCCEs and MCCE leaders. 
*/ static void -dovec(struct vars *v, - struct cvec *cv, - struct state *lp, - struct state *rp) +dovec(struct vars * v, + struct cvec * cv, + struct state * lp, + struct state * rp) { - chr ch, from, to; - celt ce; - chr *p; - int i; - color co; + chr ch, + from, + to; + celt ce; + chr *p; + int i; + color co; struct cvec *leads; struct arc *a; - struct arc *pa; /* arc in prototype */ + struct arc *pa; /* arc in prototype */ struct state *s; - struct state *ps; /* state in prototype */ + struct state *ps; /* state in prototype */ /* need a place to store leaders, if any */ - if (nmcces(v) > 0) { + if (nmcces(v) > 0) + { assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { + if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) + { if (v->cv2 != NULL) free(v->cv2); v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); NOERR(); leads = v->cv2; - } else + } + else leads = clearcvec(v->cv2); - } else + } + else leads = NULL; /* first, get the ordinary characters out of the way */ - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) + { ch = *p; if (!ISCELEADER(v, ch)) newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - else { + else + { assert(singleton(v->cm, ch)); assert(leads != NULL); if (!haschr(leads, ch)) @@ -1589,10 +1678,12 @@ dovec(struct vars *v, } /* and the ranges */ - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) + { from = *p; - to = *(p+1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { + to = *(p + 1); + while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) + { if (from < ce) subrange(v, from, ce - 1, lp, rp); assert(singleton(v->cm, ce)); @@ -1610,12 +1701,14 @@ dovec(struct vars *v, /* deal with the MCCE leaders */ NOTE(REG_ULOCALE); - for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { + for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) + { co 
= GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); if (a != NULL) s = a->to; - else { + else + { s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); @@ -1631,10 +1724,12 @@ dovec(struct vars *v, } /* and the MCCEs */ - for (i = 0; i < cv->nmcces; i++) { + for (i = 0; i < cv->nmcces; i++) + { p = cv->mcces[i]; assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) { + if (!singleton(v->cm, *p)) + { ERR(REG_ASSERT); return; } @@ -1643,17 +1738,18 @@ dovec(struct vars *v, a = findarc(lp, PLAIN, co); if (a != NULL) s = a->to; - else { + else + { s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); } - assert(*p != 0); /* at least two chars */ + assert(*p != 0); /* at least two chars */ assert(singleton(v->cm, *p)); ch = *p++; co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ + assert(*p == 0); /* and only two, for now */ newarc(v->nfa, PLAIN, co, s, rp); NOERR(); } @@ -1662,20 +1758,21 @@ dovec(struct vars *v, /* * nextleader - find next MCCE leader within range */ -static celt /* NOCELT means none */ -nextleader(struct vars *v, +static celt /* NOCELT means none */ +nextleader(struct vars * v, chr from, chr to) { - int i; - chr *p; - chr ch; - celt it = NOCELT; + int i; + chr *p; + chr ch; + celt it = NOCELT; if (v->mcces == NULL) return it; - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { + for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) + { ch = *p; if (from <= ch && ch <= to) if (it == NOCELT || ch < it) @@ -1694,20 +1791,21 @@ nextleader(struct vars *v, * should be cleaned up to reduce dependencies on input scanning. 
*/ static void -wordchrs(struct vars *v) +wordchrs(struct vars * v) { struct state *left; struct state *right; - if (v->wordchrs != NULL) { - NEXT(); /* for consistency */ + if (v->wordchrs != NULL) + { + NEXT(); /* for consistency */ return; } left = newstate(v->nfa); right = newstate(v->nfa); NOERR(); - /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ + /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ lexword(v); NEXT(); assert(v->savenow != NULL && SEE('[')); @@ -1722,20 +1820,22 @@ wordchrs(struct vars *v) * subre - allocate a subre */ static struct subre * -subre(struct vars *v, +subre(struct vars * v, int op, int flags, - struct state *begin, - struct state *end) + struct state * begin, + struct state * end) { struct subre *ret; ret = v->treefree; if (ret != NULL) v->treefree = ret->left; - else { - ret = (struct subre *)MALLOC(sizeof(struct subre)); - if (ret == NULL) { + else + { + ret = (struct subre *) MALLOC(sizeof(struct subre)); + if (ret == NULL) + { ERR(REG_ESPACE); return NULL; } @@ -1763,8 +1863,8 @@ subre(struct vars *v, * freesubre - free a subRE subtree */ static void -freesubre(struct vars *v, /* might be NULL */ - struct subre *sr) +freesubre(struct vars * v, /* might be NULL */ + struct subre * sr) { if (sr == NULL) return; @@ -1781,8 +1881,8 @@ freesubre(struct vars *v, /* might be NULL */ * freesrnode - free one node in a subRE subtree */ static void -freesrnode(struct vars *v, /* might be NULL */ - struct subre *sr) +freesrnode(struct vars * v, /* might be NULL */ + struct subre * sr) { if (sr == NULL) return; @@ -1791,10 +1891,12 @@ freesrnode(struct vars *v, /* might be NULL */ freecnfa(&sr->cnfa); sr->flags = 0; - if (v != NULL) { + if (v != NULL) + { sr->left = v->treefree; v->treefree = sr; - } else + } + else FREE(sr); } @@ -1802,8 +1904,8 @@ freesrnode(struct vars *v, /* might be NULL */ * optst - optimize a subRE subtree */ static void -optst(struct vars *v, - struct subre *t) 
+optst(struct vars * v, + struct subre * t) { if (t == NULL) return; @@ -1818,16 +1920,16 @@ optst(struct vars *v, /* * numst - number tree nodes (assigning retry indexes) */ -static int /* next number */ -numst(struct subre *t, - int start) /* starting point for subtree numbers */ +static int /* next number */ +numst(struct subre * t, + int start) /* starting point for subtree numbers */ { - int i; + int i; assert(t != NULL); i = start; - t->retry = (short)i++; + t->retry = (short) i++; if (t->left != NULL) i = numst(t->left, i); if (t->right != NULL) @@ -1839,7 +1941,7 @@ numst(struct subre *t, * markst - mark tree nodes as INUSE */ static void -markst(struct subre *t) +markst(struct subre * t) { assert(t != NULL); @@ -1854,34 +1956,35 @@ markst(struct subre *t) * cleanst - free any tree nodes not marked INUSE */ static void -cleanst(struct vars *v) +cleanst(struct vars * v) { struct subre *t; struct subre *next; - for (t = v->treechain; t != NULL; t = next) { + for (t = v->treechain; t != NULL; t = next) + { next = t->chain; - if (!(t->flags&INUSE)) + if (!(t->flags & INUSE)) FREE(t); } v->treechain = NULL; - v->treefree = NULL; /* just on general principles */ + v->treefree = NULL; /* just on general principles */ } /* * nfatree - turn a subRE subtree into a tree of compacted NFAs */ -static long /* optimize results from top node */ -nfatree(struct vars *v, - struct subre *t, +static long /* optimize results from top node */ +nfatree(struct vars * v, + struct subre * t, FILE *f) /* for debug output */ { assert(t != NULL && t->begin != NULL); if (t->left != NULL) - (DISCARD)nfatree(v, t->left, f); + (DISCARD) nfatree(v, t->left, f); if (t->right != NULL) - (DISCARD)nfatree(v, t->right, f); + (DISCARD) nfatree(v, t->right, f); return nfanode(v, t, f); } @@ -1889,29 +1992,30 @@ nfatree(struct vars *v, /* * nfanode - do one NFA for nfatree */ -static long /* optimize results */ -nfanode(struct vars *v, - struct subre *t, +static long /* optimize results */ 
+nfanode(struct vars * v, + struct subre * t, FILE *f) /* for debug output */ { struct nfa *nfa; - long ret = 0; + long ret = 0; assert(t->begin != NULL); #ifdef REG_DEBUG if (f != NULL) { - char idbuf[50]; + char idbuf[50]; fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", - stid(t, idbuf, sizeof(idbuf))); + stid(t, idbuf, sizeof(idbuf))); } #endif nfa = newnfa(v, v->cm, v->nfa); NOERRZ(); dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); - if (!ISERR()) { + if (!ISERR()) + { specialcolors(nfa); ret = optimize(nfa, f); } @@ -1925,25 +2029,29 @@ nfanode(struct vars *v, /* * newlacon - allocate a lookahead-constraint subRE */ -static int /* lacon number */ -newlacon(struct vars *v, - struct state *begin, - struct state *end, +static int /* lacon number */ +newlacon(struct vars * v, + struct state * begin, + struct state * end, int pos) { - int n; + int n; struct subre *sub; - if (v->nlacons == 0) { - v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre)); - n = 1; /* skip 0th */ + if (v->nlacons == 0) + { + v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre)); + n = 1; /* skip 0th */ v->nlacons = 2; - } else { - v->lacons = (struct subre *)REALLOC(v->lacons, - (v->nlacons+1)*sizeof(struct subre)); + } + else + { + v->lacons = (struct subre *) REALLOC(v->lacons, + (v->nlacons + 1) * sizeof(struct subre)); n = v->nlacons++; } - if (v->lacons == NULL) { + if (v->lacons == NULL) + { ERR(REG_ESPACE); return 0; } @@ -1959,11 +2067,11 @@ newlacon(struct vars *v, * freelacons - free lookahead-constraint subRE vector */ static void -freelacons(struct subre *subs, +freelacons(struct subre * subs, int n) { struct subre *sub; - int i; + int i; assert(n > 0); for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */ @@ -1983,14 +2091,14 @@ rfree(regex_t *re) if (re == NULL || re->re_magic != REMAGIC) return; - re->re_magic = 0; /* invalidate RE */ - g = (struct guts *)re->re_guts; + re->re_magic = 0; /* invalidate RE */ + g = (struct guts *) 
re->re_guts; re->re_guts = NULL; re->re_fns = NULL; g->magic = 0; freecm(&g->cmap); if (g->tree != NULL) - freesubre((struct vars *)NULL, g->tree); + freesubre((struct vars *) NULL, g->tree); if (g->lacons != NULL) freelacons(g->lacons, g->nlacons); if (!NULLCNFA(g->search)) @@ -2008,30 +2116,33 @@ dump(regex_t *re, FILE *f) { struct guts *g; - int i; + int i; if (re->re_magic != REMAGIC) fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, - REMAGIC); - if (re->re_guts == NULL) { + REMAGIC); + if (re->re_guts == NULL) + { fprintf(f, "NULL guts!!!\n"); return; } - g = (struct guts *)re->re_guts; + g = (struct guts *) re->re_guts; if (g->magic != GUTSMAGIC) fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, - GUTSMAGIC); + GUTSMAGIC); fprintf(f, "\n\n\n========= DUMP ==========\n"); - fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", - re->re_nsub, re->re_info, re->re_csize, g->ntree); + fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", + re->re_nsub, re->re_info, re->re_csize, g->ntree); dumpcolors(&g->cmap, f); - if (!NULLCNFA(g->search)) { + if (!NULLCNFA(g->search)) + { printf("\nsearch:\n"); dumpcnfa(&g->search, f); } - for (i = 1; i < g->nlacons; i++) { + for (i = 1; i < g->nlacons; i++) + { fprintf(f, "\nla%d (%s):\n", i, (g->lacons[i].subno) ? "positive" : "negative"); dumpcnfa(&g->lacons[i].cnfa, f); @@ -2044,7 +2155,7 @@ dump(regex_t *re, * dumpst - dump a subRE tree */ static void -dumpst(struct subre *t, +dumpst(struct subre * t, FILE *f, int nfapresent) /* is the original NFA still around? */ { @@ -2059,40 +2170,42 @@ dumpst(struct subre *t, * stdump - recursive guts of dumpst */ static void -stdump(struct subre *t, +stdump(struct subre * t, FILE *f, int nfapresent) /* is the original NFA still around? */ { - char idbuf[50]; + char idbuf[50]; fprintf(f, "%s. 
`%c'", stid(t, idbuf, sizeof(idbuf)), t->op); - if (t->flags&LONGER) + if (t->flags & LONGER) fprintf(f, " longest"); - if (t->flags&SHORTER) + if (t->flags & SHORTER) fprintf(f, " shortest"); - if (t->flags&MIXED) + if (t->flags & MIXED) fprintf(f, " hasmixed"); - if (t->flags&CAP) + if (t->flags & CAP) fprintf(f, " hascapture"); - if (t->flags&BACKR) + if (t->flags & BACKR) fprintf(f, " hasbackref"); - if (!(t->flags&INUSE)) + if (!(t->flags & INUSE)) fprintf(f, " UNUSED"); if (t->subno != 0) fprintf(f, " (#%d)", t->subno); - if (t->min != 1 || t->max != 1) { + if (t->min != 1 || t->max != 1) + { fprintf(f, " {%d,", t->min); if (t->max != INFINITY) fprintf(f, "%d", t->max); fprintf(f, "}"); } if (nfapresent) - fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); + fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no); if (t->left != NULL) fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); if (t->right != NULL) fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); - if (!NULLCNFA(t->cnfa)) { + if (!NULLCNFA(t->cnfa)) + { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); fprintf(f, "\n"); @@ -2106,22 +2219,21 @@ stdump(struct subre *t, /* * stid - identify a subtree node for dumping */ -static char * /* points to buf or constant string */ -stid(struct subre *t, +static char * /* points to buf or constant string */ +stid(struct subre * t, char *buf, size_t bufsize) { /* big enough for hex int or decimal t->retry? 
*/ - if (bufsize < sizeof(int)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) + if (bufsize < sizeof(int) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1) return "unable"; if (t->retry != 0) sprintf(buf, "%d", t->retry); else - sprintf(buf, "0x%x", (int)t); /* may lose bits, that's okay */ + sprintf(buf, "0x%x", (int) t); /* may lose bits, that's okay */ return buf; } - -#endif /* REG_DEBUG */ +#endif /* REG_DEBUG */ #include "regc_lex.c" diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index 3bdfc2ab182..6004462c934 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -2,21 +2,21 @@ * DFA routines * This file is #included by regexec.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,27 +28,27 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header: /cvsroot/pgsql/src/backend/regex/rege_dfa.c,v 1.1 2003/02/05 17:41:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/rege_dfa.c,v 1.2 2003/08/04 00:43:21 momjian Exp $ * */ /* * longest - longest-preferred matching engine */ -static chr * /* endpoint, or NULL */ -longest(struct vars *v, /* used only for debug and exec flags */ - struct dfa *d, - chr *start, /* where the match should start */ - chr *stop, /* match must end at or before here */ +static chr * /* endpoint, or NULL */ +longest(struct vars * v, /* used only for debug and exec flags */ + struct dfa * d, + chr * start, /* where the match should start */ + chr * stop, /* match must end at or before here */ int *hitstopp) /* record whether hit v->stop, if non-NULL */ { - chr *cp; - chr *realstop = (stop == v->stop) ? stop : stop + 1; - color co; + chr *cp; + chr *realstop = (stop == v->stop) ? stop : stop + 1; + color co; struct sset *css; struct sset *ss; - chr *post; - int i; + chr *post; + int i; struct colormap *cm = d->cm; /* initialize */ @@ -59,12 +59,15 @@ longest(struct vars *v, /* used only for debug and exec flags */ /* startup */ FDEBUG(("+++ startup +++\n")); - if (cp == v->start) { - co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); - } else { + if (cp == v->start) + { + co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 
0 : 1]; + FDEBUG(("color %ld\n", (long) co)); + } + else + { co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); + FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); } css = miss(v, d, css, co, cp, start); if (css == NULL) @@ -72,29 +75,33 @@ longest(struct vars *v, /* used only for debug and exec flags */ css->lastseen = cp; /* main loop */ - if (v->eflags&REG_FTRACE) - while (cp < realstop) { + if (v->eflags & REG_FTRACE) + while (cp < realstop) + { FDEBUG(("+++ at c%d +++\n", css - d->ssets)); co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); + FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); + if (ss == NULL) + { + ss = miss(v, d, css, co, cp + 1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; css = ss; } else - while (cp < realstop) { + while (cp < realstop) + { co = GETCOLOR(cm, *cp); ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); + if (ss == NULL) + { + ss = miss(v, d, css, co, cp + 1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; @@ -103,14 +110,15 @@ longest(struct vars *v, /* used only for debug and exec flags */ /* shutdown */ FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); - if (cp == v->stop && stop == v->stop) { + if (cp == v->stop && stop == v->stop) + { if (hitstopp != NULL) *hitstopp = 1; - co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); + co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; + FDEBUG(("color %ld\n", (long) co)); ss = miss(v, d, css, co, cp, start); /* special case: match ended at eol? 
*/ - if (ss != NULL && (ss->flags&POSTSTATE)) + if (ss != NULL && (ss->flags & POSTSTATE)) return cp; else if (ss != NULL) ss->lastseen = cp; /* to be tidy */ @@ -119,10 +127,10 @@ longest(struct vars *v, /* used only for debug and exec flags */ /* find last match, if any */ post = d->lastpost; for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags&POSTSTATE) && post != ss->lastseen && - (post == NULL || post < ss->lastseen)) + if ((ss->flags & POSTSTATE) && post != ss->lastseen && + (post == NULL || post < ss->lastseen)) post = ss->lastseen; - if (post != NULL) /* found one */ + if (post != NULL) /* found one */ return post - 1; return NULL; @@ -131,19 +139,20 @@ longest(struct vars *v, /* used only for debug and exec flags */ /* * shortest - shortest-preferred matching engine */ -static chr * /* endpoint, or NULL */ -shortest(struct vars *v, - struct dfa *d, - chr *start, /* where the match should start */ - chr *min, /* match must end at or after here */ - chr *max, /* match must end at or before here */ - chr **coldp, /* store coldstart pointer here, if nonNULL */ +static chr * /* endpoint, or NULL */ +shortest(struct vars * v, + struct dfa * d, + chr * start, /* where the match should start */ + chr * min, /* match must end at or after here */ + chr * max, /* match must end at or before here */ + chr ** coldp, /* store coldstart pointer here, if + * nonNULL */ int *hitstopp) /* record whether hit v->stop, if non-NULL */ { - chr *cp; - chr *realmin = (min == v->stop) ? min : min + 1; - chr *realmax = (max == v->stop) ? max : max + 1; - color co; + chr *cp; + chr *realmin = (min == v->stop) ? min : min + 1; + chr *realmax = (max == v->stop) ? max : max + 1; + color co; struct sset *css; struct sset *ss; struct colormap *cm = d->cm; @@ -156,12 +165,15 @@ shortest(struct vars *v, /* startup */ FDEBUG(("--- startup ---\n")); - if (cp == v->start) { - co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 
0 : 1]; - FDEBUG(("color %ld\n", (long)co)); - } else { + if (cp == v->start) + { + co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1]; + FDEBUG(("color %ld\n", (long) co)); + } + else + { co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); + FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); } css = miss(v, d, css, co, cp, start); if (css == NULL) @@ -170,58 +182,66 @@ shortest(struct vars *v, ss = css; /* main loop */ - if (v->eflags&REG_FTRACE) - while (cp < realmax) { + if (v->eflags & REG_FTRACE) + while (cp < realmax) + { FDEBUG(("--- at c%d ---\n", css - d->ssets)); co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); + FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); + if (ss == NULL) + { + ss = miss(v, d, css, co, cp + 1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; css = ss; - if ((ss->flags&POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ + if ((ss->flags & POSTSTATE) && cp >= realmin) + break; /* NOTE BREAK OUT */ } else - while (cp < realmax) { + while (cp < realmax) + { co = GETCOLOR(cm, *cp); ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); + if (ss == NULL) + { + ss = miss(v, d, css, co, cp + 1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; css = ss; - if ((ss->flags&POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ + if ((ss->flags & POSTSTATE) && cp >= realmin) + break; /* NOTE BREAK OUT */ } if (ss == NULL) return NULL; - if (coldp != NULL) /* report last no-progress state set, if any */ + if (coldp != NULL) /* report last no-progress state set, if + * any */ *coldp = lastcold(v, d); - if ((ss->flags&POSTSTATE) && cp > min) { + if ((ss->flags & POSTSTATE) && cp > min) + { assert(cp >= realmin); cp--; - } 
else if (cp == v->stop && max == v->stop) { - co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); + } + else if (cp == v->stop && max == v->stop) + { + co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; + FDEBUG(("color %ld\n", (long) co)); ss = miss(v, d, css, co, cp, start); /* match might have ended at eol */ - if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) + if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL) *hitstopp = 1; } - if (ss == NULL || !(ss->flags&POSTSTATE)) + if (ss == NULL || !(ss->flags & POSTSTATE)) return NULL; return cp; @@ -230,19 +250,19 @@ shortest(struct vars *v, /* * lastcold - determine last point at which no progress had been made */ -static chr * /* endpoint, or NULL */ -lastcold(struct vars *v, - struct dfa *d) +static chr * /* endpoint, or NULL */ +lastcold(struct vars * v, + struct dfa * d) { struct sset *ss; - chr *nopr; - int i; + chr *nopr; + int i; nopr = d->lastnopr; if (nopr == NULL) nopr = v->start; for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) + if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen) nopr = ss->lastseen; return nopr; } @@ -251,24 +271,27 @@ lastcold(struct vars *v, * newdfa - set up a fresh DFA */ static struct dfa * -newdfa(struct vars *v, - struct cnfa *cnfa, - struct colormap *cm, - struct smalldfa *small) /* preallocated space, may be NULL */ +newdfa(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm, + struct smalldfa * small) /* preallocated space, may be NULL */ { struct dfa *d; - size_t nss = cnfa->nstates * 2; - int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; + size_t nss = cnfa->nstates * 2; + int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; struct smalldfa *smallwas = small; assert(cnfa != NULL && cnfa->nstates != 0); - if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { + if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) + { assert(wordsper == 1); - if 
(small == NULL) { - small = (struct smalldfa *)MALLOC( - sizeof(struct smalldfa)); - if (small == NULL) { + if (small == NULL) + { + small = (struct smalldfa *) MALLOC( + sizeof(struct smalldfa)); + if (small == NULL) + { ERR(REG_ESPACE); return NULL; } @@ -280,32 +303,36 @@ newdfa(struct vars *v, d->outsarea = small->outsarea; d->incarea = small->incarea; d->cptsmalloced = 0; - d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; - } else { - d = (struct dfa *)MALLOC(sizeof(struct dfa)); - if (d == NULL) { + d->mallocarea = (smallwas == NULL) ? (char *) small : NULL; + } + else + { + d = (struct dfa *) MALLOC(sizeof(struct dfa)); + if (d == NULL) + { ERR(REG_ESPACE); return NULL; } - d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); - d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * - sizeof(unsigned)); + d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset)); + d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper * + sizeof(unsigned)); d->work = &d->statesarea[nss * wordsper]; - d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * - sizeof(struct arcp)); + d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * + sizeof(struct sset *)); + d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * + sizeof(struct arcp)); d->cptsmalloced = 1; - d->mallocarea = (char *)d; + d->mallocarea = (char *) d; if (d->ssets == NULL || d->statesarea == NULL || - d->outsarea == NULL || d->incarea == NULL) { + d->outsarea == NULL || d->incarea == NULL) + { freedfa(d); ERR(REG_ESPACE); return NULL; } } - d->nssets = (v->eflags&REG_SMALL) ? 7 : nss; + d->nssets = (v->eflags & REG_SMALL) ? 
7 : nss; d->nssused = 0; d->nstates = cnfa->nstates; d->ncolors = cnfa->ncolors; @@ -325,9 +352,10 @@ newdfa(struct vars *v, * freedfa - free a DFA */ static void -freedfa(struct dfa *d) +freedfa(struct dfa * d) { - if (d->cptsmalloced) { + if (d->cptsmalloced) + { if (d->ssets != NULL) FREE(d->ssets); if (d->statesarea != NULL) @@ -351,8 +379,8 @@ static unsigned hash(unsigned *uv, int n) { - int i; - unsigned h; + int i; + unsigned h; h = 0; for (i = 0; i < n; i++) @@ -364,24 +392,25 @@ hash(unsigned *uv, * initialize - hand-craft a cache entry for startup, otherwise get ready */ static struct sset * -initialize(struct vars *v, /* used only for debug flags */ - struct dfa *d, - chr *start) +initialize(struct vars * v, /* used only for debug flags */ + struct dfa * d, + chr * start) { struct sset *ss; - int i; + int i; /* is previous one still there? */ - if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) + if (d->nssused > 0 && (d->ssets[0].flags & STARTER)) ss = &d->ssets[0]; - else { /* no, must (re)build it */ + else + { /* no, must (re)build it */ ss = getvacant(v, d, start, start); for (i = 0; i < d->wordsper; i++) ss->states[i] = 0; BSET(ss->states, d->cnfa->pre); ss->hash = HASH(ss->states, d->wordsper); assert(d->cnfa->pre != d->cnfa->post); - ss->flags = STARTER|LOCKED|NOPROGRESS; + ss->flags = STARTER | LOCKED | NOPROGRESS; /* lastseen dealt with below */ } @@ -396,27 +425,28 @@ initialize(struct vars *v, /* used only for debug flags */ /* * miss - handle a cache miss */ -static struct sset * /* NULL if goes to empty set */ -miss(struct vars *v, /* used only for debug flags */ - struct dfa *d, - struct sset *css, +static struct sset * /* NULL if goes to empty set */ +miss(struct vars * v, /* used only for debug flags */ + struct dfa * d, + struct sset * css, pcolor co, - chr *cp, /* next chr */ - chr *start) /* where the attempt got started */ + chr * cp, /* next chr */ + chr * start) /* where the attempt got started */ { struct cnfa *cnfa = d->cnfa; - 
int i; - unsigned h; + int i; + unsigned h; struct carc *ca; struct sset *p; - int ispost; - int noprogress; - int gotstate; - int dolacons; - int sawlacons; + int ispost; + int noprogress; + int gotstate; + int dolacons; + int sawlacons; /* for convenience, we can be called even if it might not be a miss */ - if (css->outs[co] != NULL) { + if (css->outs[co] != NULL) + { FDEBUG(("hit\n")); return css->outs[co]; } @@ -430,8 +460,9 @@ miss(struct vars *v, /* used only for debug flags */ gotstate = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(css->states, i)) - for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) - if (ca->co == co) { + for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++) + if (ca->co == co) + { BSET(d->work, ca->to); gotstate = 1; if (ca->to == cnfa->post) @@ -440,21 +471,23 @@ miss(struct vars *v, /* used only for debug flags */ noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } - dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; + dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0; sawlacons = 0; - while (dolacons) { /* transitive closure */ + while (dolacons) + { /* transitive closure */ dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) - for (ca = cnfa->states[i]+1; ca->co != COLORLESS; - ca++) { + for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; + ca++) + { if (ca->co <= cnfa->ncolors) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ sawlacons = 1; if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ BSET(d->work, ca->to); dolacons = 1; if (ca->to == cnfa->post) @@ -470,11 +503,13 @@ miss(struct vars *v, /* used only for debug flags */ /* next, is that in the cache? 
*/ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) - if (HIT(h, d->work, p, d->wordsper)) { + if (HIT(h, d->work, p, d->wordsper)) + { FDEBUG(("cached c%d\n", p - d->ssets)); - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } - if (i == 0) { /* nope, need a new cache entry */ + if (i == 0) + { /* nope, need a new cache entry */ p = getvacant(v, d, cp, start); assert(p != css); for (i = 0; i < d->wordsper; i++) @@ -486,12 +521,13 @@ miss(struct vars *v, /* used only for debug flags */ /* lastseen to be dealt with by caller */ } - if (!sawlacons) { /* lookahead conds. always cache miss */ + if (!sawlacons) + { /* lookahead conds. always cache miss */ FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); css->outs[co] = p; css->inchain[co] = p->ins; p->ins.ss = css; - p->ins.co = (color)co; + p->ins.co = (color) co; } return p; } @@ -499,28 +535,29 @@ miss(struct vars *v, /* used only for debug flags */ /* * lacon - lookahead-constraint checker for miss() */ -static int /* predicate: constraint satisfied? */ -lacon(struct vars *v, - struct cnfa *pcnfa, /* parent cnfa */ - chr *cp, - pcolor co) /* "color" of the lookahead constraint */ +static int /* predicate: constraint satisfied? */ +lacon(struct vars * v, + struct cnfa * pcnfa, /* parent cnfa */ + chr * cp, + pcolor co) /* "color" of the lookahead constraint */ { - int n; + int n; struct subre *sub; struct dfa *d; struct smalldfa sd; - chr *end; + chr *end; n = co - pcnfa->ncolors; assert(n < v->g->nlacons && v->g->lacons != NULL); FDEBUG(("=== testing lacon %d\n", n)); sub = &v->g->lacons[n]; d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); - if (d == NULL) { + if (d == NULL) + { ERR(REG_ESPACE); return 0; } - end = longest(v, d, cp, v->stop, (int *)NULL); + end = longest(v, d, cp, v->stop, (int *) NULL); freedfa(d); FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); return (sub->subno) ? 
(end != NULL) : (end == NULL); @@ -532,46 +569,49 @@ lacon(struct vars *v, * clear the innards of the state set -- that's up to the caller. */ static struct sset * -getvacant(struct vars *v, /* used only for debug flags */ - struct dfa *d, - chr *cp, - chr *start) +getvacant(struct vars * v, /* used only for debug flags */ + struct dfa * d, + chr * cp, + chr * start) { - int i; + int i; struct sset *ss; struct sset *p; struct arcp ap; struct arcp lastap; - color co; + color co; ss = pickss(v, d, cp, start); - assert(!(ss->flags&LOCKED)); + assert(!(ss->flags & LOCKED)); /* clear out its inarcs, including self-referential ones */ ap = ss->ins; - while ((p = ap.ss) != NULL) { + while ((p = ap.ss) != NULL) + { co = ap.co; - FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); + FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long) co)); p->outs[co] = NULL; ap = p->inchain[co]; - p->inchain[co].ss = NULL; /* paranoia */ + p->inchain[co].ss = NULL; /* paranoia */ } ss->ins.ss = NULL; /* take it off the inarc chains of the ssets reached by its outarcs */ - for (i = 0; i < d->ncolors; i++) { + for (i = 0; i < d->ncolors; i++) + { p = ss->outs[i]; assert(p != ss); /* not self-referential */ if (p == NULL) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); if (p->ins.ss == ss && p->ins.co == i) p->ins = ss->inchain[i]; - else { + else + { assert(p->ins.ss != NULL); for (ap = p->ins; ap.ss != NULL && - !(ap.ss == ss && ap.co == i); - ap = ap.ss->inchain[ap.co]) + !(ap.ss == ss && ap.co == i); + ap = ap.ss->inchain[ap.co]) lastap = ap; assert(ap.ss != NULL); lastap.ss->inchain[lastap.co] = ss->inchain[i]; @@ -581,13 +621,13 @@ getvacant(struct vars *v, /* used only for debug flags */ } /* if ss was a success state, may need to remember location */ - if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && - (d->lastpost == NULL || d->lastpost < ss->lastseen)) + if ((ss->flags & 
POSTSTATE) && ss->lastseen != d->lastpost && + (d->lastpost == NULL || d->lastpost < ss->lastseen)) d->lastpost = ss->lastseen; /* likewise for a no-progress state */ - if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && - (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) + if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr && + (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) d->lastnopr = ss->lastseen; return ss; @@ -597,18 +637,19 @@ getvacant(struct vars *v, /* used only for debug flags */ * pickss - pick the next stateset to be used */ static struct sset * -pickss(struct vars *v, /* used only for debug flags */ - struct dfa *d, - chr *cp, - chr *start) +pickss(struct vars * v, /* used only for debug flags */ + struct dfa * d, + chr * cp, + chr * start) { - int i; + int i; struct sset *ss; struct sset *end; - chr *ancient; + chr *ancient; /* shortcut for cases where cache isn't full */ - if (d->nssused < d->nssets) { + if (d->nssused < d->nssets) + { i = d->nssused; d->nssused++; ss = &d->ssets[i]; @@ -620,7 +661,8 @@ pickss(struct vars *v, /* used only for debug flags */ ss->ins.co = WHITE; /* give it some value */ ss->outs = &d->outsarea[i * d->ncolors]; ss->inchain = &d->incarea[i * d->ncolors]; - for (i = 0; i < d->ncolors; i++) { + for (i = 0; i < d->ncolors; i++) + { ss->outs[i] = NULL; ss->inchain[i].ss = NULL; } @@ -628,20 +670,22 @@ pickss(struct vars *v, /* used only for debug flags */ } /* look for oldest, or old enough anyway */ - if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ - ancient = cp - d->nssets*2/3; + if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */ + ancient = cp - d->nssets * 2 / 3; else ancient = start; for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags&LOCKED)) { + !(ss->flags & LOCKED)) + { d->search = ss + 1; FDEBUG(("replacing c%d\n", ss - d->ssets)); return ss; } for (ss = d->ssets, end = 
d->search; ss < end; ss++) if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags&LOCKED)) { + !(ss->flags & LOCKED)) + { d->search = ss + 1; FDEBUG(("replacing c%d\n", ss - d->ssets)); return ss; diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c index 94693eba211..a0a9d3f0410 100644 --- a/src/backend/regex/regerror.c +++ b/src/backend/regex/regerror.c @@ -1,21 +1,21 @@ /* * regerror - error-code expansion * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,7 +27,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header: /cvsroot/pgsql/src/backend/regex/regerror.c,v 1.25 2003/02/05 17:41:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regerror.c,v 1.26 2003/08/04 00:43:21 momjian Exp $ * */ @@ -37,72 +37,82 @@ static char unk[] = "*** unknown regex error code 0x%x ***"; /* struct to map among codes, code names, and explanations */ -static struct rerr { - int code; - char *name; - char *explain; -} rerrs[] = { +static struct rerr +{ + int code; + char *name; + char *explain; +} rerrs[] = + +{ /* the actual table is built from regex.h */ #include "regex/regerrs.h" - { -1, "", "oops" }, /* explanation special-cased in code */ + { + -1, "", "oops" + }, /* explanation special-cased in code */ }; /* * pg_regerror - the interface to error numbers */ /* ARGSUSED */ -size_t /* actual space needed (including NUL) */ +size_t /* actual space needed (including NUL) */ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ - const regex_t *preg, /* associated regex_t (unused at present) */ + const regex_t *preg, /* associated regex_t (unused at present) */ char *errbuf, /* result buffer (unless errbuf_size==0) */ - size_t errbuf_size) /* available space in errbuf, can be 0 */ + size_t errbuf_size) /* available space in errbuf, can be 0 */ { struct rerr *r; - char *msg; - char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */ - size_t len; - int icode; + char *msg; + char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */ + size_t len; + int icode; - switch (errcode) { - case REG_ATOI: /* convert name to number */ - for (r = rerrs; r->code >= 0; r++) - if (strcmp(r->name, errbuf) == 0) - break; - sprintf(convbuf, "%d", r->code); /* -1 for unknown */ - msg = convbuf; - break; - case REG_ITOA: /* convert number to name */ - icode = atoi(errbuf); /* not our problem if this fails */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == icode) - break; - if (r->code >= 0) - msg = r->name; - else { /* unknown; tell him the number */ - sprintf(convbuf, "REG_%u", 
(unsigned)icode); + switch (errcode) + { + case REG_ATOI: /* convert name to number */ + for (r = rerrs; r->code >= 0; r++) + if (strcmp(r->name, errbuf) == 0) + break; + sprintf(convbuf, "%d", r->code); /* -1 for unknown */ msg = convbuf; - } - break; - default: /* a real, normal error code */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == errcode) - break; - if (r->code >= 0) - msg = r->explain; - else { /* unknown; say so */ - sprintf(convbuf, unk, errcode); - msg = convbuf; - } - break; + break; + case REG_ITOA: /* convert number to name */ + icode = atoi(errbuf); /* not our problem if this fails */ + for (r = rerrs; r->code >= 0; r++) + if (r->code == icode) + break; + if (r->code >= 0) + msg = r->name; + else + { /* unknown; tell him the number */ + sprintf(convbuf, "REG_%u", (unsigned) icode); + msg = convbuf; + } + break; + default: /* a real, normal error code */ + for (r = rerrs; r->code >= 0; r++) + if (r->code == errcode) + break; + if (r->code >= 0) + msg = r->explain; + else + { /* unknown; say so */ + sprintf(convbuf, unk, errcode); + msg = convbuf; + } + break; } len = strlen(msg) + 1; /* space needed, including NUL */ - if (errbuf_size > 0) { + if (errbuf_size > 0) + { if (errbuf_size > len) strcpy(errbuf, msg); - else { /* truncate to fit */ - strncpy(errbuf, msg, errbuf_size-1); - errbuf[errbuf_size-1] = '\0'; + else + { /* truncate to fit */ + strncpy(errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = '\0'; } } diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index eef01b0bd58..535501ff0b7 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -1,21 +1,21 @@ /* * re_*exec and friends - match REs * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 
+ * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,7 +27,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /cvsroot/pgsql/src/backend/regex/regexec.c,v 1.21 2003/02/05 17:41:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regexec.c,v 1.22 2003/08/04 00:43:21 momjian Exp $ * */ @@ -36,87 +36,95 @@ /* lazy-DFA representation */ -struct arcp { /* "pointer" to an outarc */ +struct arcp +{ /* "pointer" to an outarc */ struct sset *ss; - color co; + color co; }; -struct sset { /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* hash of bitvector */ -# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) -# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ +struct sset +{ /* state set */ + unsigned *states; /* pointer to bitvector */ + unsigned hash; /* hash of bitvector */ +#define HASH(bv, nw) (((nw) == 1) ? 
*(bv) : hash(bv, nw)) +#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) - int flags; -# define STARTER 01 /* the initial state set */ -# define POSTSTATE 02 /* includes the goal state */ -# define LOCKED 04 /* locked in cache */ -# define NOPROGRESS 010 /* zero-progress state set */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ + int flags; +#define STARTER 01 /* the initial state set */ +#define POSTSTATE 02 /* includes the goal state */ +#define LOCKED 04 /* locked in cache */ +#define NOPROGRESS 010 /* zero-progress state set */ + struct arcp ins; /* chain of inarcs pointing here */ + chr *lastseen; /* last entered on arrival here */ + struct sset **outs; /* outarc vector indexed by color */ + struct arcp *inchain; /* chain-pointer vector for outarcs */ }; -struct dfa { - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ +struct dfa +{ + int nssets; /* size of cache */ + int nssused; /* how many entries occupied yet */ + int nstates; /* number of states */ + int ncolors; /* length of outarc and inchain vectors */ + int wordsper; /* length of state-set bitvectors */ + struct sset *ssets; /* state-set cache */ + unsigned *statesarea; /* bitvector storage */ + unsigned *work; /* pointer to work area within statesarea */ + struct sset **outsarea; /* outarc-vector storage */ + struct 
arcp *incarea; /* inchain storage */ struct cnfa *cnfa; struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ - chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ - struct sset *search; /* replacement-search-pointer memory */ - int cptsmalloced; /* were the areas individually malloced? */ - char *mallocarea; /* self, or master malloced area, or NULL */ + chr *lastpost; /* location of last cache-flushed success */ + chr *lastnopr; /* location of last cache-flushed + * NOPROGRESS */ + struct sset *search; /* replacement-search-pointer memory */ + int cptsmalloced; /* were the areas individually malloced? */ + char *mallocarea; /* self, or master malloced area, or NULL */ }; -#define WORK 1 /* number of work bitvectors needed */ +#define WORK 1 /* number of work bitvectors needed */ /* setup for non-malloc allocation for small cases */ -#define FEWSTATES 20 /* must be less than UBITS */ -#define FEWCOLORS 15 -struct smalldfa { - struct dfa dfa; - struct sset ssets[FEWSTATES*2]; - unsigned statesarea[FEWSTATES*2 + WORK]; - struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; - struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; +#define FEWSTATES 20 /* must be less than UBITS */ +#define FEWCOLORS 15 +struct smalldfa +{ + struct dfa dfa; + struct sset ssets[FEWSTATES * 2]; + unsigned statesarea[FEWSTATES * 2 + WORK]; + struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS]; + struct arcp incarea[FEWSTATES * 2 * FEWCOLORS]; }; -#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ + +#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ /* internal variables, bundled for easy passing around */ -struct vars { - regex_t *re; +struct vars +{ + regex_t *re; struct guts *g; - int eflags; /* copies of arguments */ - size_t nmatch; + int eflags; /* copies of arguments */ + size_t nmatch; regmatch_t *pmatch; rm_detail_t *details; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any 
(0 none) */ - regoff_t *mem; /* memory vector for backtracking */ + chr *start; /* start of string */ + chr *stop; /* just past end of string */ + int err; /* error code if any (0 none) */ + regoff_t *mem; /* memory vector for backtracking */ struct smalldfa dfa1; struct smalldfa dfa2; }; -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ -#define OFF(p) ((p) - v->start) -#define LOFF(p) ((long)OFF(p)) +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return + * it */ +#define OFF(p) ((p) - v->start) +#define LOFF(p) ((long)OFF(p)) @@ -124,32 +132,33 @@ struct vars { * forward declarations */ /* === regexec.c === */ -static int find (struct vars *, struct cnfa *, struct colormap *); -static int cfind (struct vars *, struct cnfa *, struct colormap *); -static int cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); -static void zapsubs (regmatch_t *, size_t); -static void zapmem (struct vars *, struct subre *); -static void subset (struct vars *, struct subre *, chr *, chr *); -static int dissect (struct vars *, struct subre *, chr *, chr *); -static int condissect (struct vars *, struct subre *, chr *, chr *); -static int altdissect (struct vars *, struct subre *, chr *, chr *); -static int cdissect (struct vars *, struct subre *, chr *, chr *); -static int ccondissect (struct vars *, struct subre *, chr *, chr *); -static int crevdissect (struct vars *, struct subre *, chr *, chr *); -static int cbrdissect (struct vars *, struct subre *, chr *, chr *); -static int 
caltdissect (struct vars *, struct subre *, chr *, chr *); +static int find(struct vars *, struct cnfa *, struct colormap *); +static int cfind(struct vars *, struct cnfa *, struct colormap *); +static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); +static void zapsubs(regmatch_t *, size_t); +static void zapmem(struct vars *, struct subre *); +static void subset(struct vars *, struct subre *, chr *, chr *); +static int dissect(struct vars *, struct subre *, chr *, chr *); +static int condissect(struct vars *, struct subre *, chr *, chr *); +static int altdissect(struct vars *, struct subre *, chr *, chr *); +static int cdissect(struct vars *, struct subre *, chr *, chr *); +static int ccondissect(struct vars *, struct subre *, chr *, chr *); +static int crevdissect(struct vars *, struct subre *, chr *, chr *); +static int cbrdissect(struct vars *, struct subre *, chr *, chr *); +static int caltdissect(struct vars *, struct subre *, chr *, chr *); + /* === rege_dfa.c === */ -static chr *longest (struct vars *, struct dfa *, chr *, chr *, int *); -static chr *shortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); -static chr *lastcold (struct vars *, struct dfa *); -static struct dfa *newdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); -static void freedfa (struct dfa *); -static unsigned hash (unsigned *, int); -static struct sset *initialize (struct vars *, struct dfa *, chr *); -static struct sset *miss (struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); -static int lacon (struct vars *, struct cnfa *, chr *, pcolor); -static struct sset *getvacant (struct vars *, struct dfa *, chr *, chr *); -static struct sset *pickss (struct vars *, struct dfa *, chr *, chr *); +static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); +static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); +static chr *lastcold(struct 
vars *, struct dfa *); +static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); +static void freedfa(struct dfa *); +static unsigned hash(unsigned *, int); +static struct sset *initialize(struct vars *, struct dfa *, chr *); +static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); +static int lacon(struct vars *, struct cnfa *, chr *, pcolor); +static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); +static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); /* @@ -157,22 +166,24 @@ static struct sset *pickss (struct vars *, struct dfa *, chr *, chr *); */ int pg_regexec(regex_t *re, - const chr *string, + const chr * string, size_t len, - rm_detail_t *details, + rm_detail_t * details, size_t nmatch, regmatch_t pmatch[], int flags) { struct vars var; register struct vars *v = &var; - int st; - size_t n; - int backref; -# define LOCALMAT 20 - regmatch_t mat[LOCALMAT]; -# define LOCALMEM 40 - regoff_t mem[LOCALMEM]; + int st; + size_t n; + int backref; + +#define LOCALMAT 20 + regmatch_t mat[LOCALMAT]; + +#define LOCALMEM 40 + regoff_t mem[LOCALMEM]; /* sanity checks */ if (re == NULL || string == NULL || re->re_magic != REMAGIC) @@ -182,46 +193,51 @@ pg_regexec(regex_t *re, /* setup */ v->re = re; - v->g = (struct guts *)re->re_guts; - if ((v->g->cflags&REG_EXPECT) && details == NULL) + v->g = (struct guts *) re->re_guts; + if ((v->g->cflags & REG_EXPECT) && details == NULL) return REG_INVARG; - if (v->g->info&REG_UIMPOSSIBLE) + if (v->g->info & REG_UIMPOSSIBLE) return REG_NOMATCH; - backref = (v->g->info&REG_UBACKREF) ? 1 : 0; + backref = (v->g->info & REG_UBACKREF) ? 
1 : 0; v->eflags = flags; - if (v->g->cflags&REG_NOSUB) - nmatch = 0; /* override client */ + if (v->g->cflags & REG_NOSUB) + nmatch = 0; /* override client */ v->nmatch = nmatch; - if (backref) { + if (backref) + { /* need work area */ if (v->g->nsub + 1 <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); + v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * + sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; v->nmatch = v->g->nsub + 1; - } else + } + else v->pmatch = pmatch; v->details = details; - v->start = (chr *)string; - v->stop = (chr *)string + len; + v->start = (chr *) string; + v->stop = (chr *) string + len; v->err = 0; - if (backref) { + if (backref) + { /* need retry memory */ assert(v->g->ntree >= 0); - n = (size_t)v->g->ntree; + n = (size_t) v->g->ntree; if (n <= LOCALMEM) v->mem = mem; else - v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t)); - if (v->mem == NULL) { + v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t)); + if (v->mem == NULL) + { if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); return REG_ESPACE; } - } else + } + else v->mem = NULL; /* do it */ @@ -232,10 +248,11 @@ pg_regexec(regex_t *re, st = find(v, &v->g->tree->cnfa, &v->g->cmap); /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { + if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) + { zapsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ? 
nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); + memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); } /* clean up */ @@ -250,19 +267,20 @@ pg_regexec(regex_t *re, * find - find a match for the main NFA (no-complications case) */ static int -find(struct vars *v, - struct cnfa *cnfa, - struct colormap *cm) +find(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm) { struct dfa *s; struct dfa *d; - chr *begin; - chr *end = NULL; - chr *cold; - chr *open; /* open and close of range of possible starts */ - chr *close; - int hitend; - int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; + chr *begin; + chr *end = NULL; + chr *cold; + chr *open; /* open and close of range of possible + * starts */ + chr *close; + int hitend; + int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; /* first, a shot with the search RE */ s = newdfa(v, &v->g->search, cm, &v->dfa1); @@ -270,20 +288,21 @@ find(struct vars *v, NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); + close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL); freedfa(s); NOERR(); - if (v->g->cflags&REG_EXPECT) { + if (v->g->cflags & REG_EXPECT) + { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (close == NULL) /* not found */ + if (close == NULL) /* not found */ return REG_NOMATCH; - if (v->nmatch == 0) /* found, don't need exact location */ + if (v->nmatch == 0) /* found, don't need exact location */ return REG_OKAY; /* find starting point and match */ @@ -294,18 +313,19 @@ find(struct vars *v, d = newdfa(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); - for (begin = open; begin <= close; begin++) { + for (begin = open; begin <= close; 
begin++) + { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) end = shortest(v, d, begin, begin, v->stop, - (chr **)NULL, &hitend); + (chr **) NULL, &hitend); else end = longest(v, d, begin, v->stop, &hitend); NOERR(); if (hitend && cold == NULL) cold = begin; if (end != NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); @@ -314,14 +334,15 @@ find(struct vars *v, assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); - if (v->g->cflags&REG_EXPECT) { + if (v->g->cflags & REG_EXPECT) + { if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (v->nmatch == 1) /* no need for submatches */ + if (v->nmatch == 1) /* no need for submatches */ return REG_OKAY; /* submatches */ @@ -333,19 +354,20 @@ find(struct vars *v, * cfind - find a match for the main NFA (with complications) */ static int -cfind(struct vars *v, - struct cnfa *cnfa, - struct colormap *cm) +cfind(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm) { struct dfa *s; struct dfa *d; - chr *cold; - int ret; + chr *cold; + int ret; s = newdfa(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newdfa(v, cnfa, cm, &v->dfa2); - if (ISERR()) { + if (ISERR()) + { assert(d == NULL); freedfa(s); return v->err; @@ -356,13 +378,14 @@ cfind(struct vars *v, freedfa(d); freedfa(s); NOERR(); - if (v->g->cflags&REG_EXPECT) { + if (v->g->cflags & REG_EXPECT) + { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; } @@ -371,47 +394,51 @@ cfind(struct vars *v, * cfindloop - the heart of cfind */ static 
int -cfindloop(struct vars *v, - struct cnfa *cnfa, - struct colormap *cm, - struct dfa *d, - struct dfa *s, - chr **coldp) /* where to put coldstart pointer */ +cfindloop(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm, + struct dfa * d, + struct dfa * s, + chr ** coldp) /* where to put coldstart pointer */ { - chr *begin; - chr *end; - chr *cold; - chr *open; /* open and close of range of possible starts */ - chr *close; - chr *estart; - chr *estop; - int er; - int shorter = v->g->tree->flags&SHORTER; - int hitend; + chr *begin; + chr *end; + chr *cold; + chr *open; /* open and close of range of possible + * starts */ + chr *close; + chr *estart; + chr *estop; + int er; + int shorter = v->g->tree->flags & SHORTER; + int hitend; assert(d != NULL && s != NULL); cold = NULL; close = v->start; - do { + do + { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); + close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) { + for (begin = open; begin <= close; begin++) + { MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); estart = begin; estop = v->stop; - for (;;) { + for (;;) + { if (shorter) end = shortest(v, d, begin, estart, - estop, (chr **)NULL, &hitend); + estop, (chr **) NULL, &hitend); else end = longest(v, d, begin, estop, - &hitend); + &hitend); if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -420,19 +447,23 @@ cfindloop(struct vars *v, zapsubs(v->pmatch, v->nmatch); zapmem(v, v->g->tree); er = cdissect(v, v->g->tree, begin, end); - if (er == REG_OKAY) { - if (v->nmatch > 0) { + if (er == REG_OKAY) + { + if (v->nmatch > 0) + { v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); } *coldp = cold; return REG_OKAY; } - if (er != REG_NOMATCH) { + if (er != 
REG_NOMATCH) + { ERR(er); return er; } - if ((shorter) ? end == estop : end == begin) { + if ((shorter) ? end == estop : end == begin) + { /* no point in trying again */ *coldp = cold; return REG_NOMATCH; @@ -457,9 +488,10 @@ static void zapsubs(regmatch_t *p, size_t n) { - size_t i; + size_t i; - for (i = n-1; i > 0; i--) { + for (i = n - 1; i > 0; i--) + { p[i].rm_so = -1; p[i].rm_eo = -1; } @@ -469,15 +501,16 @@ zapsubs(regmatch_t *p, * zapmem - initialize the retry memory of a subtree to zeros */ static void -zapmem(struct vars *v, - struct subre *t) +zapmem(struct vars * v, + struct subre * t) { if (t == NULL) return; assert(v->mem != NULL); v->mem[t->retry] = 0; - if (t->op == '(') { + if (t->op == '(') + { assert(t->subno > 0); v->pmatch[t->subno].rm_so = -1; v->pmatch[t->subno].rm_eo = -1; @@ -493,15 +526,15 @@ zapmem(struct vars *v, * subset - set any subexpression relevant to a successful subre */ static void -subset(struct vars *v, - struct subre *sub, - chr *begin, - chr *end) +subset(struct vars * v, + struct subre * sub, + chr * begin, + chr * end) { - int n = sub->subno; + int n = sub->subno; assert(n > 0); - if ((size_t)n >= v->nmatch) + if ((size_t) n >= v->nmatch) return; MDEBUG(("setting %d\n", n)); @@ -512,58 +545,59 @@ subset(struct vars *v, /* * dissect - determine subexpression matches (uncomplicated case) */ -static int /* regexec return code */ -dissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +dissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { assert(t != NULL); MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return 
altdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return condissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); - return dissect(v, t->left, begin, end); - break; - default: - return REG_ASSERT; - break; + switch (t->op) + { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return altdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! */ + return REG_ASSERT; + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return condissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + subset(v, t, begin, end); + return dissect(v, t->left, begin, end); + break; + default: + return REG_ASSERT; + break; } } /* * condissect - determine concatenation subexpression matches (uncomplicated) */ -static int /* regexec return code */ -condissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +condissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int i; - int shorter = (t->left->flags&SHORTER) ? 1 : 0; - chr *stop = (shorter) ? end : begin; + chr *mid; + int i; + int shorter = (t->left->flags & SHORTER) ? 1 : 0; + chr *stop = (shorter) ? 
end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); @@ -572,7 +606,8 @@ condissect(struct vars *v, d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) { + if (ISERR()) + { assert(d2 == NULL); freedfa(d); return v->err; @@ -580,11 +615,12 @@ condissect(struct vars *v, /* pick a tentative midpoint */ if (shorter) - mid = shortest(v, d, begin, begin, end, (chr **)NULL, - (int *)NULL); + mid = shortest(v, d, begin, begin, end, (chr **) NULL, + (int *) NULL); else - mid = longest(v, d, begin, end, (int *)NULL); - if (mid == NULL) { + mid = longest(v, d, begin, end, (int *) NULL); + if (mid == NULL) + { freedfa(d); freedfa(d2); return REG_ASSERT; @@ -592,9 +628,11 @@ condissect(struct vars *v, MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end, (int *)NULL) != end) { + while (longest(v, d2, mid, end, (int *) NULL) != end) + { /* that midpoint didn't work, find a new one */ - if (mid == stop) { + if (mid == stop) + { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); @@ -602,11 +640,12 @@ condissect(struct vars *v, return REG_ASSERT; } if (shorter) - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, - (int *)NULL); + mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, + (int *) NULL); else - mid = longest(v, d, begin, mid-1, (int *)NULL); - if (mid == NULL) { + mid = longest(v, d, begin, mid - 1, (int *) NULL); + if (mid == NULL) + { /* failed to find a new one! 
*/ MDEBUG(("failed midpoint!\n")); freedfa(d); @@ -629,154 +668,166 @@ condissect(struct vars *v, /* * altdissect - determine alternative subexpression matches (uncomplicated) */ -static int /* regexec return code */ -altdissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +altdissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { struct dfa *d; - int i; + int i; assert(t != NULL); assert(t->op == '|'); - for (i = 0; t != NULL; t = t->right, i++) { + for (i = 0; t != NULL; t = t->right, i++) + { MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *)NULL) == end) { + if (longest(v, d, begin, end, (int *) NULL) == end) + { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); } freedfa(d); } - return REG_ASSERT; /* none of them matched?!? */ + return REG_ASSERT; /* none of them matched?!? */ } /* * cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". 
*/ -static int /* regexec return code */ -cdissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +cdissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { - int er; + int er; assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return caltdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - assert(t->left == NULL && t->right == NULL); - return cbrdissect(v, t, begin, end); - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return ccondissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) - subset(v, t, begin, end); - return er; - break; - default: - return REG_ASSERT; - break; + switch (t->op) + { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return caltdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! 
*/ + assert(t->left == NULL && t->right == NULL); + return cbrdissect(v, t, begin, end); + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return ccondissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + er = cdissect(v, t->left, begin, end); + if (er == REG_OKAY) + subset(v, t, begin, end); + return er; + break; + default: + return REG_ASSERT; + break; } } /* * ccondissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". */ -static int /* regexec return code */ -ccondissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +ccondissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - if (t->left->flags&SHORTER) /* reverse scan */ + if (t->left->flags & SHORTER) /* reverse scan */ return crevdissect(v, t, begin, end); d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { + if (ISERR()) + { freedfa(d); return v->err; } MDEBUG(("cconcat %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) { - mid = longest(v, d, begin, end, (int *)NULL); - if (mid == NULL) { + if (v->mem[t->retry] == 0) + { + mid = longest(v, d, begin, end, (int *) NULL); + if (mid == NULL) + { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint 
%ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } else { + } + else + { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) { + for (;;) + { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *)NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { + longest(v, d2, mid, end, (int *) NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) + { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == begin) { + if (mid == begin) + { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = longest(v, d, begin, mid-1, (int *)NULL); - if (mid == NULL) { + mid = longest(v, d, begin, mid - 1, (int *) NULL); + if (mid == NULL) + { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -798,76 +849,85 @@ ccondissect(struct vars *v, /* * crevdissect - determine backref shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". 
*/ -static int /* regexec return code */ -crevdissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +crevdissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags&SHORTER); + assert(t->left->flags & SHORTER); /* concatenation -- need to split the substring between parts */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { + if (ISERR()) + { freedfa(d); return v->err; } MDEBUG(("crev %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) { - mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); - if (mid == NULL) { + if (v->mem[t->retry] == 0) + { + mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); + if (mid == NULL) + { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } else { + } + else + { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) { + for (;;) + { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *)NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { + longest(v, d2, mid, end, (int *) NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) + { freedfa(d); 
freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == end) { + if (mid == end) + { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); - if (mid == NULL) { + mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); + if (mid == NULL) + { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -890,25 +950,25 @@ crevdissect(struct vars *v, /* * cbrdissect - determine backref subexpression matches */ -static int /* regexec return code */ -cbrdissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +cbrdissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { - int i; - int n = t->subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = t->min; - int max = t->max; + int i; + int n = t->subno; + size_t len; + chr *paren; + chr *p; + chr *stop; + int min = t->min; + int max = t->max; assert(t != NULL); assert(t->op == 'b'); assert(n >= 0); - assert((size_t)n < v->nmatch); + assert((size_t) n < v->nmatch); MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); @@ -923,7 +983,8 @@ cbrdissect(struct vars *v, v->mem[t->retry] = 1; /* special-case zero-length string */ - if (len == 0) { + if (len == 0) + { if (begin == end) return REG_OKAY; return REG_NOMATCH; @@ -931,41 +992,44 @@ cbrdissect(struct vars *v, /* and too-short string */ assert(end >= begin); - if ((size_t)(end - begin) < len) + if ((size_t) (end - begin) < len) return REG_NOMATCH; stop = end - len; /* count occurrences */ i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { - if ((*v->g->compare)(paren, p, len) != 0) - break; + for (p = begin; p <= stop && (i < max || max 
== INFINITY); p += len) + { + if ((*v->g->compare) (paren, p, len) != 0) + break; i++; } MDEBUG(("cbackref found %d\n", i)); /* and sort it out */ - if (p != end) /* didn't consume all of it */ + if (p != end) /* didn't consume all of it */ return REG_NOMATCH; if (min <= i && (i <= max || max == INFINITY)) return REG_OKAY; - return REG_NOMATCH; /* out of range */ + return REG_NOMATCH; /* out of range */ } /* * caltdissect - determine alternative subexpression matches (w. complications) */ -static int /* regexec return code */ -caltdissect(struct vars *v, - struct subre *t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +caltdissect(struct vars * v, + struct subre * t, + chr * begin, /* beginning of relevant substring */ + chr * end) /* end of same */ { struct dfa *d; - int er; -# define UNTRIED 0 /* not yet tried at all */ -# define TRYING 1 /* top matched, trying submatches */ -# define TRIED 2 /* top didn't match or submatches exhausted */ + int er; + +#define UNTRIED 0 /* not yet tried at all */ +#define TRYING 1 /* top matched, trying submatches */ +#define TRIED 2 /* top didn't match or submatches + * exhausted */ if (t == NULL) return REG_NOMATCH; @@ -976,11 +1040,13 @@ caltdissect(struct vars *v, MDEBUG(("calt n%d\n", t->retry)); assert(t->left != NULL); - if (v->mem[t->retry] == UNTRIED) { + if (v->mem[t->retry] == UNTRIED) + { d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *)NULL) != end) { + if (longest(v, d, begin, end, (int *) NULL) != end) + { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c index 88f3da32287..1bb9057a916 100644 --- a/src/backend/regex/regfree.c +++ b/src/backend/regex/regfree.c @@ -1,21 +1,21 @@ /* * regfree - free an RE * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 
- * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,7 +27,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /cvsroot/pgsql/src/backend/regex/regfree.c,v 1.16 2003/02/05 17:41:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/regfree.c,v 1.17 2003/08/04 00:43:21 momjian Exp $ * * * You might think that this could be incorporated into regcomp.c, and @@ -50,5 +50,5 @@ pg_regfree(regex_t *re) { if (re == NULL) return; - (*((struct fns *)re->re_fns)->free)(re); + (*((struct fns *) re->re_fns)->free) (re); } |