diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-09-05 17:06:29 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-09-05 17:06:29 -0400 |
commit | c54159d44ceaba26ceda9fea1804f0de122a8f30 (patch) | |
tree | eaab96027f054cf5ff864d0745e446e8b8e13544 /src/backend/regex/regc_color.c | |
parent | f80049f76a32858601510eaaef19ab8160e4c9b3 (diff) | |
download | postgresql-c54159d44ceaba26ceda9fea1804f0de122a8f30.tar.gz postgresql-c54159d44ceaba26ceda9fea1804f0de122a8f30.zip |
Make locale-dependent regex character classes work for large char codes.
Previously, we failed to recognize Unicode characters above U+7FF as
being members of locale-dependent character classes such as [[:alpha:]].
(Actually, the same problem occurs for large pg_wchar values in any
multibyte encoding, but UTF8 is the only case people have actually
complained about.) It's impractical to get Spencer's original code to
handle character classes or ranges containing many thousands of characters,
because it insists on considering each member character individually at
regex compile time, whether or not the character will ever be of interest
at run time. To fix, choose a cutoff point MAX_SIMPLE_CHR below which
we process characters individually as before, and deal with entire ranges
or classes as single entities above that. We can actually make things
cheaper than before for chars below the cutoff, because the color map can
now be a simple linear array for those chars, rather than the multilevel
tree structure Spencer designed. It's more expensive than before for
chars above the cutoff, because we must do a binary search in a list of
high chars and char ranges used in the regex pattern, plus call iswalpha()
and friends for each locale-dependent character class used in the pattern.
However, multibyte encodings are normally designed to give smaller codes
to popular characters, so that we can expect that the slow path will be
taken relatively infrequently. In any case, the speed penalty appears
minor except when we have to apply iswalpha() etc. to high character codes
at runtime --- and the previous coding gave wrong answers for those cases,
so whether it was faster is moot.
Tom Lane, reviewed by Heikki Linnakangas
Discussion: <15563.1471913698@sss.pgh.pa.us>
Diffstat (limited to 'src/backend/regex/regc_color.c')
-rw-r--r-- | src/backend/regex/regc_color.c | 885 |
1 files changed, 618 insertions, 267 deletions
diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index 8ffc8fb797d..4b72894ad3a 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -49,10 +49,6 @@ static void initcm(struct vars * v, struct colormap * cm) { - int i; - int j; - union tree *t; - union tree *nextt; struct colordesc *cd; cm->magic = CMMAGIC; @@ -64,24 +60,40 @@ initcm(struct vars * v, cm->free = 0; cd = cm->cd; /* cm->cd[WHITE] */ + cd->nschrs = MAX_SIMPLE_CHR - CHR_MIN + 1; + cd->nuchrs = 1; cd->sub = NOSUB; cd->arcs = NULL; cd->firstchr = CHR_MIN; - cd->nchrs = CHR_MAX - CHR_MIN + 1; cd->flags = 0; - /* upper levels of tree */ - for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--) + cm->locolormap = (color *) + MALLOC((MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color)); + if (cm->locolormap == NULL) { - nextt = t + 1; - for (i = BYTTAB - 1; i >= 0; i--) - t->tptr[i] = nextt; + CERR(REG_ESPACE); + cm->cmranges = NULL; /* prevent failure during freecm */ + cm->hicolormap = NULL; + return; } - /* bottom level is solid white */ - t = &cm->tree[NBYTS - 1]; - for (i = BYTTAB - 1; i >= 0; i--) - t->tcolor[i] = WHITE; - cd->block = t; + /* this memset relies on WHITE being zero: */ + memset(cm->locolormap, WHITE, + (MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color)); + + memset(cm->classbits, 0, sizeof(cm->classbits)); + cm->numcmranges = 0; + cm->cmranges = NULL; + cm->maxarrayrows = 4; /* arbitrary initial allocation */ + cm->hiarrayrows = 1; /* but we have only one row/col initially */ + cm->hiarraycols = 1; + cm->hicolormap = (color *) MALLOC(cm->maxarrayrows * sizeof(color)); + if (cm->hicolormap == NULL) + { + CERR(REG_ESPACE); + return; + } + /* initialize the "all other characters" row to WHITE */ + cm->hicolormap[0] = WHITE; } /* @@ -90,117 +102,67 @@ initcm(struct vars * v, static void freecm(struct colormap * cm) { - size_t i; - union tree *cb; - cm->magic = 0; - if (NBYTS > 1) - cmtreefree(cm, cm->tree, 0); - for (i = 1; i <= cm->max; i++) /* skip WHITE */ - if (!UNUSEDCOLOR(&cm->cd[i])) - { - cb = cm->cd[i].block; - if (cb != NULL) - FREE(cb); - } if (cm->cd != cm->cdspace) FREE(cm->cd); + if (cm->locolormap != NULL) + FREE(cm->locolormap); + if (cm->cmranges != NULL) + FREE(cm->cmranges); + if (cm->hicolormap != NULL) + FREE(cm->hicolormap); } /* - * cmtreefree - free a non-terminal part of a colormap tree + * pg_reg_getcolor - slow case of GETCOLOR() */ -static void -cmtreefree(struct colormap * cm, - union tree * tree, - int level) /* level number (top == 0) of this block */ +color +pg_reg_getcolor(struct colormap * cm, chr c) { - int i; - union tree *t; - union tree *fillt = &cm->tree[level + 1]; - union tree *cb; - - assert(level < NBYTS - 1); /* this level has pointers */ - for (i = BYTTAB - 1; i >= 0; i--) + int rownum, + colnum, + low, + high; + + /* Should not be used for chrs in the locolormap */ + assert(c > MAX_SIMPLE_CHR); + + /* + * Find which row it's in. The colormapranges are in order, so we can use + * binary search. + */ + rownum = 0; /* if no match, use array row zero */ + low = 0; + high = cm->numcmranges; + while (low < high) { - t = tree->tptr[i]; - assert(t != NULL); - if (t != fillt) + int middle = low + (high - low) / 2; + const colormaprange *cmr = &cm->cmranges[middle]; + + if (c < cmr->cmin) + high = middle; + else if (c > cmr->cmax) + low = middle + 1; + else { - if (level < NBYTS - 2) - { /* more pointer blocks below */ - cmtreefree(cm, t, level + 1); - FREE(t); - } - else - { /* color block below */ - cb = cm->cd[t->tcolor[0]].block; - if (t != cb) /* not a solid block */ - FREE(t); - } + rownum = cmr->rownum; /* found a match */ + break; } } -} -/* - * setcolor - set the color of a character in a colormap - */ -static color /* previous color */ -setcolor(struct colormap * cm, - chr c, - color co) -{ - uchr uc = c; - int shift; - int level; - int b; - int bottom; - union tree *t; - union tree *newt; - union tree *fillt; - union tree *lastt; - union tree *cb; - color prev; - - assert(cm->magic == CMMAGIC); - if (CISERR() || co == COLORLESS) - return COLORLESS; - - t = cm->tree; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) - { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level + 1]; - bottom = (shift <= BYTBITS) ? 1 : 0; - cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt; - if (t == fillt || t == cb) - { /* must allocate a new block */ - newt = (union tree *) MALLOC((bottom) ? - sizeof(struct colors) : sizeof(struct ptrs)); - if (newt == NULL) - { - CERR(REG_ESPACE); - return COLORLESS; - } - if (bottom) - memcpy(VS(newt->tcolor), VS(t->tcolor), - BYTTAB * sizeof(color)); - else - memcpy(VS(newt->tptr), VS(t->tptr), - BYTTAB * sizeof(union tree *)); - t = newt; - lastt->tptr[b] = t; - } + /* + * Find which column it's in --- this is all locale-dependent. + */ + if (cm->hiarraycols > 1) + { + colnum = cclass_column_index(cm, c); + return cm->hicolormap[rownum * cm->hiarraycols + colnum]; + } + else + { + /* fast path if no relevant cclasses */ + return cm->hicolormap[rownum]; } - - b = uc & BYTMASK; - prev = t->tcolor[b]; - t->tcolor[b] = co; - return prev; } /* @@ -216,7 +178,7 @@ maxcolor(struct colormap * cm) } /* - * newcolor - find a new color (must be subject of setcolor at once) + * newcolor - find a new color (must be assigned at once) * Beware: may relocate the colordescs. */ static color /* COLORLESS for error */ @@ -278,12 +240,12 @@ newcolor(struct colormap * cm) cd = &cm->cd[cm->max]; } - cd->nchrs = 0; + cd->nschrs = 0; + cd->nuchrs = 0; cd->sub = NOSUB; cd->arcs = NULL; cd->firstchr = CHR_MIN; /* in case never set otherwise */ cd->flags = 0; - cd->block = NULL; return (color) (cd - cm->cd); } @@ -305,13 +267,9 @@ freecolor(struct colormap * cm, assert(cd->arcs == NULL); assert(cd->sub == NOSUB); - assert(cd->nchrs == 0); + assert(cd->nschrs == 0); + assert(cd->nuchrs == 0); cd->flags = FREECOL; - if (cd->block != NULL) - { - FREE(cd->block); - cd->block = NULL; /* just paranoia */ - } if ((size_t) co == cm->max) { @@ -354,17 +312,25 @@ static color pseudocolor(struct colormap * cm) { color co; + struct colordesc *cd; co = newcolor(cm); if (CISERR()) return COLORLESS; - cm->cd[co].nchrs = 1; - cm->cd[co].flags = PSEUDO; + cd = &cm->cd[co]; + cd->nschrs = 0; + cd->nuchrs = 1; /* pretend it is in the upper map */ + cd->sub = NOSUB; + cd->arcs = NULL; + cd->firstchr = CHR_MIN; + cd->flags = PSEUDO; return co; } /* * subcolor - allocate a new subcolor (if necessary) to this chr + * + * This works only for chrs that map into the low color map. */ static color subcolor(struct colormap * cm, chr c) @@ -372,7 +338,9 @@ subcolor(struct colormap * cm, chr c) color co; /* current color of c */ color sco; /* new subcolor */ - co = GETCOLOR(cm, c); + assert(c <= MAX_SIMPLE_CHR); + + co = cm->locolormap[c - CHR_MIN]; sco = newsub(cm, co); if (CISERR()) return COLORLESS; @@ -380,11 +348,37 @@ subcolor(struct colormap * cm, chr c) if (co == sco) /* already in an open subcolor */ return co; /* rest is redundant */ - cm->cd[co].nchrs--; - if (cm->cd[sco].nchrs == 0) + cm->cd[co].nschrs--; + if (cm->cd[sco].nschrs == 0) cm->cd[sco].firstchr = c; - cm->cd[sco].nchrs++; - setcolor(cm, c, sco); + cm->cd[sco].nschrs++; + cm->locolormap[c - CHR_MIN] = sco; + return sco; +} + +/* + * subcolorhi - allocate a new subcolor (if necessary) to this colormap entry + * + * This is the same processing as subcolor(), but for entries in the high + * colormap, which do not necessarily correspond to exactly one chr code. + */ +static color +subcolorhi(struct colormap * cm, color *pco) +{ + color co; /* current color of entry */ + color sco; /* new subcolor */ + + co = *pco; + sco = newsub(cm, co); + if (CISERR()) + return COLORLESS; + assert(sco != COLORLESS); + + if (co == sco) /* already in an open subcolor */ + return co; /* rest is redundant */ + cm->cd[co].nuchrs--; + cm->cd[sco].nuchrs++; + *pco = sco; return sco; } @@ -400,7 +394,8 @@ newsub(struct colormap * cm, sco = cm->cd[co].sub; if (sco == NOSUB) { /* color has no open subcolor */ - if (cm->cd[co].nchrs == 1) /* optimization */ + /* optimization: singly-referenced color need not be subcolored */ + if ((cm->cd[co].nschrs + cm->cd[co].nuchrs) == 1) return co; sco = newcolor(cm); /* must create subcolor */ if (sco == COLORLESS) @@ -417,136 +412,500 @@ newsub(struct colormap * cm, } /* - * subrange - allocate new subcolors to this range of chrs, fill in arcs + * newhicolorrow - get a new row in the hicolormap, cloning it from oldrow + * + * Returns array index of new row. Note the array might move. */ -static void -subrange(struct vars * v, - chr from, - chr to, - struct state * lp, - struct state * rp) +static int +newhicolorrow(struct colormap * cm, + int oldrow) { - uchr uf; + int newrow = cm->hiarrayrows; + color *newrowptr; int i; - assert(from <= to); + /* Assign a fresh array row index, enlarging storage if needed */ + if (newrow >= cm->maxarrayrows) + { + color *newarray; + + if (cm->maxarrayrows >= INT_MAX / (cm->hiarraycols * 2)) + { + CERR(REG_ESPACE); + return 0; + } + newarray = (color *) REALLOC(cm->hicolormap, + cm->maxarrayrows * 2 * + cm->hiarraycols * sizeof(color)); + if (newarray == NULL) + { + CERR(REG_ESPACE); + return 0; + } + cm->hicolormap = newarray; + cm->maxarrayrows *= 2; + } + cm->hiarrayrows++; + + /* Copy old row data */ + newrowptr = &cm->hicolormap[newrow * cm->hiarraycols]; + memcpy(newrowptr, + &cm->hicolormap[oldrow * cm->hiarraycols], + cm->hiarraycols * sizeof(color)); + + /* Increase color reference counts to reflect new colormap entries */ + for (i = 0; i < cm->hiarraycols; i++) + cm->cd[newrowptr[i]].nuchrs++; - /* first, align "from" on a tree-block boundary */ - uf = (uchr) from; - i = (int) (((uf + BYTTAB - 1) & (uchr) ~BYTMASK) - uf); - for (; from <= to && i > 0; i--, from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); - if (from > to) /* didn't reach a boundary */ + return newrow; +} + +/* + * newhicolorcols - create a new set of columns in the high colormap + * + * Essentially, extends the 2-D array to the right with a copy of itself. + */ +static void +newhicolorcols(struct colormap * cm) +{ + color *newarray; + int r, + c; + + if (cm->hiarraycols >= INT_MAX / (cm->maxarrayrows * 2)) + { + CERR(REG_ESPACE); return; + } + newarray = (color *) REALLOC(cm->hicolormap, + cm->maxarrayrows * + cm->hiarraycols * 2 * sizeof(color)); + if (newarray == NULL) + { + CERR(REG_ESPACE); + return; + } + cm->hicolormap = newarray; - /* deal with whole blocks */ - for (; to - from >= BYTTAB; from += BYTTAB) - subblock(v, from, lp, rp); + /* Duplicate existing columns to the right, and increase ref counts */ + /* Must work backwards in the array because we realloc'd in place */ + for (r = cm->hiarrayrows - 1; r >= 0; r--) + { + color *oldrowptr = &newarray[r * cm->hiarraycols]; + color *newrowptr = &newarray[r * cm->hiarraycols * 2]; + color *newrowptr2 = newrowptr + cm->hiarraycols; + + for (c = 0; c < cm->hiarraycols; c++) + { + color co = oldrowptr[c]; - /* clean up any remaining partial table */ - for (; from <= to; from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); + newrowptr[c] = newrowptr2[c] = co; + cm->cd[co].nuchrs++; + } + } + + cm->hiarraycols *= 2; } /* - * subblock - allocate new subcolors for one tree block of chrs, fill in arcs + * subcolorcvec - allocate new subcolors to cvec members, fill in arcs * - * Note: subcolors that are created during execution of this function - * will not be given a useful value of firstchr; it'll be left as CHR_MIN. - * For the current usage of firstchr in pg_regprefix, this does not matter - * because such subcolors won't occur in the common prefix of a regex. + * For each chr "c" represented by the cvec, do the equivalent of + * newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); + * + * Note that in typical cases, many of the subcolors are the same. + * While newarc() would discard duplicate arc requests, we can save + * some cycles by not calling it repetitively to begin with. This is + * mechanized with the "lastsubcolor" state variable. */ static void -subblock(struct vars * v, - chr start, /* first of BYTTAB chrs */ - struct state * lp, - struct state * rp) +subcolorcvec(struct vars * v, + struct cvec * cv, + struct state * lp, + struct state * rp) { - uchr uc = start; struct colormap *cm = v->cm; - int shift; - int level; + color lastsubcolor = COLORLESS; + chr ch, + from, + to; + const chr *p; int i; - int b; - union tree *t; - union tree *cb; - union tree *fillt; - union tree *lastt; - int previ; - int ndone; - color co; - color sco; - assert((uc % BYTTAB) == 0); - - /* find its color block, making new pointer blocks as needed */ - t = cm->tree; - fillt = NULL; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) - { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level + 1]; - if (t == fillt && shift > BYTBITS) - { /* need new ptr block */ - t = (union tree *) MALLOC(sizeof(struct ptrs)); - if (t == NULL) + /* ordinary characters */ + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) + { + ch = *p; + subcoloronechr(v, ch, lp, rp, &lastsubcolor); + NOERR(); + } + + /* and the ranges */ + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) + { + from = *p; + to = *(p + 1); + if (from <= MAX_SIMPLE_CHR) + { + /* deal with simple chars one at a time */ + chr lim = (to <= MAX_SIMPLE_CHR) ? to : MAX_SIMPLE_CHR; + + while (from <= lim) { - CERR(REG_ESPACE); - return; + color sco = subcolor(cm, from); + + NOERR(); + if (sco != lastsubcolor) + { + newarc(v->nfa, PLAIN, sco, lp, rp); + NOERR(); + lastsubcolor = sco; + } + from++; } - memcpy(VS(t->tptr), VS(fillt->tptr), - BYTTAB * sizeof(union tree *)); - lastt->tptr[b] = t; } + /* deal with any part of the range that's above MAX_SIMPLE_CHR */ + if (from < to) + subcoloronerange(v, from, to, lp, rp, &lastsubcolor); + else if (from == to) + subcoloronechr(v, from, lp, rp, &lastsubcolor); + NOERR(); } - /* special cases: fill block or solid block */ - co = t->tcolor[0]; - cb = cm->cd[co].block; - if (t == fillt || t == cb) + /* and deal with cclass if any */ + if (cv->cclasscode >= 0) { - /* either way, we want a subcolor solid block */ - sco = newsub(cm, co); - t = cm->cd[sco].block; - if (t == NULL) - { /* must set it up */ - t = (union tree *) MALLOC(sizeof(struct colors)); - if (t == NULL) + int classbit; + color *pco; + int r, + c; + + /* Enlarge array if we don't have a column bit assignment for cclass */ + if (cm->classbits[cv->cclasscode] == 0) + { + cm->classbits[cv->cclasscode] = cm->hiarraycols; + newhicolorcols(cm); + NOERR(); + } + /* Apply subcolorhi() and make arc for each entry in relevant cols */ + classbit = cm->classbits[cv->cclasscode]; + pco = cm->hicolormap; + for (r = 0; r < cm->hiarrayrows; r++) + { + for (c = 0; c < cm->hiarraycols; c++) { - CERR(REG_ESPACE); - return; + if (c & classbit) + { + color sco = subcolorhi(cm, pco); + + NOERR(); + /* add the arc if needed */ + if (sco != lastsubcolor) + { + newarc(v->nfa, PLAIN, sco, lp, rp); + NOERR(); + lastsubcolor = sco; + } + } + pco++; } - for (i = 0; i < BYTTAB; i++) - t->tcolor[i] = sco; - cm->cd[sco].block = t; } - /* find loop must have run at least once */ - lastt->tptr[b] = t; - newarc(v->nfa, PLAIN, sco, lp, rp); - cm->cd[co].nchrs -= BYTTAB; - cm->cd[sco].nchrs += BYTTAB; + } +} + +/* + * subcoloronechr - do subcolorcvec's work for a singleton chr + * + * We could just let subcoloronerange do this, but it's a bit more efficient + * if we exploit the single-chr case. Also, callers find it useful for this + * to be able to handle both low and high chr codes. + */ +static void +subcoloronechr(struct vars * v, + chr ch, + struct state * lp, + struct state * rp, + color *lastsubcolor) +{ + struct colormap *cm = v->cm; + colormaprange *newranges; + int numnewranges; + colormaprange *oldrange; + int oldrangen; + int newrow; + + /* Easy case for low chr codes */ + if (ch <= MAX_SIMPLE_CHR) + { + color sco = subcolor(cm, ch); + + NOERR(); + if (sco != *lastsubcolor) + { + newarc(v->nfa, PLAIN, sco, lp, rp); + *lastsubcolor = sco; + } + return; + } + + /* + * Potentially, we could need two more colormapranges than we have now, if + * the given chr is in the middle of some existing range. + */ + newranges = (colormaprange *) + MALLOC((cm->numcmranges + 2) * sizeof(colormaprange)); + if (newranges == NULL) + { + CERR(REG_ESPACE); return; } + numnewranges = 0; - /* general case, a mixed block to be altered */ - i = 0; - while (i < BYTTAB) + /* Ranges before target are unchanged */ + for (oldrange = cm->cmranges, oldrangen = 0; + oldrangen < cm->numcmranges; + oldrange++, oldrangen++) + { + if (oldrange->cmax >= ch) + break; + newranges[numnewranges++] = *oldrange; + } + + /* Match target chr against current range */ + if (oldrangen >= cm->numcmranges || oldrange->cmin > ch) + { + /* chr does not belong to any existing range, make a new one */ + newranges[numnewranges].cmin = ch; + newranges[numnewranges].cmax = ch; + /* row state should be cloned from the "all others" row */ + newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0); + numnewranges++; + } + else if (oldrange->cmin == oldrange->cmax) + { + /* we have an existing singleton range matching the chr */ + newranges[numnewranges++] = *oldrange; + newrow = oldrange->rownum; + /* we've now fully processed this old range */ + oldrange++, oldrangen++; + } + else { - co = t->tcolor[i]; - sco = newsub(cm, co); - newarc(v->nfa, PLAIN, sco, lp, rp); - previ = i; - do + /* chr is a subset of this existing range, must split it */ + if (ch > oldrange->cmin) + { + /* emit portion of old range before chr */ + newranges[numnewranges].cmin = oldrange->cmin; + newranges[numnewranges].cmax = ch - 1; + newranges[numnewranges].rownum = oldrange->rownum; + numnewranges++; + } + /* emit chr as singleton range, initially cloning from range */ + newranges[numnewranges].cmin = ch; + newranges[numnewranges].cmax = ch; + newranges[numnewranges].rownum = newrow = + newhicolorrow(cm, oldrange->rownum); + numnewranges++; + if (ch < oldrange->cmax) { - t->tcolor[i++] = sco; - } while (i < BYTTAB && t->tcolor[i] == co); - ndone = i - previ; - cm->cd[co].nchrs -= ndone; - cm->cd[sco].nchrs += ndone; + /* emit portion of old range after chr */ + newranges[numnewranges].cmin = ch + 1; + newranges[numnewranges].cmax = oldrange->cmax; + /* must clone the row if we are making two new ranges from old */ + newranges[numnewranges].rownum = + (ch > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) : + oldrange->rownum; + numnewranges++; + } + /* we've now fully processed this old range */ + oldrange++, oldrangen++; + } + + /* Update colors in newrow and create arcs as needed */ + subcoloronerow(v, newrow, lp, rp, lastsubcolor); + + /* Ranges after target are unchanged */ + for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++) + { + newranges[numnewranges++] = *oldrange; + } + + /* Assert our original space estimate was adequate */ + assert(numnewranges <= (cm->numcmranges + 2)); + + /* And finally, store back the updated list of ranges */ + if (cm->cmranges != NULL) + FREE(cm->cmranges); + cm->cmranges = newranges; + cm->numcmranges = numnewranges; +} + +/* + * subcoloronerange - do subcolorcvec's work for a high range + */ +static void +subcoloronerange(struct vars * v, + chr from, + chr to, + struct state * lp, + struct state * rp, + color *lastsubcolor) +{ + struct colormap *cm = v->cm; + colormaprange *newranges; + int numnewranges; + colormaprange *oldrange; + int oldrangen; + int newrow; + + /* Caller should take care of non-high-range cases */ + assert(from > MAX_SIMPLE_CHR); + assert(from < to); + + /* + * Potentially, if we have N non-adjacent ranges, we could need as many as + * 2N+1 result ranges (consider case where new range spans 'em all). + */ + newranges = (colormaprange *) + MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange)); + if (newranges == NULL) + { + CERR(REG_ESPACE); + return; + } + numnewranges = 0; + + /* Ranges before target are unchanged */ + for (oldrange = cm->cmranges, oldrangen = 0; + oldrangen < cm->numcmranges; + oldrange++, oldrangen++) + { + if (oldrange->cmax >= from) + break; + newranges[numnewranges++] = *oldrange; + } + + /* + * Deal with ranges that (partially) overlap the target. As we process + * each such range, increase "from" to remove the dealt-with characters + * from the target range. + */ + while (oldrangen < cm->numcmranges && oldrange->cmin <= to) + { + if (from < oldrange->cmin) + { + /* Handle portion of new range that corresponds to no old range */ + newranges[numnewranges].cmin = from; + newranges[numnewranges].cmax = oldrange->cmin - 1; + /* row state should be cloned from the "all others" row */ + newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0); + numnewranges++; + /* Update colors in newrow and create arcs as needed */ + subcoloronerow(v, newrow, lp, rp, lastsubcolor); + /* We've now fully processed the part of new range before old */ + from = oldrange->cmin; + } + + if (from <= oldrange->cmin && to >= oldrange->cmax) + { + /* old range is fully contained in new, process it in-place */ + newranges[numnewranges++] = *oldrange; + newrow = oldrange->rownum; + from = oldrange->cmax + 1; + } + else + { + /* some part of old range does not overlap new range */ + if (from > oldrange->cmin) + { + /* emit portion of old range before new range */ + newranges[numnewranges].cmin = oldrange->cmin; + newranges[numnewranges].cmax = from - 1; + newranges[numnewranges].rownum = oldrange->rownum; + numnewranges++; + } + /* emit common subrange, initially cloning from old range */ + newranges[numnewranges].cmin = from; + newranges[numnewranges].cmax = + (to < oldrange->cmax) ? to : oldrange->cmax; + newranges[numnewranges].rownum = newrow = + newhicolorrow(cm, oldrange->rownum); + numnewranges++; + if (to < oldrange->cmax) + { + /* emit portion of old range after new range */ + newranges[numnewranges].cmin = to + 1; + newranges[numnewranges].cmax = oldrange->cmax; + /* must clone the row if we are making two new ranges from old */ + newranges[numnewranges].rownum = + (from > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) : + oldrange->rownum; + numnewranges++; + } + from = oldrange->cmax + 1; + } + /* Update colors in newrow and create arcs as needed */ + subcoloronerow(v, newrow, lp, rp, lastsubcolor); + /* we've now fully processed this old range */ + oldrange++, oldrangen++; + } + + if (from <= to) + { + /* Handle portion of new range that corresponds to no old range */ + newranges[numnewranges].cmin = from; + newranges[numnewranges].cmax = to; + /* row state should be cloned from the "all others" row */ + newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0); + numnewranges++; + /* Update colors in newrow and create arcs as needed */ + subcoloronerow(v, newrow, lp, rp, lastsubcolor); + } + + /* Ranges after target are unchanged */ + for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++) + { + newranges[numnewranges++] = *oldrange; + } + + /* Assert our original space estimate was adequate */ + assert(numnewranges <= (cm->numcmranges * 2 + 1)); + + /* And finally, store back the updated list of ranges */ + if (cm->cmranges != NULL) + FREE(cm->cmranges); + cm->cmranges = newranges; + cm->numcmranges = numnewranges; +} + +/* + * subcoloronerow - do subcolorcvec's work for one new row in the high colormap + */ +static void +subcoloronerow(struct vars * v, + int rownum, + struct state * lp, + struct state * rp, + color *lastsubcolor) +{ + struct colormap *cm = v->cm; + color *pco; + int i; + + /* Apply subcolorhi() and make arc for each entry in row */ + pco = &cm->hicolormap[rownum * cm->hiarraycols]; + for (i = 0; i < cm->hiarraycols; pco++, i++) + { + color sco = subcolorhi(cm, pco); + + NOERR(); + /* make the arc if needed */ + if (sco != *lastsubcolor) + { + newarc(v->nfa, PLAIN, sco, lp, rp); + NOERR(); + *lastsubcolor = sco; + } } } @@ -575,12 +934,12 @@ okcolors(struct nfa * nfa, { /* is subcolor, let parent deal with it */ } - else if (cd->nchrs == 0) + else if (cd->nschrs == 0 && cd->nuchrs == 0) { /* parent empty, its arcs change color to subcolor */ cd->sub = NOSUB; scd = &cm->cd[sco]; - assert(scd->nchrs > 0); + assert(scd->nschrs > 0 || scd->nuchrs > 0); assert(scd->sub == sco); scd->sub = NOSUB; while ((a = cd->arcs) != NULL) @@ -597,7 +956,7 @@ okcolors(struct nfa * nfa, /* parent's arcs must gain parallel subcolor arcs */ cd->sub = NOSUB; scd = &cm->cd[sco]; - assert(scd->nchrs > 0); + assert(scd->nschrs > 0 || scd->nuchrs > 0); assert(scd->sub == sco); scd->sub = NOSUB; for (a = cd->arcs; a != NULL; a = a->colorchain) @@ -711,62 +1070,54 @@ dumpcolors(struct colormap * cm, struct colordesc *end; color co; chr c; - char *has; fprintf(f, "max %ld\n", (long) cm->max); - if (NBYTS > 1) - fillcheck(cm, cm->tree, 0, f); end = CDEND(cm); for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ + { if (!UNUSEDCOLOR(cd)) { - assert(cd->nchrs > 0); - has = (cd->block != NULL) ? "#" : ""; + assert(cd->nschrs > 0 || cd->nuchrs > 0); if (cd->flags & PSEUDO) - fprintf(f, "#%2ld%s(ps): ", (long) co, has); + fprintf(f, "#%2ld(ps): ", (long) co); else - fprintf(f, "#%2ld%s(%2d): ", (long) co, - has, cd->nchrs); + fprintf(f, "#%2ld(%2d): ", (long) co, cd->nschrs + cd->nuchrs); /* * Unfortunately, it's hard to do this next bit more efficiently. - * - * Spencer's original coding has the loop iterating from CHR_MIN - * to CHR_MAX, but that's utterly unusable for 32-bit chr. For - * debugging purposes it seems fine to print only chr codes up to - * 1000 or so. */ - for (c = CHR_MIN; c < 1000; c++) + for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++) if (GETCOLOR(cm, c) == co) dumpchr(c, f); fprintf(f, "\n"); } -} - -/* - * fillcheck - check proper filling of a tree - */ -static void -fillcheck(struct colormap * cm, - union tree * tree, - int level, /* level number (top == 0) of this block */ - FILE *f) -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level + 1]; - - assert(level < NBYTS - 1); /* this level has pointers */ - for (i = BYTTAB - 1; i >= 0; i--) + } + /* dump the high colormap if it contains anything interesting */ + if (cm->hiarrayrows > 1 || cm->hiarraycols > 1) { - t = tree->tptr[i]; - if (t == NULL) - fprintf(f, "NULL found in filled tree!\n"); - else if (t == fillt) + int r, + c; + const color *rowptr; + + fprintf(f, "other:\t"); + for (c = 0; c < cm->hiarraycols; c++) + { + fprintf(f, "\t%ld", (long) cm->hicolormap[c]); + } + fprintf(f, "\n"); + for (r = 0; r < cm->numcmranges; r++) { + dumpchr(cm->cmranges[r].cmin, f); + fprintf(f, ".."); + dumpchr(cm->cmranges[r].cmax, f); + fprintf(f, ":"); + rowptr = &cm->hicolormap[cm->cmranges[r].rownum * cm->hiarraycols]; + for (c = 0; c < cm->hiarraycols; c++) + { + fprintf(f, "\t%ld", (long) rowptr[c]); + } + fprintf(f, "\n"); } - else if (level < NBYTS - 2) /* more pointer blocks below */ - fillcheck(cm, t, level + 1, f); } } |