diff options
Diffstat (limited to 'src/include/regex/regguts.h')
-rw-r--r-- | src/include/regex/regguts.h | 141 |
1 files changed, 66 insertions, 75 deletions
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index b0aa641cc4f..69816f1449a 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -127,23 +127,6 @@ #define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) - -/* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact - * size of a byt is not critical, but about 8 bits is good, and extraction - * of 8-bit chunks is sometimes especially fast. - */ -#ifndef BYTBITS -#define BYTBITS 8 /* bits in a byt */ -#endif -#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */ -#define BYTMASK (BYTTAB-1) /* bit mask for byt */ -#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS) -/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ - - - /* * As soon as possible, we map chrs into equivalence classes -- "colors" -- * which are of much more manageable number. @@ -153,43 +136,10 @@ typedef short color; /* colors of characters */ #define MAX_COLOR 32767 /* max color (must fit in 'color' datatype) */ #define COLORLESS (-1) /* impossible color */ #define WHITE 0 /* default color, parent of all others */ - +/* Note: various places in the code know that WHITE is zero */ /* - * A colormap is a tree -- more precisely, a DAG -- indexed at each level - * by a byt of the chr, to map the chr to a color efficiently. Because - * lower sections of the tree can be shared, it can exploit the usual - * sparseness of such a mapping table. The tree is always NBYTS levels - * deep (in the past it was shallower during construction but was "filled" - * to full depth at the end of that); areas that are unaltered as yet point - * to "fill blocks" which are entirely WHITE in color. - * - * Leaf-level tree blocks are of type "struct colors", while upper-level - * blocks are of type "struct ptrs". Pointers into the tree are generally - * declared as "union tree *" to be agnostic about what level they point to. - */ - -/* the tree itself */ -struct colors -{ - color ccolor[BYTTAB]; -}; -struct ptrs -{ - union tree *pptr[BYTTAB]; -}; -union tree -{ - struct colors colors; - struct ptrs ptrs; -}; - -/* use these pseudo-field names when dereferencing a "union tree" pointer */ -#define tcolor colors.ccolor -#define tptr ptrs.pptr - -/* * Per-color data structure for the compile-time color machinery * * If "sub" is not NOSUB then it is the number of the color's current @@ -203,26 +153,56 @@ union tree */ struct colordesc { - uchr nchrs; /* number of chars of this color */ + int nschrs; /* number of simple chars of this color */ + int nuchrs; /* number of upper map entries of this color */ color sub; /* open subcolor, if any; or free-chain ptr */ #define NOSUB COLORLESS /* value of "sub" when no open subcolor */ struct arc *arcs; /* chain of all arcs of this color */ - chr firstchr; /* char first assigned to this color */ + chr firstchr; /* simple char first assigned to this color */ int flags; /* bit values defined next */ #define FREECOL 01 /* currently free */ #define PSEUDO 02 /* pseudocolor, no real chars */ #define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL) - union tree *block; /* block of solid color, if any */ }; /* * The color map itself * - * Much of the data in the colormap struct is only used at compile time. - * However, the bulk of the space usage is in the "tree" structure, so it's - * not clear that there's much point in converting the rest to a more compact - * form when compilation is finished. + * This struct holds both data used only at compile time, and the chr to + * color mapping information, used at both compile and run time. The latter + * is the bulk of the space, so it's not really worth separating out the + * compile-only portion. + * + * Ideally, the mapping data would just be an array of colors indexed by + * chr codes; but for large character sets that's impractical. Fortunately, + * common characters have smaller codes, so we can use a simple array for chr + * codes up to MAX_SIMPLE_CHR, and do something more complex for codes above + * that, without much loss of performance. The "something more complex" is a + * 2-D array of color entries, where row indexes correspond to individual chrs + * or chr ranges that have been mentioned in the regex (with row zero + * representing all other chrs), and column indexes correspond to different + * sets of locale-dependent character classes such as "isalpha". The + * classbits[k] entry is zero if we do not care about the k'th character class + * in this regex, and otherwise it is the bit to be OR'd into the column index + * if the character in question is a member of that class. We find the color + * of a high-valued chr by identifying which colormaprange it is in to get + * the row index (use row zero if it's in none of them), identifying which of + * the interesting cclasses it's in to get the column index, and then indexing + * into the 2-D hicolormap array. + * + * The colormapranges are required to be nonempty, nonoverlapping, and to + * appear in increasing chr-value order. */ + +#define NUM_CCLASSES 13 /* must match data in regc_locale.c */ + +typedef struct colormaprange +{ + chr cmin; /* range represents cmin..cmax inclusive */ + chr cmax; + int rownum; /* row index in hicolormap array (>= 1) */ +} colormaprange; + struct colormap { int magic; @@ -233,27 +213,27 @@ struct colormap color free; /* beginning of free chain (if non-0) */ struct colordesc *cd; /* pointer to array of colordescs */ #define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) + + /* mapping data for chrs <= MAX_SIMPLE_CHR: */ + color *locolormap; /* simple array indexed by chr code */ + + /* mapping data for chrs > MAX_SIMPLE_CHR: */ + int classbits[NUM_CCLASSES]; /* see comment above */ + int numcmranges; /* number of colormapranges */ + colormaprange *cmranges; /* ranges of high chrs */ + color *hicolormap; /* 2-D array of color entries */ + int maxarrayrows; /* number of array rows allocated */ + int hiarrayrows; /* number of array rows in use */ + int hiarraycols; /* number of array columns (2^N) */ + /* If we need up to NINLINECDS, we store them here to save a malloc */ -#define NINLINECDS ((size_t)10) +#define NINLINECDS ((size_t) 10) struct colordesc cdspace[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus lower-level fill blocks */ }; -/* optimization magic to do fast chr->color mapping */ -#define B0(c) ((c) & BYTMASK) -#define B1(c) (((c)>>BYTBITS) & BYTMASK) -#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) -#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) -#if NBYTS == 1 -#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) -#endif -/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ -#if NBYTS == 2 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) -#endif -#if NBYTS == 4 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) -#endif +/* fetch color for chr; beware of multiple evaluation of c argument */ +#define GETCOLOR(cm, c) \ + ((c) <= MAX_SIMPLE_CHR ? (cm)->locolormap[(c) - CHR_MIN] : pg_reg_getcolor(cm, c)) /* @@ -264,6 +244,11 @@ struct colormap * Representation of a set of characters. chrs[] represents individual * code points, ranges[] represents ranges in the form min..max inclusive. * + * If the cvec represents a locale-specific character class, eg [[:alpha:]], + * then the chrs[] and ranges[] arrays contain only members of that class + * up to MAX_SIMPLE_CHR (inclusive). cclasscode is set to regc_locale.c's + * code for the class, rather than being -1 as it is in an ordinary cvec. + * * Note that in cvecs gotten from newcvec() and intended to be freed by * freecvec(), both arrays of chrs are after the end of the struct, not * separately malloc'd; so chrspace and rangespace are effectively immutable. @@ -276,6 +261,7 @@ struct cvec int nranges; /* number of ranges (chr pairs) */ int rangespace; /* number of ranges allocated in ranges[] */ chr *ranges; /* pointer to vector of chr pairs */ + int cclasscode; /* value of "enum classes", or -1 */ }; @@ -489,3 +475,8 @@ struct guts int nlacons; /* size of lacons[]; note that only slots * numbered 1 .. nlacons-1 are used */ }; + + +/* prototypes for functions that are exported from regcomp.c to regexec.c */ +extern void pg_set_regex_collation(Oid collation); +extern color pg_reg_getcolor(struct colormap * cm, chr c); |