diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2021-02-25 13:00:40 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2021-02-25 13:00:40 -0500 |
commit | 2a0af7fe460eb46f9af996075972bf7c2e3f211d (patch) | |
tree | dc99ebbf913c05e67796401ebbd1cabe4fad349b /src/include/regex/regguts.h | |
parent | 6b40d9bdbdc9f873868b0ddecacd9a307fc8ee26 (diff) | |
download | postgresql-2a0af7fe460eb46f9af996075972bf7c2e3f211d.tar.gz postgresql-2a0af7fe460eb46f9af996075972bf7c2e3f211d.zip |
Allow complemented character class escapes within regex brackets.
The complement-class escapes \D, \S, \W are now allowed within
bracket expressions. There is no semantic difficulty with doing
that, but the rather hokey macro-expansion-based implementation
previously used here couldn't cope.
Also, invent "word" as an allowed character class name, thus "\w"
is now equivalent to "[[:word:]]" outside brackets, or "[:word:]"
within brackets. POSIX allows such implementation-specific
extensions, and the same name is used in e.g. bash.
One surprising compatibility issue this raises is that constructs
such as "[\w-_]" are now disallowed, as our documentation has always
said they should be: character classes can't be endpoints of a range.
Previously, because \w was just a macro for "[:alnum:]_", such a
construct was read as "[[:alnum:]_-_]", so it was accepted so long as
the character after "-" was numerically greater than or equal to "_".
Some implementation cleanup along the way:
* Remove the lexnest() hack, and in consequence clean up wordchrs()
to not interact with the lexer.
* Fix colorcomplement() to not be O(N^2) in the number of colors
involved.
* Get rid of useless-as-far-as-I-can-see calls of element()
on single-character character element names in brackpart().
element() always maps these to the character itself, and things
would be quite broken if it didn't --- should "[a]" match something
different than "a" does? Besides, the shortcut path in brackpart()
wasn't doing this anyway, making it even more inconsistent.
Discussion: https://postgr.es/m/2845172.1613674385@sss.pgh.pa.us
Discussion: https://postgr.es/m/3220564.1613859619@sss.pgh.pa.us
Diffstat (limited to 'src/include/regex/regguts.h')
-rw-r--r-- | src/include/regex/regguts.h | 20 |
1 file changed, 16 insertions, 4 deletions
```diff
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 306525eb5fa..0e76a828f8f 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -128,6 +128,18 @@
 
 
 /*
+ * known character classes
+ */
+enum char_classes
+{
+ CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
+ CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT, CC_WORD
+};
+
+#define NUM_CCLASSES 14
+
+
+/*
  * As soon as possible, we map chrs into equivalence classes -- "colors" --
  * which are of much more manageable number.
  *
@@ -164,12 +176,14 @@ struct colordesc
 #define NOSUB COLORLESS /* value of "sub" when no open subcolor */
  struct arc *arcs; /* chain of all arcs of this color */
  chr firstchr; /* simple char first assigned to this color */
- int flags; /* bit values defined next */
+ int flags; /* bitmask of the following flags: */
 #define FREECOL 01 /* currently free */
 #define PSEUDO 02 /* pseudocolor, no real chars */
-#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL)
+#define COLMARK 04 /* temporary marker used in some functions */
 };
 
+#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL)
+
 /*
  * The color map itself
  *
@@ -199,8 +213,6 @@ struct colordesc
  * appear in increasing chr-value order.
  */
 
-#define NUM_CCLASSES 13 /* must match data in regc_locale.c */
-
 typedef struct colormaprange
 {
  chr cmin; /* range represents cmin..cmax inclusive */
```