aboutsummaryrefslogtreecommitdiff
path: root/src/backend/regex/regc_locale.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/regex/regc_locale.c')
-rw-r--r--src/backend/regex/regc_locale.c89
1 files changed, 65 insertions, 24 deletions
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
index 399de027cdd..7cb3a40a0c8 100644
--- a/src/backend/regex/regc_locale.c
+++ b/src/backend/regex/regc_locale.c
@@ -349,6 +349,19 @@ static const struct cname
}
};
+/*
+ * The following arrays define the valid character class names.
+ */
+static const char *const classNames[NUM_CCLASSES + 1] = {
+ "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
+ "lower", "print", "punct", "space", "upper", "xdigit", NULL
+};
+
+enum classes
+{
+ CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
+ CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
+};
/*
* We do not use the hard-wired Unicode classification tables that Tcl does.
@@ -544,21 +557,6 @@ cclass(struct vars * v, /* context */
index;
/*
- * The following arrays define the valid character class names.
- */
-
- static const char *const classNames[] = {
- "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
- "lower", "print", "punct", "space", "upper", "xdigit", NULL
- };
-
- enum classes
- {
- CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
- CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
- };
-
- /*
* Map the name to the corresponding enumerated value.
*/
len = endp - startp;
@@ -593,18 +591,20 @@ cclass(struct vars * v, /* context */
* pg_ctype_get_cache so that we can cache the results. Other classes
* have definitions that are hard-wired here, and for those we just
* construct a transient cvec on the fly.
+ *
+ * NB: keep this code in sync with cclass_column_index(), below.
*/
switch ((enum classes) index)
{
case CC_PRINT:
- cv = pg_ctype_get_cache(pg_wc_isprint);
+ cv = pg_ctype_get_cache(pg_wc_isprint, index);
break;
case CC_ALNUM:
- cv = pg_ctype_get_cache(pg_wc_isalnum);
+ cv = pg_ctype_get_cache(pg_wc_isalnum, index);
break;
case CC_ALPHA:
- cv = pg_ctype_get_cache(pg_wc_isalpha);
+ cv = pg_ctype_get_cache(pg_wc_isalpha, index);
break;
case CC_ASCII:
/* hard-wired meaning */
@@ -625,10 +625,10 @@ cclass(struct vars * v, /* context */
addrange(cv, 0x7f, 0x9f);
break;
case CC_DIGIT:
- cv = pg_ctype_get_cache(pg_wc_isdigit);
+ cv = pg_ctype_get_cache(pg_wc_isdigit, index);
break;
case CC_PUNCT:
- cv = pg_ctype_get_cache(pg_wc_ispunct);
+ cv = pg_ctype_get_cache(pg_wc_ispunct, index);
break;
case CC_XDIGIT:
@@ -646,16 +646,16 @@ cclass(struct vars * v, /* context */
}
break;
case CC_SPACE:
- cv = pg_ctype_get_cache(pg_wc_isspace);
+ cv = pg_ctype_get_cache(pg_wc_isspace, index);
break;
case CC_LOWER:
- cv = pg_ctype_get_cache(pg_wc_islower);
+ cv = pg_ctype_get_cache(pg_wc_islower, index);
break;
case CC_UPPER:
- cv = pg_ctype_get_cache(pg_wc_isupper);
+ cv = pg_ctype_get_cache(pg_wc_isupper, index);
break;
case CC_GRAPH:
- cv = pg_ctype_get_cache(pg_wc_isgraph);
+ cv = pg_ctype_get_cache(pg_wc_isgraph, index);
break;
}
@@ -666,6 +666,47 @@ cclass(struct vars * v, /* context */
}
/*
+ * cclass_column_index - get appropriate high colormap column index for chr
+ */
+static int
+cclass_column_index(struct colormap * cm, chr c)
+{
+ int colnum = 0;
+
+ /* Shouldn't go through all these pushups for simple chrs */
+ assert(c > MAX_SIMPLE_CHR);
+
+ /*
+ * Note: we should not see requests to consider cclasses that are not
+ * treated as locale-specific by cclass(), above.
+ */
+ if (cm->classbits[CC_PRINT] && pg_wc_isprint(c))
+ colnum |= cm->classbits[CC_PRINT];
+ if (cm->classbits[CC_ALNUM] && pg_wc_isalnum(c))
+ colnum |= cm->classbits[CC_ALNUM];
+ if (cm->classbits[CC_ALPHA] && pg_wc_isalpha(c))
+ colnum |= cm->classbits[CC_ALPHA];
+ assert(cm->classbits[CC_ASCII] == 0);
+ assert(cm->classbits[CC_BLANK] == 0);
+ assert(cm->classbits[CC_CNTRL] == 0);
+ if (cm->classbits[CC_DIGIT] && pg_wc_isdigit(c))
+ colnum |= cm->classbits[CC_DIGIT];
+ if (cm->classbits[CC_PUNCT] && pg_wc_ispunct(c))
+ colnum |= cm->classbits[CC_PUNCT];
+ assert(cm->classbits[CC_XDIGIT] == 0);
+ if (cm->classbits[CC_SPACE] && pg_wc_isspace(c))
+ colnum |= cm->classbits[CC_SPACE];
+ if (cm->classbits[CC_LOWER] && pg_wc_islower(c))
+ colnum |= cm->classbits[CC_LOWER];
+ if (cm->classbits[CC_UPPER] && pg_wc_isupper(c))
+ colnum |= cm->classbits[CC_UPPER];
+ if (cm->classbits[CC_GRAPH] && pg_wc_isgraph(c))
+ colnum |= cm->classbits[CC_GRAPH];
+
+ return colnum;
+}
+
+/*
* allcases - supply cvec for all case counterparts of a chr (including itself)
*
* This is a shortcut, preferably an efficient one, for simple characters;