1 files changed, 65 insertions, 24 deletions
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
index 399de027cdd..7cb3a40a0c8 100644
--- a/src/backend/regex/regc_locale.c
+++ b/src/backend/regex/regc_locale.c
@@ -349,6 +349,19 @@ static const struct cname
 	}
 };
 
+/*
+ * NULL-terminated list of valid class names, indexed by enum classes below.
+ */
+static const char *const classNames[NUM_CCLASSES + 1] = {
+	"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
+	"lower", "print", "punct", "space", "upper", "xdigit", NULL
+};
+
+enum classes					/* values index classNames[], in the same order */
+{
+	CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
+	CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
+};
 
 /*
  * We do not use the hard-wired Unicode classification tables that Tcl does.
@@ -544,21 +557,6 @@ cclass(struct vars * v,			/* context */
 				index;
 
 	/*
-	 * The following arrays define the valid character class names.
-	 */
-
-	static const char *const classNames[] = {
-		"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
-		"lower", "print", "punct", "space", "upper", "xdigit", NULL
-	};
-
-	enum classes
-	{
-		CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
-		CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
-	};
-
-	/*
 	 * Map the name to the corresponding enumerated value.
 	 */
 	len = endp - startp;
@@ -593,18 +591,20 @@ cclass(struct vars * v,			/* context */
 	 * pg_ctype_get_cache so that we can cache the results.  Other classes
 	 * have definitions that are hard-wired here, and for those we just
 	 * construct a transient cvec on the fly.
+	 *
+	 * NB: keep this code in sync with cclass_column_index(), below.
 	 */
 
 	switch ((enum classes) index)
 	{
 		case CC_PRINT:
-			cv = pg_ctype_get_cache(pg_wc_isprint);
+			cv = pg_ctype_get_cache(pg_wc_isprint, index);
 			break;
 		case CC_ALNUM:
-			cv = pg_ctype_get_cache(pg_wc_isalnum);
+			cv = pg_ctype_get_cache(pg_wc_isalnum, index);
 			break;
 		case CC_ALPHA:
-			cv = pg_ctype_get_cache(pg_wc_isalpha);
+			cv = pg_ctype_get_cache(pg_wc_isalpha, index);
 			break;
 		case CC_ASCII:
 			/* hard-wired meaning */
@@ -625,10 +625,10 @@ cclass(struct vars * v,			/* context */
 			addrange(cv, 0x7f, 0x9f);
 			break;
 		case CC_DIGIT:
-			cv = pg_ctype_get_cache(pg_wc_isdigit);
+			cv = pg_ctype_get_cache(pg_wc_isdigit, index);
 			break;
 		case CC_PUNCT:
-			cv = pg_ctype_get_cache(pg_wc_ispunct);
+			cv = pg_ctype_get_cache(pg_wc_ispunct, index);
 			break;
 		case CC_XDIGIT:
 
@@ -646,16 +646,16 @@ cclass(struct vars * v,			/* context */
 			}
 			break;
 		case CC_SPACE:
-			cv = pg_ctype_get_cache(pg_wc_isspace);
+			cv = pg_ctype_get_cache(pg_wc_isspace, index);
 			break;
 		case CC_LOWER:
-			cv = pg_ctype_get_cache(pg_wc_islower);
+			cv = pg_ctype_get_cache(pg_wc_islower, index);
 			break;
 		case CC_UPPER:
-			cv = pg_ctype_get_cache(pg_wc_isupper);
+			cv = pg_ctype_get_cache(pg_wc_isupper, index);
 			break;
 		case CC_GRAPH:
-			cv = pg_ctype_get_cache(pg_wc_isgraph);
+			cv = pg_ctype_get_cache(pg_wc_isgraph, index);
 			break;
 	}
 
@@ -666,6 +666,47 @@ cclass(struct vars * v,			/* context */
 }
 
 /*
+ * cclass_column_index - get appropriate high colormap column index for chr
+ */
+static int
+cclass_column_index(struct colormap * cm, chr c)
+{
+	int			colnum = 0;		/* OR of classbits[] of classes containing c */
+
+	/* Not meant for simple chrs: callers must pass c > MAX_SIMPLE_CHR */
+	assert(c > MAX_SIMPLE_CHR);
+
+	/*
+	 * OR in classbits[] for each class with a nonzero entry that matches c.
+	 * Classes not treated as locale-specific by cclass() must have none.
+	 */
+	if (cm->classbits[CC_PRINT] && pg_wc_isprint(c))
+		colnum |= cm->classbits[CC_PRINT];
+	if (cm->classbits[CC_ALNUM] && pg_wc_isalnum(c))
+		colnum |= cm->classbits[CC_ALNUM];
+	if (cm->classbits[CC_ALPHA] && pg_wc_isalpha(c))
+		colnum |= cm->classbits[CC_ALPHA];
+	assert(cm->classbits[CC_ASCII] == 0);
+	assert(cm->classbits[CC_BLANK] == 0);
+	assert(cm->classbits[CC_CNTRL] == 0);
+	if (cm->classbits[CC_DIGIT] && pg_wc_isdigit(c))
+		colnum |= cm->classbits[CC_DIGIT];
+	if (cm->classbits[CC_PUNCT] && pg_wc_ispunct(c))
+		colnum |= cm->classbits[CC_PUNCT];
+	assert(cm->classbits[CC_XDIGIT] == 0);
+	if (cm->classbits[CC_SPACE] && pg_wc_isspace(c))
+		colnum |= cm->classbits[CC_SPACE];
+	if (cm->classbits[CC_LOWER] && pg_wc_islower(c))
+		colnum |= cm->classbits[CC_LOWER];
+	if (cm->classbits[CC_UPPER] && pg_wc_isupper(c))
+		colnum |= cm->classbits[CC_UPPER];
+	if (cm->classbits[CC_GRAPH] && pg_wc_isgraph(c))
+		colnum |= cm->classbits[CC_GRAPH];
+
+	return colnum;
+}
+
+/*
  * allcases - supply cvec for all case counterparts of a chr (including itself)
  *
  * This is a shortcut, preferably an efficient one, for simple characters;