Teach regular expression operators to honor collations.

This involves getting the character classification and case-folding functions in the regex library to use the collations infrastructure. Most of this work had been done already in connection with the upper/lower and LIKE logic, so it was a simple matter of transposition. While at it, split out these functions into a separate source file regc_pg_locale.c, so that they can be correctly labeled with the Postgres project's license rather than the Scriptics license. These functions are 100% Postgres-written code whereas what remains in regc_locale.c is still mostly not ours, so lumping them both under the same copyright notice was getting more and more misleading.
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-04-10 18:02:17 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-04-10 18:03:09 -0400
commit: 1e16a8107db9a50435b39e09c6f9c52c45e63e1a (patch)
tree: bf2231fc078b46004c7814ba871e3c38c1d8d52d /src/backend/regex/regc_locale.c
parent: 210f95f1cd59c6fdfe0f84b922c19d8498ac377d (diff)
download: postgresql-1e16a8107db9a50435b39e09c6f9c52c45e63e1a.tar.gz postgresql-1e16a8107db9a50435b39e09c6f9c52c45e63e1a.zip
1 file changed, 0 insertions, 165 deletions
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
index 4f891973643..0f70931b13e 100644
--- a/src/backend/regex/regc_locale.c
+++ b/src/backend/regex/regc_locale.c
@@ -351,171 +351,6 @@ static const struct cname
 
 
 /*
- * ctype functions adapted to work on pg_wchar (a/k/a chr)
- *
- * When working in UTF8 encoding, we use the <wctype.h> functions if
- * available.  This assumes that every platform uses Unicode codepoints
- * directly as the wchar_t representation of Unicode.  On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that.  This is only 100%
- * correct in single-byte encodings such as LATINn.  However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't relevant for higher code values anyway.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-static int
-pg_wc_isdigit(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswdigit((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isdigit((unsigned char) c));
-}
-
-static int
-pg_wc_isalpha(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswalpha((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isalpha((unsigned char) c));
-}
-
-static int
-pg_wc_isalnum(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswalnum((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isalnum((unsigned char) c));
-}
-
-static int
-pg_wc_isupper(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswupper((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isupper((unsigned char) c));
-}
-
-static int
-pg_wc_islower(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswlower((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && islower((unsigned char) c));
-}
-
-static int
-pg_wc_isgraph(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswgraph((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isgraph((unsigned char) c));
-}
-
-static int
-pg_wc_isprint(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswprint((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isprint((unsigned char) c));
-}
-
-static int
-pg_wc_ispunct(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswpunct((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && ispunct((unsigned char) c));
-}
-
-static int
-pg_wc_isspace(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return iswspace((wint_t) c);
-	}
-#endif
-	return (c <= (pg_wchar) UCHAR_MAX && isspace((unsigned char) c));
-}
-
-static pg_wchar
-pg_wc_toupper(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return towupper((wint_t) c);
-	}
-#endif
-	if (c <= (pg_wchar) UCHAR_MAX)
-		return toupper((unsigned char) c);
-	return c;
-}
-
-static pg_wchar
-pg_wc_tolower(pg_wchar c)
-{
-#ifdef USE_WIDE_UPPER_LOWER
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-			return towlower((wint_t) c);
-	}
-#endif
-	if (c <= (pg_wchar) UCHAR_MAX)
-		return tolower((unsigned char) c);
-	return c;
-}
-
-
-/*
  * element - map collating-element name to celt
  */
 static celt
author	Tom Lane <tgl@sss.pgh.pa.us>	2011-04-10 18:02:17 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2011-04-10 18:03:09 -0400
commit	1e16a8107db9a50435b39e09c6f9c52c45e63e1a (patch)
tree	bf2231fc078b46004c7814ba871e3c38c1d8d52d /src/backend/regex/regc_locale.c
parent	210f95f1cd59c6fdfe0f84b922c19d8498ac377d (diff)
download	postgresql-1e16a8107db9a50435b39e09c6f9c52c45e63e1a.tar.gz postgresql-1e16a8107db9a50435b39e09c6f9c52c45e63e1a.zip