aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jeff Davis <jdavis@postgresql.org> 2023-03-17 11:47:35 -0700
committer Jeff Davis <jdavis@postgresql.org> 2023-03-17 12:08:46 -0700
commit f413941f41d370a7893caa3e6ed384b89a0577fd (patch)
tree 05a69d2b5ddd453939f3a5346c525fc51ec304ba
parent 064709f803c05559d2849a62fdb855fbb91ffeb8 (diff)
download postgresql-f413941f41d370a7893caa3e6ed384b89a0577fd.tar.gz
download postgresql-f413941f41d370a7893caa3e6ed384b89a0577fd.zip
Fix t_isspace(), etc., when datlocprovider=i and datctype=C.
Check whether the datctype is C to determine whether t_isspace() and related functions use isspace() or iswspace().

Previously, t_isspace() checked whether the database default collation was C, which is incorrect when the default collation uses the ICU provider.

Discussion: https://postgr.es/m/79e4354d9eccfdb00483146a6b9f6295202e7890.camel@j-davis.com
Reviewed-by: Peter Eisentraut
Backpatch-through: 15
-rw-r--r-- contrib/unaccent/expected/unaccent.out 9
-rw-r--r-- contrib/unaccent/expected/unaccent_1.out 8
-rw-r--r-- contrib/unaccent/sql/unaccent.sql 11
-rw-r--r-- src/backend/tsearch/ts_locale.c 18
-rw-r--r-- src/backend/tsearch/wparser_def.c 3
-rw-r--r-- src/backend/utils/adt/pg_locale.c 3
-rw-r--r-- src/backend/utils/init/postinit.c 4
-rw-r--r-- src/include/utils/pg_locale.h 2
8 files changed, 16 insertions, 42 deletions
diff --git a/contrib/unaccent/expected/unaccent.out b/contrib/unaccent/expected/unaccent.out
index cef98ee60cc..ee0ac71a1cc 100644
--- a/contrib/unaccent/expected/unaccent.out
+++ b/contrib/unaccent/expected/unaccent.out
@@ -1,12 +1,3 @@
--- unaccent is broken if the default collation is provided by ICU and
--- LC_CTYPE=C
-SELECT current_setting('lc_ctype') = 'C' AND
- (SELECT datlocprovider='i' FROM pg_database
- WHERE datname=current_database())
- AS skip_test \gset
-\if :skip_test
-\quit
-\endif
CREATE EXTENSION unaccent;
-- must have a UTF8 database
SELECT getdatabaseencoding();
diff --git a/contrib/unaccent/expected/unaccent_1.out b/contrib/unaccent/expected/unaccent_1.out
deleted file mode 100644
index 0a4a3838abd..00000000000
--- a/contrib/unaccent/expected/unaccent_1.out
+++ /dev/null
@@ -1,8 +0,0 @@
--- unaccent is broken if the default collation is provided by ICU and
--- LC_CTYPE=C
-SELECT current_setting('lc_ctype') = 'C' AND
- (SELECT datlocprovider='i' FROM pg_database
- WHERE datname=current_database())
- AS skip_test \gset
-\if :skip_test
-\quit
diff --git a/contrib/unaccent/sql/unaccent.sql b/contrib/unaccent/sql/unaccent.sql
index 027dfb964a7..3fc0c706be3 100644
--- a/contrib/unaccent/sql/unaccent.sql
+++ b/contrib/unaccent/sql/unaccent.sql
@@ -1,14 +1,3 @@
-
--- unaccent is broken if the default collation is provided by ICU and
--- LC_CTYPE=C
-SELECT current_setting('lc_ctype') = 'C' AND
- (SELECT datlocprovider='i' FROM pg_database
- WHERE datname=current_database())
- AS skip_test \gset
-\if :skip_test
-\quit
-\endif
-
CREATE EXTENSION unaccent;
-- must have a UTF8 database
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 0c031709902..f1150d30b71 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -38,10 +38,9 @@ t_isdigit(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || lc_ctype_is_c(collation))
+ if (clen == 1 || database_ctype_is_c)
return isdigit(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -54,10 +53,9 @@ t_isspace(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || lc_ctype_is_c(collation))
+ if (clen == 1 || database_ctype_is_c)
return isspace(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -70,10 +68,9 @@ t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || lc_ctype_is_c(collation))
+ if (clen == 1 || database_ctype_is_c)
return isalpha(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -86,10 +83,9 @@ t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || lc_ctype_is_c(collation))
+ if (clen == 1 || database_ctype_is_c)
return isalnum(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -102,10 +98,9 @@ t_isprint(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || lc_ctype_is_c(collation))
+ if (clen == 1 || database_ctype_is_c)
return isprint(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -273,7 +268,6 @@ char *
lowerstr_with_len(const char *str, int len)
{
char *out;
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (len == 0)
@@ -285,7 +279,7 @@ lowerstr_with_len(const char *str, int len)
* Also, for a C locale there is no need to process as multibyte. From
* backend/utils/adt/oracle_compat.c Teodor
*/
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
+ if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
{
wchar_t *wstr,
*wptr;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index cc3736454ec..840a44ec007 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -297,11 +297,10 @@ TParserInit(char *str, int len)
*/
if (prs->charmaxlen > 1)
{
- Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
prs->usewide = true;
- if (lc_ctype_is_c(collation))
+ if (database_ctype_is_c)
{
/*
* char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 1d3d4d86d39..90ec773c024 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -107,6 +107,9 @@ char *localized_full_days[7 + 1];
char *localized_abbrev_months[12 + 1];
char *localized_full_months[12 + 1];
+/* is the database's LC_CTYPE the C locale? */
+bool database_ctype_is_c = false;
+
/* indicates whether locale information cache is valid */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 92bac8b63f5..31d6a054260 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -419,6 +419,10 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
" which is not recognized by setlocale().", ctype),
errhint("Recreate the database with another locale or install the missing locale.")));
+ if (strcmp(ctype, "C") == 0 ||
+ strcmp(ctype, "POSIX") == 0)
+ database_ctype_is_c = true;
+
if (dbform->datlocprovider == COLLPROVIDER_ICU)
{
char *icurules;
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f9ce428233e..dd822a68be1 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -47,6 +47,8 @@ extern PGDLLIMPORT char *localized_full_days[];
extern PGDLLIMPORT char *localized_abbrev_months[];
extern PGDLLIMPORT char *localized_full_months[];
+/* is the database's LC_CTYPE the C locale? */
+extern PGDLLIMPORT bool database_ctype_is_c;
extern bool check_locale(int category, const char *locale, char **canonname);
extern char *pg_perm_setlocale(int category, const char *locale);