aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/pg_locale.c
diff options
context:
space:
mode:
authorJeff Davis <jdavis@postgresql.org>2024-03-13 23:33:44 -0700
committerJeff Davis <jdavis@postgresql.org>2024-03-13 23:33:44 -0700
commit2d819a08a1cbc11364e36f816b02e33e8dcc030b (patch)
tree1a8d3b459866d7df936faffa0e64f5e339e6a6c2 /src/backend/utils/adt/pg_locale.c
parent6ab2e8385d55e0b73bb8bbc41d9c286f5f7f357f (diff)
downloadpostgresql-2d819a08a1cbc11364e36f816b02e33e8dcc030b.tar.gz
postgresql-2d819a08a1cbc11364e36f816b02e33e8dcc030b.zip
Introduce "builtin" collation provider.
New provider for collations, like "libc" or "icu", but without any external dependency. Initially, the only locale supported by the builtin provider is "C", which is identical to the libc provider's "C" locale. The libc provider's "C" locale has always been treated as a special case that uses an internal implementation, without using libc at all -- so the new builtin provider uses the same implementation. The builtin provider's locale is independent of the server environment variables LC_COLLATE and LC_CTYPE. Using the builtin provider, the database collation locale can be "C" while LC_COLLATE and LC_CTYPE are set to "en_US", which is impossible with the libc provider. By offering a new builtin provider, it clarifies that the semantics of a collation using this provider will never depend on libc, and makes it easier to document the behavior. Discussion: https://postgr.es/m/ab925f69-5f9d-f85e-b87c-bd2a44798659@joeconway.com Discussion: https://postgr.es/m/dd9261f4-7a98-4565-93ec-336c1c110d90@manitou-mail.org Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com Reviewed-by: Daniel Vérité, Peter Eisentraut, Jeremy Schneider
Diffstat (limited to 'src/backend/utils/adt/pg_locale.c')
-rw-r--r--src/backend/utils/adt/pg_locale.c123
1 files changed, 103 insertions, 20 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 77d5752dc8e..39390fbe4eb 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1268,7 +1268,18 @@ lookup_collation_cache(Oid collation, bool set_flags)
elog(ERROR, "cache lookup failed for collation %u", collation);
collform = (Form_pg_collation) GETSTRUCT(tp);
- if (collform->collprovider == COLLPROVIDER_LIBC)
+ if (collform->collprovider == COLLPROVIDER_BUILTIN)
+ {
+ Datum datum;
+ const char *colllocale;
+
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
+ colllocale = TextDatumGetCString(datum);
+
+ cache_entry->collate_is_c = true;
+ cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
+ }
+ else if (collform->collprovider == COLLPROVIDER_LIBC)
{
Datum datum;
const char *collcollate;
@@ -1319,16 +1330,30 @@ lc_collate_is_c(Oid collation)
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
- char *localeptr;
-
- if (default_locale.provider == COLLPROVIDER_ICU)
- return false;
+ const char *localeptr;
if (result >= 0)
return (bool) result;
- localeptr = setlocale(LC_COLLATE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_COLLATE setting");
+
+ if (default_locale.provider == COLLPROVIDER_BUILTIN)
+ {
+ result = true;
+ return (bool) result;
+ }
+ else if (default_locale.provider == COLLPROVIDER_ICU)
+ {
+ result = false;
+ return (bool) result;
+ }
+ else if (default_locale.provider == COLLPROVIDER_LIBC)
+ {
+ localeptr = setlocale(LC_CTYPE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_CTYPE setting");
+ }
+ else
+ elog(ERROR, "unexpected collation provider '%c'",
+ default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
@@ -1372,16 +1397,29 @@ lc_ctype_is_c(Oid collation)
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
- char *localeptr;
-
- if (default_locale.provider == COLLPROVIDER_ICU)
- return false;
+ const char *localeptr;
if (result >= 0)
return (bool) result;
- localeptr = setlocale(LC_CTYPE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_CTYPE setting");
+
+ if (default_locale.provider == COLLPROVIDER_BUILTIN)
+ {
+ localeptr = default_locale.info.builtin.locale;
+ }
+ else if (default_locale.provider == COLLPROVIDER_ICU)
+ {
+ result = false;
+ return (bool) result;
+ }
+ else if (default_locale.provider == COLLPROVIDER_LIBC)
+ {
+ localeptr = setlocale(LC_CTYPE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_CTYPE setting");
+ }
+ else
+ elog(ERROR, "unexpected collation provider '%c'",
+ default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
@@ -1519,10 +1557,10 @@ pg_newlocale_from_collation(Oid collid)
if (collid == DEFAULT_COLLATION_OID)
{
- if (default_locale.provider == COLLPROVIDER_ICU)
- return &default_locale;
- else
+ if (default_locale.provider == COLLPROVIDER_LIBC)
return (pg_locale_t) 0;
+ else
+ return &default_locale;
}
cache_entry = lookup_collation_cache(collid, false);
@@ -1547,7 +1585,19 @@ pg_newlocale_from_collation(Oid collid)
result.provider = collform->collprovider;
result.deterministic = collform->collisdeterministic;
- if (collform->collprovider == COLLPROVIDER_LIBC)
+ if (collform->collprovider == COLLPROVIDER_BUILTIN)
+ {
+ const char *locstr;
+
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
+ locstr = TextDatumGetCString(datum);
+
+ builtin_validate_locale(GetDatabaseEncoding(), locstr);
+
+ result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
+ locstr);
+ }
+ else if (collform->collprovider == COLLPROVIDER_LIBC)
{
const char *collcollate;
const char *collctype pg_attribute_unused();
@@ -1626,7 +1676,11 @@ pg_newlocale_from_collation(Oid collid)
collversionstr = TextDatumGetCString(datum);
- datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
+ Assert(collform->collprovider != COLLPROVIDER_BUILTIN);
+ if (collform->collprovider == COLLPROVIDER_LIBC)
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
+ else
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
actual_versionstr = get_collation_actual_version(collform->collprovider,
TextDatumGetCString(datum));
@@ -1677,6 +1731,10 @@ get_collation_actual_version(char collprovider, const char *collcollate)
{
char *collversion = NULL;
+ /* the builtin collation provider is not versioned */
+ if (collprovider == COLLPROVIDER_BUILTIN)
+ return NULL;
+
#ifdef USE_ICU
if (collprovider == COLLPROVIDER_ICU)
{
@@ -2443,6 +2501,31 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
return result;
}
+const char *
+builtin_validate_locale(int encoding, const char *locale)
+{
+ const char *canonical_name = NULL;
+ int required_encoding = -1;
+
+ if (strcmp(locale, "C") == 0)
+ canonical_name = "C";
+
+ if (!canonical_name)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid locale name \"%s\" for builtin provider",
+ locale)));
+
+ if (required_encoding >= 0 && encoding != required_encoding)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("encoding \"%s\" does not match locale \"%s\"",
+ pg_encoding_to_char(encoding), locale)));
+
+ return canonical_name;
+}
+
+
#ifdef USE_ICU
/*