diff options
author | Peter Eisentraut <peter_e@gmx.net> | 2017-03-23 15:25:34 -0400 |
---|---|---|
committer | Peter Eisentraut <peter_e@gmx.net> | 2017-03-23 15:28:48 -0400 |
commit | eccfef81e1f73ee41f1d8bfe4fa4e80576945048 (patch) | |
tree | 52bd1b2468bcf9682b356cf5b5f6199ae9d80ee4 /src/backend/utils/adt/pg_locale.c | |
parent | ea42cc18c35381f639d45628d792e790ff39e271 (diff) | |
download | postgresql-eccfef81e1f73ee41f1d8bfe4fa4e80576945048.tar.gz postgresql-eccfef81e1f73ee41f1d8bfe4fa4e80576945048.zip |
ICU support
Add a column collprovider to pg_collation that determines which library
provides the collation data. The existing choices are default and libc,
and this adds an icu choice, which uses the ICU4C library.
The pg_locale_t type is changed to a union that contains the
provider-specific locale handles. Users of locale information are
changed to look into that struct for the appropriate handle to use.
Also add a collversion column that records the version of the collation
when it is created, and check at run time whether it is still the same.
This detects potentially incompatible library upgrades that can corrupt
indexes and other structures. This is currently only supported by
ICU-provided collations.
initdb initializes the default collation set as before from the
`locale -a` output, but also adds all available ICU locales, with a
"-x-icu" suffix appended to their names.
Currently, ICU-provided collations can only be explicitly named
collations. The global database locales are still always libc-provided.
ICU support is enabled by configure --with-icu.
Reviewed-by: Thomas Munro <thomas.munro@enterprisedb.com>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Diffstat (limited to 'src/backend/utils/adt/pg_locale.c')
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 266 |
1 files changed, 223 insertions, 43 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index ab197025f81..2a2c9bc5046 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -57,11 +57,17 @@ #include "catalog/pg_collation.h" #include "catalog/pg_control.h" #include "mb/pg_wchar.h" +#include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" +#ifdef USE_ICU +#include <unicode/ucnv.h> +#endif + #ifdef WIN32 /* * This Windows file defines StrNCpy. We don't need it here, so we undefine @@ -1272,12 +1278,13 @@ pg_newlocale_from_collation(Oid collid) if (cache_entry->locale == 0) { /* We haven't computed this yet in this session, so do it */ -#ifdef HAVE_LOCALE_T HeapTuple tp; Form_pg_collation collform; const char *collcollate; - const char *collctype; - locale_t result; + const char *collctype pg_attribute_unused(); + pg_locale_t result; + Datum collversion; + bool isnull; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) @@ -1287,61 +1294,230 @@ pg_newlocale_from_collation(Oid collid) collcollate = NameStr(collform->collcollate); collctype = NameStr(collform->collctype); - if (strcmp(collcollate, collctype) == 0) + result = malloc(sizeof(* result)); + memset(result, 0, sizeof(* result)); + result->provider = collform->collprovider; + + if (collform->collprovider == COLLPROVIDER_LIBC) { - /* Normal case where they're the same */ +#ifdef HAVE_LOCALE_T + locale_t loc; + + if (strcmp(collcollate, collctype) == 0) + { + /* Normal case where they're the same */ #ifndef WIN32 - result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, - NULL); + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + NULL); #else - result = _create_locale(LC_ALL, collcollate); + loc = _create_locale(LC_ALL, collcollate); #endif - if (!result) - report_newlocale_failure(collcollate); - } - else - { + if 
(!loc) + report_newlocale_failure(collcollate); + } + else + { #ifndef WIN32 - /* We need two newlocale() steps */ - locale_t loc1; - - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); - if (!loc1) - report_newlocale_failure(collcollate); - result = newlocale(LC_CTYPE_MASK, collctype, loc1); - if (!result) - report_newlocale_failure(collctype); + /* We need two newlocale() steps */ + locale_t loc1; + + loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + if (!loc1) + report_newlocale_failure(collcollate); + loc = newlocale(LC_CTYPE_MASK, collctype, loc1); + if (!loc) + report_newlocale_failure(collctype); #else - /* - * XXX The _create_locale() API doesn't appear to support this. - * Could perhaps be worked around by changing pg_locale_t to - * contain two separate fields. - */ + /* + * XXX The _create_locale() API doesn't appear to support this. + * Could perhaps be worked around by changing pg_locale_t to + * contain two separate fields. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + result->info.lt = loc; +#else /* not HAVE_LOCALE_T */ + /* platform that doesn't support locale_t */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collations with different collate and ctype values are not supported on this platform"))); -#endif + errmsg("collation provider LIBC is not supported on this platform"))); +#endif /* not HAVE_LOCALE_T */ + } + else if (collform->collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + UCollator *collator; + UErrorCode status; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + + result->info.icu.locale = strdup(collcollate); + result->info.icu.ucol = collator; +#else /* not USE_ICU */ + /* could get here if a collation 
was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), \ + errhint("You need to rebuild PostgreSQL using --with-icu."))); +#endif /* not USE_ICU */ } - cache_entry->locale = result; + collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, + &isnull); + if (!isnull) + { + char *actual_versionstr; + char *collversionstr; + + actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate); + if (!actual_versionstr) + /* This could happen when specifying a version in CREATE + * COLLATION for a libc locale, or manually creating a mess + * in the catalogs. */ + ereport(ERROR, + (errmsg("collation \"%s\" has no actual version, but a version was specified", + NameStr(collform->collname)))); + collversionstr = TextDatumGetCString(collversion); + + if (strcmp(actual_versionstr, collversionstr) != 0) + ereport(WARNING, + (errmsg("collation \"%s\" has version mismatch", + NameStr(collform->collname)), + errdetail("The collation in the database was created using version %s, " + "but the operating system provides version %s.", + collversionstr, actual_versionstr), + errhint("Rebuild all objects affected by this collation and run " + "ALTER COLLATION %s REFRESH VERSION, " + "or build PostgreSQL with the right library version.", + quote_qualified_identifier(get_namespace_name(collform->collnamespace), + NameStr(collform->collname))))); + } ReleaseSysCache(tp); -#else /* not HAVE_LOCALE_T */ - /* - * For platforms that don't support locale_t, we can't do anything - * with non-default collations. - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondefault collations are not supported on this platform"))); -#endif /* not HAVE_LOCALE_T */ + cache_entry->locale = result; } return cache_entry->locale; } +/* + * Get provider-specific collation version string for the given collation from + * the operating system/library. 
+ * + * A particular provider must always either return a non-NULL string or return + * NULL (if it doesn't support versions). It must not return NULL for some + * collcollate and not NULL for others. + */ +char * +get_collation_actual_version(char collprovider, const char *collcollate) +{ + char *collversion; + +#ifdef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + { + UCollator *collator; + UErrorCode status; + UVersionInfo versioninfo; + char buf[U_MAX_VERSION_STRING_LENGTH]; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + ucol_getVersion(collator, versioninfo); + ucol_close(collator); + + u_versionToString(versioninfo, buf); + collversion = pstrdup(buf); + } + else +#endif + collversion = NULL; + + return collversion; +} + + +#ifdef USE_ICU +/* + * Converter object for converting between ICU's UChar strings and C strings + * in database encoding. Since the database encoding doesn't change, we only + * need one of these per session. 
+ */ +static UConverter *icu_converter = NULL; + +static void +init_icu_converter(void) +{ + const char *icu_encoding_name; + UErrorCode status; + UConverter *conv; + + if (icu_converter) + return; + + icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding()); + + status = U_ZERO_ERROR; + conv = ucnv_open(icu_encoding_name, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open ICU converter for encoding \"%s\": %s", + icu_encoding_name, u_errorName(status)))); + + icu_converter = conv; +} + +int32_t +icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) +{ + UErrorCode status; + int32_t len_uchar; + + init_icu_converter(); + + len_uchar = 2 * nbytes; /* max length per docs */ + *buff_uchar = palloc(len_uchar * sizeof(**buff_uchar)); + status = U_ZERO_ERROR; + len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_toUChars failed: %s", u_errorName(status)))); + return len_uchar; +} + +int32_t +icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar) +{ + UErrorCode status; + int32_t len_result; + + init_icu_converter(); + + len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter)); + *result = palloc(len_result + 1); + status = U_ZERO_ERROR; + ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_fromUChars failed: %s", u_errorName(status)))); + return len_result; +} +#endif /* * These functions convert from/to libc's wchar_t, *not* pg_wchar_t. 
@@ -1362,6 +1538,8 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + if (tolen == 0) return 0; @@ -1398,10 +1576,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) #ifdef HAVE_LOCALE_T #ifdef HAVE_WCSTOMBS_L /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale); + result = wcstombs_l(to, from, tolen, locale->info.lt); #else /* !HAVE_WCSTOMBS_L */ /* We have to temporarily set the locale as current ... ugh */ - locale_t save_locale = uselocale(locale); + locale_t save_locale = uselocale(locale->info.lt); result = wcstombs(to, from, tolen); @@ -1432,6 +1610,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + if (tolen == 0) return 0; @@ -1473,10 +1653,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, #ifdef HAVE_LOCALE_T #ifdef HAVE_MBSTOWCS_L /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale); + result = mbstowcs_l(to, str, tolen, locale->info.lt); #else /* !HAVE_MBSTOWCS_L */ /* We have to temporarily set the locale as current ... ugh */ - locale_t save_locale = uselocale(locale); + locale_t save_locale = uselocale(locale->info.lt); result = mbstowcs(to, str, tolen); |