diff options
Diffstat (limited to 'src/backend/utils/adt/pg_locale.c')
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 266 |
1 files changed, 223 insertions, 43 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index ab197025f81..2a2c9bc5046 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -57,11 +57,17 @@ #include "catalog/pg_collation.h" #include "catalog/pg_control.h" #include "mb/pg_wchar.h" +#include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" +#ifdef USE_ICU +#include <unicode/ucnv.h> +#endif + #ifdef WIN32 /* * This Windows file defines StrNCpy. We don't need it here, so we undefine @@ -1272,12 +1278,13 @@ pg_newlocale_from_collation(Oid collid) if (cache_entry->locale == 0) { /* We haven't computed this yet in this session, so do it */ -#ifdef HAVE_LOCALE_T HeapTuple tp; Form_pg_collation collform; const char *collcollate; - const char *collctype; - locale_t result; + const char *collctype pg_attribute_unused(); + pg_locale_t result; + Datum collversion; + bool isnull; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) @@ -1287,61 +1294,230 @@ pg_newlocale_from_collation(Oid collid) collcollate = NameStr(collform->collcollate); collctype = NameStr(collform->collctype); - if (strcmp(collcollate, collctype) == 0) + result = malloc(sizeof(* result)); + memset(result, 0, sizeof(* result)); + result->provider = collform->collprovider; + + if (collform->collprovider == COLLPROVIDER_LIBC) { - /* Normal case where they're the same */ +#ifdef HAVE_LOCALE_T + locale_t loc; + + if (strcmp(collcollate, collctype) == 0) + { + /* Normal case where they're the same */ #ifndef WIN32 - result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, - NULL); + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + NULL); #else - result = _create_locale(LC_ALL, collcollate); + loc = _create_locale(LC_ALL, collcollate); #endif - if (!result) - report_newlocale_failure(collcollate); - } - else - { + if (!loc) + report_newlocale_failure(collcollate); + } + else + { #ifndef WIN32 - /* We need two newlocale() steps */ - locale_t loc1; - - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); - if (!loc1) - report_newlocale_failure(collcollate); - result = newlocale(LC_CTYPE_MASK, collctype, loc1); - if (!result) - report_newlocale_failure(collctype); + /* We need two newlocale() steps */ + locale_t loc1; + + loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + if (!loc1) + report_newlocale_failure(collcollate); + loc = newlocale(LC_CTYPE_MASK, collctype, loc1); + if (!loc) + report_newlocale_failure(collctype); #else - /* - * XXX The _create_locale() API doesn't appear to support this. - * Could perhaps be worked around by changing pg_locale_t to - * contain two separate fields. - */ + /* + * XXX The _create_locale() API doesn't appear to support this. + * Could perhaps be worked around by changing pg_locale_t to + * contain two separate fields. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + result->info.lt = loc; +#else /* not HAVE_LOCALE_T */ + /* platform that doesn't support locale_t */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collations with different collate and ctype values are not supported on this platform"))); -#endif + errmsg("collation provider LIBC is not supported on this platform"))); +#endif /* not HAVE_LOCALE_T */ + } + else if (collform->collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + UCollator *collator; + UErrorCode status; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + + result->info.icu.locale = strdup(collcollate); + result->info.icu.ucol = collator; +#else /* not USE_ICU */ + /* could get here if a collation was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), \ + errhint("You need to rebuild PostgreSQL using --with-icu."))); +#endif /* not USE_ICU */ } - cache_entry->locale = result; + collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, + &isnull); + if (!isnull) + { + char *actual_versionstr; + char *collversionstr; + + actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate); + if (!actual_versionstr) + /* This could happen when specifying a version in CREATE + * COLLATION for a libc locale, or manually creating a mess + * in the catalogs. */ + ereport(ERROR, + (errmsg("collation \"%s\" has no actual version, but a version was specified", + NameStr(collform->collname)))); + collversionstr = TextDatumGetCString(collversion); + + if (strcmp(actual_versionstr, collversionstr) != 0) + ereport(WARNING, + (errmsg("collation \"%s\" has version mismatch", + NameStr(collform->collname)), + errdetail("The collation in the database was created using version %s, " + "but the operating system provides version %s.", + collversionstr, actual_versionstr), + errhint("Rebuild all objects affected by this collation and run " + "ALTER COLLATION %s REFRESH VERSION, " + "or build PostgreSQL with the right library version.", + quote_qualified_identifier(get_namespace_name(collform->collnamespace), + NameStr(collform->collname))))); + } ReleaseSysCache(tp); -#else /* not HAVE_LOCALE_T */ - /* - * For platforms that don't support locale_t, we can't do anything - * with non-default collations. - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondefault collations are not supported on this platform"))); -#endif /* not HAVE_LOCALE_T */ + cache_entry->locale = result; } return cache_entry->locale; } +/* + * Get provider-specific collation version string for the given collation from + * the operating system/library. + * + * A particular provider must always either return a non-NULL string or return + * NULL (if it doesn't support versions). It must not return NULL for some + * collcollate and not NULL for others. + */ +char * +get_collation_actual_version(char collprovider, const char *collcollate) +{ + char *collversion; + +#ifdef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + { + UCollator *collator; + UErrorCode status; + UVersionInfo versioninfo; + char buf[U_MAX_VERSION_STRING_LENGTH]; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + ucol_getVersion(collator, versioninfo); + ucol_close(collator); + + u_versionToString(versioninfo, buf); + collversion = pstrdup(buf); + } + else +#endif + collversion = NULL; + + return collversion; +} + + +#ifdef USE_ICU +/* + * Converter object for converting between ICU's UChar strings and C strings + * in database encoding. Since the database encoding doesn't change, we only + * need one of these per session. + */ +static UConverter *icu_converter = NULL; + +static void +init_icu_converter(void) +{ + const char *icu_encoding_name; + UErrorCode status; + UConverter *conv; + + if (icu_converter) + return; + + icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding()); + + status = U_ZERO_ERROR; + conv = ucnv_open(icu_encoding_name, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open ICU converter for encoding \"%s\": %s", + icu_encoding_name, u_errorName(status)))); + + icu_converter = conv; +} + +int32_t +icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) +{ + UErrorCode status; + int32_t len_uchar; + + init_icu_converter(); + + len_uchar = 2 * nbytes; /* max length per docs */ + *buff_uchar = palloc(len_uchar * sizeof(**buff_uchar)); + status = U_ZERO_ERROR; + len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_toUChars failed: %s", u_errorName(status)))); + return len_uchar; +} + +int32_t +icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar) +{ + UErrorCode status; + int32_t len_result; + + init_icu_converter(); + + len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter)); + *result = palloc(len_result + 1); + status = U_ZERO_ERROR; + ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_fromUChars failed: %s", u_errorName(status)))); + return len_result; +} +#endif /* * These functions convert from/to libc's wchar_t, *not* pg_wchar_t. @@ -1362,6 +1538,8 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + if (tolen == 0) return 0; @@ -1398,10 +1576,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) #ifdef HAVE_LOCALE_T #ifdef HAVE_WCSTOMBS_L /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale); + result = wcstombs_l(to, from, tolen, locale->info.lt); #else /* !HAVE_WCSTOMBS_L */ /* We have to temporarily set the locale as current ... ugh */ - locale_t save_locale = uselocale(locale); + locale_t save_locale = uselocale(locale->info.lt); result = wcstombs(to, from, tolen); @@ -1432,6 +1610,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + if (tolen == 0) return 0; @@ -1473,10 +1653,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, #ifdef HAVE_LOCALE_T #ifdef HAVE_MBSTOWCS_L /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale); + result = mbstowcs_l(to, str, tolen, locale->info.lt); #else /* !HAVE_MBSTOWCS_L */ /* We have to temporarily set the locale as current ... ugh */ - locale_t save_locale = uselocale(locale); + locale_t save_locale = uselocale(locale->info.lt); result = mbstowcs(to, str, tolen); |