diff options
author | Jeff Davis <jdavis@postgresql.org> | 2024-12-16 09:35:18 -0800 |
---|---|---|
committer | Jeff Davis <jdavis@postgresql.org> | 2024-12-16 09:35:18 -0800 |
commit | 86a5d6006aff956a5e00982b7628177fa7dc5027 (patch) | |
tree | 90edf4aeac487e9dba957a5fbb75f3d6ae9e58f6 /src/backend/utils/adt/pg_locale_libc.c | |
parent | de1e29885730851787b467449f525ff6fc7d69fa (diff) | |
download | postgresql-86a5d6006aff956a5e00982b7628177fa7dc5027.tar.gz postgresql-86a5d6006aff956a5e00982b7628177fa7dc5027.zip |
Refactor string case conversion into provider-specific files.
Create API entry points pg_strlower(), etc., that work with any
provider and give the caller control over the destination
buffer. Then, move provider-specific logic into pg_locale_builtin.c,
pg_locale_icu.c, and pg_locale_libc.c as appropriate.
Discussion: https://postgr.es/m/7aa46d77b377428058403723440862d12a8a129a.camel@j-davis.com
Diffstat (limited to 'src/backend/utils/adt/pg_locale_libc.c')
-rw-r--r-- | src/backend/utils/adt/pg_locale_libc.c | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 374ac37ba0a..97ca5a28e66 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -11,6 +11,9 @@ #include "postgres.h" +#include <limits.h> +#include <wctype.h> + #include "access/htup_details.h" #include "catalog/pg_database.h" #include "catalog/pg_collation.h" @@ -32,6 +35,13 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); +extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); +extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); +extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); + extern int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -48,6 +58,323 @@ static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, pg_locale_t locale); #endif +static size_t strlower_libc_sb(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); +static size_t strlower_libc_mb(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); +static size_t strtitle_libc_sb(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); +static size_t strtitle_libc_mb(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); +static size_t strupper_libc_sb(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); +static size_t strupper_libc_mb(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + +size_t +strlower_libc(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale) +{ + if (pg_database_encoding_max_length() > 1) + return strlower_libc_mb(dst, dstsize, src, srclen, locale); + else + return strlower_libc_sb(dst, dstsize, src, srclen, locale); +} + +size_t +strtitle_libc(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale) +{ + if (pg_database_encoding_max_length() > 1) + return strtitle_libc_mb(dst, dstsize, src, srclen, locale); + else + return strtitle_libc_sb(dst, dstsize, src, srclen, locale); +} + +size_t +strupper_libc(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale) +{ + if (pg_database_encoding_max_length() > 1) + return strupper_libc_mb(dst, dstsize, src, srclen, locale); + else + return strupper_libc_sb(dst, dstsize, src, srclen, locale); +} + +static size_t +strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) +{ + if (srclen < 0) + srclen = strlen(src); + + if (srclen + 1 <= destsize) + { + locale_t loc = locale->info.lt; + char *p; + + if (srclen + 1 > destsize) + return srclen; + + memcpy(dest, src, srclen); + dest[srclen] = '\0'; + + /* + * Note: we assume that tolower_l() will not be so broken as to need + * an isupper_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ + for (p = dest; *p; p++) + { + if (locale->is_default) + *p = pg_tolower((unsigned char) *p); + else + *p = tolower_l((unsigned char) *p, loc); + } + } + + return srclen; +} + +static size_t +strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) +{ + locale_t loc = locale->info.lt; + size_t result_size; + wchar_t *workspace; + char *result; + size_t curr_char; + size_t max_size; + + if (srclen < 0) + srclen = strlen(src); + + /* Overflow paranoia */ + if ((srclen + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t)); + + char2wchar(workspace, srclen + 1, src, srclen, locale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + workspace[curr_char] = towlower_l(workspace[curr_char], loc); + + /* + * Make result large enough; case change might change number of bytes + */ + max_size = curr_char * pg_database_encoding_max_length(); + result = palloc(max_size + 1); + + result_size = wchar2char(result, workspace, max_size + 1, locale); + + if (result_size + 1 > destsize) + return result_size; + + memcpy(dest, result, result_size); + dest[result_size] = '\0'; + + pfree(workspace); + pfree(result); + + return result_size; +} + +static size_t +strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) +{ + if (srclen < 0) + srclen = strlen(src); + + if (srclen + 1 <= destsize) + { + locale_t loc = locale->info.lt; + int wasalnum = false; + char *p; + + memcpy(dest, src, srclen); + dest[srclen] = '\0'; + + /* + * Note: we assume that toupper_l()/tolower_l() will not be so broken + * as to need guard tests. When using the default collation, we apply + * the traditional Postgres behavior that forces ASCII-style treatment + * of I/i, but in non-default collations you get exactly what the + * collation says. + */ + for (p = dest; *p; p++) + { + if (locale->is_default) + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + } + else + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, loc); + else + *p = toupper_l((unsigned char) *p, loc); + } + wasalnum = isalnum_l((unsigned char) *p, loc); + } + } + + return srclen; +} + +static size_t +strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) +{ + locale_t loc = locale->info.lt; + int wasalnum = false; + size_t result_size; + wchar_t *workspace; + char *result; + size_t curr_char; + size_t max_size; + + if (srclen < 0) + srclen = strlen(src); + + /* Overflow paranoia */ + if ((srclen + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t)); + + char2wchar(workspace, srclen + 1, src, srclen, locale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], loc); + else + workspace[curr_char] = towupper_l(workspace[curr_char], loc); + wasalnum = iswalnum_l(workspace[curr_char], loc); + } + + /* + * Make result large enough; case change might change number of bytes + */ + max_size = curr_char * pg_database_encoding_max_length(); + result = palloc(max_size + 1); + + result_size = wchar2char(result, workspace, max_size + 1, locale); + + if (result_size + 1 > destsize) + return result_size; + + memcpy(dest, result, result_size); + dest[result_size] = '\0'; + + pfree(workspace); + pfree(result); + + return result_size; +} + +static size_t +strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) +{ + if (srclen < 0) + srclen = strlen(src); + + if (srclen + 1 <= destsize) + { + locale_t loc = locale->info.lt; + char *p; + + memcpy(dest, src, srclen); + dest[srclen] = '\0'; + + /* + * Note: we assume that toupper_l() will not be so broken as to need + * an islower_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ + for (p = dest; *p; p++) + { + if (locale->is_default) + *p = pg_toupper((unsigned char) *p); + else + *p = toupper_l((unsigned char) *p, loc); + } + } + + return srclen; +} + +static size_t +strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) +{ + locale_t loc = locale->info.lt; + size_t result_size; + wchar_t *workspace; + char *result; + size_t curr_char; + size_t max_size; + + if (srclen < 0) + srclen = strlen(src); + + /* Overflow paranoia */ + if ((srclen + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t)); + + char2wchar(workspace, srclen + 1, src, srclen, locale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + workspace[curr_char] = towupper_l(workspace[curr_char], loc); + + /* + * Make result large enough; case change might change number of bytes + */ + max_size = curr_char * pg_database_encoding_max_length(); + result = palloc(max_size + 1); + + result_size = wchar2char(result, workspace, max_size + 1, locale); + + if (result_size + 1 > destsize) + return result_size; + + memcpy(dest, result, result_size); + dest[result_size] = '\0'; + + pfree(workspace); + pfree(result); + + return result_size; +} + pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context) { |