aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/pg_locale_libc.c
diff options
context:
space:
mode:
author Jeff Davis <jdavis@postgresql.org> 2024-12-16 09:35:18 -0800
committer Jeff Davis <jdavis@postgresql.org> 2024-12-16 09:35:18 -0800
commit 86a5d6006aff956a5e00982b7628177fa7dc5027 (patch)
tree 90edf4aeac487e9dba957a5fbb75f3d6ae9e58f6 /src/backend/utils/adt/pg_locale_libc.c
parent de1e29885730851787b467449f525ff6fc7d69fa (diff)
download postgresql-86a5d6006aff956a5e00982b7628177fa7dc5027.tar.gz
postgresql-86a5d6006aff956a5e00982b7628177fa7dc5027.zip
Refactor string case conversion into provider-specific files.
Create API entry points pg_strlower(), etc., that work with any provider and give the caller control over the destination buffer. Then, move provider-specific logic into pg_locale_builtin.c, pg_locale_icu.c, and pg_locale_libc.c as appropriate. Discussion: https://postgr.es/m/7aa46d77b377428058403723440862d12a8a129a.camel@j-davis.com
Diffstat (limited to 'src/backend/utils/adt/pg_locale_libc.c')
-rw-r--r-- src/backend/utils/adt/pg_locale_libc.c 327
1 files changed, 327 insertions, 0 deletions
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 374ac37ba0a..97ca5a28e66 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -11,6 +11,9 @@
#include "postgres.h"
+#include <limits.h>
+#include <wctype.h>
+
#include "access/htup_details.h"
#include "catalog/pg_database.h"
#include "catalog/pg_collation.h"
@@ -32,6 +35,13 @@
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
+extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+
extern int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
@@ -48,6 +58,323 @@ static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
pg_locale_t locale);
#endif
+static size_t strlower_libc_sb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strlower_libc_mb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strtitle_libc_sb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strtitle_libc_mb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strupper_libc_sb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strupper_libc_mb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+
+/*
+ * strlower_libc
+ *
+ * Entry point for lowercasing with the libc provider.  Forwards all of its
+ * arguments unchanged to the single-byte variant when the database
+ * encoding's maximum character length is at most 1, and to the multibyte
+ * variant otherwise.  The return value is whatever the chosen variant
+ * returns.
+ */
+size_t
+strlower_libc(char *dst, size_t dstsize, const char *src,
+              ssize_t srclen, pg_locale_t locale)
+{
+    /* Single-byte encodings take the bytewise path */
+    if (pg_database_encoding_max_length() <= 1)
+        return strlower_libc_sb(dst, dstsize, src, srclen, locale);
+
+    return strlower_libc_mb(dst, dstsize, src, srclen, locale);
+}
+
+/*
+ * strtitle_libc
+ *
+ * Entry point for title-casing with the libc provider.  Forwards all of its
+ * arguments unchanged to the single-byte variant when the database
+ * encoding's maximum character length is at most 1, and to the multibyte
+ * variant otherwise.  The return value is whatever the chosen variant
+ * returns.
+ */
+size_t
+strtitle_libc(char *dst, size_t dstsize, const char *src,
+              ssize_t srclen, pg_locale_t locale)
+{
+    /* Single-byte encodings take the bytewise path */
+    if (pg_database_encoding_max_length() <= 1)
+        return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
+
+    return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
+}
+
+/*
+ * strupper_libc
+ *
+ * Entry point for uppercasing with the libc provider.  Forwards all of its
+ * arguments unchanged to the single-byte variant when the database
+ * encoding's maximum character length is at most 1, and to the multibyte
+ * variant otherwise.  The return value is whatever the chosen variant
+ * returns.
+ */
+size_t
+strupper_libc(char *dst, size_t dstsize, const char *src,
+              ssize_t srclen, pg_locale_t locale)
+{
+    /* Single-byte encodings take the bytewise path */
+    if (pg_database_encoding_max_length() <= 1)
+        return strupper_libc_sb(dst, dstsize, src, srclen, locale);
+
+    return strupper_libc_mb(dst, dstsize, src, srclen, locale);
+}
+
+/*
+ * strlower_libc_sb
+ *
+ * Lowercase a string one byte at a time.
+ *
+ * dest/destsize: caller-supplied output buffer and its size in bytes.
+ * src/srclen: input bytes; a negative srclen means the length is taken
+ * with strlen(src).
+ *
+ * Returns the source length in bytes.  dest is written and NUL-terminated
+ * only when srclen + 1 bytes fit in destsize; otherwise dest is left
+ * untouched and only the length is reported.
+ */
+static size_t
+strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                 pg_locale_t locale)
+{
+    if (srclen < 0)
+        srclen = strlen(src);
+
+    /* Only touch dest when the result and its terminator fit */
+    if (srclen + 1 <= destsize)
+    {
+        locale_t    loc = locale->info.lt;
+        char       *p;
+
+        memcpy(dest, src, srclen);
+        dest[srclen] = '\0';
+
+        /*
+         * Note: we assume that tolower_l() will not be so broken as to need
+         * an isupper_l() guard test.  When using the default collation, we
+         * apply the traditional Postgres behavior that forces ASCII-style
+         * treatment of I/i, but in non-default collations you get exactly
+         * what the collation says.
+         */
+        for (p = dest; *p; p++)
+        {
+            if (locale->is_default)
+                *p = pg_tolower((unsigned char) *p);
+            else
+                *p = tolower_l((unsigned char) *p, loc);
+        }
+    }
+
+    return srclen;
+}
+
+/*
+ * strlower_libc_mb
+ *
+ * Lowercase a string by way of wide characters: the input is converted
+ * with char2wchar(), each wide character is passed through towlower_l(),
+ * and the result is converted back with wchar2char().
+ *
+ * dest/destsize: caller-supplied output buffer and its size in bytes.
+ * src/srclen: input bytes; a negative srclen means the length is taken
+ * with strlen(src).
+ *
+ * Returns the value reported by wchar2char() for the converted string.
+ * dest is written and NUL-terminated only when that many bytes plus the
+ * terminator fit in destsize; otherwise dest is left untouched.  The
+ * temporary buffers are released on both paths.
+ *
+ * Raises ERROR (out of memory) if the wide-character workspace size would
+ * exceed INT_MAX bytes.
+ */
+static size_t
+strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                 pg_locale_t locale)
+{
+    locale_t    loc = locale->info.lt;
+    size_t      result_size;
+    wchar_t    *workspace;
+    char       *result;
+    size_t      curr_char;
+    size_t      max_size;
+
+    if (srclen < 0)
+        srclen = strlen(src);
+
+    /* Overflow paranoia */
+    if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+        ereport(ERROR,
+                (errcode(ERRCODE_OUT_OF_MEMORY),
+                 errmsg("out of memory")));
+
+    /* Output workspace cannot have more codes than input bytes */
+    workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+    char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+    for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+        workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+
+    /*
+     * Make result large enough; case change might change number of bytes
+     */
+    max_size = curr_char * pg_database_encoding_max_length();
+    result = palloc(max_size + 1);
+
+    result_size = wchar2char(result, workspace, max_size + 1, locale);
+
+    /* Copy out only if the result and its terminator fit in dest */
+    if (result_size + 1 <= destsize)
+    {
+        memcpy(dest, result, result_size);
+        dest[result_size] = '\0';
+    }
+
+    /* Free the scratch buffers whether or not the result was copied */
+    pfree(workspace);
+    pfree(result);
+
+    return result_size;
+}
+
+/*
+ * strtitle_libc_sb
+ *
+ * Title-case a string one byte at a time: a byte that follows an
+ * alphanumeric byte is lowercased, any other byte is uppercased.
+ *
+ * dest/destsize: caller-supplied output buffer and its size in bytes.
+ * src/srclen: input bytes; a negative srclen means the length is taken
+ * with strlen(src).
+ *
+ * Returns the source length in bytes.  dest is written and NUL-terminated
+ * only when srclen + 1 bytes fit in destsize; otherwise dest is left
+ * untouched and only the length is reported.
+ */
+static size_t
+strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                 pg_locale_t locale)
+{
+    locale_t    loc;
+    int         prev_alnum = false;
+    char       *cursor;
+
+    if (srclen < 0)
+        srclen = strlen(src);
+
+    /* No room for the result and its terminator: report the length only */
+    if (srclen + 1 > destsize)
+        return srclen;
+
+    loc = locale->info.lt;
+
+    memcpy(dest, src, srclen);
+    dest[srclen] = '\0';
+
+    /*
+     * toupper_l()/tolower_l() are trusted not to need guard tests.  The
+     * default collation keeps the traditional Postgres behavior of forcing
+     * ASCII-style treatment of I/i; non-default collations get exactly
+     * what the collation says.
+     */
+    cursor = dest;
+    while (*cursor != '\0')
+    {
+        unsigned char ch = (unsigned char) *cursor;
+
+        if (locale->is_default)
+            *cursor = prev_alnum ? pg_tolower(ch) : pg_toupper(ch);
+        else
+            *cursor = prev_alnum ? tolower_l(ch, loc) : toupper_l(ch, loc);
+
+        /* Classify the converted byte to decide the case of the next one */
+        prev_alnum = isalnum_l((unsigned char) *cursor, loc);
+        cursor++;
+    }
+
+    return srclen;
+}
+
+/*
+ * strtitle_libc_mb
+ *
+ * Title-case a string by way of wide characters: the input is converted
+ * with char2wchar(); a wide character that follows an alphanumeric one is
+ * passed through towlower_l(), any other through towupper_l(); the result
+ * is converted back with wchar2char().
+ *
+ * dest/destsize: caller-supplied output buffer and its size in bytes.
+ * src/srclen: input bytes; a negative srclen means the length is taken
+ * with strlen(src).
+ *
+ * Returns the value reported by wchar2char() for the converted string.
+ * dest is written and NUL-terminated only when that many bytes plus the
+ * terminator fit in destsize; otherwise dest is left untouched.  The
+ * temporary buffers are released on both paths.
+ *
+ * Raises ERROR (out of memory) if the wide-character workspace size would
+ * exceed INT_MAX bytes.
+ */
+static size_t
+strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                 pg_locale_t locale)
+{
+    locale_t    loc = locale->info.lt;
+    int         wasalnum = false;
+    size_t      result_size;
+    wchar_t    *workspace;
+    char       *result;
+    size_t      curr_char;
+    size_t      max_size;
+
+    if (srclen < 0)
+        srclen = strlen(src);
+
+    /* Overflow paranoia */
+    if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+        ereport(ERROR,
+                (errcode(ERRCODE_OUT_OF_MEMORY),
+                 errmsg("out of memory")));
+
+    /* Output workspace cannot have more codes than input bytes */
+    workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+    char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+    for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+    {
+        if (wasalnum)
+            workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+        else
+            workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+
+        /* Classify the converted character to decide the next one's case */
+        wasalnum = iswalnum_l(workspace[curr_char], loc);
+    }
+
+    /*
+     * Make result large enough; case change might change number of bytes
+     */
+    max_size = curr_char * pg_database_encoding_max_length();
+    result = palloc(max_size + 1);
+
+    result_size = wchar2char(result, workspace, max_size + 1, locale);
+
+    /* Copy out only if the result and its terminator fit in dest */
+    if (result_size + 1 <= destsize)
+    {
+        memcpy(dest, result, result_size);
+        dest[result_size] = '\0';
+    }
+
+    /* Free the scratch buffers whether or not the result was copied */
+    pfree(workspace);
+    pfree(result);
+
+    return result_size;
+}
+
+/*
+ * strupper_libc_sb
+ *
+ * Uppercase a string one byte at a time.
+ *
+ * dest/destsize: caller-supplied output buffer and its size in bytes.
+ * src/srclen: input bytes; a negative srclen means the length is taken
+ * with strlen(src).
+ *
+ * Returns the source length in bytes.  dest is written and NUL-terminated
+ * only when srclen + 1 bytes fit in destsize; otherwise dest is left
+ * untouched and only the length is reported.
+ */
+static size_t
+strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                 pg_locale_t locale)
+{
+    locale_t    loc;
+    char       *cursor;
+
+    if (srclen < 0)
+        srclen = strlen(src);
+
+    /* No room for the result and its terminator: report the length only */
+    if (srclen + 1 > destsize)
+        return srclen;
+
+    loc = locale->info.lt;
+
+    memcpy(dest, src, srclen);
+    dest[srclen] = '\0';
+
+    /*
+     * toupper_l() is trusted not to need an islower_l() guard test.  The
+     * default collation keeps the traditional Postgres behavior of forcing
+     * ASCII-style treatment of I/i; non-default collations get exactly
+     * what the collation says.
+     */
+    cursor = dest;
+    while (*cursor != '\0')
+    {
+        unsigned char ch = (unsigned char) *cursor;
+
+        *cursor = locale->is_default ? pg_toupper(ch) : toupper_l(ch, loc);
+        cursor++;
+    }
+
+    return srclen;
+}
+
+/*
+ * strupper_libc_mb
+ *
+ * Uppercase a string by way of wide characters: the input is converted
+ * with char2wchar(), each wide character is passed through towupper_l(),
+ * and the result is converted back with wchar2char().
+ *
+ * dest/destsize: caller-supplied output buffer and its size in bytes.
+ * src/srclen: input bytes; a negative srclen means the length is taken
+ * with strlen(src).
+ *
+ * Returns the value reported by wchar2char() for the converted string.
+ * dest is written and NUL-terminated only when that many bytes plus the
+ * terminator fit in destsize; otherwise dest is left untouched.  The
+ * temporary buffers are released on both paths.
+ *
+ * Raises ERROR (out of memory) if the wide-character workspace size would
+ * exceed INT_MAX bytes.
+ */
+static size_t
+strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                 pg_locale_t locale)
+{
+    locale_t    loc = locale->info.lt;
+    size_t      result_size;
+    wchar_t    *workspace;
+    char       *result;
+    size_t      curr_char;
+    size_t      max_size;
+
+    if (srclen < 0)
+        srclen = strlen(src);
+
+    /* Overflow paranoia */
+    if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+        ereport(ERROR,
+                (errcode(ERRCODE_OUT_OF_MEMORY),
+                 errmsg("out of memory")));
+
+    /* Output workspace cannot have more codes than input bytes */
+    workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+    char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+    for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+        workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+
+    /*
+     * Make result large enough; case change might change number of bytes
+     */
+    max_size = curr_char * pg_database_encoding_max_length();
+    result = palloc(max_size + 1);
+
+    result_size = wchar2char(result, workspace, max_size + 1, locale);
+
+    /* Copy out only if the result and its terminator fit in dest */
+    if (result_size + 1 <= destsize)
+    {
+        memcpy(dest, result, result_size);
+        dest[result_size] = '\0';
+    }
+
+    /* Free the scratch buffers whether or not the result was copied */
+    pfree(workspace);
+    pfree(result);
+
+    return result_size;
+}
+
pg_locale_t
create_pg_locale_libc(Oid collid, MemoryContext context)
{