Refactor to add pg_strcoll(), pg_strxfrm(), and variants.

This refactoring offers a generally better separation of responsibilities for collation code. It is also a step towards multi-lib ICU, which should be based on a clean separation of the routines required for collation providers.

Callers with NUL-terminated strings should call pg_strcoll() or pg_strxfrm(); callers with strings and their length should call the variants pg_strncoll() or pg_strnxfrm().

Reviewed-by: Peter Eisentraut, Peter Geoghegan
Discussion: https://postgr.es/m/a581136455c940d7bd0ff482d3a2bd51af25a94f.camel%40j-davis.com
author: Jeff Davis <jdavis@postgresql.org> 2023-02-23 10:55:20 -0800
committer: Jeff Davis <jdavis@postgresql.org> 2023-02-23 10:55:20 -0800
commit: d87d548cd0304477413a73e9c1d148fb2d40b50d (patch)
tree: 110613f01e1fc49b20eb95e416227eaf96e469d0 /src/backend/utils/adt/varchar.c
parent: e9960732a9618d5f744ff43a09622c9185798760 (diff)
download: postgresql-d87d548cd0304477413a73e9c1d148fb2d40b50d.tar.gz postgresql-d87d548cd0304477413a73e9c1d148fb2d40b50d.zip
1 files changed, 27 insertions, 24 deletions
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 8ddbae8f51d..9ff3bcbdb75 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -1024,21 +1024,22 @@ hashbpchar(PG_FUNCTION_ARGS)
 #ifdef USE_ICU
 		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
-			int32_t		ulen = -1;
-			UChar	   *uchar = NULL;
-			Size		bsize;
-			uint8_t    *buf;
+			Size		bsize, rsize;
+			char	   *buf;
 
-			ulen = icu_to_uchar(&uchar, keydata, keylen);
+			bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
+			buf = palloc(bsize + 1);
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
-			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
-			pfree(uchar);
+			rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
+			if (rsize != bsize)
+				elog(ERROR, "pg_strnxfrm() returned unexpected result");
 
-			result = hash_any(buf, bsize);
+			/*
+			 * In principle, there's no reason to include the terminating NUL
+			 * character in the hash, but it was done before and the behavior
+			 * must be preserved.
+			 */
+			result = hash_any((uint8_t *) buf, bsize + 1);
 
 			pfree(buf);
 		}
@@ -1086,21 +1087,23 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 #ifdef USE_ICU
 		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
-			int32_t		ulen = -1;
-			UChar	   *uchar = NULL;
-			Size		bsize;
-			uint8_t    *buf;
+			Size		bsize, rsize;
+			char	   *buf;
 
-			ulen = icu_to_uchar(&uchar, keydata, keylen);
+			bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
+			buf = palloc(bsize + 1);
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
-			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
-			pfree(uchar);
+			rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
+			if (rsize != bsize)
+				elog(ERROR, "pg_strnxfrm() returned unexpected result");
 
-			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+			/*
+			 * In principle, there's no reason to include the terminating NUL
+			 * character in the hash, but it was done before and the behavior
+			 * must be preserved.
+			 */
+			result = hash_any_extended((uint8_t *) buf, bsize + 1,
+									   PG_GETARG_INT64(1));
 
 			pfree(buf);
 		}
author	Jeff Davis <jdavis@postgresql.org>	2023-02-23 10:55:20 -0800
committer	Jeff Davis <jdavis@postgresql.org>	2023-02-23 10:55:20 -0800
commit	d87d548cd0304477413a73e9c1d148fb2d40b50d (patch)
tree	110613f01e1fc49b20eb95e416227eaf96e469d0 /src/backend/utils/adt/varchar.c
parent	e9960732a9618d5f744ff43a09622c9185798760 (diff)
download	postgresql-d87d548cd0304477413a73e9c1d148fb2d40b50d.tar.gz postgresql-d87d548cd0304477413a73e9c1d148fb2d40b50d.zip