Merge duplicate upper/lower/initcap() routines in oracle_compat.c and
formatting.c to use common code; remove duplicate functions and support
routines that are no longer needed.
author: Bruce Momjian <bruce@momjian.us> 2008-06-23 19:27:19 +0000
committer: Bruce Momjian <bruce@momjian.us> 2008-06-23 19:27:19 +0000
commit: f6ec7430f920991e417383c154f9c38c04a992b7 (patch)
tree: cb9994f9158108b9c3a69319db4dbf4216956930 /src/backend/utils/adt/oracle_compat.c
parent: eeee06919f3f8368fa48c6ceb7ece85a5562bc19 (diff)
download: postgresql-f6ec7430f920991e417383c154f9c38c04a992b7.tar.gz postgresql-f6ec7430f920991e417383c154f9c38c04a992b7.zip
1 files changed, 23 insertions, 422 deletions
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 372ff839563..bf29e7d1651 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.80 2008/06/17 16:09:06 momjian Exp $
+ *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.81 2008/06/23 19:27:19 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,292 +29,16 @@
 #endif
 
 #include "utils/builtins.h"
+#include "utils/formatting.h"
 #include "utils/pg_locale.h"
 #include "mb/pg_wchar.h"
 
 
-/*
- * If the system provides the needed functions for wide-character manipulation
- * (which are all standardized by C99), then we implement upper/lower/initcap
- * using wide-character functions.	Otherwise we use the traditional <ctype.h>
- * functions, which of course will not work as desired in multibyte character
- * sets.  Note that in either case we are effectively assuming that the
- * database character encoding matches the encoding implied by LC_CTYPE.
- */
-#ifdef USE_WIDE_UPPER_LOWER
-char	   *wstring_lower(char *str);
-char	   *wstring_upper(char *str);
-wchar_t	   *texttowcs(const text *txt);
-text	   *wcstotext(const wchar_t *str, int ncodes);
-#endif
-
 static text *dotrim(const char *string, int stringlen,
 	   const char *set, int setlen,
 	   bool doltrim, bool dortrim);
 
 
-#ifdef USE_WIDE_UPPER_LOWER
-
-/*
- * Convert a TEXT value into a palloc'd wchar string.
- */
-wchar_t *
-texttowcs(const text *txt)
-{
-	int			nbytes = VARSIZE_ANY_EXHDR(txt);
-	char	   *workstr;
-	wchar_t    *result;
-	size_t		ncodes;
-
-	/* Overflow paranoia */
-	if (nbytes < 0 ||
-		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
-
-	/* Need a null-terminated version of the input */
-	workstr = text_to_cstring(txt);
-
-	/* Output workspace cannot have more codes than input bytes */
-	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-	/* Do the conversion */
-	ncodes = mbstowcs(result, workstr, nbytes + 1);
-
-	if (ncodes == (size_t) -1)
-	{
-		/*
-		 * Invalid multibyte character encountered.  We try to give a useful
-		 * error message by letting pg_verifymbstr check the string.  But it's
-		 * possible that the string is OK to us, and not OK to mbstowcs ---
-		 * this suggests that the LC_CTYPE locale is different from the
-		 * database encoding.  Give a generic error message if verifymbstr
-		 * can't find anything wrong.
-		 */
-		pg_verifymbstr(workstr, nbytes, false);
-		ereport(ERROR,
-				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-				 errmsg("invalid multibyte character for locale"),
-				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-	}
-
-	Assert(ncodes <= (size_t) nbytes);
-
-	return result;
-}
-
-
-/*
- * Convert a wchar string into a palloc'd TEXT value.  The wchar string
- * must be zero-terminated, but we also require the caller to pass the string
- * length, since it will know it anyway in current uses.
- */
-text *
-wcstotext(const wchar_t *str, int ncodes)
-{
-	text	   *result;
-	size_t		nbytes;
-
-	/* Overflow paranoia */
-	if (ncodes < 0 ||
-		ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
-
-	/* Make workspace certainly large enough for result */
-	result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
-
-	/* Do the conversion */
-	nbytes = wcstombs((char *) VARDATA(result), str,
-					  (ncodes + 1) * MB_CUR_MAX);
-
-	if (nbytes == (size_t) -1)
-	{
-		/* Invalid multibyte character encountered ... shouldn't happen */
-		ereport(ERROR,
-				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-				 errmsg("invalid multibyte character for locale")));
-	}
-
-	Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
-
-	SET_VARSIZE(result, nbytes + VARHDRSZ);
-
-	return result;
-}
-#endif   /* USE_WIDE_UPPER_LOWER */
-
-
-/*
- * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
- * To make use of the upper/lower functionality, we need to map UTF8 to
- * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
- * This conversion layer takes care of it.
- */
-
-#ifdef WIN32
-
-/* texttowcs for the case of UTF8 to UTF16 */
-static wchar_t *
-win32_utf8_texttowcs(const text *txt)
-{
-	int			nbytes = VARSIZE_ANY_EXHDR(txt);
-	wchar_t    *result;
-	int			r;
-
-	/* Overflow paranoia */
-	if (nbytes < 0 ||
-		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
-
-	/* Output workspace cannot have more codes than input bytes */
-	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-	/* stupid Microsloth API does not work for zero-length input */
-	if (nbytes == 0)
-		r = 0;
-	else
-	{
-		/* Do the conversion */
-		r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
-								result, nbytes);
-
-		if (r <= 0)				/* assume it's NO_UNICODE_TRANSLATION */
-		{
-			/* see notes above about error reporting */
-			pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
-			ereport(ERROR,
-					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-					 errmsg("invalid multibyte character for locale"),
-					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-		}
-	}
-
-	/* Append trailing null wchar (MultiByteToWideChar won't have) */
-	Assert(r <= nbytes);
-	result[r] = 0;
-
-	return result;
-}
-
-/* wcstotext for the case of UTF16 to UTF8 */
-static text *
-win32_utf8_wcstotext(const wchar_t *str)
-{
-	text	   *result;
-	int			nbytes;
-	int			r;
-
-	/* Compute size of output string (this *will* include trailing null) */
-	nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
-	if (nbytes <= 0)			/* shouldn't happen */
-		ereport(ERROR,
-				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
-						GetLastError())));
-
-	result = palloc(nbytes + VARHDRSZ);
-
-	r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
-							NULL, NULL);
-	if (r != nbytes)			/* shouldn't happen */
-		ereport(ERROR,
-				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
-						GetLastError())));
-
-	SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
-
-	return result;
-}
-
-/* interface layer to check which encoding is in use */
-
-static wchar_t *
-win32_texttowcs(const text *txt)
-{
-	if (GetDatabaseEncoding() == PG_UTF8)
-		return win32_utf8_texttowcs(txt);
-	else
-		return texttowcs(txt);
-}
-
-static text *
-win32_wcstotext(const wchar_t *str, int ncodes)
-{
-	if (GetDatabaseEncoding() == PG_UTF8)
-		return win32_utf8_wcstotext(str);
-	else
-		return wcstotext(str, ncodes);
-}
-
-/* use macros to cause routines below to call interface layer */
-
-#define texttowcs	win32_texttowcs
-#define wcstotext	win32_wcstotext
-#endif   /* WIN32 */
-
-#ifdef USE_WIDE_UPPER_LOWER
-/*
- * string_upper and string_lower are used for correct multibyte upper/lower
- * transformations localized strings. Returns pointers to transformated
- * string.
- */
-char *
-wstring_upper(char *str)
-{
-	wchar_t    *workspace;
-	text	   *in_text;
-	text	   *out_text;
-	char	   *result;
-	int			i;
-
-	in_text = cstring_to_text(str);
-	workspace = texttowcs(in_text);
-
-	for (i = 0; workspace[i] != 0; i++)
-		workspace[i] = towupper(workspace[i]);
-
-	out_text = wcstotext(workspace, i);
-	result = text_to_cstring(out_text);
-
-	pfree(workspace);
-	pfree(in_text);
-	pfree(out_text);
-
-	return result;
-}
-
-char *
-wstring_lower(char *str)
-{
-	wchar_t    *workspace;
-	text	   *in_text;
-	text	   *out_text;
-	char	   *result;
-	int			i;
-
-	in_text = cstring_to_text(str);
-	workspace = texttowcs(in_text);
-
-	for (i = 0; workspace[i] != 0; i++)
-		workspace[i] = towlower(workspace[i]);
-
-	out_text = wcstotext(workspace, i);
-	result = text_to_cstring(out_text);
-
-	pfree(workspace);
-	pfree(in_text);
-	pfree(out_text);
-
-	return result;
-}
-#endif   /* USE_WIDE_UPPER_LOWER */
-
 /********************************************************************
  *
  * lower
@@ -332,52 +56,15 @@ wstring_lower(char *str)
 Datum
 lower(PG_FUNCTION_ARGS)
 {
-#ifdef USE_WIDE_UPPER_LOWER
+	text	*in_string = PG_GETARG_TEXT_PP(0);
+	char	*out_string;
+	text	*result;
 
-	/*
-	 * Use wide char code only when max encoding length > 1 and ctype != C.
-	 * Some operating systems fail with multi-byte encodings and a C locale.
-	 * Also, for a C locale there is no need to process as multibyte.
-	 */
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-	{
-		text	   *string = PG_GETARG_TEXT_PP(0);
-		text	   *result;
-		wchar_t    *workspace;
-		int			i;
-
-		workspace = texttowcs(string);
-
-		for (i = 0; workspace[i] != 0; i++)
-			workspace[i] = towlower(workspace[i]);
-
-		result = wcstotext(workspace, i);
-
-		pfree(workspace);
-
-		PG_RETURN_TEXT_P(result);
-	}
-	else
-#endif   /* USE_WIDE_UPPER_LOWER */
-	{
-		text	   *string = PG_GETARG_TEXT_P_COPY(0);
-		char	   *ptr;
-		int			m;
+	out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
+	result = cstring_to_text(out_string);
+	pfree(out_string);
 
-		/*
-		 * Since we copied the string, we can scribble directly on the value
-		 */
-		ptr = VARDATA(string);
-		m = VARSIZE(string) - VARHDRSZ;
-
-		while (m-- > 0)
-		{
-			*ptr = tolower((unsigned char) *ptr);
-			ptr++;
-		}
-
-		PG_RETURN_TEXT_P(string);
-	}
+	PG_RETURN_TEXT_P(result);
 }
 
 
@@ -398,52 +85,15 @@ lower(PG_FUNCTION_ARGS)
 Datum
 upper(PG_FUNCTION_ARGS)
 {
-#ifdef USE_WIDE_UPPER_LOWER
+	text	*in_string = PG_GETARG_TEXT_PP(0);
+	char	*out_string;
+	text	*result;
 
-	/*
-	 * Use wide char code only when max encoding length > 1 and ctype != C.
-	 * Some operating systems fail with multi-byte encodings and a C locale.
-	 * Also, for a C locale there is no need to process as multibyte.
-	 */
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-	{
-		text	   *string = PG_GETARG_TEXT_PP(0);
-		text	   *result;
-		wchar_t    *workspace;
-		int			i;
-
-		workspace = texttowcs(string);
-
-		for (i = 0; workspace[i] != 0; i++)
-			workspace[i] = towupper(workspace[i]);
-
-		result = wcstotext(workspace, i);
-
-		pfree(workspace);
+	out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
+	result = cstring_to_text(out_string);
+	pfree(out_string);
 
-		PG_RETURN_TEXT_P(result);
-	}
-	else
-#endif   /* USE_WIDE_UPPER_LOWER */
-	{
-		text	   *string = PG_GETARG_TEXT_P_COPY(0);
-		char	   *ptr;
-		int			m;
-
-		/*
-		 * Since we copied the string, we can scribble directly on the value
-		 */
-		ptr = VARDATA(string);
-		m = VARSIZE(string) - VARHDRSZ;
-
-		while (m-- > 0)
-		{
-			*ptr = toupper((unsigned char) *ptr);
-			ptr++;
-		}
-
-		PG_RETURN_TEXT_P(string);
-	}
+	PG_RETURN_TEXT_P(result);
 }
 
 
@@ -467,64 +117,15 @@ upper(PG_FUNCTION_ARGS)
 Datum
 initcap(PG_FUNCTION_ARGS)
 {
-#ifdef USE_WIDE_UPPER_LOWER
+	text	*in_string = PG_GETARG_TEXT_PP(0);
+	char	*out_string;
+	text	*result;
 
-	/*
-	 * Use wide char code only when max encoding length > 1 and ctype != C.
-	 * Some operating systems fail with multi-byte encodings and a C locale.
-	 * Also, for a C locale there is no need to process as multibyte.
-	 */
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-	{
-		text	   *string = PG_GETARG_TEXT_PP(0);
-		text	   *result;
-		wchar_t    *workspace;
-		int			wasalnum = 0;
-		int			i;
-
-		workspace = texttowcs(string);
-
-		for (i = 0; workspace[i] != 0; i++)
-		{
-			if (wasalnum)
-				workspace[i] = towlower(workspace[i]);
-			else
-				workspace[i] = towupper(workspace[i]);
-			wasalnum = iswalnum(workspace[i]);
-		}
-
-		result = wcstotext(workspace, i);
-
-		pfree(workspace);
-
-		PG_RETURN_TEXT_P(result);
-	}
-	else
-#endif   /* USE_WIDE_UPPER_LOWER */
-	{
-		text	   *string = PG_GETARG_TEXT_P_COPY(0);
-		int			wasalnum = 0;
-		char	   *ptr;
-		int			m;
-
-		/*
-		 * Since we copied the string, we can scribble directly on the value
-		 */
-		ptr = VARDATA(string);
-		m = VARSIZE(string) - VARHDRSZ;
-
-		while (m-- > 0)
-		{
-			if (wasalnum)
-				*ptr = tolower((unsigned char) *ptr);
-			else
-				*ptr = toupper((unsigned char) *ptr);
-			wasalnum = isalnum((unsigned char) *ptr);
-			ptr++;
-		}
+	out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
+	result = cstring_to_text(out_string);
+	pfree(out_string);
 
-		PG_RETURN_TEXT_P(string);
-	}
+	PG_RETURN_TEXT_P(result);
 }
author	Bruce Momjian <bruce@momjian.us>	2008-06-23 19:27:19 +0000
committer	Bruce Momjian <bruce@momjian.us>	2008-06-23 19:27:19 +0000
commit	f6ec7430f920991e417383c154f9c38c04a992b7 (patch)
tree	cb9994f9158108b9c3a69319db4dbf4216956930 /src/backend/utils/adt/oracle_compat.c
parent	eeee06919f3f8368fa48c6ceb7ece85a5562bc19 (diff)
download	postgresql-f6ec7430f920991e417383c154f9c38c04a992b7.tar.gz postgresql-f6ec7430f920991e417383c154f9c38c04a992b7.zip