about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/tsearch/ts_locale.c 119
-rw-r--r-- src/backend/tsearch/ts_utils.c 12
-rw-r--r-- src/backend/utils/mb/mbutils.c 130
-rw-r--r-- src/backend/utils/mmgr/mcxt.c 14
-rw-r--r-- src/include/mb/pg_wchar.h 7
-rw-r--r-- src/include/tsearch/ts_locale.h 5
-rw-r--r-- src/include/tsearch/ts_public.h 4
-rw-r--r-- src/include/utils/palloc.h 4
8 files changed, 155 insertions, 140 deletions
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index c10a0862d36..5ce367a497e 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,125 +16,8 @@
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
-
#ifdef USE_WIDE_UPPER_LOWER
-/*
- * wchar2char --- convert wide characters to multibyte format
- *
- * This has the same API as the standard wcstombs() function; in particular,
- * tolen is the maximum number of bytes to store at *to, and *from must be
- * zero-terminated. The output will be zero-terminated iff there is room.
- */
-size_t
-wchar2char(char *to, const wchar_t *from, size_t tolen)
-{
- if (tolen == 0)
- return 0;
-
-#ifdef WIN32
- if (GetDatabaseEncoding() == PG_UTF8)
- {
- int r;
-
- r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
- NULL, NULL);
-
- if (r <= 0)
- return (size_t) -1;
-
- Assert(r <= tolen);
-
- /* Microsoft counts the zero terminator in the result */
- return r - 1;
- }
-#endif /* WIN32 */
-
- return wcstombs(to, from, tolen);
-}
-
-/*
- * char2wchar --- convert multibyte characters to wide characters
- *
- * This has almost the API of mbstowcs(), except that *from need not be
- * null-terminated; instead, the number of input bytes is specified as
- * fromlen. Also, we ereport() rather than returning -1 for invalid
- * input encoding. tolen is the maximum number of wchar_t's to store at *to.
- * The output will be zero-terminated iff there is room.
- */
-size_t
-char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
-{
- if (tolen == 0)
- return 0;
-
-#ifdef WIN32
- if (GetDatabaseEncoding() == PG_UTF8)
- {
- int r;
-
- /* stupid Microsloth API does not work for zero-length input */
- if (fromlen == 0)
- r = 0;
- else
- {
- r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
-
- if (r <= 0)
- {
- /* see notes in oracle_compat.c about error reporting */
- pg_verifymbstr(from, fromlen, false);
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("invalid multibyte character for locale"),
- errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
- }
- }
-
- Assert(r < tolen);
- to[r] = 0;
-
- return r;
- }
-#endif /* WIN32 */
-
- if (lc_ctype_is_c())
- {
- /*
- * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
- * allocated with sufficient space
- */
- return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
- }
- else
- {
- /*
- * mbstowcs requires ending '\0'
- */
- char *str = pnstrdup(from, fromlen);
- size_t result;
-
- result = mbstowcs(to, str, tolen);
-
- pfree(str);
-
- if (result == (size_t) -1)
- {
- pg_verifymbstr(from, fromlen, false);
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("invalid multibyte character for locale"),
- errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
- }
-
- if (result < tolen)
- to[result] = 0;
-
- return result;
- }
-}
-
-
int
t_isdigit(const char *ptr)
{
diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
index bdefaa6bc63..3708d02689f 100644
--- a/src/backend/tsearch/ts_utils.c
+++ b/src/backend/tsearch/ts_utils.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
bsearch(&key, s->stop, s->len,
sizeof(char *), comparestr)) ? true : false;
}
-
-char *
-pnstrdup(const char *in, int len)
-{
- char *out = palloc(len + 1);
-
- memcpy(out, in, len);
- out[len] = '\0';
- return out;
-}
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 83e3a11c38f..e6b662199d9 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
*/
#include "postgres.h"
@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
return result;
}
+
+
+#ifdef USE_WIDE_UPPER_LOWER
+
+/*
+ * wchar2char --- convert wide characters to multibyte format
+ *
+ * This has the same API as the standard wcstombs() function: tolen is the
+ * maximum number of bytes to store at *to, *from must be zero-terminated, and the output is zero-terminated iff there is room.
+ * Returns the number of bytes stored, not counting any terminator, or (size_t) -1 if the conversion fails; a tolen of 0 returns 0 at once.
+ */
+size_t
+wchar2char(char *to, const wchar_t *from, size_t tolen)
+{
+ size_t result;
+
+ if (tolen == 0)
+ return 0;
+
+#ifdef WIN32
+ /*
+ * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
+ * and for some reason mbstowcs and wcstombs won't do this for us, so for
+ * a UTF8 database we call the Win32 API directly (WideCharToMultiByte() here).
+ */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
+ NULL, NULL);
+ /* A zero return is failure; result is unsigned, so "<= 0" is really "== 0" */
+ if (result <= 0)
+ result = -1; /* stored as (size_t) -1 */
+ else
+ {
+ Assert(result <= tolen);
+ /* Microsoft counts the zero terminator in the result */
+ result--;
+ }
+ }
+ else
+#endif /* WIN32 */
+ result = wcstombs(to, from, tolen); /* every other case: the C library converts */
+ return result;
+}
+
+/*
+ * char2wchar --- convert multibyte characters to wide characters
+ *
+ * This has almost the API of mbstowcs(), except that *from need not be
+ * null-terminated; instead, the number of input bytes is specified as
+ * fromlen. Also, we ereport() rather than returning -1 for invalid
+ * input encoding. tolen is the maximum number of wchar_t's to store at *to
+ * (0 makes us return 0 at once); note the lc_ctype_is_c() path below never consults tolen. The output will be zero-terminated iff there is room.
+ */
+size_t
+char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
+{
+ size_t result;
+
+ if (tolen == 0)
+ return 0;
+
+#ifdef WIN32
+ /* See the WIN32 "Unicode" comment in wchar2char() */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ /* Win32 API does not work for zero-length input */
+ if (fromlen == 0)
+ result = 0;
+ else
+ {
+ result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
+ /* A zero return is failure (tolen - 1 above leaves a slot for the terminator) */
+ if (result == 0)
+ result = -1; /* stored as (size_t) -1; reported by the common check below */
+ }
+
+ if (result != -1)
+ {
+ Assert(result < tolen);
+ /* Append trailing null wchar (MultiByteToWideChar() does not) */
+ to[result] = 0;
+ }
+ }
+ else
+#endif /* WIN32 */
+ {
+ if (lc_ctype_is_c())
+ {
+ /*
+ * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
+ * allocated with sufficient space (tolen is not passed to it)
+ */
+ result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
+ }
+ else
+ {
+ /* mbstowcs requires ending '\0', so convert a terminated temporary copy */
+ char *str = pnstrdup(from, fromlen);
+
+ result = mbstowcs(to, str, tolen);
+ pfree(str);
+ }
+ }
+
+ if (result == -1)
+ {
+ /*
+ * Invalid multibyte character encountered. We try to give a useful
+ * error message by letting pg_verifymbstr check the string. But it's
+ * possible that the string is OK to us, and not OK to mbstowcs ---
+ * this suggests that the LC_CTYPE locale is different from the
+ * database encoding. Give a generic error message if verifymbstr
+ * can't find anything wrong.
+ */
+ pg_verifymbstr(from, fromlen, false); /* might not return */
+ /* but if it does ... */
+ ereport(ERROR,
+ (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+ errmsg("invalid multibyte character for locale"),
+ errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+ }
+
+ return result;
+}
+
+#endif
+
/* convert a multibyte string to a wchar */
int
pg_mb2wchar(const char *from, pg_wchar *to)
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index ebceca438f7..daeaaaf1871 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
pointer, size);
}
+/* Like pstrdup(), but copies exactly len bytes from an input that need not be null-terminated, then appends a null byte */
+char *
+pnstrdup(const char *in, int len)
+{
+ char *out = palloc(len + 1); /* one extra byte for the terminator */
+
+ memcpy(out, in, len); /* NOTE(review): len is a signed int and is not range-checked here */
+ out[len] = '\0';
+ return out;
+}
+
+
/*
* MemoryContextSwitchTo
* Returns the current context; installs the given context.
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index adfdee9b973..b29552fdeaf 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
@@ -362,6 +362,11 @@ extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
extern int pg_encoding_max_length(int encoding);
extern int pg_database_encoding_max_length(void);
+#ifdef USE_WIDE_UPPER_LOWER
+extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen); /* wide characters -> multibyte */
+extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen); /* multibyte -> wide characters */
+#endif /* USE_WIDE_UPPER_LOWER */
+
extern void SetDefaultClientEncoding(void);
extern int SetClientEncoding(int encoding, bool doit);
extern void InitializeClientEncoding(void);
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index adeeebac187..110efb191c1 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -33,9 +33,6 @@
#ifdef USE_WIDE_UPPER_LOWER
-extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
-extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
-
extern int t_isdigit(const char *ptr);
extern int t_isspace(const char *ptr);
extern int t_isalpha(const char *ptr);
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index d08d35db193..5e3723fa8ec 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -62,8 +62,6 @@ typedef struct
extern char *get_tsearch_config_filename(const char *basename,
const char *extension);
-extern char *pnstrdup(const char *in, int len);
-
/*
* Often useful stopword list management
*/
diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h
index a3e78580f86..7e3c085a7aa 100644
--- a/src/include/utils/palloc.h
+++ b/src/include/utils/palloc.h
@@ -21,7 +21,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.38 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.39 2008/06/18 18:42:54 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -70,6 +70,8 @@ extern void pfree(void *pointer);
extern void *repalloc(void *pointer, Size size);
+extern char *pnstrdup(const char *in, int len); /* palloc'd copy of len bytes of "in", with a null byte appended */
+
/*
* MemoryContextSwitchTo can't be a macro in standard C compilers.
* But we can make it an inline function when using GCC.