diff options
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 130 |
1 files changed, 129 insertions, 1 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 83e3a11c38f..e6b662199d9 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
  *
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
  */
 #include "postgres.h"
 
@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
 	return result;
 }
 
+
+
+#ifdef USE_WIDE_UPPER_LOWER
+
+/*
+ * wchar2char --- convert wide characters to multibyte format
+ *
+ * This has the same API as the standard wcstombs() function; in particular,
+ * tolen is the maximum number of bytes to store at *to, and *from must be
+ * zero-terminated.  The output will be zero-terminated iff there is room.
+ *
+ * Returns 0 without storing anything when tolen is 0.  Otherwise the return
+ * value is that of wcstombs(), except on the WIN32 UTF8 path, where it is
+ * the WideCharToMultiByte() count less one (that count includes the
+ * terminator), or (size_t) -1 if WideCharToMultiByte() returned zero.
+ */
+size_t
+wchar2char(char *to, const wchar_t *from, size_t tolen)
+{
+	size_t		result;
+
+	if (tolen == 0)
+		return 0;				/* no room for any output, not even a terminator */
+
+#ifdef WIN32
+	/*
+	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
+	 * and for some reason mbstowcs and wcstombs won't do this for us,
+	 * so we call the Win32 conversion routines directly:
+	 * WideCharToMultiByte() here, and MultiByteToWideChar() in char2wchar().
+	 */
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
+									 NULL, NULL);
+		/*
+		 * A zero return is failure.  result is an unsigned size_t, so the
+		 * "<= 0" test can only match zero, and the -1 assigned below is
+		 * stored as (size_t) -1.
+		 */
+		if (result <= 0)
+			result = -1;
+		else
+		{
+			Assert(result <= tolen);
+			/* Microsoft counts the zero terminator in the result */
+			result--;
+		}
+	}
+	else
+#endif   /* WIN32 --- in WIN32 builds the wcstombs() call below is the body of the "else" above */
+		result = wcstombs(to, from, tolen);
+	return result;
+}
+
+/*
+ * char2wchar --- convert multibyte characters to wide characters
+ *
+ * This has almost the API of mbstowcs(), except that *from need not be
+ * null-terminated; instead, the number of input bytes is specified as
+ * fromlen.  Also, we ereport() rather than returning -1 for invalid
+ * input encoding.
tolen is the maximum number of wchar_t's to store at *to.
+ * The output will be zero-terminated iff there is room.
+ *
+ * Returns 0 without storing anything when tolen is 0; otherwise returns the
+ * result of whichever conversion routine was used.  A result of -1 (really
+ * (size_t) -1, since result is a size_t) is meant to be reported through
+ * pg_verifymbstr()/ereport(ERROR) below rather than returned.
+ *
+ * NOTE(review): tolen is passed to MultiByteToWideChar() and mbstowcs(), but
+ * not to pg_mb2wchar_with_len(); on the lc_ctype_is_c() path the caller has
+ * to have sized *to for the whole input plus terminator.
+ */
+size_t
+char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
+{
+	size_t		result;
+
+	if (tolen == 0)
+		return 0;				/* no room for any output, not even a terminator */
+
+#ifdef WIN32
+	/* See WIN32 "Unicode" comment above */
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/* Win32 API does not work for zero-length input */
+		if (fromlen == 0)
+			result = 0;
+		else
+		{
+			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
+			/* A zero return is failure; -1 is stored as (size_t) -1 */
+			if (result == 0)
+				result = -1;
+		}
+
+		if (result != -1)		/* i.e. not the (size_t) -1 failure marker */
+		{
+			Assert(result < tolen);
+			/*
+			 * Append trailing null wchar (MultiByteToWideChar() does not).
+			 * The call above was limited to tolen - 1 outputs, which keeps
+			 * the slot at to[result] inside the buffer.
+			 */
+			to[result] = 0;
+		}
+	}
+	else
+#endif   /* WIN32 --- in WIN32 builds the block below is the body of the "else" above */
+	{
+		if (lc_ctype_is_c())
+		{
+			/*
+			 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
+			 * allocated with sufficient space
+			 *
+			 * tolen is not passed to this call, so nothing here enforces it.
+			 * NOTE(review): the cast presumes wchar_t and pg_wchar are
+			 * interchangeable as array elements --- confirm.
+			 */
+			result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
+		}
+		else
+		{
+			/*
+			 * mbstowcs requires ending '\0', and *from need not have one, so
+			 * convert from a pnstrdup() copy (presumably a terminated copy of
+			 * the first fromlen bytes) and free it right afterwards.
+			 */
+			char	   *str = pnstrdup(from, fromlen);
+
+			result = mbstowcs(to, str, tolen);
+			pfree(str);
+		}
+	}
+
+	if (result == -1)			/* compared as (size_t) -1: conversion failed */
+	{
+		/*
+		 * Invalid multibyte character encountered.  We try to give a useful
+		 * error message by letting pg_verifymbstr check the string.  But it's
+		 * possible that the string is OK to us, and not OK to mbstowcs ---
+		 * this suggests that the LC_CTYPE locale is different from the
+		 * database encoding.  Give a generic error message if verifymbstr
+		 * can't find anything wrong.
+		 */
+		pg_verifymbstr(from, fromlen, false);	/* might not return */
+		/* but if it does ... */
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("invalid multibyte character for locale"),
+				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+	}
+
+	return result;
+}
+
+#endif   /* USE_WIDE_UPPER_LOWER */
+
 /* convert a multibyte string to a wchar */
 int
 pg_mb2wchar(const char *from, pg_wchar *to) |