diff options
author | Noah Misch <noah@leadboat.com> | 2013-06-26 11:17:33 -0400 |
---|---|---|
committer | Noah Misch <noah@leadboat.com> | 2013-06-26 11:17:33 -0400 |
commit | 5f538ad004aa00cf0881f179f0cde789aad4f47e (patch) | |
tree | 4a84500c39ff82734078f1c9169879decd163bf7 /src/backend/utils/mb/mbutils.c | |
parent | 2c1031bd8602f749a81672015811f365a129acff (diff) | |
download | postgresql-5f538ad004aa00cf0881f179f0cde789aad4f47e.tar.gz postgresql-5f538ad004aa00cf0881f179f0cde789aad4f47e.zip |
Renovate display of non-ASCII messages on Windows.

GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.

Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.

elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.

Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.

Alexander Law and Noah Misch
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 132 |
1 file changed, 93 insertions, 39 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 4582219af73..6d1cd8e8759 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -53,11 +53,11 @@ static FmgrInfo *ToServerConvProc = NULL;
 static FmgrInfo *ToClientConvProc = NULL;
 
 /*
- * These variables track the currently selected FE and BE encodings.
+ * These variables track the currently-selected encodings.
  */
 static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
 static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
-static pg_enc2name *PlatformEncoding = NULL;
+static pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
 
 /*
  * During backend startup we can't set client encoding because we (a)
@@ -881,46 +881,102 @@ SetDatabaseEncoding(int encoding)
 	Assert(DatabaseEncoding->encoding == encoding);
 }
 
-/*
- * Bind gettext to the codeset equivalent with the database encoding.
- */
 void
-pg_bind_textdomain_codeset(const char *domainname)
+SetMessageEncoding(int encoding)
 {
-#if defined(ENABLE_NLS)
-	int encoding = GetDatabaseEncoding();
-	int i;
+	/* Some calls happen before we can elog()! */
+	Assert(PG_VALID_ENCODING(encoding));
 
-	/*
-	 * gettext() uses the codeset specified by LC_CTYPE by default, so if that
-	 * matches the database encoding we don't need to do anything. In CREATE
-	 * DATABASE, we enforce or trust that the locale's codeset matches
-	 * database encoding, except for the C locale. In C locale, we bind
-	 * gettext() explicitly to the right codeset.
-	 *
-	 * On Windows, though, gettext() tends to get confused so we always bind
-	 * it.
-	 */
-#ifndef WIN32
-	const char *ctype = setlocale(LC_CTYPE, NULL);
+	MessageEncoding = &pg_enc2name_tbl[encoding];
+	Assert(MessageEncoding->encoding == encoding);
+}
 
-	if (pg_strcasecmp(ctype, "C") != 0 && pg_strcasecmp(ctype, "POSIX") != 0)
-		return;
-#endif
+#ifdef ENABLE_NLS
+/*
+ * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
+ * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
+ * fail for gettext-internal causes like out-of-memory.
+ */
+static bool
+raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
+{
+	bool elog_ok = (CurrentMemoryContext != NULL);
+	int i;
 
 	for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
 	{
 		if (pg_enc2gettext_tbl[i].encoding == encoding)
 		{
 			if (bind_textdomain_codeset(domainname,
-										pg_enc2gettext_tbl[i].name) == NULL)
+										pg_enc2gettext_tbl[i].name) != NULL)
+				return true;
+
+			if (elog_ok)
 				elog(LOG, "bind_textdomain_codeset failed");
+			else
+				write_stderr("bind_textdomain_codeset failed");
+
 			break;
 		}
 	}
+
+	return false;
+}
+
+/*
+ * Bind a gettext message domain to the codeset corresponding to the database
+ * encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
+ * Return the MessageEncoding implied by the new settings.
+ *
+ * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
+ * When that matches the database encoding, we don't need to do anything. In
+ * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
+ * database encoding, except for the C locale. (On Windows, we also permit a
+ * discrepancy under the UTF8 encoding.) For the C locale, explicitly bind
+ * gettext to the right codeset.
+ *
+ * On Windows, gettext defaults to the Windows ANSI code page. This is a
+ * convenient departure for software that passes the strings to Windows ANSI
+ * APIs, but we don't do that. Compel gettext to use database encoding or,
+ * failing that, the LC_CTYPE encoding as it would on other platforms.
+ *
+ * This function is called before elog() and palloc() are usable.
+ */
+int
+pg_bind_textdomain_codeset(const char *domainname)
+{
+	bool elog_ok = (CurrentMemoryContext != NULL);
+	int encoding = GetDatabaseEncoding();
+	int new_msgenc;
+
+#ifndef WIN32
+	const char *ctype = setlocale(LC_CTYPE, NULL);
+
+	if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
 #endif
+		if (encoding != PG_SQL_ASCII &&
+			raw_pg_bind_textdomain_codeset(domainname, encoding))
+			return encoding;
+
+	new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
+	if (new_msgenc < 0)
+		new_msgenc = PG_SQL_ASCII;
+
+#ifdef WIN32
+	if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
+		/* On failure, the old message encoding remains valid. */
+		return GetMessageEncoding();
+#endif
+
+	return new_msgenc;
 }
+#endif
 
+/*
+ * The database encoding, also called the server encoding, represents the
+ * encoding of data stored in text-like data types. Affected types include
+ * cstring, text, varchar, name, xml, and json.
+ */
 int
 GetDatabaseEncoding(void)
 {
@@ -949,19 +1005,17 @@ pg_client_encoding(PG_FUNCTION_ARGS)
 	return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
 }
 
+/*
+ * gettext() returns messages in this encoding. This often matches the
+ * database encoding, but it differs for SQL_ASCII databases, for processes
+ * not attached to a database, and under a database encoding lacking iconv
+ * support (MULE_INTERNAL).
+ */
 int
-GetPlatformEncoding(void)
+GetMessageEncoding(void)
 {
-	if (PlatformEncoding == NULL)
-	{
-		/* try to determine encoding of server's environment locale */
-		int encoding = pg_get_encoding_from_locale("", true);
-
-		if (encoding < 0)
-			encoding = PG_SQL_ASCII;
-		PlatformEncoding = &pg_enc2name_tbl[encoding];
-	}
-	return PlatformEncoding->encoding;
+	Assert(MessageEncoding);
+	return MessageEncoding->encoding;
 }
 
 #ifdef WIN32
@@ -971,13 +1025,13 @@ GetPlatformEncoding(void)
  * is also passed to utf16len if not null. Returns NULL iff failed.
  */
 WCHAR *
-pgwin32_toUTF16(const char *str, int len, int *utf16len)
+pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
 {
 	WCHAR *utf16;
 	int dstlen;
 	UINT codepage;
 
-	codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage;
+	codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage;
 
 	/*
 	 * Use MultiByteToWideChar directly if there is a corresponding codepage,
@@ -994,7 +1048,7 @@ pgwin32_toUTF16(const char *str, int len, int *utf16len)
 		char *utf8;
 
 		utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
-										len, GetDatabaseEncoding(), PG_UTF8);
+										len, GetMessageEncoding(), PG_UTF8);
 		if (utf8 != str)
 			len = strlen(utf8);
 