aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Eisentraut <peter@eisentraut.org> 2022-09-16 09:37:54 +0200
committer Peter Eisentraut <peter@eisentraut.org> 2022-09-16 09:41:33 +0200
commit c7db01e325a530ec38ec7ba57cd3ed32e123e33c (patch)
tree 699109735a34071a016940152cb6ab5e7960e74f
parent cf2c7a736e4939ff0d6cf2acd29b17eea3bca7c2 (diff)
download postgresql-c7db01e325a530ec38ec7ba57cd3ed32e123e33c.tar.gz
postgresql-c7db01e325a530ec38ec7ba57cd3ed32e123e33c.zip
Don't allow creation of database with ICU locale with unsupported encoding
Check in CREATE DATABASE and initdb that the selected encoding is supported by ICU. Before, they would pass but users would later get an error from the server when they tried to use the database.

Also document that initdb sets the encoding to UTF8 by default if the ICU locale provider is chosen.

Author: Marina Polyakova <m.polyakova@postgrespro.ru>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://www.postgresql.org/message-id/6dd6db0984d86a51b7255ba79f111971@postgrespro.ru
-rw-r--r-- doc/src/sgml/ref/initdb.sgml 5
-rw-r--r-- src/backend/commands/dbcommands.c 9
-rw-r--r-- src/bin/initdb/initdb.c 29
-rw-r--r-- src/bin/initdb/t/001_initdb.pl 9
-rw-r--r-- src/bin/scripts/t/020_createdb.pl 9
5 files changed, 57 insertions, 4 deletions
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index f01df2dde96..81588962980 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -209,8 +209,9 @@ PostgreSQL documentation
<para>
Selects the encoding of the template databases. This will also
be the default encoding of any database you create later,
- unless you override it then. The default is derived from the locale, or
- <literal>SQL_ASCII</literal> if that does not work. The character sets supported by
+ unless you override it then. The default is derived from the locale,
+ if the libc locale provider is used, or <literal>UTF8</literal> if the
+ ICU locale provider is used. The character sets supported by
the <productname>PostgreSQL</productname> server are described
in <xref linkend="multibyte-charset-supported"/>.
</para>
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 6ff48bb18f3..f248ad42b77 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1034,6 +1034,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (dblocprovider == COLLPROVIDER_ICU)
{
+ if (!(is_encoding_supported_by_icu(encoding)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("encoding \"%s\" is not supported with ICU provider",
+ pg_encoding_to_char(encoding))));
+
/*
* This would happen if template0 uses the libc provider but the new
* database uses icu.
@@ -1042,10 +1048,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ICU locale must be specified")));
- }
- if (dblocprovider == COLLPROVIDER_ICU)
check_icu_locale(dbiculocale);
+ }
/*
* Check that the new encoding and locale settings match the source
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 6aeec8d426c..28f22b25b2e 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2043,6 +2043,27 @@ check_locale_encoding(const char *locale, int user_enc)
}
/*
+ * Check whether the chosen encoding is supported by ICU.
+ *
+ * Returns true when is_encoding_supported_by_icu() accepts user_enc.
+ * Otherwise logs an error, a detail line naming the rejected encoding,
+ * and a hint, then returns false.
+ *
+ * this should match the similar check in the backend createdb() function
+ */
+static bool
+check_icu_locale_encoding(int user_enc)
+{
+ if (!(is_encoding_supported_by_icu(user_enc)))
+ {
+ pg_log_error("encoding mismatch");
+ pg_log_error_detail("The encoding you selected (%s) is not supported with the ICU provider.",
+ pg_encoding_to_char(user_enc));
+ pg_log_error_hint("Rerun %s and either do not specify an encoding explicitly, "
+ "or choose a matching combination.",
+ progname);
+ return false;
+ }
+ return true;
+}
+
+/*
* set up the locale variables
*
* assumes we have called setlocale(LC_ALL, "") -- see set_pglocale_pgservice
@@ -2310,7 +2331,11 @@ setup_locale_encoding(void)
}
if (!encoding && locale_provider == COLLPROVIDER_ICU)
+ {
encodingid = PG_UTF8;
+ printf(_("The default database encoding has been set to \"%s\".\n"),
+ pg_encoding_to_char(encodingid));
+ }
else if (!encoding)
{
int ctype_enc;
@@ -2362,6 +2387,10 @@ setup_locale_encoding(void)
if (!check_locale_encoding(lc_ctype, encodingid) ||
!check_locale_encoding(lc_collate, encodingid))
exit(1); /* check_locale_encoding printed the error */
+
+ if (locale_provider == COLLPROVIDER_ICU &&
+ !check_icu_locale_encoding(encodingid))
+ exit(1);
}
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index a37f6dd9b33..164fc11cbff 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -118,6 +118,15 @@ if ($ENV{with_icu} eq 'yes')
],
qr/FATAL: could not open collator for locale/,
'fails for invalid ICU locale');
+
+ command_fails_like(
+ [
+ 'initdb', '--no-sync',
+ '--locale-provider=icu', '--encoding=SQL_ASCII',
+ '--icu-locale=en', "$tempdir/dataX"
+ ],
+ qr/error: encoding mismatch/,
+ 'fails for encoding not supported by ICU');
}
else
{
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index e91c1d013d0..e95f200d0b9 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -50,6 +50,15 @@ if ($ENV{with_icu} eq 'yes')
],
'fails for invalid ICU locale');
+ $node->command_fails_like(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=icu',
+ '--encoding=SQL_ASCII', 'foobarX'
+ ],
+ qr/ERROR: encoding "SQL_ASCII" is not supported with ICU provider/,
+ 'fails for encoding not supported by ICU');
+
# additional node, which uses the icu provider
my $node2 = PostgreSQL::Test::Cluster->new('icu');
$node2->init(extra => ['--locale-provider=icu', '--icu-locale=en']);