diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/bin/pg_upgrade/check.c | 749 | ||||
-rw-r--r-- | src/bin/pg_upgrade/pg_upgrade.h | 13 | ||||
-rw-r--r-- | src/bin/pg_upgrade/version.c | 265 |
3 files changed, 506 insertions, 521 deletions
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 8ce6c674e32..c198896c9f0 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -10,6 +10,7 @@ #include "postgres_fe.h" #include "catalog/pg_authid_d.h" +#include "catalog/pg_class_d.h" #include "catalog/pg_collation.h" #include "fe_utils/string_utils.h" #include "mb/pg_wchar.h" @@ -23,13 +24,6 @@ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster); static void check_for_user_defined_postfix_ops(ClusterInfo *cluster); static void check_for_incompatible_polymorphics(ClusterInfo *cluster); static void check_for_tables_with_oids(ClusterInfo *cluster); -static void check_for_composite_data_type_usage(ClusterInfo *cluster); -static void check_for_reg_data_type_usage(ClusterInfo *cluster); -static void check_for_aclitem_data_type_usage(ClusterInfo *cluster); -static void check_for_removed_data_type_usage(ClusterInfo *cluster, - const char *version, - const char *datatype); -static void check_for_jsonb_9_4_usage(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(void); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); @@ -38,6 +32,497 @@ static void check_new_cluster_subscription_configuration(void); static void check_old_cluster_for_valid_slots(bool live_check); static void check_old_cluster_subscription_state(void); +/* + * DataTypesUsageChecks - definitions of data type checks for the old cluster + * in order to determine if an upgrade can be performed. See the comment on + * data_types_usage_checks below for a more detailed description. + */ +typedef struct +{ + /* Status line to print to the user */ + const char *status; + /* Filename to store report to */ + const char *report_filename; + /* Query to extract the oid of the datatype */ + const char *base_query; + /* Text to store to report in case of error */ + const char *report_text; + /* The latest version where the check applies */ + int threshold_version; + /* A function pointer for determining if the check applies */ + DataTypesUsageVersionCheck version_hook; +} DataTypesUsageChecks; + +/* + * Special values for threshold_version for indicating that a check applies to + * all versions, or that a custom function needs to be invoked to determine + * if the check applies. + */ +#define MANUAL_CHECK 1 +#define ALL_VERSIONS -1 + +/*-- + * Data type usage checks. Each check for problematic data type usage is + * defined in this array with metadata, SQL query for finding the data type + * and functionality for deciding if the check is applicable to the version + * of the old cluster. The struct members are described in detail below: + * + * status A oneline string which can be printed to the user to + * inform about progress. Should not end with newline. + * report_filename The filename in which the list of problems detected by + * the check will be printed. + * base_query A query which extracts the Oid of the datatype checked + * for. + * report_text The text which will be printed to the user to explain + * what the check did, and why it failed. The text should + * end with a newline, and does not need to refer to the + * report_filename as that is automatically appended to + * the report with the path to the log folder. + * threshold_version The major version of PostgreSQL for which to run the + * check. Iff the old cluster is less than, or equal to, + * the threshold version then the check will be executed. + * If the old version is greater than the threshold then + * the check is skipped. If the threshold_version is set + * to ALL_VERSIONS then it will be run unconditionally, + * if set to MANUAL_CHECK then the version_hook function + * will be executed in order to determine whether or not + * to run. + * version_hook A function pointer to a version check function of type + * DataTypesUsageVersionCheck which is used to determine + * if the check is applicable to the old cluster. If the + * version_hook returns true then the check will be run, + * else it will be skipped. The function will only be + * executed iff threshold_version is set to MANUAL_CHECK. + */ +static DataTypesUsageChecks data_types_usage_checks[] = +{ + /* + * Look for composite types that were made during initdb *or* belong to + * information_schema; that's important in case information_schema was + * dropped and reloaded. + * + * The cutoff OID here should match the source cluster's value of + * FirstNormalObjectId. We hardcode it rather than using that C #define + * because, if that #define is ever changed, our own version's value is + * NOT what to use. Eventually we may need a test on the source cluster's + * version to select the correct value. + */ + { + .status = gettext_noop("Checking for system-defined composite types in user tables"), + .report_filename = "tables_using_composite.txt", + .base_query = + "SELECT t.oid FROM pg_catalog.pg_type t " + "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid " + " WHERE typtype = 'c' AND (t.oid < 16384 OR nspname = 'information_schema')", + .report_text = + gettext_noop("Your installation contains system-defined composite types in user tables.\n" + "These type OIDs are not stable across PostgreSQL versions,\n" + "so this cluster cannot currently be upgraded. You can drop the\n" + "problem columns and restart the upgrade.\n"), + .threshold_version = ALL_VERSIONS + }, + + /* + * 9.3 -> 9.4 Fully implement the 'line' data type in 9.4, which + * previously returned "not enabled" by default and was only functionally + * enabled with a compile-time switch; as of 9.4 "line" has a different + * on-disk representation format. + */ + { + .status = gettext_noop("Checking for incompatible \"line\" data type"), + .report_filename = "tables_using_line.txt", + .base_query = + "SELECT 'pg_catalog.line'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"line\" data type in user tables.\n" + "this data type changed its internal and input/output format\n" + "between your old and new versions so this\n" + "cluster cannot currently be upgraded. You can\n" + "drop the problem columns and restart the upgrade.\n"), + .threshold_version = 903 + }, + + /* + * pg_upgrade only preserves these system values: pg_class.oid pg_type.oid + * pg_enum.oid + * + * Many of the reg* data types reference system catalog info that is not + * preserved, and hence these data types cannot be used in user tables + * upgraded by pg_upgrade. + */ + { + .status = gettext_noop("Checking for reg* data types in user tables"), + .report_filename = "tables_using_reg.txt", + + /* + * Note: older servers will not have all of these reg* types, so we + * have to write the query like this rather than depending on casts to + * regtype. + */ + .base_query = + "SELECT oid FROM pg_catalog.pg_type t " + "WHERE t.typnamespace = " + " (SELECT oid FROM pg_catalog.pg_namespace " + " WHERE nspname = 'pg_catalog') " + " AND t.typname IN ( " + /* pg_class.oid is preserved, so 'regclass' is OK */ + " 'regcollation', " + " 'regconfig', " + " 'regdictionary', " + " 'regnamespace', " + " 'regoper', " + " 'regoperator', " + " 'regproc', " + " 'regprocedure' " + /* pg_authid.oid is preserved, so 'regrole' is OK */ + /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */ + " )", + .report_text = + gettext_noop("Your installation contains one of the reg* data types in user tables.\n" + "These data types reference system OIDs that are not preserved by\n" + "pg_upgrade, so this cluster cannot currently be upgraded. You can\n" + "drop the problem columns and restart the upgrade.\n"), + .threshold_version = ALL_VERSIONS + }, + + /* + * PG 16 increased the size of the 'aclitem' type, which breaks the + * on-disk format for existing data. + */ + { + .status = gettext_noop("Checking for incompatible \"aclitem\" data type"), + .report_filename = "tables_using_aclitem.txt", + .base_query = + "SELECT 'pg_catalog.aclitem'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"aclitem\" data type in user tables.\n" + "The internal format of \"aclitem\" changed in PostgreSQL version 16\n" + "so this cluster cannot currently be upgraded. You can drop the\n" + "problem columns and restart the upgrade.\n"), + .threshold_version = 1500 + }, + + /* + * It's no longer allowed to create tables or views with "unknown"-type + * columns. We do not complain about views with such columns, because + * they should get silently converted to "text" columns during the DDL + * dump and reload; it seems unlikely to be worth making users do that by + * hand. However, if there's a table with such a column, the DDL reload + * will fail, so we should pre-detect that rather than failing + * mid-upgrade. Worse, if there's a matview with such a column, the DDL + * reload will silently change it to "text" which won't match the on-disk + * storage (which is like "cstring"). So we *must* reject that. + */ + { + .status = gettext_noop("Checking for invalid \"unknown\" user columns"), + .report_filename = "tables_using_unknown.txt", + .base_query = + "SELECT 'pg_catalog.unknown'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"unknown\" data type in user tables.\n" + "This data type is no longer allowed in tables, so this cluster\n" + "cannot currently be upgraded. You can drop the problem columns\n" + "and restart the upgrade.\n"), + .threshold_version = 906 + }, + + /* + * PG 12 changed the 'sql_identifier' type storage to be based on name, + * not varchar, which breaks on-disk format for existing data. So we need + * to prevent upgrade when used in user objects (tables, indexes, ...). In + * 12, the sql_identifier data type was switched from name to varchar, + * which does affect the storage (name is by-ref, but not varlena). This + * means user tables using sql_identifier for columns are broken because + * the on-disk format is different. + */ + { + .status = gettext_noop("Checking for invalid \"sql_identifier\" user columns"), + .report_filename = "tables_using_sql_identifier.txt", + .base_query = + "SELECT 'information_schema.sql_identifier'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"sql_identifier\" data type in user tables.\n" + "The on-disk format for this data type has changed, so this\n" + "cluster cannot currently be upgraded. You can drop the problem\n" + "columns and restart the upgrade.\n"), + .threshold_version = 1100 + }, + + /* + * JSONB changed its storage format during 9.4 beta, so check for it. + */ + { + .status = gettext_noop("Checking for incompatible \"jsonb\" data type in user tables"), + .report_filename = "tables_using_jsonb.txt", + .base_query = + "SELECT 'pg_catalog.jsonb'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"jsonb\" data type in user tables.\n" + "The internal format of \"jsonb\" changed during 9.4 beta so this\n" + "cluster cannot currently be upgraded. You can drop the problem \n" + "columns and restart the upgrade.\n"), + .threshold_version = MANUAL_CHECK, + .version_hook = jsonb_9_4_check_applicable + }, + + /* + * PG 12 removed types abstime, reltime, tinterval. + */ + { + .status = gettext_noop("Checking for removed \"abstime\" data type in user tables"), + .report_filename = "tables_using_abstime.txt", + .base_query = + "SELECT 'pg_catalog.abstime'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"abstime\" data type in user tables.\n" + "The \"abstime\" type has been removed in PostgreSQL version 12,\n" + "so this cluster cannot currently be upgraded. You can drop the\n" + "problem columns, or change them to another data type, and restart\n" + "the upgrade.\n"), + .threshold_version = 1100 + }, + { + .status = gettext_noop("Checking for removed \"reltime\" data type in user tables"), + .report_filename = "tables_using_reltime.txt", + .base_query = + "SELECT 'pg_catalog.reltime'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"reltime\" data type in user tables.\n" + "The \"reltime\" type has been removed in PostgreSQL version 12,\n" + "so this cluster cannot currently be upgraded. You can drop the\n" + "problem columns, or change them to another data type, and restart\n" + "the upgrade.\n"), + .threshold_version = 1100 + }, + { + .status = gettext_noop("Checking for removed \"tinterval\" data type in user tables"), + .report_filename = "tables_using_tinterval.txt", + .base_query = + "SELECT 'pg_catalog.tinterval'::pg_catalog.regtype AS oid", + .report_text = + gettext_noop("Your installation contains the \"tinterval\" data type in user tables.\n" + "The \"tinterval\" type has been removed in PostgreSQL version 12,\n" + "so this cluster cannot currently be upgraded. You can drop the\n" + "problem columns, or change them to another data type, and restart\n" + "the upgrade.\n"), + .threshold_version = 1100 + }, + + /* End of checks marker, must remain last */ + { + NULL, NULL, NULL, NULL, 0, NULL + } +}; + +/* + * check_for_data_types_usage() + * Detect whether there are any stored columns depending on given type(s) + * + * If so, write a report to the given file name and signal a failure to the + * user. + * + * The checks to run are defined in a DataTypesUsageChecks structure where + * each check has a metadata for explaining errors to the user, a base_query, + * a report filename and a function pointer hook for validating if the check + * should be executed given the cluster at hand. + * + * base_query should be a SELECT yielding a single column named "oid", + * containing the pg_type OIDs of one or more types that are known to have + * inconsistent on-disk representations across server versions. + * + * We check for the type(s) in tables, matviews, and indexes, but not views; + * there's no storage involved in a view. + */ +static void +check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks * checks) +{ + bool found = false; + bool *results; + PQExpBufferData report; + DataTypesUsageChecks *tmp = checks; + int n_data_types_usage_checks = 0; + + prep_status("Checking for data type usage"); + + /* Gather number of checks to perform */ + while (tmp->status != NULL) + { + n_data_types_usage_checks++; + tmp++; + } + + /* Prepare an array to store the results of checks in */ + results = pg_malloc0(sizeof(bool) * n_data_types_usage_checks); + + /* + * Connect to each database in the cluster and run all defined checks + * against that database before trying the next one. + */ + for (int dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) + { + DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; + PGconn *conn = connectToServer(cluster, active_db->db_name); + + for (int checknum = 0; checknum < n_data_types_usage_checks; checknum++) + { + PGresult *res; + int ntups; + int i_nspname; + int i_relname; + int i_attname; + FILE *script = NULL; + bool db_used = false; + char output_path[MAXPGPATH]; + DataTypesUsageChecks *cur_check = &checks[checknum]; + + if (cur_check->threshold_version == MANUAL_CHECK) + { + Assert(cur_check->version_hook); + + /* + * Make sure that the check applies to the current cluster + * version and skip if not. If no check hook has been defined + * we run the check for all versions. + */ + if (!cur_check->version_hook(cluster)) + continue; + } + else if (cur_check->threshold_version != ALL_VERSIONS) + { + if (GET_MAJOR_VERSION(cluster->major_version) > cur_check->threshold_version) + continue; + } + else + Assert(cur_check->threshold_version == ALL_VERSIONS); + + snprintf(output_path, sizeof(output_path), "%s/%s", + log_opts.basedir, + cur_check->report_filename); + + /* + * The type(s) of interest might be wrapped in a domain, array, + * composite, or range, and these container types can be nested + * (to varying extents depending on server version, but that's not + * of concern here). To handle all these cases we need a + * recursive CTE. + */ + res = executeQueryOrDie(conn, + "WITH RECURSIVE oids AS ( " + /* start with the type(s) returned by base_query */ + " %s " + " UNION ALL " + " SELECT * FROM ( " + /* inner WITH because we can only reference the CTE once */ + " WITH x AS (SELECT oid FROM oids) " + /* domains on any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' " + " UNION ALL " + /* arrays over any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' " + " UNION ALL " + /* composite types containing any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x " + " WHERE t.typtype = 'c' AND " + " t.oid = c.reltype AND " + " c.oid = a.attrelid AND " + " NOT a.attisdropped AND " + " a.atttypid = x.oid " + " UNION ALL " + /* ranges containing any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x " + " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid" + " ) foo " + ") " + /* now look for stored columns of any such type */ + "SELECT n.nspname, c.relname, a.attname " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_namespace n, " + " pg_catalog.pg_attribute a " + "WHERE c.oid = a.attrelid AND " + " NOT a.attisdropped AND " + " a.atttypid IN (SELECT oid FROM oids) AND " + " c.relkind IN (" + CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_MATVIEW) ", " + CppAsString2(RELKIND_INDEX) ") AND " + " c.relnamespace = n.oid AND " + /* exclude possible orphaned temp tables */ + " n.nspname !~ '^pg_temp_' AND " + " n.nspname !~ '^pg_toast_temp_' AND " + /* exclude system catalogs, too */ + " n.nspname NOT IN ('pg_catalog', 'information_schema')", + cur_check->base_query); + + ntups = PQntuples(res); + + /* + * The datatype was found, so extract the data and log to the + * requested filename. We need to open the file for appending + * since the check might have already found the type in another + * database earlier in the loop. + */ + if (ntups) + { + /* + * Make sure we have a buffer to save reports to now that we + * found a first failing check. + */ + if (!found) + initPQExpBuffer(&report); + found = true; + + /* + * If this is the first time we see an error for the check in + * question then print a status message of the failure. + */ + if (!results[checknum]) + { + pg_log(PG_REPORT, " failed check: %s", _(cur_check->status)); + appendPQExpBuffer(&report, "\n%s\n%s %s\n", + _(cur_check->report_text), + _("A list of the problem columns is in the file:"), + output_path); + } + results[checknum] = true; + + i_nspname = PQfnumber(res, "nspname"); + i_relname = PQfnumber(res, "relname"); + i_attname = PQfnumber(res, "attname"); + + for (int rowno = 0; rowno < ntups; rowno++) + { + if (script == NULL && (script = fopen_priv(output_path, "a")) == NULL) + pg_fatal("could not open file \"%s\": %m", output_path); + + if (!db_used) + { + fprintf(script, "In database: %s\n", active_db->db_name); + db_used = true; + } + fprintf(script, " %s.%s.%s\n", + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname), + PQgetvalue(res, rowno, i_attname)); + } + + if (script) + { + fclose(script); + script = NULL; + } + } + + PQclear(res); + } + + PQfinish(conn); + } + + if (found) + pg_fatal("Data type checks failed: %s", report.data); + + check_ok(); +} /* * fix_path_separator @@ -110,8 +595,6 @@ check_and_dump_old_cluster(bool live_check) check_is_install_user(&old_cluster); check_proper_datallowconn(&old_cluster); check_for_prepared_transactions(&old_cluster); - check_for_composite_data_type_usage(&old_cluster); - check_for_reg_data_type_usage(&old_cluster); check_for_isn_and_int8_passing_mismatch(&old_cluster); if (GET_MAJOR_VERSION(old_cluster.major_version) >= 1700) @@ -129,22 +612,7 @@ check_and_dump_old_cluster(bool live_check) check_old_cluster_subscription_state(); } - /* - * PG 16 increased the size of the 'aclitem' type, which breaks the - * on-disk format for existing data. - */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1500) - check_for_aclitem_data_type_usage(&old_cluster); - - /* - * PG 12 removed types abstime, reltime, tinterval. - */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100) - { - check_for_removed_data_type_usage(&old_cluster, "12", "abstime"); - check_for_removed_data_type_usage(&old_cluster, "12", "reltime"); - check_for_removed_data_type_usage(&old_cluster, "12", "tinterval"); - } + check_for_data_types_usage(&old_cluster, data_types_usage_checks); /* * PG 14 changed the function signature of encoding conversion functions. @@ -177,20 +645,11 @@ check_and_dump_old_cluster(bool live_check) check_for_tables_with_oids(&old_cluster); /* - * PG 12 changed the 'sql_identifier' type storage to be based on name, - * not varchar, which breaks on-disk format for existing data. So we need - * to prevent upgrade when used in user objects (tables, indexes, ...). - */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100) - old_11_check_for_sql_identifier_data_type_usage(&old_cluster); - - /* * Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged * hash indexes */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906) { - old_9_6_check_for_unknown_data_type_usage(&old_cluster); if (user_opts.check) old_9_6_invalidate_hash_indexes(&old_cluster, true); } @@ -199,14 +658,6 @@ check_and_dump_old_cluster(bool live_check) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 905) check_for_pg_role_prefix(&old_cluster); - if (GET_MAJOR_VERSION(old_cluster.major_version) == 904 && - old_cluster.controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER) - check_for_jsonb_9_4_usage(&old_cluster); - - /* Pre-PG 9.4 had a different 'line' data type internal format */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903) - old_9_3_check_for_line_data_type_usage(&old_cluster); - /* * While not a check option, we do this now because this is the only time * the old server is running. @@ -1123,220 +1574,6 @@ check_for_tables_with_oids(ClusterInfo *cluster) /* - * check_for_composite_data_type_usage() - * Check for system-defined composite types used in user tables. - * - * The OIDs of rowtypes of system catalogs and information_schema views - * can change across major versions; unlike user-defined types, we have - * no mechanism for forcing them to be the same in the new cluster. - * Hence, if any user table uses one, that's problematic for pg_upgrade. - */ -static void -check_for_composite_data_type_usage(ClusterInfo *cluster) -{ - bool found; - Oid firstUserOid; - char output_path[MAXPGPATH]; - char *base_query; - - prep_status("Checking for system-defined composite types in user tables"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_composite.txt"); - - /* - * Look for composite types that were made during initdb *or* belong to - * information_schema; that's important in case information_schema was - * dropped and reloaded. - * - * The cutoff OID here should match the source cluster's value of - * FirstNormalObjectId. We hardcode it rather than using that C #define - * because, if that #define is ever changed, our own version's value is - * NOT what to use. Eventually we may need a test on the source cluster's - * version to select the correct value. - */ - firstUserOid = 16384; - - base_query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t " - "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid " - " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')", - firstUserOid); - - found = check_for_data_types_usage(cluster, base_query, output_path); - - free(base_query); - - if (found) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains system-defined composite types in user tables.\n" - "These type OIDs are not stable across PostgreSQL versions,\n" - "so this cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* - * check_for_reg_data_type_usage() - * pg_upgrade only preserves these system values: - * pg_class.oid - * pg_type.oid - * pg_enum.oid - * - * Many of the reg* data types reference system catalog info that is - * not preserved, and hence these data types cannot be used in user - * tables upgraded by pg_upgrade. - */ -static void -check_for_reg_data_type_usage(ClusterInfo *cluster) -{ - bool found; - char output_path[MAXPGPATH]; - - prep_status("Checking for reg* data types in user tables"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_reg.txt"); - - /* - * Note: older servers will not have all of these reg* types, so we have - * to write the query like this rather than depending on casts to regtype. - */ - found = check_for_data_types_usage(cluster, - "SELECT oid FROM pg_catalog.pg_type t " - "WHERE t.typnamespace = " - " (SELECT oid FROM pg_catalog.pg_namespace " - " WHERE nspname = 'pg_catalog') " - " AND t.typname IN ( " - /* pg_class.oid is preserved, so 'regclass' is OK */ - " 'regcollation', " - " 'regconfig', " - " 'regdictionary', " - " 'regnamespace', " - " 'regoper', " - " 'regoperator', " - " 'regproc', " - " 'regprocedure' " - /* pg_authid.oid is preserved, so 'regrole' is OK */ - /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */ - " )", - output_path); - - if (found) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains one of the reg* data types in user tables.\n" - "These data types reference system OIDs that are not preserved by\n" - "pg_upgrade, so this cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* - * check_for_aclitem_data_type_usage - * - * aclitem changed its storage format in 16, so check for it. - */ -static void -check_for_aclitem_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"%s\" data type in user tables", - "aclitem"); - - snprintf(output_path, sizeof(output_path), "tables_using_aclitem.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.aclitem", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"aclitem\" data type in user tables.\n" - "The internal format of \"aclitem\" changed in PostgreSQL version 16\n" - "so this cluster cannot currently be upgraded. You can drop the\n" - "problem columns and restart the upgrade. A list of the problem\n" - "columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* - * check_for_removed_data_type_usage - * - * Check for in-core data types that have been removed. Callers know - * the exact list. - */ -static void -check_for_removed_data_type_usage(ClusterInfo *cluster, const char *version, - const char *datatype) -{ - char output_path[MAXPGPATH]; - char typename[NAMEDATALEN]; - - prep_status("Checking for removed \"%s\" data type in user tables", - datatype); - - snprintf(output_path, sizeof(output_path), "tables_using_%s.txt", - datatype); - snprintf(typename, sizeof(typename), "pg_catalog.%s", datatype); - - if (check_for_data_type_usage(cluster, typename, output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"%s\" data type in user tables.\n" - "The \"%s\" type has been removed in PostgreSQL version %s,\n" - "so this cluster cannot currently be upgraded. You can drop the\n" - "problem columns, or change them to another data type, and restart\n" - "the upgrade. A list of the problem columns is in the file:\n" - " %s", datatype, datatype, version, output_path); - } - else - check_ok(); -} - - -/* - * check_for_jsonb_9_4_usage() - * - * JSONB changed its storage format during 9.4 beta, so check for it. - */ -static void -check_for_jsonb_9_4_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"jsonb\" data type"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_jsonb.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.jsonb", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n" - "The internal format of \"jsonb\" changed during 9.4 beta so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* * check_for_pg_role_prefix() * * Versions older than 9.6 should not have any pg_* roles diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index c0bfb002d2d..92bcb693fbf 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -352,6 +352,9 @@ typedef struct } OSInfo; +/* Function signature for data type check version hook */ +typedef bool (*DataTypesUsageVersionCheck) (ClusterInfo *cluster); + /* * Global variables */ @@ -479,18 +482,10 @@ unsigned int str2uint(const char *str); /* version.c */ -bool check_for_data_types_usage(ClusterInfo *cluster, - const char *base_query, - const char *output_path); -bool check_for_data_type_usage(ClusterInfo *cluster, - const char *type_name, - const char *output_path); -void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster); -void old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster); +bool jsonb_9_4_check_applicable(ClusterInfo *cluster); void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode); -void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster); void report_extension_updates(ClusterInfo *cluster); /* parallel.c */ diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 9dc1399f368..2de6dffccda 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,235 +9,23 @@ #include "postgres_fe.h" -#include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" - /* - * check_for_data_types_usage() - * Detect whether there are any stored columns depending on given type(s) - * - * If so, write a report to the given file name, and return true. - * - * base_query should be a SELECT yielding a single column named "oid", - * containing the pg_type OIDs of one or more types that are known to have - * inconsistent on-disk representations across server versions. - * - * We check for the type(s) in tables, matviews, and indexes, but not views; - * there's no storage involved in a view. + * version_hook functions for check_for_data_types_usage in order to determine + * whether a data type check should be executed for the cluster in question or + * not. */ bool -check_for_data_types_usage(ClusterInfo *cluster, - const char *base_query, - const char *output_path) +jsonb_9_4_check_applicable(ClusterInfo *cluster) { - bool found = false; - FILE *script = NULL; - int dbnum; + /* JSONB changed its storage format during 9.4 beta */ + if (GET_MAJOR_VERSION(cluster->major_version) == 904 && + cluster->controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER) + return true; - for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - { - DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; - PGconn *conn = connectToServer(cluster, active_db->db_name); - PQExpBufferData querybuf; - PGresult *res; - bool db_used = false; - int ntups; - int rowno; - int i_nspname, - i_relname, - i_attname; - - /* - * The type(s) of interest might be wrapped in a domain, array, - * composite, or range, and these container types can be nested (to - * varying extents depending on server version, but that's not of - * concern here). To handle all these cases we need a recursive CTE. - */ - initPQExpBuffer(&querybuf); - appendPQExpBuffer(&querybuf, - "WITH RECURSIVE oids AS ( " - /* start with the type(s) returned by base_query */ - " %s " - " UNION ALL " - " SELECT * FROM ( " - /* inner WITH because we can only reference the CTE once */ - " WITH x AS (SELECT oid FROM oids) " - /* domains on any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' " - " UNION ALL " - /* arrays over any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' " - " UNION ALL " - /* composite types containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x " - " WHERE t.typtype = 'c' AND " - " t.oid = c.reltype AND " - " c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid = x.oid " - " UNION ALL " - /* ranges containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x " - " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid" - " ) foo " - ") " - /* now look for stored columns of any such type */ - "SELECT n.nspname, c.relname, a.attname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n, " - " pg_catalog.pg_attribute a " - "WHERE c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid IN (SELECT oid FROM oids) AND " - " c.relkind IN (" - CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) ", " - CppAsString2(RELKIND_INDEX) ") AND " - " c.relnamespace = n.oid AND " - /* exclude possible orphaned temp tables */ - " n.nspname !~ '^pg_temp_' AND " - " n.nspname !~ '^pg_toast_temp_' AND " - /* exclude system catalogs, too */ - " n.nspname NOT IN ('pg_catalog', 'information_schema')", - base_query); - - res = executeQueryOrDie(conn, "%s", querybuf.data); - - ntups = PQntuples(res); - i_nspname = PQfnumber(res, "nspname"); - i_relname = PQfnumber(res, "relname"); - i_attname = PQfnumber(res, "attname"); - for (rowno = 0; rowno < ntups; rowno++) - { - found = true; - if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) - pg_fatal("could not open file \"%s\": %m", output_path); - if (!db_used) - { - fprintf(script, "In database: %s\n", active_db->db_name); - db_used = true; - } - fprintf(script, " %s.%s.%s\n", - PQgetvalue(res, rowno, i_nspname), - PQgetvalue(res, rowno, i_relname), - PQgetvalue(res, rowno, i_attname)); - } - - PQclear(res); - - termPQExpBuffer(&querybuf); - - PQfinish(conn); - } - - if (script) - fclose(script); - - return found; -} - -/* - * check_for_data_type_usage() - * Detect whether there are any stored columns depending on the given type - * - * If so, write a report to the given file name, and return true. - * - * type_name should be a fully qualified type name. This is just a - * trivial wrapper around check_for_data_types_usage() to convert a - * type name into a base query. - */ -bool -check_for_data_type_usage(ClusterInfo *cluster, - const char *type_name, - const char *output_path) -{ - bool found; - char *base_query; - - base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid", - type_name); - - found = check_for_data_types_usage(cluster, base_query, output_path); - - free(base_query); - - return found; -} - - -/* - * old_9_3_check_for_line_data_type_usage() - * 9.3 -> 9.4 - * Fully implement the 'line' data type in 9.4, which previously returned - * "not enabled" by default and was only functionally enabled with a - * compile-time switch; as of 9.4 "line" has a different on-disk - * representation format. - */ -void -old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"line\" data type"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_line.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.line", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"line\" data type in user tables.\n" - "This data type changed its internal and input/output format\n" - "between your old and new versions so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - - -/* - * old_9_6_check_for_unknown_data_type_usage() - * 9.6 -> 10 - * It's no longer allowed to create tables or views with "unknown"-type - * columns. We do not complain about views with such columns, because - * they should get silently converted to "text" columns during the DDL - * dump and reload; it seems unlikely to be worth making users do that - * by hand. However, if there's a table with such a column, the DDL - * reload will fail, so we should pre-detect that rather than failing - * mid-upgrade. Worse, if there's a matview with such a column, the - * DDL reload will silently change it to "text" which won't match the - * on-disk storage (which is like "cstring"). So we *must* reject that. - */ -void -old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"unknown\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_unknown.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.unknown", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"unknown\" data type in user tables.\n" - "This data type is no longer allowed in tables, so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); + return false; } /* @@ -352,41 +140,6 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) } /* - * old_11_check_for_sql_identifier_data_type_usage() - * 11 -> 12 - * In 12, the sql_identifier data type was switched from name to varchar, - * which does affect the storage (name is by-ref, but not varlena). This - * means user tables using sql_identifier for columns are broken because - * the on-disk format is different. - */ -void -old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"sql_identifier\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_sql_identifier.txt"); - - if (check_for_data_type_usage(cluster, "information_schema.sql_identifier", - output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables.\n" - "The on-disk format for this data type has changed, so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - - -/* * report_extension_updates() * Report extensions that should be updated. */ |