diff options
-rw-r--r-- | doc/src/sgml/ref/pgupgrade.sgml | 59 | ||||
-rw-r--r-- | src/bin/pg_upgrade/TESTING | 6 | ||||
-rw-r--r-- | src/bin/pg_upgrade/check.c | 29 | ||||
-rw-r--r-- | src/bin/pg_upgrade/controldata.c | 21 | ||||
-rw-r--r-- | src/bin/pg_upgrade/dump.c | 4 | ||||
-rw-r--r-- | src/bin/pg_upgrade/file.c | 14 | ||||
-rw-r--r-- | src/bin/pg_upgrade/info.c | 4 | ||||
-rw-r--r-- | src/bin/pg_upgrade/option.c | 7 | ||||
-rw-r--r-- | src/bin/pg_upgrade/pg_upgrade.c | 16 | ||||
-rw-r--r-- | src/bin/pg_upgrade/pg_upgrade.h | 5 | ||||
-rw-r--r-- | src/bin/pg_upgrade/relfilenumber.c | 384 | ||||
-rw-r--r-- | src/bin/pg_upgrade/t/006_transfer_modes.pl | 10 | ||||
-rw-r--r-- | src/common/file_utils.c | 14 | ||||
-rw-r--r-- | src/include/common/file_utils.h | 1 |
14 files changed, 540 insertions, 34 deletions
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 5db761d1ff1..da261619043 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -244,7 +244,8 @@ PostgreSQL documentation <listitem> <para> Copy files to the new cluster. This is the default. (See also - <option>--link</option> and <option>--clone</option>.) + <option>--link</option>, <option>--clone</option>, + <option>--copy-file-range</option>, and <option>--swap</option>.) </para> </listitem> </varlistentry> @@ -263,6 +264,32 @@ PostgreSQL documentation </varlistentry> <varlistentry> + <term><option>--swap</option></term> + <listitem> + <para> + Move the data directories from the old cluster to the new cluster. + Then, replace the catalog files with those generated for the new + cluster. This mode can outperform <option>--link</option>, + <option>--clone</option>, <option>--copy</option>, and + <option>--copy-file-range</option>, especially on clusters with many + relations. + </para> + <para> + However, this mode creates many garbage files in the old cluster, which + can prolong the file synchronization step if + <option>--sync-method=syncfs</option> is used. Therefore, it is + recommended to use <option>--sync-method=fsync</option> with + <option>--swap</option>. + </para> + <para> + Additionally, once the file transfer step begins, the old cluster will + be destructively modified and therefore will no longer be safe to + start. See <xref linkend="pgupgrade-step-revert"/> for details. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><option>--sync-method=</option><replaceable>method</replaceable></term> <listitem> <para> @@ -530,6 +557,10 @@ NET STOP postgresql-&majorversion; is started. Clone mode also requires that the old and new data directories be in the same file system. This mode is only available on certain operating systems and file systems. 
+ Swap mode may be the fastest if there are many relations, but you will not + be able to access your old cluster once the file transfer step begins. + Swap mode also requires that the old and new cluster data directories be + in the same file system. </para> <para> @@ -889,6 +920,32 @@ psql --username=postgres --file=script.sql postgres </itemizedlist></para> </listitem> + + <listitem> + <para> + If the <option>--swap</option> option was used, the old cluster might + be destructively modified: + + <itemizedlist> + <listitem> + <para> + If <command>pg_upgrade</command> aborts before reporting that the + old cluster is no longer safe to start, the old cluster was + unmodified; it can be restarted. + </para> + </listitem> + + <listitem> + <para> + If <command>pg_upgrade</command> has reported that the old cluster + is no longer safe to start, the old cluster was destructively + modified. The old cluster will need to be restored from backup in + this case. + </para> + </listitem> + </itemizedlist> + </para> + </listitem> </itemizedlist></para> </step> </procedure> diff --git a/src/bin/pg_upgrade/TESTING b/src/bin/pg_upgrade/TESTING index 00842ac6ec3..c3d463c9c29 100644 --- a/src/bin/pg_upgrade/TESTING +++ b/src/bin/pg_upgrade/TESTING @@ -20,13 +20,13 @@ export oldinstall=...otherversion/ (old version's install base path) See DETAILS below for more information about creation of the dump. You can also test the different transfer modes (--copy, --link, ---clone, --copy-file-range) by setting the environment variable +--clone, --copy-file-range, --swap) by setting the environment variable PG_TEST_PG_UPGRADE_MODE to the respective command-line option, like make check PG_TEST_PG_UPGRADE_MODE=--link -The default is --copy. Note that the other modes are not supported on -all operating systems. +The default is --copy. Note that not all modes are supported on all +operating systems. 
DETAILS ------- diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 117f461d46a..02d9146e5ed 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -709,7 +709,34 @@ check_new_cluster(void) check_copy_file_range(); break; case TRANSFER_MODE_LINK: - check_hard_link(); + check_hard_link(TRANSFER_MODE_LINK); + break; + case TRANSFER_MODE_SWAP: + + /* + * We do the hard link check for --swap, too, since it's an easy + * way to verify the clusters are in the same file system. This + * allows us to take some shortcuts in the file synchronization + * step. With some more effort, we could probably support the + * separate-file-system use case, but this mode is unlikely to + * offer much benefit if we have to copy the files across file + * system boundaries. + */ + check_hard_link(TRANSFER_MODE_SWAP); + + /* + * There are a few known issues with using --swap to upgrade from + * versions older than 10. For example, the sequence tuple format + * changed in v10, and the visibility map format changed in 9.6. + * While such problems are not insurmountable (and we may have to + * deal with similar problems in the future, anyway), it doesn't + * seem worth the effort to support swap mode for upgrades from + * long-unsupported versions. 
+ */ + if (GET_MAJOR_VERSION(old_cluster.major_version) < 1000) + pg_fatal("Swap mode can only upgrade clusters from PostgreSQL version %s and later.", + "10"); + break; } diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index bd49ea867bf..47ee27ec835 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -751,7 +751,7 @@ check_control_data(ControlData *oldctrl, void -disable_old_cluster(void) +disable_old_cluster(transferMode transfer_mode) { char old_path[MAXPGPATH], new_path[MAXPGPATH]; @@ -766,10 +766,17 @@ disable_old_cluster(void) old_path, new_path); check_ok(); - pg_log(PG_REPORT, "\n" - "If you want to start the old cluster, you will need to remove\n" - "the \".old\" suffix from %s/global/pg_control.old.\n" - "Because \"link\" mode was used, the old cluster cannot be safely\n" - "started once the new cluster has been started.", - old_cluster.pgdata); + if (transfer_mode == TRANSFER_MODE_LINK) + pg_log(PG_REPORT, "\n" + "If you want to start the old cluster, you will need to remove\n" + "the \".old\" suffix from %s/global/pg_control.old.\n" + "Because \"link\" mode was used, the old cluster cannot be safely\n" + "started once the new cluster has been started.", + old_cluster.pgdata); + else if (transfer_mode == TRANSFER_MODE_SWAP) + pg_log(PG_REPORT, "\n" + "Because \"swap\" mode was used, the old cluster can no longer be\n" + "safely started."); + else + pg_fatal("unrecognized transfer mode"); } diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index b8fd0d0acee..23cb08e8347 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -52,9 +52,11 @@ generate_old_dump(void) snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid); parallel_exec_prog(log_file_name, NULL, - "\"%s/pg_dump\" %s --no-data %s --sequence-data --quote-all-identifiers " + "\"%s/pg_dump\" %s --no-data %s %s --quote-all-identifiers " "--binary-upgrade 
--format=custom %s --no-sync --file=\"%s/%s\" %s", new_cluster.bindir, cluster_conn_opts(&old_cluster), + (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? + "" : "--sequence-data", log_opts.verbose ? "--verbose" : "", user_opts.do_statistics ? "" : "--no-statistics", log_opts.dumpdir, diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 7fd1991204a..91ed16acb08 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -434,7 +434,7 @@ check_copy_file_range(void) } void -check_hard_link(void) +check_hard_link(transferMode transfer_mode) { char existing_file[MAXPGPATH]; char new_link_file[MAXPGPATH]; @@ -444,8 +444,16 @@ check_hard_link(void) unlink(new_link_file); /* might fail */ if (link(existing_file, new_link_file) < 0) - pg_fatal("could not create hard link between old and new data directories: %m\n" - "In link mode the old and new data directories must be on the same file system."); + { + if (transfer_mode == TRANSFER_MODE_LINK) + pg_fatal("could not create hard link between old and new data directories: %m\n" + "In link mode the old and new data directories must be on the same file system."); + else if (transfer_mode == TRANSFER_MODE_SWAP) + pg_fatal("could not create hard link between old and new data directories: %m\n" + "In swap mode the old and new data directories must be on the same file system."); + else + pg_fatal("unrecognized transfer mode"); + } unlink(new_link_file); } diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index ad52de8b607..4b7a56f5b3b 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -490,7 +490,7 @@ get_rel_infos_query(void) " FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n " " ON c.relnamespace = n.oid " " WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) ") AND " + CppAsString2(RELKIND_MATVIEW) "%s) AND " /* exclude possible orphaned temp tables */ " ((n.nspname !~ '^pg_temp_' AND " " n.nspname !~ 
'^pg_toast_temp_' AND " @@ -499,6 +499,8 @@ get_rel_infos_query(void) " c.oid >= %u::pg_catalog.oid) OR " " (n.nspname = 'pg_catalog' AND " " relname IN ('pg_largeobject') ))), ", + (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? + ", " CppAsString2(RELKIND_SEQUENCE) : "", FirstNormalObjectId); /* diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c index 188dd8d8a8b..7fd7f1d33fc 100644 --- a/src/bin/pg_upgrade/option.c +++ b/src/bin/pg_upgrade/option.c @@ -62,6 +62,7 @@ parseCommandLine(int argc, char *argv[]) {"sync-method", required_argument, NULL, 4}, {"no-statistics", no_argument, NULL, 5}, {"set-char-signedness", required_argument, NULL, 6}, + {"swap", no_argument, NULL, 7}, {NULL, 0, NULL, 0} }; @@ -228,6 +229,11 @@ parseCommandLine(int argc, char *argv[]) else pg_fatal("invalid argument for option %s", "--set-char-signedness"); break; + + case 7: + user_opts.transfer_mode = TRANSFER_MODE_SWAP; + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), os_info.progname); @@ -325,6 +331,7 @@ usage(void) printf(_(" --no-statistics do not import statistics from old cluster\n")); printf(_(" --set-char-signedness=OPTION set new cluster char signedness to \"signed\" or\n" " \"unsigned\"\n")); + printf(_(" --swap move data directories to new cluster\n")); printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\n" diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 174cd920840..9295e46aed3 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -170,12 +170,14 @@ main(int argc, char **argv) /* * Most failures happen in create_new_objects(), which has completed at - * this point. We do this here because it is just before linking, which - * will link the old and new cluster data files, preventing the old - * cluster from being safely started once the new cluster is started. 
+ * this point. We do this here because it is just before file transfer, + * which for --link will make it unsafe to start the old cluster once the + * new cluster is started, and for --swap will make it unsafe to start the + * old cluster at all. */ - if (user_opts.transfer_mode == TRANSFER_MODE_LINK) - disable_old_cluster(); + if (user_opts.transfer_mode == TRANSFER_MODE_LINK || + user_opts.transfer_mode == TRANSFER_MODE_SWAP) + disable_old_cluster(user_opts.transfer_mode); transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr, old_cluster.pgdata, new_cluster.pgdata); @@ -212,8 +214,10 @@ main(int argc, char **argv) { prep_status("Sync data directory to disk"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, - "\"%s/initdb\" --sync-only \"%s\" --sync-method %s", + "\"%s/initdb\" --sync-only %s \"%s\" --sync-method %s", new_cluster.bindir, + (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? + "--no-sync-data-files" : "", new_cluster.pgdata, user_opts.sync_method); check_ok(); diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 4c9d0172149..69c965bb7d0 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -262,6 +262,7 @@ typedef enum TRANSFER_MODE_COPY, TRANSFER_MODE_COPY_FILE_RANGE, TRANSFER_MODE_LINK, + TRANSFER_MODE_SWAP, } transferMode; /* @@ -391,7 +392,7 @@ void create_script_for_old_cluster_deletion(char **deletion_script_file_name); void get_control_data(ClusterInfo *cluster); void check_control_data(ControlData *oldctrl, ControlData *newctrl); -void disable_old_cluster(void); +void disable_old_cluster(transferMode transfer_mode); /* dump.c */ @@ -423,7 +424,7 @@ void rewriteVisibilityMap(const char *fromfile, const char *tofile, const char *schemaName, const char *relName); void check_file_clone(void); void check_copy_file_range(void); -void check_hard_link(void); +void check_hard_link(transferMode transfer_mode); /* fopen_priv() is no longer different from fopen() */ #define 
fopen_priv(path, mode) fopen(path, mode) diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index 8c23c583172..aa205aec51d 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -11,11 +11,92 @@ #include <sys/stat.h> +#include "common/file_perm.h" +#include "common/file_utils.h" +#include "common/int.h" +#include "common/logging.h" #include "pg_upgrade.h" static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit); +/* + * The following set of sync_queue_* functions are used for --swap to reduce + * the amount of time spent synchronizing the swapped catalog files. When a + * file is added to the queue, we also alert the file system that we'd like it + * to be persisted to disk in the near future (if that operation is supported + * by the current platform). Once the queue is full, all of the files are + * synchronized to disk. This strategy should generally be much faster than + * simply calling fsync() on the files right away. 
+ * + * The general usage pattern should be something like: + * + * for (int i = 0; i < num_files; i++) + * sync_queue_push(files[i]); + * + * // be sure to sync any remaining files in the queue + * sync_queue_sync_all(); + * sync_queue_destroy(); + */ + +#define SYNC_QUEUE_MAX_LEN (1024) + +static char *sync_queue[SYNC_QUEUE_MAX_LEN]; +static bool sync_queue_inited; +static int sync_queue_len; + +static inline void +sync_queue_init(void) +{ + if (sync_queue_inited) + return; + + sync_queue_inited = true; + for (int i = 0; i < SYNC_QUEUE_MAX_LEN; i++) + sync_queue[i] = palloc(MAXPGPATH); +} + +static inline void +sync_queue_sync_all(void) +{ + if (!sync_queue_inited) + return; + + for (int i = 0; i < sync_queue_len; i++) + { + if (fsync_fname(sync_queue[i], false) != 0) + pg_fatal("could not synchronize file \"%s\": %m", sync_queue[i]); + } + + sync_queue_len = 0; +} + +static inline void +sync_queue_push(const char *fname) +{ + sync_queue_init(); + + pre_sync_fname(fname, false); + + strncpy(sync_queue[sync_queue_len++], fname, MAXPGPATH); + if (sync_queue_len >= SYNC_QUEUE_MAX_LEN) + sync_queue_sync_all(); +} + +static inline void +sync_queue_destroy(void) +{ + if (!sync_queue_inited) + return; + + sync_queue_inited = false; + sync_queue_len = 0; + for (int i = 0; i < SYNC_QUEUE_MAX_LEN; i++) + { + pfree(sync_queue[i]); + sync_queue[i] = NULL; + } +} /* * transfer_all_new_tablespaces() @@ -41,6 +122,9 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, case TRANSFER_MODE_LINK: prep_status_progress("Linking user relation files"); break; + case TRANSFER_MODE_SWAP: + prep_status_progress("Swapping data directories"); + break; } /* @@ -125,6 +209,287 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, /* We allocate something even for n_maps == 0 */ pg_free(mappings); } + + /* + * Make sure anything pending synchronization in swap mode is fully + * persisted to disk. This is a no-op for other transfer modes. 
+ */ + sync_queue_sync_all(); + sync_queue_destroy(); +} + +/* + * prepare_for_swap() + * + * This function moves the database directory from the old cluster to the new + * cluster in preparation for moving the pg_restore-generated catalog files + * into place. Returns false if the database with the given OID does not have + * a directory in the given tablespace, otherwise returns true. + * + * This function will return paths in the following variables, which the caller + * must ensure are sized to MAXPGPATH bytes: + * + * old_catalog_dir: The directory for the old cluster's catalog files. + * new_db_dir: The new cluster's database directory for db_oid. + * moved_db_dir: Destination for the pg_restore-generated database directory. + */ +static bool +prepare_for_swap(const char *old_tablespace, Oid db_oid, + char *old_catalog_dir, char *new_db_dir, char *moved_db_dir) +{ + const char *new_tablespace; + const char *old_tblspc_suffix; + const char *new_tblspc_suffix; + char old_tblspc[MAXPGPATH]; + char new_tblspc[MAXPGPATH]; + char moved_tblspc[MAXPGPATH]; + char old_db_dir[MAXPGPATH]; + struct stat st; + + if (strcmp(old_tablespace, old_cluster.pgdata) == 0) + { + new_tablespace = new_cluster.pgdata; + new_tblspc_suffix = "/base"; + old_tblspc_suffix = "/base"; + } + else + { + /* + * XXX: The below line is a hack to deal with the fact that we + * presently don't have an easy way to find the corresponding new + * tablespace's path. This will need to be fixed if/when we add + * pg_upgrade support for in-place tablespaces. + */ + new_tablespace = old_tablespace; + + new_tblspc_suffix = new_cluster.tablespace_suffix; + old_tblspc_suffix = old_cluster.tablespace_suffix; + } + + /* Old and new cluster paths. 
*/ + snprintf(old_tblspc, sizeof(old_tblspc), "%s%s", old_tablespace, old_tblspc_suffix); + snprintf(new_tblspc, sizeof(new_tblspc), "%s%s", new_tablespace, new_tblspc_suffix); + snprintf(old_db_dir, sizeof(old_db_dir), "%s/%u", old_tblspc, db_oid); + snprintf(new_db_dir, MAXPGPATH, "%s/%u", new_tblspc, db_oid); + + /* + * Paths for "moved aside" stuff. We intentionally put these in the old + * cluster so that the delete_old_cluster.{sh,bat} script handles them. + */ + snprintf(moved_tblspc, sizeof(moved_tblspc), "%s/moved_for_upgrade", old_tblspc); + snprintf(old_catalog_dir, MAXPGPATH, "%s/%u_old_catalogs", moved_tblspc, db_oid); + snprintf(moved_db_dir, MAXPGPATH, "%s/%u", moved_tblspc, db_oid); + + /* Check that the database directory exists in the given tablespace. */ + if (stat(old_db_dir, &st) != 0) + { + if (errno != ENOENT) + pg_fatal("could not stat file \"%s\": %m", old_db_dir); + return false; + } + + /* Create directory for stuff that is moved aside. */ + if (pg_mkdir_p(moved_tblspc, pg_dir_create_mode) != 0 && errno != EEXIST) + pg_fatal("could not create directory \"%s\"", moved_tblspc); + + /* Create directory for old catalog files. */ + if (pg_mkdir_p(old_catalog_dir, pg_dir_create_mode) != 0) + pg_fatal("could not create directory \"%s\"", old_catalog_dir); + + /* Move the new cluster's database directory aside. */ + if (rename(new_db_dir, moved_db_dir) != 0) + pg_fatal("could not rename \"%s\" to \"%s\"", new_db_dir, moved_db_dir); + + /* Move the old cluster's database directory into place. */ + if (rename(old_db_dir, new_db_dir) != 0) + pg_fatal("could not rename \"%s\" to \"%s\"", old_db_dir, new_db_dir); + + return true; +} + +/* + * FileNameMapCmp() + * + * qsort() comparator for FileNameMap that sorts by RelFileNumber. 
+ */ +static int +FileNameMapCmp(const void *a, const void *b) +{ + const FileNameMap *map1 = (const FileNameMap *) a; + const FileNameMap *map2 = (const FileNameMap *) b; + + return pg_cmp_u32(map1->relfilenumber, map2->relfilenumber); +} + +/* + * parse_relfilenumber() + * + * Attempt to parse the RelFileNumber of the given file name. If we can't, + * return InvalidRelFileNumber. Note that this code snippet is lifted from + * parse_filename_for_nontemp_relation(). + */ +static RelFileNumber +parse_relfilenumber(const char *filename) +{ + char *endp; + unsigned long n; + + if (filename[0] < '1' || filename[0] > '9') + return InvalidRelFileNumber; + + errno = 0; + n = strtoul(filename, &endp, 10); + if (errno || filename == endp || n <= 0 || n > PG_UINT32_MAX) + return InvalidRelFileNumber; + + return (RelFileNumber) n; +} + +/* + * swap_catalog_files() + * + * Moves the old catalog files aside, and moves the new catalog files into + * place. prepare_for_swap() should have already been called (and returned + * true) for the tablespace/database being transferred. + * + * The arguments for the following parameters should be the corresponding + * variables returned by prepare_for_swap(): + * + * old_catalog_dir: The directory for the old cluster's catalog files. + * new_db_dir: New cluster's database directory (for DB being transferred). + * moved_db_dir: Moved-aside pg_restore-generated database directory. + */ +static void +swap_catalog_files(FileNameMap *maps, int size, const char *old_catalog_dir, + const char *new_db_dir, const char *moved_db_dir) +{ + DIR *dir; + struct dirent *de; + char path[MAXPGPATH]; + char dest[MAXPGPATH]; + RelFileNumber rfn; + + /* Move the old catalog files aside. 
*/ + dir = opendir(new_db_dir); + if (dir == NULL) + pg_fatal("could not open directory \"%s\": %m", new_db_dir); + while (errno = 0, (de = readdir(dir)) != NULL) + { + snprintf(path, sizeof(path), "%s/%s", new_db_dir, de->d_name); + if (get_dirent_type(path, de, false, PG_LOG_ERROR) != PGFILETYPE_REG) + continue; + + rfn = parse_relfilenumber(de->d_name); + if (RelFileNumberIsValid(rfn)) + { + FileNameMap key = {.relfilenumber = rfn}; + + if (bsearch(&key, maps, size, sizeof(FileNameMap), FileNameMapCmp)) + continue; + } + + snprintf(dest, sizeof(dest), "%s/%s", old_catalog_dir, de->d_name); + if (rename(path, dest) != 0) + pg_fatal("could not rename \"%s\" to \"%s\": %m", path, dest); + } + if (errno) + pg_fatal("could not read directory \"%s\": %m", new_db_dir); + (void) closedir(dir); + + /* Move the new catalog files into place. */ + dir = opendir(moved_db_dir); + if (dir == NULL) + pg_fatal("could not open directory \"%s\": %m", moved_db_dir); + while (errno = 0, (de = readdir(dir)) != NULL) + { + snprintf(path, sizeof(path), "%s/%s", moved_db_dir, de->d_name); + if (get_dirent_type(path, de, false, PG_LOG_ERROR) != PGFILETYPE_REG) + continue; + + rfn = parse_relfilenumber(de->d_name); + if (RelFileNumberIsValid(rfn)) + { + FileNameMap key = {.relfilenumber = rfn}; + + if (bsearch(&key, maps, size, sizeof(FileNameMap), FileNameMapCmp)) + continue; + } + + snprintf(dest, sizeof(dest), "%s/%s", new_db_dir, de->d_name); + if (rename(path, dest) != 0) + pg_fatal("could not rename \"%s\" to \"%s\": %m", path, dest); + + /* + * We don't fsync() the database files in the file synchronization + * stage of pg_upgrade in swap mode, so we need to synchronize them + * ourselves. We only do this for the catalog files because they were + * created during pg_restore with fsync=off. We assume that the user + * data files were properly persisted to disk when the user last + * shut it down. 
+ */ + if (user_opts.do_sync) + sync_queue_push(dest); + } + if (errno) + pg_fatal("could not read directory \"%s\": %m", moved_db_dir); + (void) closedir(dir); + + /* Ensure the directory entries are persisted to disk. */ + if (fsync_fname(new_db_dir, true) != 0) + pg_fatal("could not synchronize directory \"%s\": %m", new_db_dir); + if (fsync_parent_path(new_db_dir) != 0) + pg_fatal("could not synchronize parent directory of \"%s\": %m", new_db_dir); +} + +/* + * do_swap() + * + * Perform the required steps for --swap for a single database. In short this + * moves the old cluster's database directory into the new cluster and then + * replaces any files for system catalogs with the ones that were generated + * during pg_restore. + */ +static void +do_swap(FileNameMap *maps, int size, char *old_tablespace) +{ + char old_catalog_dir[MAXPGPATH]; + char new_db_dir[MAXPGPATH]; + char moved_db_dir[MAXPGPATH]; + + /* + * We perform many lookups on maps by relfilenumber in swap mode, so make + * sure it's sorted by relfilenumber. maps should already be sorted by + * OID, so in general this shouldn't have much work to do. + */ + qsort(maps, size, sizeof(FileNameMap), FileNameMapCmp); + + /* + * If an old tablespace is given, we only need to process that one. If no + * old tablespace is specified, we need to process all the tablespaces on + * the system. 
+ */ + if (old_tablespace) + { + if (prepare_for_swap(old_tablespace, maps[0].db_oid, + old_catalog_dir, new_db_dir, moved_db_dir)) + swap_catalog_files(maps, size, + old_catalog_dir, new_db_dir, moved_db_dir); + } + else + { + if (prepare_for_swap(old_cluster.pgdata, maps[0].db_oid, + old_catalog_dir, new_db_dir, moved_db_dir)) + swap_catalog_files(maps, size, + old_catalog_dir, new_db_dir, moved_db_dir); + + for (int tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + { + if (prepare_for_swap(os_info.old_tablespaces[tblnum], maps[0].db_oid, + old_catalog_dir, new_db_dir, moved_db_dir)) + swap_catalog_files(maps, size, + old_catalog_dir, new_db_dir, moved_db_dir); + } + } } /* @@ -145,6 +510,20 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER) vm_must_add_frozenbit = true; + /* --swap has its own subroutine */ + if (user_opts.transfer_mode == TRANSFER_MODE_SWAP) + { + /* + * We don't support --swap to upgrade from versions that require + * rewriting the visibility map. We should've failed already if + * someone tries to do that. 
+ */ + Assert(!vm_must_add_frozenbit); + + do_swap(maps, size, old_tablespace); + return; + } + for (mapnum = 0; mapnum < size; mapnum++) { if (old_tablespace == NULL || @@ -259,6 +638,11 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"", old_file, new_file); linkFile(old_file, new_file, map->nspname, map->relname); + break; + case TRANSFER_MODE_SWAP: + /* swap mode is handled in its own code path */ + pg_fatal("should never happen"); + break; } } } diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl index 518e0994145..34fddbcdab5 100644 --- a/src/bin/pg_upgrade/t/006_transfer_modes.pl +++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl @@ -16,6 +16,15 @@ sub test_mode my $old = PostgreSQL::Test::Cluster->new('old', install_path => $ENV{oldinstall}); my $new = PostgreSQL::Test::Cluster->new('new'); + # --swap can't be used to upgrade from versions older than 10, so just skip + # the test if the old cluster version is too old. + if ($old->pg_version < 10 && $mode eq "--swap") + { + $old->clean_node(); + $new->clean_node(); + return; + } + if (defined($ENV{oldinstall})) { # Checksums are now enabled by default, but weren't before 18, so pass @@ -97,5 +106,6 @@ test_mode('--clone'); test_mode('--copy'); test_mode('--copy-file-range'); test_mode('--link'); +test_mode('--swap'); done_testing(); diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 1e6250cc190..7b62687a2aa 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -45,9 +45,6 @@ */ #define MINIMUM_VERSION_FOR_PG_WAL 100000 -#ifdef PG_FLUSH_DATA_WORKS -static int pre_sync_fname(const char *fname, bool isdir); -#endif static void walkdir(const char *path, int (*action) (const char *fname, bool isdir), bool process_symlinks, @@ -352,16 +349,16 @@ walkdir(const char *path, } /* - * Hint to the OS that it should get ready to fsync() this file. 
+ * Hint to the OS that it should get ready to fsync() this file, if supported + * by the platform. * * Ignores errors trying to open unreadable files, and reports other errors * non-fatally. */ -#ifdef PG_FLUSH_DATA_WORKS - -static int +int pre_sync_fname(const char *fname, bool isdir) { +#ifdef PG_FLUSH_DATA_WORKS int fd; fd = open(fname, O_RDONLY | PG_BINARY, 0); @@ -388,11 +385,10 @@ pre_sync_fname(const char *fname, bool isdir) #endif (void) close(fd); +#endif /* PG_FLUSH_DATA_WORKS */ return 0; } -#endif /* PG_FLUSH_DATA_WORKS */ - /* * fsync_fname -- Try to fsync a file or directory * diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index 8274bc877ab..9fd88953e43 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -33,6 +33,7 @@ typedef enum DataDirSyncMethod struct iovec; /* avoid including port/pg_iovec.h here */ #ifdef FRONTEND +extern int pre_sync_fname(const char *fname, bool isdir); extern int fsync_fname(const char *fname, bool isdir); extern void sync_pgdata(const char *pg_data, int serverVersion, DataDirSyncMethod sync_method, bool sync_data_files); |