From df220714e50f2e924c57f4a8d9f578cf6c92a73f Mon Sep 17 00:00:00 2001
From: Noah Misch
Date: Thu, 1 Feb 2024 13:44:19 -0800
Subject: Handle interleavings between CREATE DATABASE steps and base backup.

Restoring a base backup taken in the middle of CreateDirAndVersionFile()
or write_relmap_file() would lose the function's effects. The symptom
was absence of the database directory, PG_VERSION file, or
pg_filenode.map. If missing the directory, recovery would fail. Either
missing file would not fail recovery but would render the new database
unusable.

Fix CreateDirAndVersionFile() with the transam/README "action first and
then write a WAL entry" strategy. That has a side benefit of moving
filesystem mutations out of a critical section, reducing the ways to
PANIC. Fix the write_relmap_file() call with a lock acquisition, so it
interacts with checkpoints like non-CREATE DATABASE calls do. Back-patch
to v15, where commit 9c08aea6a3090a396be334cc58c511edab05776a introduced
STRATEGY=WAL_LOG and made it the default.

Discussion: https://postgr.es/m/20240130195003.0a.nmisch@google.com
---
 src/backend/commands/dbcommands.c | 44 ++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

(limited to 'src/backend/commands/dbcommands.c')

diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 0a97a113144..dc420acae92 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -462,35 +462,12 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
 	char buf[16];
 
 	/*
-	 * Prepare version data before starting a critical section.
-	 *
-	 * Note that we don't have to copy this from the source database; there's
-	 * only one legal value.
+	 * Note that we don't have to copy version data from the source database;
+	 * there's only one legal value.
 	 */
 	sprintf(buf, "%s\n", PG_MAJORVERSION);
 	nbytes = strlen(PG_MAJORVERSION) + 1;
 
-	/* If we are not in WAL replay then write the WAL. */
-	if (!isRedo)
-	{
-		xl_dbase_create_wal_log_rec xlrec;
-		XLogRecPtr lsn;
-
-		START_CRIT_SECTION();
-
-		xlrec.db_id = dbid;
-		xlrec.tablespace_id = tsid;
-
-		XLogBeginInsert();
-		XLogRegisterData((char *) (&xlrec),
-						 sizeof(xl_dbase_create_wal_log_rec));
-
-		lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
-
-		/* As always, WAL must hit the disk before the data update does. */
-		XLogFlush(lsn);
-	}
-
 	/* Create database directory. */
 	if (MakePGDirectory(dbpath) < 0)
 	{
@@ -534,9 +511,24 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
 	/* Close the version file. */
 	CloseTransientFile(fd);
 
-	/* Critical section done. */
+	/* If we are not in WAL replay then write the WAL. */
 	if (!isRedo)
+	{
+		xl_dbase_create_wal_log_rec xlrec;
+
+		START_CRIT_SECTION();
+
+		xlrec.db_id = dbid;
+		xlrec.tablespace_id = tsid;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) (&xlrec),
+						 sizeof(xl_dbase_create_wal_log_rec));
+
+		(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
+
 		END_CRIT_SECTION();
+	}
 }
 
 /*
-- 
cgit v1.2.3