diff options
author | Robert Haas <rhaas@postgresql.org> | 2022-03-29 11:31:43 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2022-03-29 11:48:36 -0400 |
commit | 9c08aea6a3090a396be334cc58c511edab05776a (patch) | |
tree | c15e6e9fa45a18173a5bbd67ff4a4c889e616cde /src/backend/access | |
parent | bf902c13930c268388644100663f2998868b6e85 (diff) | |
download | postgresql-9c08aea6a3090a396be334cc58c511edab05776a.tar.gz postgresql-9c08aea6a3090a396be334cc58c511edab05776a.zip |
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/heap/heapam_handler.c | 6 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 2 | ||||
-rw-r--r-- | src/backend/access/rmgrdesc/dbasedesc.c | 20 | ||||
-rw-r--r-- | src/backend/access/transam/xlogutils.c | 6 |
4 files changed, 23 insertions, 11 deletions
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 39ef8a0b77d..dee264e8596 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -593,7 +593,7 @@ heapam_relation_set_new_filenode(Relation rel, */ *minmulti = GetOldestMultiXactId(); - srel = RelationCreateStorage(*newrnode, persistence); + srel = RelationCreateStorage(*newrnode, persistence, true); /* * If required, set up an init fork for an unlogged table so that it can @@ -601,7 +601,7 @@ heapam_relation_set_new_filenode(Relation rel, * even if the page has been logged, because the write did not go through * shared_buffers and therefore a concurrent checkpoint may have moved the * redo pointer past our xlog record. Recovery may as well remove it - * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE + * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE * record. Therefore, logging is necessary even if wal_level=minimal. */ if (persistence == RELPERSISTENCE_UNLOGGED) @@ -645,7 +645,7 @@ heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode) * NOTE: any conflict in relfilenode value will be caught in * RelationCreateStorage(). */ - RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence); + RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence, true); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index c9b4964c1e8..dacf3f7a587 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -161,7 +161,7 @@ btbuildempty(Relation index) * Write the page and log it. It might seem that an immediate sync would * be sufficient to guarantee that the file exists on disk, but recovery * itself might remove it while replaying, for example, an - * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need + * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need * this even when wal_level=minimal. */ PageSetChecksumInplace(metapage, BTREE_METAPAGE); diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c index 03af3fdbcfd..523d0b3c1da 100644 --- a/src/backend/access/rmgrdesc/dbasedesc.c +++ b/src/backend/access/rmgrdesc/dbasedesc.c @@ -24,14 +24,23 @@ dbase_desc(StringInfo buf, XLogReaderState *record) char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - if (info == XLOG_DBASE_CREATE) + if (info == XLOG_DBASE_CREATE_FILE_COPY) { - xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) rec; + xl_dbase_create_file_copy_rec *xlrec = + (xl_dbase_create_file_copy_rec *) rec; appendStringInfo(buf, "copy dir %u/%u to %u/%u", xlrec->src_tablespace_id, xlrec->src_db_id, xlrec->tablespace_id, xlrec->db_id); } + else if (info == XLOG_DBASE_CREATE_WAL_LOG) + { + xl_dbase_create_wal_log_rec *xlrec = + (xl_dbase_create_wal_log_rec *) rec; + + appendStringInfo(buf, "create dir %u/%u", + xlrec->tablespace_id, xlrec->db_id); + } else if (info == XLOG_DBASE_DROP) { xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) rec; @@ -51,8 +60,11 @@ dbase_identify(uint8 info) switch (info & ~XLR_INFO_MASK) { - case XLOG_DBASE_CREATE: - id = "CREATE"; + case XLOG_DBASE_CREATE_FILE_COPY: + id = "CREATE_FILE_COPY"; + break; + case XLOG_DBASE_CREATE_WAL_LOG: + id = "CREATE_WAL_LOG"; break; case XLOG_DBASE_DROP: id = "DROP"; diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 511f2f186f5..a4dedc58b71 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -484,7 +484,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, { /* page exists in file */ buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, - mode, NULL); + mode, NULL, true); } else { @@ -509,7 +509,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, ReleaseBuffer(buffer); } buffer = ReadBufferWithoutRelcache(rnode, forknum, - P_NEW, mode, NULL); + P_NEW, mode, NULL, true); } while (BufferGetBlockNumber(buffer) < blkno); /* Handle the corner case that P_NEW returns non-consecutive pages */ @@ -519,7 +519,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, - mode, NULL); + mode, NULL, true); } } |