aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2022-03-29 11:31:43 -0400
committerRobert Haas <rhaas@postgresql.org>2022-03-29 11:48:36 -0400
commit9c08aea6a3090a396be334cc58c511edab05776a (patch)
treec15e6e9fa45a18173a5bbd67ff4a4c889e616cde /src/backend/access
parentbf902c13930c268388644100663f2998868b6e85 (diff)
downloadpostgresql-9c08aea6a3090a396be334cc58c511edab05776a.tar.gz
postgresql-9c08aea6a3090a396be334cc58c511edab05776a.zip
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it avoids the need to checkpoint before and after the operation. However, because it logs each changed block individually, it might generate a lot of extra write-ahead logging if the template database is large. Therefore, the older strategy remains available via a new STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy option to createdb. Somewhat controversially, this patch assembles the list of relations to be copied to the new database by reading the pg_class relation of the template database. Cross-database access like this isn't normally possible, but it can be made to work here because there can't be any connections to the database being copied, nor can it contain any in-doubt transactions. Even so, we have to use lower-level interfaces than normal, since the table scan and relcache interfaces will not work for a database to which we're not connected. The advantage of this approach is that we do not need to rely on the filesystem to determine what ought to be copied, but instead on PostgreSQL's own knowledge of the database structure. This avoids, for example, copying stray files that happen to be located in the source database directory. Dilip Kumar, with a fairly large number of cosmetic changes by me. Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor, Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian, Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others. Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/heap/heapam_handler.c6
-rw-r--r--src/backend/access/nbtree/nbtree.c2
-rw-r--r--src/backend/access/rmgrdesc/dbasedesc.c20
-rw-r--r--src/backend/access/transam/xlogutils.c6
4 files changed, 23 insertions, 11 deletions
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 39ef8a0b77d..dee264e8596 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -593,7 +593,7 @@ heapam_relation_set_new_filenode(Relation rel,
*/
*minmulti = GetOldestMultiXactId();
- srel = RelationCreateStorage(*newrnode, persistence);
+ srel = RelationCreateStorage(*newrnode, persistence, true);
/*
* If required, set up an init fork for an unlogged table so that it can
@@ -601,7 +601,7 @@ heapam_relation_set_new_filenode(Relation rel,
* even if the page has been logged, because the write did not go through
* shared_buffers and therefore a concurrent checkpoint may have moved the
* redo pointer past our xlog record. Recovery may as well remove it
- * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
+ * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
* record. Therefore, logging is necessary even if wal_level=minimal.
*/
if (persistence == RELPERSISTENCE_UNLOGGED)
@@ -645,7 +645,7 @@ heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
- RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
+ RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence, true);
/* copy main fork */
RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c9b4964c1e8..dacf3f7a587 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -161,7 +161,7 @@ btbuildempty(Relation index)
* Write the page and log it. It might seem that an immediate sync would
* be sufficient to guarantee that the file exists on disk, but recovery
* itself might remove it while replaying, for example, an
- * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
+ * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
* this even when wal_level=minimal.
*/
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c
index 03af3fdbcfd..523d0b3c1da 100644
--- a/src/backend/access/rmgrdesc/dbasedesc.c
+++ b/src/backend/access/rmgrdesc/dbasedesc.c
@@ -24,14 +24,23 @@ dbase_desc(StringInfo buf, XLogReaderState *record)
char *rec = XLogRecGetData(record);
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
- if (info == XLOG_DBASE_CREATE)
+ if (info == XLOG_DBASE_CREATE_FILE_COPY)
{
- xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) rec;
+ xl_dbase_create_file_copy_rec *xlrec =
+ (xl_dbase_create_file_copy_rec *) rec;
appendStringInfo(buf, "copy dir %u/%u to %u/%u",
xlrec->src_tablespace_id, xlrec->src_db_id,
xlrec->tablespace_id, xlrec->db_id);
}
+ else if (info == XLOG_DBASE_CREATE_WAL_LOG)
+ {
+ xl_dbase_create_wal_log_rec *xlrec =
+ (xl_dbase_create_wal_log_rec *) rec;
+
+ appendStringInfo(buf, "create dir %u/%u",
+ xlrec->tablespace_id, xlrec->db_id);
+ }
else if (info == XLOG_DBASE_DROP)
{
xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) rec;
@@ -51,8 +60,11 @@ dbase_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_DBASE_CREATE:
- id = "CREATE";
+ case XLOG_DBASE_CREATE_FILE_COPY:
+ id = "CREATE_FILE_COPY";
+ break;
+ case XLOG_DBASE_CREATE_WAL_LOG:
+ id = "CREATE_WAL_LOG";
break;
case XLOG_DBASE_DROP:
id = "DROP";
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 511f2f186f5..a4dedc58b71 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -484,7 +484,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
{
/* page exists in file */
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
- mode, NULL);
+ mode, NULL, true);
}
else
{
@@ -509,7 +509,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
ReleaseBuffer(buffer);
}
buffer = ReadBufferWithoutRelcache(rnode, forknum,
- P_NEW, mode, NULL);
+ P_NEW, mode, NULL, true);
}
while (BufferGetBlockNumber(buffer) < blkno);
/* Handle the corner case that P_NEW returns non-consecutive pages */
@@ -519,7 +519,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
- mode, NULL);
+ mode, NULL, true);
}
}