aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/cache
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2022-03-29 11:31:43 -0400
committerRobert Haas <rhaas@postgresql.org>2022-03-29 11:48:36 -0400
commit9c08aea6a3090a396be334cc58c511edab05776a (patch)
treec15e6e9fa45a18173a5bbd67ff4a4c889e616cde /src/backend/utils/cache
parentbf902c13930c268388644100663f2998868b6e85 (diff)
downloadpostgresql-9c08aea6a3090a396be334cc58c511edab05776a.tar.gz
postgresql-9c08aea6a3090a396be334cc58c511edab05776a.zip
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it avoids the need to checkpoint before and after the operation. However, because it logs each changed block individually, it might generate a lot of extra write-ahead logging if the template database is large. Therefore, the older strategy remains available via a new STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy option to createdb. Somewhat controversially, this patch assembles the list of relations to be copied to the new database by reading the pg_class relation of the template database. Cross-database access like this isn't normally possible, but it can be made to work here because there can't be any connections to the database being copied, nor can it contain any in-doubt transactions. Even so, we have to use lower-level interfaces than normal, since the table scan and relcache interfaces will not work for a database to which we're not connected. The advantage of this approach is that we do not need to rely on the filesystem to determine what ought to be copied, but instead on PostgreSQL's own knowledge of the database structure. This avoids, for example, copying stray files that happen to be located in the source database directory. Dilip Kumar, with a fairly large number of cosmetic changes by me. Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor, Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian, Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others. Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
Diffstat (limited to 'src/backend/utils/cache')
-rw-r--r--src/backend/utils/cache/relcache.c2
-rw-r--r--src/backend/utils/cache/relmapper.c64
2 files changed, 65 insertions, 1 deletions
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index d47fac7bb98..a15ce9edb13 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -3746,7 +3746,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
/* handle these directly, at least for now */
SMgrRelation srel;
- srel = RelationCreateStorage(newrnode, persistence);
+ srel = RelationCreateStorage(newrnode, persistence, true);
smgrclose(srel);
}
else
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 4d0718f0018..dee3387d026 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -252,6 +252,63 @@ RelationMapFilenodeToOid(Oid filenode, bool shared)
}
/*
+ * RelationMapOidToFilenodeForDatabase
+ *
+ * Like RelationMapOidToFilenode, but looks relationId up in the relmap file
+ * read from dbpath, not the current database's; InvalidOid if it is absent.
+ */
+Oid
+RelationMapOidToFilenodeForDatabase(char *dbpath, Oid relationId)
+{
+ RelMapFile map;
+ int i;
+
+ /* Load the relmap file found under dbpath into a local copy. */
+ read_relmap_file(&map, dbpath, false, ERROR);
+
+ /* Linear search of the loaded entries; hand back the first match's filenode. */
+ for (i = 0; i < map.num_mappings; i++)
+ {
+ if (relationId == map.mappings[i].mapoid)
+ return map.mappings[i].mapfilenode;
+ }
+
+ return InvalidOid; /* relationId has no entry in this map */
+}
+
+/*
+ * RelationMapCopy
+ *
+ * Copy the relmap file from the source db path to the destination db path and
+ * WAL log the operation. This is intended for use in creating a new relmap
+ * file for a database that doesn't have one yet, not for replacing an existing
+ * relmap file.
+ */
+void
+RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath)
+{
+ RelMapFile map;
+
+ /*
+ * Load the source database's relmap file (under srcdbpath) into a local copy.
+ */
+ read_relmap_file(&map, srcdbpath, false, ERROR);
+
+ /*
+ * Write the same data into the destination database's relmap file.
+ *
+ * No sinval is needed because no one can be connected to the destination
+ * database yet; for the same reason, RelationMappingLock is not acquired.
+ *
+ * There's no point in trying to preserve files here: the new database isn't
+ * usable yet anyway, and won't ever be if we can't install a relmap file.
+ *
+ * NOTE(review): the bools presumably mean (WAL-log, sinval, preserve files).
+ */
+ write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
+}
+
+/*
* RelationMapUpdateMap
*
* Install a new relfilenode mapping for the specified relation.
@@ -1031,6 +1088,13 @@ relmap_redo(XLogReaderState *record)
*
* There shouldn't be anyone else updating relmaps during WAL replay,
* but grab the lock to interlock against load_relmap_file().
+ *
+ * Note that we use the same WAL record for updating the relmap of
+ * an existing database as we do for creating a new database. In
+ * the latter case, taking the relmap lock and sending sinval messages
+ * is unnecessary, but harmless. If we wanted to avoid it, we could
+ * add a flag to the WAL record to indicate which operation is being
+ * performed.
*/
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
write_relmap_file(&newmap, false, true, false,