diff options
author | Robert Haas <rhaas@postgresql.org> | 2022-03-29 11:31:43 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2022-03-29 11:48:36 -0400 |
commit | 9c08aea6a3090a396be334cc58c511edab05776a (patch) | |
tree | c15e6e9fa45a18173a5bbd67ff4a4c889e616cde /src/backend/utils/cache | |
parent | bf902c13930c268388644100663f2998868b6e85 (diff) | |
download | postgresql-9c08aea6a3090a396be334cc58c511edab05776a.tar.gz postgresql-9c08aea6a3090a396be334cc58c511edab05776a.zip |
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
Diffstat (limited to 'src/backend/utils/cache')
-rw-r--r-- | src/backend/utils/cache/relcache.c | 2 | ||||
-rw-r--r-- | src/backend/utils/cache/relmapper.c | 64 |
2 files changed, 65 insertions, 1 deletions
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d47fac7bb98..a15ce9edb13 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3746,7 +3746,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) /* handle these directly, at least for now */ SMgrRelation srel; - srel = RelationCreateStorage(newrnode, persistence); + srel = RelationCreateStorage(newrnode, persistence, true); smgrclose(srel); } else diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 4d0718f0018..dee3387d026 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -252,6 +252,63 @@ RelationMapFilenodeToOid(Oid filenode, bool shared) } /* + * RelationMapOidToFilenodeForDatabase + * + * Like RelationMapOidToFilenode, but reads the mapping from the indicated + * path instead of using the one for the current database. + */ +Oid +RelationMapOidToFilenodeForDatabase(char *dbpath, Oid relationId) +{ + RelMapFile map; + int i; + + /* Read the relmap file from the source database. */ + read_relmap_file(&map, dbpath, false, ERROR); + + /* Iterate over the relmap entries to find the input relation OID. */ + for (i = 0; i < map.num_mappings; i++) + { + if (relationId == map.mappings[i].mapoid) + return map.mappings[i].mapfilenode; + } + + return InvalidOid; +} + +/* + * RelationMapCopy + * + * Copy relmapfile from source db path to the destination db path and WAL log + * the operation. This is intended for use in creating a new relmap file + * for a database that doesn't have one yet, not for replacing an existing + * relmap file. + */ +void +RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath) +{ + RelMapFile map; + + /* + * Read the relmap file from the source database. + */ + read_relmap_file(&map, srcdbpath, false, ERROR); + + /* + * Write the same data into the destination database's relmap file. + * + * No sinval is needed because no one can be connected to the destination + * database yet. For the same reason, there is no need to acquire + * RelationMappingLock. + * + * There's no point in trying to preserve files here. The new database + * isn't usable yet anyway, and won't ever be if we can't install a + * relmap file. + */ + write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath); +} + +/* * RelationMapUpdateMap * * Install a new relfilenode mapping for the specified relation. @@ -1031,6 +1088,13 @@ relmap_redo(XLogReaderState *record) * * There shouldn't be anyone else updating relmaps during WAL replay, * but grab the lock to interlock against load_relmap_file(). + * + * Note that we use the same WAL record for updating the relmap of + * an existing database as we do for creating a new database. In + * the latter case, taking the relmap log and sending sinval messages + * is unnecessary, but harmless. If we wanted to avoid it, we could + * add a flag to the WAL record to indicate which opration is being + * performed. */ LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE); write_relmap_file(&newmap, false, true, false, |