aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/heap/heapam_handler.c35
-rw-r--r--src/backend/catalog/heap.c5
-rw-r--r--src/backend/catalog/storage.c12
-rw-r--r--src/backend/commands/tablecmds.c26
-rw-r--r--src/backend/utils/cache/relcache.c55
-rw-r--r--src/include/access/tableam.h25
-rw-r--r--src/include/catalog/storage.h2
-rw-r--r--src/include/catalog/storage_xlog.h2
8 files changed, 101 insertions, 61 deletions
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 6584a9cb8da..4d179881f27 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -566,10 +566,14 @@ heapam_finish_bulk_insert(Relation relation, int options)
*/
static void
-heapam_relation_set_new_filenode(Relation rel, char persistence,
+heapam_relation_set_new_filenode(Relation rel,
+ const RelFileNode *newrnode,
+ char persistence,
TransactionId *freezeXid,
MultiXactId *minmulti)
{
+ SMgrRelation srel;
+
/*
* Initialize to the minimum XID that could put tuples in the table. We
* know that no xacts older than RecentXmin are still running, so that
@@ -587,7 +591,7 @@ heapam_relation_set_new_filenode(Relation rel, char persistence,
*/
*minmulti = GetOldestMultiXactId();
- RelationCreateStorage(rel->rd_node, persistence);
+ srel = RelationCreateStorage(*newrnode, persistence);
/*
* If required, set up an init fork for an unlogged table so that it can
@@ -598,16 +602,17 @@ heapam_relation_set_new_filenode(Relation rel, char persistence,
* while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
* record. Therefore, logging is necessary even if wal_level=minimal.
*/
- if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
+ if (persistence == RELPERSISTENCE_UNLOGGED)
{
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
rel->rd_rel->relkind == RELKIND_MATVIEW ||
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
- RelationOpenSmgr(rel);
- smgrcreate(rel->rd_smgr, INIT_FORKNUM, false);
- log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
- smgrimmedsync(rel->rd_smgr, INIT_FORKNUM);
+ smgrcreate(srel, INIT_FORKNUM, false);
+ log_smgrcreate(newrnode, INIT_FORKNUM);
+ smgrimmedsync(srel, INIT_FORKNUM);
}
+
+ smgrclose(srel);
}
static void
@@ -617,21 +622,29 @@ heapam_relation_nontransactional_truncate(Relation rel)
}
static void
-heapam_relation_copy_data(Relation rel, RelFileNode newrnode)
+heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
{
SMgrRelation dstrel;
- dstrel = smgropen(newrnode, rel->rd_backend);
+ dstrel = smgropen(*newrnode, rel->rd_backend);
RelationOpenSmgr(rel);
/*
+ * Since we copy the file directly without looking at the shared buffers,
+ * we'd better first flush out any pages of the source relation that are
+ * in shared buffers. We assume no new changes will be made while we are
+ * holding exclusive lock on the rel.
+ */
+ FlushRelationBuffers(rel);
+
+ /*
* Create and copy all forks of the relation, and schedule unlinking of
* old physical files.
*
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
- RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
+ RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
/* copy main fork */
RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
@@ -652,7 +665,7 @@ heapam_relation_copy_data(Relation rel, RelFileNode newrnode)
if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM))
- log_smgrcreate(&newrnode, forkNum);
+ log_smgrcreate(newrnode, forkNum);
RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
rel->rd_rel->relpersistence);
}
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 6b77eff0af1..ee6b72e550a 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -435,8 +435,9 @@ heap_create(const char *relname,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
- table_relation_set_new_filenode(rel, relpersistence,
- relfrozenxid, relminmxid);
+ table_relation_set_new_filenode(rel, &rel->rd_node,
+ relpersistence,
+ relfrozenxid, relminmxid);
break;
}
}
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 72242b24761..fb41f223ada 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -75,7 +75,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
* This function is transactional. The creation is WAL-logged, and if the
* transaction aborts later on, the storage will be destroyed.
*/
-void
+SMgrRelation
RelationCreateStorage(RelFileNode rnode, char relpersistence)
{
PendingRelDelete *pending;
@@ -99,7 +99,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
break;
default:
elog(ERROR, "invalid relpersistence: %c", relpersistence);
- return; /* placate compiler */
+ return NULL; /* placate compiler */
}
srel = smgropen(rnode, backend);
@@ -117,13 +117,15 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
pending->nestLevel = GetCurrentTransactionNestLevel();
pending->next = pendingDeletes;
pendingDeletes = pending;
+
+ return srel;
}
/*
* Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL.
*/
void
-log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
+log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum)
{
xl_smgr_create xlrec;
@@ -294,6 +296,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
/*
* Copy a fork's data, block by block.
+ *
+ * Note that this requires that there is no dirty data in shared buffers. If
+ * it's possible that there is, callers need to flush it using
+ * e.g. FlushRelationBuffers(rel).
*/
void
RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 14fcad9034b..2d0ef92badf 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -12237,14 +12237,6 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
/*
- * Since we copy the file directly without looking at the shared buffers,
- * we'd better first flush out any pages of the source relation that are
- * in shared buffers. We assume no new changes will be made while we are
- * holding exclusive lock on the rel.
- */
- FlushRelationBuffers(rel);
-
- /*
* Relfilenodes are not unique in databases across tablespaces, so we need
* to allocate a new one in the new tablespace.
*/
@@ -12266,10 +12258,16 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
rel->rd_rel->relkind == RELKIND_MATVIEW ||
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
- table_relation_copy_data(rel, newrnode);
+ table_relation_copy_data(rel, &newrnode);
}
- /* update the pg_class row */
+ /*
+ * Update the pg_class row.
+ *
+ * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be
+ * executed on pg_class or its indexes (the above copy wouldn't contain
+ * the updated pg_class entry), but that's forbidden above.
+ */
rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace;
rd_rel->relfilenode = newrelfilenode;
CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
@@ -12538,6 +12536,14 @@ index_copy_data(Relation rel, RelFileNode newrnode)
RelationOpenSmgr(rel);
/*
+ * Since we copy the file directly without looking at the shared buffers,
+ * we'd better first flush out any pages of the source relation that are
+ * in shared buffers. We assume no new changes will be made while we are
+ * holding exclusive lock on the rel.
+ */
+ FlushRelationBuffers(rel);
+
+ /*
* Create and copy all forks of the relation, and schedule unlinking of
* old physical files.
*
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index bab59f16e68..90ff8ccf54f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -3440,6 +3440,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
Form_pg_class classform;
MultiXactId minmulti = InvalidMultiXactId;
TransactionId freezeXid = InvalidTransactionId;
+ RelFileNode newrnode;
/* Allocate a new relfilenode */
newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
@@ -3462,39 +3463,23 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
*/
RelationDropStorage(relation);
- /*
- * Now update the pg_class row. However, if we're dealing with a mapped
- * index, pg_class.relfilenode doesn't change; instead we have to send the
- * update to the relation mapper.
- */
- if (RelationIsMapped(relation))
- RelationMapUpdateMap(RelationGetRelid(relation),
- newrelfilenode,
- relation->rd_rel->relisshared,
- true);
- else
- {
- relation->rd_rel->relfilenode = newrelfilenode;
- classform->relfilenode = newrelfilenode;
- }
-
- RelationInitPhysicalAddr(relation);
+ /* initialize new relfilenode from old relfilenode */
+ newrnode = relation->rd_node;
/*
* Create storage for the main fork of the new relfilenode. If it's
* table-like object, call into table AM to do so, which'll also create
* the table's init fork.
*
- * NOTE: any conflict in relfilenode value will be caught here, if
- * GetNewRelFileNode messes up for any reason.
+ * NOTE: If relevant for the AM, any conflict in relfilenode value will be
+ * caught here, if GetNewRelFileNode messes up for any reason.
*/
+ newrnode = relation->rd_node;
+ newrnode.relNode = newrelfilenode;
- /*
- * Create storage for relation.
- */
switch (relation->rd_rel->relkind)
{
- /* shouldn't be called for these */
+ /* shouldn't be called for these */
case RELKIND_VIEW:
case RELKIND_COMPOSITE_TYPE:
case RELKIND_FOREIGN_TABLE:
@@ -3505,18 +3490,36 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
case RELKIND_INDEX:
case RELKIND_SEQUENCE:
- RelationCreateStorage(relation->rd_node, persistence);
- RelationOpenSmgr(relation);
+ {
+ SMgrRelation srel;
+
+ srel = RelationCreateStorage(newrnode, persistence);
+ smgrclose(srel);
+ }
break;
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
- table_relation_set_new_filenode(relation, persistence,
+ table_relation_set_new_filenode(relation, &newrnode,
+ persistence,
&freezeXid, &minmulti);
break;
}
+ /*
+ * Now update the pg_class row. However, if we're dealing with a mapped
+ * index, pg_class.relfilenode doesn't change; instead we have to send the
+ * update to the relation mapper.
+ */
+ if (RelationIsMapped(relation))
+ RelationMapUpdateMap(RelationGetRelid(relation),
+ newrelfilenode,
+ relation->rd_rel->relisshared,
+ false);
+ else
+ classform->relfilenode = newrelfilenode;
+
/* These changes are safe even for a mapped relation */
if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
{
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index c018a44267a..ebfa0d51855 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -416,7 +416,12 @@ typedef struct TableAmRoutine
* This callback needs to create a new relation filenode for `rel`, with
* appropriate durability behaviour for `persistence`.
*
- * On output *freezeXid, *minmulti must be set to the values appropriate
+ * Note that only the subset of the relcache filled by
+ * RelationBuildLocalRelation() can be relied upon and that the relation's
+ * catalog entries will either not yet exist (new relation), or
+ * will still reference the old relfilenode.
+ *
+ * As output *freezeXid, *minmulti must be set to the values appropriate
* for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
* fields to be filled they can be set to InvalidTransactionId and
* InvalidMultiXactId, respectively.
@@ -424,6 +429,7 @@ typedef struct TableAmRoutine
* See also table_relation_set_new_filenode().
*/
void (*relation_set_new_filenode) (Relation rel,
+ const RelFileNode *newrnode,
char persistence,
TransactionId *freezeXid,
MultiXactId *minmulti);
@@ -444,7 +450,8 @@ typedef struct TableAmRoutine
* This can typically be implemented by directly copying the underlying
* storage, unless it contains references to the tablespace internally.
*/
- void (*relation_copy_data) (Relation rel, RelFileNode newrnode);
+ void (*relation_copy_data) (Relation rel,
+ const RelFileNode *newrnode);
/* See table_relation_copy_for_cluster() */
void (*relation_copy_for_cluster) (Relation NewHeap,
@@ -1251,21 +1258,25 @@ table_finish_bulk_insert(Relation rel, int options)
*/
/*
- * Create a new relation filenode for `rel`, with persistence set to
+ * Create storage for `rel` in `newrnode`, with persistence set to
* `persistence`.
*
* This is used both during relation creation and various DDL operations to
- * create a new relfilenode that can be filled from scratch.
+ * create a new relfilenode that can be filled from scratch. When creating
+ * new storage for an existing relation, this should be called before the
+ * relcache entry has been updated.
*
* *freezeXid, *minmulti are set to the xid / multixact horizon for the table
* that pg_class.{relfrozenxid, relminmxid} have to be set to.
*/
static inline void
-table_relation_set_new_filenode(Relation rel, char persistence,
+table_relation_set_new_filenode(Relation rel,
+ const RelFileNode *newrnode,
+ char persistence,
TransactionId *freezeXid,
MultiXactId *minmulti)
{
- rel->rd_tableam->relation_set_new_filenode(rel, persistence,
+ rel->rd_tableam->relation_set_new_filenode(rel, newrnode, persistence,
freezeXid, minmulti);
}
@@ -1288,7 +1299,7 @@ table_relation_nontransactional_truncate(Relation rel)
* changing a relation's tablespace.
*/
static inline void
-table_relation_copy_data(Relation rel, RelFileNode newrnode)
+table_relation_copy_data(Relation rel, const RelFileNode *newrnode)
{
rel->rd_tableam->relation_copy_data(rel, newrnode);
}
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 882dc65c893..3579d3f3eb0 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -19,7 +19,7 @@
#include "storage/smgr.h"
#include "utils/relcache.h"
-extern void RelationCreateStorage(RelFileNode rnode, char relpersistence);
+extern SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence);
extern void RelationDropStorage(Relation rel);
extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit);
extern void RelationTruncate(Relation rel, BlockNumber nblocks);
diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h
index dfca3611ea3..40419efd373 100644
--- a/src/include/catalog/storage_xlog.h
+++ b/src/include/catalog/storage_xlog.h
@@ -50,7 +50,7 @@ typedef struct xl_smgr_truncate
int flags;
} xl_smgr_truncate;
-extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
+extern void log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum);
extern void smgr_redo(XLogReaderState *record);
extern void smgr_desc(StringInfo buf, XLogReaderState *record);