Diffstat (limited to 'src/backend/commands')
-rw-r--r--	src/backend/commands/cluster.c    | 19
-rw-r--r--	src/backend/commands/copy.c       | 58
-rw-r--r--	src/backend/commands/createas.c   | 11
-rw-r--r--	src/backend/commands/indexcmds.c  |  2
-rw-r--r--	src/backend/commands/matview.c    | 12
-rw-r--r--	src/backend/commands/tablecmds.c  | 26
6 files changed, 50 insertions, 78 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index fc1cea0236a..ccd0c9b286e 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1111,6 +1111,25 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
 	}
 
 	/*
+	 * Recognize that rel1's relfilenode (swapped from rel2) is new in this
+	 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
+	 * new.
+	 */
+	{
+		Relation	rel1,
+					rel2;
+
+		rel1 = relation_open(r1, NoLock);
+		rel2 = relation_open(r2, NoLock);
+		rel2->rd_createSubid = rel1->rd_createSubid;
+		rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
+		rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
+		RelationAssumeNewRelfilenode(rel1);
+		relation_close(rel1, NoLock);
+		relation_close(rel2, NoLock);
+	}
+
+	/*
 	 * In the case of a shared catalog, these next few steps will only affect
 	 * our own database's pg_class row; but that's okay, because they are all
 	 * noncritical updates. That's also an important fact for the case of a
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index fbde9f88e74..05f1fae6b03 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2713,63 +2713,15 @@ CopyFrom(CopyState cstate)
 						RelationGetRelationName(cstate->rel))));
 	}
 
-	/*----------
-	 * Check to see if we can avoid writing WAL
-	 *
-	 * If archive logging/streaming is not enabled *and* either
-	 *	- table was created in same transaction as this COPY
-	 *	- data is being written to relfilenode created in this transaction
-	 * then we can skip writing WAL. It's safe because if the transaction
-	 * doesn't commit, we'll discard the table (or the new relfilenode file).
-	 * If it does commit, we'll have done the table_finish_bulk_insert() at
-	 * the bottom of this routine first.
-	 *
-	 * As mentioned in comments in utils/rel.h, the in-same-transaction test
-	 * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
-	 * can be cleared before the end of the transaction. The exact case is
-	 * when a relation sets a new relfilenode twice in same transaction, yet
-	 * the second one fails in an aborted subtransaction, e.g.
-	 *
-	 * BEGIN;
-	 * TRUNCATE t;
-	 * SAVEPOINT save;
-	 * TRUNCATE t;
-	 * ROLLBACK TO save;
-	 * COPY ...
-	 *
-	 * Also, if the target file is new-in-transaction, we assume that checking
-	 * FSM for free space is a waste of time, even if we must use WAL because
-	 * of archiving. This could possibly be wrong, but it's unlikely.
-	 *
-	 * The comments for table_tuple_insert and RelationGetBufferForTuple
-	 * specify that skipping WAL logging is only safe if we ensure that our
-	 * tuples do not go into pages containing tuples from any other
-	 * transactions --- but this must be the case if we have a new table or
-	 * new relfilenode, so we need no additional work to enforce that.
-	 *
-	 * We currently don't support this optimization if the COPY target is a
-	 * partitioned table as we currently only lazily initialize partition
-	 * information when routing the first tuple to the partition. We cannot
-	 * know at this stage if we can perform this optimization. It should be
-	 * possible to improve on this, but it does mean maintaining heap insert
-	 * option flags per partition and setting them when we first open the
-	 * partition.
-	 *
-	 * This optimization is not supported for relation types which do not
-	 * have any physical storage, with foreign tables and views using
-	 * INSTEAD OF triggers entering in this category. Partitioned tables
-	 * are not supported as per the description above.
-	 *----------
+	/*
+	 * If the target file is new-in-transaction, we assume that checking FSM
+	 * for free space is a waste of time. This could possibly be wrong, but
+	 * it's unlikely.
 	 */
-	/* createSubid is creation check, newRelfilenodeSubid is truncation check */
 	if (RELKIND_HAS_STORAGE(cstate->rel->rd_rel->relkind) &&
 		(cstate->rel->rd_createSubid != InvalidSubTransactionId ||
-		 cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId))
-	{
+		 cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId))
 		ti_options |= TABLE_INSERT_SKIP_FSM;
-		if (!XLogIsNeeded())
-			ti_options |= TABLE_INSERT_SKIP_WAL;
-	}
 
 	/*
 	 * Optimize if new relfilenode was created in this subxact or one of its
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 3a5676fb39e..8e5e4fb95ed 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -550,16 +550,13 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
 	myState->rel = intoRelationDesc;
 	myState->reladdr = intoRelationAddr;
 	myState->output_cid = GetCurrentCommandId(true);
+	myState->ti_options = TABLE_INSERT_SKIP_FSM;
+	myState->bistate = GetBulkInsertState();
 
 	/*
-	 * We can skip WAL-logging the insertions, unless PITR or streaming
-	 * replication is in use. We can skip the FSM in any case.
+	 * Valid smgr_targblock implies something already wrote to the relation.
+	 * This may be harmless, but this function hasn't planned for it.
 	 */
-	myState->ti_options = TABLE_INSERT_SKIP_FSM |
-		(XLogIsNeeded() ? 0 : TABLE_INSERT_SKIP_WAL);
-	myState->bistate = GetBulkInsertState();
-
-	/* Not using WAL requires smgr_targblock be initially invalid */
 	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
 }
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 4e8263af4be..c94939df40a 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1192,6 +1192,8 @@ DefineIndex(Oid relationId,
 			childStmt->relation = NULL;
 			childStmt->indexOid = InvalidOid;
 			childStmt->oldNode = InvalidOid;
+			childStmt->oldCreateSubid = InvalidSubTransactionId;
+			childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
 
 			/*
 			 * Adjust any Vars (both in expressions and in the index's
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index c3954f3e242..492b2a3ee69 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -457,17 +457,13 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
 	 */
 	myState->transientrel = transientrel;
 	myState->output_cid = GetCurrentCommandId(true);
-
-	/*
-	 * We can skip WAL-logging the insertions, unless PITR or streaming
-	 * replication is in use. We can skip the FSM in any case.
-	 */
 	myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
-	if (!XLogIsNeeded())
-		myState->ti_options |= TABLE_INSERT_SKIP_WAL;
 	myState->bistate = GetBulkInsertState();
 
-	/* Not using WAL requires smgr_targblock be initially invalid */
+	/*
+	 * Valid smgr_targblock implies something already wrote to the relation.
+	 * This may be harmless, but this function hasn't planned for it.
+	 */
 	Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
 }
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 729025470dc..31d718e8ea1 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5041,19 +5041,14 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		newrel = NULL;
 
 	/*
-	 * Prepare a BulkInsertState and options for table_tuple_insert. Because
-	 * we're building a new heap, we can skip WAL-logging and fsync it to disk
-	 * at the end instead (unless WAL-logging is required for archiving or
-	 * streaming replication). The FSM is empty too, so don't bother using it.
+	 * Prepare a BulkInsertState and options for table_tuple_insert. The FSM
+	 * is empty, so don't bother using it.
 	 */
 	if (newrel)
 	{
 		mycid = GetCurrentCommandId(true);
 		bistate = GetBulkInsertState();
-
 		ti_options = TABLE_INSERT_SKIP_FSM;
-		if (!XLogIsNeeded())
-			ti_options |= TABLE_INSERT_SKIP_WAL;
 	}
 	else
 	{
@@ -7719,14 +7714,19 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
 
 	/*
 	 * If TryReuseIndex() stashed a relfilenode for us, we used it for the new
-	 * index instead of building from scratch. The DROP of the old edition of
-	 * this index will have scheduled the storage for deletion at commit, so
-	 * cancel that pending deletion.
+	 * index instead of building from scratch. Restore associated fields.
+	 * This may store InvalidSubTransactionId in both fields, in which case
+	 * relcache.c will assume it can rebuild the relcache entry. Hence, do
+	 * this after the CCI that made catalog rows visible to any rebuild. The
+	 * DROP of the old edition of this index will have scheduled the storage
+	 * for deletion at commit, so cancel that pending deletion.
 	 */
 	if (OidIsValid(stmt->oldNode))
 	{
 		Relation	irel = index_open(address.objectId, NoLock);
 
+		irel->rd_createSubid = stmt->oldCreateSubid;
+		irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
 		RelationPreserveStorage(irel->rd_node, true);
 		index_close(irel, NoLock);
 	}
@@ -12052,7 +12052,11 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt)
 
 		/* If it's a partitioned index, there is no storage to share. */
 		if (irel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+		{
 			stmt->oldNode = irel->rd_node.relNode;
+			stmt->oldCreateSubid = irel->rd_createSubid;
+			stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
+		}
 		index_close(irel, NoLock);
 	}
 }
@@ -12988,6 +12992,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
 
 	table_close(pg_class, RowExclusiveLock);
 
+	RelationAssumeNewRelfilenode(rel);
+
 	relation_close(rel, NoLock);
 
 	/* Make sure the reltablespace change is visible */
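Not part of the patch itself, but for context: the bulk-load pattern these call sites serve looks roughly like the following, assuming wal_level = minimal (so neither WAL archiving nor streaming replication needs the records); the table name, columns, and file path are illustrative only.

    BEGIN;
    CREATE TABLE t (id int, payload text);          -- relfilenode created in this transaction
    COPY t FROM '/tmp/data.csv' WITH (FORMAT csv);  -- bulk insert into the brand-new storage
    COMMIT;                                         -- the new storage is synced to disk at commit

Because the relation's relfilenode is new in the same transaction, the load can skip WAL: if the transaction aborts, the file is simply discarded. The diff removes the per-callsite XLogIsNeeded()/TABLE_INSERT_SKIP_WAL decisions and instead relies on the relcache fields (rd_createSubid, rd_newRelfilenodeSubid, rd_firstRelfilenodeSubid) to track new relfilenodes, which is why cluster.c, indexcmds.c, and tablecmds.c now propagate or reset those fields explicitly.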