diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-02-07 20:48:13 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-02-07 20:48:13 +0000 |
commit | b9b8831ad60f6e4bd580fe6dbe9749359298a3c4 (patch) | |
tree | af6948498f13a43edd982b05808ed89b5b8191ab /src/backend/commands/cluster.c | |
parent | 7fc30c488fc6e9674564206193c29b1a657a818f (diff) | |
download | postgresql-b9b8831ad60f6e4bd580fe6dbe9749359298a3c4.tar.gz postgresql-b9b8831ad60f6e4bd580fe6dbe9749359298a3c4.zip |
Create a "relation mapping" infrastructure to support changing the relfilenodes
of shared or nailed system catalogs. This has two key benefits:
* The new CLUSTER-based VACUUM FULL can be applied safely to all catalogs.
* We no longer have to use an unsafe reindex-in-place approach for reindexing
shared catalogs.
CLUSTER on nailed catalogs now works too, although I left it disabled on
shared catalogs because the resulting pg_index.indisclustered update would
only be visible in one database.
Since reindexing shared system catalogs is now fully transactional and
crash-safe, the former special cases in REINDEX behavior have been removed;
shared catalogs are treated the same as non-shared.
This commit does not do anything about the recently-discussed problem of
deadlocks between VACUUM FULL/CLUSTER on a system catalog and other
concurrent queries; will address that in a separate patch. As a stopgap,
parallel_schedule has been tweaked to run vacuum.sql by itself, to avoid
such failures during the regression tests.
Diffstat (limited to 'src/backend/commands/cluster.c')
-rw-r--r-- | src/backend/commands/cluster.c | 434 |
1 file changed, 300 insertions, 134 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index cf2ac19d533..da605bffacf 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * cluster.c - * CLUSTER a table on an index. + * CLUSTER a table on an index. This is now also used for VACUUM FULL. * * There is hardly anything left of Paul Brown's original implementation... * @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.197 2010/02/04 00:09:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.198 2010/02/07 20:48:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -44,6 +44,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/relcache.h" +#include "utils/relmapper.h" #include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -223,7 +224,8 @@ cluster(ClusterStmt *stmt, bool isTopLevel) StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); - cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose, -1, -1); + cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose, + -1, -1); PopActiveSnapshot(); CommitTransactionCommand(); } @@ -245,13 +247,13 @@ cluster(ClusterStmt *stmt, bool isTopLevel) * GRANT, inheritance nor references to this table (this was a bug * in releases thru 7.3). * - * Also create new indexes and swap the filenodes with the old indexes the - * same way we do for the relation. Since we are effectively bulk-loading + * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading * the new table, it's better to create the indexes afterwards than to fill * them incrementally while we load the table. 
* * If indexOid is InvalidOid, the table will be rewritten in physical order - * instead of index order. + * instead of index order. This is the new implementation of VACUUM FULL, + * and error messages should refer to the operation as VACUUM not CLUSTER. */ void cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose, @@ -300,8 +302,7 @@ cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose, * somebody is executing a database-wide CLUSTER), because there is * another check in cluster() which will stop any attempt to cluster * remote temp tables by name. There is another check in - * check_index_is_clusterable which is redundant, but we leave it for - * extra safety. + * cluster_rel which is redundant, but we leave it for extra safety. */ if (RELATION_IS_OTHER_TEMP(OldHeap)) { @@ -344,10 +345,44 @@ cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose, } } + /* + * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER + * would work in most respects, but the index would only get marked as + * indisclustered in the current database, leading to unexpected behavior + * if CLUSTER were later invoked in another database. + */ + if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot cluster a shared catalog"))); + + /* + * Don't process temp tables of other backends ... their local + * buffer manager is not going to cope. + */ + if (RELATION_IS_OTHER_TEMP(OldHeap)) + { + if (OidIsValid(indexOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot cluster temporary tables of other sessions"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot vacuum temporary tables of other sessions"))); + } + + /* + * Also check for active uses of the relation in the current transaction, + * including open scans and pending AFTER trigger events. 
+ */ + CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM"); + /* Check heap and index are valid to cluster on */ - check_index_is_clusterable(OldHeap, indexOid, recheck); + if (OidIsValid(indexOid)) + check_index_is_clusterable(OldHeap, indexOid, recheck); - /* rebuild_relation does all the dirty work */ + /* Log what we're doing (this could use more effort) */ if (OidIsValid(indexOid)) ereport(verbose ? INFO : DEBUG2, (errmsg("clustering \"%s.%s\"", @@ -358,6 +393,8 @@ cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose, (errmsg("vacuuming \"%s.%s\"", get_namespace_name(RelationGetNamespace(OldHeap)), RelationGetRelationName(OldHeap)))); + + /* rebuild_relation does all the dirty work */ rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age); /* NB: rebuild_relation does heap_close() on OldHeap */ @@ -376,38 +413,6 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) { Relation OldIndex; - /* - * Disallow clustering system relations. This will definitely NOT work - * for shared relations (we have no way to update pg_class rows in other - * databases), nor for nailed-in-cache relations (the relfilenode values - * for those are hardwired, see relcache.c). It might work for other - * system relations, but I ain't gonna risk it. - */ - if (IsSystemRelation(OldHeap)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("\"%s\" is a system catalog", - RelationGetRelationName(OldHeap)))); - - /* - * Don't allow cluster on temp tables of other backends ... their local - * buffer manager is not going to cope. - */ - if (RELATION_IS_OTHER_TEMP(OldHeap)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot cluster temporary tables of other sessions"))); - - /* - * Also check for active uses of the relation in the current transaction, - * including open scans and pending AFTER trigger events. 
- */ - CheckTableNotInUse(OldHeap, "CLUSTER"); - - /* Skip checks for index if not specified. */ - if (!OidIsValid(indexOid)) - return; - OldIndex = index_open(indexOid, AccessExclusiveLock); /* @@ -421,6 +426,13 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) RelationGetRelationName(OldIndex), RelationGetRelationName(OldHeap)))); + /* Index AM must allow clustering */ + if (!OldIndex->rd_am->amclusterable) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot cluster on index \"%s\" because access method does not support clustering", + RelationGetRelationName(OldIndex)))); + /* * Disallow clustering on incomplete indexes (those that might not index * every row of the relation). We could relax this by making a separate @@ -433,12 +445,6 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) errmsg("cannot cluster on partial index \"%s\"", RelationGetRelationName(OldIndex)))); - if (!OldIndex->rd_am->amclusterable) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot cluster on index \"%s\" because access method does not support clustering", - RelationGetRelationName(OldIndex)))); - if (!OldIndex->rd_am->amindexnulls) { AttrNumber colno; @@ -585,6 +591,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, Oid tableOid = RelationGetRelid(OldHeap); Oid tableSpace = OldHeap->rd_rel->reltablespace; Oid OIDNewHeap; + bool is_system_catalog; bool swap_toast_by_content; TransactionId frozenXid; @@ -592,6 +599,9 @@ rebuild_relation(Relation OldHeap, Oid indexOid, if (OidIsValid(indexOid)) mark_index_clustered(OldHeap, indexOid); + /* Remember if it's a system catalog */ + is_system_catalog = IsSystemRelation(OldHeap); + /* Close relcache entry, but keep lock until transaction commit */ heap_close(OldHeap, NoLock); @@ -603,12 +613,12 @@ rebuild_relation(Relation OldHeap, Oid indexOid, freeze_min_age, freeze_table_age, &swap_toast_by_content, &frozenXid); - /* Swap the physical 
files of the old and new heaps */ - swap_relation_files(tableOid, OIDNewHeap, - swap_toast_by_content, frozenXid); - - /* Destroy the new heap, removing the old data along with it */ - cleanup_heap_swap(tableOid, OIDNewHeap, swap_toast_by_content); + /* + * Swap the physical files of the target and transient tables, then + * rebuild the target's indexes and throw away the transient table. + */ + finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog, + swap_toast_by_content, frozenXid); } @@ -619,8 +629,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, * NewTableSpace which might be different from OldHeap's. * * After this, the caller should load the new heap with transferred/modified - * data, then call swap_relation_files, and finally call cleanup_heap_swap to - * remove the debris. + * data, then call finish_heap_swap to complete the operation. */ Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace) @@ -666,6 +675,11 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace) * relnames. Working around this seems more trouble than it's worth; in * particular, we can't create the new heap in a different namespace from * the old, or we will have problems with the TEMP status of temp tables. + * + * Note: the new heap is not a shared relation, even if we are rebuilding + * a shared rel. However, we do make the new heap mapped if the source + * is mapped. This simplifies swap_relation_files, and is absolutely + * necessary for rebuilding pg_class, for reasons explained there. 
*/ snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap); @@ -679,13 +693,14 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace) tupdesc, NIL, OldHeap->rd_rel->relkind, - OldHeap->rd_rel->relisshared, + false, + RelationIsMapped(OldHeap), true, 0, ONCOMMIT_NOOP, reloptions, false, - allowSystemTableMods); + true); ReleaseSysCache(tuple); @@ -696,14 +711,20 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace) CommandCounterIncrement(); /* - * If necessary, create a TOAST table for the new relation. Note that - * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that - * the TOAST table will be visible for insertion. + * If necessary, create a TOAST table for the new relation. + * + * If the relation doesn't have a TOAST table already, we can't need one + * for the new relation. The other way around is possible though: if + * some wide columns have been dropped, AlterTableCreateToastTable + * can decide that no TOAST table is needed for the new table. + * + * Note that AlterTableCreateToastTable ends with CommandCounterIncrement, + * so that the TOAST table will be visible for insertion. 
*/ toastid = OldHeap->rd_rel->reltoastrelid; - reloptions = (Datum) 0; if (OidIsValid(toastid)) { + /* keep the existing toast table's reloptions, if any */ tuple = SearchSysCache(RELOID, ObjectIdGetDatum(toastid), 0, 0, 0); @@ -713,11 +734,11 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace) &isNull); if (isNull) reloptions = (Datum) 0; - } - AlterTableCreateToastTable(OIDNewHeap, reloptions); - if (OidIsValid(toastid)) + AlterTableCreateToastTable(OIDNewHeap, reloptions); + ReleaseSysCache(tuple); + } heap_close(OldHeap, NoLock); @@ -747,6 +768,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, IndexScanDesc indexScan; HeapScanDesc heapScan; bool use_wal; + bool is_system_catalog; TransactionId OldestXmin; TransactionId FreezeXid; RewriteState rwstate; @@ -786,9 +808,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, */ if (!use_wal && !NewHeap->rd_istemp) { - char reason[NAMEDATALEN + 20]; - snprintf(reason, sizeof(reason), "CLUSTER on \"%s\"", - RelationGetRelationName(NewHeap)); + char reason[NAMEDATALEN + 32]; + + if (OldIndex != NULL) + snprintf(reason, sizeof(reason), "CLUSTER on \"%s\"", + RelationGetRelationName(NewHeap)); + else + snprintf(reason, sizeof(reason), "VACUUM FULL on \"%s\"", + RelationGetRelationName(NewHeap)); XLogReportUnloggedStatement(reason); } @@ -841,6 +868,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, /* return selected value to caller */ *pFreezeXid = FreezeXid; + /* Remember if it's a system catalog */ + is_system_catalog = IsSystemRelation(OldHeap); + /* Initialize the rewrite operation */ rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal); @@ -909,25 +939,31 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, case HEAPTUPLE_INSERT_IN_PROGRESS: /* - * We should not see this unless it's been inserted earlier in - * our own transaction. 
+ * Since we hold exclusive lock on the relation, normally + * the only way to see this is if it was inserted earlier + * in our own transaction. However, it can happen in system + * catalogs, since we tend to release write lock before commit + * there. Give a warning if neither case applies; but in + * any case we had better copy it. */ - if (!TransactionIdIsCurrentTransactionId( - HeapTupleHeaderGetXmin(tuple->t_data))) - elog(ERROR, "concurrent insert in progress"); + if (!is_system_catalog && + !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) + + elog(WARNING, "concurrent insert in progress within table \"%s\"", + RelationGetRelationName(OldHeap)); /* treat as live */ isdead = false; break; case HEAPTUPLE_DELETE_IN_PROGRESS: /* - * We should not see this unless it's been deleted earlier in - * our own transaction. + * Similar situation to INSERT_IN_PROGRESS case. */ Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (!TransactionIdIsCurrentTransactionId( - HeapTupleHeaderGetXmax(tuple->t_data))) - elog(ERROR, "concurrent delete in progress"); + if (!is_system_catalog && + !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple->t_data))) + elog(WARNING, "concurrent delete in progress within table \"%s\"", + RelationGetRelationName(OldHeap)); /* treat as recently dead */ isdead = false; break; @@ -1016,21 +1052,29 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, * table is added or removed altogether. * * Additionally, the first relation is marked with relfrozenxid set to - * frozenXid. It seems a bit ugly to have this here, but all callers would + * frozenXid. It seems a bit ugly to have this here, but the caller would * have to do it anyway, so having it here saves a heap_update. Note: in * the swap-toast-links case, we assume we don't need to change the toast * table's relfrozenxid: the new version of the toast table should already * have relfrozenxid set to RecentXmin, which is good enough. 
+ * + * Lastly, if r2 and its toast table and toast index (if any) are mapped, + * their OIDs are emitted into mapped_tables[]. This is hacky but beats + * having to look the information up again later in finish_heap_swap. */ -void -swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, - TransactionId frozenXid) +static void +swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, + bool swap_toast_by_content, + TransactionId frozenXid, + Oid *mapped_tables) { Relation relRelation; HeapTuple reltup1, reltup2; Form_pg_class relform1, relform2; + Oid relfilenode1, + relfilenode2; Oid swaptemp; CatalogIndexState indstate; @@ -1051,29 +1095,86 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, elog(ERROR, "cache lookup failed for relation %u", r2); relform2 = (Form_pg_class) GETSTRUCT(reltup2); - /* - * Actually swap the fields in the two tuples - */ - swaptemp = relform1->relfilenode; - relform1->relfilenode = relform2->relfilenode; - relform2->relfilenode = swaptemp; + relfilenode1 = relform1->relfilenode; + relfilenode2 = relform2->relfilenode; - swaptemp = relform1->reltablespace; - relform1->reltablespace = relform2->reltablespace; - relform2->reltablespace = swaptemp; + if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2)) + { + /* Normal non-mapped relations: swap relfilenodes and reltablespaces */ + Assert(!target_is_pg_class); - if (!swap_toast_by_content) + swaptemp = relform1->relfilenode; + relform1->relfilenode = relform2->relfilenode; + relform2->relfilenode = swaptemp; + + swaptemp = relform1->reltablespace; + relform1->reltablespace = relform2->reltablespace; + relform2->reltablespace = swaptemp; + + /* Also swap toast links, if we're swapping by links */ + if (!swap_toast_by_content) + { + swaptemp = relform1->reltoastrelid; + relform1->reltoastrelid = relform2->reltoastrelid; + relform2->reltoastrelid = swaptemp; + + /* we should NOT swap reltoastidxid */ + } + } + else { - swaptemp = relform1->reltoastrelid; - 
relform1->reltoastrelid = relform2->reltoastrelid; - relform2->reltoastrelid = swaptemp; + /* + * Mapped-relation case. Here we have to swap the relation mappings + * instead of modifying the pg_class columns. Both must be mapped. + */ + if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2)) + elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation", + NameStr(relform1->relname)); + + /* + * We can't change the tablespace of a mapped rel, and we can't handle + * toast link swapping for one either, because we must not apply any + * critical changes to its pg_class row. These cases should be + * prevented by upstream permissions tests, so this check is a + * non-user-facing emergency backstop. + */ + if (relform1->reltablespace != relform2->reltablespace) + elog(ERROR, "cannot change tablespace of mapped relation \"%s\"", + NameStr(relform1->relname)); + if (!swap_toast_by_content && + (relform1->reltoastrelid || relform2->reltoastrelid)) + elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"", + NameStr(relform1->relname)); - /* we should not swap reltoastidxid */ + /* + * Fetch the mappings --- shouldn't fail, but be paranoid + */ + relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared); + if (!OidIsValid(relfilenode1)) + elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u", + NameStr(relform1->relname), r1); + relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared); + if (!OidIsValid(relfilenode2)) + elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u", + NameStr(relform2->relname), r2); + + /* + * Send replacement mappings to relmapper. Note these won't actually + * take effect until CommandCounterIncrement. 
+ */ + RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false); + RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false); + + /* Pass OIDs of mapped r2 tables back to caller */ + *mapped_tables++ = r2; } /* - * In the case of a shared catalog, these next few steps only affect our - * own database's pg_class row; but that's okay. + * In the case of a shared catalog, these next few steps will only affect + * our own database's pg_class row; but that's okay, because they are + * all noncritical updates. That's also an important fact for the case + * of a mapped catalog, because it's possible that we'll commit the map + * change and then fail to commit the pg_class update. */ /* set rel1's frozen Xid */ @@ -1097,15 +1198,31 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, relform2->reltuples = swap_tuples; } - /* Update the tuples in pg_class */ - simple_heap_update(relRelation, &reltup1->t_self, reltup1); - simple_heap_update(relRelation, &reltup2->t_self, reltup2); - - /* Keep system catalogs current */ - indstate = CatalogOpenIndexes(relRelation); - CatalogIndexInsert(indstate, reltup1); - CatalogIndexInsert(indstate, reltup2); - CatalogCloseIndexes(indstate); + /* + * Update the tuples in pg_class --- unless the target relation of the + * swap is pg_class itself. In that case, there is zero point in making + * changes because we'd be updating the old data that we're about to + * throw away. Because the real work being done here for a mapped relation + * is just to change the relation map settings, it's all right to not + * update the pg_class rows in this case. 
+ */ + if (!target_is_pg_class) + { + simple_heap_update(relRelation, &reltup1->t_self, reltup1); + simple_heap_update(relRelation, &reltup2->t_self, reltup2); + + /* Keep system catalogs current */ + indstate = CatalogOpenIndexes(relRelation); + CatalogIndexInsert(indstate, reltup1); + CatalogIndexInsert(indstate, reltup2); + CatalogCloseIndexes(indstate); + } + else + { + /* no update ... but we do still need relcache inval */ + CacheInvalidateRelcacheByTuple(reltup1); + CacheInvalidateRelcacheByTuple(reltup2); + } /* * If we have toast tables associated with the relations being swapped, @@ -1120,8 +1237,10 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, /* Recursively swap the contents of the toast tables */ swap_relation_files(relform1->reltoastrelid, relform2->reltoastrelid, - true, - frozenXid); + target_is_pg_class, + swap_toast_by_content, + frozenXid, + mapped_tables); } else { @@ -1146,6 +1265,15 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, toastobject; long count; + /* + * We disallow this case for system catalogs, to avoid the + * possibility that the catalog we're rebuilding is one of the + * ones the dependency changes would change. It's too late + * to be making any data changes to the target catalog. + */ + if (IsSystemClass(relform1)) + elog(ERROR, "cannot swap toast files by links for system catalogs"); + /* Delete old dependencies */ if (relform1->reltoastrelid) { @@ -1196,30 +1324,35 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, relform1->reltoastidxid && relform2->reltoastidxid) swap_relation_files(relform1->reltoastidxid, relform2->reltoastidxid, - true, - InvalidTransactionId); - - /* - * Blow away the old relcache entries now. We need this kluge because - * relcache.c keeps a link to the smgr relation for the physical file, and - * that will be out of date as soon as we do CommandCounterIncrement. 
- * Whichever of the rels is the second to be cleared during cache - * invalidation will have a dangling reference to an already-deleted smgr - * relation. Rather than trying to avoid this by ordering operations just - * so, it's easiest to not have the relcache entries there at all. - * (Fortunately, since one of the entries is local in our transaction, - * it's sufficient to clear out our own relcache this way; the problem - * cannot arise for other backends when they see our update on the - * non-local relation.) - */ - RelationForgetRelation(r1); - RelationForgetRelation(r2); + target_is_pg_class, + swap_toast_by_content, + InvalidTransactionId, + mapped_tables); /* Clean up. */ heap_freetuple(reltup1); heap_freetuple(reltup2); heap_close(relRelation, RowExclusiveLock); + + /* + * Close both relcache entries' smgr links. We need this kluge because + * both links will be invalidated during upcoming CommandCounterIncrement. + * Whichever of the rels is the second to be cleared will have a dangling + * reference to the other's smgr entry. Rather than trying to avoid this + * by ordering operations just so, it's easiest to close the links first. + * (Fortunately, since one of the entries is local in our transaction, + * it's sufficient to clear out our own relcache this way; the problem + * cannot arise for other backends when they see our update on the + * non-transient relation.) + * + * Caution: the placement of this step interacts with the decision to + * handle toast rels by recursion. When we are trying to rebuild pg_class + * itself, the smgr close on pg_class must happen after all accesses in + * this function. + */ + RelationCloseSmgrByOid(r1); + RelationCloseSmgrByOid(r2); } /* @@ -1227,12 +1360,43 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, * cleaning up (including rebuilding all indexes on the old heap). 
*/ void -cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content) +finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, + bool is_system_catalog, + bool swap_toast_by_content, + TransactionId frozenXid) { ObjectAddress object; + Oid mapped_tables[4]; + int i; - /* Make swap_relation_files' changes visible in the catalogs. */ - CommandCounterIncrement(); + /* Zero out possible results from swapped_relation_files */ + memset(mapped_tables, 0, sizeof(mapped_tables)); + + /* + * Swap the contents of the heap relations (including any toast tables). + * Also set old heap's relfrozenxid to frozenXid. + */ + swap_relation_files(OIDOldHeap, OIDNewHeap, + (OIDOldHeap == RelationRelationId), + swap_toast_by_content, frozenXid, mapped_tables); + + /* + * If it's a system catalog, queue an sinval message to flush all + * catcaches on the catalog when we reach CommandCounterIncrement. + */ + if (is_system_catalog) + CacheInvalidateCatalog(OIDOldHeap); + + /* + * Rebuild each index on the relation (but not the toast table, which is + * all-new at this point). It is important to do this before the DROP + * step because if we are processing a system catalog that will be used + * during DROP, we want to have its indexes available. There is no + * advantage to the other order anyway because this is all transactional, + * so no chance to reclaim disk space before commit. We do not need + * a final CommandCounterIncrement() because reindex_relation does it. + */ + reindex_relation(OIDOldHeap, false, true); /* Destroy new heap with old filenode */ object.classId = RelationRelationId; @@ -1248,11 +1412,13 @@ cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content) /* performDeletion does CommandCounterIncrement at end */ /* - * Rebuild each index on the relation (but not the toast table, which is - * all-new at this point). We do not need CommandCounterIncrement() - * because reindex_relation does it. 
+ * Now we must remove any relation mapping entries that we set up for the + * transient table, as well as its toast table and toast index if any. + * If we fail to do this before commit, the relmapper will complain about + * new permanent map entries being added post-bootstrap. */ - reindex_relation(OIDOldHeap, false); + for (i = 0; OidIsValid(mapped_tables[i]); i++) + RelationMapRemoveMapping(mapped_tables[i]); /* * At this point, everything is kosher except that, if we did toast swap |