aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2010-02-03 01:14:17 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2010-02-03 01:14:17 +0000
commit 70a2b05a59c02464e36d8c9bf23d2eef8502eccd (patch)
tree 367ea4f6b3285ec31e88868276f27524fc495674 /src/backend/utils
parent ab7c49c98811f539db9294c8f2d1a15380e025f6 (diff)
download postgresql-70a2b05a59c02464e36d8c9bf23d2eef8502eccd.tar.gz
postgresql-70a2b05a59c02464e36d8c9bf23d2eef8502eccd.zip
Assorted cleanups in preparation for using a map file to support altering
the relfilenode of currently-not-relocatable system catalogs.

1. Get rid of inval.c's dependency on relfilenode, by not having it emit smgr invalidations as a result of relcache flushes. Instead, smgr sinval messages are sent directly from smgr.c when an actual relation delete or truncate is done. This makes considerably more structural sense and allows elimination of a large number of useless smgr inval messages that were formerly sent even in cases where nothing was changing at the physical-relation level. Note that this reintroduces the concept of nontransactional inval messages, but that's okay --- because the messages are sent by smgr.c, they will be sent in Hot Standby slaves, just from a lower logical level than before.

2. Move setNewRelfilenode out of catalog/index.c, where it never logically belonged, into relcache.c; which is a somewhat debatable choice as well but better than before. (I considered catalog/storage.c, but that seemed too low level.) Rename to RelationSetNewRelfilenode.

3. Cosmetic cleanups of some other relfilenode manipulations.
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- src/backend/utils/cache/inval.c 130
-rw-r--r-- src/backend/utils/cache/relcache.c 123
2 files changed, 150 insertions, 103 deletions
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 04935ffd546..99aad752bb3 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -53,14 +53,14 @@
*
* Also, whenever we see an operation on a pg_class or pg_attribute tuple,
* we register a relcache flush operation for the relation described by that
- * tuple. pg_class updates trigger an smgr flush operation as well.
+ * tuple.
*
- * We keep the relcache and smgr flush requests in lists separate from the
- * catcache tuple flush requests. This allows us to issue all the pending
- * catcache flushes before we issue relcache flushes, which saves us from
- * loading a catcache tuple during relcache load only to flush it again
- * right away. Also, we avoid queuing multiple relcache flush requests for
- * the same relation, since a relcache flush is relatively expensive to do.
+ * We keep the relcache flush requests in lists separate from the catcache
+ * tuple flush requests. This allows us to issue all the pending catcache
+ * flushes before we issue relcache flushes, which saves us from loading
+ * a catcache tuple during relcache load only to flush it again right away.
+ * Also, we avoid queuing multiple relcache flush requests for the same
+ * relation, since a relcache flush is relatively expensive to do.
* (XXX is it worth testing likewise for duplicate catcache flush entries?
* Probably not.)
*
@@ -80,7 +80,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.92 2010/01/09 16:49:27 sriggs Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.93 2010/02/03 01:14:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -116,7 +116,7 @@ typedef struct InvalidationChunk
typedef struct InvalidationListHeader
{
InvalidationChunk *cclist; /* list of chunks holding catcache msgs */
- InvalidationChunk *rclist; /* list of chunks holding relcache/smgr msgs */
+ InvalidationChunk *rclist; /* list of chunks holding relcache msgs */
} InvalidationListHeader;
/*----------------
@@ -304,7 +304,7 @@ AppendInvalidationMessageList(InvalidationChunk **destHdr,
* Invalidation set support functions
*
* These routines understand about the division of a logical invalidation
- * list into separate physical lists for catcache and relcache/smgr entries.
+ * list into separate physical lists for catcache and relcache entries.
* ----------------------------------------------------------------
*/
@@ -349,27 +349,6 @@ AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
}
/*
- * Add an smgr inval entry
- */
-static void
-AddSmgrInvalidationMessage(InvalidationListHeader *hdr,
- RelFileNode rnode)
-{
- SharedInvalidationMessage msg;
-
- /* Don't add a duplicate item */
- ProcessMessageList(hdr->rclist,
- if (msg->sm.id == SHAREDINVALSMGR_ID &&
- RelFileNodeEquals(msg->sm.rnode, rnode))
- return);
-
- /* OK, add the item */
- msg.sm.id = SHAREDINVALSMGR_ID;
- msg.sm.rnode = rnode;
- AddInvalidationMessage(&hdr->rclist, &msg);
-}
-
-/*
* Append one list of invalidation messages to another, resetting
* the source list to empty.
*/
@@ -455,23 +434,6 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
}
/*
- * RegisterSmgrInvalidation
- *
- * As above, but register an smgr invalidation event.
- */
-static void
-RegisterSmgrInvalidation(RelFileNode rnode)
-{
- AddSmgrInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
- rnode);
-
- /*
- * As above, just in case there is not an associated catalog change.
- */
- (void) GetCurrentCommandId(true);
-}
-
-/*
* LocalExecuteInvalidationMessage
*
* Process a single invalidation message (which could be of any type).
@@ -606,35 +568,12 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
if (tupleRelId == RelationRelationId)
{
Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
- RelFileNode rnode;
relationId = HeapTupleGetOid(tuple);
if (classtup->relisshared)
databaseId = InvalidOid;
else
databaseId = MyDatabaseId;
-
- /*
- * We need to send out an smgr inval as well as a relcache inval. This
- * is needed because other backends might possibly possess smgr cache
- * but not relcache entries for the target relation.
- *
- * Note: during a pg_class row update that assigns a new relfilenode
- * or reltablespace value, we will be called on both the old and new
- * tuples, and thus will broadcast invalidation messages showing both
- * the old and new RelFileNode values. This ensures that other
- * backends will close smgr references to the old file.
- *
- * XXX possible future cleanup: it might be better to trigger smgr
- * flushes explicitly, rather than indirectly from pg_class updates.
- */
- if (classtup->reltablespace)
- rnode.spcNode = classtup->reltablespace;
- else
- rnode.spcNode = MyDatabaseTableSpace;
- rnode.dbNode = databaseId;
- rnode.relNode = classtup->relfilenode;
- RegisterSmgrInvalidation(rnode);
}
else if (tupleRelId == AttributeRelationId)
{
@@ -902,7 +841,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
*/
void
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
- int nmsgs, bool RelcacheInitFileInval)
+ int nmsgs, bool RelcacheInitFileInval)
{
Oid dboid = 0;
bool invalidate_global = false;
@@ -1251,10 +1190,6 @@ CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
* This is used in places that need to force relcache rebuild but aren't
* changing any of the tuples recognized as contributors to the relcache
* entry by PrepareForTupleInvalidation. (An example is dropping an index.)
- * We assume in particular that relfilenode/reltablespace aren't changing
- * (so the rd_node value is still good).
- *
- * XXX most callers of this probably don't need to force an smgr flush.
*/
void
CacheInvalidateRelcache(Relation relation)
@@ -1269,7 +1204,6 @@ CacheInvalidateRelcache(Relation relation)
databaseId = MyDatabaseId;
RegisterRelcacheInvalidation(databaseId, relationId);
- RegisterSmgrInvalidation(relation->rd_node);
}
/*
@@ -1282,22 +1216,13 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple);
Oid databaseId;
Oid relationId;
- RelFileNode rnode;
relationId = HeapTupleGetOid(classTuple);
if (classtup->relisshared)
databaseId = InvalidOid;
else
databaseId = MyDatabaseId;
- if (classtup->reltablespace)
- rnode.spcNode = classtup->reltablespace;
- else
- rnode.spcNode = MyDatabaseTableSpace;
- rnode.dbNode = databaseId;
- rnode.relNode = classtup->relfilenode;
-
RegisterRelcacheInvalidation(databaseId, relationId);
- RegisterSmgrInvalidation(rnode);
}
/*
@@ -1320,6 +1245,39 @@ CacheInvalidateRelcacheByRelid(Oid relid)
ReleaseSysCache(tup);
}
+
+/*
+ * CacheInvalidateSmgr
+ * Register invalidation of smgr references to a physical relation.
+ *
+ * Sending this type of invalidation msg forces other backends to close open
+ * smgr entries for the rel. This should be done to flush dangling open-file
+ * references when the physical rel is being dropped or truncated. Because
+ * these are nontransactional (i.e., not-rollback-able) operations, we just
+ * send the inval message immediately without any queuing.
+ *
+ * Note: in most cases there will have been a relcache flush issued against
+ * the rel at the logical level. We need a separate smgr-level flush because
+ * it is possible for backends to have open smgr entries for rels they don't
+ * have a relcache entry for, e.g. because the only thing they ever did with
+ * the rel is write out dirty shared buffers.
+ *
+ * Note: because these messages are nontransactional, they won't be captured
+ * in commit/abort WAL entries. Instead, calls to CacheInvalidateSmgr()
+ * should happen in low-level smgr.c routines, which are executed while
+ * replaying WAL as well as when creating it.
+ */
+void
+CacheInvalidateSmgr(RelFileNode rnode)
+{
+ SharedInvalidationMessage msg;
+
+ msg.sm.id = SHAREDINVALSMGR_ID;
+ msg.sm.rnode = rnode;
+ SendSharedInvalidMessages(&msg, 1);
+}
+
+
/*
* CacheRegisterSyscacheCallback
* Register the specified function to be called for all future
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 30a32d17f9a..ba09331aba9 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.300 2010/01/13 23:07:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.301 2010/02/03 01:14:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -33,6 +33,7 @@
#include "access/genam.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
+#include "access/transam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
@@ -53,6 +54,7 @@
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "catalog/schemapg.h"
+#include "catalog/storage.h"
#include "commands/trigger.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
@@ -2377,22 +2379,6 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
}
}
-/*
- * RelationCacheMarkNewRelfilenode
- *
- * Mark the rel as having been given a new relfilenode in the current
- * (sub) transaction. This is a hint that can be used to optimize
- * later operations on the rel in the same transaction.
- */
-void
-RelationCacheMarkNewRelfilenode(Relation rel)
-{
- /* Mark it... */
- rel->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
- /* ... and now we have eoxact cleanup work to do */
- need_eoxact_work = true;
-}
-
/*
* RelationBuildLocalRelation
@@ -2562,6 +2548,109 @@ RelationBuildLocalRelation(const char *relname,
return rel;
}
+
+/*
+ * RelationSetNewRelfilenode
+ *
+ * Assign a new relfilenode (physical file name) to the relation.
+ *
+ * This allows a full rewrite of the relation to be done with transactional
+ * safety (since the filenode assignment can be rolled back). Note however
+ * that there is no simple way to access the relation's old data for the
+ * remainder of the current transaction. This limits the usefulness to cases
+ * such as TRUNCATE or rebuilding an index from scratch.
+ *
+ * Caller must already hold exclusive lock on the relation.
+ *
+ * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
+ * must be passed for indexes). This should be a lower bound on the XIDs
+ * that will be put into the new relation contents.
+ */
+void
+RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
+{
+ Oid newrelfilenode;
+ RelFileNode newrnode;
+ Relation pg_class;
+ HeapTuple tuple;
+ Form_pg_class classform;
+
+ /* Can't change relfilenode for nailed tables (indexes ok though) */
+ Assert(!relation->rd_isnailed ||
+ relation->rd_rel->relkind == RELKIND_INDEX);
+ /* Can't change for shared tables or indexes */
+ Assert(!relation->rd_rel->relisshared);
+ /* Indexes must have Invalid frozenxid; other relations must not */
+ Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
+ freezeXid == InvalidTransactionId) ||
+ TransactionIdIsNormal(freezeXid));
+
+ /* Allocate a new relfilenode */
+ newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
+ relation->rd_rel->relisshared,
+ NULL);
+
+ /*
+ * Find the pg_class tuple for the given relation. This is not used
+ * during bootstrap, so okay to use heap_update always.
+ */
+ pg_class = heap_open(RelationRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCacheCopy(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(relation)),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "could not find tuple for relation %u",
+ RelationGetRelid(relation));
+ classform = (Form_pg_class) GETSTRUCT(tuple);
+
+ /*
+ * Create storage for the main fork of the new relfilenode.
+ *
+ * NOTE: any conflict in relfilenode value will be caught here, if
+ * GetNewRelFileNode messes up for any reason.
+ */
+ newrnode = relation->rd_node;
+ newrnode.relNode = newrelfilenode;
+ RelationCreateStorage(newrnode, relation->rd_istemp);
+ smgrclosenode(newrnode);
+
+ /*
+ * Schedule unlinking of the old storage at transaction commit.
+ */
+ RelationDropStorage(relation);
+
+ /*
+ * Now update the pg_class row.
+ */
+ classform->relfilenode = newrelfilenode;
+ classform->relpages = 0; /* it's empty until further notice */
+ classform->reltuples = 0;
+ classform->relfrozenxid = freezeXid;
+ simple_heap_update(pg_class, &tuple->t_self, tuple);
+ CatalogUpdateIndexes(pg_class, tuple);
+
+ heap_freetuple(tuple);
+
+ heap_close(pg_class, RowExclusiveLock);
+
+ /*
+ * Make the pg_class row change visible. This will cause the relcache
+ * entry to get updated, too.
+ */
+ CommandCounterIncrement();
+
+ /*
+ * Mark the rel as having been given a new relfilenode in the current
+ * (sub) transaction. This is a hint that can be used to optimize
+ * later operations on the rel in the same transaction.
+ */
+ relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
+ /* ... and now we have eoxact cleanup work to do */
+ need_eoxact_work = true;
+}
+
+
/*
* RelationCacheInitialize
*