about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/cache/relcache.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2003-09-24 18:54:02 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2003-09-24 18:54:02 +0000
commit a56a016ceb612cdee1ddc5990682f36d541e5b07 (patch)
tree c496319424e0445562dd42ee7229e6d87567996f /src/backend/utils/cache/relcache.c
parent 5f78c6a886a22209dee62de0c13edd6a68453011 (diff)
download postgresql-a56a016ceb612cdee1ddc5990682f36d541e5b07.tar.gz
postgresql-a56a016ceb612cdee1ddc5990682f36d541e5b07.zip
Repair some REINDEX problems per recent discussions. The relcache is
now able to cope with assigning new relfilenode values to nailed-in-cache indexes, so they can be reindexed using the fully crash-safe method. This leaves only shared system indexes as special cases. Remove the 'index deactivation' code, since it provides no useful protection in the shared-index case. Require reindexing of shared indexes to be done in standalone mode, but remove other restrictions on REINDEX. -P (IgnoreSystemIndexes) now prevents using indexes for lookups, but does not disable index updates. It is therefore safe to allow from PGOPTIONS. Upshot: reindexing system catalogs can be done without a standalone backend for all cases except shared catalogs.
Diffstat (limited to 'src/backend/utils/cache/relcache.c')
-rw-r--r-- src/backend/utils/cache/relcache.c 204
1 files changed, 163 insertions, 41 deletions
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 592a99faa7d..3c4cb46a74f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.188 2003/08/04 02:40:06 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.189 2003/09/24 18:54:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -279,9 +279,7 @@ static HTAB *OpClassCache = NULL;
static void RelationClearRelation(Relation relation, bool rebuild);
-#ifdef ENABLE_REINDEX_NAILED_RELATIONS
static void RelationReloadClassinfo(Relation relation);
-#endif /* ENABLE_REINDEX_NAILED_RELATIONS */
static void RelationFlushRelation(Relation relation);
static Relation RelationSysNameCacheGetRelation(const char *relationName);
static bool load_relcache_init_file(void);
@@ -290,7 +288,7 @@ static void write_relcache_init_file(void);
static void formrdesc(const char *relationName, int natts,
FormData_pg_attribute *att);
-static HeapTuple ScanPgRelation(RelationBuildDescInfo buildinfo);
+static HeapTuple ScanPgRelation(RelationBuildDescInfo buildinfo, bool indexOK);
static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
static void RelationBuildTupleDesc(RelationBuildDescInfo buildinfo,
Relation relation);
@@ -322,7 +320,7 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
* and must eventually be freed with heap_freetuple.
*/
static HeapTuple
-ScanPgRelation(RelationBuildDescInfo buildinfo)
+ScanPgRelation(RelationBuildDescInfo buildinfo, bool indexOK)
{
HeapTuple pg_class_tuple;
Relation pg_class_desc;
@@ -367,11 +365,12 @@ ScanPgRelation(RelationBuildDescInfo buildinfo)
/*
* Open pg_class and fetch a tuple. Force heap scan if we haven't yet
* built the critical relcache entries (this includes initdb and
- * startup without a pg_internal.init file).
+ * startup without a pg_internal.init file). The caller can also
+ * force a heap scan by setting indexOK == false.
*/
pg_class_desc = heap_openr(RelationRelationName, AccessShareLock);
pg_class_scan = systable_beginscan(pg_class_desc, indexRelname,
- criticalRelcachesBuilt,
+ indexOK && criticalRelcachesBuilt,
SnapshotNow,
nkeys, key);
@@ -834,7 +833,7 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
/*
* find the tuple in pg_class corresponding to the given relation id
*/
- pg_class_tuple = ScanPgRelation(buildinfo);
+ pg_class_tuple = ScanPgRelation(buildinfo, true);
/*
* if no such tuple exists, return NULL
@@ -875,7 +874,7 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
* it could be new too, but it's okay to forget that fact if forced to
* flush the entry.)
*/
- relation->rd_isnailed = false;
+ relation->rd_isnailed = 0;
relation->rd_isnew = false;
relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
@@ -1386,7 +1385,7 @@ formrdesc(const char *relationName,
* all entries built with this routine are nailed-in-cache; none are
* for new or temp relations.
*/
- relation->rd_isnailed = true;
+ relation->rd_isnailed = 1;
relation->rd_isnew = false;
relation->rd_istemp = false;
@@ -1500,7 +1499,7 @@ formrdesc(const char *relationName,
* Lookup an existing reldesc by OID.
*
* Only try to get the reldesc by looking in the cache,
- * do not go to the disk.
+ * do not go to the disk if it's not present.
*
* NB: relation ref count is incremented if successful.
* Caller should eventually decrement count. (Usually,
@@ -1514,7 +1513,12 @@ RelationIdCacheGetRelation(Oid relationId)
RelationIdCacheLookup(relationId, rd);
if (RelationIsValid(rd))
+ {
RelationIncrementReferenceCount(rd);
+ /* revalidate nailed index if necessary */
+ if (rd->rd_isnailed == 2)
+ RelationReloadClassinfo(rd);
+ }
return rd;
}
@@ -1538,11 +1542,27 @@ RelationSysNameCacheGetRelation(const char *relationName)
RelationSysNameCacheLookup(NameStr(name), rd);
if (RelationIsValid(rd))
+ {
RelationIncrementReferenceCount(rd);
+ /* revalidate nailed index if necessary */
+ if (rd->rd_isnailed == 2)
+ RelationReloadClassinfo(rd);
+ }
return rd;
}
+/*
+ * RelationNodeCacheGetRelation
+ *
+ * As above, but lookup by relfilenode.
+ *
+ * NOTE: this must NOT try to revalidate invalidated nailed indexes, since
+ * that could cause us to return an entry with a different relfilenode than
+ * the caller asked for. Currently this is used only by the buffer manager.
+ * Really the bufmgr's idea of relations should be separated out from the
+ * relcache ...
+ */
Relation
RelationNodeCacheGetRelation(RelFileNode rnode)
{
@@ -1647,39 +1667,60 @@ RelationClose(Relation relation)
#endif
}
-#ifdef ENABLE_REINDEX_NAILED_RELATIONS
/*
- * RelationReloadClassinfo
- *
- * This function is especially for nailed relations.
- * relhasindex/relfilenode could be changed even for
- * nailed relations.
+ * RelationReloadClassinfo - reload the pg_class row (only)
+ *
+ * This function is used only for nailed indexes. Since a REINDEX can
+ * change the relfilenode value for a nailed index, we have to reread
+ * the pg_class row anytime we get an SI invalidation on a nailed index
+ * (without throwing away the whole relcache entry, since we'd be unable
+ * to rebuild it).
+ *
+ * We can't necessarily reread the pg_class row right away; we might be
+ * in a failed transaction when we receive the SI notification. If so,
+ * RelationClearRelation just marks the entry as invalid by setting
+ * rd_isnailed to 2. This routine is called to fix the entry when it
+ * is next needed.
*/
static void
RelationReloadClassinfo(Relation relation)
{
RelationBuildDescInfo buildinfo;
+ bool indexOK;
HeapTuple pg_class_tuple;
Form_pg_class relp;
- if (!relation->rd_rel)
- return;
+ /* Should be called only for invalidated nailed indexes */
+ Assert(relation->rd_isnailed == 2 &&
+ relation->rd_rel->relkind == RELKIND_INDEX);
+ /* Read the pg_class row */
buildinfo.infotype = INFO_RELID;
buildinfo.i.info_id = relation->rd_id;
- pg_class_tuple = ScanPgRelation(buildinfo);
+ /*
+ * Don't try to use an indexscan of pg_class_oid_index to reload the
+ * info for pg_class_oid_index ...
+ */
+ indexOK = strcmp(RelationGetRelationName(relation), ClassOidIndex) != 0;
+ pg_class_tuple = ScanPgRelation(buildinfo, indexOK);
if (!HeapTupleIsValid(pg_class_tuple))
elog(ERROR, "could not find tuple for system relation %u",
relation->rd_id);
- RelationCacheDelete(relation);
relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
- memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
- relation->rd_node.relNode = relp->relfilenode;
- RelationCacheInsert(relation);
+ if (relation->rd_node.relNode != relp->relfilenode)
+ {
+ /* We have to re-insert the entry into the relcache indexes */
+ RelationCacheDelete(relation);
+ memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
+ relation->rd_node.relNode = relp->relfilenode;
+ RelationCacheInsert(relation);
+ }
heap_freetuple(pg_class_tuple);
-
- return;
+ /* Must adjust number of blocks after we know the new relfilenode */
+ relation->rd_targblock = InvalidBlockNumber;
+ RelationUpdateNumberOfBlocks(relation);
+ /* Okay, now it's valid again */
+ relation->rd_isnailed = 1;
}
-#endif /* ENABLE_REINDEX_NAILED_RELATIONS */
/*
* RelationClearRelation
@@ -1712,15 +1753,27 @@ RelationClearRelation(Relation relation, bool rebuild)
* Never, never ever blow away a nailed-in system relation, because
* we'd be unable to recover. However, we must update rd_nblocks and
* reset rd_targblock, in case we got called because of a relation
- * cache flush that was triggered by VACUUM.
+ * cache flush that was triggered by VACUUM. If it's a nailed index,
+ * then we need to re-read the pg_class row to see if its relfilenode
+ * changed. We can't necessarily do that here, because we might be in
+ * a failed transaction. We assume it's okay to do it if there are open
+ * references to the relcache entry (cf notes for AtEOXact_RelationCache).
+ * Otherwise just mark the entry as possibly invalid, and it'll be fixed
+ * when next opened.
*/
if (relation->rd_isnailed)
{
- relation->rd_targblock = InvalidBlockNumber;
- RelationUpdateNumberOfBlocks(relation);
-#ifdef ENABLE_REINDEX_NAILED_RELATIONS
- RelationReloadClassinfo(relation);
-#endif /* ENABLE_REINDEX_NAILED_RELATIONS */
+ if (relation->rd_rel->relkind == RELKIND_INDEX)
+ {
+ relation->rd_isnailed = 2; /* needs to be revalidated */
+ if (relation->rd_refcnt > 1)
+ RelationReloadClassinfo(relation);
+ }
+ else
+ {
+ relation->rd_targblock = InvalidBlockNumber;
+ RelationUpdateNumberOfBlocks(relation);
+ }
return;
}
@@ -1928,6 +1981,12 @@ RelationIdInvalidateRelationCacheByRelationId(Oid relationId)
* because (a) during the first pass we won't process any more SI messages,
* so hash_seq_search will complete safely; (b) during the second pass we
* only hold onto pointers to nondeletable entries.
+ *
+ * The two-phase approach also makes it easy to ensure that we process
+ * nailed-in-cache indexes before other nondeletable items, and that we
+ * process pg_class_oid_index first of all. In scenarios where a nailed
+ * index has been given a new relfilenode, we have to detect that update
+ * before the nailed index is used in reloading any other relcache entry.
*/
void
RelationCacheInvalidate(void)
@@ -1935,6 +1994,7 @@ RelationCacheInvalidate(void)
HASH_SEQ_STATUS status;
RelIdCacheEnt *idhentry;
Relation relation;
+ List *rebuildFirstList = NIL;
List *rebuildList = NIL;
List *l;
@@ -1954,15 +2014,33 @@ RelationCacheInvalidate(void)
if (RelationHasReferenceCountZero(relation))
{
/* Delete this entry immediately */
+ Assert(!relation->rd_isnailed);
RelationClearRelation(relation, false);
}
else
{
- /* Add entry to list of stuff to rebuild in second pass */
- rebuildList = lcons(relation, rebuildList);
+ /*
+ * Add this entry to list of stuff to rebuild in second pass.
+ * pg_class_oid_index goes on the front of rebuildFirstList,
+ * other nailed indexes on the back, and everything else into
+ * rebuildList (in no particular order).
+ */
+ if (relation->rd_isnailed &&
+ relation->rd_rel->relkind == RELKIND_INDEX)
+ {
+ if (strcmp(RelationGetRelationName(relation),
+ ClassOidIndex) == 0)
+ rebuildFirstList = lcons(relation, rebuildFirstList);
+ else
+ rebuildFirstList = lappend(rebuildFirstList, relation);
+ }
+ else
+ rebuildList = lcons(relation, rebuildList);
}
}
+ rebuildList = nconc(rebuildFirstList, rebuildList);
+
/* Phase 2: rebuild the items found to need rebuild in phase 1 */
foreach(l, rebuildList)
{
@@ -1976,6 +2054,11 @@ RelationCacheInvalidate(void)
* AtEOXact_RelationCache
*
* Clean up the relcache at transaction commit or abort.
+ *
+ * Note: this must be called *before* processing invalidation messages.
+ * In the case of abort, we don't want to try to rebuild any invalidated
+ * cache entries (since we can't safely do database accesses). Therefore
+ * we must reset refcnts before handling pending invalidations.
*/
void
AtEOXact_RelationCache(bool commit)
@@ -2045,6 +2128,16 @@ AtEOXact_RelationCache(bool commit)
/* abort case, just reset it quietly */
RelationSetReferenceCount(relation, expected_refcnt);
}
+
+ /*
+ * Flush any temporary index list.
+ */
+ if (relation->rd_indexvalid == 2)
+ {
+ freeList(relation->rd_indexlist);
+ relation->rd_indexlist = NIL;
+ relation->rd_indexvalid = 0;
+ }
}
}
@@ -2101,7 +2194,7 @@ RelationBuildLocalRelation(const char *relname,
* want it kicked out. e.g. pg_attribute!!!
*/
if (nailit)
- rel->rd_isnailed = true;
+ rel->rd_isnailed = 1;
/*
* create a new tuple descriptor from the one passed in. We do this
@@ -2288,7 +2381,7 @@ RelationCacheInitializePhase2(void)
buildinfo.infotype = INFO_RELNAME; \
buildinfo.i.info_name = (indname); \
ird = RelationBuildDesc(buildinfo, NULL); \
- ird->rd_isnailed = true; \
+ ird->rd_isnailed = 1; \
RelationSetReferenceCount(ird, 1); \
} while (0)
@@ -2575,7 +2668,7 @@ CheckConstraintFetch(Relation relation)
* The index list is created only if someone requests it. We scan pg_index
* to find relevant indexes, and add the list to the relcache entry so that
* we won't have to compute it again. Note that shared cache inval of a
- * relcache entry will delete the old list and set rd_indexfound to false,
+ * relcache entry will delete the old list and set rd_indexvalid to 0,
* so that we must recompute the index list on next request. This handles
* creation or deletion of an index.
*
@@ -2602,7 +2695,7 @@ RelationGetIndexList(Relation relation)
MemoryContext oldcxt;
/* Quick exit if we already computed the list. */
- if (relation->rd_indexfound)
+ if (relation->rd_indexvalid != 0)
return listCopy(relation->rd_indexlist);
/*
@@ -2638,7 +2731,7 @@ RelationGetIndexList(Relation relation)
/* Now save a copy of the completed list in the relcache entry. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
relation->rd_indexlist = listCopy(result);
- relation->rd_indexfound = true;
+ relation->rd_indexvalid = 1;
MemoryContextSwitchTo(oldcxt);
return result;
@@ -2677,6 +2770,35 @@ insert_ordered_oid(List *list, Oid datum)
}
/*
+ * RelationSetIndexList -- externally force the index list contents
+ *
+ * This is used to temporarily override what we think the set of valid
+ * indexes is. The forcing will be valid only until transaction commit
+ * or abort.
+ *
+ * This should only be applied to nailed relations, because in a non-nailed
+ * relation the hacked index list could be lost at any time due to SI
+ * messages. In practice it is only used on pg_class (see REINDEX).
+ *
+ * It is up to the caller to make sure the given list is correctly ordered.
+ */
+void
+RelationSetIndexList(Relation relation, List *indexIds)
+{
+ MemoryContext oldcxt;
+
+ Assert(relation->rd_isnailed == 1);
+ /* Copy the list into the cache context (could fail for lack of mem) */
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ indexIds = listCopy(indexIds);
+ MemoryContextSwitchTo(oldcxt);
+ /* Okay to replace old list */
+ freeList(relation->rd_indexlist);
+ relation->rd_indexlist = indexIds;
+ relation->rd_indexvalid = 2; /* mark list as forced */
+}
+
+/*
* RelationGetIndexExpressions -- get the index expressions for an index
*
* We cache the result of transforming pg_index.indexprs into a node tree.
@@ -3087,7 +3209,7 @@ load_relcache_init_file(void)
RelationSetReferenceCount(rel, 1);
else
RelationSetReferenceCount(rel, 0);
- rel->rd_indexfound = false;
+ rel->rd_indexvalid = 0;
rel->rd_indexlist = NIL;
MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));