aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/common/indexvalid.c37
-rw-r--r--src/backend/access/gist/gist.c408
-rw-r--r--src/backend/access/gist/gistscan.c30
-rw-r--r--src/backend/access/hash/hash.c317
-rw-r--r--src/backend/access/hash/hashovfl.c46
-rw-r--r--src/backend/access/hash/hashpage.c49
-rw-r--r--src/backend/access/hash/hashscan.c27
-rw-r--r--src/backend/access/index/indexam.c60
-rw-r--r--src/backend/access/nbtree/Makefile4
-rw-r--r--src/backend/access/nbtree/README16
-rw-r--r--src/backend/access/nbtree/nbtinsert.c63
-rw-r--r--src/backend/access/nbtree/nbtpage.c41
-rw-r--r--src/backend/access/nbtree/nbtree.c504
-rw-r--r--src/backend/access/nbtree/nbtscan.c224
-rw-r--r--src/backend/access/nbtree/nbtsearch.c26
-rw-r--r--src/backend/access/rtree/rtree.c343
-rw-r--r--src/backend/access/rtree/rtscan.c30
-rw-r--r--src/backend/access/transam/xact.c11
-rw-r--r--src/backend/bootstrap/bootstrap.c4
-rw-r--r--src/backend/catalog/heap.c4
-rw-r--r--src/backend/catalog/index.c321
-rw-r--r--src/backend/catalog/pg_operator.c4
-rw-r--r--src/backend/commands/command.c4
-rw-r--r--src/backend/commands/indexcmds.c44
-rw-r--r--src/backend/commands/vacuum.c109
-rw-r--r--src/backend/commands/vacuumlazy.c79
-rw-r--r--src/backend/executor/execUtils.c48
-rw-r--r--src/backend/executor/nodeIndexscan.c6
-rw-r--r--src/backend/executor/nodeSeqscan.c4
-rw-r--r--src/backend/optimizer/util/plancat.c4
-rw-r--r--src/backend/utils/adt/datum.c4
-rw-r--r--src/bin/initdb/initdb.sh6
-rw-r--r--src/include/access/genam.h21
-rw-r--r--src/include/access/gist.h4
-rw-r--r--src/include/access/gistscan.h3
-rw-r--r--src/include/access/hash.h17
-rw-r--r--src/include/access/nbtree.h18
-rw-r--r--src/include/access/rtree.h5
-rw-r--r--src/include/catalog/catversion.h4
-rw-r--r--src/include/catalog/index.h20
-rw-r--r--src/include/catalog/pg_am.h72
-rw-r--r--src/include/catalog/pg_index.h6
-rw-r--r--src/include/catalog/pg_proc.h45
-rw-r--r--src/test/regress/expected/oidjoins.out16
-rw-r--r--src/test/regress/expected/opr_sanity.out4
-rw-r--r--src/test/regress/sql/oidjoins.sql8
46 files changed, 1478 insertions, 1642 deletions
diff --git a/src/backend/access/common/indexvalid.c b/src/backend/access/common/indexvalid.c
index 6a7c08b4506..94e7efd522e 100644
--- a/src/backend/access/common/indexvalid.c
+++ b/src/backend/access/common/indexvalid.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.26 2001/01/24 19:42:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.27 2001/07/15 22:48:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,12 +24,9 @@
*/
int NIndexTupleProcessed;
+
/* ----------------
- * index_keytest
- *
- * old comments
- * May eventually combine with other tests (like timeranges)?
- * Should have Buffer buffer; as an argument and pass it to amgetattr.
+ * index_keytest - does this index tuple satisfy the scan key(s)?
* ----------------
*/
bool
@@ -38,16 +35,16 @@ index_keytest(IndexTuple tuple,
int scanKeySize,
ScanKey key)
{
- bool isNull;
- Datum datum;
- Datum test;
-
IncrIndexProcessed();
while (scanKeySize > 0)
{
+ Datum datum;
+ bool isNull;
+ Datum test;
+
datum = index_getattr(tuple,
- key[0].sk_attno,
+ key->sk_attno,
tupdesc,
&isNull);
@@ -57,25 +54,19 @@ index_keytest(IndexTuple tuple,
return false;
}
- if (key[0].sk_flags & SK_ISNULL)
+ if (key->sk_flags & SK_ISNULL)
return false;
- if (key[0].sk_flags & SK_COMMUTE)
- {
- test = FunctionCall2(&key[0].sk_func,
- key[0].sk_argument, datum);
- }
+ if (key->sk_flags & SK_COMMUTE)
+ test = FunctionCall2(&key->sk_func, key->sk_argument, datum);
else
- {
- test = FunctionCall2(&key[0].sk_func,
- datum, key[0].sk_argument);
- }
+ test = FunctionCall2(&key->sk_func, datum, key->sk_argument);
- if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
+ if (DatumGetBool(test) == !!(key->sk_flags & SK_NEGATE))
return false;
- scanKeySize -= 1;
key++;
+ scanKeySize--;
}
return true;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 9d6e2040f6c..c99c4a7e6e3 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.79 2001/06/11 05:00:56 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.80 2001/07/15 22:48:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -43,7 +43,23 @@
#define RIGHT_ADDED 0x02
#define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED )
+
+/* Working state for gistbuild and its callback */
+typedef struct
+{
+ GISTSTATE giststate;
+ int numindexattrs;
+ double indtuples;
+} GISTBuildState;
+
+
/* non-export function prototypes */
+static void gistbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state);
static void gistdoinsert(Relation r,
IndexTuple itup,
InsertIndexResult *res,
@@ -89,6 +105,7 @@ static void GISTInitBuffer(Buffer b, uint32 f);
static OffsetNumber gistchoose(Relation r, Page p,
IndexTuple it,
GISTSTATE *giststate);
+static void gistdelete(Relation r, ItemPointer tid);
#ifdef GIST_PAGEADDITEM
static IndexTuple gist_tuple_replacekey(Relation r,
GISTENTRY entry, IndexTuple t);
@@ -116,184 +133,36 @@ gistbuild(PG_FUNCTION_ARGS)
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
- Node *oldPred = (Node *) PG_GETARG_POINTER(3);
-
-#ifdef NOT_USED
- IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4);
-
-#endif
- HeapScanDesc hscan;
- HeapTuple htup;
- IndexTuple itup;
- TupleDesc htupdesc,
- itupdesc;
- Datum attdata[INDEX_MAX_KEYS];
- char nulls[INDEX_MAX_KEYS];
- double nhtups,
- nitups;
- Node *pred = indexInfo->ii_Predicate;
-
-#ifndef OMIT_PARTIAL_INDEX
- TupleTable tupleTable;
- TupleTableSlot *slot;
-
-#endif
- ExprContext *econtext;
- GISTSTATE giststate;
- GISTENTRY tmpcentry;
- Buffer buffer = InvalidBuffer;
- bool *compvec;
- int i;
+ double reltuples;
+ GISTBuildState buildstate;
+ Buffer buffer;
/* no locking is needed */
- initGISTstate(&giststate, index);
+ initGISTstate(&buildstate.giststate, index);
/*
* We expect to be called exactly once for any index relation. If
* that's not the case, big trouble's what we have.
*/
- if (oldPred == NULL && RelationGetNumberOfBlocks(index) != 0)
- elog(ERROR, "%s already contains data", RelationGetRelationName(index));
-
- /* initialize the root page (if this is a new index) */
- if (oldPred == NULL)
- {
- buffer = ReadBuffer(index, P_NEW);
- GISTInitBuffer(buffer, F_LEAF);
- WriteBuffer(buffer);
- }
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "%s already contains data",
+ RelationGetRelationName(index));
- /* get tuple descriptors for heap and index relations */
- htupdesc = RelationGetDescr(heap);
- itupdesc = RelationGetDescr(index);
-
- /*
- * If this is a predicate (partial) index, we will need to evaluate
- * the predicate using ExecQual, which requires the current tuple to
- * be in a slot of a TupleTable. In addition, ExecQual must have an
- * ExprContext referring to that slot. Here, we initialize dummy
- * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92
- *
- * We construct the ExprContext anyway since we need a per-tuple
- * temporary memory context for function evaluation -- tgl July 00
- */
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- {
- tupleTable = ExecCreateTupleTable(1);
- slot = ExecAllocTableSlot(tupleTable);
- ExecSetSlotDescriptor(slot, htupdesc, false);
- }
- else
- {
- tupleTable = NULL;
- slot = NULL;
- }
- econtext = MakeExprContext(slot, TransactionCommandContext);
-#else
- econtext = MakeExprContext(NULL, TransactionCommandContext);
-#endif /* OMIT_PARTIAL_INDEX */
+ /* initialize the root page */
+ buffer = ReadBuffer(index, P_NEW);
+ GISTInitBuffer(buffer, F_LEAF);
+ WriteBuffer(buffer);
/* build the index */
- nhtups = nitups = 0.0;
-
- compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
-
- /* start a heap scan */
- hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
-
- while (HeapTupleIsValid(htup = heap_getnext(hscan, 0)))
- {
- MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
- nhtups += 1.0;
-
-#ifndef OMIT_PARTIAL_INDEX
-
- /*
- * If oldPred != NULL, this is an EXTEND INDEX command, so skip
- * this tuple if it was already in the existing partial index
- */
- if (oldPred != NULL)
- {
- slot->val = htup;
- if (ExecQual((List *) oldPred, econtext, false))
- {
- nitups += 1.0;
- continue;
- }
- }
-
- /*
- * Skip this tuple if it doesn't satisfy the partial-index
- * predicate
- */
- if (pred != NULL)
- {
- slot->val = htup;
- if (!ExecQual((List *) pred, econtext, false))
- continue;
- }
-#endif /* OMIT_PARTIAL_INDEX */
-
- nitups += 1.0;
-
- /*
- * For the current heap tuple, extract all the attributes we use
- * in this index, and note which are null.
- */
- FormIndexDatum(indexInfo,
- htup,
- htupdesc,
- econtext->ecxt_per_tuple_memory,
- attdata,
- nulls);
-
- /* immediately compress keys to normalize */
- for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
- {
- gistcentryinit(&giststate, i, &tmpcentry, attdata[i],
- (Relation) NULL, (Page) NULL, (OffsetNumber) 0,
- -1 /* size is currently bogus */ , TRUE);
- if (attdata[i] != tmpcentry.key &&
- !(giststate.keytypbyval))
- compvec[i] = TRUE;
- else
- compvec[i] = FALSE;
- attdata[i] = tmpcentry.key;
- }
-
- /* form an index tuple and point it at the heap tuple */
- itup = index_formtuple(itupdesc, attdata, nulls);
- itup->t_tid = htup->t_self;
-
- /*
- * Since we already have the index relation locked, we call
- * gistdoinsert directly. Normal access method calls dispatch
- * through gistinsert, which locks the relation for write. This
- * is the right thing to do if you're inserting single tups, but
- * not when you're initializing the whole index at once.
- */
- gistdoinsert(index, itup, NULL, &giststate);
+ buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
+ buildstate.indtuples = 0;
- for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
- if (compvec[i])
- pfree(DatumGetPointer(attdata[i]));
-
- pfree(itup);
- }
+ /* do the heap scan */
+ reltuples = IndexBuildHeapScan(heap, index, indexInfo,
+ gistbuildCallback, (void *) &buildstate);
/* okay, all heap tuples are indexed */
- heap_endscan(hscan);
-
- pfree(compvec);
-
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- ExecDropTupleTable(tupleTable, true);
-#endif /* OMIT_PARTIAL_INDEX */
- FreeExprContext(econtext);
/*
* Since we just counted the tuples in the heap, we update its stats
@@ -313,14 +182,8 @@ gistbuild(PG_FUNCTION_ARGS)
heap_close(heap, NoLock);
index_close(index);
- UpdateStats(hrelid, nhtups);
- UpdateStats(irelid, nitups);
- if (oldPred != NULL)
- {
- if (nitups == nhtups)
- pred = NULL;
- UpdateIndexPredicate(irelid, oldPred, pred);
- }
+ UpdateStats(hrelid, reltuples);
+ UpdateStats(irelid, buildstate.indtuples);
}
#ifdef GISTDEBUG
@@ -331,6 +194,63 @@ gistbuild(PG_FUNCTION_ARGS)
}
/*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+static void
+gistbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state)
+{
+ GISTBuildState *buildstate = (GISTBuildState *) state;
+ IndexTuple itup;
+ bool compvec[INDEX_MAX_KEYS];
+ GISTENTRY tmpcentry;
+ int i;
+
+ /* immediately compress keys to normalize */
+ for (i = 0; i < buildstate->numindexattrs; i++)
+ {
+ gistcentryinit(&buildstate->giststate, i, &tmpcentry, attdata[i],
+ (Relation) NULL, (Page) NULL, (OffsetNumber) 0,
+ -1 /* size is currently bogus */ , TRUE);
+ if (attdata[i] != tmpcentry.key &&
+ !(buildstate->giststate.keytypbyval))
+ compvec[i] = TRUE;
+ else
+ compvec[i] = FALSE;
+ attdata[i] = tmpcentry.key;
+ }
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
+ itup->t_tid = htup->t_self;
+
+ /* GIST indexes don't index nulls, see notes in gistinsert */
+ if (! IndexTupleHasNulls(itup))
+ {
+ /*
+ * Since we already have the index relation locked, we call
+ * gistdoinsert directly. Normal access method calls dispatch
+ * through gistinsert, which locks the relation for write. This
+ * is the right thing to do if you're inserting single tups, but
+ * not when you're initializing the whole index at once.
+ */
+ gistdoinsert(index, itup, NULL, &buildstate->giststate);
+
+ buildstate->indtuples += 1;
+ }
+
+ for (i = 0; i < buildstate->numindexattrs; i++)
+ if (compvec[i])
+ pfree(DatumGetPointer(attdata[i]));
+
+ pfree(itup);
+}
+
+/*
* gistinsert -- wrapper for GiST tuple insertion.
*
* This is the public interface routine for tuple insertion in GiSTs.
@@ -343,25 +263,28 @@ gistinsert(PG_FUNCTION_ARGS)
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
-
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
-
#endif
InsertIndexResult res;
IndexTuple itup;
GISTSTATE giststate;
GISTENTRY tmpentry;
int i;
- bool *compvec;
+ bool compvec[INDEX_MAX_KEYS];
+
+ /*
+ * Since GIST is not marked "amconcurrent" in pg_am, caller should
+ * have acquired exclusive lock on index relation. We need no locking
+ * here.
+ */
initGISTstate(&giststate, r);
/* immediately compress keys to normalize */
- compvec = (bool *) palloc(sizeof(bool) * r->rd_att->natts);
for (i = 0; i < r->rd_att->natts; i++)
{
- gistcentryinit(&giststate, i,&tmpentry, datum[i],
+ gistcentryinit(&giststate, i, &tmpentry, datum[i],
(Relation) NULL, (Page) NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */ , TRUE);
if (datum[i] != tmpentry.key && !(giststate.keytypbyval))
@@ -374,18 +297,24 @@ gistinsert(PG_FUNCTION_ARGS)
itup->t_tid = *ht_ctid;
/*
- * Notes in ExecUtils:ExecOpenIndices()
- *
- * RelationSetLockForWrite(r);
+ * Currently, GIST indexes do not support indexing NULLs; considerable
+ * infrastructure work would have to be done to do anything reasonable
+ * with a NULL.
*/
+ if (IndexTupleHasNulls(itup))
+ {
+ res = NULL;
+ }
+ else
+ {
+ res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+ gistdoinsert(r, itup, &res, &giststate);
+ }
- res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
- gistdoinsert(r, itup, &res, &giststate);
for (i = 0; i < r->rd_att->natts; i++)
if (compvec[i] == TRUE)
pfree(DatumGetPointer(datum[i]));
pfree(itup);
- pfree(compvec);
PG_RETURN_POINTER(res);
}
@@ -527,9 +456,7 @@ gistlayerinsert(Relation r, BlockNumber blkno,
/* key is modified, so old version must be deleted */
ItemPointerSet(&oldtid, blkno, child);
- DirectFunctionCall2(gistdelete,
- PointerGetDatum(r),
- PointerGetDatum(&oldtid));
+ gistdelete(r, &oldtid);
}
ret = INSERTED;
@@ -1416,29 +1343,31 @@ gistfreestack(GISTSTACK *s)
/*
-** remove an entry from a page
-*/
-Datum
-gistdelete(PG_FUNCTION_ARGS)
+ * Retail deletion of a single tuple.
+ *
+ * NB: this is no longer called externally, but is still needed by
+ * gistlayerinsert(). That dependency will have to be fixed if GIST
+ * is ever going to allow concurrent insertions.
+ */
+static void
+gistdelete(Relation r, ItemPointer tid)
{
- Relation r = (Relation) PG_GETARG_POINTER(0);
- ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1);
BlockNumber blkno;
OffsetNumber offnum;
Buffer buf;
Page page;
/*
- * Notes in ExecUtils:ExecOpenIndices() Also note that only vacuum
- * deletes index tuples now...
- *
- * RelationSetLockForWrite(r);
+ * Since GIST is not marked "amconcurrent" in pg_am, caller should
+ * have acquired exclusive lock on index relation. We need no locking
+ * here.
*/
blkno = ItemPointerGetBlockNumber(tid);
offnum = ItemPointerGetOffsetNumber(tid);
/* adjust any scans that will be affected by this deletion */
+ /* NB: this works only for scans in *this* backend! */
gistadjscans(r, GISTOP_DEL, blkno, offnum);
/* delete the index tuple */
@@ -1448,10 +1377,93 @@ gistdelete(PG_FUNCTION_ARGS)
PageIndexTupleDelete(page, offnum);
WriteBuffer(buf);
+}
- PG_RETURN_VOID();
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+Datum
+gistbulkdelete(PG_FUNCTION_ARGS)
+{
+ Relation rel = (Relation) PG_GETARG_POINTER(0);
+ IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
+ void *callback_state = (void *) PG_GETARG_POINTER(2);
+ IndexBulkDeleteResult *result;
+ BlockNumber num_pages;
+ double tuples_removed;
+ double num_index_tuples;
+ RetrieveIndexResult res;
+ IndexScanDesc iscan;
+
+ tuples_removed = 0;
+ num_index_tuples = 0;
+
+ /*
+ * Since GIST is not marked "amconcurrent" in pg_am, caller should
+ * have acquired exclusive lock on index relation. We need no locking
+ * here.
+ */
+
+ /*
+ * XXX generic implementation --- should be improved!
+ */
+
+ /* walk through the entire index */
+ iscan = index_beginscan(rel, false, 0, (ScanKey) NULL);
+
+ while ((res = index_getnext(iscan, ForwardScanDirection))
+ != (RetrieveIndexResult) NULL)
+ {
+ ItemPointer heapptr = &res->heap_iptr;
+
+ if (callback(heapptr, callback_state))
+ {
+ ItemPointer indexptr = &res->index_iptr;
+ BlockNumber blkno;
+ OffsetNumber offnum;
+ Buffer buf;
+ Page page;
+
+ blkno = ItemPointerGetBlockNumber(indexptr);
+ offnum = ItemPointerGetOffsetNumber(indexptr);
+
+ /* adjust any scans that will be affected by this deletion */
+ gistadjscans(rel, GISTOP_DEL, blkno, offnum);
+
+ /* delete the index tuple */
+ buf = ReadBuffer(rel, blkno);
+ page = BufferGetPage(buf);
+
+ PageIndexTupleDelete(page, offnum);
+
+ WriteBuffer(buf);
+
+ tuples_removed += 1;
+ }
+ else
+ num_index_tuples += 1;
+
+ pfree(res);
+ }
+
+ index_endscan(iscan);
+
+ /* return statistics */
+ num_pages = RelationGetNumberOfBlocks(rel);
+
+ result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
+ result->num_pages = num_pages;
+ result->tuples_removed = tuples_removed;
+ result->num_index_tuples = num_index_tuples;
+
+ PG_RETURN_POINTER(result);
}
+
void
initGISTstate(GISTSTATE *giststate, Relation index)
{
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 672b121693a..9358692a53c 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/gist/gistscan.c,v 1.37 2001/06/28 16:00:07 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/gist/gistscan.c,v 1.38 2001/07/15 22:48:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -59,13 +59,8 @@ gistbeginscan(PG_FUNCTION_ARGS)
ScanKey key = (ScanKey) PG_GETARG_POINTER(3);
IndexScanDesc s;
- /*
- * Let index_beginscan does its work...
- *
- * RelationSetLockForRead(r);
- */
-
s = RelationGetIndexScan(r, fromEnd, nkeys, key);
+
gistregscan(s);
PG_RETURN_POINTER(s);
@@ -283,6 +278,27 @@ gistdropscan(IndexScanDesc s)
pfree(l);
}
+/*
+ * AtEOXact_gist() --- clean up gist subsystem at xact abort or commit.
+ *
+ * This is here because it needs to touch this module's static var GISTScans.
+ */
+void
+AtEOXact_gist(void)
+{
+ /*
+ * Note: these actions should only be necessary during xact abort; but
+ * they can't hurt during a commit.
+ */
+
+ /*
+ * Reset the active-scans list to empty. We do not need to free the
+ * list elements, because they're all palloc()'d, so they'll go away
+ * at end of transaction anyway.
+ */
+ GISTScans = NULL;
+}
+
void
gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum)
{
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 9617fcc33a6..9b0e6cf28ee 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.52 2001/07/15 22:48:15 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
@@ -21,13 +21,27 @@
#include "access/genam.h"
#include "access/hash.h"
#include "access/heapam.h"
+#include "access/xlogutils.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "miscadmin.h"
+
bool BuildingHash = false;
-#include "access/xlogutils.h"
+
+/* Working state for hashbuild and its callback */
+typedef struct
+{
+ double indtuples;
+} HashBuildState;
+
+static void hashbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state);
/*
@@ -44,161 +58,32 @@ hashbuild(PG_FUNCTION_ARGS)
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
- Node *oldPred = (Node *) PG_GETARG_POINTER(3);
-
-#ifdef NOT_USED
- IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4);
-
-#endif
- HeapScanDesc hscan;
- HeapTuple htup;
- IndexTuple itup;
- TupleDesc htupdesc,
- itupdesc;
- Datum attdata[INDEX_MAX_KEYS];
- char nulls[INDEX_MAX_KEYS];
- double nhtups,
- nitups;
- HashItem hitem;
- Node *pred = indexInfo->ii_Predicate;
-
-#ifndef OMIT_PARTIAL_INDEX
- TupleTable tupleTable;
- TupleTableSlot *slot;
+ double reltuples;
+ HashBuildState buildstate;
-#endif
- ExprContext *econtext;
- InsertIndexResult res = NULL;
-
- /* note that this is a new hash */
+ /* set flag to disable locking */
BuildingHash = true;
- /* initialize the hash index metadata page (if this is a new index) */
- if (oldPred == NULL)
- _hash_metapinit(index);
-
- /* get tuple descriptors for heap and index relations */
- htupdesc = RelationGetDescr(heap);
- itupdesc = RelationGetDescr(index);
-
/*
- * If this is a predicate (partial) index, we will need to evaluate
- * the predicate using ExecQual, which requires the current tuple to
- * be in a slot of a TupleTable. In addition, ExecQual must have an
- * ExprContext referring to that slot. Here, we initialize dummy
- * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92
- *
- * We construct the ExprContext anyway since we need a per-tuple
- * temporary memory context for function evaluation -- tgl July 00
+ * We expect to be called exactly once for any index relation. If
+ * that's not the case, big trouble's what we have.
*/
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- {
- tupleTable = ExecCreateTupleTable(1);
- slot = ExecAllocTableSlot(tupleTable);
- ExecSetSlotDescriptor(slot, htupdesc, false);
- }
- else
- {
- tupleTable = NULL;
- slot = NULL;
- }
- econtext = MakeExprContext(slot, TransactionCommandContext);
-#else
- econtext = MakeExprContext(NULL, TransactionCommandContext);
-#endif /* OMIT_PARTIAL_INDEX */
-
- /* build the index */
- nhtups = nitups = 0.0;
-
- /* start a heap scan */
- hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "%s already contains data",
+ RelationGetRelationName(index));
- while (HeapTupleIsValid(htup = heap_getnext(hscan, 0)))
- {
- MemoryContextReset(econtext->ecxt_per_tuple_memory);
+ /* initialize the hash index metadata page */
+ _hash_metapinit(index);
- nhtups += 1.0;
-
-#ifndef OMIT_PARTIAL_INDEX
-
- /*
- * If oldPred != NULL, this is an EXTEND INDEX command, so skip
- * this tuple if it was already in the existing partial index
- */
- if (oldPred != NULL)
- {
- slot->val = htup;
- if (ExecQual((List *) oldPred, econtext, false))
- {
- nitups += 1.0;
- continue;
- }
- }
-
- /*
- * Skip this tuple if it doesn't satisfy the partial-index
- * predicate
- */
- if (pred != NULL)
- {
- slot->val = htup;
- if (!ExecQual((List *) pred, econtext, false))
- continue;
- }
-#endif /* OMIT_PARTIAL_INDEX */
-
- nitups += 1.0;
-
- /*
- * For the current heap tuple, extract all the attributes we use
- * in this index, and note which are null.
- */
- FormIndexDatum(indexInfo,
- htup,
- htupdesc,
- econtext->ecxt_per_tuple_memory,
- attdata,
- nulls);
-
- /* form an index tuple and point it at the heap tuple */
- itup = index_formtuple(itupdesc, attdata, nulls);
-
- /*
- * If the single index key is null, we don't insert it into the
- * index. Hash tables support scans on '='. Relational algebra
- * says that A = B returns null if either A or B is null. This
- * means that no qualification used in an index scan could ever
- * return true on a null attribute. It also means that indices
- * can't be used by ISNULL or NOTNULL scans, but that's an
- * artifact of the strategy map architecture chosen in 1986, not
- * of the way nulls are handled here.
- */
-
- if (IndexTupleHasNulls(itup))
- {
- pfree(itup);
- continue;
- }
-
- itup->t_tid = htup->t_self;
- hitem = _hash_formitem(itup);
-
- res = _hash_doinsert(index, hitem);
-
- pfree(hitem);
- pfree(itup);
- pfree(res);
- }
+ /* build the index */
+ buildstate.indtuples = 0;
- /* okay, all heap tuples are indexed */
- heap_endscan(hscan);
+ /* do the heap scan */
+ reltuples = IndexBuildHeapScan(heap, index, indexInfo,
+ hashbuildCallback, (void *) &buildstate);
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- ExecDropTupleTable(tupleTable, true);
-#endif /* OMIT_PARTIAL_INDEX */
- FreeExprContext(econtext);
+ /* all done */
+ BuildingHash = false;
/*
* Since we just counted the tuples in the heap, we update its stats
@@ -218,23 +103,54 @@ hashbuild(PG_FUNCTION_ARGS)
heap_close(heap, NoLock);
index_close(index);
- UpdateStats(hrelid, nhtups);
- UpdateStats(irelid, nitups);
- if (oldPred != NULL)
- {
- if (nitups == nhtups)
- pred = NULL;
- UpdateIndexPredicate(irelid, oldPred, pred);
- }
+ UpdateStats(hrelid, reltuples);
+ UpdateStats(irelid, buildstate.indtuples);
}
- /* all done */
- BuildingHash = false;
-
PG_RETURN_VOID();
}
/*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+static void
+hashbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state)
+{
+ HashBuildState *buildstate = (HashBuildState *) state;
+ IndexTuple itup;
+ HashItem hitem;
+ InsertIndexResult res;
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
+ itup->t_tid = htup->t_self;
+
+ /* Hash indexes don't index nulls, see notes in hashinsert */
+ if (IndexTupleHasNulls(itup))
+ {
+ pfree(itup);
+ return;
+ }
+
+ hitem = _hash_formitem(itup);
+
+ res = _hash_doinsert(index, hitem);
+
+ if (res)
+ pfree(res);
+
+ buildstate->indtuples += 1;
+
+ pfree(hitem);
+ pfree(itup);
+}
+
+/*
* hashinsert() -- insert an index tuple into a hash table.
*
* Hash on the index tuple's key, find the appropriate location
@@ -248,10 +164,8 @@ hashinsert(PG_FUNCTION_ARGS)
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
-
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
-
#endif
InsertIndexResult res;
HashItem hitem;
@@ -261,8 +175,21 @@ hashinsert(PG_FUNCTION_ARGS)
itup = index_formtuple(RelationGetDescr(rel), datum, nulls);
itup->t_tid = *ht_ctid;
+ /*
+ * If the single index key is null, we don't insert it into the
+ * index. Hash tables support scans on '='. Relational algebra
+ * says that A = B returns null if either A or B is null. This
+ * means that no qualification used in an index scan could ever
+ * return true on a null attribute. It also means that indices
+ * can't be used by ISNULL or NOTNULL scans, but that's an
+ * artifact of the strategy map architecture chosen in 1986, not
+ * of the way nulls are handled here.
+ */
if (IndexTupleHasNulls(itup))
+ {
+ pfree(itup);
PG_RETURN_POINTER((InsertIndexResult) NULL);
+ }
hitem = _hash_formitem(itup);
@@ -471,22 +398,74 @@ hashrestrpos(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
-/* stubs */
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
Datum
-hashdelete(PG_FUNCTION_ARGS)
+hashbulkdelete(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
- ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1);
+ IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
+ void *callback_state = (void *) PG_GETARG_POINTER(2);
+ IndexBulkDeleteResult *result;
+ BlockNumber num_pages;
+ double tuples_removed;
+ double num_index_tuples;
+ RetrieveIndexResult res;
+ IndexScanDesc iscan;
- /* adjust any active scans that will be affected by this deletion */
- _hash_adjscans(rel, tid);
+ tuples_removed = 0;
+ num_index_tuples = 0;
- /* delete the data from the page */
- _hash_pagedel(rel, tid);
+ /*
+ * XXX generic implementation --- should be improved!
+ */
- PG_RETURN_VOID();
+ /* walk through the entire index */
+ iscan = index_beginscan(rel, false, 0, (ScanKey) NULL);
+
+ while ((res = index_getnext(iscan, ForwardScanDirection))
+ != (RetrieveIndexResult) NULL)
+ {
+ ItemPointer heapptr = &res->heap_iptr;
+
+ if (callback(heapptr, callback_state))
+ {
+ ItemPointer indexptr = &res->index_iptr;
+
+ /* adjust any active scans that will be affected by deletion */
+ /* (namely, my own scan) */
+ _hash_adjscans(rel, indexptr);
+
+ /* delete the data from the page */
+ _hash_pagedel(rel, indexptr);
+
+ tuples_removed += 1;
+ }
+ else
+ num_index_tuples += 1;
+
+ pfree(res);
+ }
+
+ index_endscan(iscan);
+
+ /* return statistics */
+ num_pages = RelationGetNumberOfBlocks(rel);
+
+ result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
+ result->num_pages = num_pages;
+ result->tuples_removed = tuples_removed;
+ result->num_index_tuples = num_index_tuples;
+
+ PG_RETURN_POINTER(result);
}
+
void
hash_redo(XLogRecPtr lsn, XLogRecord *record)
{
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 8e2ed1bb8af..c9fb065dbd2 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.29 2001/03/07 21:20:26 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.30 2001/07/15 22:48:15 tgl Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
@@ -112,14 +112,14 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
- splitnum = metap->OVFL_POINT;
- max_free = metap->SPARES[splitnum];
+ splitnum = metap->hashm_ovflpoint;
+ max_free = metap->hashm_spares[splitnum];
free_page = (max_free - 1) >> (metap->hashm_bshift + BYTE_TO_BIT);
free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
/* Look through all the free maps to find the first free block */
- first_page = metap->LAST_FREED >> (metap->hashm_bshift + BYTE_TO_BIT);
+ first_page = metap->hashm_lastfreed >> (metap->hashm_bshift + BYTE_TO_BIT);
for (i = first_page; i <= free_page; i++)
{
Page mappage;
@@ -138,7 +138,7 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
if (i == first_page)
{
- bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
+ bit = metap->hashm_lastfreed & (BMPGSZ_BIT(metap) - 1);
j = bit / BITS_PER_MAP;
bit = bit & ~(BITS_PER_MAP - 1);
}
@@ -153,10 +153,10 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
}
/* No Free Page Found - have to allocate a new page */
- metap->LAST_FREED = metap->SPARES[splitnum];
- metap->SPARES[splitnum]++;
- offset = metap->SPARES[splitnum] -
- (splitnum ? metap->SPARES[splitnum - 1] : 0);
+ metap->hashm_lastfreed = metap->hashm_spares[splitnum];
+ metap->hashm_spares[splitnum]++;
+ offset = metap->hashm_spares[splitnum] -
+ (splitnum ? metap->hashm_spares[splitnum - 1] : 0);
#define OVMSG "HASH: Out of overflow pages. Out of luck.\n"
@@ -164,9 +164,9 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
{
if (++splitnum >= NCACHED)
elog(ERROR, OVMSG);
- metap->OVFL_POINT = splitnum;
- metap->SPARES[splitnum] = metap->SPARES[splitnum - 1];
- metap->SPARES[splitnum - 1]--;
+ metap->hashm_ovflpoint = splitnum;
+ metap->hashm_spares[splitnum] = metap->hashm_spares[splitnum - 1];
+ metap->hashm_spares[splitnum - 1]--;
offset = 0;
}
@@ -194,15 +194,15 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp)
if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
1, free_page))
elog(ERROR, "overflow_page: problem with _hash_initbitmap.");
- metap->SPARES[splitnum]++;
+ metap->hashm_spares[splitnum]++;
offset++;
if (offset > SPLITMASK)
{
if (++splitnum >= NCACHED)
elog(ERROR, OVMSG);
- metap->OVFL_POINT = splitnum;
- metap->SPARES[splitnum] = metap->SPARES[splitnum - 1];
- metap->SPARES[splitnum - 1]--;
+ metap->hashm_ovflpoint = splitnum;
+ metap->hashm_spares[splitnum] = metap->hashm_spares[splitnum - 1];
+ metap->hashm_spares[splitnum - 1]--;
offset = 0;
}
}
@@ -235,13 +235,13 @@ found:
*/
bit = 1 + bit + (i * BMPGSZ_BIT(metap));
- if (bit >= metap->LAST_FREED)
- metap->LAST_FREED = bit - 1;
+ if (bit >= metap->hashm_lastfreed)
+ metap->hashm_lastfreed = bit - 1;
/* Calculate the split number for this page */
- for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
+ for (i = 0; (i < splitnum) && (bit > metap->hashm_spares[i]); i++)
;
- offset = (i ? bit - metap->SPARES[i - 1] : bit);
+ offset = (i ? bit - metap->hashm_spares[i - 1] : bit);
if (offset >= SPLITMASK)
elog(ERROR, OVMSG);
@@ -355,10 +355,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
* element hashm_mapp[bitmappage].
*/
splitnum = (addr >> SPLITSHIFT);
- ovflpgno = (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
+ ovflpgno = (splitnum ? metap->hashm_spares[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
- if (ovflpgno < metap->LAST_FREED)
- metap->LAST_FREED = ovflpgno;
+ if (ovflpgno < metap->hashm_lastfreed)
+ metap->hashm_lastfreed = ovflpgno;
bitmappage = (ovflpgno >> (metap->hashm_bshift + BYTE_TO_BIT));
bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index d1b3aaa2325..b8c520e3c0d 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.31 2001/06/27 23:31:37 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.32 2001/07/15 22:48:15 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -18,7 +18,7 @@
* address of the page if it is an overflow page.
*
* The first page in a hash relation, page zero, is special -- it stores
- * information describing the hash table; it is referred to as teh
+ * information describing the hash table; it is referred to as the
* "meta page." Pages one and higher store the actual data.
*
*-------------------------------------------------------------------------
@@ -48,6 +48,19 @@ static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket
* before the lock table is fully initialized, so we can't use it.
* Strictly speaking, this violates 2pl, but we don't do 2pl on the
* system catalogs anyway.
+ *
+ * Note that our page locks are actual lockmanager locks, not buffer
+ * locks (as are used by btree, for example). This is a good idea because
+ * the algorithms are not deadlock-free, and we'd better be able to detect
+ * and recover from deadlocks.
+ *
+ * Another important difference from btree is that a hash indexscan
+ * retains both a lock and a buffer pin on the current index page
+ * between hashgettuple() calls (btree keeps only a buffer pin).
+ * Because of this, it's safe to do item deletions with only a regular
+ * write lock on a hash page --- there cannot be an indexscan stopped on
+ * the page being deleted, other than an indexscan of our own backend,
+ * which will be taken care of by _hash_adjscans.
*/
@@ -350,6 +363,16 @@ _hash_unsetpagelock(Relation rel,
}
}
+/*
+ * Delete a hash index item.
+ *
+ * It is safe to delete an item after acquiring a regular WRITE lock on
+ * the page, because no other backend can hold a READ lock on the page,
+ * and that means no other backend currently has an indexscan stopped on
+ * any item of the page being deleted. Our own backend might have such
+ * an indexscan (in fact *will*, since that's how VACUUM found the item
+ * in the first place), but _hash_adjscans will fix the scan position.
+ */
void
_hash_pagedel(Relation rel, ItemPointer tid)
{
@@ -384,7 +407,7 @@ _hash_pagedel(Relation rel, ItemPointer tid)
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
- ++metap->hashm_nkeys;
+ metap->hashm_nkeys--;
_hash_wrtbuf(rel, metabuf);
}
@@ -402,32 +425,32 @@ _hash_expandtable(Relation rel, Buffer metabuf)
_hash_checkpage((Page) metap, LH_META_PAGE);
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
- new_bucket = ++metap->MAX_BUCKET;
+ new_bucket = ++metap->hashm_maxbucket;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
- old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);
+ old_bucket = (metap->hashm_maxbucket & metap->hashm_lowmask);
/*
- * If the split point is increasing (MAX_BUCKET's log base 2 *
+ * If the split point is increasing (hashm_maxbucket's log base 2 *
* increases), we need to copy the current contents of the spare split
* bucket to the next bucket.
*/
- spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
- if (spare_ndx > metap->OVFL_POINT)
+ spare_ndx = _hash_log2(metap->hashm_maxbucket + 1);
+ if (spare_ndx > metap->hashm_ovflpoint)
{
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
- metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
- metap->OVFL_POINT = spare_ndx;
+ metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
+ metap->hashm_ovflpoint = spare_ndx;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
}
- if (new_bucket > metap->HIGH_MASK)
+ if (new_bucket > metap->hashm_highmask)
{
/* Starting a new doubling */
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
- metap->LOW_MASK = metap->HIGH_MASK;
- metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
+ metap->hashm_lowmask = metap->hashm_highmask;
+ metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
}
diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c
index 649e42fbeb0..f4a91b5710f 100644
--- a/src/backend/access/hash/hashscan.c
+++ b/src/backend/access/hash/hashscan.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.24 2001/01/24 19:42:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.25 2001/07/15 22:48:15 tgl Exp $
*
* NOTES
* Because we can be doing an index scan on a relation while we
@@ -45,6 +45,31 @@ typedef HashScanListData *HashScanList;
static HashScanList HashScans = (HashScanList) NULL;
+
+/*
+ * AtEOXact_hash() --- clean up hash subsystem at xact abort or commit.
+ *
+ * This is here because it needs to touch this module's static var HashScans.
+ */
+void
+AtEOXact_hash(void)
+{
+ /*
+ * Note: these actions should only be necessary during xact abort; but
+ * they can't hurt during a commit.
+ */
+
+ /*
+ * Reset the active-scans list to empty. We do not need to free the
+ * list elements, because they're all palloc()'d, so they'll go away
+ * at end of transaction anyway.
+ */
+ HashScans = NULL;
+
+ /* If we were building a hash, we ain't anymore. */
+ BuildingHash = false;
+}
+
/*
* _Hash_regscan() -- register a new scan.
*/
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index adeccf5cc84..2b6be06168f 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.51 2001/06/22 19:16:21 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.52 2001/07/15 22:48:15 tgl Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relationId
@@ -18,23 +18,17 @@
* index_rescan - restart a scan of an index
* index_endscan - end a scan
* index_insert - insert an index tuple into a relation
- * index_delete - delete an item from an index relation
* index_markpos - mark a scan position
* index_restrpos - restore a scan position
* index_getnext - get the next tuple from a scan
- * ** index_fetch - retrieve tuple with tid
- * ** index_replace - replace a tuple
- * ** index_getattr - get an attribute from an index tuple
- * index_getprocid - get a support procedure id from the rel tuple
- *
- * IndexScanIsValid - check index scan
+ * index_bulk_delete - bulk deletion of index tuples
+ * index_cost_estimator - fetch amcostestimate procedure OID
+ * index_getprocid - get a support procedure OID
*
* NOTES
* This file contains the index_ routines which used
* to be a scattered collection of stuff in access/genam.
*
- * The ** routines: index_fetch, index_replace, and index_getattr
- * have not yet been implemented. They may not be needed.
*
* old comments
* Scans are implemented as follows:
@@ -211,23 +205,6 @@ index_insert(Relation relation,
}
/* ----------------
- * index_delete - delete an item from an index relation
- * ----------------
- */
-void
-index_delete(Relation relation, ItemPointer indexItem)
-{
- RegProcedure procedure;
-
- RELATION_CHECKS;
- GET_REL_PROCEDURE(delete, amdelete);
-
- OidFunctionCall2(procedure,
- PointerGetDatum(relation),
- PointerGetDatum(indexItem));
-}
-
-/* ----------------
* index_beginscan - start a scan of an index
* ----------------
*/
@@ -379,6 +356,35 @@ index_getnext(IndexScanDesc scan,
}
/* ----------------
+ * index_bulk_delete - do mass deletion of index entries
+ *
+ * callback routine tells whether a given main-heap tuple is
+ * to be deleted
+ *
+ * return value is an optional palloc'd struct of statistics
+ * ----------------
+ */
+IndexBulkDeleteResult *
+index_bulk_delete(Relation relation,
+ IndexBulkDeleteCallback callback,
+ void *callback_state)
+{
+ RegProcedure procedure;
+ IndexBulkDeleteResult *result;
+
+ RELATION_CHECKS;
+ GET_REL_PROCEDURE(bulk_delete, ambulkdelete);
+
+ result = (IndexBulkDeleteResult *)
+ DatumGetPointer(OidFunctionCall3(procedure,
+ PointerGetDatum(relation),
+ PointerGetDatum((Pointer) callback),
+ PointerGetDatum(callback_state)));
+
+ return result;
+}
+
+/* ----------------
* index_cost_estimator
*
* Fetch the amcostestimate procedure OID for an index.
diff --git a/src/backend/access/nbtree/Makefile b/src/backend/access/nbtree/Makefile
index eba9bd4eefe..bdc366dd0ad 100644
--- a/src/backend/access/nbtree/Makefile
+++ b/src/backend/access/nbtree/Makefile
@@ -4,7 +4,7 @@
# Makefile for access/nbtree
#
# IDENTIFICATION
-# $Header: /cvsroot/pgsql/src/backend/access/nbtree/Makefile,v 1.10 2000/08/31 16:09:41 petere Exp $
+# $Header: /cvsroot/pgsql/src/backend/access/nbtree/Makefile,v 1.11 2001/07/15 22:48:16 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -12,7 +12,7 @@ subdir = src/backend/access/nbtree
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = nbtcompare.o nbtinsert.o nbtpage.o nbtree.o nbtscan.o nbtsearch.o \
+OBJS = nbtcompare.o nbtinsert.o nbtpage.o nbtree.o nbtsearch.o \
nbtstrat.o nbtutils.o nbtsort.o
all: SUBSYS.o
diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README
index cff7ff0d655..d8ec739b2a8 100644
--- a/src/backend/access/nbtree/README
+++ b/src/backend/access/nbtree/README
@@ -1,4 +1,4 @@
-$Header: /cvsroot/pgsql/src/backend/access/nbtree/README,v 1.4 2000/07/25 05:26:40 tgl Exp $
+$Header: /cvsroot/pgsql/src/backend/access/nbtree/README,v 1.5 2001/07/15 22:48:16 tgl Exp $
This directory contains a correct implementation of Lehman and Yao's
high-concurrency B-tree management algorithm (P. Lehman and S. Yao,
@@ -109,15 +109,11 @@ In addition, the following things are handy to know:
is too high a price). Rebuilding corrupted indexes during restart
seems more attractive.
-+ On deletions, we need to adjust the position of active scans on
- the index. The code in nbtscan.c handles this. We don't need to
- do this for insertions or splits because _bt_restscan can find the
- new position of the previously-found item. NOTE that nbtscan.c
- only copes with deletions issued by the current backend. This
- essentially means that concurrent deletions are not supported, but
- that's true already in the Lehman and Yao algorithm. nbtscan.c
- exists only to support VACUUM and allow it to delete items while
- it's scanning the index.
++ Deletions are handled by getting a super-exclusive lock on the target
+ page, so that no other backend has a pin on the page when the deletion
+ starts. This means no scan is pointing at the page. This is OK for
+ deleting leaf items, probably not OK for deleting internal nodes;
+ will need to think harder when it's time to support index compaction.
+ "ScanKey" data structures are used in two fundamentally different ways
in this code. Searches for the initial position for a scan, as well as
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 8ffb9b9043c..c91c568ed2f 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.83 2001/06/22 19:16:21 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.84 2001/07/15 22:48:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -126,7 +126,7 @@ top:
if (TransactionIdIsValid(xwait))
{
/* Have to wait for the other guy ... */
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
XactLockTableWait(xwait);
/* start over... */
_bt_freestack(stack);
@@ -234,7 +234,7 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
if (TransactionIdIsValid(xwait))
{
if (nbuf != InvalidBuffer)
- _bt_relbuf(rel, nbuf, BT_READ);
+ _bt_relbuf(rel, nbuf);
/* Tell _bt_doinsert to wait... */
return xwait;
}
@@ -263,7 +263,7 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
break;
nblkno = opaque->btpo_next;
if (nbuf != InvalidBuffer)
- _bt_relbuf(rel, nbuf, BT_READ);
+ _bt_relbuf(rel, nbuf);
nbuf = _bt_getbuf(rel, nblkno, BT_READ);
page = BufferGetPage(nbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -273,7 +273,7 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
}
if (nbuf != InvalidBuffer)
- _bt_relbuf(rel, nbuf, BT_READ);
+ _bt_relbuf(rel, nbuf);
return NullTransactionId;
}
@@ -397,7 +397,7 @@ _bt_insertonpg(Relation rel,
/* step right one page */
BlockNumber rblkno = lpageop->btpo_next;
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, rblkno, BT_WRITE);
page = BufferGetPage(buf);
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1175,12 +1175,12 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
*/
if (P_RIGHTMOST(opaque))
{
- _bt_relbuf(rel, buf, access);
+ _bt_relbuf(rel, buf);
return (InvalidBuffer);
}
blkno = opaque->btpo_next;
- _bt_relbuf(rel, buf, access);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, access);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1449,7 +1449,7 @@ _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release)
&itup_off, &itup_blkno);
/* Keep lock on new "root" buffer ! */
if (buf != rootbuf)
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
buf = newbuf;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1525,7 +1525,7 @@ _bt_fixtree(Relation rel, BlockNumber blkno)
if (P_ISROOT(opaque))
{
/* Tree is Ok now */
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
return;
}
/* Call _bt_fixroot() if there is no upper level */
@@ -1533,12 +1533,12 @@ _bt_fixtree(Relation rel, BlockNumber blkno)
{
elog(NOTICE, "bt_fixtree[%s]: fixing root page", RelationGetRelationName(rel));
buf = _bt_fixroot(rel, buf, true);
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
return;
}
/* Have to go up one level */
pblkno = opaque->btpo_parent;
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
}
blkno = pblkno;
}
@@ -1571,7 +1571,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
page = BufferGetPage(buf);
/* copy page to temp storage */
memmove(tbuf, page, PageGetPageSize(page));
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
page = (Page) tbuf;
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1682,7 +1682,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
{
if (coff[i] != P_FIRSTDATAKEY(newopaque))
elog(ERROR, "bt_fixlevel[%s]: invalid item order(3) (need to recreate index)", RelationGetRelationName(rel));
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
buf = newbuf;
page = newpage;
opaque = newopaque;
@@ -1691,7 +1691,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
continue;
}
/* unfound - need to insert on current page */
- _bt_relbuf(rel, newbuf, BT_WRITE);
+ _bt_relbuf(rel, newbuf);
}
/* insert pointer */
ritem = (BTItem) PageGetItem(cpage[i - 1],
@@ -1718,10 +1718,10 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
&itup_off, &itup_blkno);
/* what buffer we need in ? */
if (newitemonleft)
- _bt_relbuf(rel, newbuf, BT_WRITE);
+ _bt_relbuf(rel, newbuf);
else
{
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
buf = newbuf;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1741,7 +1741,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
/* copy page with pointer to cblkno[cidx] to temp storage */
memmove(tbuf, page, PageGetPageSize(page));
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
page = (Page) tbuf;
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
}
@@ -1751,13 +1751,13 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
goodbye = false;
/* Pointers to child pages are Ok - right end of child level ? */
- _bt_relbuf(rel, cbuf[0], BT_READ);
- _bt_relbuf(rel, cbuf[1], BT_READ);
+ _bt_relbuf(rel, cbuf[0]);
+ _bt_relbuf(rel, cbuf[1]);
if (cidx == 1 ||
(cidx == 2 && (P_RIGHTMOST(copaque[2]) || goodbye)))
{
if (cidx == 2)
- _bt_relbuf(rel, cbuf[2], BT_READ);
+ _bt_relbuf(rel, cbuf[2]);
return;
}
if (cblkno[0] == limit || cblkno[1] == limit)
@@ -1819,7 +1819,7 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno,
{
if (offnum <= stack.bts_offset)
elog(ERROR, "bt_fixbranch[%s]: invalid item order (need to recreate index)", RelationGetRelationName(rel));
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
return;
}
@@ -1837,7 +1837,7 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno,
if (rbuf == InvalidBuffer)
elog(ERROR, "bt_fixbranch[%s]: right pointer unfound(2) (need to recreate index)", RelationGetRelationName(rel));
rblkno = BufferGetBlockNumber(rbuf);
- _bt_relbuf(rel, rbuf, BT_READ);
+ _bt_relbuf(rel, rbuf);
/*
* If we have parent item in true_stack then go up one level and
@@ -1845,7 +1845,7 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno,
*/
if (true_stack)
{
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
blkno = true_stack->bts_blkno;
true_stack = true_stack->bts_parent;
continue;
@@ -1860,19 +1860,19 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno,
if (!BTreeInvalidParent(opaque))
{
blkno = opaque->btpo_parent;
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
continue;
}
/* Have to switch to excl buf lock and re-check btpo_parent */
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_WRITE);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (!BTreeInvalidParent(opaque))
{
blkno = opaque->btpo_parent;
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
continue;
}
@@ -1913,7 +1913,7 @@ _bt_fixup(Relation rel, Buffer buf)
if (!BTreeInvalidParent(opaque))
{
blkno = opaque->btpo_parent;
- _bt_relbuf(rel, buf, BT_WRITE);
+ _bt_relbuf(rel, buf);
elog(NOTICE, "bt_fixup[%s]: checking/fixing upper levels", RelationGetRelationName(rel));
_bt_fixtree(rel, blkno);
return;
@@ -1921,8 +1921,7 @@ _bt_fixup(Relation rel, Buffer buf)
if (P_LEFTMOST(opaque))
break;
blkno = opaque->btpo_prev;
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- ReleaseBuffer(buf);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_WRITE);
}
@@ -1932,9 +1931,7 @@ _bt_fixup(Relation rel, Buffer buf)
*/
elog(NOTICE, "bt_fixup[%s]: fixing root page", RelationGetRelationName(rel));
buf = _bt_fixroot(rel, buf, true);
- _bt_relbuf(rel, buf, BT_WRITE);
-
- return;
+ _bt_relbuf(rel, buf);
}
static OffsetNumber
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 67e1407b22b..376274c5621 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.52 2001/06/27 23:31:38 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.53 2001/07/15 22:48:16 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -138,7 +138,7 @@ _bt_getroot(Relation rel, int access)
/* If access = BT_READ, caller doesn't want us to create root yet */
if (access == BT_READ)
{
- _bt_relbuf(rel, metabuf, BT_READ);
+ _bt_relbuf(rel, metabuf);
return InvalidBuffer;
}
@@ -215,14 +215,14 @@ _bt_getroot(Relation rel, int access)
* guarantee no deadlocks, we have to release the metadata
* page and start all over again.
*/
- _bt_relbuf(rel, metabuf, BT_WRITE);
+ _bt_relbuf(rel, metabuf);
return _bt_getroot(rel, access);
}
}
else
{
rootblkno = metad->btm_root;
- _bt_relbuf(rel, metabuf, BT_READ); /* done with the meta page */
+ _bt_relbuf(rel, metabuf); /* done with the meta page */
rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
}
@@ -270,8 +270,8 @@ _bt_getroot(Relation rel, int access)
goto check_parent;
}
else
-/* someone else already fixed root */
{
+ /* someone else already fixed root */
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(rootbuf, BT_READ);
}
@@ -283,7 +283,7 @@ _bt_getroot(Relation rel, int access)
* chance that parent is root page.
*/
newrootbuf = _bt_getbuf(rel, rootopaque->btpo_parent, BT_READ);
- _bt_relbuf(rel, rootbuf, BT_READ);
+ _bt_relbuf(rel, rootbuf);
rootbuf = newrootbuf;
rootpage = BufferGetPage(rootbuf);
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
@@ -293,7 +293,7 @@ _bt_getroot(Relation rel, int access)
}
/* try again */
- _bt_relbuf(rel, rootbuf, BT_READ);
+ _bt_relbuf(rel, rootbuf);
return _bt_getroot(rel, access);
}
@@ -350,10 +350,12 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
/*
* _bt_relbuf() -- release a locked buffer.
*
- * Lock and pin (refcount) are both dropped.
+ * Lock and pin (refcount) are both dropped. Note that either read or
+ * write lock can be dropped this way, but if we modified the buffer,
+ * this is NOT the right way to release a write lock.
*/
void
-_bt_relbuf(Relation rel, Buffer buf, int access)
+_bt_relbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
@@ -449,24 +451,23 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, int level)
}
/*
- * Delete an item from a btree. It had better be a leaf item...
+ * Delete an item from a btree page.
+ *
+ * This routine assumes that the caller has pinned and locked the buffer,
+ * and will write the buffer afterwards.
*/
void
-_bt_pagedel(Relation rel, ItemPointer tid)
+_bt_itemdel(Relation rel, Buffer buf, ItemPointer tid)
{
- Buffer buf;
- Page page;
- BlockNumber blkno;
+ Page page = BufferGetPage(buf);
OffsetNumber offno;
- blkno = ItemPointerGetBlockNumber(tid);
offno = ItemPointerGetOffsetNumber(tid);
- buf = _bt_getbuf(rel, blkno, BT_WRITE);
- page = BufferGetPage(buf);
-
START_CRIT_SECTION();
+
PageIndexTupleDelete(page, offno);
+
/* XLOG stuff */
{
xl_btree_delete xlrec;
@@ -490,8 +491,6 @@ _bt_pagedel(Relation rel, ItemPointer tid)
PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID);
}
- END_CRIT_SECTION();
- /* write the buffer and release the lock */
- _bt_wrtbuf(rel, buf);
+ END_CRIT_SECTION();
}
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index b714296c8f7..b1426456241 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.81 2001/05/18 21:24:17 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.82 2001/07/15 22:48:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -28,11 +28,27 @@
#include "storage/sinval.h"
#include "access/xlogutils.h"
-bool BuildingBtree = false; /* see comment in btbuild() */
-bool FastBuild = true; /* use sort/build instead */
- /* of insertion build */
+/* Working state for btbuild and its callback */
+typedef struct
+{
+ bool usefast;
+ bool isUnique;
+ bool haveDead;
+ Relation heapRel;
+ BTSpool *spool;
+ /*
+	 * spool2 is needed only when the index is a unique index. Dead
+	 * tuples are put into spool2 instead of spool in order to avoid
+	 * the uniqueness check.
+ */
+ BTSpool *spool2;
+ double indtuples;
+} BTBuildState;
+
+bool BuildingBtree = false; /* see comment in btbuild() */
+bool FastBuild = true; /* use SORT instead of insertion build */
/*
* TEMPORARY FLAG FOR TESTING NEW FIX TREE
@@ -41,6 +57,29 @@ bool FastBuild = true; /* use sort/build instead */
bool FixBTree = true;
static void _bt_restscan(IndexScanDesc scan);
+static void btbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state);
+
+
+/*
+ * AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit.
+ */
+void
+AtEOXact_nbtree(void)
+{
+ /*
+ * Note: these actions should only be necessary during xact abort; but
+ * they can't hurt during a commit.
+ */
+
+ /* If we were building a btree, we ain't anymore. */
+ BuildingBtree = false;
+}
+
/*
* btbuild() -- build a new btree index.
@@ -56,42 +95,10 @@ btbuild(PG_FUNCTION_ARGS)
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
- Node *oldPred = (Node *) PG_GETARG_POINTER(3);
-#ifdef NOT_USED
- IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4);
-#endif
- HeapScanDesc hscan;
- HeapTuple htup;
- IndexTuple itup;
- TupleDesc htupdesc,
- itupdesc;
- Datum attdata[INDEX_MAX_KEYS];
- char nulls[INDEX_MAX_KEYS];
- double nhtups,
- nitups;
- Node *pred = indexInfo->ii_Predicate;
-#ifndef OMIT_PARTIAL_INDEX
- TupleTable tupleTable;
- TupleTableSlot *slot;
-#endif
- ExprContext *econtext;
- InsertIndexResult res = NULL;
- BTSpool *spool = NULL;
- BTItem btitem;
- bool usefast;
- Snapshot snapshot;
- TransactionId XmaxRecent;
+ double reltuples;
+ BTBuildState buildstate;
- /*
- * spool2 is needed only when the index is an unique index. Dead
- * tuples are put into spool2 instead of spool in order to avoid
- * uniqueness check.
- */
- BTSpool *spool2 = NULL;
- bool tupleIsAlive;
- int dead_count;
-
- /* note that this is a new btree */
+ /* set flag to disable locking */
BuildingBtree = true;
/*
@@ -100,220 +107,63 @@ btbuild(PG_FUNCTION_ARGS)
* look harder at this. (there is some kind of incremental processing
* going on there.) -- pma 08/29/95
*/
- usefast = (FastBuild && IsNormalProcessingMode());
+ buildstate.usefast = (FastBuild && IsNormalProcessingMode());
+ buildstate.isUnique = indexInfo->ii_Unique;
+ buildstate.haveDead = false;
+ buildstate.heapRel = heap;
+ buildstate.spool = NULL;
+ buildstate.spool2 = NULL;
+ buildstate.indtuples = 0;
#ifdef BTREE_BUILD_STATS
if (Show_btree_build_stats)
ResetUsage();
#endif /* BTREE_BUILD_STATS */
- /* initialize the btree index metadata page (if this is a new index) */
- if (oldPred == NULL)
- _bt_metapinit(index);
-
- /* get tuple descriptors for heap and index relations */
- htupdesc = RelationGetDescr(heap);
- itupdesc = RelationGetDescr(index);
-
/*
- * If this is a predicate (partial) index, we will need to evaluate
- * the predicate using ExecQual, which requires the current tuple to
- * be in a slot of a TupleTable. In addition, ExecQual must have an
- * ExprContext referring to that slot. Here, we initialize dummy
- * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92
- *
- * We construct the ExprContext anyway since we need a per-tuple
- * temporary memory context for function evaluation -- tgl July 00
+ * We expect to be called exactly once for any index relation. If
+ * that's not the case, big trouble's what we have.
*/
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- {
- tupleTable = ExecCreateTupleTable(1);
- slot = ExecAllocTableSlot(tupleTable);
- ExecSetSlotDescriptor(slot, htupdesc, false);
-
- /*
- * we never want to use sort/build if we are extending an existing
- * partial index -- it works by inserting the newly-qualifying
- * tuples into the existing index. (sort/build would overwrite the
- * existing index with one consisting of the newly-qualifying
- * tuples.)
- */
- usefast = false;
- }
- else
- {
- tupleTable = NULL;
- slot = NULL;
- }
- econtext = MakeExprContext(slot, TransactionCommandContext);
-#else
- econtext = MakeExprContext(NULL, TransactionCommandContext);
-#endif /* OMIT_PARTIAL_INDEX */
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "%s already contains data",
+ RelationGetRelationName(index));
- /* build the index */
- nhtups = nitups = 0.0;
+ /* initialize the btree index metadata page */
+ _bt_metapinit(index);
- if (usefast)
+ if (buildstate.usefast)
{
- spool = _bt_spoolinit(index, indexInfo->ii_Unique);
-
+ buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique);
/*
- * Different from spool,the uniqueness isn't checked for spool2.
+ * Different from spool, the uniqueness isn't checked for spool2.
*/
if (indexInfo->ii_Unique)
- spool2 = _bt_spoolinit(index, false);
+ buildstate.spool2 = _bt_spoolinit(index, false);
}
- /* start a heap scan */
- dead_count = 0;
- snapshot = (IsBootstrapProcessingMode() ? SnapshotNow : SnapshotAny);
- hscan = heap_beginscan(heap, 0, snapshot, 0, (ScanKey) NULL);
- XmaxRecent = 0;
- if (snapshot == SnapshotAny)
- GetXmaxRecent(&XmaxRecent);
-
- while (HeapTupleIsValid(htup = heap_getnext(hscan, 0)))
- {
- if (snapshot == SnapshotAny)
- {
- tupleIsAlive = HeapTupleSatisfiesNow(htup->t_data);
- if (!tupleIsAlive)
- {
- if ((htup->t_data->t_infomask & HEAP_XMIN_INVALID) != 0)
- continue;
- if (htup->t_data->t_infomask & HEAP_XMAX_COMMITTED &&
- htup->t_data->t_xmax < XmaxRecent)
- continue;
- }
- }
- else
- tupleIsAlive = true;
-
- MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
- nhtups += 1.0;
-
-#ifndef OMIT_PARTIAL_INDEX
-
- /*
- * If oldPred != NULL, this is an EXTEND INDEX command, so skip
- * this tuple if it was already in the existing partial index
- */
- if (oldPred != NULL)
- {
- slot->val = htup;
- if (ExecQual((List *) oldPred, econtext, false))
- {
- nitups += 1.0;
- continue;
- }
- }
-
- /*
- * Skip this tuple if it doesn't satisfy the partial-index
- * predicate
- */
- if (pred != NULL)
- {
- slot->val = htup;
- if (!ExecQual((List *) pred, econtext, false))
- continue;
- }
-#endif /* OMIT_PARTIAL_INDEX */
-
- nitups += 1.0;
-
- /*
- * For the current heap tuple, extract all the attributes we use
- * in this index, and note which are null.
- */
- FormIndexDatum(indexInfo,
- htup,
- htupdesc,
- econtext->ecxt_per_tuple_memory,
- attdata,
- nulls);
-
- /* form an index tuple and point it at the heap tuple */
- itup = index_formtuple(itupdesc, attdata, nulls);
-
- /*
- * If the single index key is null, we don't insert it into the
- * index. Btrees support scans on <, <=, =, >=, and >. Relational
- * algebra says that A op B (where op is one of the operators
- * above) returns null if either A or B is null. This means that
- * no qualification used in an index scan could ever return true
- * on a null attribute. It also means that indices can't be used
- * by ISNULL or NOTNULL scans, but that's an artifact of the
- * strategy map architecture chosen in 1986, not of the way nulls
- * are handled here.
- */
-
- /*
- * New comments: NULLs handling. While we can't do NULL
- * comparison, we can follow simple rule for ordering items on
- * btree pages - NULLs greater NOT_NULLs and NULL = NULL is TRUE.
- * Sure, it's just rule for placing/finding items and no more -
- * keytest'll return FALSE for a = 5 for items having 'a' isNULL.
- * Look at _bt_compare for how it works. - vadim 03/23/97
- *
- * if (itup->t_info & INDEX_NULL_MASK) { pfree(itup); continue; }
- */
-
- itup->t_tid = htup->t_self;
- btitem = _bt_formitem(itup);
-
- /*
- * if we are doing bottom-up btree build, we insert the index into
- * a spool file for subsequent processing. otherwise, we insert
- * into the btree.
- */
- if (usefast)
- {
- if (tupleIsAlive || !spool2)
- _bt_spool(btitem, spool);
- else
-/* dead tuples are put into spool2 */
- {
- dead_count++;
- _bt_spool(btitem, spool2);
- }
- }
- else
- res = _bt_doinsert(index, btitem, indexInfo->ii_Unique, heap);
-
- pfree(btitem);
- pfree(itup);
- if (res)
- pfree(res);
- }
+ /* do the heap scan */
+ reltuples = IndexBuildHeapScan(heap, index, indexInfo,
+ btbuildCallback, (void *) &buildstate);
/* okay, all heap tuples are indexed */
- heap_endscan(hscan);
- if (spool2 && !dead_count) /* spool2 was found to be unnecessary */
+ if (buildstate.spool2 && !buildstate.haveDead)
{
- _bt_spooldestroy(spool2);
- spool2 = NULL;
+ /* spool2 turns out to be unnecessary */
+ _bt_spooldestroy(buildstate.spool2);
+ buildstate.spool2 = NULL;
}
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- ExecDropTupleTable(tupleTable, true);
-#endif /* OMIT_PARTIAL_INDEX */
- FreeExprContext(econtext);
-
/*
* if we are doing bottom-up btree build, finish the build by (1)
* completing the sort of the spool file, (2) inserting the sorted
* tuples into btree pages and (3) building the upper levels.
*/
- if (usefast)
+ if (buildstate.usefast)
{
- _bt_leafbuild(spool, spool2);
- _bt_spooldestroy(spool);
- if (spool2)
- _bt_spooldestroy(spool2);
+ _bt_leafbuild(buildstate.spool, buildstate.spool2);
+ _bt_spooldestroy(buildstate.spool);
+ if (buildstate.spool2)
+ _bt_spooldestroy(buildstate.spool2);
}
#ifdef BTREE_BUILD_STATS
@@ -325,6 +175,9 @@ btbuild(PG_FUNCTION_ARGS)
}
#endif /* BTREE_BUILD_STATS */
+ /* all done */
+ BuildingBtree = false;
+
/*
* Since we just counted the tuples in the heap, we update its stats
* in pg_class to guarantee that the planner takes advantage of the
@@ -343,20 +196,63 @@ btbuild(PG_FUNCTION_ARGS)
heap_close(heap, NoLock);
index_close(index);
- UpdateStats(hrelid, nhtups);
- UpdateStats(irelid, nitups);
- if (oldPred != NULL)
+ UpdateStats(hrelid, reltuples);
+ UpdateStats(irelid, buildstate.indtuples);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+static void
+btbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state)
+{
+ BTBuildState *buildstate = (BTBuildState *) state;
+ IndexTuple itup;
+ BTItem btitem;
+ InsertIndexResult res;
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
+ itup->t_tid = htup->t_self;
+
+ btitem = _bt_formitem(itup);
+
+ /*
+ * if we are doing bottom-up btree build, we insert the index into
+ * a spool file for subsequent processing. otherwise, we insert
+ * into the btree.
+ */
+ if (buildstate->usefast)
+ {
+ if (tupleIsAlive || buildstate->spool2 == NULL)
+ _bt_spool(btitem, buildstate->spool);
+ else
{
- if (nitups == nhtups)
- pred = NULL;
- UpdateIndexPredicate(irelid, oldPred, pred);
+ /* dead tuples are put into spool2 */
+ buildstate->haveDead = true;
+ _bt_spool(btitem, buildstate->spool2);
}
}
+ else
+ {
+ res = _bt_doinsert(index, btitem,
+ buildstate->isUnique, buildstate->heapRel);
+ if (res)
+ pfree(res);
+ }
- /* all done */
- BuildingBtree = false;
+ buildstate->indtuples += 1;
- PG_RETURN_VOID();
+ pfree(btitem);
+ pfree(itup);
}
/*
@@ -423,8 +319,10 @@ btgettuple(PG_FUNCTION_ARGS)
/*
* Save heap TID to use it in _bt_restscan. Then release the read
- * lock on the buffer so that we aren't blocking other backends. NOTE:
- * we do keep the pin on the buffer!
+ * lock on the buffer so that we aren't blocking other backends.
+ *
+ * NOTE: we do keep the pin on the buffer! This is essential to ensure
+ * that someone else doesn't delete the index entry we are stopped on.
*/
if (res)
{
@@ -451,9 +349,6 @@ btbeginscan(PG_FUNCTION_ARGS)
/* get the scan */
scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
- /* register scan in case we change pages it's using */
- _bt_regscan(scan);
-
PG_RETURN_POINTER(scan);
}
@@ -571,8 +466,6 @@ btendscan(PG_FUNCTION_ARGS)
pfree(so->keyData);
pfree(so);
- _bt_dropscan(scan);
-
PG_RETURN_VOID();
}
@@ -640,20 +533,127 @@ btrestrpos(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
-/* stubs */
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
Datum
-btdelete(PG_FUNCTION_ARGS)
+btbulkdelete(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
- ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1);
+ IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
+ void *callback_state = (void *) PG_GETARG_POINTER(2);
+ IndexBulkDeleteResult *result;
+ BlockNumber num_pages;
+ double tuples_removed;
+ double num_index_tuples;
+ RetrieveIndexResult res;
+ IndexScanDesc scan;
+ BTScanOpaque so;
+ ItemPointer current;
+
+ tuples_removed = 0;
+ num_index_tuples = 0;
+
+ /*
+ * We use a standard IndexScanDesc scan object, but to speed up the loop,
+ * we skip most of the wrapper layers of index_getnext and instead call
+ * _bt_step directly. This implies holding buffer lock on a target page
+ * throughout the loop over the page's tuples. Initially, we have a read
+ * lock acquired by _bt_step when we stepped onto the page. If we find
+ * a tuple we need to delete, we trade in the read lock for an exclusive
+ * write lock; after that, we hold the write lock until we step off the
+ * page (fortunately, _bt_relbuf doesn't care which kind of lock it's
+ * releasing). This should minimize the amount of work needed per page.
+ */
+ scan = index_beginscan(rel, false, 0, (ScanKey) NULL);
+ so = (BTScanOpaque) scan->opaque;
+ current = &(scan->currentItemData);
- /* adjust any active scans that will be affected by this deletion */
- _bt_adjscans(rel, tid);
+ /* Use _bt_first to get started, then _bt_step to remaining tuples */
+ res = _bt_first(scan, ForwardScanDirection);
- /* delete the data from the page */
- _bt_pagedel(rel, tid);
+ if (res != NULL)
+ {
+ Buffer buf;
+ BlockNumber lockedBlock = InvalidBlockNumber;
- PG_RETURN_VOID();
+ pfree(res);
+ /* we have the buffer pinned and locked */
+ buf = so->btso_curbuf;
+ Assert(BufferIsValid(buf));
+
+ do
+ {
+ Page page;
+ BlockNumber blkno;
+ OffsetNumber offnum;
+ BTItem btitem;
+ IndexTuple itup;
+ ItemPointer htup;
+
+ /* current is the next index tuple */
+ blkno = ItemPointerGetBlockNumber(current);
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &btitem->bti_itup;
+ htup = &(itup->t_tid);
+
+ if (callback(htup, callback_state))
+ {
+ /*
+ * If this is first deletion on this page, trade in read
+ * lock for a really-exclusive write lock. Then, step back
+ * one and re-examine the item, because someone else might
+ * have inserted an item while we weren't holding the lock!
+ */
+ if (blkno != lockedBlock)
+ {
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ LockBufferForCleanup(buf);
+ lockedBlock = blkno;
+ }
+ else
+ {
+ /* Delete the item from the page */
+ _bt_itemdel(rel, buf, current);
+
+ /* Mark buffer dirty, but keep the lock and pin */
+ WriteNoReleaseBuffer(buf);
+
+ tuples_removed += 1;
+ }
+
+ /*
+ * We need to back up the scan one item so that the next
+ * cycle will re-examine the same offnum on this page.
+ *
+ * For now, just hack the current-item index. Will need
+ * to be smarter when deletion includes removal of empty
+ * index pages.
+ */
+ current->ip_posid--;
+ }
+ else
+ num_index_tuples += 1;
+ } while (_bt_step(scan, &buf, ForwardScanDirection));
+ }
+
+ index_endscan(scan);
+
+ /* return statistics */
+ num_pages = RelationGetNumberOfBlocks(rel);
+
+ result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
+ result->num_pages = num_pages;
+ result->tuples_removed = tuples_removed;
+ result->num_index_tuples = num_index_tuples;
+
+ PG_RETURN_POINTER(result);
}
/*
@@ -676,7 +676,7 @@ _bt_restscan(IndexScanDesc scan)
/*
* Get back the read lock we were holding on the buffer. (We still
- * have a reference-count pin on it, though.)
+ * have a reference-count pin on it, so need not get that.)
*/
LockBuffer(buf, BT_READ);
@@ -729,7 +729,7 @@ _bt_restscan(IndexScanDesc scan)
"\n\tRecreate index %s.", RelationGetRelationName(rel));
blkno = opaque->btpo_next;
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
maxoff = PageGetMaxOffsetNumber(page);
diff --git a/src/backend/access/nbtree/nbtscan.c b/src/backend/access/nbtree/nbtscan.c
deleted file mode 100644
index e07914b3440..00000000000
--- a/src/backend/access/nbtree/nbtscan.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * btscan.c
- * manage scans on btrees.
- *
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.33 2001/01/24 19:42:48 momjian Exp $
- *
- *
- * NOTES
- * Because we can be doing an index scan on a relation while we update
- * it, we need to avoid missing data that moves around in the index.
- * Insertions and page splits are no problem because _bt_restscan()
- * can figure out where the current item moved to, but if a deletion
- * happens at or before the current scan position, we'd better do
- * something to stay in sync.
- *
- * The routines in this file handle the problem for deletions issued
- * by the current backend. Currently, that's all we need, since
- * deletions are only done by VACUUM and it gets an exclusive lock.
- *
- * The scheme is to manage a list of active scans in the current backend.
- * Whenever we remove a record from an index, we check the list of active
- * scans to see if any has been affected. A scan is affected only if it
- * is on the same relation, and the same page, as the update.
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-
-#include "access/nbtree.h"
-
-typedef struct BTScanListData
-{
- IndexScanDesc btsl_scan;
- struct BTScanListData *btsl_next;
-} BTScanListData;
-
-typedef BTScanListData *BTScanList;
-
-static BTScanList BTScans = (BTScanList) NULL;
-
-static void _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
-
-/*
- * AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit.
- *
- * This is here because it needs to touch this module's static var BTScans.
- */
-void
-AtEOXact_nbtree(void)
-{
-
- /*
- * Note: these actions should only be necessary during xact abort; but
- * they can't hurt during a commit.
- */
-
- /*
- * Reset the active-scans list to empty. We do not need to free the
- * list elements, because they're all palloc()'d, so they'll go away
- * at end of transaction anyway.
- */
- BTScans = NULL;
-
- /* If we were building a btree, we ain't anymore. */
- BuildingBtree = false;
-}
-
-/*
- * _bt_regscan() -- register a new scan.
- */
-void
-_bt_regscan(IndexScanDesc scan)
-{
- BTScanList new_el;
-
- new_el = (BTScanList) palloc(sizeof(BTScanListData));
- new_el->btsl_scan = scan;
- new_el->btsl_next = BTScans;
- BTScans = new_el;
-}
-
-/*
- * _bt_dropscan() -- drop a scan from the scan list
- */
-void
-_bt_dropscan(IndexScanDesc scan)
-{
- BTScanList chk,
- last;
-
- last = (BTScanList) NULL;
- for (chk = BTScans;
- chk != (BTScanList) NULL && chk->btsl_scan != scan;
- chk = chk->btsl_next)
- last = chk;
-
- if (chk == (BTScanList) NULL)
- elog(ERROR, "btree scan list trashed; can't find 0x%p", (void *) scan);
-
- if (last == (BTScanList) NULL)
- BTScans = chk->btsl_next;
- else
- last->btsl_next = chk->btsl_next;
-
- pfree(chk);
-}
-
-/*
- * _bt_adjscans() -- adjust all scans in the scan list to compensate
- * for a given deletion
- */
-void
-_bt_adjscans(Relation rel, ItemPointer tid)
-{
- BTScanList l;
- Oid relid;
-
- relid = RelationGetRelid(rel);
- for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next)
- {
- if (relid == RelationGetRelid(l->btsl_scan->relation))
- _bt_scandel(l->btsl_scan,
- ItemPointerGetBlockNumber(tid),
- ItemPointerGetOffsetNumber(tid));
- }
-}
-
-/*
- * _bt_scandel() -- adjust a single scan on deletion
- *
- */
-static void
-_bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
-{
- ItemPointer current;
- Buffer buf;
- BTScanOpaque so;
- OffsetNumber start;
- Page page;
- BTPageOpaque opaque;
-
- so = (BTScanOpaque) scan->opaque;
- buf = so->btso_curbuf;
-
- current = &(scan->currentItemData);
- if (ItemPointerIsValid(current)
- && ItemPointerGetBlockNumber(current) == blkno
- && ItemPointerGetOffsetNumber(current) >= offno)
- {
- page = BufferGetPage(buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- start = P_FIRSTDATAKEY(opaque);
- if (ItemPointerGetOffsetNumber(current) == start)
- ItemPointerSetInvalid(&(so->curHeapIptr));
- else
- {
-
- /*
- * We have to lock buffer before _bt_step and unlock it after
- * that.
- */
- LockBuffer(buf, BT_READ);
- _bt_step(scan, &buf, BackwardScanDirection);
- if (ItemPointerIsValid(current))
- {
- Page pg = BufferGetPage(buf);
- BTItem btitem = (BTItem) PageGetItem(pg,
- PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
-
- so->curHeapIptr = btitem->bti_itup.t_tid;
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- }
- }
- }
-
- current = &(scan->currentMarkData);
- if (ItemPointerIsValid(current)
- && ItemPointerGetBlockNumber(current) == blkno
- && ItemPointerGetOffsetNumber(current) >= offno)
- {
- page = BufferGetPage(so->btso_mrkbuf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- start = P_FIRSTDATAKEY(opaque);
-
- if (ItemPointerGetOffsetNumber(current) == start)
- ItemPointerSetInvalid(&(so->mrkHeapIptr));
- else
- {
- ItemPointerData tmp;
-
- tmp = *current;
- *current = scan->currentItemData;
- scan->currentItemData = tmp;
- so->btso_curbuf = so->btso_mrkbuf;
- so->btso_mrkbuf = buf;
- buf = so->btso_curbuf;
- LockBuffer(buf, BT_READ); /* as above */
-
- _bt_step(scan, &buf, BackwardScanDirection);
-
- so->btso_curbuf = so->btso_mrkbuf;
- so->btso_mrkbuf = buf;
- tmp = *current;
- *current = scan->currentItemData;
- scan->currentItemData = tmp;
- if (ItemPointerIsValid(current))
- {
- Page pg = BufferGetPage(buf);
- BTItem btitem = (BTItem) PageGetItem(pg,
- PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
-
- so->mrkHeapIptr = btitem->bti_itup.t_tid;
- LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* as above */
- }
- }
- }
-}
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 59bf5358e4f..295387ed517 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.66 2001/03/23 04:49:51 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.67 2001/07/15 22:48:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -94,7 +94,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey,
new_stack->bts_parent = stack_in;
/* drop the read lock on the parent page, acquire one on the child */
- _bt_relbuf(rel, *bufP, BT_READ);
+ _bt_relbuf(rel, *bufP);
*bufP = _bt_getbuf(rel, blkno, BT_READ);
/*
@@ -155,7 +155,7 @@ _bt_moveright(Relation rel,
/* step right one page */
BlockNumber rblkno = opaque->btpo_next;
- _bt_relbuf(rel, buf, access);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, rblkno, access);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -406,7 +406,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
/* No more items, so close down the current-item info */
ItemPointerSetInvalid(current);
so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
return (RetrieveIndexResult) NULL;
}
@@ -760,7 +760,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
nomatches:
ItemPointerSetInvalid(current);
so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
res = (RetrieveIndexResult) NULL;
}
@@ -815,14 +815,14 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
/* if we're at end of scan, release the buffer and return */
if (P_RIGHTMOST(opaque))
{
- _bt_relbuf(rel, *bufP, BT_READ);
+ _bt_relbuf(rel, *bufP);
ItemPointerSetInvalid(current);
*bufP = so->btso_curbuf = InvalidBuffer;
return false;
}
/* step right one page */
blkno = opaque->btpo_next;
- _bt_relbuf(rel, *bufP, BT_READ);
+ _bt_relbuf(rel, *bufP);
*bufP = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(*bufP);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -846,7 +846,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
/* if we're at end of scan, release the buffer and return */
if (P_LEFTMOST(opaque))
{
- _bt_relbuf(rel, *bufP, BT_READ);
+ _bt_relbuf(rel, *bufP);
ItemPointerSetInvalid(current);
*bufP = so->btso_curbuf = InvalidBuffer;
return false;
@@ -854,7 +854,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
/* step left */
obknum = BufferGetBlockNumber(*bufP);
blkno = opaque->btpo_prev;
- _bt_relbuf(rel, *bufP, BT_READ);
+ _bt_relbuf(rel, *bufP);
*bufP = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(*bufP);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -868,7 +868,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
while (opaque->btpo_next != obknum)
{
blkno = opaque->btpo_next;
- _bt_relbuf(rel, *bufP, BT_READ);
+ _bt_relbuf(rel, *bufP);
*bufP = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(*bufP);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -952,7 +952,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
itup = &(btitem->bti_itup);
blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
@@ -968,7 +968,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
do
{
blkno = opaque->btpo_next;
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1035,7 +1035,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
/* no tuples in the index match this scan key */
ItemPointerSetInvalid(current);
so->btso_curbuf = InvalidBuffer;
- _bt_relbuf(rel, buf, BT_READ);
+ _bt_relbuf(rel, buf);
res = (RetrieveIndexResult) NULL;
}
diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c
index a8c6a13ea3c..21831ef5d61 100644
--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.63 2001/07/15 22:48:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -62,7 +62,20 @@ typedef struct RTSTATE
FmgrInfo interFn; /* intersection function */
} RTSTATE;
+/* Working state for rtbuild and its callback */
+typedef struct
+{
+ RTSTATE rtState;
+ double indtuples;
+} RTBuildState;
+
/* non-export function prototypes */
+static void rtbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state);
static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup,
RTSTATE *rtstate);
static void rttighten(Relation r, RTSTACK *stk, Datum datum, int att_size,
@@ -81,165 +94,44 @@ static int nospace(Page p, IndexTuple it);
static void initRtstate(RTSTATE *rtstate, Relation index);
+/*
+ * routine to build an index. Basically calls insert over and over
+ */
Datum
rtbuild(PG_FUNCTION_ARGS)
{
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
- Node *oldPred = (Node *) PG_GETARG_POINTER(3);
-
-#ifdef NOT_USED
- IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4);
-
-#endif
- HeapScanDesc hscan;
- HeapTuple htup;
- IndexTuple itup;
- TupleDesc htupdesc,
- itupdesc;
- Datum attdata[INDEX_MAX_KEYS];
- char nulls[INDEX_MAX_KEYS];
- double nhtups,
- nitups;
- Node *pred = indexInfo->ii_Predicate;
-
-#ifndef OMIT_PARTIAL_INDEX
- TupleTable tupleTable;
- TupleTableSlot *slot;
+ double reltuples;
+ RTBuildState buildstate;
+ Buffer buffer;
-#endif
- ExprContext *econtext;
- InsertIndexResult res = NULL;
- Buffer buffer = InvalidBuffer;
- RTSTATE rtState;
+ /* no locking is needed */
- initRtstate(&rtState, index);
+ initRtstate(&buildstate.rtState, index);
/*
* We expect to be called exactly once for any index relation. If
* that's not the case, big trouble's what we have.
*/
- if (oldPred == NULL && RelationGetNumberOfBlocks(index) != 0)
- elog(ERROR, "%s already contains data", RelationGetRelationName(index));
-
- /* initialize the root page (if this is a new index) */
- if (oldPred == NULL)
- {
- buffer = ReadBuffer(index, P_NEW);
- RTInitBuffer(buffer, F_LEAF);
- WriteBuffer(buffer);
- }
-
- /* get tuple descriptors for heap and index relations */
- htupdesc = RelationGetDescr(heap);
- itupdesc = RelationGetDescr(index);
-
- /*
- * If this is a predicate (partial) index, we will need to evaluate
- * the predicate using ExecQual, which requires the current tuple to
- * be in a slot of a TupleTable. In addition, ExecQual must have an
- * ExprContext referring to that slot. Here, we initialize dummy
- * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92
- *
- * We construct the ExprContext anyway since we need a per-tuple
- * temporary memory context for function evaluation -- tgl July 00
- */
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- {
- tupleTable = ExecCreateTupleTable(1);
- slot = ExecAllocTableSlot(tupleTable);
- ExecSetSlotDescriptor(slot, htupdesc, false);
- }
- else
- {
- tupleTable = NULL;
- slot = NULL;
- }
- econtext = MakeExprContext(slot, TransactionCommandContext);
-#else
- econtext = MakeExprContext(NULL, TransactionCommandContext);
-#endif /* OMIT_PARTIAL_INDEX */
-
- /* count the tuples as we insert them */
- nhtups = nitups = 0.0;
-
- /* start a heap scan */
- hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
-
- while (HeapTupleIsValid(htup = heap_getnext(hscan, 0)))
- {
- MemoryContextReset(econtext->ecxt_per_tuple_memory);
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "%s already contains data",
+ RelationGetRelationName(index));
- nhtups += 1.0;
-
-#ifndef OMIT_PARTIAL_INDEX
-
- /*
- * If oldPred != NULL, this is an EXTEND INDEX command, so skip
- * this tuple if it was already in the existing partial index
- */
- if (oldPred != NULL)
- {
- slot->val = htup;
- if (ExecQual((List *) oldPred, econtext, false))
- {
- nitups += 1.0;
- continue;
- }
- }
-
- /*
- * Skip this tuple if it doesn't satisfy the partial-index
- * predicate
- */
- if (pred != NULL)
- {
- slot->val = htup;
- if (!ExecQual((List *) pred, econtext, false))
- continue;
- }
-#endif /* OMIT_PARTIAL_INDEX */
-
- nitups += 1.0;
-
- /*
- * For the current heap tuple, extract all the attributes we use
- * in this index, and note which are null.
- */
- FormIndexDatum(indexInfo,
- htup,
- htupdesc,
- econtext->ecxt_per_tuple_memory,
- attdata,
- nulls);
-
- /* form an index tuple and point it at the heap tuple */
- itup = index_formtuple(itupdesc, attdata, nulls);
- itup->t_tid = htup->t_self;
+ /* initialize the root page */
+ buffer = ReadBuffer(index, P_NEW);
+ RTInitBuffer(buffer, F_LEAF);
+ WriteBuffer(buffer);
- /*
- * Since we already have the index relation locked, we call
- * rtdoinsert directly. Normal access method calls dispatch
- * through rtinsert, which locks the relation for write. This is
- * the right thing to do if you're inserting single tups, but not
- * when you're initializing the whole index at once.
- */
+ /* build the index */
+ buildstate.indtuples = 0;
- res = rtdoinsert(index, itup, &rtState);
- pfree(itup);
- pfree(res);
- }
+ /* do the heap scan */
+ reltuples = IndexBuildHeapScan(heap, index, indexInfo,
+ rtbuildCallback, (void *) &buildstate);
/* okay, all heap tuples are indexed */
- heap_endscan(hscan);
-
-#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL)
- ExecDropTupleTable(tupleTable, true);
-#endif /* OMIT_PARTIAL_INDEX */
- FreeExprContext(econtext);
/*
* Since we just counted the tuples in the heap, we update its stats
@@ -259,20 +151,57 @@ rtbuild(PG_FUNCTION_ARGS)
heap_close(heap, NoLock);
index_close(index);
- UpdateStats(hrelid, nhtups);
- UpdateStats(irelid, nitups);
- if (oldPred != NULL)
- {
- if (nitups == nhtups)
- pred = NULL;
- UpdateIndexPredicate(irelid, oldPred, pred);
- }
+ UpdateStats(hrelid, reltuples);
+ UpdateStats(irelid, buildstate.indtuples);
}
PG_RETURN_VOID();
}
/*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+static void
+rtbuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state)
+{
+ RTBuildState *buildstate = (RTBuildState *) state;
+ IndexTuple itup;
+ InsertIndexResult res;
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
+ itup->t_tid = htup->t_self;
+
+ /* rtree indexes don't index nulls, see notes in rtinsert */
+ if (IndexTupleHasNulls(itup))
+ {
+ pfree(itup);
+ return;
+ }
+
+ /*
+ * Since we already have the index relation locked, we call
+ * rtdoinsert directly. Normal access method calls dispatch
+ * through rtinsert, which locks the relation for write. This is
+ * the right thing to do if you're inserting single tups, but not
+ * when you're initializing the whole index at once.
+ */
+ res = rtdoinsert(index, itup, &buildstate->rtState);
+
+ if (res)
+ pfree(res);
+
+ buildstate->indtuples += 1;
+
+ pfree(itup);
+}
+
+/*
* rtinsert -- wrapper for rtree tuple insertion.
*
* This is the public interface routine for tuple insertion in rtrees.
@@ -285,10 +214,8 @@ rtinsert(PG_FUNCTION_ARGS)
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
-
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
-
#endif
InsertIndexResult res;
IndexTuple itup;
@@ -297,12 +224,24 @@ rtinsert(PG_FUNCTION_ARGS)
/* generate an index tuple */
itup = index_formtuple(RelationGetDescr(r), datum, nulls);
itup->t_tid = *ht_ctid;
+
+ /*
+ * Currently, rtrees do not support indexing NULLs; considerable
+ * infrastructure work would have to be done to do anything reasonable
+ * with a NULL.
+ */
+ if (IndexTupleHasNulls(itup))
+ {
+ pfree(itup);
+ PG_RETURN_POINTER((InsertIndexResult) NULL);
+ }
+
initRtstate(&rtState, r);
/*
- * Notes in ExecUtils:ExecOpenIndices()
- *
- * RelationSetLockForWrite(r);
+ * Since rtree is not marked "amconcurrent" in pg_am, caller should
+ * have acquired exclusive lock on index relation. We need no locking
+ * here.
*/
res = rtdoinsert(r, itup, &rtState);
@@ -1104,40 +1043,92 @@ freestack(RTSTACK *s)
}
}
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
Datum
-rtdelete(PG_FUNCTION_ARGS)
+rtbulkdelete(PG_FUNCTION_ARGS)
{
- Relation r = (Relation) PG_GETARG_POINTER(0);
- ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1);
- BlockNumber blkno;
- OffsetNumber offnum;
- Buffer buf;
- Page page;
+ Relation rel = (Relation) PG_GETARG_POINTER(0);
+ IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
+ void *callback_state = (void *) PG_GETARG_POINTER(2);
+ IndexBulkDeleteResult *result;
+ BlockNumber num_pages;
+ double tuples_removed;
+ double num_index_tuples;
+ RetrieveIndexResult res;
+ IndexScanDesc iscan;
+
+ tuples_removed = 0;
+ num_index_tuples = 0;
/*
- * Notes in ExecUtils:ExecOpenIndices() Also note that only vacuum
- * deletes index tuples now...
- *
- * RelationSetLockForWrite(r);
+ * Since rtree is not marked "amconcurrent" in pg_am, caller should
+ * have acquired exclusive lock on index relation. We need no locking
+ * here.
*/
- blkno = ItemPointerGetBlockNumber(tid);
- offnum = ItemPointerGetOffsetNumber(tid);
+ /*
+ * XXX generic implementation --- should be improved!
+ */
- /* adjust any scans that will be affected by this deletion */
- rtadjscans(r, RTOP_DEL, blkno, offnum);
+ /* walk through the entire index */
+ iscan = index_beginscan(rel, false, 0, (ScanKey) NULL);
- /* delete the index tuple */
- buf = ReadBuffer(r, blkno);
- page = BufferGetPage(buf);
+ while ((res = index_getnext(iscan, ForwardScanDirection))
+ != (RetrieveIndexResult) NULL)
+ {
+ ItemPointer heapptr = &res->heap_iptr;
- PageIndexTupleDelete(page, offnum);
+ if (callback(heapptr, callback_state))
+ {
+ ItemPointer indexptr = &res->index_iptr;
+ BlockNumber blkno;
+ OffsetNumber offnum;
+ Buffer buf;
+ Page page;
- WriteBuffer(buf);
+ blkno = ItemPointerGetBlockNumber(indexptr);
+ offnum = ItemPointerGetOffsetNumber(indexptr);
- PG_RETURN_VOID();
+ /* adjust any scans that will be affected by this deletion */
+ /* (namely, my own scan) */
+ rtadjscans(rel, RTOP_DEL, blkno, offnum);
+
+ /* delete the index tuple */
+ buf = ReadBuffer(rel, blkno);
+ page = BufferGetPage(buf);
+
+ PageIndexTupleDelete(page, offnum);
+
+ WriteBuffer(buf);
+
+ tuples_removed += 1;
+ }
+ else
+ num_index_tuples += 1;
+
+ pfree(res);
+ }
+
+ index_endscan(iscan);
+
+ /* return statistics */
+ num_pages = RelationGetNumberOfBlocks(rel);
+
+ result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
+ result->num_pages = num_pages;
+ result->tuples_removed = tuples_removed;
+ result->num_index_tuples = num_index_tuples;
+
+ PG_RETURN_POINTER(result);
}
+
static void
initRtstate(RTSTATE *rtstate, Relation index)
{
diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c
index c9f1ab7b893..1311cfdc29a 100644
--- a/src/backend/access/rtree/rtscan.c
+++ b/src/backend/access/rtree/rtscan.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.37 2001/06/09 18:16:56 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.38 2001/07/15 22:48:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -60,13 +60,8 @@ rtbeginscan(PG_FUNCTION_ARGS)
ScanKey key = (ScanKey) PG_GETARG_POINTER(3);
IndexScanDesc s;
- /*
- * Let index_beginscan does its work...
- *
- * RelationSetLockForRead(r);
- */
-
s = RelationGetIndexScan(r, fromEnd, nkeys, key);
+
rtregscan(s);
PG_RETURN_POINTER(s);
@@ -282,6 +277,27 @@ rtdropscan(IndexScanDesc s)
pfree(l);
}
+/*
+ * AtEOXact_rtree() --- clean up rtree subsystem at xact abort or commit.
+ *
+ * This is here because it needs to touch this module's static var RTScans.
+ */
+void
+AtEOXact_rtree(void)
+{
+ /*
+ * Note: these actions should only be necessary during xact abort; but
+ * they can't hurt during a commit.
+ */
+
+ /*
+ * Reset the active-scans list to empty. We do not need to free the
+ * list elements, because they're all palloc()'d, so they'll go away
+ * at end of transaction anyway.
+ */
+ RTScans = NULL;
+}
+
void
rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
{
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 64671792315..d32a6dda978 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.106 2001/07/12 04:11:13 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.107 2001/07/15 22:48:16 tgl Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
@@ -156,7 +156,10 @@
#include <sys/time.h>
+#include "access/gistscan.h"
+#include "access/hash.h"
#include "access/nbtree.h"
+#include "access/rtree.h"
#include "access/xact.h"
#include "catalog/heap.h"
#include "catalog/index.h"
@@ -1040,7 +1043,10 @@ CommitTransaction(void)
smgrDoPendingDeletes(true);
AtEOXact_SPI();
+ AtEOXact_gist();
+ AtEOXact_hash();
AtEOXact_nbtree();
+ AtEOXact_rtree();
AtCommit_Cache();
AtCommit_Locks();
AtEOXact_CatCache(true);
@@ -1147,7 +1153,10 @@ AbortTransaction(void)
smgrDoPendingDeletes(false);
AtEOXact_SPI();
+ AtEOXact_gist();
+ AtEOXact_hash();
AtEOXact_nbtree();
+ AtEOXact_rtree();
AtAbort_Cache();
AtEOXact_CatCache(false);
AtAbort_Memory();
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 23bcc420368..ee1a4b7c31f 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.110 2001/06/25 23:03:03 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.111 2001/07/15 22:48:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1119,7 +1119,7 @@ build_indices()
heap = heap_openr(ILHead->il_heap, NoLock);
ind = index_openr(ILHead->il_ind);
- index_build(heap, ind, ILHead->il_info, NULL);
+ index_build(heap, ind, ILHead->il_info);
/*
* In normal processing mode, index_build would close the heap and
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 1171376bb25..02e29441da2 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.170 2001/06/29 21:08:24 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.171 2001/07/15 22:48:17 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -1031,7 +1031,7 @@ RelationTruncateIndexes(Oid heapId)
/* Initialize the index and rebuild */
InitIndexStrategy(indexInfo->ii_NumIndexAttrs,
currentIndex, accessMethodId);
- index_build(heapRelation, currentIndex, indexInfo, NULL);
+ index_build(heapRelation, currentIndex, indexInfo);
/*
* index_build will close both the heap and index relations (but
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 34989055b62..f0fa73e83dd 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.155 2001/06/27 23:31:38 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.156 2001/07/15 22:48:17 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -41,6 +41,7 @@
#include "optimizer/clauses.h"
#include "optimizer/prep.h"
#include "parser/parse_func.h"
+#include "storage/sinval.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
@@ -73,9 +74,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
IndexInfo *indexInfo,
Oid *classOids,
bool islossy, bool primary);
-static void DefaultBuild(Relation heapRelation, Relation indexRelation,
- IndexInfo *indexInfo, Node *oldPred,
- IndexStrategy indexStrategy);
static Oid IndexGetRelation(Oid indexId);
static bool activate_index(Oid indexId, bool activate, bool inplace);
@@ -656,7 +654,7 @@ UpdateIndexPredicate(Oid indexoid, Node *oldPred, Node *predicate)
}
replace[Anum_pg_index_indpred - 1] = 'r';
- values[Anum_pg_index_indpred - 1] = (Datum) predText;
+ values[Anum_pg_index_indpred - 1] = PointerGetDatum(predText);
newtup = heap_modifytuple(tuple, pg_index, values, nulls, replace);
@@ -885,7 +883,7 @@ index_create(char *heapRelationName,
/* XXX shouldn't we close the heap and index rels here? */
}
else
- index_build(heapRelation, indexRelation, indexInfo, NULL);
+ index_build(heapRelation, indexRelation, indexInfo);
}
/* ----------------------------------------------------------------
@@ -912,12 +910,13 @@ index_drop(Oid indexId)
/*
* To drop an index safely, we must grab exclusive lock on its parent
* table; otherwise there could be other backends using the index!
- * Exclusive lock on the index alone is insufficient because the index
- * access routines are a little slipshod about obtaining adequate
- * locking (see ExecOpenIndices()). We do grab exclusive lock on the
- * index too, just to be safe. Both locks must be held till end of
- * transaction, else other backends will still see this index in
- * pg_index.
+ * Exclusive lock on the index alone is insufficient because another
+ * backend might be in the midst of devising a query plan that will use
+ * the index. The parser and planner take care to hold an appropriate
+ * lock on the parent table while working, but having them hold locks on
+ * all the indexes too seems overly complex. We do grab exclusive lock
+ * on the index too, just to be safe. Both locks must be held till end of
+ * transaction, else other backends will still see this index in pg_index.
*/
heapId = IndexGetRelation(indexId);
userHeapRelation = heap_open(heapId, AccessExclusiveLock);
@@ -1075,7 +1074,7 @@ BuildIndexInfo(HeapTuple indexTuple)
/*
* If partial index, convert predicate into expression nodetree
*/
- if (VARSIZE(&indexStruct->indpred) != 0)
+ if (VARSIZE(&indexStruct->indpred) > VARHDRSZ)
{
char *predString;
@@ -1625,43 +1624,77 @@ UpdateStats(Oid relid, double reltuples)
}
-/* ----------------
- * DefaultBuild
+/*
+ * index_build - invoke access-method-specific index build procedure
+ */
+void
+index_build(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo)
+{
+ RegProcedure procedure;
+
+ /*
+ * sanity checks
+ */
+ Assert(RelationIsValid(indexRelation));
+ Assert(PointerIsValid(indexRelation->rd_am));
+
+ procedure = indexRelation->rd_am->ambuild;
+ Assert(RegProcedureIsValid(procedure));
+
+ /*
+ * Call the access method's build procedure
+ */
+ OidFunctionCall3(procedure,
+ PointerGetDatum(heapRelation),
+ PointerGetDatum(indexRelation),
+ PointerGetDatum(indexInfo));
+}
+
+
+/*
+ * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
*
- * NB: this routine is dead code, and likely always has been, because
- * there are no access methods that don't supply their own ambuild procedure.
+ * This is called back from an access-method-specific index build procedure
+ * after the AM has done whatever setup it needs. The parent heap relation
+ * is scanned to find tuples that should be entered into the index. Each
+ * such tuple is passed to the AM's callback routine, which does the right
+ * things to add it to the new index. After we return, the AM's index
+ * build procedure does whatever cleanup is needed; in particular, it should
+ * close the heap and index relations.
*
- * Anyone want to wager whether it would actually work if executed?
- * ----------------
+ * The total count of heap tuples is returned. This is for updating pg_class
+ * statistics. (It's annoying not to be able to do that here, but we can't
+ * do it until after the relation is closed.) Note that the index AM itself
+ * must keep track of the number of index tuples; we don't do so here because
+ * the AM might reject some of the tuples for its own reasons, such as being
+ * unable to store NULLs.
*/
-static void
-DefaultBuild(Relation heapRelation,
- Relation indexRelation,
- IndexInfo *indexInfo,
- Node *oldPred,
- IndexStrategy indexStrategy) /* not used */
+double
+IndexBuildHeapScan(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo,
+ IndexBuildCallback callback,
+ void *callback_state)
{
HeapScanDesc scan;
HeapTuple heapTuple;
TupleDesc heapDescriptor;
- Datum datum[INDEX_MAX_KEYS];
- char nullv[INDEX_MAX_KEYS];
- double reltuples,
- indtuples;
+ Datum attdata[INDEX_MAX_KEYS];
+ char nulls[INDEX_MAX_KEYS];
+ double reltuples;
Node *predicate = indexInfo->ii_Predicate;
-
-#ifndef OMIT_PARTIAL_INDEX
TupleTable tupleTable;
TupleTableSlot *slot;
-
-#endif
ExprContext *econtext;
- InsertIndexResult insertResult;
+ Snapshot snapshot;
+ TransactionId XmaxRecent;
/*
- * more & better checking is needed
+ * sanity checks
*/
- Assert(OidIsValid(indexRelation->rd_rel->relam)); /* XXX */
+ Assert(OidIsValid(indexRelation->rd_rel->relam));
heapDescriptor = RelationGetDescr(heapRelation);
@@ -1675,8 +1708,7 @@ DefaultBuild(Relation heapRelation,
* We construct the ExprContext anyway since we need a per-tuple
* temporary memory context for function evaluation -- tgl July 00
*/
-#ifndef OMIT_PARTIAL_INDEX
- if (predicate != NULL || oldPred != NULL)
+ if (predicate != NULL)
{
tupleTable = ExecCreateTupleTable(1);
slot = ExecAllocTableSlot(tupleTable);
@@ -1688,155 +1720,158 @@ DefaultBuild(Relation heapRelation,
slot = NULL;
}
econtext = MakeExprContext(slot, TransactionCommandContext);
-#else
- econtext = MakeExprContext(NULL, TransactionCommandContext);
-#endif /* OMIT_PARTIAL_INDEX */
/*
- * Ok, begin our scan of the base relation.
+ * Ok, begin our scan of the base relation. We use SnapshotAny
+ * because we must retrieve all tuples and do our own time qual checks.
*/
+ if (IsBootstrapProcessingMode())
+ {
+ snapshot = SnapshotNow;
+ XmaxRecent = InvalidTransactionId;
+ }
+ else
+ {
+ snapshot = SnapshotAny;
+ GetXmaxRecent(&XmaxRecent);
+ }
+
scan = heap_beginscan(heapRelation, /* relation */
0, /* start at end */
- SnapshotNow, /* seeself */
+ snapshot, /* seeself */
0, /* number of keys */
(ScanKey) NULL); /* scan key */
- reltuples = indtuples = 0.0;
+ reltuples = 0;
/*
- * for each tuple in the base relation, we create an index tuple and
- * add it to the index relation. We keep a running count of the
- * number of tuples so that we can update pg_class with correct
- * statistics when we're done building the index.
+ * Scan all tuples in the base relation.
*/
while (HeapTupleIsValid(heapTuple = heap_getnext(scan, 0)))
{
- MemoryContextReset(econtext->ecxt_per_tuple_memory);
+ bool tupleIsAlive;
- reltuples += 1.0;
+ if (snapshot == SnapshotAny)
+ {
+ /* do our own time qual check */
+ bool indexIt;
+ uint16 sv_infomask;
-#ifndef OMIT_PARTIAL_INDEX
+ /*
+ * HeapTupleSatisfiesVacuum may update tuple's hint status bits.
+ * We could possibly get away with not locking the buffer here,
+ * since caller should hold ShareLock on the relation, but let's
+ * be conservative about it.
+ */
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+ sv_infomask = heapTuple->t_data->t_infomask;
- /*
- * If oldPred != NULL, this is an EXTEND INDEX command, so skip
- * this tuple if it was already in the existing partial index
- */
- if (oldPred != NULL)
- {
- slot->val = heapTuple;
- if (ExecQual((List *) oldPred, econtext, false))
+ switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, XmaxRecent))
{
- indtuples += 1.0;
- continue;
+ case HEAPTUPLE_DEAD:
+ indexIt = false;
+ tupleIsAlive = false;
+ break;
+ case HEAPTUPLE_LIVE:
+ indexIt = true;
+ tupleIsAlive = true;
+ break;
+ case HEAPTUPLE_RECENTLY_DEAD:
+ /*
+ * If tuple is recently deleted then we must index it
+ * anyway to keep VACUUM from complaining.
+ */
+ indexIt = true;
+ tupleIsAlive = false;
+ break;
+ case HEAPTUPLE_INSERT_IN_PROGRESS:
+ /*
+ * This should not happen, if caller holds ShareLock on
+ * the parent relation.
+ */
+ elog(ERROR, "IndexBuildHeapScan: concurrent insert in progress");
+ indexIt = tupleIsAlive = false; /* keep compiler quiet */
+ break;
+ case HEAPTUPLE_DELETE_IN_PROGRESS:
+ /*
+ * This should not happen, if caller holds ShareLock on
+ * the parent relation.
+ */
+ elog(ERROR, "IndexBuildHeapScan: concurrent delete in progress");
+ indexIt = tupleIsAlive = false; /* keep compiler quiet */
+ break;
+ default:
+ elog(ERROR, "Unexpected HeapTupleSatisfiesVacuum result");
+ indexIt = tupleIsAlive = false; /* keep compiler quiet */
+ break;
}
+
+ /* check for hint-bit update by HeapTupleSatisfiesVacuum */
+ if (sv_infomask != heapTuple->t_data->t_infomask)
+ SetBufferCommitInfoNeedsSave(scan->rs_cbuf);
+
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ if (! indexIt)
+ continue;
}
+ else
+ {
+ /* heap_getnext did the time qual check */
+ tupleIsAlive = true;
+ }
+
+ reltuples += 1;
+
+ MemoryContextReset(econtext->ecxt_per_tuple_memory);
/*
- * Skip this tuple if it doesn't satisfy the partial-index
- * predicate
+ * In a partial index, discard tuples that don't satisfy the
+ * predicate. We can also discard recently-dead tuples, since
+ * VACUUM doesn't complain about tuple count mismatch for partial
+ * indexes.
*/
if (predicate != NULL)
{
- slot->val = heapTuple;
+ if (! tupleIsAlive)
+ continue;
+ ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
if (!ExecQual((List *) predicate, econtext, false))
continue;
}
-#endif /* OMIT_PARTIAL_INDEX */
-
- indtuples += 1.0;
/*
- * FormIndexDatum fills in its datum and null parameters with
- * attribute information taken from the given heap tuple.
+ * For the current heap tuple, extract all the attributes we use
+ * in this index, and note which are null. This also performs
+ * evaluation of the function, if this is a functional index.
*/
FormIndexDatum(indexInfo,
heapTuple,
heapDescriptor,
econtext->ecxt_per_tuple_memory,
- datum,
- nullv);
+ attdata,
+ nulls);
- insertResult = index_insert(indexRelation, datum, nullv,
- &(heapTuple->t_self), heapRelation);
+ /*
+ * You'd think we should go ahead and build the index tuple here,
+ * but some index AMs want to do further processing on the
+ * data first. So pass the attdata and nulls arrays, instead.
+ */
- if (insertResult)
- pfree(insertResult);
+ /* Call the AM's callback routine to process the tuple */
+ callback(indexRelation, heapTuple, attdata, nulls, tupleIsAlive,
+ callback_state);
}
heap_endscan(scan);
-#ifndef OMIT_PARTIAL_INDEX
- if (predicate != NULL || oldPred != NULL)
+ if (predicate != NULL)
ExecDropTupleTable(tupleTable, true);
-#endif /* OMIT_PARTIAL_INDEX */
FreeExprContext(econtext);
- /*
- * Since we just counted the tuples in the heap, we update its stats
- * in pg_class to guarantee that the planner takes advantage of the
- * index we just created. But, only update statistics during normal
- * index definitions, not for indices on system catalogs created
- * during bootstrap processing. We must close the relations before
- * updating statistics to guarantee that the relcache entries are
- * flushed when we increment the command counter in UpdateStats(). But
- * we do not release any locks on the relations; those will be held
- * until end of transaction.
- */
- if (IsNormalProcessingMode())
- {
- Oid hrelid = RelationGetRelid(heapRelation);
- Oid irelid = RelationGetRelid(indexRelation);
-
- heap_close(heapRelation, NoLock);
- index_close(indexRelation);
- UpdateStats(hrelid, reltuples);
- UpdateStats(irelid, indtuples);
- if (oldPred != NULL)
- {
- if (indtuples == reltuples)
- predicate = NULL;
- UpdateIndexPredicate(irelid, oldPred, predicate);
- }
- }
+ return reltuples;
}
-/* ----------------
- * index_build
- * ----------------
- */
-void
-index_build(Relation heapRelation,
- Relation indexRelation,
- IndexInfo *indexInfo,
- Node *oldPred)
-{
- RegProcedure procedure;
-
- /*
- * sanity checks
- */
- Assert(RelationIsValid(indexRelation));
- Assert(PointerIsValid(indexRelation->rd_am));
-
- procedure = indexRelation->rd_am->ambuild;
-
- /*
- * use the access method build procedure if supplied, else default.
- */
- if (RegProcedureIsValid(procedure))
- OidFunctionCall5(procedure,
- PointerGetDatum(heapRelation),
- PointerGetDatum(indexRelation),
- PointerGetDatum(indexInfo),
- PointerGetDatum(oldPred),
- PointerGetDatum(RelationGetIndexStrategy(indexRelation)));
- else
- DefaultBuild(heapRelation,
- indexRelation,
- indexInfo,
- oldPred,
- RelationGetIndexStrategy(indexRelation));
-}
/*
* IndexGetRelation: given an index's relation OID, get the OID of the
@@ -1967,7 +2002,7 @@ reindex_index(Oid indexId, bool force, bool inplace)
/* Initialize the index and rebuild */
InitIndexStrategy(indexInfo->ii_NumIndexAttrs, iRel, accessMethodId);
- index_build(heapRelation, iRel, indexInfo, NULL);
+ index_build(heapRelation, iRel, indexInfo);
/*
* index_build will close both the heap and index relations (but not
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c
index 40ee84c0186..d96d17752af 100644
--- a/src/backend/catalog/pg_operator.c
+++ b/src/backend/catalog/pg_operator.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.59 2001/06/01 02:41:35 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.60 2001/07/15 22:48:17 tgl Exp $
*
* NOTES
* these routines moved here from commands/define.c and somewhat cleaned up.
@@ -402,7 +402,7 @@ OperatorShellMake(char *operatorName,
* rightSortObjectId -- same as for commutatorObjectId
* operatorProcedure -- must access the pg_procedure catalog to get the
* ObjectId of the procedure that actually does the operator
- * actions this is required. Do an amgetattr to find out the
+ * actions this is required. Do a lookup to find out the
* return type of the procedure
* restrictionProcedure -- must access the pg_procedure catalog to get
* the ObjectId but this is optional
diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c
index 6a2bd7dc932..4fcbeeceb6c 100644
--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.134 2001/06/14 01:09:22 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.135 2001/07/15 22:48:17 tgl Exp $
*
* NOTES
* The PerformAddAttribute() code, like most of the relation
@@ -269,7 +269,7 @@ PerformPortalClose(char *name, CommandDest dest)
* Initial idea of ordering the tuple attributes so that all
* the variable length domains occured last was scratched. Doing
* so would not speed access too much (in general) and would create
- * many complications in formtuple, amgetattr, and addattribute.
+ * many complications in formtuple, heap_getattr, and addattribute.
*
* scan attribute catalog for name conflict (within rel)
* scan type catalog for absence of data type (if not arg)
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 108c4ea3780..7398b0b0ce5 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/indexcmds.c,v 1.50 2001/06/13 21:44:40 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/indexcmds.c,v 1.51 2001/07/15 22:48:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -83,6 +83,8 @@ DefineIndex(char *heapRelationName,
Oid *classObjectId;
Oid accessMethodId;
Oid relationId;
+ HeapTuple tuple;
+ Form_pg_am accessMethodForm;
IndexInfo *indexInfo;
int numberOfAttributes;
List *cnfPred = NIL;
@@ -107,27 +109,25 @@ DefineIndex(char *heapRelationName,
heapRelationName);
/*
- * compute access method id
+ * look up the access method, verify it can handle the requested features
*/
- accessMethodId = GetSysCacheOid(AMNAME,
- PointerGetDatum(accessMethodName),
- 0, 0, 0);
- if (!OidIsValid(accessMethodId))
+ tuple = SearchSysCache(AMNAME,
+ PointerGetDatum(accessMethodName),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(tuple))
elog(ERROR, "DefineIndex: access method \"%s\" not found",
accessMethodName);
+ accessMethodId = tuple->t_data->t_oid;
+ accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
- /*
- * XXX Hardwired hacks to check for limitations on supported index
- * types. We really ought to be learning this info from entries in the
- * pg_am table, instead of having it wired-in here!
- */
- if (unique && accessMethodId != BTREE_AM_OID)
- elog(ERROR, "DefineIndex: unique indices are only available with the btree access method");
+ if (unique && ! accessMethodForm->amcanunique)
+ elog(ERROR, "DefineIndex: access method \"%s\" does not support UNIQUE indexes",
+ accessMethodName);
+ if (numberOfAttributes > 1 && ! accessMethodForm->amcanmulticol)
+ elog(ERROR, "DefineIndex: access method \"%s\" does not support multi-column indexes",
+ accessMethodName);
- if (numberOfAttributes > 1 &&
- !( accessMethodId == BTREE_AM_OID ||
- accessMethodId == GIST_AM_OID))
- elog(ERROR, "DefineIndex: multi-column indices are only available with the btree or GiST access methods");
+ ReleaseSysCache(tuple);
/*
* WITH clause reinstated to handle lossy indices. -- JMH, 7/22/96
@@ -298,7 +298,15 @@ ExtendIndex(char *indexRelationName, Expr *predicate, List *rangetable)
InitIndexStrategy(indexInfo->ii_NumIndexAttrs,
indexRelation, accessMethodId);
- index_build(heapRelation, indexRelation, indexInfo, oldPred);
+ /*
+ * XXX currently BROKEN: if we want to support EXTEND INDEX, oldPred
+ * needs to be passed through to IndexBuildHeapScan. We could do this
+ * without help from the index AMs if we added an oldPred field to the
+ * IndexInfo struct. Currently I'm expecting that EXTEND INDEX will
+ * get removed, so I'm not going to do that --- tgl 7/14/01
+ */
+
+ index_build(heapRelation, indexRelation, indexInfo);
/* heap and index rels are closed as a side-effect of index_build */
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index f41bb664a2f..c53fa05812e 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.204 2001/07/13 22:55:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.205 2001/07/15 22:48:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -128,7 +128,7 @@ static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
static void vacuum_index(VacPageList vacpagelist, Relation indrel,
double num_tuples, int keep_tuples);
static void scan_index(Relation indrel, double num_tuples);
-static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
+static bool tid_reaped(ItemPointer itemptr, void *state);
static void vac_update_fsm(Relation onerel, VacPageList fraged_pages,
BlockNumber rel_pages);
static VacPage copy_vac_page(VacPage vacpage);
@@ -542,17 +542,11 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt)
/*
* Do the actual work --- either FULL or "lazy" vacuum
- *
- * XXX for the moment, lazy vac not supported unless CONCURRENT_VACUUM
*/
-#ifdef CONCURRENT_VACUUM
if (vacstmt->full)
full_vacuum_rel(onerel);
else
lazy_vacuum_rel(onerel, vacstmt);
-#else
- full_vacuum_rel(onerel);
-#endif
/* all done with this class, but hold lock until commit */
heap_close(onerel, NoLock);
@@ -1049,7 +1043,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
Tup %.0f: Vac %.0f, Keep/VTL %.0f/%u, UnUsed %.0f, MinLen %lu, MaxLen %lu; \
-Re-using: Free/Avail. Space %.0f/%.0f; EndEmpty/Avail. Pages %u/%u. %s",
+Re-using: Free/Avail. Space %.0f/%.0f; EndEmpty/Avail. Pages %u/%u.\n\t%s",
nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
new_pages, num_tuples, tups_vacuumed,
nkeep, vacrelstats->num_vtlinks,
@@ -1965,7 +1959,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
}
Assert(num_moved == checked_moved);
- elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s",
+ elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u.\n\t%s",
RelationGetRelationName(onerel),
nblocks, blkno, num_moved,
vac_show_rusage(&ru0));
@@ -2213,7 +2207,7 @@ scan_index(Relation indrel, double num_tuples)
nipages = RelationGetNumberOfBlocks(indrel);
vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f. %s",
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f.\n\t%s",
RelationGetRelationName(indrel), nipages, nitups,
vac_show_rusage(&ru0));
@@ -2247,85 +2241,55 @@ static void
vacuum_index(VacPageList vacpagelist, Relation indrel,
double num_tuples, int keep_tuples)
{
- RetrieveIndexResult res;
- IndexScanDesc iscan;
- ItemPointer heapptr;
- int tups_vacuumed;
- BlockNumber num_pages;
- double num_index_tuples;
- VacPage vp;
+ IndexBulkDeleteResult *stats;
VacRUsage ru0;
vac_init_rusage(&ru0);
- /* walk through the entire index */
- iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
- tups_vacuumed = 0;
- num_index_tuples = 0;
-
- while ((res = index_getnext(iscan, ForwardScanDirection))
- != (RetrieveIndexResult) NULL)
- {
- heapptr = &res->heap_iptr;
-
- if ((vp = tid_reaped(heapptr, vacpagelist)) != (VacPage) NULL)
- {
-#ifdef NOT_USED
- elog(DEBUG, "<%x,%x> -> <%x,%x>",
- ItemPointerGetBlockNumber(&(res->index_iptr)),
- ItemPointerGetOffsetNumber(&(res->index_iptr)),
- ItemPointerGetBlockNumber(&(res->heap_iptr)),
- ItemPointerGetOffsetNumber(&(res->heap_iptr)));
-#endif
- if (vp->offsets_free == 0)
- {
- elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing",
- RelationGetRelationName(indrel),
- vp->blkno, ItemPointerGetOffsetNumber(heapptr));
- }
- ++tups_vacuumed;
- index_delete(indrel, &res->index_iptr);
- }
- else
- num_index_tuples += 1;
-
- pfree(res);
- }
+ /* Do bulk deletion */
+ stats = index_bulk_delete(indrel, tid_reaped, (void *) vacpagelist);
- index_endscan(iscan);
+ if (!stats)
+ return;
/* now update statistics in pg_class */
- num_pages = RelationGetNumberOfBlocks(indrel);
vac_update_relstats(RelationGetRelid(indrel),
- num_pages, num_index_tuples, false);
+ stats->num_pages, stats->num_index_tuples,
+ false);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %u. %s",
- RelationGetRelationName(indrel), num_pages,
- num_index_tuples - keep_tuples, tups_vacuumed,
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s",
+ RelationGetRelationName(indrel), stats->num_pages,
+ stats->num_index_tuples - keep_tuples, stats->tuples_removed,
vac_show_rusage(&ru0));
/*
* Check for tuple count mismatch. If the index is partial, then
* it's OK for it to have fewer tuples than the heap; else we got trouble.
*/
- if (num_index_tuples != num_tuples + keep_tuples)
+ if (stats->num_index_tuples != num_tuples + keep_tuples)
{
- if (num_index_tuples > num_tuples + keep_tuples ||
+ if (stats->num_index_tuples > num_tuples + keep_tuples ||
! is_partial_index(indrel))
elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%.0f) IS NOT THE SAME AS HEAP' (%.0f).\
\n\tRecreate the index.",
- RelationGetRelationName(indrel), num_index_tuples, num_tuples);
+ RelationGetRelationName(indrel),
+ stats->num_index_tuples, num_tuples);
}
+
+ pfree(stats);
}
/*
* tid_reaped() -- is a particular tid reaped?
*
+ * This has the right signature to be an IndexBulkDeleteCallback.
+ *
* vacpagelist->VacPage_array is sorted in right order.
*/
-static VacPage
-tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
+static bool
+tid_reaped(ItemPointer itemptr, void *state)
{
+ VacPageList vacpagelist = (VacPageList) state;
OffsetNumber ioffno;
OffsetNumber *voff;
VacPage vp,
@@ -2342,8 +2306,8 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
sizeof(VacPage),
vac_cmp_blk);
- if (vpp == (VacPage *) NULL)
- return (VacPage) NULL;
+ if (vpp == NULL)
+ return false;
/* ok - we are on a partially or fully reaped page */
vp = *vpp;
@@ -2351,7 +2315,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
if (vp->offsets_free == 0)
{
/* this is EmptyPage, so claim all tuples on it are reaped!!! */
- return vp;
+ return true;
}
voff = (OffsetNumber *) vac_bsearch((void *) &ioffno,
@@ -2360,11 +2324,11 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
sizeof(OffsetNumber),
vac_cmp_offno);
- if (voff == (OffsetNumber *) NULL)
- return (VacPage) NULL;
+ if (voff == NULL)
+ return false;
/* tid is reaped */
- return vp;
+ return true;
}
/*
@@ -2595,6 +2559,13 @@ is_partial_index(Relation indrel)
HeapTuple cachetuple;
Form_pg_index indexStruct;
+ /*
+ * If the index's AM doesn't support nulls, it's partial for our purposes
+ */
+ if (! indrel->rd_am->amindexnulls)
+ return true;
+
+ /* Otherwise, look to see if there's a partial-index predicate */
cachetuple = SearchSysCache(INDEXRELID,
ObjectIdGetDatum(RelationGetRelid(indrel)),
0, 0, 0);
@@ -2603,7 +2574,7 @@ is_partial_index(Relation indrel)
RelationGetRelid(indrel));
indexStruct = (Form_pg_index) GETSTRUCT(cachetuple);
- result = (VARSIZE(&indexStruct->indpred) != 0);
+ result = (VARSIZE(&indexStruct->indpred) > VARHDRSZ);
ReleaseSysCache(cachetuple);
return result;
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 07529fe265a..b78f933f0c3 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -31,7 +31,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.1 2001/07/13 22:55:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.2 2001/07/15 22:48:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -112,7 +112,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
BlockNumber page, Size avail);
-static bool lazy_tid_reaped(ItemPointer itemptr, LVRelStats *vacrelstats);
+static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
static int vac_cmp_itemptr(const void *left, const void *right);
@@ -371,11 +371,11 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if (pgchanged)
{
- WriteBuffer(buf);
+ SetBufferCommitInfoNeedsSave(buf);
changed_pages++;
}
- else
- ReleaseBuffer(buf);
+
+ ReleaseBuffer(buf);
}
/* If any tuples need to be deleted, perform final vacuum cycle */
@@ -507,64 +507,40 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
static void
lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats)
{
- RetrieveIndexResult res;
- IndexScanDesc iscan;
- int tups_vacuumed;
- BlockNumber num_pages;
- double num_index_tuples;
+ IndexBulkDeleteResult *stats;
VacRUsage ru0;
vac_init_rusage(&ru0);
/*
- * Only btree and hash indexes are currently safe for concurrent access;
- * see notes in ExecOpenIndices(). XXX should rely on index AM for this
+ * If index is unsafe for concurrent access, must lock it.
*/
- if (indrel->rd_rel->relam != BTREE_AM_OID &&
- indrel->rd_rel->relam != HASH_AM_OID)
+ if (! indrel->rd_am->amconcurrent)
LockRelation(indrel, AccessExclusiveLock);
- /* XXX should use a bulk-delete call here */
-
- /* walk through the entire index */
- iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
- tups_vacuumed = 0;
- num_index_tuples = 0;
-
- while ((res = index_getnext(iscan, ForwardScanDirection))
- != (RetrieveIndexResult) NULL)
- {
- ItemPointer heapptr = &res->heap_iptr;
-
- if (lazy_tid_reaped(heapptr, vacrelstats))
- {
- index_delete(indrel, &res->index_iptr);
- ++tups_vacuumed;
- }
- else
- num_index_tuples += 1;
-
- pfree(res);
- }
-
- index_endscan(iscan);
-
- /* now update statistics in pg_class */
- num_pages = RelationGetNumberOfBlocks(indrel);
- vac_update_relstats(RelationGetRelid(indrel),
- num_pages, num_index_tuples, false);
+ /* Do bulk deletion */
+ stats = index_bulk_delete(indrel, lazy_tid_reaped, (void *) vacrelstats);
/*
* Release lock acquired above.
*/
- if (indrel->rd_rel->relam != BTREE_AM_OID &&
- indrel->rd_rel->relam != HASH_AM_OID)
+ if (! indrel->rd_am->amconcurrent)
UnlockRelation(indrel, AccessExclusiveLock);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %u.\n\t%s",
- RelationGetRelationName(indrel), num_pages,
- num_index_tuples, tups_vacuumed,
- vac_show_rusage(&ru0));
+ /* now update statistics in pg_class */
+ if (stats)
+ {
+ vac_update_relstats(RelationGetRelid(indrel),
+ stats->num_pages, stats->num_index_tuples,
+ false);
+
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s",
+ RelationGetRelationName(indrel), stats->num_pages,
+ stats->num_index_tuples, stats->tuples_removed,
+ vac_show_rusage(&ru0));
+
+ pfree(stats);
+ }
}
/*
@@ -960,11 +936,14 @@ lazy_record_free_space(LVRelStats *vacrelstats,
/*
* lazy_tid_reaped() -- is a particular tid deletable?
*
+ * This has the right signature to be an IndexBulkDeleteCallback.
+ *
* Assumes dead_tuples array is in sorted order.
*/
static bool
-lazy_tid_reaped(ItemPointer itemptr, LVRelStats *vacrelstats)
+lazy_tid_reaped(ItemPointer itemptr, void *state)
{
+ LVRelStats *vacrelstats = (LVRelStats *) state;
ItemPointer res;
res = (ItemPointer) bsearch((void *) itemptr,
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 72aceb35f0f..9465604b584 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.75 2001/03/22 06:16:12 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.76 2001/07/15 22:48:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -504,25 +504,26 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo)
/*
* Open (and lock, if necessary) the index relation
*
- * Hack for not btree and hash indices: they use relation level
- * exclusive locking on update (i.e. - they are not ready for
- * MVCC) and so we have to exclusively lock indices here to
- * prevent deadlocks if we will scan them - index_beginscan places
- * AccessShareLock, indices update methods don't use locks at all.
- * We release this lock in ExecCloseIndices. Note, that hashes use
- * page level locking - i.e. are not deadlock-free - let's them be
- * on their way -:)) vadim 03-12-1998
+ * If the index AM is not safe for concurrent updates, obtain
+ * an exclusive lock on the index to lock out other updaters as
+ * well as readers (index_beginscan places AccessShareLock).
+ * We will release this lock in ExecCloseIndices.
*
- * If there are multiple not-btree-or-hash indices, all backends must
- * lock the indices in the same order or we will get deadlocks
- * here during concurrent updates. This is now guaranteed by
+ * If the index AM supports concurrent updates, we obtain no lock
+ * here at all, which is a tad weird, but safe since any critical
+ * operation on the index (like deleting it) will acquire exclusive
+ * lock on the parent table. Perhaps someday we should acquire
+ * RowExclusiveLock on the index here?
+ *
+ * If there are multiple not-concurrent-safe indexes, all backends
+ * must lock the indexes in the same order or we will get deadlocks
+ * here during concurrent updates. This is guaranteed by
* RelationGetIndexList(), which promises to return the index list
- * in OID order. tgl 06-19-2000
+ * in OID order.
*/
indexDesc = index_open(indexOid);
- if (indexDesc->rd_rel->relam != BTREE_AM_OID &&
- indexDesc->rd_rel->relam != HASH_AM_OID)
+ if (! indexDesc->rd_am->amconcurrent)
LockRelation(indexDesc, AccessExclusiveLock);
/*
@@ -560,24 +561,21 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
{
int i;
int numIndices;
- RelationPtr relationDescs;
+ RelationPtr indexDescs;
numIndices = resultRelInfo->ri_NumIndices;
- relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexDescs = resultRelInfo->ri_IndexRelationDescs;
for (i = 0; i < numIndices; i++)
{
- if (relationDescs[i] == NULL)
+ if (indexDescs[i] == NULL)
continue;
- /*
- * See notes in ExecOpenIndices.
- */
- if (relationDescs[i]->rd_rel->relam != BTREE_AM_OID &&
- relationDescs[i]->rd_rel->relam != HASH_AM_OID)
- UnlockRelation(relationDescs[i], AccessExclusiveLock);
+ /* Drop lock, if one was acquired by ExecOpenIndices */
+ if (! indexDescs[i]->rd_am->amconcurrent)
+ UnlockRelation(indexDescs[i], AccessExclusiveLock);
- index_close(relationDescs[i]);
+ index_close(indexDescs[i]);
}
/*
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index bd118b876b0..ec26ed05cc0 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.61 2001/06/22 19:16:22 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.62 2001/07/15 22:48:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -993,7 +993,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, Plan *parent)
ExecOpenScanR(reloid, /* relation */
0, /* nkeys */
(ScanKey) NULL, /* scan key */
- 0, /* is index */
+ false, /* is index */
direction, /* scan direction */
estate->es_snapshot, /* */
&currentRelation, /* return: rel desc */
@@ -1023,7 +1023,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, Plan *parent)
ExecOpenScanR(indexOid, /* relation */
numScanKeys[i], /* nkeys */
scanKeys[i], /* scan key */
- true, /* is index */
+ true, /* is index */
direction, /* scan direction */
estate->es_snapshot,
&(relationDescs[i]), /* return: rel desc */
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index 5a16b77085f..48beffd7920 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.30 2001/05/27 20:42:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.31 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -166,7 +166,7 @@ InitScanRelation(SeqScan *node, EState *estate,
ExecOpenScanR(reloid, /* relation */
0, /* nkeys */
NULL, /* scan key */
- 0, /* is index */
+ false, /* is index */
direction, /* scan direction */
estate->es_snapshot,
&currentRelation, /* return: rel desc */
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 749390a4d2d..3f537fb0d9b 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.66 2001/05/20 20:28:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.67 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -126,7 +126,7 @@ find_secondary_indexes(Oid relationObjectId)
/* Extract info from the pg_index tuple */
info->indexoid = index->indexrelid;
info->indproc = index->indproc; /* functional index ?? */
- if (VARSIZE(&index->indpred) != 0) /* partial index ?? */
+ if (VARSIZE(&index->indpred) > VARHDRSZ) /* partial index ?? */
{
char *predString;
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
index d0766d15d70..4e278c0489c 100644
--- a/src/backend/utils/adt/datum.c
+++ b/src/backend/utils/adt/datum.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/datum.c,v 1.20 2001/03/22 03:59:50 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/datum.c,v 1.21 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -118,7 +118,7 @@ datumCopy(Datum value, bool typByVal, int typLen)
*
* Free the space occupied by a datum CREATED BY "datumCopy"
*
- * NOTE: DO NOT USE THIS ROUTINE with datums returned by amgetattr() etc.
+ * NOTE: DO NOT USE THIS ROUTINE with datums returned by heap_getattr() etc.
* ONLY datums created by "datumCopy" can be freed!
*-------------------------------------------------------------------------
*/
diff --git a/src/bin/initdb/initdb.sh b/src/bin/initdb/initdb.sh
index 42dea03a242..38de46269a9 100644
--- a/src/bin/initdb/initdb.sh
+++ b/src/bin/initdb/initdb.sh
@@ -27,7 +27,7 @@
# Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
-# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.129 2001/06/23 23:29:48 petere Exp $
+# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.130 2001/07/15 22:48:18 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -813,7 +813,7 @@ echo "UPDATE pg_database SET \
| "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely
echo "Vacuuming database."
-echo "VACUUM ANALYZE" \
+echo "VACUUM FULL ANALYZE" \
| "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely
echo "Copying template1 to template0."
@@ -824,7 +824,7 @@ echo "UPDATE pg_database SET \
datallowconn = 'f' \
WHERE datname = 'template0'" \
| "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely
-echo "VACUUM pg_database" \
+echo "VACUUM FULL pg_database" \
| "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 0102d8c7e43..db6795c0933 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: genam.h,v 1.25 2001/01/24 19:43:19 momjian Exp $
+ * $Id: genam.h,v 1.26 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -18,8 +18,21 @@
#include "access/relscan.h"
#include "access/sdir.h"
+
+/* Struct for statistics returned by bulk-delete operation */
+typedef struct IndexBulkDeleteResult
+{
+ BlockNumber num_pages; /* pages remaining in index */
+ double tuples_removed; /* # removed by bulk-delete operation */
+ double num_index_tuples; /* # remaining */
+} IndexBulkDeleteResult;
+
+/* Typedef for callback function to determine if a tuple is bulk-deletable */
+typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state);
+
+
/* ----------------
- * generalized index_ interface routines
+ * generalized index_ interface routines (in indexam.c)
* ----------------
*/
extern Relation index_open(Oid relationId);
@@ -29,7 +42,6 @@ extern InsertIndexResult index_insert(Relation relation,
Datum *datum, char *nulls,
ItemPointer heap_t_ctid,
Relation heapRel);
-extern void index_delete(Relation relation, ItemPointer indexItem);
extern IndexScanDesc index_beginscan(Relation relation, bool scanFromEnd,
uint16 numberOfKeys, ScanKey key);
extern void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key);
@@ -38,6 +50,9 @@ extern void index_markpos(IndexScanDesc scan);
extern void index_restrpos(IndexScanDesc scan);
extern RetrieveIndexResult index_getnext(IndexScanDesc scan,
ScanDirection direction);
+extern IndexBulkDeleteResult *index_bulk_delete(Relation relation,
+ IndexBulkDeleteCallback callback,
+ void *callback_state);
extern RegProcedure index_cost_estimator(Relation relation);
extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
uint16 procnum);
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 9e8091a8a09..b555a195db9 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: gist.h,v 1.28 2001/05/31 18:16:55 tgl Exp $
+ * $Id: gist.h,v 1.29 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -162,7 +162,7 @@ typedef struct GISTENTRY
/* gist.c */
extern Datum gistbuild(PG_FUNCTION_ARGS);
extern Datum gistinsert(PG_FUNCTION_ARGS);
-extern Datum gistdelete(PG_FUNCTION_ARGS);
+extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
extern void _gistdump(Relation r);
extern void gistfreestack(GISTSTACK *s);
extern void initGISTstate(GISTSTATE *giststate, Relation index);
diff --git a/src/include/access/gistscan.h b/src/include/access/gistscan.h
index d4f9403c10d..f7955bce9ef 100644
--- a/src/include/access/gistscan.h
+++ b/src/include/access/gistscan.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: gistscan.h,v 1.15 2001/05/30 19:53:39 tgl Exp $
+ * $Id: gistscan.h,v 1.16 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,5 +22,6 @@ extern Datum gistmarkpos(PG_FUNCTION_ARGS);
extern Datum gistrestrpos(PG_FUNCTION_ARGS);
extern Datum gistendscan(PG_FUNCTION_ARGS);
extern void gistadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum);
+extern void AtEOXact_gist(void);
#endif /* GISTSCAN_H */
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 871629a1220..e973b81a7cc 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: hash.h,v 1.38 2001/03/22 04:00:27 momjian Exp $
+ * $Id: hash.h,v 1.39 2001/07/15 22:48:18 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
@@ -55,7 +55,7 @@ typedef uint32 PageOffset;
#define OADDR_OF(S,O) ((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O)))
#define BUCKET_TO_BLKNO(B) \
- ((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1)
+ ((Bucket) ((B) + ((B) ? metap->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
#define OADDR_TO_BLKNO(B) \
((BlockNumber) \
(BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B))));
@@ -165,16 +165,6 @@ typedef struct HashMetaPageData
typedef HashMetaPageData *HashMetaPage;
-/* Short hands for accessing structure */
-#define OVFL_POINT hashm_ovflpoint
-#define LAST_FREED hashm_lastfreed
-#define MAX_BUCKET hashm_maxbucket
-#define FFACTOR hashm_ffactor
-#define HIGH_MASK hashm_highmask
-#define LOW_MASK hashm_lowmask
-#define NKEYS hashm_nkeys
-#define SPARES hashm_spares
-
extern bool BuildingHash;
typedef struct HashItemData
@@ -256,7 +246,7 @@ extern Datum hashrescan(PG_FUNCTION_ARGS);
extern Datum hashendscan(PG_FUNCTION_ARGS);
extern Datum hashmarkpos(PG_FUNCTION_ARGS);
extern Datum hashrestrpos(PG_FUNCTION_ARGS);
-extern Datum hashdelete(PG_FUNCTION_ARGS);
+extern Datum hashbulkdelete(PG_FUNCTION_ARGS);
/*
* Datatype-specific hash functions in hashfunc.c.
@@ -310,6 +300,7 @@ extern void _hash_expandtable(Relation rel, Buffer metabuf);
extern void _hash_regscan(IndexScanDesc scan);
extern void _hash_dropscan(IndexScanDesc scan);
extern void _hash_adjscans(Relation rel, ItemPointer tid);
+extern void AtEOXact_hash(void);
/* hashsearch.c */
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 1ba7f963303..789dd027424 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: nbtree.h,v 1.55 2001/03/22 04:00:29 momjian Exp $
+ * $Id: nbtree.h,v 1.56 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -319,6 +319,8 @@ typedef struct xl_btree_newroot
*/
extern bool BuildingBtree; /* in nbtree.c */
+extern void AtEOXact_nbtree(void);
+
extern Datum btbuild(PG_FUNCTION_ARGS);
extern Datum btinsert(PG_FUNCTION_ARGS);
extern Datum btgettuple(PG_FUNCTION_ARGS);
@@ -328,7 +330,7 @@ extern void btmovescan(IndexScanDesc scan, Datum v);
extern Datum btendscan(PG_FUNCTION_ARGS);
extern Datum btmarkpos(PG_FUNCTION_ARGS);
extern Datum btrestrpos(PG_FUNCTION_ARGS);
-extern Datum btdelete(PG_FUNCTION_ARGS);
+extern Datum btbulkdelete(PG_FUNCTION_ARGS);
extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
extern void btree_undo(XLogRecPtr lsn, XLogRecord *record);
@@ -346,20 +348,12 @@ extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem,
extern void _bt_metapinit(Relation rel);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
-extern void _bt_relbuf(Relation rel, Buffer buf, int access);
+extern void _bt_relbuf(Relation rel, Buffer buf);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level);
-extern void _bt_pagedel(Relation rel, ItemPointer tid);
-
-/*
- * prototypes for functions in nbtscan.c
- */
-extern void _bt_regscan(IndexScanDesc scan);
-extern void _bt_dropscan(IndexScanDesc scan);
-extern void _bt_adjscans(Relation rel, ItemPointer tid);
-extern void AtEOXact_nbtree(void);
+extern void _bt_itemdel(Relation rel, Buffer buf, ItemPointer tid);
/*
* prototypes for functions in nbtsearch.c
diff --git a/src/include/access/rtree.h b/src/include/access/rtree.h
index 210e8739814..237937fe46a 100644
--- a/src/include/access/rtree.h
+++ b/src/include/access/rtree.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: rtree.h,v 1.23 2001/05/30 19:53:39 tgl Exp $
+ * $Id: rtree.h,v 1.24 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -110,7 +110,7 @@ extern void freestack(RTSTACK *s);
* Defined in access/rtree/
*/
extern Datum rtinsert(PG_FUNCTION_ARGS);
-extern Datum rtdelete(PG_FUNCTION_ARGS);
+extern Datum rtbulkdelete(PG_FUNCTION_ARGS);
extern Datum rtgettuple(PG_FUNCTION_ARGS);
extern Datum rtbeginscan(PG_FUNCTION_ARGS);
@@ -129,6 +129,7 @@ extern void rtree_desc(char *buf, uint8 xl_info, char *rec);
/* rtscan.c */
extern void rtadjscans(Relation r, int op, BlockNumber blkno,
OffsetNumber offnum);
+extern void AtEOXact_rtree(void);
/* rtstrat.c */
extern RegProcedure RTMapOperator(Relation r, AttrNumber attnum,
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 162bf4fe5f6..a4a132bc410 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: catversion.h,v 1.85 2001/06/22 19:16:24 wieck Exp $
+ * $Id: catversion.h,v 1.86 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200106221
+#define CATALOG_VERSION_NO 200107151
#endif
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index e139dde2cc3..f93de9c2e90 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: index.h,v 1.35 2001/05/30 20:52:34 momjian Exp $
+ * $Id: index.h,v 1.36 2001/07/15 22:48:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -17,6 +17,16 @@
#include "access/itup.h"
#include "nodes/execnodes.h"
+
+/* Typedef for callback function for IndexBuildHeapScan */
+typedef void (*IndexBuildCallback) (Relation index,
+ HeapTuple htup,
+ Datum *attdata,
+ char *nulls,
+ bool tupleIsAlive,
+ void *state);
+
+
extern Form_pg_am AccessMethodObjectIdGetForm(Oid accessMethodObjectId,
MemoryContext resultCxt);
@@ -56,7 +66,13 @@ extern bool SetReindexProcessing(bool processing);
extern bool IsReindexProcessing(void);
extern void index_build(Relation heapRelation, Relation indexRelation,
- IndexInfo *indexInfo, Node *oldPred);
+ IndexInfo *indexInfo);
+
+extern double IndexBuildHeapScan(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo,
+ IndexBuildCallback callback,
+ void *callback_state);
extern bool reindex_index(Oid indexId, bool force, bool inplace);
extern bool activate_indexes_of_a_table(Oid relid, bool activate);
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index f2de6fb6c06..3bf79404d8e 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_am.h,v 1.17 2001/05/30 19:55:08 tgl Exp $
+ * $Id: pg_am.h,v 1.18 2001/07/15 22:48:18 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -38,30 +38,26 @@ CATALOG(pg_am)
{
NameData amname; /* access method name */
int4 amowner; /* usesysid of creator */
- int2 amstrategies; /* total NUMBER of strategies by which we
- * can traverse/search this AM */
+ int2 amstrategies; /* total NUMBER of strategies (operators) by
+ * which we can traverse/search this AM */
int2 amsupport; /* total NUMBER of support functions that
* this AM uses */
int2 amorderstrategy;/* if this AM has a sort order, the
* strategy number of the sort operator.
* Zero if AM is not ordered. */
+ bool amcanunique; /* does AM support UNIQUE indexes? */
+ bool amcanmulticol; /* does AM support multi-column indexes? */
+ bool amindexnulls; /* does AM support NULL index entries? */
+ bool amconcurrent; /* does AM support concurrent updates? */
regproc amgettuple; /* "next valid tuple" function */
regproc aminsert; /* "insert this tuple" function */
- regproc amdelete; /* "delete this tuple" function */
- regproc amgetattr; /* - deprecated */
- regproc amsetlock; /* - deprecated */
- regproc amsettid; /* - deprecated */
- regproc amfreetuple; /* - deprecated */
regproc ambeginscan; /* "start new scan" function */
regproc amrescan; /* "restart this scan" function */
regproc amendscan; /* "end this scan" function */
regproc ammarkpos; /* "mark current scan position" function */
regproc amrestrpos; /* "restore marked scan position" function */
- regproc amopen; /* - deprecated */
- regproc amclose; /* - deprecated */
regproc ambuild; /* "build new index" function */
- regproc amcreate; /* - deprecated */
- regproc amdestroy; /* - deprecated */
+ regproc ambulkdelete; /* bulk-delete function */
regproc amcostestimate; /* estimate cost of an indexscan */
} FormData_pg_am;
@@ -76,46 +72,40 @@ typedef FormData_pg_am *Form_pg_am;
* compiler constants for pg_am
* ----------------
*/
-#define Natts_pg_am 23
+#define Natts_pg_am 19
#define Anum_pg_am_amname 1
#define Anum_pg_am_amowner 2
#define Anum_pg_am_amstrategies 3
#define Anum_pg_am_amsupport 4
#define Anum_pg_am_amorderstrategy 5
-#define Anum_pg_am_amgettuple 6
-#define Anum_pg_am_aminsert 7
-#define Anum_pg_am_amdelete 8
-#define Anum_pg_am_amgetattr 9
-#define Anum_pg_am_amsetlock 10
-#define Anum_pg_am_amsettid 11
-#define Anum_pg_am_amfreetuple 12
-#define Anum_pg_am_ambeginscan 13
-#define Anum_pg_am_amrescan 14
-#define Anum_pg_am_amendscan 15
-#define Anum_pg_am_ammarkpos 16
-#define Anum_pg_am_amrestrpos 17
-#define Anum_pg_am_amopen 18
-#define Anum_pg_am_amclose 19
-#define Anum_pg_am_ambuild 20
-#define Anum_pg_am_amcreate 21
-#define Anum_pg_am_amdestroy 22
-#define Anum_pg_am_amcostestimate 23
+#define Anum_pg_am_amcanunique 6
+#define Anum_pg_am_amcanmulticol 7
+#define Anum_pg_am_amindexnulls 8
+#define Anum_pg_am_amconcurrent 9
+#define Anum_pg_am_amgettuple 10
+#define Anum_pg_am_aminsert 11
+#define Anum_pg_am_ambeginscan 12
+#define Anum_pg_am_amrescan 13
+#define Anum_pg_am_amendscan 14
+#define Anum_pg_am_ammarkpos 15
+#define Anum_pg_am_amrestrpos 16
+#define Anum_pg_am_ambuild 17
+#define Anum_pg_am_ambulkdelete 18
+#define Anum_pg_am_amcostestimate 19
/* ----------------
* initial contents of pg_am
* ----------------
*/
-DATA(insert OID = 402 ( rtree PGUID 8 3 0 rtgettuple rtinsert rtdelete - - - - rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos - - rtbuild - - rtcostestimate ));
-DESCR("");
-DATA(insert OID = 403 ( btree PGUID 5 1 1 btgettuple btinsert btdelete - - - - btbeginscan btrescan btendscan btmarkpos btrestrpos - - btbuild - - btcostestimate ));
-DESCR("");
+DATA(insert OID = 402 ( rtree PGUID 8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete rtcostestimate ));
+DESCR("r-tree index access method");
+DATA(insert OID = 403 ( btree PGUID 5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btcostestimate ));
+DESCR("b-tree index access method");
#define BTREE_AM_OID 403
-DATA(insert OID = 405 ( hash PGUID 1 1 0 hashgettuple hashinsert hashdelete - - - - hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos - - hashbuild - - hashcostestimate ));
-DESCR("");
-#define HASH_AM_OID 405
-DATA(insert OID = 783 ( gist PGUID 100 7 0 gistgettuple gistinsert gistdelete - - - - gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos - - gistbuild - - gistcostestimate ));
-DESCR("");
-#define GIST_AM_OID 783
+DATA(insert OID = 405 ( hash PGUID 1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashcostestimate ));
+DESCR("hash index access method");
+DATA(insert OID = 783 ( gist PGUID 100 7 0 f t f f gistgettuple gistinsert gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistcostestimate ));
+DESCR("GiST index access method");
#endif /* PG_AM_H */
diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h
index a490c8b5828..521a3ec3b62 100644
--- a/src/include/catalog/pg_index.h
+++ b/src/include/catalog/pg_index.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_index.h,v 1.21 2001/07/09 18:35:52 momjian Exp $
+ * $Id: pg_index.h,v 1.22 2001/07/15 22:48:18 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -58,7 +58,9 @@ CATALOG(pg_index)
bool indisprimary; /* is this index for primary key */
Oid indreference; /* oid of index of referenced relation (ie
* - this index for foreign key */
- text indpred; /* query plan for partial index predicate */
+ /* VARIABLE LENGTH FIELD: */
+ text indpred; /* expression tree for predicate,
+ * if a partial index */
} FormData_pg_index;
/* ----------------
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 506e2b28009..f249fcf2d9d 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_proc.h,v 1.196 2001/07/11 22:14:02 momjian Exp $
+ * $Id: pg_proc.h,v 1.197 2001/07/15 22:48:18 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@@ -210,11 +210,6 @@ DESCR("not equal");
DATA(insert OID = 89 ( version PGUID 12 f t f t 0 f 25 "" 100 0 0 100 pgsql_version - ));
DESCR("PostgreSQL version string");
-DATA(insert OID = 1265 ( rtcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 rtcostestimate - ));
-DESCR("r-tree cost estimator");
-DATA(insert OID = 1268 ( btcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 btcostestimate - ));
-DESCR("btree cost estimator");
-
/* OIDS 100 - 199 */
DATA(insert OID = 100 ( int8fac PGUID 12 f t t t 1 f 20 "20" 100 0 0 100 int8fac - ));
@@ -671,11 +666,9 @@ DESCR("convert float4 to int4");
DATA(insert OID = 320 ( rtinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 rtinsert - ));
DESCR("r-tree(internal)");
-DATA(insert OID = 321 ( rtdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 rtdelete - ));
-DESCR("r-tree(internal)");
DATA(insert OID = 322 ( rtgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 rtgettuple - ));
DESCR("r-tree(internal)");
-DATA(insert OID = 323 ( rtbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 rtbuild - ));
+DATA(insert OID = 323 ( rtbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 rtbuild - ));
DESCR("r-tree(internal)");
DATA(insert OID = 324 ( rtbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 rtbeginscan - ));
DESCR("r-tree(internal)");
@@ -687,13 +680,15 @@ DATA(insert OID = 327 ( rtrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100
DESCR("r-tree(internal)");
DATA(insert OID = 328 ( rtrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 rtrescan - ));
DESCR("r-tree(internal)");
+DATA(insert OID = 321 ( rtbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 rtbulkdelete - ));
+DESCR("r-tree(internal)");
+DATA(insert OID = 1265 ( rtcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 rtcostestimate - ));
+DESCR("r-tree(internal)");
DATA(insert OID = 330 ( btgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 btgettuple - ));
DESCR("btree(internal)");
DATA(insert OID = 331 ( btinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 btinsert - ));
DESCR("btree(internal)");
-DATA(insert OID = 332 ( btdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 btdelete - ));
-DESCR("btree(internal)");
DATA(insert OID = 333 ( btbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 btbeginscan - ));
DESCR("btree(internal)");
DATA(insert OID = 334 ( btrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 btrescan - ));
@@ -704,7 +699,11 @@ DATA(insert OID = 336 ( btmarkpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 b
DESCR("btree(internal)");
DATA(insert OID = 337 ( btrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 btrestrpos - ));
DESCR("btree(internal)");
-DATA(insert OID = 338 ( btbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 btbuild - ));
+DATA(insert OID = 338 ( btbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 btbuild - ));
+DESCR("btree(internal)");
+DATA(insert OID = 332 ( btbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 btbulkdelete - ));
+DESCR("btree(internal)");
+DATA(insert OID = 1268 ( btcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 btcostestimate - ));
DESCR("btree(internal)");
DATA(insert OID = 339 ( poly_same PGUID 12 f t t t 2 f 16 "604 604" 100 0 0 100 poly_same - ));
@@ -789,15 +788,10 @@ DESCR("convert name to char()");
DATA(insert OID = 409 ( name PGUID 12 f t t t 1 f 19 "1042" 100 0 0 100 bpchar_name - ));
DESCR("convert char() to name");
-DATA(insert OID = 438 ( hashcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 hashcostestimate - ));
-DESCR("hash index cost estimator");
-
DATA(insert OID = 440 ( hashgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 hashgettuple - ));
DESCR("hash(internal)");
DATA(insert OID = 441 ( hashinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 hashinsert - ));
DESCR("hash(internal)");
-DATA(insert OID = 442 ( hashdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 hashdelete - ));
-DESCR("hash(internal)");
DATA(insert OID = 443 ( hashbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 hashbeginscan - ));
DESCR("hash(internal)");
DATA(insert OID = 444 ( hashrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 hashrescan - ));
@@ -808,8 +802,13 @@ DATA(insert OID = 446 ( hashmarkpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100
DESCR("hash(internal)");
DATA(insert OID = 447 ( hashrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 hashrestrpos - ));
DESCR("hash(internal)");
-DATA(insert OID = 448 ( hashbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 hashbuild - ));
+DATA(insert OID = 448 ( hashbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 hashbuild - ));
+DESCR("hash(internal)");
+DATA(insert OID = 442 ( hashbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 hashbulkdelete - ));
DESCR("hash(internal)");
+DATA(insert OID = 438 ( hashcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 hashcostestimate - ));
+DESCR("hash(internal)");
+
DATA(insert OID = 449 ( hashint2 PGUID 12 f t t t 1 f 23 "21" 100 0 0 100 hashint2 - ));
DESCR("hash");
DATA(insert OID = 450 ( hashint4 PGUID 12 f t t t 1 f 23 "23" 100 0 0 100 hashint4 - ));
@@ -1014,14 +1013,10 @@ DESCR("larger of two");
DATA(insert OID = 771 ( int2smaller PGUID 12 f t t t 2 f 21 "21 21" 100 0 0 100 int2smaller - ));
DESCR("smaller of two");
-DATA(insert OID = 772 ( gistcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 gistcostestimate - ));
-DESCR("gist cost estimator");
DATA(insert OID = 774 ( gistgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 gistgettuple - ));
DESCR("gist(internal)");
DATA(insert OID = 775 ( gistinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 gistinsert - ));
DESCR("gist(internal)");
-DATA(insert OID = 776 ( gistdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 gistdelete - ));
-DESCR("gist(internal)");
DATA(insert OID = 777 ( gistbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 gistbeginscan - ));
DESCR("gist(internal)");
DATA(insert OID = 778 ( gistrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 gistrescan - ));
@@ -1032,7 +1027,11 @@ DATA(insert OID = 780 ( gistmarkpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100
DESCR("gist(internal)");
DATA(insert OID = 781 ( gistrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 gistrestrpos - ));
DESCR("gist(internal)");
-DATA(insert OID = 782 ( gistbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 gistbuild - ));
+DATA(insert OID = 782 ( gistbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 gistbuild - ));
+DESCR("gist(internal)");
+DATA(insert OID = 776 ( gistbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 gistbulkdelete - ));
+DESCR("gist(internal)");
+DATA(insert OID = 772 ( gistcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 gistcostestimate - ));
DESCR("gist(internal)");
DATA(insert OID = 784 ( tintervaleq PGUID 12 f t f t 2 f 16 "704 704" 100 0 0 100 tintervaleq - ));
diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out
index 46bc60f6955..95c24879857 100644
--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -57,14 +57,6 @@ WHERE pg_am.aminsert != 0 AND
-----+----------
(0 rows)
-SELECT oid, pg_am.amdelete
-FROM pg_am
-WHERE pg_am.amdelete != 0 AND
- NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.amdelete);
- oid | amdelete
------+----------
-(0 rows)
-
SELECT oid, pg_am.ambeginscan
FROM pg_am
WHERE pg_am.ambeginscan != 0 AND
@@ -113,6 +105,14 @@ WHERE pg_am.ambuild != 0 AND
-----+---------
(0 rows)
+SELECT oid, pg_am.ambulkdelete
+FROM pg_am
+WHERE pg_am.ambulkdelete != 0 AND
+ NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.ambulkdelete);
+ oid | ambulkdelete
+-----+--------------
+(0 rows)
+
SELECT oid, pg_am.amcostestimate
FROM pg_am
WHERE pg_am.amcostestimate != 0 AND
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 41328c53b1d..533655c52ea 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -480,8 +480,8 @@ WHERE p1.aggtransfn = p2.oid AND
(p2.pronargs = 1 AND p1.aggbasetype = 0)));
oid | aggname | oid | proname
-------+---------+-----+-------------
- 16963 | max | 768 | int4larger
- 16977 | min | 769 | int4smaller
+ 16959 | max | 768 | int4larger
+ 16973 | min | 769 | int4smaller
(2 rows)
-- Cross-check finalfn (if present) against its entry in pg_proc.
diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql
index 88727a6c76e..34352128f44 100644
--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -29,10 +29,6 @@ SELECT oid, pg_am.aminsert
FROM pg_am
WHERE pg_am.aminsert != 0 AND
NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.aminsert);
-SELECT oid, pg_am.amdelete
-FROM pg_am
-WHERE pg_am.amdelete != 0 AND
- NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.amdelete);
SELECT oid, pg_am.ambeginscan
FROM pg_am
WHERE pg_am.ambeginscan != 0 AND
@@ -57,6 +53,10 @@ SELECT oid, pg_am.ambuild
FROM pg_am
WHERE pg_am.ambuild != 0 AND
NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.ambuild);
+SELECT oid, pg_am.ambulkdelete
+FROM pg_am
+WHERE pg_am.ambulkdelete != 0 AND
+ NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.ambulkdelete);
SELECT oid, pg_am.amcostestimate
FROM pg_am
WHERE pg_am.amcostestimate != 0 AND