1 files changed, 94 insertions, 100 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 52d60abaec0..927860030c8 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.87 2005/08/12 14:34:14 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.88 2005/10/15 02:49:09 momjian Exp $
  *
  *	NOTES
  *	   Postgres btree pages look like ordinary relation pages.	The opaque
@@ -115,8 +115,8 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
 	metaopaque->btpo_flags = BTP_META;
 
 	/*
-	 * Set pd_lower just past the end of the metadata.  This is not
-	 * essential but it makes the page look compressible to xlog.c.
+	 * Set pd_lower just past the end of the metadata.	This is not essential
+	 * but it makes the page look compressible to xlog.c.
 	 */
 	((PageHeader) page)->pd_lower =
 		((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
@@ -198,26 +198,26 @@ _bt_getroot(Relation rel, int access)
 		LockBuffer(metabuf, BT_WRITE);
 
 		/*
-		 * Race condition:	if someone else initialized the metadata
-		 * between the time we released the read lock and acquired the
-		 * write lock, we must avoid doing it again.
+		 * Race condition:	if someone else initialized the metadata between
+		 * the time we released the read lock and acquired the write lock, we
+		 * must avoid doing it again.
 		 */
 		if (metad->btm_root != P_NONE)
 		{
 			/*
-			 * Metadata initialized by someone else.  In order to
-			 * guarantee no deadlocks, we have to release the metadata
-			 * page and start all over again.  (Is that really true? But
-			 * it's hardly worth trying to optimize this case.)
+			 * Metadata initialized by someone else.  In order to guarantee no
+			 * deadlocks, we have to release the metadata page and start all
+			 * over again.	(Is that really true? But it's hardly worth trying
+			 * to optimize this case.)
 			 */
 			_bt_relbuf(rel, metabuf);
 			return _bt_getroot(rel, access);
 		}
 
 		/*
-		 * Get, initialize, write, and leave a lock of the appropriate
-		 * type on the new root page.  Since this is the first page in the
-		 * tree, it's a leaf as well as the root.
+		 * Get, initialize, write, and leave a lock of the appropriate type on
+		 * the new root page.  Since this is the first page in the tree, it's
+		 * a leaf as well as the root.
 		 */
 		rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
 		rootblkno = BufferGetBlockNumber(rootbuf);
@@ -266,9 +266,9 @@ _bt_getroot(Relation rel, int access)
 		_bt_wrtnorelbuf(rel, rootbuf);
 
 		/*
-		 * swap root write lock for read lock.	There is no danger of
-		 * anyone else accessing the new root page while it's unlocked,
-		 * since no one else knows where it is yet.
+		 * swap root write lock for read lock.	There is no danger of anyone
+		 * else accessing the new root page while it's unlocked, since no one
+		 * else knows where it is yet.
 		 */
 		LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
 		LockBuffer(rootbuf, BT_READ);
@@ -312,8 +312,8 @@ _bt_getroot(Relation rel, int access)
 	}
 
 	/*
-	 * By here, we have a pin and read lock on the root page, and no lock
-	 * set on the metadata page.  Return the root page's buffer.
+	 * By here, we have a pin and read lock on the root page, and no lock set
+	 * on the metadata page.  Return the root page's buffer.
 	 */
 	return rootbuf;
 }
@@ -435,27 +435,26 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 		/*
 		 * First see if the FSM knows of any free pages.
 		 *
-		 * We can't trust the FSM's report unreservedly; we have to check
-		 * that the page is still free.  (For example, an already-free
-		 * page could have been re-used between the time the last VACUUM
-		 * scanned it and the time the VACUUM made its FSM updates.)
+		 * We can't trust the FSM's report unreservedly; we have to check that
+		 * the page is still free.	(For example, an already-free page could
+		 * have been re-used between the time the last VACUUM scanned it and
+		 * the time the VACUUM made its FSM updates.)
 		 *
-		 * In fact, it's worse than that: we can't even assume that it's safe
-		 * to take a lock on the reported page.  If somebody else has a
-		 * lock on it, or even worse our own caller does, we could
-		 * deadlock.  (The own-caller scenario is actually not improbable.
-		 * Consider an index on a serial or timestamp column.  Nearly all
-		 * splits will be at the rightmost page, so it's entirely likely
-		 * that _bt_split will call us while holding a lock on the page
-		 * most recently acquired from FSM.  A VACUUM running concurrently
-		 * with the previous split could well have placed that page back
-		 * in FSM.)
+		 * In fact, it's worse than that: we can't even assume that it's safe to
+		 * take a lock on the reported page.  If somebody else has a lock on
+		 * it, or even worse our own caller does, we could deadlock.  (The
+		 * own-caller scenario is actually not improbable. Consider an index
+		 * on a serial or timestamp column.  Nearly all splits will be at the
+		 * rightmost page, so it's entirely likely that _bt_split will call us
+		 * while holding a lock on the page most recently acquired from FSM.
+		 * A VACUUM running concurrently with the previous split could well
+		 * have placed that page back in FSM.)
 		 *
-		 * To get around that, we ask for only a conditional lock on the
-		 * reported page.  If we fail, then someone else is using the
-		 * page, and we may reasonably assume it's not free.  (If we
-		 * happen to be wrong, the worst consequence is the page will be
-		 * lost to use till the next VACUUM, which is no big problem.)
+		 * To get around that, we ask for only a conditional lock on the reported
+		 * page.  If we fail, then someone else is using the page, and we may
+		 * reasonably assume it's not free.  (If we happen to be wrong, the
+		 * worst consequence is the page will be lost to use till the next
+		 * VACUUM, which is no big problem.)
 		 */
 		for (;;)
 		{
@@ -486,10 +485,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 		/*
 		 * Extend the relation by one page.
 		 *
-		 * We have to use a lock to ensure no one else is extending the rel
-		 * at the same time, else we will both try to initialize the same
-		 * new page.  We can skip locking for new or temp relations,
-		 * however, since no one else could be accessing them.
+		 * We have to use a lock to ensure no one else is extending the rel at
+		 * the same time, else we will both try to initialize the same new
+		 * page.  We can skip locking for new or temp relations, however,
+		 * since no one else could be accessing them.
 		 */
 		needLock = !RELATION_IS_LOCAL(rel);
 
@@ -504,8 +503,8 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 		/*
 		 * Release the file-extension lock; it's now OK for someone else to
 		 * extend the relation some more.  Note that we cannot release this
-		 * lock before we have buffer lock on the new page, or we risk a
-		 * race condition against btvacuumcleanup --- see comments therein.
+		 * lock before we have buffer lock on the new page, or we risk a race
+		 * condition against btvacuumcleanup --- see comments therein.
 		 */
 		if (needLock)
 			UnlockRelationForExtension(rel, ExclusiveLock);
@@ -614,10 +613,10 @@ _bt_page_recyclable(Page page)
 	BTPageOpaque opaque;
 
 	/*
-	 * It's possible to find an all-zeroes page in an index --- for
-	 * example, a backend might successfully extend the relation one page
-	 * and then crash before it is able to make a WAL entry for adding the
-	 * page. If we find a zeroed page then reclaim it.
+	 * It's possible to find an all-zeroes page in an index --- for example, a
+	 * backend might successfully extend the relation one page and then crash
+	 * before it is able to make a WAL entry for adding the page. If we find a
+	 * zeroed page then reclaim it.
 	 */
 	if (PageIsNew(page))
 		return true;
@@ -672,9 +671,9 @@ _bt_delitems(Relation rel, Buffer buf,
 		rdata[0].next = &(rdata[1]);
 
 		/*
-		 * The target-offsets array is not in the buffer, but pretend that
-		 * it is.  When XLogInsert stores the whole buffer, the offsets
-		 * array need not be stored too.
+		 * The target-offsets array is not in the buffer, but pretend that it
+		 * is.	When XLogInsert stores the whole buffer, the offsets array
+		 * need not be stored too.
 		 */
 		if (nitems > 0)
 		{
@@ -747,8 +746,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	BTPageOpaque opaque;
 
 	/*
-	 * We can never delete rightmost pages nor root pages.	While at it,
-	 * check that page is not already deleted and is empty.
+	 * We can never delete rightmost pages nor root pages.	While at it, check
+	 * that page is not already deleted and is empty.
 	 */
 	page = BufferGetPage(buf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -760,8 +759,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	}
 
 	/*
-	 * Save info about page, including a copy of its high key (it must
-	 * have one, being non-rightmost).
+	 * Save info about page, including a copy of its high key (it must have
+	 * one, being non-rightmost).
 	 */
 	target = BufferGetBlockNumber(buf);
 	targetlevel = opaque->btpo.level;
@@ -770,11 +769,11 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	targetkey = CopyBTItem((BTItem) PageGetItem(page, itemid));
 
 	/*
-	 * We need to get an approximate pointer to the page's parent page.
-	 * Use the standard search mechanism to search for the page's high
-	 * key; this will give us a link to either the current parent or
-	 * someplace to its left (if there are multiple equal high keys).  To
-	 * avoid deadlocks, we'd better drop the target page lock first.
+	 * We need to get an approximate pointer to the page's parent page. Use
+	 * the standard search mechanism to search for the page's high key; this
+	 * will give us a link to either the current parent or someplace to its
+	 * left (if there are multiple equal high keys).  To avoid deadlocks, we'd
+	 * better drop the target page lock first.
 	 */
 	_bt_relbuf(rel, buf);
 	/* we need a scan key to do our search, so build one */
@@ -786,9 +785,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	_bt_relbuf(rel, lbuf);
 
 	/*
-	 * If we are trying to delete an interior page, _bt_search did more
-	 * than we needed.	Locate the stack item pointing to our parent
-	 * level.
+	 * If we are trying to delete an interior page, _bt_search did more than
+	 * we needed.  Locate the stack item pointing to our parent level.
 	 */
 	ilevel = 0;
 	for (;;)
@@ -803,16 +801,15 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 
 	/*
 	 * We have to lock the pages we need to modify in the standard order:
-	 * moving right, then up.  Else we will deadlock against other
-	 * writers.
+	 * moving right, then up.  Else we will deadlock against other writers.
 	 *
-	 * So, we need to find and write-lock the current left sibling of the
-	 * target page.  The sibling that was current a moment ago could have
-	 * split, so we may have to move right.  This search could fail if
-	 * either the sibling or the target page was deleted by someone else
-	 * meanwhile; if so, give up.  (Right now, that should never happen,
-	 * since page deletion is only done in VACUUM and there shouldn't be
-	 * multiple VACUUMs concurrently on the same table.)
+	 * So, we need to find and write-lock the current left sibling of the target
+	 * page.  The sibling that was current a moment ago could have split, so
+	 * we may have to move right.  This search could fail if either the
+	 * sibling or the target page was deleted by someone else meanwhile; if
+	 * so, give up.  (Right now, that should never happen, since page deletion
+	 * is only done in VACUUM and there shouldn't be multiple VACUUMs
+	 * concurrently on the same table.)
 	 */
 	if (leftsib != P_NONE)
 	{
@@ -839,19 +836,18 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 		lbuf = InvalidBuffer;
 
 	/*
-	 * Next write-lock the target page itself.	It should be okay to take
-	 * just a write lock not a superexclusive lock, since no scans would
-	 * stop on an empty page.
+	 * Next write-lock the target page itself.	It should be okay to take just
+	 * a write lock not a superexclusive lock, since no scans would stop on an
+	 * empty page.
 	 */
 	buf = _bt_getbuf(rel, target, BT_WRITE);
 	page = BufferGetPage(buf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 
 	/*
-	 * Check page is still empty etc, else abandon deletion.  The empty
-	 * check is necessary since someone else might have inserted into it
-	 * while we didn't have it locked; the others are just for paranoia's
-	 * sake.
+	 * Check page is still empty etc, else abandon deletion.  The empty check
+	 * is necessary since someone else might have inserted into it while we
+	 * didn't have it locked; the others are just for paranoia's sake.
 	 */
 	if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
 		P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
@@ -872,9 +868,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
 
 	/*
-	 * Next find and write-lock the current parent of the target page.
-	 * This is essentially the same as the corresponding step of
-	 * splitting.
+	 * Next find and write-lock the current parent of the target page. This is
+	 * essentially the same as the corresponding step of splitting.
 	 */
 	ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid),
 				   target, P_HIKEY);
@@ -887,8 +882,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 
 	/*
 	 * If the target is the rightmost child of its parent, then we can't
-	 * delete, unless it's also the only child --- in which case the
-	 * parent changes to half-dead status.
+	 * delete, unless it's also the only child --- in which case the parent
+	 * changes to half-dead status.
 	 */
 	page = BufferGetPage(pbuf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -917,11 +912,10 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	}
 
 	/*
-	 * If we are deleting the next-to-last page on the target's level,
-	 * then the rightsib is a candidate to become the new fast root. (In
-	 * theory, it might be possible to push the fast root even further
-	 * down, but the odds of doing so are slim, and the locking
-	 * considerations daunting.)
+	 * If we are deleting the next-to-last page on the target's level, then
+	 * the rightsib is a candidate to become the new fast root. (In theory, it
+	 * might be possible to push the fast root even further down, but the odds
+	 * of doing so are slim, and the locking considerations daunting.)
 	 *
 	 * We can safely acquire a lock on the metapage here --- see comments for
 	 * _bt_newroot().
@@ -939,9 +933,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 			metad = BTPageGetMeta(metapg);
 
 			/*
-			 * The expected case here is btm_fastlevel == targetlevel+1;
-			 * if the fastlevel is <= targetlevel, something is wrong, and
-			 * we choose to overwrite it to fix it.
+			 * The expected case here is btm_fastlevel == targetlevel+1; if
+			 * the fastlevel is <= targetlevel, something is wrong, and we
+			 * choose to overwrite it to fix it.
 			 */
 			if (metad->btm_fastlevel > targetlevel + 1)
 			{
@@ -961,9 +955,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 
 	/*
 	 * Update parent.  The normal case is a tad tricky because we want to
-	 * delete the target's downlink and the *following* key.  Easiest way
-	 * is to copy the right sibling's downlink over the target downlink,
-	 * and then delete the following item.
+	 * delete the target's downlink and the *following* key.  Easiest way is
+	 * to copy the right sibling's downlink over the target downlink, and then
+	 * delete the following item.
 	 */
 	page = BufferGetPage(pbuf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -992,8 +986,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 	}
 
 	/*
-	 * Update siblings' side-links.  Note the target page's side-links
-	 * will continue to point to the siblings.
+	 * Update siblings' side-links.  Note the target page's side-links will
+	 * continue to point to the siblings.
 	 */
 	if (BufferIsValid(lbuf))
 	{
@@ -1123,10 +1117,10 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
 		_bt_wrtbuf(rel, lbuf);
 
 	/*
-	 * If parent became half dead, recurse to try to delete it. Otherwise,
-	 * if right sibling is empty and is now the last child of the parent,
-	 * recurse to try to delete it.  (These cases cannot apply at the same
-	 * time, though the second case might itself recurse to the first.)
+	 * If parent became half dead, recurse to try to delete it. Otherwise, if
+	 * right sibling is empty and is now the last child of the parent, recurse
+	 * to try to delete it.  (These cases cannot apply at the same time,
+	 * though the second case might itself recurse to the first.)
 	 */
 	if (parent_half_dead)
 	{