Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r--  src/backend/access/nbtree/nbtree.c   66
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 87a8a225dbf..d166a811b80 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.172 2009/07/29 20:56:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.173 2009/12/19 01:32:33 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@@ -57,7 +57,8 @@ typedef struct
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
- BlockNumber lastUsedPage;
+ BlockNumber lastBlockVacuumed; /* last blkno reached by Vacuum scan */
+ BlockNumber lastUsedPage; /* blkno of last non-recyclable page */
BlockNumber totFreePages; /* true total # of free pages */
MemoryContext pagedelcontext;
} BTVacState;
@@ -629,6 +630,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
vstate.callback = callback;
vstate.callback_state = callback_state;
vstate.cycleid = cycleid;
+ vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
vstate.lastUsedPage = BTREE_METAPAGE;
vstate.totFreePages = 0;
@@ -705,6 +707,32 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
num_pages = new_pages;
}
+ /*
+ * InHotStandby we need to scan right up to the end of the index for
+ * correct locking, so we may need to write a WAL record for the final
+ * block in the index if it was not vacuumed. It's possible that VACUUMing
+ * has actually removed zeroed pages at the end of the index so we need to
+ * take care to issue the record for the last actual block and not for the
+ * last block that was scanned. Ignore empty indexes.
+ */
+ if (XLogStandbyInfoActive() &&
+ num_pages > 1 && vstate.lastBlockVacuumed < (num_pages - 1))
+ {
+ Buffer buf;
+
+ /*
+ * We can't use _bt_getbuf() here because it always applies
+ * _bt_checkpage(), which will barf on an all-zero page. We want to
+ * recycle all-zero pages, not fail. Also, we want to use a nondefault
+ * buffer access strategy.
+ */
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, num_pages - 1, RBM_NORMAL,
+ info->strategy);
+ LockBufferForCleanup(buf);
+ _bt_delitems(rel, buf, NULL, 0, true, vstate.lastBlockVacuumed);
+ _bt_relbuf(rel, buf);
+ }
+
MemoryContextDelete(vstate.pagedelcontext);
/* update statistics */
@@ -847,6 +875,26 @@ restart:
itup = (IndexTuple) PageGetItem(page,
PageGetItemId(page, offnum));
htup = &(itup->t_tid);
+
+ /*
+ * During Hot Standby we currently assume that XLOG_BTREE_VACUUM
+ * records do not produce conflicts. That is only true as long
+ * as the callback function depends only upon whether the index
+ * tuple refers to heap tuples removed in the initial heap scan.
+ * When vacuum starts it derives a value of OldestXmin. Backends
+ * taking later snapshots could have a RecentGlobalXmin with a
+ * later xid than the vacuum's OldestXmin, so it is possible that
+ * row versions deleted after OldestXmin could be marked as killed
+ * by other backends. The callback function *could* look at the
+ * index tuple state in isolation and decide to delete the index
+ * tuple, though currently it does not. If it ever did, we would
+ * need to reconsider whether XLOG_BTREE_VACUUM records should
+ * cause conflicts. If they did cause conflicts they would be
+ * fairly harsh conflicts, since we haven't yet worked out a way
+ * to pass a useful value for latestRemovedXid on the
+ * XLOG_BTREE_VACUUM records. This applies to *any* type of index
+ * that marks index tuples as killed.
+ */
if (callback(htup, callback_state))
deletable[ndeletable++] = offnum;
}
@@ -858,7 +906,19 @@ restart:
*/
if (ndeletable > 0)
{
- _bt_delitems(rel, buf, deletable, ndeletable);
+ BlockNumber lastBlockVacuumed = BufferGetBlockNumber(buf);
+
+ _bt_delitems(rel, buf, deletable, ndeletable, true, vstate->lastBlockVacuumed);
+
+ /*
+ * Keep track of the block number of the lastBlockVacuumed, so
+ * we can scan those blocks as well during WAL replay. This then
+ * provides concurrency protection and allows btrees to be used
+ * while in recovery.
+ */
+ if (lastBlockVacuumed > vstate->lastBlockVacuumed)
+ vstate->lastBlockVacuumed = lastBlockVacuumed;
+
stats->tuples_removed += ndeletable;
/* must recompute maxoff */
maxoff = PageGetMaxOffsetNumber(page);
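
Note on the new call sites: both calls above pass two extra trailing arguments to _bt_delitems(), so the companion change (not shown in this file) must extend that function's signature with a vacuum flag and the lastBlockVacuumed value carried in BTVacState. The sketch below is illustrative only, not the committed nbtpage.c/nbtxlog.c code: the prototype is inferred from the call sites in this diff, and the replay-side loop shows, under that assumption, how a standby could use the lastBlockVacuumed value carried on an XLOG_BTREE_VACUUM record to take and release a cleanup lock on every block between the previously vacuumed block and the current one, which is the "scan those blocks as well during WAL replay" behaviour the comment in btvacuumpage() describes. The helper name btree_vacuum_replay_sketch() and its arguments are placeholders.

	/* Illustrative sketch, not committed PostgreSQL code. */
	#include "postgres.h"
	#include "access/xlogutils.h"
	#include "storage/bufmgr.h"

	/*
	 * Signature assumed from the call sites in this diff (the real
	 * declaration lives in access/nbtree.h):
	 *
	 *   void _bt_delitems(Relation rel, Buffer buf,
	 *                     OffsetNumber *itemnos, int nitems,
	 *                     bool isVacuum, BlockNumber lastBlockVacuumed);
	 */

	/*
	 * Hypothetical replay-side walk.  Before applying the vacuum record for
	 * "thisBlock", the startup process visits every block the primary's
	 * VACUUM scanned since the previous XLOG_BTREE_VACUUM record and waits
	 * until no standby scan still holds a pin on it, mirroring the cleanup
	 * lock the primary took.
	 */
	static void
	btree_vacuum_replay_sketch(RelFileNode node,
							   BlockNumber lastBlockVacuumed,
							   BlockNumber thisBlock)
	{
		BlockNumber blkno;

		for (blkno = lastBlockVacuumed + 1; blkno < thisBlock; blkno++)
		{
			Buffer		buffer;

			/* Page may be all-zero or already truncated away; tolerate both. */
			buffer = XLogReadBufferExtended(node, MAIN_FORKNUM, blkno, RBM_NORMAL);
			if (BufferIsValid(buffer))
			{
				LockBufferForCleanup(buffer);	/* waits out any standby pins */
				UnlockReleaseBuffer(buffer);
			}
		}
	}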