Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r--  src/backend/access/nbtree/nbtree.c  111
1 file changed, 23 insertions(+), 88 deletions(-)
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c67235ab807..065b5290b0d 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -46,8 +46,6 @@ typedef struct
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
- BlockNumber lastBlockVacuumed; /* highest blkno actually vacuumed */
- BlockNumber lastBlockLocked; /* highest blkno we've cleanup-locked */
BlockNumber totFreePages; /* true total # of free pages */
TransactionId oldestBtpoXact;
MemoryContext pagedelcontext;
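
With the two block-tracking fields gone, the vacuum scan's working state (the
file's BTVacState struct) reduces to the following sketch. The first two
members are assumed from the surrounding file rather than shown in this hunk:

    typedef struct
    {
        IndexVacuumInfo *info;          /* assumed: not shown in this hunk */
        IndexBulkDeleteResult *stats;   /* assumed: not shown in this hunk */
        IndexBulkDeleteCallback callback;
        void       *callback_state;
        BTCycleId   cycleid;
        BlockNumber totFreePages;       /* true total # of free pages */
        TransactionId oldestBtpoXact;
        MemoryContext pagedelcontext;
    } BTVacState;
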
@@ -978,8 +976,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
vstate.callback = callback;
vstate.callback_state = callback_state;
vstate.cycleid = cycleid;
- vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
- vstate.lastBlockLocked = BTREE_METAPAGE;
vstate.totFreePages = 0;
vstate.oldestBtpoXact = InvalidTransactionId;
@@ -1040,39 +1036,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
}
}
- /*
- * Check to see if we need to issue one final WAL record for this index,
- * which may be needed for correctness on a hot standby node when non-MVCC
- * index scans could take place.
- *
- * If the WAL is replayed in hot standby, the replay process needs to get
- * cleanup locks on all index leaf pages, just as we've been doing here.
- * However, we won't issue any WAL records about pages that have no items
- * to be deleted. For pages between pages we've vacuumed, the replay code
- * will take locks under the direction of the lastBlockVacuumed fields in
- * the XLOG_BTREE_VACUUM WAL records. To cover pages after the last one
- * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
- * against the last leaf page in the index, if that one wasn't vacuumed.
- */
- if (XLogStandbyInfoActive() &&
- vstate.lastBlockVacuumed < vstate.lastBlockLocked)
- {
- Buffer buf;
-
- /*
- * The page should be valid, but we can't use _bt_getbuf() because we
- * want to use a nondefault buffer access strategy. Since we aren't
- * going to delete any items, getting cleanup lock again is probably
- * overkill, but for consistency do that anyway.
- */
- buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
- RBM_NORMAL, info->strategy);
- LockBufferForCleanup(buf);
- _bt_checkpage(rel, buf);
- _bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
- _bt_relbuf(rel, buf);
- }
-
MemoryContextDelete(vstate.pagedelcontext);
/*
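
For context, the block removed above existed to drive the standby-side "pin
scan": when replaying an XLOG_BTREE_VACUUM record, the redo routine would
cleanup-lock every leaf page between the record's lastBlockVacuumed and the
page being vacuumed, so that no still-pinned page escaped. A rough sketch of
that old replay loop, simplified from the pre-patch btree_xlog_vacuum();
variable names and the exact read mode are approximate, and none of this
appears in the diff itself:

    /* approximate shape of the replay-side pin scan this patch retires */
    BlockNumber blkno;

    for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
    {
        /* returns InvalidBuffer when the block no longer exists */
        Buffer      buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM,
                                                    blkno, RBM_NORMAL_NO_LOG);

        if (BufferIsValid(buffer))
        {
            LockBufferForCleanup(buffer);   /* wait out any scan's pin */
            UnlockReleaseBuffer(buffer);
        }
    }
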
@@ -1204,13 +1167,6 @@ restart:
LockBufferForCleanup(buf);
/*
- * Remember highest leaf page number we've taken cleanup lock on; see
- * notes in btvacuumscan
- */
- if (blkno > vstate->lastBlockLocked)
- vstate->lastBlockLocked = blkno;
-
- /*
* Check whether we need to recurse back to earlier pages. What we
* are concerned about is a page split that happened since we started
* the vacuum scan. If the split moved some tuples to a lower page
@@ -1225,8 +1181,10 @@ restart:
recurse_to = opaque->btpo_next;
/*
- * Scan over all items to see which ones need deleted according to the
- * callback function.
+ * When each VACUUM begins, it determines an OldestXmin cutoff value.
+ * Tuples that became dead before the cutoff are removed by VACUUM.
+ * Scan over all items to see which ones need to be deleted according
+ * to the cutoff point, using the callback.
*/
ndeletable = 0;
minoff = P_FIRSTDATAKEY(opaque);
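
The callback referenced here has the IndexBulkDeleteCallback shape: given the
heap TID stored in an index tuple, return true iff VACUUM's heap scan already
removed that heap tuple. A minimal hedged sketch of such a callback, with a
hypothetical sorted dead-TID array standing in for VACUUM's real bookkeeping:

    typedef struct DeadTidState     /* hypothetical illustration state */
    {
        ItemPointerData *dead_tids; /* sorted TIDs pruned from the heap */
        int         num_dead_tids;
    } DeadTidState;

    /* matches IndexBulkDeleteCallback: bool (*) (ItemPointer, void *) */
    static bool
    dead_tid_callback(ItemPointer itemptr, void *state)
    {
        DeadTidState *dt = (DeadTidState *) state;
        int         lo = 0;
        int         hi = dt->num_dead_tids - 1;

        while (lo <= hi)            /* binary search the sorted array */
        {
            int         mid = lo + (hi - lo) / 2;
            int         cmp = ItemPointerCompare(itemptr, &dt->dead_tids[mid]);

            if (cmp == 0)
                return true;        /* heap tuple is gone; drop index tuple */
            if (cmp < 0)
                hi = mid - 1;
            else
                lo = mid + 1;
        }
        return false;               /* heap tuple may be visible; keep it */
    }
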
@@ -1245,25 +1203,24 @@ restart:
htup = &(itup->t_tid);
/*
- * During Hot Standby we currently assume that
- * XLOG_BTREE_VACUUM records do not produce conflicts. That is
- * only true as long as the callback function depends only
- * upon whether the index tuple refers to heap tuples removed
- * in the initial heap scan. When vacuum starts it derives a
- * value of OldestXmin. Backends taking later snapshots could
- * have a RecentGlobalXmin with a later xid than the vacuum's
- * OldestXmin, so it is possible that row versions deleted
- * after OldestXmin could be marked as killed by other
- * backends. The callback function *could* look at the index
- * tuple state in isolation and decide to delete the index
- * tuple, though currently it does not. If it ever did, we
- * would need to reconsider whether XLOG_BTREE_VACUUM records
- * should cause conflicts. If they did cause conflicts they
- * would be fairly harsh conflicts, since we haven't yet
- * worked out a way to pass a useful value for
- * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
- * applies to *any* type of index that marks index tuples as
- * killed.
+ * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
+ * records do not produce their own conflicts. This is safe
+ * as long as the callback function only considers whether the
+ * index tuple refers to pre-cutoff heap tuples that were
+ * certainly already pruned away during VACUUM's initial heap
+ * scan by the time we get here. (We can rely on conflicts
+ * produced by heap pruning, rather than producing our own
+ * now.)
+ *
+ * Backends with snapshots acquired after a VACUUM starts but
+ * before it finishes could have a RecentGlobalXmin with a
+ * later xid than the VACUUM's OldestXmin cutoff. These
+ * backends might happen to opportunistically mark some index
+ * tuples LP_DEAD before we reach them, even though they may
+ * be after our cutoff. We don't try to kill these "extra"
+ * index tuples in _bt_delitems_vacuum(). This keeps things
+ * simple, and allows us to always avoid generating our own
+ * conflicts.
*/
if (callback(htup, callback_state))
deletable[ndeletable++] = offnum;
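
Putting the surviving pieces together, the per-page collection step now reads
roughly as below. This is a consolidated view of the code around this hunk,
not new logic; page, offnum, and maxoff are assumed from unshown context:

    ndeletable = 0;
    minoff = P_FIRSTDATAKEY(opaque);
    maxoff = PageGetMaxOffsetNumber(page);

    for (offnum = minoff;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        IndexTuple  itup;
        ItemPointer htup;

        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
        htup = &(itup->t_tid);

        /* delete the index tuple iff its heap tuple is already gone */
        if (callback(htup, callback_state))
            deletable[ndeletable++] = offnum;
    }
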
@@ -1276,29 +1233,7 @@ restart:
*/
if (ndeletable > 0)
{
- /*
- * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
- * all information to the replay code to allow it to get a cleanup
- * lock on all pages between the previous lastBlockVacuumed and
- * this page. This ensures that WAL replay locks all leaf pages at
- * some point, which is important should non-MVCC scans be
- * requested. This is currently unused on standby, but we record
- * it anyway, so that the WAL contains the required information.
- *
- * Since we can visit leaf pages out-of-order when recursing,
- * replay might end up locking such pages an extra time, but it
- * doesn't seem worth the amount of bookkeeping it'd take to avoid
- * that.
- */
- _bt_delitems_vacuum(rel, buf, deletable, ndeletable,
- vstate->lastBlockVacuumed);
-
- /*
- * Remember highest leaf page number we've issued a
- * XLOG_BTREE_VACUUM WAL record for.
- */
- if (blkno > vstate->lastBlockVacuumed)
- vstate->lastBlockVacuumed = blkno;
+ _bt_delitems_vacuum(rel, buf, deletable, ndeletable);
stats->tuples_removed += ndeletable;
/* must recompute maxoff */
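
The net effect on the page-level API: _bt_delitems_vacuum() loses its
lastBlockVacuumed argument, so the XLOG_BTREE_VACUUM record it emits now
describes only the page actually being vacuumed. A hedged before/after of the
prototype; the "before" is inferred from the removed call sites above, and
parameter names are approximate:

    /* before: replay used lastBlockVacuumed to drive the standby pin scan */
    void _bt_delitems_vacuum(Relation rel, Buffer buf,
                             OffsetNumber *deletable, int ndeletable,
                             BlockNumber lastBlockVacuumed);

    /* after: the record covers just this page; no extra locking on replay */
    void _bt_delitems_vacuum(Relation rel, Buffer buf,
                             OffsetNumber *deletable, int ndeletable);
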