Diffstat (limited to 'src/backend/access/nbtree/nbtxlog.c')
-rw-r--r--  src/backend/access/nbtree/nbtxlog.c | 99
1 file changed, 7 insertions(+), 92 deletions(-)
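
A note on the new replay path in the diff below: the fixed portion of the WAL record now carries only an ndeleted count, while the target offsets are fetched from the registered block's data instead of being length-counted by hand. The standalone sketch here mirrors that split for illustration only; vacuum_record_sketch, the local OffsetNumber typedef, and the sample values are hypothetical stand-ins, not PostgreSQL's actual declarations.

#include <stdint.h>
#include <stdio.h>

typedef uint16_t OffsetNumber;      /* illustrative stand-in for PostgreSQL's typedef */

/*
 * Hypothetical mirror of the record shape the new redo code consumes:
 * the fixed portion carries only a deletion count, and the array of
 * target offsets travels separately as the registered block's data.
 */
typedef struct
{
    uint32_t    ndeleted;           /* number of line pointers to remove */
} vacuum_record_sketch;

int
main(void)
{
    OffsetNumber          offsets[] = {3, 7, 12};   /* pretend block data */
    vacuum_record_sketch  rec = {.ndeleted = 3};

    /*
     * In the real redo routine, XLogRecGetData() yields the fixed portion
     * and XLogRecGetBlockData(record, 0, NULL) yields the offset array;
     * PageIndexMultiDelete() then removes rec.ndeleted items in one pass.
     */
    for (uint32_t i = 0; i < rec.ndeleted; i++)
        printf("would delete line pointer at offset %u\n",
               (unsigned) offsets[i]);
    return 0;
}

Built with any C99 compiler, this prints the three offsets that the redo routine would hand to PageIndexMultiDelete(). The design point the commit relies on is that the offset array's length is now implied by ndeleted, so the old len-based bounds arithmetic could be dropped.
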
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 4325cbee5ba..234b0e0596c 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -383,110 +383,25 @@ static void
btree_xlog_vacuum(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
+ xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
Buffer buffer;
Page page;
BTPageOpaque opaque;
-#ifdef UNUSED
- xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
-
- /*
- * This section of code is thought to be no longer needed, after analysis
- * of the calling paths. It is retained to allow the code to be reinstated
- * if a flaw is revealed in that thinking.
- *
- * If we are running non-MVCC scans using this index we need to do some
- * additional work to ensure correctness, which is known as a "pin scan"
- * described in more detail in next paragraphs. We used to do the extra
- * work in all cases, whereas we now avoid that work in most cases. If
- * lastBlockVacuumed is set to InvalidBlockNumber then we skip the
- * additional work required for the pin scan.
- *
- * Avoiding this extra work is important since it requires us to touch
- * every page in the index, so is an O(N) operation. Worse, it is an
- * operation performed in the foreground during redo, so it delays
- * replication directly.
- *
- * If queries might be active then we need to ensure every leaf page is
- * unpinned between the lastBlockVacuumed and the current block, if there
- * are any. This prevents replay of the VACUUM from reaching the stage of
- * removing heap tuples while there could still be indexscans "in flight"
- * to those particular tuples for those scans which could be confused by
- * finding new tuples at the old TID locations (see nbtree/README).
- *
- * It might be worth checking if there are actually any backends running;
- * if not, we could just skip this.
- *
- * Since VACUUM can visit leaf pages out-of-order, it might issue records
- * with lastBlockVacuumed >= block; that's not an error, it just means
- * nothing to do now.
- *
- * Note: since we touch all pages in the range, we will lock non-leaf
- * pages, and also any empty (all-zero) pages that may be in the index. It
- * doesn't seem worth the complexity to avoid that. But it's important
- * that HotStandbyActiveInReplay() will not return true if the database
- * isn't yet consistent; so we need not fear reading still-corrupt blocks
- * here during crash recovery.
- */
- if (HotStandbyActiveInReplay() && BlockNumberIsValid(xlrec->lastBlockVacuumed))
- {
- RelFileNode thisrnode;
- BlockNumber thisblkno;
- BlockNumber blkno;
-
- XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
-
- for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
- {
- /*
- * We use RBM_NORMAL_NO_LOG mode because it's not an error
- * condition to see all-zero pages. The original btvacuumpage
- * scan would have skipped over all-zero pages, noting them in FSM
- * but not bothering to initialize them just yet; so we mustn't
- * throw an error here. (We could skip acquiring the cleanup lock
- * if PageIsNew, but it's probably not worth the cycles to test.)
- *
- * XXX we don't actually need to read the block, we just need to
- * confirm it is unpinned. If we had a special call into the
- * buffer manager we could optimise this so that if the block is
- * not in shared_buffers we confirm it as unpinned. Optimizing
- * this is now moot, since in most cases we avoid the scan.
- */
- buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
- RBM_NORMAL_NO_LOG);
- if (BufferIsValid(buffer))
- {
- LockBufferForCleanup(buffer);
- UnlockReleaseBuffer(buffer);
- }
- }
- }
-#endif
/*
- * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
- * page. See nbtree/README for details.
+ * We need to take a cleanup lock here, just like btvacuumpage(). However,
+ * it isn't necessary to exhaustively get a cleanup lock on every block in
+ * the index during recovery (just getting a cleanup lock on pages with
+ * items to kill suffices). See nbtree/README for details.
*/
if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
== BLK_NEEDS_REDO)
{
- char *ptr;
- Size len;
-
- ptr = XLogRecGetBlockData(record, 0, &len);
+ char *ptr = XLogRecGetBlockData(record, 0, NULL);
page = (Page) BufferGetPage(buffer);
- if (len > 0)
- {
- OffsetNumber *unused;
- OffsetNumber *unend;
-
- unused = (OffsetNumber *) ptr;
- unend = (OffsetNumber *) ((char *) ptr + len);
-
- if ((unend - unused) > 0)
- PageIndexMultiDelete(page, unused, unend - unused);
- }
+ PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
/*
* Mark the page as not containing any LP_DEAD items --- see comments