about | summary | refs | log | tree | commit | diff
path: root/src/backend/access/nbtree/nbtxlog.c
diff options
context:
space:
mode:
author: Peter Geoghegan <pg@bowt.ie> 2019-12-19 11:35:55 -0800
committer: Peter Geoghegan <pg@bowt.ie> 2019-12-19 11:35:55 -0800
commit: 9f83468b3536caf6fb7fe8f9dcdbb108a98d1257 (patch)
tree: 141ca3091939ce51a24b07808272850af18a8309 /src/backend/access/nbtree/nbtxlog.c
parent: b93e9a5c94b4c89932a637798bd560971fe790d7 (diff)
download: postgresql-9f83468b3536caf6fb7fe8f9dcdbb108a98d1257.tar.gz
download: postgresql-9f83468b3536caf6fb7fe8f9dcdbb108a98d1257.zip
Remove unneeded "pin scan" nbtree VACUUM code.
The REDO routine for nbtree's xl_btree_vacuum record type hasn't performed a "pin scan" since commit 3e4b7d87 went in, so clearly there isn't any point in VACUUM WAL-logging information that won't actually be used. Finish off the work of commit 3e4b7d87 (and the closely related preceding commit 687f2cd7) by removing the code that generates this unused information. Also remove the REDO routine code disabled by commit 3e4b7d87.

Replace the unneeded lastBlockVacuumed field in xl_btree_vacuum with a new "ndeleted" field. The new field isn't actually needed right now, since we could continue to infer the array length from the overall record length. However, an upcoming patch to add deduplication to nbtree needs to add an "items updated" field to xl_btree_vacuum, so we might as well start being explicit about the number of items now. (Besides, it doesn't seem like a good idea to leave the xl_btree_vacuum struct without any fields; the C standard says that that's undefined.)

nbtree VACUUM no longer forces writing a WAL record for the last block in the index. Writing out a WAL record with no items for the final block was supposed to force processing of a lastBlockVacuumed field by a pin scan.

Bump XLOG_PAGE_MAGIC because xl_btree_vacuum changed.

Discussion: https://postgr.es/m/CAH2-WzmY_mT7UnTzFB5LBQDBkKpdV5UxP3B5bLb7uP%3D%3D6UQJRQ%40mail.gmail.com
Diffstat (limited to 'src/backend/access/nbtree/nbtxlog.c')
-rw-r--r-- src/backend/access/nbtree/nbtxlog.c | 99
1 file changed, 7 insertions(+), 92 deletions(-)
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 4325cbee5ba..234b0e0596c 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -383,110 +383,25 @@ static void
btree_xlog_vacuum(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
+ xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
Buffer buffer;
Page page;
BTPageOpaque opaque;
-#ifdef UNUSED
- xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
-
- /*
- * This section of code is thought to be no longer needed, after analysis
- * of the calling paths. It is retained to allow the code to be reinstated
- * if a flaw is revealed in that thinking.
- *
- * If we are running non-MVCC scans using this index we need to do some
- * additional work to ensure correctness, which is known as a "pin scan"
- * described in more detail in next paragraphs. We used to do the extra
- * work in all cases, whereas we now avoid that work in most cases. If
- * lastBlockVacuumed is set to InvalidBlockNumber then we skip the
- * additional work required for the pin scan.
- *
- * Avoiding this extra work is important since it requires us to touch
- * every page in the index, so is an O(N) operation. Worse, it is an
- * operation performed in the foreground during redo, so it delays
- * replication directly.
- *
- * If queries might be active then we need to ensure every leaf page is
- * unpinned between the lastBlockVacuumed and the current block, if there
- * are any. This prevents replay of the VACUUM from reaching the stage of
- * removing heap tuples while there could still be indexscans "in flight"
- * to those particular tuples for those scans which could be confused by
- * finding new tuples at the old TID locations (see nbtree/README).
- *
- * It might be worth checking if there are actually any backends running;
- * if not, we could just skip this.
- *
- * Since VACUUM can visit leaf pages out-of-order, it might issue records
- * with lastBlockVacuumed >= block; that's not an error, it just means
- * nothing to do now.
- *
- * Note: since we touch all pages in the range, we will lock non-leaf
- * pages, and also any empty (all-zero) pages that may be in the index. It
- * doesn't seem worth the complexity to avoid that. But it's important
- * that HotStandbyActiveInReplay() will not return true if the database
- * isn't yet consistent; so we need not fear reading still-corrupt blocks
- * here during crash recovery.
- */
- if (HotStandbyActiveInReplay() && BlockNumberIsValid(xlrec->lastBlockVacuumed))
- {
- RelFileNode thisrnode;
- BlockNumber thisblkno;
- BlockNumber blkno;
-
- XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
-
- for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
- {
- /*
- * We use RBM_NORMAL_NO_LOG mode because it's not an error
- * condition to see all-zero pages. The original btvacuumpage
- * scan would have skipped over all-zero pages, noting them in FSM
- * but not bothering to initialize them just yet; so we mustn't
- * throw an error here. (We could skip acquiring the cleanup lock
- * if PageIsNew, but it's probably not worth the cycles to test.)
- *
- * XXX we don't actually need to read the block, we just need to
- * confirm it is unpinned. If we had a special call into the
- * buffer manager we could optimise this so that if the block is
- * not in shared_buffers we confirm it as unpinned. Optimizing
- * this is now moot, since in most cases we avoid the scan.
- */
- buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
- RBM_NORMAL_NO_LOG);
- if (BufferIsValid(buffer))
- {
- LockBufferForCleanup(buffer);
- UnlockReleaseBuffer(buffer);
- }
- }
- }
-#endif
/*
- * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
- * page. See nbtree/README for details.
+ * We need to take a cleanup lock here, just like btvacuumpage(). However,
+ * it isn't necessary to exhaustively get a cleanup lock on every block in
+ * the index during recovery (just getting a cleanup lock on pages with
+ * items to kill suffices). See nbtree/README for details.
*/
if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
== BLK_NEEDS_REDO)
{
- char *ptr;
- Size len;
-
- ptr = XLogRecGetBlockData(record, 0, &len);
+ char *ptr = XLogRecGetBlockData(record, 0, NULL);
page = (Page) BufferGetPage(buffer);
- if (len > 0)
- {
- OffsetNumber *unused;
- OffsetNumber *unend;
-
- unused = (OffsetNumber *) ptr;
- unend = (OffsetNumber *) ((char *) ptr + len);
-
- if ((unend - unused) > 0)
- PageIndexMultiDelete(page, unused, unend - unused);
- }
+ PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
/*
* Mark the page as not containing any LP_DEAD items --- see comments