Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 66
1 file changed, 63 insertions, 3 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 87a8a225dbf..d166a811b80 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.172 2009/07/29 20:56:18 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.173 2009/12/19 01:32:33 sriggs Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,7 +57,8 @@ typedef struct
     IndexBulkDeleteCallback callback;
     void       *callback_state;
     BTCycleId   cycleid;
-    BlockNumber lastUsedPage;
+    BlockNumber lastBlockVacuumed;  /* last blkno reached by Vacuum scan */
+    BlockNumber lastUsedPage;       /* blkno of last non-recyclable page */
     BlockNumber totFreePages;   /* true total # of free pages */
     MemoryContext pagedelcontext;
 } BTVacState;
@@ -629,6 +630,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     vstate.callback = callback;
     vstate.callback_state = callback_state;
     vstate.cycleid = cycleid;
+    vstate.lastBlockVacuumed = BTREE_METAPAGE;  /* Initialise at first block */
     vstate.lastUsedPage = BTREE_METAPAGE;
     vstate.totFreePages = 0;
 
@@ -705,6 +707,32 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
         num_pages = new_pages;
     }
 
+    /*
+     * InHotStandby we need to scan right up to the end of the index for
+     * correct locking, so we may need to write a WAL record for the final
+     * block in the index if it was not vacuumed. It's possible that
+     * VACUUMing has actually removed zeroed pages at the end of the index
+     * so we need to take care to issue the record for last actual block
+     * and not for the last block that was scanned. Ignore empty indexes.
+     */
+    if (XLogStandbyInfoActive() &&
+        num_pages > 1 && vstate.lastBlockVacuumed < (num_pages - 1))
+    {
+        Buffer      buf;
+
+        /*
+         * We can't use _bt_getbuf() here because it always applies
+         * _bt_checkpage(), which will barf on an all-zero page. We want to
+         * recycle all-zero pages, not fail. Also, we want to use a
+         * nondefault buffer access strategy.
+         */
+        buf = ReadBufferExtended(rel, MAIN_FORKNUM, num_pages - 1, RBM_NORMAL,
+                                 info->strategy);
+        LockBufferForCleanup(buf);
+        _bt_delitems(rel, buf, NULL, 0, true, vstate.lastBlockVacuumed);
+        _bt_relbuf(rel, buf);
+    }
+
     MemoryContextDelete(vstate.pagedelcontext);
 
     /* update statistics */
@@ -847,6 +875,26 @@ restart:
             itup = (IndexTuple) PageGetItem(page,
                                             PageGetItemId(page, offnum));
             htup = &(itup->t_tid);
+
+            /*
+             * During Hot Standby we currently assume that XLOG_BTREE_VACUUM
+             * records do not produce conflicts. That is only true as long as
+             * the callback function depends only upon whether the index tuple
+             * refers to heap tuples removed in the initial heap scan. When
+             * vacuum starts it derives a value of OldestXmin. Backends taking
+             * later snapshots could have a RecentGlobalXmin with a later xid
+             * than the vacuum's OldestXmin, so it is possible that row
+             * versions deleted after OldestXmin could be marked as killed by
+             * other backends. The callback function *could* look at the index
+             * tuple state in isolation and decide to delete the index tuple,
+             * though currently it does not. If it ever did, we would need to
+             * reconsider whether XLOG_BTREE_VACUUM records should cause
+             * conflicts. If they did cause conflicts they would be fairly
+             * harsh conflicts, since we haven't yet worked out a way to pass
+             * a useful value for latestRemovedXid on the XLOG_BTREE_VACUUM
+             * records. This applies to *any* type of index that marks index
+             * tuples as killed.
+             */
             if (callback(htup, callback_state))
                 deletable[ndeletable++] = offnum;
         }
@@ -858,7 +906,19 @@ restart:
          */
         if (ndeletable > 0)
         {
-            _bt_delitems(rel, buf, deletable, ndeletable);
+            BlockNumber lastBlockVacuumed = BufferGetBlockNumber(buf);
+
+            _bt_delitems(rel, buf, deletable, ndeletable, true, vstate->lastBlockVacuumed);
+
+            /*
+             * Keep track of the block number of the lastBlockVacuumed, so
+             * we can scan those blocks as well during WAL replay. This then
+             * provides concurrency protection and allows btrees to be used
+             * while in recovery.
+             */
+            if (lastBlockVacuumed > vstate->lastBlockVacuumed)
+                vstate->lastBlockVacuumed = lastBlockVacuumed;
+
             stats->tuples_removed += ndeletable;
             /* must recompute maxoff */
             maxoff = PageGetMaxOffsetNumber(page);