about summary refs log tree commit diff
path: root/src/backend/access/index/indexam.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2007-09-20 17:56:33 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2007-09-20 17:56:33 +0000
commit282d2a03dd30804b01f8042f640d638c2ee76604 (patch)
tree004f08ce31f1bfb03ab55571ad7867babe5b3d7f /src/backend/access/index/indexam.c
parentbbf4fdc2538097bb3103806e1419ceef1f289203 (diff)
downloadpostgresql-282d2a03dd30804b01f8042f640d638c2ee76604.tar.gz
postgresql-282d2a03dd30804b01f8042f640d638c2ee76604.zip
HOT updates. When we update a tuple without changing any of its indexed
columns, and the new version can be stored on the same heap page, we no longer generate extra index entries for the new version. Instead, index searches follow the HOT-chain links to ensure they find the correct tuple version. In addition, this patch introduces the ability to "prune" dead tuples on a per-page basis, without having to do a complete VACUUM pass to recover space. VACUUM is still needed to clean up dead index entries, however. Pavan Deolasee, with help from a bunch of other people.
Diffstat (limited to 'src/backend/access/index/indexam.c')
-rw-r--r-- src/backend/access/index/indexam.c | 259
1 files changed, 213 insertions, 46 deletions
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index d905013a5fc..fd727ca68c8 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.98 2007/05/27 03:50:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.99 2007/09/20 17:56:30 tgl Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
@@ -64,6 +64,7 @@
#include "access/genam.h"
#include "access/heapam.h"
+#include "access/transam.h"
#include "pgstat.h"
#include "utils/relcache.h"
@@ -313,6 +314,8 @@ index_rescan(IndexScanDesc scan, ScanKey key)
scan->xs_cbuf = InvalidBuffer;
}
+ scan->xs_next_hot = InvalidOffsetNumber;
+
scan->kill_prior_tuple = false; /* for safety */
FunctionCall2(procedure,
@@ -370,6 +373,14 @@ index_markpos(IndexScanDesc scan)
* NOTE: this only restores the internal scan state of the index AM.
* The current result tuple (scan->xs_ctup) doesn't change. See comments
* for ExecRestrPos().
+ *
+ * NOTE: in the presence of HOT chains, mark/restore only works correctly
+ * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
+ * returnable tuple in each HOT chain, and so restoring the prior state at the
+ * granularity of the index AM is sufficient. Since the only current user
+ * of mark/restore functionality is nodeMergejoin.c, this effectively means
+ * that merge-join plans only work for MVCC snapshots. This could be fixed
+ * if necessary, but for now it seems unimportant.
* ----------------
*/
void
@@ -377,9 +388,13 @@ index_restrpos(IndexScanDesc scan)
{
FmgrInfo *procedure;
+ Assert(IsMVCCSnapshot(scan->xs_snapshot));
+
SCAN_CHECKS;
GET_SCAN_PROCEDURE(amrestrpos);
+ scan->xs_next_hot = InvalidOffsetNumber;
+
scan->kill_prior_tuple = false; /* for safety */
FunctionCall1(procedure, PointerGetDatum(scan));
@@ -398,72 +413,224 @@ HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
HeapTuple heapTuple = &scan->xs_ctup;
+ ItemPointer tid = &heapTuple->t_self;
FmgrInfo *procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(amgettuple);
- /* just make sure this is false... */
- scan->kill_prior_tuple = false;
+ /*
+ * We always reset xs_hot_dead; if we are here then either we are just
+ * starting the scan, or we previously returned a visible tuple, and in
+ * either case it's inappropriate to kill the prior index entry.
+ */
+ scan->xs_hot_dead = false;
for (;;)
{
- bool found;
+ OffsetNumber offnum;
+ bool at_chain_start;
+ Page dp;
- /*
- * The AM's gettuple proc finds the next tuple matching the scan keys.
- */
- found = DatumGetBool(FunctionCall2(procedure,
- PointerGetDatum(scan),
- Int32GetDatum(direction)));
+ if (scan->xs_next_hot != InvalidOffsetNumber)
+ {
+ /*
+ * We are resuming scan of a HOT chain after having returned
+ * an earlier member. Must still hold pin on current heap page.
+ */
+ Assert(BufferIsValid(scan->xs_cbuf));
+ Assert(ItemPointerGetBlockNumber(tid) ==
+ BufferGetBlockNumber(scan->xs_cbuf));
+ Assert(TransactionIdIsValid(scan->xs_prev_xmax));
+ offnum = scan->xs_next_hot;
+ at_chain_start = false;
+ scan->xs_next_hot = InvalidOffsetNumber;
+ }
+ else
+ {
+ bool found;
+ Buffer prev_buf;
+
+ /*
+ * If we scanned a whole HOT chain and found only dead tuples,
+ * tell index AM to kill its entry for that TID.
+ */
+ scan->kill_prior_tuple = scan->xs_hot_dead;
+
+ /*
+ * The AM's gettuple proc finds the next index entry matching the
+ * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid).
+ */
+ found = DatumGetBool(FunctionCall2(procedure,
+ PointerGetDatum(scan),
+ Int32GetDatum(direction)));
+
+ /* Reset kill flag immediately for safety */
+ scan->kill_prior_tuple = false;
+
+ /* If we're out of index entries, break out of outer loop */
+ if (!found)
+ break;
+
+ pgstat_count_index_tuples(scan->indexRelation, 1);
+
+ /* Switch to correct buffer if we don't have it already */
+ prev_buf = scan->xs_cbuf;
+ scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
+ scan->heapRelation,
+ ItemPointerGetBlockNumber(tid));
+
+ /*
+ * Prune page, but only if we weren't already on this page
+ */
+ if (prev_buf != scan->xs_cbuf)
+ heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
+ RecentGlobalXmin);
+
+ /* Prepare to scan HOT chain starting at index-referenced offnum */
+ offnum = ItemPointerGetOffsetNumber(tid);
+ at_chain_start = true;
+
+ /* We don't know what the first tuple's xmin should be */
+ scan->xs_prev_xmax = InvalidTransactionId;
+
+ /* Initialize flag to detect if all entries are dead */
+ scan->xs_hot_dead = true;
+ }
+
+ /* Obtain share-lock on the buffer so we can examine visibility */
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
- /* Reset kill flag immediately for safety */
- scan->kill_prior_tuple = false;
+ dp = (Page) BufferGetPage(scan->xs_cbuf);
- if (!found)
+ /* Scan through possible multiple members of HOT-chain */
+ for (;;)
{
- /* Release any held pin on a heap page */
- if (BufferIsValid(scan->xs_cbuf))
- {
- ReleaseBuffer(scan->xs_cbuf);
- scan->xs_cbuf = InvalidBuffer;
- }
- return NULL; /* failure exit */
- }
+ ItemId lp;
+ ItemPointer ctid;
- pgstat_count_index_tuples(scan->indexRelation, 1);
+ /* check for bogus TID */
+ if (offnum < FirstOffsetNumber ||
+ offnum > PageGetMaxOffsetNumber(dp))
+ break;
- /*
- * Fetch the heap tuple and see if it matches the snapshot.
- */
- if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
- heapTuple, &scan->xs_cbuf, true,
- scan->indexRelation))
- break;
+ lp = PageGetItemId(dp, offnum);
- /* Skip if no undeleted tuple at this location */
- if (heapTuple->t_data == NULL)
- continue;
+ /* check for unused, dead, or redirected items */
+ if (!ItemIdIsNormal(lp))
+ {
+ /* We should only see a redirect at start of chain */
+ if (ItemIdIsRedirected(lp) && at_chain_start)
+ {
+ /* Follow the redirect */
+ offnum = ItemIdGetRedirect(lp);
+ at_chain_start = false;
+ continue;
+ }
+ /* else must be end of chain */
+ break;
+ }
- /*
- * If we can't see it, maybe no one else can either. Check to see if
- * the tuple is dead to all transactions. If so, signal the index AM
- * to not return it on future indexscans.
- *
- * We told heap_release_fetch to keep a pin on the buffer, so we can
- * re-access the tuple here. But we must re-lock the buffer first.
- */
- LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
+ /*
+ * We must initialize all of *heapTuple (ie, scan->xs_ctup)
+ * since it is returned to the executor on success.
+ */
+ heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
+ heapTuple->t_len = ItemIdGetLength(lp);
+ ItemPointerSetOffsetNumber(tid, offnum);
+ heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
+ ctid = &heapTuple->t_data->t_ctid;
+
+ /*
+ * Shouldn't see a HEAP_ONLY tuple at chain start. (This test
+ * should be unnecessary, since the chain root can't be removed
+ * while we have pin on the index entry, but let's make it anyway.)
+ */
+ if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
+ break;
+
+ /*
+ * The xmin should match the previous xmax value, else chain is
+ * broken. (Note: this test is not optional because it protects
+ * us against the case where the prior chain member's xmax
+ * aborted since we looked at it.)
+ */
+ if (TransactionIdIsValid(scan->xs_prev_xmax) &&
+ !TransactionIdEquals(scan->xs_prev_xmax,
+ HeapTupleHeaderGetXmin(heapTuple->t_data)))
+ break;
+
+ /* If it's visible per the snapshot, we must return it */
+ if (HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
+ scan->xs_cbuf))
+ {
+ /*
+ * If the snapshot is MVCC, we know that it could accept
+ * at most one member of the HOT chain, so we can skip
+ * examining any more members. Otherwise, check for
+ * continuation of the HOT-chain, and set state for next time.
+ */
+ if (IsMVCCSnapshot(scan->xs_snapshot))
+ scan->xs_next_hot = InvalidOffsetNumber;
+ else if (HeapTupleIsHotUpdated(heapTuple))
+ {
+ Assert(ItemPointerGetBlockNumber(ctid) ==
+ ItemPointerGetBlockNumber(tid));
+ scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
+ scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
+ }
+ else
+ scan->xs_next_hot = InvalidOffsetNumber;
+
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ pgstat_count_heap_fetch(scan->indexRelation);
+
+ return heapTuple;
+ }
- if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
- scan->xs_cbuf) == HEAPTUPLE_DEAD)
- scan->kill_prior_tuple = true;
+ /*
+ * If we can't see it, maybe no one else can either. Check to see
+ * if the tuple is dead to all transactions. If we find that all
+ * the tuples in the HOT chain are dead, we'll signal the index AM
+ * to not return that TID on future indexscans.
+ */
+ if (scan->xs_hot_dead &&
+ HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
+ scan->xs_cbuf) != HEAPTUPLE_DEAD)
+ scan->xs_hot_dead = false;
+
+ /*
+ * Check to see if HOT chain continues past this tuple; if so
+ * fetch the next offnum (we don't bother storing it into
+ * xs_next_hot, but must store xs_prev_xmax), and loop around.
+ */
+ if (HeapTupleIsHotUpdated(heapTuple))
+ {
+ Assert(ItemPointerGetBlockNumber(ctid) ==
+ ItemPointerGetBlockNumber(tid));
+ offnum = ItemPointerGetOffsetNumber(ctid);
+ at_chain_start = false;
+ scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
+ }
+ else
+ break; /* end of chain */
+ } /* loop over a single HOT chain */
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ /* Loop around to ask index AM for another TID */
+ scan->xs_next_hot = InvalidOffsetNumber;
+ }
+
+ /* Release any held pin on a heap page */
+ if (BufferIsValid(scan->xs_cbuf))
+ {
+ ReleaseBuffer(scan->xs_cbuf);
+ scan->xs_cbuf = InvalidBuffer;
}
- /* Success exit */
- return heapTuple;
+ return NULL; /* failure exit */
}
/* ----------------