-rw-r--r--  src/backend/access/heap/heapam.c             8
-rw-r--r--  src/backend/access/heap/heapam_handler.c    56
-rw-r--r--  src/backend/executor/nodeBitmapHeapscan.c  246
-rw-r--r--  src/include/access/relscan.h                28
-rw-r--r--  src/include/access/tableam.h                64
-rw-r--r--  src/include/nodes/execnodes.h               12
6 files changed, 252 insertions, 162 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4c8febdc811..75ff9e7388f 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1387,8 +1387,8 @@ heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
heap_setscanlimits(sscan, startBlk, numBlks);
/* Finally, set the TID range in sscan */
- ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
- ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
+ ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
+ ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
}
bool
@@ -1396,8 +1396,8 @@ heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
TupleTableSlot *slot)
{
HeapScanDesc scan = (HeapScanDesc) sscan;
- ItemPointer mintid = &sscan->rs_mintid;
- ItemPointer maxtid = &sscan->rs_maxtid;
+ ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
+ ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
/* Note: no locking manipulations needed */
for (;;)
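[Editor's note: the heapam.c hunks above are mechanical. The TID-range bounds move from top-level TableScanDescData fields into the new st.tidrange union member declared in relscan.h below. A minimal sketch of reading the relocated fields; the function name is hypothetical, the accessors are existing PostgreSQL macros:]

    static void
    report_tidrange(TableScanDesc sscan)
    {
        ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
        ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;

        /* BlockNumber and OffsetNumber both print as unsigned */
        elog(DEBUG1, "TID range (%u,%u)..(%u,%u)",
             ItemPointerGetBlockNumber(mintid),
             ItemPointerGetOffsetNumber(mintid),
             ItemPointerGetBlockNumber(maxtid),
             ItemPointerGetOffsetNumber(maxtid));
    }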
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 166aab7a93c..a8d95e0f1c1 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2115,18 +2115,49 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
static bool
heapam_scan_bitmap_next_block(TableScanDesc scan,
- TBMIterateResult *tbmres,
+ BlockNumber *blockno, bool *recheck,
uint64 *lossy_pages, uint64 *exact_pages)
{
HeapScanDesc hscan = (HeapScanDesc) scan;
- BlockNumber block = tbmres->blockno;
+ BlockNumber block;
Buffer buffer;
Snapshot snapshot;
int ntup;
+ TBMIterateResult *tbmres;
hscan->rs_cindex = 0;
hscan->rs_ntuples = 0;
+ *blockno = InvalidBlockNumber;
+ *recheck = true;
+
+ do
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ if (scan->st.bitmap.rs_shared_iterator)
+ tbmres = tbm_shared_iterate(scan->st.bitmap.rs_shared_iterator);
+ else
+ tbmres = tbm_iterate(scan->st.bitmap.rs_iterator);
+
+ if (tbmres == NULL)
+ return false;
+
+ /*
+ * Ignore any claimed entries past what we think is the end of the
+ * relation. It may have been extended after the start of our scan (we
+ * only hold an AccessShareLock, and it could be inserts from this
+ * backend). We don't take this optimization in SERIALIZABLE
+ * isolation though, as we need to examine all invisible tuples
+ * reachable by the index.
+ */
+ } while (!IsolationIsSerializable() &&
+ tbmres->blockno >= hscan->rs_nblocks);
+
+ /* Got a valid block */
+ *blockno = tbmres->blockno;
+ *recheck = tbmres->recheck;
+
/*
* We can skip fetching the heap page if we don't need any fields from the
* heap, the bitmap entries don't need rechecking, and all tuples on the
@@ -2145,16 +2176,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
return true;
}
- /*
- * Ignore any claimed entries past what we think is the end of the
- * relation. It may have been extended after the start of our scan (we
- * only hold an AccessShareLock, and it could be inserts from this
- * backend). We don't take this optimization in SERIALIZABLE isolation
- * though, as we need to examine all invisible tuples reachable by the
- * index.
- */
- if (!IsolationIsSerializable() && block >= hscan->rs_nblocks)
- return false;
+ block = tbmres->blockno;
/*
* Acquire pin on the target heap page, trading in any pin we held before.
@@ -2249,12 +2271,18 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
else
(*lossy_pages)++;
- return ntup > 0;
+ /*
+ * Return true to indicate that a valid block was found and the bitmap is
+ * not exhausted. If there are no visible tuples on this page,
+ * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
+ * return false, returning control to this function to advance to the next
+ * block in the bitmap.
+ */
+ return true;
}
static bool
heapam_scan_bitmap_next_tuple(TableScanDesc scan,
- TBMIterateResult *tbmres,
TupleTableSlot *slot)
{
HeapScanDesc hscan = (HeapScanDesc) scan;
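[Editor's note: the heapam_handler.c hunks change the caller contract. heapam_scan_bitmap_next_block() now advances the bitmap iterator itself (tbm_iterate() or tbm_shared_iterate()) and returns false only when the bitmap is exhausted; a page with no visible tuples just yields zero tuples from the next_tuple callback. A minimal sketch of the resulting driving loop, not executor code; "consume" is a hypothetical callback:]

    static void
    drive_bitmap_scan(TableScanDesc scan, TupleTableSlot *slot,
                      void (*consume) (TupleTableSlot *))
    {
        BlockNumber blockno;
        bool        recheck;
        uint64      lossy_pages = 0;
        uint64      exact_pages = 0;

        /* false from next_block now means "bitmap exhausted", not
         * "nothing on this page" */
        while (table_scan_bitmap_next_block(scan, &blockno, &recheck,
                                            &lossy_pages, &exact_pages))
        {
            while (table_scan_bitmap_next_tuple(scan, slot))
                consume(slot);
        }
    }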
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index f4690a20bb1..89a16f142b7 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -51,8 +51,7 @@
static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
- BlockNumber blockno);
+static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
TableScanDesc scan);
@@ -71,9 +70,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
ExprContext *econtext;
TableScanDesc scan;
TIDBitmap *tbm;
- TBMIterator *tbmiterator = NULL;
- TBMSharedIterator *shared_tbmiterator = NULL;
- TBMIterateResult *tbmres;
TupleTableSlot *slot;
ParallelBitmapHeapState *pstate = node->pstate;
dsa_area *dsa = node->ss.ps.state->es_query_dsa;
@@ -85,11 +81,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
slot = node->ss.ss_ScanTupleSlot;
scan = node->ss.ss_currentScanDesc;
tbm = node->tbm;
- if (pstate == NULL)
- tbmiterator = node->tbmiterator;
- else
- shared_tbmiterator = node->shared_tbmiterator;
- tbmres = node->tbmres;
/*
* If we haven't yet performed the underlying index scan, do it, and begin
@@ -105,6 +96,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
*/
if (!node->initialized)
{
+ TBMIterator *tbmiterator = NULL;
+ TBMSharedIterator *shared_tbmiterator = NULL;
+
if (!pstate)
{
tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
@@ -113,8 +107,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
elog(ERROR, "unrecognized result from subplan");
node->tbm = tbm;
- node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
- node->tbmres = tbmres = NULL;
+ tbmiterator = tbm_begin_iterate(tbm);
#ifdef USE_PREFETCH
if (node->prefetch_maximum > 0)
@@ -166,9 +159,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
}
/* Allocate a private iterator and attach the shared state to it */
- node->shared_tbmiterator = shared_tbmiterator =
+ shared_tbmiterator =
tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
- node->tbmres = tbmres = NULL;
#ifdef USE_PREFETCH
if (node->prefetch_maximum > 0)
@@ -207,47 +199,23 @@ BitmapHeapNext(BitmapHeapScanState *node)
node->ss.ss_currentScanDesc = scan;
}
+ scan->st.bitmap.rs_iterator = tbmiterator;
+ scan->st.bitmap.rs_shared_iterator = shared_tbmiterator;
node->initialized = true;
+
+ goto new_page;
}
for (;;)
{
- CHECK_FOR_INTERRUPTS();
-
- /*
- * Get next page of results if needed
- */
- if (tbmres == NULL)
- {
- if (!pstate)
- node->tbmres = tbmres = tbm_iterate(tbmiterator);
- else
- node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
- if (tbmres == NULL)
- {
- /* no more entries in the bitmap */
- break;
- }
-
- BitmapAdjustPrefetchIterator(node, tbmres->blockno);
-
- if (!table_scan_bitmap_next_block(scan, tbmres,
- &node->stats.lossy_pages,
- &node->stats.exact_pages))
- {
- /* AM doesn't think this block is valid, skip */
- continue;
- }
-
- /* Adjust the prefetch target */
- BitmapAdjustPrefetchTarget(node);
- }
- else
+ while (table_scan_bitmap_next_tuple(scan, slot))
{
/*
* Continuing in previously obtained page.
*/
+ CHECK_FOR_INTERRUPTS();
+
#ifdef USE_PREFETCH
/*
@@ -268,45 +236,64 @@ BitmapHeapNext(BitmapHeapScanState *node)
SpinLockRelease(&pstate->mutex);
}
#endif /* USE_PREFETCH */
+
+ /*
+ * We issue prefetch requests *after* fetching the current page to
+ * try to avoid having prefetching interfere with the main I/O.
+ * Also, this should happen only when we have determined there is
+ * still something to do on the current page, else we may
+ * uselessly prefetch the same page we are just about to request
+ * for real.
+ */
+ BitmapPrefetch(node, scan);
+
+ /*
+ * If we are using lossy info, we have to recheck the qual
+ * conditions at every tuple.
+ */
+ if (node->recheck)
+ {
+ econtext->ecxt_scantuple = slot;
+ if (!ExecQualAndReset(node->bitmapqualorig, econtext))
+ {
+ /* Fails recheck, so drop it and loop back for another */
+ InstrCountFiltered2(node, 1);
+ ExecClearTuple(slot);
+ continue;
+ }
+ }
+
+ /* OK to return this tuple */
+ return slot;
}
- /*
- * We issue prefetch requests *after* fetching the current page to try
- * to avoid having prefetching interfere with the main I/O. Also, this
- * should happen only when we have determined there is still something
- * to do on the current page, else we may uselessly prefetch the same
- * page we are just about to request for real.
- */
- BitmapPrefetch(node, scan);
+new_page:
+
+ BitmapAdjustPrefetchIterator(node);
/*
- * Attempt to fetch tuple from AM.
+ * Returns false if the bitmap is exhausted and there are no further
+ * blocks we need to scan.
*/
- if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
- {
- /* nothing more to look at on this page */
- node->tbmres = tbmres = NULL;
- continue;
- }
+ if (!table_scan_bitmap_next_block(scan, &node->blockno,
+ &node->recheck,
+ &node->stats.lossy_pages,
+ &node->stats.exact_pages))
+ break;
/*
- * If we are using lossy info, we have to recheck the qual conditions
- * at every tuple.
+ * If serial, we can error out if the prefetch block doesn't stay
+ * ahead of the current block.
*/
- if (tbmres->recheck)
- {
- econtext->ecxt_scantuple = slot;
- if (!ExecQualAndReset(node->bitmapqualorig, econtext))
- {
- /* Fails recheck, so drop it and loop back for another */
- InstrCountFiltered2(node, 1);
- ExecClearTuple(slot);
- continue;
- }
- }
-
- /* OK to return this tuple */
- return slot;
+ if (node->pstate == NULL &&
+ node->prefetch_iterator &&
+ node->prefetch_blockno < node->blockno)
+ elog(ERROR,
+ "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
+ node->prefetch_blockno, node->blockno);
+
+ /* Adjust the prefetch target */
+ BitmapAdjustPrefetchTarget(node);
}
/*
@@ -332,13 +319,17 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
/*
* BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
+ *
+ * We keep track of how far the prefetch iterator is ahead of the main
+ * iterator in prefetch_pages. For each block the main iterator returns, we
+ * decrement prefetch_pages.
*/
static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
- BlockNumber blockno)
+BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
ParallelBitmapHeapState *pstate = node->pstate;
+ TBMIterateResult *tbmpre;
if (pstate == NULL)
{
@@ -351,15 +342,22 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
}
else if (prefetch_iterator)
{
- /* Do not let the prefetch iterator get behind the main one */
- TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
-
- if (tbmpre == NULL || tbmpre->blockno != blockno)
- elog(ERROR, "prefetch and main iterators are out of sync");
+ tbmpre = tbm_iterate(prefetch_iterator);
+ node->prefetch_blockno = tbmpre ? tbmpre->blockno :
+ InvalidBlockNumber;
}
return;
}
+ /*
+ * XXX: There is a known issue with keeping the prefetch and current block
+ * iterators in sync for parallel bitmap table scans. This can lead to
+ * prefetching blocks that have already been read. See the discussion
+ * here:
+ * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
+ * Note that moving the call site of BitmapAdjustPrefetchIterator()
+ * exacerbates the effects of this bug.
+ */
if (node->prefetch_maximum > 0)
{
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
@@ -384,7 +382,11 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
* case.
*/
if (prefetch_iterator)
- tbm_shared_iterate(prefetch_iterator);
+ {
+ tbmpre = tbm_shared_iterate(prefetch_iterator);
+ node->prefetch_blockno = tbmpre ? tbmpre->blockno :
+ InvalidBlockNumber;
+ }
}
}
#endif /* USE_PREFETCH */
@@ -462,6 +464,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
break;
}
node->prefetch_pages++;
+ node->prefetch_blockno = tbmpre->blockno;
/*
* If we expect not to have to actually read this heap page,
@@ -519,6 +522,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
break;
}
+ node->prefetch_blockno = tbmpre->blockno;
+
/* As above, skip prefetch if we expect not to need page */
skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
!tbmpre->recheck &&
@@ -575,17 +580,32 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
PlanState *outerPlan = outerPlanState(node);
- /* rescan to release any page pin */
- if (node->ss.ss_currentScanDesc)
+ TableScanDesc scan = node->ss.ss_currentScanDesc;
+
+ if (scan)
+ {
+ /*
+ * End iteration on iterators saved in scan descriptor.
+ */
+ if (scan->st.bitmap.rs_shared_iterator)
+ {
+ tbm_end_shared_iterate(scan->st.bitmap.rs_shared_iterator);
+ scan->st.bitmap.rs_shared_iterator = NULL;
+ }
+
+ if (scan->st.bitmap.rs_iterator)
+ {
+ tbm_end_iterate(scan->st.bitmap.rs_iterator);
+ scan->st.bitmap.rs_iterator = NULL;
+ }
+
+ /* rescan to release any page pin */
table_rescan(node->ss.ss_currentScanDesc, NULL);
+ }
/* release bitmaps and buffers if any */
- if (node->tbmiterator)
- tbm_end_iterate(node->tbmiterator);
if (node->prefetch_iterator)
tbm_end_iterate(node->prefetch_iterator);
- if (node->shared_tbmiterator)
- tbm_end_shared_iterate(node->shared_tbmiterator);
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
if (node->tbm)
@@ -593,13 +613,13 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
if (node->pvmbuffer != InvalidBuffer)
ReleaseBuffer(node->pvmbuffer);
node->tbm = NULL;
- node->tbmiterator = NULL;
- node->tbmres = NULL;
node->prefetch_iterator = NULL;
node->initialized = false;
- node->shared_tbmiterator = NULL;
node->shared_prefetch_iterator = NULL;
node->pvmbuffer = InvalidBuffer;
+ node->recheck = true;
+ node->blockno = InvalidBlockNumber;
+ node->prefetch_blockno = InvalidBlockNumber;
ExecScanReScan(&node->ss);
@@ -653,28 +673,40 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
*/
ExecEndNode(outerPlanState(node));
+ if (scanDesc)
+ {
+ /*
+ * End iteration on iterators saved in scan descriptor.
+ */
+ if (scanDesc->st.bitmap.rs_shared_iterator)
+ {
+ tbm_end_shared_iterate(scanDesc->st.bitmap.rs_shared_iterator);
+ scanDesc->st.bitmap.rs_shared_iterator = NULL;
+ }
+
+ if (scanDesc->st.bitmap.rs_iterator)
+ {
+ tbm_end_iterate(scanDesc->st.bitmap.rs_iterator);
+ scanDesc->st.bitmap.rs_iterator = NULL;
+ }
+
+ /*
+ * close table scan
+ */
+ table_endscan(scanDesc);
+ }
+
/*
* release bitmaps and buffers if any
*/
- if (node->tbmiterator)
- tbm_end_iterate(node->tbmiterator);
if (node->prefetch_iterator)
tbm_end_iterate(node->prefetch_iterator);
if (node->tbm)
tbm_free(node->tbm);
- if (node->shared_tbmiterator)
- tbm_end_shared_iterate(node->shared_tbmiterator);
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
if (node->pvmbuffer != InvalidBuffer)
ReleaseBuffer(node->pvmbuffer);
-
- /*
- * close heap scan
- */
- if (scanDesc)
- table_endscan(scanDesc);
-
}
/* ----------------------------------------------------------------
@@ -707,8 +739,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
scanstate->tbm = NULL;
- scanstate->tbmiterator = NULL;
- scanstate->tbmres = NULL;
scanstate->pvmbuffer = InvalidBuffer;
/* Zero the statistics counters */
@@ -718,9 +748,11 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0;
scanstate->initialized = false;
- scanstate->shared_tbmiterator = NULL;
scanstate->shared_prefetch_iterator = NULL;
scanstate->pstate = NULL;
+ scanstate->recheck = true;
+ scanstate->blockno = InvalidBlockNumber;
+ scanstate->prefetch_blockno = InvalidBlockNumber;
/*
* Miscellaneous initialization
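[Editor's note: condensing the nodeBitmapHeapscan.c hunks above, the restructured BitmapHeapNext() is a two-level loop entered via goto on the first call. A skeleton under those hunks' assumptions, with parallel-state handling, prefetching, and the bitmapqualorig recheck elided; the "_sketch" suffix marks it as illustrative:]

    static TupleTableSlot *
    BitmapHeapNext_sketch(BitmapHeapScanState *node)
    {
        TableScanDesc scan = node->ss.ss_currentScanDesc;
        TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;

        if (!node->initialized)
        {
            /* run the index subplan, build the TIDBitmap, begin the
             * scan, and attach iterator(s) to scan->st.bitmap (elided) */
            node->initialized = true;
            goto new_page;
        }

        for (;;)
        {
            while (table_scan_bitmap_next_tuple(scan, slot))
            {
                CHECK_FOR_INTERRUPTS();
                /* prefetching and lossy-page recheck elided */
                return slot;
            }

    new_page:
            BitmapAdjustPrefetchIterator(node);

            if (!table_scan_bitmap_next_block(scan, &node->blockno,
                                              &node->recheck,
                                              &node->stats.lossy_pages,
                                              &node->stats.exact_pages))
                break;          /* bitmap exhausted */

            BitmapAdjustPrefetchTarget(node);
        }

        /* out of tuples: return an empty slot */
        return ExecClearTuple(slot);
    }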
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 114a85dc47c..e1884acf493 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -25,6 +25,9 @@
struct ParallelTableScanDescData;
+struct TBMIterator;
+struct TBMSharedIterator;
+
/*
* Generic descriptor for table scans. This is the base-class for table scans,
* which needs to be embedded in the scans of individual AMs.
@@ -37,9 +40,28 @@ typedef struct TableScanDescData
int rs_nkeys; /* number of scan keys */
struct ScanKeyData *rs_key; /* array of scan key descriptors */
- /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
- ItemPointerData rs_mintid;
- ItemPointerData rs_maxtid;
+ /*
+ * Scan type-specific members
+ */
+ union
+ {
+ /* Iterators for Bitmap Table Scans */
+ struct
+ {
+ struct TBMIterator *rs_iterator;
+ struct TBMSharedIterator *rs_shared_iterator;
+ } bitmap;
+
+ /*
+ * Range of ItemPointers for table_scan_getnextslot_tidrange() to
+ * scan.
+ */
+ struct
+ {
+ ItemPointerData rs_mintid;
+ ItemPointerData rs_maxtid;
+ } tidrange;
+ } st;
/*
* Information about type and behaviour of the scan, a bitmask of members
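[Editor's note: since the union variants share storage, a scan may touch only the member matching its type. A hypothetical helper illustrating the bitmap variant; SO_TYPE_BITMAPSCAN and tbm_begin_iterate() are existing APIs, the function name is invented:]

    static void
    attach_bitmap_iterator(TableScanDesc scan, TIDBitmap *tbm)
    {
        Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);

        /* serial case; a parallel worker sets rs_shared_iterator instead */
        scan->st.bitmap.rs_iterator = tbm_begin_iterate(tbm);
        scan->st.bitmap.rs_shared_iterator = NULL;
    }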
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index be09d180d45..adb478a93ca 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -36,7 +36,6 @@ extern PGDLLIMPORT bool synchronize_seqscans;
struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
-struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;
@@ -780,26 +779,29 @@ typedef struct TableAmRoutine
*/
/*
- * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
- * of a bitmap table scan. `scan` was started via table_beginscan_bm().
- * Return false if there are no tuples to be found on the page, true
- * otherwise.
+ * Prepare to fetch / check / return tuples from `blockno` as part of a
+ * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
+ * false if the bitmap is exhausted and true otherwise.
*
* This will typically read and pin the target block, and do the necessary
* work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
- * make sense to perform tuple visibility checks at this time). For some
- * AMs it will make more sense to do all the work referencing `tbmres`
- * contents here, for others it might be better to defer more work to
- * scan_bitmap_next_tuple.
- *
- * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
- * on the page have to be returned, otherwise the tuples at offsets in
- * `tbmres->offsets` need to be returned.
+ * make sense to perform tuple visibility checks at this time).
*
* `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
* incremented by the table AM to indicate whether or not the block's
* representation in the bitmap is lossy.
*
+ * `recheck` is set by the table AM to indicate whether or not the tuples
+ * from this block should be rechecked. Tuples from lossy pages will
+ * always need to be rechecked, but some non-lossy pages' tuples may also
+ * require recheck.
+ *
+ * `blockno` is the current block and is set by the table AM. The table AM
+ * is responsible for advancing the main iterator, but the bitmap table
+ * scan code still advances the prefetch iterator. `blockno` is used by
+ * bitmap table scan code to validate that the prefetch block stays ahead
+ * of the current block.
+ *
* XXX: Currently this may only be implemented if the AM uses md.c as its
* storage manager, and uses ItemPointer->ip_blkid in a manner that maps
* blockids directly to the underlying storage. nodeBitmapHeapscan.c
@@ -815,7 +817,8 @@ typedef struct TableAmRoutine
* scan_bitmap_next_tuple need to exist, or neither.
*/
bool (*scan_bitmap_next_block) (TableScanDesc scan,
- struct TBMIterateResult *tbmres,
+ BlockNumber *blockno,
+ bool *recheck,
uint64 *lossy_pages,
uint64 *exact_pages);
@@ -823,15 +826,10 @@ typedef struct TableAmRoutine
* Fetch the next tuple of a bitmap table scan into `slot` and return true
* if a visible tuple was found, false otherwise.
*
- * For some AMs it will make more sense to do all the work referencing
- * `tbmres` contents in scan_bitmap_next_block, for others it might be
- * better to defer more work to this callback.
- *
* Optional callback, but either both scan_bitmap_next_block and
* scan_bitmap_next_tuple need to exist, or neither.
*/
bool (*scan_bitmap_next_tuple) (TableScanDesc scan,
- struct TBMIterateResult *tbmres,
TupleTableSlot *slot);
/*
@@ -959,12 +957,17 @@ static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
int nkeys, struct ScanKeyData *key, bool need_tuple)
{
+ TableScanDesc result;
uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
if (need_tuple)
flags |= SO_NEED_TUPLES;
- return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+ result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
+ NULL, flags);
+ result->st.bitmap.rs_shared_iterator = NULL;
+ result->st.bitmap.rs_iterator = NULL;
+ return result;
}
/*
@@ -1955,21 +1958,28 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
*/
/*
- * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
- * a bitmap table scan. `scan` needs to have been started via
- * table_beginscan_bm(). Returns false if there are no tuples to be found on
- * the page, true otherwise.
+ * Prepare to fetch / check / return tuples as part of a bitmap table scan.
+ * `scan` needs to have been started via table_beginscan_bm(). Returns false
+ * if there are no more blocks in the bitmap, true otherwise.
*
* `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
* incremented by the table AM to indicate whether or not the block's
* representation in the bitmap is lossy.
*
+ * `recheck` is set by the table AM to indicate whether or not the tuples
+ * from this block should be rechecked.
+ *
+ * `blockno` is the current block; it is set by the table AM and used by
+ * the bitmap table scan code to validate that the prefetch block stays
+ * ahead of the current block.
+ *
* Note, this is an optionally implemented function, therefore should only be
* used after verifying the presence (at plan time or such).
*/
static inline bool
table_scan_bitmap_next_block(TableScanDesc scan,
- struct TBMIterateResult *tbmres,
+ BlockNumber *blockno,
+ bool *recheck,
uint64 *lossy_pages,
uint64 *exact_pages)
{
@@ -1982,7 +1992,7 @@ table_scan_bitmap_next_block(TableScanDesc scan,
elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
- tbmres,
+ blockno, recheck,
lossy_pages,
exact_pages);
}
@@ -1997,7 +2007,6 @@ table_scan_bitmap_next_block(TableScanDesc scan,
*/
static inline bool
table_scan_bitmap_next_tuple(TableScanDesc scan,
- struct TBMIterateResult *tbmres,
TupleTableSlot *slot)
{
/*
@@ -2009,7 +2018,6 @@ table_scan_bitmap_next_tuple(TableScanDesc scan,
elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
- tbmres,
slot);
}
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e4698a28c4f..b67d5186a2d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1833,8 +1833,6 @@ typedef struct SharedBitmapHeapInstrumentation
*
* bitmapqualorig execution state for bitmapqualorig expressions
* tbm bitmap obtained from child index scan(s)
- * tbmiterator iterator for scanning current pages
- * tbmres current-page data
* pvmbuffer buffer for visibility-map lookups of prefetched pages
* stats execution statistics
* prefetch_iterator iterator for prefetching ahead of current page
@@ -1842,10 +1840,12 @@ typedef struct SharedBitmapHeapInstrumentation
* prefetch_target current target prefetch distance
* prefetch_maximum maximum value for prefetch_target
* initialized is node ready to iterate
- * shared_tbmiterator shared iterator
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
* sinstrument statistics for parallel workers
+ * recheck do current page's tuples need recheck
+ * blockno used to validate prefetch and current block stay in sync
+ * prefetch_blockno used to validate prefetch stays ahead of current block
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1853,8 +1853,6 @@ typedef struct BitmapHeapScanState
ScanState ss; /* its first field is NodeTag */
ExprState *bitmapqualorig;
TIDBitmap *tbm;
- TBMIterator *tbmiterator;
- TBMIterateResult *tbmres;
Buffer pvmbuffer;
BitmapHeapScanInstrumentation stats;
TBMIterator *prefetch_iterator;
@@ -1862,10 +1860,12 @@ typedef struct BitmapHeapScanState
int prefetch_target;
int prefetch_maximum;
bool initialized;
- TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
SharedBitmapHeapInstrumentation *sinstrument;
+ bool recheck;
+ BlockNumber blockno;
+ BlockNumber prefetch_blockno;
} BitmapHeapScanState;
/* ----------------