-rw-r--r-- | src/backend/access/heap/heapam.c | 8
-rw-r--r-- | src/backend/access/heap/heapam_handler.c | 56
-rw-r--r-- | src/backend/executor/nodeBitmapHeapscan.c | 246
-rw-r--r-- | src/include/access/relscan.h | 28
-rw-r--r-- | src/include/access/tableam.h | 64
-rw-r--r-- | src/include/nodes/execnodes.h | 12
6 files changed, 252 insertions, 162 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4c8febdc811..75ff9e7388f 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1387,8 +1387,8 @@ heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
 	heap_setscanlimits(sscan, startBlk, numBlks);
 
 	/* Finally, set the TID range in sscan */
-	ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
-	ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
+	ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
+	ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
 }
 
 bool
@@ -1396,8 +1396,8 @@ heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
 						  TupleTableSlot *slot)
 {
 	HeapScanDesc scan = (HeapScanDesc) sscan;
-	ItemPointer mintid = &sscan->rs_mintid;
-	ItemPointer maxtid = &sscan->rs_maxtid;
+	ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
+	ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
 
 	/* Note: no locking manipulations needed */
 	for (;;)
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 166aab7a93c..a8d95e0f1c1 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2115,18 +2115,49 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
 
 static bool
 heapam_scan_bitmap_next_block(TableScanDesc scan,
-							  TBMIterateResult *tbmres,
+							  BlockNumber *blockno,
+							  bool *recheck,
 							  uint64 *lossy_pages,
 							  uint64 *exact_pages)
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
-	BlockNumber block = tbmres->blockno;
+	BlockNumber block;
 	Buffer		buffer;
 	Snapshot	snapshot;
 	int			ntup;
+	TBMIterateResult *tbmres;
 
 	hscan->rs_cindex = 0;
 	hscan->rs_ntuples = 0;
 
+	*blockno = InvalidBlockNumber;
+	*recheck = true;
+
+	do
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		if (scan->st.bitmap.rs_shared_iterator)
+			tbmres = tbm_shared_iterate(scan->st.bitmap.rs_shared_iterator);
+		else
+			tbmres = tbm_iterate(scan->st.bitmap.rs_iterator);
+
+		if (tbmres == NULL)
+			return false;
+
+		/*
+		 * Ignore any claimed entries past what we think is the end of the
+		 * relation. It may have been extended after the start of our scan (we
+		 * only hold an AccessShareLock, and it could be inserts from this
+		 * backend). We don't take this optimization in SERIALIZABLE
+		 * isolation though, as we need to examine all invisible tuples
+		 * reachable by the index.
+		 */
+	} while (!IsolationIsSerializable() &&
+			 tbmres->blockno >= hscan->rs_nblocks);
+
+	/* Got a valid block */
+	*blockno = tbmres->blockno;
+	*recheck = tbmres->recheck;
+
 	/*
 	 * We can skip fetching the heap page if we don't need any fields from the
 	 * heap, the bitmap entries don't need rechecking, and all tuples on the
@@ -2145,16 +2176,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		return true;
 	}
 
-	/*
-	 * Ignore any claimed entries past what we think is the end of the
-	 * relation. It may have been extended after the start of our scan (we
-	 * only hold an AccessShareLock, and it could be inserts from this
-	 * backend). We don't take this optimization in SERIALIZABLE isolation
-	 * though, as we need to examine all invisible tuples reachable by the
-	 * index.
-	 */
-	if (!IsolationIsSerializable() && block >= hscan->rs_nblocks)
-		return false;
+	block = tbmres->blockno;
 
 	/*
 	 * Acquire pin on the target heap page, trading in any pin we held before.
@@ -2249,12 +2271,18 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	else
 		(*lossy_pages)++;
 
-	return ntup > 0;
+	/*
+	 * Return true to indicate that a valid block was found and the bitmap is
+	 * not exhausted. If there are no visible tuples on this page,
+	 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
+	 * return false, returning control to this function to advance to the
+	 * next block in the bitmap.
+	 */
+	return true;
 }
 
 static bool
 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
-							  TBMIterateResult *tbmres,
 							  TupleTableSlot *slot)
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index f4690a20bb1..89a16f142b7 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -51,8 +51,7 @@ static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
-												BlockNumber blockno);
+static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
 static inline void BitmapPrefetch(BitmapHeapScanState *node,
 								  TableScanDesc scan);
@@ -71,9 +70,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	ExprContext *econtext;
 	TableScanDesc scan;
 	TIDBitmap  *tbm;
-	TBMIterator *tbmiterator = NULL;
-	TBMSharedIterator *shared_tbmiterator = NULL;
-	TBMIterateResult *tbmres;
 	TupleTableSlot *slot;
 	ParallelBitmapHeapState *pstate = node->pstate;
 	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
@@ -85,11 +81,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	slot = node->ss.ss_ScanTupleSlot;
 	scan = node->ss.ss_currentScanDesc;
 	tbm = node->tbm;
-	if (pstate == NULL)
-		tbmiterator = node->tbmiterator;
-	else
-		shared_tbmiterator = node->shared_tbmiterator;
-	tbmres = node->tbmres;
 
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and begin
@@ -105,6 +96,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	 */
 	if (!node->initialized)
 	{
+		TBMIterator *tbmiterator = NULL;
+		TBMSharedIterator *shared_tbmiterator = NULL;
+
 		if (!pstate)
 		{
 			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
@@ -113,8 +107,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				elog(ERROR, "unrecognized result from subplan");
 
 			node->tbm = tbm;
-			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
-			node->tbmres = tbmres = NULL;
+			tbmiterator = tbm_begin_iterate(tbm);
 
 #ifdef USE_PREFETCH
 			if (node->prefetch_maximum > 0)
@@ -166,9 +159,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			}
 
 			/* Allocate a private iterator and attach the shared state to it */
-			node->shared_tbmiterator = shared_tbmiterator =
+			shared_tbmiterator =
 				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
-			node->tbmres = tbmres = NULL;
 
 #ifdef USE_PREFETCH
 			if (node->prefetch_maximum > 0)
@@ -207,47 +199,23 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			node->ss.ss_currentScanDesc = scan;
 		}
 
+		scan->st.bitmap.rs_iterator = tbmiterator;
+		scan->st.bitmap.rs_shared_iterator = shared_tbmiterator;
 		node->initialized = true;
+
+		goto new_page;
 	}
 
 	for (;;)
 	{
-		CHECK_FOR_INTERRUPTS();
-
-		/*
-		 * Get next page of results if needed
-		 */
-		if (tbmres == NULL)
-		{
-			if (!pstate)
-				node->tbmres = tbmres = tbm_iterate(tbmiterator);
-			else
-				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
-			if (tbmres == NULL)
-			{
-				/* no more entries in the bitmap */
-				break;
-			}
-
-			BitmapAdjustPrefetchIterator(node, tbmres->blockno);
-
-			if (!table_scan_bitmap_next_block(scan, tbmres,
-											  &node->stats.lossy_pages,
-											  &node->stats.exact_pages))
-			{
-				/* AM doesn't think this block is valid, skip */
-				continue;
-			}
-
-			/* Adjust the prefetch target */
-			BitmapAdjustPrefetchTarget(node);
-		}
-		else
+		while (table_scan_bitmap_next_tuple(scan, slot))
 		{
 			/*
 			 * Continuing in previously obtained page.
 			 */
+			CHECK_FOR_INTERRUPTS();
+
 #ifdef USE_PREFETCH
 
 			/*
@@ -268,45 +236,64 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				SpinLockRelease(&pstate->mutex);
 			}
 #endif							/* USE_PREFETCH */
+
+			/*
+			 * We issue prefetch requests *after* fetching the current page to
+			 * try to avoid having prefetching interfere with the main I/O.
+			 * Also, this should happen only when we have determined there is
+			 * still something to do on the current page, else we may
+			 * uselessly prefetch the same page we are just about to request
+			 * for real.
+			 */
+			BitmapPrefetch(node, scan);
+
+			/*
+			 * If we are using lossy info, we have to recheck the qual
+			 * conditions at every tuple.
+			 */
+			if (node->recheck)
+			{
+				econtext->ecxt_scantuple = slot;
+				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
+				{
+					/* Fails recheck, so drop it and loop back for another */
+					InstrCountFiltered2(node, 1);
+					ExecClearTuple(slot);
+					continue;
+				}
+			}
+
+			/* OK to return this tuple */
+			return slot;
 		}
 
-		/*
-		 * We issue prefetch requests *after* fetching the current page to try
-		 * to avoid having prefetching interfere with the main I/O. Also, this
-		 * should happen only when we have determined there is still something
-		 * to do on the current page, else we may uselessly prefetch the same
-		 * page we are just about to request for real.
-		 */
-		BitmapPrefetch(node, scan);
+new_page:
+
+		BitmapAdjustPrefetchIterator(node);
 
 		/*
-		 * Attempt to fetch tuple from AM.
+		 * Returns false if the bitmap is exhausted and there are no further
+		 * blocks we need to scan.
 		 */
-		if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
-		{
-			/* nothing more to look at on this page */
-			node->tbmres = tbmres = NULL;
-			continue;
-		}
+		if (!table_scan_bitmap_next_block(scan, &node->blockno,
+										  &node->recheck,
+										  &node->stats.lossy_pages,
+										  &node->stats.exact_pages))
			break;
 
 		/*
-		 * If we are using lossy info, we have to recheck the qual conditions
-		 * at every tuple.
+		 * If serial, we can error out if the prefetch block doesn't stay
+		 * ahead of the current block.
 		 */
-		if (tbmres->recheck)
-		{
-			econtext->ecxt_scantuple = slot;
-			if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-			{
-				/* Fails recheck, so drop it and loop back for another */
-				InstrCountFiltered2(node, 1);
-				ExecClearTuple(slot);
-				continue;
-			}
-		}
-
-		/* OK to return this tuple */
-		return slot;
+		if (node->pstate == NULL &&
+			node->prefetch_iterator &&
+			node->prefetch_blockno < node->blockno)
+			elog(ERROR,
+				 "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
+				 node->prefetch_blockno, node->blockno);
+
+		/* Adjust the prefetch target */
+		BitmapAdjustPrefetchTarget(node);
 	}
 
 	/*
@@ -332,13 +319,17 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 
 /*
 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
+ *
+ * We keep track of how far the prefetch iterator is ahead of the main
+ * iterator in prefetch_pages. For each block the main iterator returns, we
+ * decrement prefetch_pages.
 */
static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
-							 BlockNumber blockno)
+BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;
+	TBMIterateResult *tbmpre;
 
	if (pstate == NULL)
	{
@@ -351,15 +342,22 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
 		}
 		else if (prefetch_iterator)
 		{
-			/* Do not let the prefetch iterator get behind the main one */
-			TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
-
-			if (tbmpre == NULL || tbmpre->blockno != blockno)
-				elog(ERROR, "prefetch and main iterators are out of sync");
+			tbmpre = tbm_iterate(prefetch_iterator);
+			node->prefetch_blockno = tbmpre ? tbmpre->blockno :
+				InvalidBlockNumber;
 		}
 
 		return;
 	}
 
+	/*
+	 * XXX: There is a known issue with keeping the prefetch and current block
+	 * iterators in sync for parallel bitmap table scans. This can lead to
+	 * prefetching blocks that have already been read. See the discussion
+	 * here:
+	 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
+	 * Note that moving the call site of BitmapAdjustPrefetchIterator()
+	 * exacerbates the effects of this bug.
+	 */
 	if (node->prefetch_maximum > 0)
 	{
 		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
@@ -384,7 +382,11 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
 		 * case.
 		 */
 		if (prefetch_iterator)
-			tbm_shared_iterate(prefetch_iterator);
+		{
+			tbmpre = tbm_shared_iterate(prefetch_iterator);
+			node->prefetch_blockno = tbmpre ? tbmpre->blockno :
+				InvalidBlockNumber;
+		}
 	}
}
#endif							/* USE_PREFETCH */
@@ -462,6 +464,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 					break;
 				}
 				node->prefetch_pages++;
+				node->prefetch_blockno = tbmpre->blockno;
 
 				/*
 				 * If we expect not to have to actually read this heap page,
@@ -519,6 +522,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 					break;
 				}
 
+				node->prefetch_blockno = tbmpre->blockno;
+
 				/* As above, skip prefetch if we expect not to need page */
 				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
							  !tbmpre->recheck &&
@@ -575,17 +580,32 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
	PlanState  *outerPlan = outerPlanState(node);
 
-	/* rescan to release any page pin */
-	if (node->ss.ss_currentScanDesc)
+	TableScanDesc scan = node->ss.ss_currentScanDesc;
+
+	if (scan)
+	{
+		/*
+		 * End iteration on iterators saved in scan descriptor.
+		 */
+		if (scan->st.bitmap.rs_shared_iterator)
+		{
+			tbm_end_shared_iterate(scan->st.bitmap.rs_shared_iterator);
+			scan->st.bitmap.rs_shared_iterator = NULL;
+		}
+
+		if (scan->st.bitmap.rs_iterator)
+		{
+			tbm_end_iterate(scan->st.bitmap.rs_iterator);
+			scan->st.bitmap.rs_iterator = NULL;
+		}
+
+		/* rescan to release any page pin */
 		table_rescan(node->ss.ss_currentScanDesc, NULL);
+	}
 
 	/* release bitmaps and buffers if any */
-	if (node->tbmiterator)
-		tbm_end_iterate(node->tbmiterator);
 	if (node->prefetch_iterator)
 		tbm_end_iterate(node->prefetch_iterator);
-	if (node->shared_tbmiterator)
-		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->tbm)
@@ -593,13 +613,13 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
-	node->tbmiterator = NULL;
-	node->tbmres = NULL;
 	node->prefetch_iterator = NULL;
 	node->initialized = false;
-	node->shared_tbmiterator = NULL;
 	node->shared_prefetch_iterator = NULL;
 	node->pvmbuffer = InvalidBuffer;
+	node->recheck = true;
+	node->blockno = InvalidBlockNumber;
+	node->prefetch_blockno = InvalidBlockNumber;
 
 	ExecScanReScan(&node->ss);
 
@@ -653,28 +673,40 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 	 */
 	ExecEndNode(outerPlanState(node));
 
+	if (scanDesc)
+	{
+		/*
+		 * End iteration on iterators saved in scan descriptor.
+		 */
+		if (scanDesc->st.bitmap.rs_shared_iterator)
+		{
+			tbm_end_shared_iterate(scanDesc->st.bitmap.rs_shared_iterator);
+			scanDesc->st.bitmap.rs_shared_iterator = NULL;
+		}
+
+		if (scanDesc->st.bitmap.rs_iterator)
+		{
+			tbm_end_iterate(scanDesc->st.bitmap.rs_iterator);
+			scanDesc->st.bitmap.rs_iterator = NULL;
+		}
+
+		/*
+		 * close table scan
+		 */
+		table_endscan(scanDesc);
+	}
+
 	/*
 	 * release bitmaps and buffers if any
 	 */
-	if (node->tbmiterator)
-		tbm_end_iterate(node->tbmiterator);
 	if (node->prefetch_iterator)
 		tbm_end_iterate(node->prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->shared_tbmiterator)
-		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
-
-	/*
-	 * close heap scan
-	 */
-	if (scanDesc)
-		table_endscan(scanDesc);
-
}
 
/* ----------------------------------------------------------------
@@ -707,8 +739,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
 
 	scanstate->tbm = NULL;
-	scanstate->tbmiterator = NULL;
-	scanstate->tbmres = NULL;
 	scanstate->pvmbuffer = InvalidBuffer;
 
 	/* Zero the statistics counters */
@@ -718,9 +748,11 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->prefetch_pages = 0;
 	scanstate->prefetch_target = 0;
 	scanstate->initialized = false;
-	scanstate->shared_tbmiterator = NULL;
 	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
+	scanstate->recheck = true;
+	scanstate->blockno = InvalidBlockNumber;
+	scanstate->prefetch_blockno = InvalidBlockNumber;
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 114a85dc47c..e1884acf493 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -25,6 +25,9 @@
 
 struct ParallelTableScanDescData;
 
+struct TBMIterator;
+struct TBMSharedIterator;
+
 /*
 * Generic descriptor for table scans. This is the base-class for table scans,
 * which needs to be embedded in the scans of individual AMs.
 */
@@ -37,9 +40,28 @@ typedef struct TableScanDescData
 	int			rs_nkeys;		/* number of scan keys */
 	struct ScanKeyData *rs_key; /* array of scan key descriptors */
 
-	/* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
-	ItemPointerData rs_mintid;
-	ItemPointerData rs_maxtid;
+	/*
+	 * Scan type-specific members
+	 */
+	union
+	{
+		/* Iterators for Bitmap Table Scans */
+		struct
+		{
+			struct TBMIterator *rs_iterator;
+			struct TBMSharedIterator *rs_shared_iterator;
+		}			bitmap;
+
+		/*
+		 * Range of ItemPointers for table_scan_getnextslot_tidrange() to
+		 * scan.
+		 */
+		struct
+		{
+			ItemPointerData rs_mintid;
+			ItemPointerData rs_maxtid;
+		}			tidrange;
+	}			st;
 
 	/*
 	 * Information about type and behaviour of the scan, a bitmask of members
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index be09d180d45..adb478a93ca 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -36,7 +36,6 @@ extern PGDLLIMPORT bool synchronize_seqscans;
 struct BulkInsertStateData;
 struct IndexInfo;
 struct SampleScanState;
-struct TBMIterateResult;
 struct VacuumParams;
 struct ValidateIndexState;
 
@@ -780,26 +779,29 @@ typedef struct TableAmRoutine
 	 */
 
 	/*
-	 * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
-	 * of a bitmap table scan. `scan` was started via table_beginscan_bm().
-	 * Return false if there are no tuples to be found on the page, true
-	 * otherwise.
+	 * Prepare to fetch / check / return tuples from `blockno` as part of a
+	 * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
+	 * false if the bitmap is exhausted and true otherwise.
 	 *
 	 * This will typically read and pin the target block, and do the necessary
 	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
-	 * make sense to perform tuple visibility checks at this time). For some
-	 * AMs it will make more sense to do all the work referencing `tbmres`
-	 * contents here, for others it might be better to defer more work to
-	 * scan_bitmap_next_tuple.
-	 *
-	 * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
-	 * on the page have to be returned, otherwise the tuples at offsets in
-	 * `tbmres->offsets` need to be returned.
+	 * make sense to perform tuple visibility checks at this time).
 	 *
 	 * `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
 	 * incremented by the table AM to indicate whether or not the block's
 	 * representation in the bitmap is lossy.
 	 *
+	 * `recheck` is set by the table AM to indicate whether or not the tuples
+	 * from this block should be rechecked. Tuples from lossy pages will
+	 * always need to be rechecked, but some non-lossy pages' tuples may also
+	 * require recheck.
+	 *
+	 * `blockno` is the current block and is set by the table AM. The table AM
+	 * is responsible for advancing the main iterator, but the bitmap table
+	 * scan code still advances the prefetch iterator. `blockno` is used by
+	 * bitmap table scan code to validate that the prefetch block stays ahead
+	 * of the current block.
+	 *
 	 * XXX: Currently this may only be implemented if the AM uses md.c as its
 	 * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
 	 * blockids directly to the underlying storage. nodeBitmapHeapscan.c
@@ -815,7 +817,8 @@ typedef struct TableAmRoutine
 	 * scan_bitmap_next_tuple need to exist, or neither.
 	 */
 	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
-										   struct TBMIterateResult *tbmres,
+										   BlockNumber *blockno,
+										   bool *recheck,
 										   uint64 *lossy_pages,
 										   uint64 *exact_pages);
 
 	/*
 	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
 	 * if a visible tuple was found, false otherwise.
 	 *
-	 * For some AMs it will make more sense to do all the work referencing
-	 * `tbmres` contents in scan_bitmap_next_block, for others it might be
-	 * better to defer more work to this callback.
-	 *
 	 * Optional callback, but either both scan_bitmap_next_block and
 	 * scan_bitmap_next_tuple need to exist, or neither.
 	 */
 	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
-										   struct TBMIterateResult *tbmres,
 										   TupleTableSlot *slot);
 
 	/*
@@ -959,12 +957,17 @@ static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
 				   int nkeys, struct ScanKeyData *key, bool need_tuple)
 {
+	TableScanDesc result;
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
 	if (need_tuple)
 		flags |= SO_NEED_TUPLES;
 
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+	result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
+										 NULL, flags);
+	result->st.bitmap.rs_shared_iterator = NULL;
+	result->st.bitmap.rs_iterator = NULL;
+	return result;
 }
 
 /*
@@ -1955,21 +1958,28 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
 */
 
/*
- * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
- * a bitmap table scan. `scan` needs to have been started via
- * table_beginscan_bm(). Returns false if there are no tuples to be found on
- * the page, true otherwise.
+ * Prepare to fetch / check / return tuples as part of a bitmap table scan.
+ * `scan` needs to have been started via table_beginscan_bm(). Returns false
+ * if there are no more blocks in the bitmap, true otherwise.
 *
 * `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
 * incremented by the table AM to indicate whether or not the block's
 * representation in the bitmap is lossy.
 *
+ * `recheck` is set by the table AM to indicate whether or not the tuples
+ * from this block should be rechecked.
+ *
+ * `blockno` is the current block; it is set by the table AM and used by the
+ * bitmap table scan code to validate that the prefetch block stays ahead of
+ * the current block.
+ *
 * Note, this is an optionally implemented function, therefore should only be
 * used after verifying the presence (at plan time or such).
 */
static inline bool
table_scan_bitmap_next_block(TableScanDesc scan,
-							 struct TBMIterateResult *tbmres,
+							 BlockNumber *blockno,
+							 bool *recheck,
							 uint64 *lossy_pages,
							 uint64 *exact_pages)
{
@@ -1982,7 +1992,7 @@ table_scan_bitmap_next_block(TableScanDesc scan,
 		elog(ERROR,
 			 "unexpected table_scan_bitmap_next_block call during logical decoding");
 
 	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
-														   tbmres,
+														   blockno, recheck,
 														   lossy_pages,
 														   exact_pages);
}
 
@@ -1997,7 +2007,6 @@
 */
static inline bool
table_scan_bitmap_next_tuple(TableScanDesc scan,
-							 struct TBMIterateResult *tbmres,
							 TupleTableSlot *slot)
{
	/*
@@ -2009,7 +2018,6 @@ table_scan_bitmap_next_tuple(TableScanDesc scan,
 		elog(ERROR,
 			 "unexpected table_scan_bitmap_next_tuple call during logical decoding");
 
 	return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
-														   tbmres,
														   slot);
}
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e4698a28c4f..b67d5186a2d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1833,8 +1833,6 @@ typedef struct SharedBitmapHeapInstrumentation
 *
 *		bitmapqualorig		execution state for bitmapqualorig expressions
 *		tbm					bitmap obtained from child index scan(s)
- *		tbmiterator			iterator for scanning current pages
- *		tbmres				current-page data
 *		pvmbuffer			buffer for visibility-map lookups of prefetched pages
 *		stats				execution statistics
 *		prefetch_iterator	iterator for prefetching ahead of current page
@@ -1842,10 +1840,12 @@
 *		prefetch_target		current target prefetch distance
 *		prefetch_maximum	maximum value for prefetch_target
 *		initialized			is node is ready to iterate
- *		shared_tbmiterator	shared iterator
 *		shared_prefetch_iterator shared iterator for prefetching
 *		pstate				shared state for parallel bitmap scan
 *		sinstrument			statistics for parallel workers
+ *		recheck				do current page's tuples need recheck
+ *		blockno				used to validate pf and current block stay in sync
+ *		prefetch_blockno	used to validate pf stays ahead of current block
 * ----------------
 */
typedef struct BitmapHeapScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprState  *bitmapqualorig;
	TIDBitmap  *tbm;
-	TBMIterator *tbmiterator;
-	TBMIterateResult *tbmres;
	Buffer		pvmbuffer;
	BitmapHeapScanInstrumentation stats;
	TBMIterator *prefetch_iterator;
@@ -1862,10 +1860,12 @@ typedef struct BitmapHeapScanState
	int			prefetch_target;
	int			prefetch_maximum;
	bool		initialized;
-	TBMSharedIterator *shared_tbmiterator;
	TBMSharedIterator *shared_prefetch_iterator;
	ParallelBitmapHeapState *pstate;
	SharedBitmapHeapInstrumentation *sinstrument;
+	bool		recheck;
+	BlockNumber blockno;
+	BlockNumber prefetch_blockno;
} BitmapHeapScanState;
 
/* ----------------
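The executor/AM split above is easier to see outside the diff. Below is a condensed sketch of the new control flow in BitmapHeapNext(), with prefetching, the EXPLAIN counters' use, and the iterator setup stripped out; it is not the committed code, and the function name bitmap_next_sketch() is invented for illustration. Note that a false return from table_scan_bitmap_next_tuple() is no longer an error or an end condition; it simply hands control back so the outer loop can ask the AM for the next block.

    /* Sketch only: one emitted tuple per call, NULL when the bitmap ends. */
    static TupleTableSlot *
    bitmap_next_sketch(BitmapHeapScanState *node, TableScanDesc scan,
                       TupleTableSlot *slot, ExprContext *econtext)
    {
        BlockNumber blockno;
        bool        recheck;
        uint64      lossy_pages = 0, exact_pages = 0;

        /* Outer loop: the AM advances the bitmap iterator itself now. */
        while (table_scan_bitmap_next_block(scan, &blockno, &recheck,
                                            &lossy_pages, &exact_pages))
        {
            /* Inner loop: drain the visible tuples of the current block. */
            while (table_scan_bitmap_next_tuple(scan, slot))
            {
                if (recheck)
                {
                    /* Lossy bitmap info: re-evaluate the original quals. */
                    econtext->ecxt_scantuple = slot;
                    if (!ExecQualAndReset(node->bitmapqualorig, econtext))
                        continue;       /* failed recheck; try next tuple */
                }
                return slot;
            }
            /* Block exhausted (possibly empty): fall through to next block. */
        }
        return NULL;                    /* bitmap exhausted */
    }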
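From the AM side, the revised scan_bitmap_next_block() contract is: advance the bitmap iterator yourself, report the block and recheck flag through the out-parameters, and return false only when the bitmap is exhausted — returning true even for pages that turn out to hold no visible tuples. A skeleton modeled on heapam_scan_bitmap_next_block() above follows; my_scan_bitmap_next_block() and my_fetch_page() are hypothetical names, not PostgreSQL functions, and the end-of-relation skip logic is omitted.

    static bool
    my_scan_bitmap_next_block(TableScanDesc scan,
                              BlockNumber *blockno, bool *recheck,
                              uint64 *lossy_pages, uint64 *exact_pages)
    {
        TBMIterateResult *tbmres;

        /* The AM, not the executor, advances the bitmap iterator now. */
        if (scan->st.bitmap.rs_shared_iterator)
            tbmres = tbm_shared_iterate(scan->st.bitmap.rs_shared_iterator);
        else
            tbmres = tbm_iterate(scan->st.bitmap.rs_iterator);

        if (tbmres == NULL)
            return false;           /* bitmap exhausted: the only false case */

        *blockno = tbmres->blockno; /* lets the executor check the prefetcher */
        *recheck = tbmres->recheck; /* lossy pages always need recheck */

        /* ntuples is -1 for lossy pages; feed the EXPLAIN counters. */
        if (tbmres->ntuples >= 0)
            (*exact_pages)++;
        else
            (*lossy_pages)++;

        /* Pin the page and stash visible tuples for next_tuple() calls. */
        my_fetch_page(scan, tbmres);

        /* True even if the page turned out to have no visible tuples. */
        return true;
    }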
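The union added to TableScanDescData relies on a scan descriptor only ever being driven as one scan type, so the bitmap-scan iterators and the TID-range bounds can overlay each other instead of both widening the struct. A self-contained model of the layout, using stand-in types rather than the real PostgreSQL definitions:

    #include <assert.h>

    struct TBMIterator;             /* opaque, as in the patch */
    struct TBMSharedIterator;

    typedef struct ItemPointerData { unsigned short bytes[3]; } ItemPointerData;

    typedef struct TableScanDescData
    {
        int         rs_flags;       /* SO_TYPE_BITMAPSCAN etc. in the real one */
        union
        {
            struct                  /* used only by bitmap table scans */
            {
                struct TBMIterator *rs_iterator;
                struct TBMSharedIterator *rs_shared_iterator;
            }           bitmap;
            struct                  /* used only by TID-range scans */
            {
                ItemPointerData rs_mintid;
                ItemPointerData rs_maxtid;
            }           tidrange;
        }           st;
    } TableScanDescData;

    int
    main(void)
    {
        TableScanDescData scan = {0};

        /* A bitmap scan touches only the bitmap arm of the union... */
        scan.st.bitmap.rs_iterator = (struct TBMIterator *) 0;
        scan.st.bitmap.rs_shared_iterator = (struct TBMSharedIterator *) 0;

        /* ...so the union costs max(sizeof arms), not their sum. */
        assert(sizeof scan.st >= sizeof scan.st.bitmap);
        assert(sizeof scan.st >= sizeof scan.st.tidrange);
        return 0;
    }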