Diffstat (limited to 'src/backend/executor/nodeBitmapHeapscan.c')
-rw-r--r--   src/backend/executor/nodeBitmapHeapscan.c   341
1 file changed, 1 insertion, 340 deletions
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 3b4ea0f6144..6df34094a13 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -51,10 +51,6 @@ static void BitmapTableScanSetup(BitmapHeapScanState *node);
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
-static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
-static inline void BitmapPrefetch(BitmapHeapScanState *node,
-                                  TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
@@ -62,14 +58,6 @@ static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
  * Do the underlying index scan, build the bitmap, set up the parallel state
  * needed for parallel workers to iterate through the bitmap, and set up the
  * underlying table scan descriptor.
- *
- * For prefetching, we use *two* iterators, one for the pages we are actually
- * scanning and another that runs ahead of the first for prefetching.
- * node->prefetch_pages tracks exactly how many pages ahead the prefetch
- * iterator is.  Also, node->prefetch_target tracks the desired prefetch
- * distance, which starts small and increases up to the
- * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in a
- * scan that stops after a few tuples because of a LIMIT.
  */
 static void
 BitmapTableScanSetup(BitmapHeapScanState *node)
@@ -102,14 +90,6 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
          */
         pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
 
-#ifdef USE_PREFETCH
-        if (node->prefetch_maximum > 0)
-        {
-            pstate->prefetch_iterator =
-                tbm_prepare_shared_iterate(node->tbm);
-        }
-#endif                          /* USE_PREFETCH */
-
         /* We have initialized the shared state so wake up others. */
         BitmapDoneInitializingSharedState(pstate);
     }
@@ -119,15 +99,6 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
                                      pstate->tbmiterator :
                                      InvalidDsaPointer);
 
-#ifdef USE_PREFETCH
-    if (node->prefetch_maximum > 0)
-        node->prefetch_iterator =
-            tbm_begin_iterate(node->tbm, dsa,
-                              pstate ?
-                              pstate->prefetch_iterator :
-                              InvalidDsaPointer);
-#endif                          /* USE_PREFETCH */
-
     /*
      * If this is the first scan of the underlying table, create the table
      * scan descriptor and begin the scan.
@@ -158,7 +129,6 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
     node->initialized = true;
 }
 
-
 /* ----------------------------------------------------------------
  *      BitmapHeapNext
  *
@@ -172,10 +142,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
     TableScanDesc scan;
     TupleTableSlot *slot;
 
-#ifdef USE_PREFETCH
-    ParallelBitmapHeapState *pstate = node->pstate;
-#endif
-
     /*
      * extract necessary information from index scan node
     */
@@ -204,37 +170,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
             CHECK_FOR_INTERRUPTS();
 
-#ifdef USE_PREFETCH
-
-            /*
-             * Try to prefetch at least a few pages even before we get to the
-             * second page if we don't stop reading after the first tuple.
-             */
-            if (!pstate)
-            {
-                if (node->prefetch_target < node->prefetch_maximum)
-                    node->prefetch_target++;
-            }
-            else if (pstate->prefetch_target < node->prefetch_maximum)
-            {
-                /* take spinlock while updating shared state */
-                SpinLockAcquire(&pstate->mutex);
-                if (pstate->prefetch_target < node->prefetch_maximum)
-                    pstate->prefetch_target++;
-                SpinLockRelease(&pstate->mutex);
-            }
-#endif                          /* USE_PREFETCH */
-
-            /*
-             * We issue prefetch requests *after* fetching the current page to
-             * try to avoid having prefetching interfere with the main I/O.
-             * Also, this should happen only when we have determined there is
-             * still something to do on the current page, else we may
-             * uselessly prefetch the same page we are just about to request
-             * for real.
-             */
-            BitmapPrefetch(node, scan);
-
             /*
             * If we are using lossy info, we have to recheck the qual
             * conditions at every tuple.
@@ -257,31 +192,15 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
 new_page:
 
-        BitmapAdjustPrefetchIterator(node);
-
         /*
         * Returns false if the bitmap is exhausted and there are no further
         * blocks we need to scan.
         */
-        if (!table_scan_bitmap_next_block(scan, &node->blockno,
+        if (!table_scan_bitmap_next_block(scan,
                                           &node->recheck,
                                           &node->stats.lossy_pages,
                                           &node->stats.exact_pages))
             break;
-
-        /*
-         * If serial, we can error out if the prefetch block doesn't stay
-         * ahead of the current block.
-         */
-        if (node->pstate == NULL &&
-            !tbm_exhausted(&node->prefetch_iterator) &&
-            node->prefetch_blockno < node->blockno)
-            elog(ERROR,
-                 "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
-                 node->prefetch_blockno, node->blockno);
-
-        /* Adjust the prefetch target */
-        BitmapAdjustPrefetchTarget(node);
     }
 
     /*
@@ -306,225 +225,6 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 }
 
 /*
- * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
- *
- * We keep track of how far the prefetch iterator is ahead of the main
- * iterator in prefetch_pages.  For each block the main iterator returns, we
- * decrement prefetch_pages.
- */
-static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-    ParallelBitmapHeapState *pstate = node->pstate;
-    TBMIterateResult tbmpre;
-
-    if (pstate == NULL)
-    {
-        TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-        if (node->prefetch_pages > 0)
-        {
-            /* The main iterator has closed the distance by one page */
-            node->prefetch_pages--;
-        }
-        else if (!tbm_exhausted(prefetch_iterator))
-        {
-            tbm_iterate(prefetch_iterator, &tbmpre);
-            node->prefetch_blockno = tbmpre.blockno;
-        }
-        return;
-    }
-
-    /*
-     * XXX: There is a known issue with keeping the prefetch and current block
-     * iterators in sync for parallel bitmap table scans. This can lead to
-     * prefetching blocks that have already been read. See the discussion
-     * here:
-     * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
-     * Note that moving the call site of BitmapAdjustPrefetchIterator()
-     * exacerbates the effects of this bug.
-     */
-    if (node->prefetch_maximum > 0)
-    {
-        TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-        SpinLockAcquire(&pstate->mutex);
-        if (pstate->prefetch_pages > 0)
-        {
-            pstate->prefetch_pages--;
-            SpinLockRelease(&pstate->mutex);
-        }
-        else
-        {
-            /* Release the mutex before iterating */
-            SpinLockRelease(&pstate->mutex);
-
-            /*
-             * In case of shared mode, we can not ensure that the current
-             * blockno of the main iterator and that of the prefetch iterator
-             * are same.  It's possible that whatever blockno we are
-             * prefetching will be processed by another process.  Therefore,
-             * we don't validate the blockno here as we do in non-parallel
-             * case.
-             */
-            if (!tbm_exhausted(prefetch_iterator))
-            {
-                tbm_iterate(prefetch_iterator, &tbmpre);
-                node->prefetch_blockno = tbmpre.blockno;
-            }
-        }
-    }
-#endif                          /* USE_PREFETCH */
-}
-
-/*
- * BitmapAdjustPrefetchTarget - Adjust the prefetch target
- *
- * Increase prefetch target if it's not yet at the max.  Note that
- * we will increase it to zero after fetching the very first
- * page/tuple, then to one after the second tuple is fetched, then
- * it doubles as later pages are fetched.
- */
-static inline void
-BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-    ParallelBitmapHeapState *pstate = node->pstate;
-
-    if (pstate == NULL)
-    {
-        if (node->prefetch_target >= node->prefetch_maximum)
-             /* don't increase any further */ ;
-        else if (node->prefetch_target >= node->prefetch_maximum / 2)
-            node->prefetch_target = node->prefetch_maximum;
-        else if (node->prefetch_target > 0)
-            node->prefetch_target *= 2;
-        else
-            node->prefetch_target++;
-        return;
-    }
-
-    /* Do an unlocked check first to save spinlock acquisitions. */
-    if (pstate->prefetch_target < node->prefetch_maximum)
-    {
-        SpinLockAcquire(&pstate->mutex);
-        if (pstate->prefetch_target >= node->prefetch_maximum)
-             /* don't increase any further */ ;
-        else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
-            pstate->prefetch_target = node->prefetch_maximum;
-        else if (pstate->prefetch_target > 0)
-            pstate->prefetch_target *= 2;
-        else
-            pstate->prefetch_target++;
-        SpinLockRelease(&pstate->mutex);
-    }
-#endif                          /* USE_PREFETCH */
-}
-
-/*
- * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
- */
-static inline void
-BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
-{
-#ifdef USE_PREFETCH
-    ParallelBitmapHeapState *pstate = node->pstate;
-
-    if (pstate == NULL)
-    {
-        TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-        if (!tbm_exhausted(prefetch_iterator))
-        {
-            while (node->prefetch_pages < node->prefetch_target)
-            {
-                TBMIterateResult tbmpre;
-                bool        skip_fetch;
-
-                if (!tbm_iterate(prefetch_iterator, &tbmpre))
-                {
-                    /* No more pages to prefetch */
-                    Assert(!BlockNumberIsValid(tbmpre.blockno));
-                    tbm_end_iterate(prefetch_iterator);
-                    break;
-                }
-                node->prefetch_pages++;
-                node->prefetch_blockno = tbmpre.blockno;
-
-                /*
-                 * If we expect not to have to actually read this heap page,
-                 * skip this prefetch call, but continue to run the prefetch
-                 * logic normally.  (Would it be better not to increment
-                 * prefetch_pages?)
-                 */
-                skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-                              !tbmpre.recheck &&
-                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                             tbmpre.blockno,
-                                             &node->pvmbuffer));
-
-                if (!skip_fetch)
-                    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
-            }
-        }
-
-        return;
-    }
-
-    if (pstate->prefetch_pages < pstate->prefetch_target)
-    {
-        TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-        if (!tbm_exhausted(prefetch_iterator))
-        {
-            while (1)
-            {
-                TBMIterateResult tbmpre;
-                bool        do_prefetch = false;
-                bool        skip_fetch;
-
-                /*
-                 * Recheck under the mutex.  If some other process has already
-                 * done enough prefetching then we need not to do anything.
-                 */
-                SpinLockAcquire(&pstate->mutex);
-                if (pstate->prefetch_pages < pstate->prefetch_target)
-                {
-                    pstate->prefetch_pages++;
-                    do_prefetch = true;
-                }
-                SpinLockRelease(&pstate->mutex);
-
-                if (!do_prefetch)
-                    return;
-
-                if (!tbm_iterate(prefetch_iterator, &tbmpre))
-                {
-                    Assert(!BlockNumberIsValid(tbmpre.blockno));
-                    /* No more pages to prefetch */
-                    tbm_end_iterate(prefetch_iterator);
-                    break;
-                }
-
-                node->prefetch_blockno = tbmpre.blockno;
-
-                /* As above, skip prefetch if we expect not to need page */
-                skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-                              !tbmpre.recheck &&
-                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                             tbmpre.blockno,
-                                             &node->pvmbuffer));
-
-                if (!skip_fetch)
-                    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
-            }
-        }
-    }
-#endif                          /* USE_PREFETCH */
-}
-
-/*
  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
  */
 static bool
@@ -580,24 +280,12 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
         table_rescan(node->ss.ss_currentScanDesc, NULL);
     }
 
-    /* If we did not already clean up the prefetch iterator, do so now. */
-    if (!tbm_exhausted(&node->prefetch_iterator))
-        tbm_end_iterate(&node->prefetch_iterator);
-
     /* release bitmaps and buffers if any */
     if (node->tbm)
         tbm_free(node->tbm);
-    if (node->pvmbuffer != InvalidBuffer)
-        ReleaseBuffer(node->pvmbuffer);
     node->tbm = NULL;
     node->initialized = false;
-    node->pvmbuffer = InvalidBuffer;
     node->recheck = true;
-    /* Only used for serial BHS */
-    node->blockno = InvalidBlockNumber;
-    node->prefetch_blockno = InvalidBlockNumber;
-    node->prefetch_pages = 0;
-    node->prefetch_target = -1;
 
     ExecScanReScan(&node->ss);
@@ -666,17 +354,11 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
         table_endscan(scanDesc);
     }
 
-    /* If we did not already clean up the prefetch iterator, do so now. */
-    if (!tbm_exhausted(&node->prefetch_iterator))
-        tbm_end_iterate(&node->prefetch_iterator);
-
     /*
     * release bitmaps and buffers if any
     */
     if (node->tbm)
         tbm_free(node->tbm);
-    if (node->pvmbuffer != InvalidBuffer)
-        ReleaseBuffer(node->pvmbuffer);
 }
@@ -709,18 +391,13 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
 
     scanstate->tbm = NULL;
-    scanstate->pvmbuffer = InvalidBuffer;
 
     /* Zero the statistics counters */
    memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
 
-    scanstate->prefetch_pages = 0;
-    scanstate->prefetch_target = -1;
     scanstate->initialized = false;
     scanstate->pstate = NULL;
     scanstate->recheck = true;
-    scanstate->blockno = InvalidBlockNumber;
-    scanstate->prefetch_blockno = InvalidBlockNumber;
 
     /*
     * Miscellaneous initialization
@@ -760,13 +437,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     scanstate->bitmapqualorig =
         ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
 
-    /*
-     * Maximum number of prefetches for the tablespace if configured,
-     * otherwise the current value of the effective_io_concurrency GUC.
-     */
-    scanstate->prefetch_maximum =
-        get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
-
     scanstate->ss.ss_currentRelation = currentRelation;
@@ -870,12 +540,9 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
         sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
 
     pstate->tbmiterator = 0;
-    pstate->prefetch_iterator = 0;
 
     /* Initialize the mutex */
     SpinLockInit(&pstate->mutex);
-    pstate->prefetch_pages = 0;
-    pstate->prefetch_target = -1;
     pstate->state = BM_INITIAL;
 
     ConditionVariableInit(&pstate->cv);
@@ -912,17 +579,11 @@ ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
         return;
 
     pstate->state = BM_INITIAL;
-    pstate->prefetch_pages = 0;
-    pstate->prefetch_target = -1;
 
     if (DsaPointerIsValid(pstate->tbmiterator))
         tbm_free_shared_area(dsa, pstate->tbmiterator);
-    if (DsaPointerIsValid(pstate->prefetch_iterator))
-        tbm_free_shared_area(dsa, pstate->prefetch_iterator);
-
     pstate->tbmiterator = InvalidDsaPointer;
-    pstate->prefetch_iterator = InvalidDsaPointer;
 }
 
 /* ----------------------------------------------------------------
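Editor's note on the removed ramp-up heuristic: the deleted BitmapAdjustPrefetchTarget grew the prefetch distance slowly, so a scan cut short by a LIMIT would not waste I/O, then doubled it per page up to prefetch_maximum. The following minimal standalone C sketch reproduces just that growth curve; the name adjust_prefetch_target and the demo harness are hypothetical, not part of the PostgreSQL source.

#include <stdio.h>

/* Growth curve of the removed heuristic: -1 warms up to 0 and then 1,
 * doubles per page fetched, and jumps straight to the cap once it
 * passes half of it. */
static int
adjust_prefetch_target(int target, int maximum)
{
    if (target >= maximum)
        return target;          /* don't increase any further */
    if (target >= maximum / 2)
        return maximum;         /* close enough: jump to the cap */
    if (target > 0)
        return target * 2;      /* exponential growth phase */
    return target + 1;          /* -1 -> 0 -> 1 warm-up */
}

int
main(void)
{
    int     target = -1;        /* initial value, as in the removed code */

    /* With a prefetch_maximum of 16 the target climbs 0, 1, 2, 4, 8, 16
     * and then stays at 16. */
    for (int page = 0; page < 10; page++)
    {
        target = adjust_prefetch_target(target, 16);
        printf("after page %d: target = %d\n", page, target);
    }
    return 0;
}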
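The deleted parallel paths also illustrate the recheck-under-lock idiom: read shared state without the lock to skip it in the common case, then recheck under the lock before mutating. Below is a generic sketch of that idiom with a pthread mutex substituted for PostgreSQL's spinlocks and hypothetical names (shared_budget, try_claim_prefetch); it illustrates the pattern, not PostgreSQL's API.

#include <pthread.h>
#include <stdbool.h>

struct shared_budget
{
    pthread_mutex_t mutex;
    int         pages;          /* prefetches issued so far */
    int         target;         /* current prefetch distance goal */
};

/* Claim one unit of prefetch budget, or report that other workers have
 * already done enough prefetching. */
static bool
try_claim_prefetch(struct shared_budget *s)
{
    bool        claimed = false;

    /* Unlocked peek first, to save lock acquisitions.  The value may be
     * stale; the locked recheck below makes the final decision. */
    if (s->pages >= s->target)
        return false;

    /* Recheck under the lock; another worker may have raced us here. */
    pthread_mutex_lock(&s->mutex);
    if (s->pages < s->target)
    {
        s->pages++;
        claimed = true;
    }
    pthread_mutex_unlock(&s->mutex);

    return claimed;
}

int
main(void)
{
    struct shared_budget s = {PTHREAD_MUTEX_INITIALIZER, 0, 4};

    /* Single-threaded demo: claims succeed until pages reaches target. */
    while (try_claim_prefetch(&s))
        ;
    return (s.pages == s.target) ? 0 : 1;
}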