aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtree.c
diff options
context:
space:
mode:
authorPeter Geoghegan <pg@bowt.ie>2024-11-08 13:10:10 -0500
committerPeter Geoghegan <pg@bowt.ie>2024-11-08 13:10:10 -0500
commitb5ee4e52026b03b19ba47cde27bed9cf740576cc (patch)
treeba868ef9228a936cd44f84e008cb8e674e3c5806 /src/backend/access/nbtree/nbtree.c
parent14e87ffa5c543b5f30ead7413084c25f7735039f (diff)
downloadpostgresql-b5ee4e52026b03b19ba47cde27bed9cf740576cc.tar.gz
postgresql-b5ee4e52026b03b19ba47cde27bed9cf740576cc.zip
Avoid nbtree parallel scan currPos confusion.
Commit 1bd4bc85, which refactored nbtree sibling link traversal, made _bt_parallel_seize reset the scan's currPos so that things were consistent with the state of a serial backend moving between pages. This overlooked the fact that _bt_readnextpage relied on the existing currPos state to decide when to end the scan -- even though it came from before the scan was seized. As a result of all this, parallel nbtree scans could needlessly behave like full index scans. To fix, teach _bt_readnextpage to explicitly allow the use of an already read page's so->currPos when deciding whether to end the scan -- even during parallel index scans (allow it consistently now). This requires moving _bt_readnextpage's seizure of the scan to earlier in its loop. That way _bt_readnextpage either deals with the true so->currPos state, or an initialized-by-_bt_parallel_seize currPos state set from when the scan was seized. Now _bt_steppage (the most important _bt_readnextpage caller) takes the same uniform approach to setting up its call using details taken from so->currPos -- regardless of whether the scan happens to be parallel or serial. The new loop structure in _bt_readnextpage is prone to getting confused by P_NONE blknos set when the rightmost or leftmost page was reached. We could avoid that by adding an explicit check, but that would be ugly. Avoid this problem by teaching _bt_parallel_seize to end the parallel scan instead of returning a P_NONE next block/blkno. Doing things this way was arguably a missed opportunity for commit 1bd4bc85. It allows us to remove a similar "blkno == P_NONE" check from _bt_first. Oversight in commit 1bd4bc85, which refactored sibling link traversal (as part of optimizing nbtree backward scan locking). Author: Peter Geoghegan <pg@bowt.ie> Reported-By: Masahiro Ikeda <ikedamsh@oss.nttdata.com> Diagnosed-By: Masahiro Ikeda <ikedamsh@oss.nttdata.com> Reviewed-By: Masahiro Ikeda <ikedamsh@oss.nttdata.com> Discussion: https://postgr.es/m/f8efb9c0f8d1a71b44fd7f8e42e49c25@oss.nttdata.com
Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r--src/backend/access/nbtree/nbtree.c32
1 files changed, 24 insertions, 8 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 2919b12639d..dd76fe1da90 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -596,9 +596,7 @@ btparallelrescan(IndexScanDesc scan)
* scan, and *last_curr_page returns the page that *next_scan_page came from.
* An invalid *next_scan_page means the scan hasn't yet started, or that
* caller needs to start the next primitive index scan (if it's the latter
- * case we'll set so.needPrimScan). The first time a participating process
- * reaches the last page, it will return true and set *next_scan_page to
- * P_NONE; after that, further attempts to seize the scan will return false.
+ * case we'll set so.needPrimScan).
*
* Callers should ignore the value of *next_scan_page and *last_curr_page if
* the return value is false.
@@ -608,12 +606,13 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
BlockNumber *last_curr_page, bool first)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
- bool exit_loop = false;
- bool status = true;
+ bool exit_loop = false,
+ status = true,
+ endscan = false;
ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
BTParallelScanDesc btscan;
- *next_scan_page = P_NONE;
+ *next_scan_page = InvalidBlockNumber;
*last_curr_page = InvalidBlockNumber;
/*
@@ -657,6 +656,13 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
/* We're done with this parallel index scan */
status = false;
}
+ else if (btscan->btps_pageStatus == BTPARALLEL_IDLE &&
+ btscan->btps_nextScanPage == P_NONE)
+ {
+ /* End this parallel index scan */
+ status = false;
+ endscan = true;
+ }
else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
{
Assert(so->numArrayKeys);
@@ -673,7 +679,6 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
array->cur_elem = btscan->btps_arrElems[i];
skey->sk_argument = array->elem_values[array->cur_elem];
}
- *next_scan_page = InvalidBlockNumber;
exit_loop = true;
}
else
@@ -701,6 +706,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
* of advancing it to a new page!
*/
btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
+ Assert(btscan->btps_nextScanPage != P_NONE);
*next_scan_page = btscan->btps_nextScanPage;
*last_curr_page = btscan->btps_lastCurrPage;
exit_loop = true;
@@ -712,6 +718,10 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
}
ConditionVariableCancelSleep();
+ /* When the scan has reached the rightmost (or leftmost) page, end it */
+ if (endscan)
+ _bt_parallel_done(scan);
+
return status;
}
@@ -724,6 +734,10 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
* that it can be passed to _bt_parallel_primscan_schedule, should caller
* determine that another primitive index scan is required.
*
+ * If caller's next_scan_page is P_NONE, the scan has reached the index's
+ * rightmost/leftmost page. This is treated as reaching the end of the scan
+ * within _bt_parallel_seize.
+ *
* Note: unlike the serial case, parallel scans don't need to remember both
* sibling links. next_scan_page is whichever link is next given the scan's
* direction. That's all we'll ever need, since the direction of a parallel
@@ -736,6 +750,8 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber next_scan_page,
ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
BTParallelScanDesc btscan;
+ Assert(BlockNumberIsValid(next_scan_page));
+
btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
parallel_scan->ps_offset);
@@ -770,7 +786,7 @@ _bt_parallel_done(IndexScanDesc scan)
/*
* Should not mark parallel scan done when there's still a pending
- * primitive index scan (defensive)
+ * primitive index scan
*/
if (so->needPrimScan)
return;