diff options
author | Peter Geoghegan <pg@bowt.ie> | 2025-05-07 15:20:42 -0400 |
---|---|---|
committer | Peter Geoghegan <pg@bowt.ie> | 2025-05-07 15:20:42 -0400 |
commit | 5f4d98d4f3718cf7a0597c37f3ee4a4485de6ef8 (patch) | |
tree | b635f978d9fb77102b8a7b67d249dc3e7c1f46c9 | |
parent | 7e25c9363a82b6974c1ca2303ae8ded98af3bb24 (diff) | |
download | postgresql-5f4d98d4f3718cf7a0597c37f3ee4a4485de6ef8.tar.gz postgresql-5f4d98d4f3718cf7a0597c37f3ee4a4485de6ef8.zip |
Prevent premature nbtree array advancement.
nbtree array index scans could fail to return matching tuples in rare
cases where the missed tuples cover key space that the scan's arrays
incorrectly indicate has already been read. These cases involved nearby
tuples with NULL values that were evaluated using a skip array key while
in pstate.forcenonrequired mode.
To fix, prevent forcenonrequired mode from prematurely advancing the
scan's array keys beyond key space that the scan has yet to read tuples
from: reset the scan's array keys (to the first elements in the current
scan direction) before the _bt_checkkeys call for pstate.finaltup. That
way _bt_checkkeys starts from a clean slate, which ensures that it will
call _bt_advance_array_keys (while passing it sktrig_required=true).
This reliably restores the invariant that the scan's arrays always
accurately track its progress through the index's key space (at least
when the scan is "between pages").
Oversight in commit 8a510275, which optimized nbtree search scan key
comparisons.
Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Mark Dilger <mark.dilger@enterprisedb.com>
Discussion: https://postgr.es/m/CAH2-WzmodSE+gpTd1CRGU9ez8ytyyDS+Kns2r9NzgUp1s56kpw@mail.gmail.com
-rw-r--r-- | src/backend/access/nbtree/nbtsearch.c | 8 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtutils.c | 42 |
2 files changed, 39 insertions, 11 deletions
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 77264ddeecb..fe9a3886913 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1790,9 +1790,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, IndexTuple itup = (IndexTuple) PageGetItem(page, iid); int truncatt; - truncatt = BTreeTupleGetNAtts(itup, rel); + /* Reset arrays, per _bt_set_startikey contract */ + if (pstate.forcenonrequired) + _bt_start_array_keys(scan, dir); pstate.forcenonrequired = false; pstate.startikey = 0; /* _bt_set_startikey ignores P_HIKEY */ + + truncatt = BTreeTupleGetNAtts(itup, rel); _bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt); } @@ -1879,8 +1883,10 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, pstate.offnum = offnum; if (arrayKeys && offnum == minoff && pstate.forcenonrequired) { + /* Reset arrays, per _bt_set_startikey contract */ pstate.forcenonrequired = false; pstate.startikey = 0; + _bt_start_array_keys(scan, dir); } passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys, itup, indnatts); diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index c580c2bf527..1a15dfcb7d3 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -2489,13 +2489,14 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, * primscan's first page would mislead _bt_advance_array_keys, which expects * pstate.nskipadvances to be representative of every first page's key space.) * - * Caller must reset startikey and forcenonrequired ahead of the _bt_checkkeys - * call for pstate.finaltup iff we set forcenonrequired=true. This will give - * _bt_checkkeys the opportunity to call _bt_advance_array_keys once more, - * with sktrig_required=true, to advance the arrays that were ignored during - * checks of all of the page's prior tuples. Caller doesn't need to do this - * on the rightmost/leftmost page in the index (where pstate.finaltup isn't - * set), since forcenonrequired won't be set here by us in the first place. + * Caller must call _bt_start_array_keys and reset startikey/forcenonrequired + * ahead of the finaltup _bt_checkkeys call when we set forcenonrequired=true. + * This will give _bt_checkkeys the opportunity to call _bt_advance_array_keys + * with sktrig_required=true, restoring the invariant that the scan's required + * arrays always track the scan's progress through the index's key space. + * Caller won't need to do this on the rightmost/leftmost page in the index + * (where pstate.finaltup isn't ever set), since forcenonrequired will never + * be set here in the first place. */ void _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate) @@ -2556,10 +2557,31 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate) if (key->sk_flags & SK_ROW_HEADER) { /* - * Can't let pstate.startikey get set to an ikey beyond a - * RowCompare inequality + * RowCompare inequality. + * + * Only the first subkey from a RowCompare can ever be marked + * required (that happens when the row header is marked required). + * There is no simple, general way for us to transitively deduce + * whether or not every tuple on the page satisfies a RowCompare + * key based only on firsttup and lasttup -- so we just give up. */ - break; /* unsafe */ + if (!start_past_saop_eq && !so->skipScan) + break; /* unsafe to go further */ + + /* + * We have to be even more careful with RowCompares that come + * after an array: we assume it's unsafe to even bypass the array. + * Calling _bt_start_array_keys to recover the scan's arrays + * following use of forcenonrequired mode isn't compatible with + * _bt_check_rowcompare's continuescan=false behavior with NULL + * row compare members. _bt_advance_array_keys must not make a + * decision on the basis of a key not being satisfied in the + * opposite-to-scan direction until the scan reaches a leaf page + * where the same key begins to be satisfied in scan direction. + * The _bt_first !used_all_subkeys behavior makes this limitation + * hard to work around some other way. + */ + return; /* completely unsafe to set pstate.startikey */ } if (key->sk_strategy != BTEqualStrategyNumber) { |