aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtutils.c
diff options
context:
space:
mode:
authorPeter Geoghegan <pg@bowt.ie>2025-07-02 09:40:49 -0400
committerPeter Geoghegan <pg@bowt.ie>2025-07-02 09:40:49 -0400
commitf09816a0a7c138751b76ba3676adb75c94be2ab0 (patch)
tree86dc275a1315db55ca95f08edd9bd62dbb7d9466 /src/backend/access/nbtree/nbtutils.c
parent8eede2c7200fba0eae40a19ca78939fd0dc0ec5b (diff)
downloadpostgresql-f09816a0a7c138751b76ba3676adb75c94be2ab0.tar.gz
postgresql-f09816a0a7c138751b76ba3676adb75c94be2ab0.zip
Make handling of redundant nbtree keys more robust.
nbtree preprocessing's handling of redundant (and contradictory) keys created problems for scans with = arrays. It was just about possible for a scan with an = array key and one or more redundant keys (keys that preprocessing could not eliminate due to an incomplete opfamily and a cross-type key) to get stuck. Testing has shown that infinite cycling where the scan never manages to make forward progress was possible. This could happen when the scan's arrays were reset in _bt_readpage's forcenonrequired=true path (added by bugfix commit 5f4d98d4) when the arrays weren't at least advanced up to the same point that they were in at the start of the _bt_readpage call. Earlier redundant keys prevented the finaltup call to _bt_advance_array_keys from reaching lower-order keys that needed to be used to sufficiently advance the scan's arrays. To fix, make preprocessing leave the scan's keys in a state that is as close as possible to how it'll usually leave them (in the common case where there are no redundant keys that preprocessing failed to eliminate). Now nbtree preprocessing _reliably_ leaves behind at most one required >/>= key per index column, and at most one required </<= key per index column. Columns that have one or more = keys that are eligible to be marked required (based on the traditional rules) prioritize the = keys over redundant inequality keys; they'll _reliably_ be left with only one of the = keys as the index column's only required key. Keys that are not marked required (whether due to the new preprocessing step running or for some other reason) are relocated to the end of the so->keyData[] array as needed. That way they'll always be evaluated after the scan's required keys, and so cannot prevent code in places like _bt_advance_array_keys and _bt_first from reaching a required key. Also teach _bt_first to decide which initial positioning keys to use based on the same requiredness markings that have long been used by _bt_checkkeys/_bt_advance_array_keys. 
This is a necessary condition for reliably avoiding infinite cycling. _bt_advance_array_keys expects to be able to reason about what'll happen in the next _bt_first call should it start another primitive index scan, by evaluating inequality keys that were marked required in the opposite-to-scan scan direction only. Now everybody (_bt_first, _bt_checkkeys, and _bt_advance_array_keys) will always agree on which exact key will be used on each index column to start and/or end the scan (except when row compare keys are involved, which have similar problems not addressed by this commit). An upcoming commit will finish off the work started by this commit by harmonizing how _bt_first, _bt_checkkeys, and _bt_advance_array_keys apply row compare keys to start and end scans. This fixes what was arguably an oversight in either commit 5f4d98d4 or commit 8a510275. Author: Peter Geoghegan <pg@bowt.ie> Reviewed-By: Heikki Linnakangas <heikki.linnakangas@iki.fi> Discussion: https://postgr.es/m/CAH2-Wz=ds4M+3NXMgwxYxqU8MULaLf696_v5g=9WNmWL2=Uo2A@mail.gmail.com Backpatch-through: 18
Diffstat (limited to 'src/backend/access/nbtree/nbtutils.c')
-rw-r--r--src/backend/access/nbtree/nbtutils.c136
1 files changed, 11 insertions, 125 deletions
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index c71d1b6f2e1..eb6dbfda33c 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra
static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
bool *skip_array_set);
-static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
bool readpagetup, int sktrig, bool *scanBehind);
@@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
int sktrig, bool sktrig_required);
#ifdef USE_ASSERT_CHECKING
-static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir);
static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
#endif
static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
@@ -1035,73 +1033,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
}
/*
- * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required
- *
- * Called when _bt_advance_array_keys decides to start a new primitive index
- * scan on the basis of the current scan position being before the position
- * that _bt_first is capable of repositioning the scan to by applying an
- * inequality operator required in the opposite-to-scan direction only.
- *
- * Although equality strategy scan keys (for both arrays and non-arrays alike)
- * are either marked required in both directions or in neither direction,
- * there is a sense in which non-required arrays behave like required arrays.
- * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)",
- * the scan key on "c" is non-required, but nevertheless enables positioning
- * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the
- * first descent of the tree by _bt_first. Later on, there could also be a
- * second descent, that places the scan right before tuples >= "(200, 3, 5)".
- * _bt_first must never be allowed to build an insertion scan key whose "c"
- * entry is set to a value other than 5, the "c" array's first element/value.
- * (Actually, it's the first in the current scan direction. This example uses
- * a forward scan.)
- *
- * Calling here resets the array scan key elements for the scan's non-required
- * arrays. This is strictly necessary for correctness in a subset of cases
- * involving "required in opposite direction"-triggered primitive index scans.
- * Not all callers are at risk of _bt_first using a non-required array like
- * this, but advancement always resets the arrays when another primitive scan
- * is scheduled, just to keep things simple. Array advancement even makes
- * sure to reset non-required arrays during scans that have no inequalities.
- * (Advancement still won't call here when there are no inequalities, though
- * that's just because it's all handled indirectly instead.)
- *
- * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that
- * everybody got this right.
- *
- * Note: In practice almost all SAOP arrays are marked required during
- * preprocessing (if necessary by generating skip arrays). It is hardly ever
- * truly necessary to call here, but consistently doing so is simpler.
- */
-static void
-_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir)
-{
- Relation rel = scan->indexRelation;
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int arrayidx = 0;
-
- for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
- {
- ScanKey cur = so->keyData + ikey;
- BTArrayKeyInfo *array = NULL;
-
- if (!(cur->sk_flags & SK_SEARCHARRAY) ||
- cur->sk_strategy != BTEqualStrategyNumber)
- continue;
-
- array = &so->arrayKeys[arrayidx++];
- Assert(array->scan_key == ikey);
-
- if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
- continue;
-
- Assert(array->num_elems != -1); /* No non-required skip arrays */
-
- _bt_array_set_low_or_high(rel, cur, array,
- ScanDirectionIsForward(dir));
- }
-}
-
-/*
* _bt_tuple_before_array_skeys() -- too early to advance required arrays?
*
* We always compare the tuple using the current array keys (which we assume
@@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
*/
if (so->needPrimScan)
{
- Assert(_bt_verify_arrays_bt_first(scan, dir));
-
/*
* Flag was set -- must call _bt_first again, which will reset the
* scan's needPrimScan flag
@@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
*/
else if (has_required_opposite_direction_only && pstate->finaltup &&
unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup)))
- {
- /*
- * Make sure that any SAOP arrays that were not marked required by
- * preprocessing are reset to their first element for this direction
- */
- _bt_rewind_nonrequired_arrays(scan, dir);
goto new_prim_scan;
- }
continue_scan:
@@ -2045,8 +1967,6 @@ continue_scan:
*/
so->oppositeDirCheck = has_required_opposite_direction_only;
- _bt_rewind_nonrequired_arrays(scan, dir);
-
/*
* skip by setting "look ahead" mechanism's offnum for forwards scans
* (backwards scans check scanBehind flag directly instead)
@@ -2143,48 +2063,6 @@ end_toplevel_scan:
#ifdef USE_ASSERT_CHECKING
/*
- * Verify that the scan's qual state matches what we expect at the point that
- * _bt_start_prim_scan is about to start a just-scheduled new primitive scan.
- *
- * We enforce a rule against non-required array scan keys: they must start out
- * with whatever element is the first for the scan's current scan direction.
- * See _bt_rewind_nonrequired_arrays comments for an explanation.
- */
-static bool
-_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir)
-{
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int arrayidx = 0;
-
- for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
- {
- ScanKey cur = so->keyData + ikey;
- BTArrayKeyInfo *array = NULL;
- int first_elem_dir;
-
- if (!(cur->sk_flags & SK_SEARCHARRAY) ||
- cur->sk_strategy != BTEqualStrategyNumber)
- continue;
-
- array = &so->arrayKeys[arrayidx++];
-
- if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
- ((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
- continue;
-
- if (ScanDirectionIsForward(dir))
- first_elem_dir = 0;
- else
- first_elem_dir = array->num_elems - 1;
-
- if (array->cur_elem != first_elem_dir)
- return false;
- }
-
- return _bt_verify_keys_with_arraykeys(scan);
-}
-
-/*
* Verify that the scan's "so->keyData[]" scan keys are in agreement with
* its array key state
*/
@@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int last_sk_attno = InvalidAttrNumber,
arrayidx = 0;
+ bool nonrequiredseen = false;
if (!so->qual_ok)
return false;
@@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
if (array->num_elems != -1 &&
cur->sk_argument != array->elem_values[array->cur_elem])
return false;
- if (last_sk_attno > cur->sk_attno)
- return false;
+ if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+ {
+ if (last_sk_attno > cur->sk_attno)
+ return false;
+ if (nonrequiredseen)
+ return false;
+ }
+ else
+ nonrequiredseen = true;
+
last_sk_attno = cur->sk_attno;
}
@@ -2551,7 +2438,6 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{
/* Scan key isn't marked required (corner case) */
- Assert(!(key->sk_flags & SK_ROW_HEADER));
break; /* unsafe */
}
if (key->sk_flags & SK_ROW_HEADER)