diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/nbtree/nbtpreprocesskeys.c | 1 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 1 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtsearch.c | 77 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtutils.c | 546 | ||||
-rw-r--r-- | src/include/access/nbtree.h | 11 |
5 files changed, 483 insertions, 153 deletions
diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index 791aa4ece40..1420b0aee3a 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -1390,6 +1390,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) arrayKeyData = (ScanKey) palloc(numArrayKeyData * sizeof(ScanKeyData)); /* Allocate space for per-array data in the workspace context */ + so->skipScan = (numSkipArrayKeys > 0); so->arrayKeys = (BTArrayKeyInfo *) palloc(numArrayKeys * sizeof(BTArrayKeyInfo)); /* Allocate space for ORDER procs used to help _bt_checkkeys */ diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 77781cb9002..accc7fe8bbe 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -349,6 +349,7 @@ btbeginscan(Relation rel, int nkeys, int norderbys) else so->keyData = NULL; + so->skipScan = false; so->needPrimScan = false; so->scanBehind = false; so->oppositeDirCheck = false; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 1ef2cb2b55e..4369582435e 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1648,47 +1648,14 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, pstate.finaltup = NULL; pstate.page = page; pstate.firstpage = firstpage; + pstate.forcenonrequired = false; + pstate.startikey = 0; pstate.offnum = InvalidOffsetNumber; pstate.skip = InvalidOffsetNumber; pstate.continuescan = true; /* default assumption */ - pstate.prechecked = false; - pstate.firstmatch = false; pstate.rechecks = 0; pstate.targetdistance = 0; - /* - * Prechecking the value of the continuescan flag for the last item on the - * page (for backwards scan it will be the first item on a page). If we - * observe it to be true, then it should be true for all other items. This - * allows us to do significant optimizations in the _bt_checkkeys() - * function for all the items on the page. - * - * With the forward scan, we do this check for the last item on the page - * instead of the high key. It's relatively likely that the most - * significant column in the high key will be different from the - * corresponding value from the last item on the page. So checking with - * the last item on the page would give a more precise answer. - * - * We skip this for the first page read by each (primitive) scan, to avoid - * slowing down point queries. They typically don't stand to gain much - * when the optimization can be applied, and are more likely to notice the - * overhead of the precheck. Also avoid it during scans with array keys, - * which might be using skip scan (XXX fixed in next commit). - */ - if (!pstate.firstpage && !arrayKeys && minoff < maxoff) - { - ItemId iid; - IndexTuple itup; - - iid = PageGetItemId(page, ScanDirectionIsForward(dir) ? maxoff : minoff); - itup = (IndexTuple) PageGetItem(page, iid); - - /* Call with arrayKeys=false to avoid undesirable side-effects */ - _bt_checkkeys(scan, &pstate, false, itup, indnatts); - pstate.prechecked = pstate.continuescan; - pstate.continuescan = true; /* reset */ - } - if (ScanDirectionIsForward(dir)) { /* SK_SEARCHARRAY forward scans must provide high key up front */ @@ -1716,6 +1683,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->scanBehind = so->oppositeDirCheck = false; /* reset */ } + /* + * Consider pstate.startikey optimization once the ongoing primitive + * index scan has already read at least one page + */ + if (!pstate.firstpage && minoff < maxoff) + _bt_set_startikey(scan, &pstate); + /* load items[] in ascending order */ itemIndex = 0; @@ -1752,6 +1726,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, { Assert(!passes_quals && pstate.continuescan); Assert(offnum < pstate.skip); + Assert(!pstate.forcenonrequired); offnum = pstate.skip; pstate.skip = InvalidOffsetNumber; @@ -1761,7 +1736,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, if (passes_quals) { /* tuple passes all scan key conditions */ - pstate.firstmatch = true; if (!BTreeTupleIsPosting(itup)) { /* Remember it */ @@ -1816,7 +1790,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, int truncatt; truncatt = BTreeTupleGetNAtts(itup, rel); - pstate.prechecked = false; /* precheck didn't cover HIKEY */ + pstate.forcenonrequired = false; + pstate.startikey = 0; /* _bt_set_startikey ignores HIKEY */ _bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt); } @@ -1855,6 +1830,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->scanBehind = so->oppositeDirCheck = false; /* reset */ } + /* + * Consider pstate.startikey optimization once the ongoing primitive + * index scan has already read at least one page + */ + if (!pstate.firstpage && minoff < maxoff) + _bt_set_startikey(scan, &pstate); + /* load items[] in descending order */ itemIndex = MaxTIDsPerBTreePage; @@ -1894,6 +1876,11 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, Assert(!BTreeTupleIsPivot(itup)); pstate.offnum = offnum; + if (arrayKeys && offnum == minoff && pstate.forcenonrequired) + { + pstate.forcenonrequired = false; + pstate.startikey = 0; + } passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys, itup, indnatts); @@ -1905,6 +1892,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, { Assert(!passes_quals && pstate.continuescan); Assert(offnum > pstate.skip); + Assert(!pstate.forcenonrequired); offnum = pstate.skip; pstate.skip = InvalidOffsetNumber; @@ -1914,7 +1902,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, if (passes_quals && tuple_alive) { /* tuple passes all scan key conditions */ - pstate.firstmatch = true; if (!BTreeTupleIsPosting(itup)) { /* Remember it */ @@ -1970,6 +1957,20 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.itemIndex = MaxTIDsPerBTreePage - 1; } + /* + * If _bt_set_startikey told us to temporarily treat the scan's keys as + * nonrequired (possible only during scans with array keys), there must be + * no lasting consequences for the scan's array keys. The scan's arrays + * should now have exactly the same elements as they would have had if the + * nonrequired behavior had never been used. (In general, a scan's arrays + * are expected to track its progress through the index's key space.) + * + * We are required (by _bt_set_startikey) to call _bt_checkkeys against + * pstate.finaltup with pstate.forcenonrequired=false to allow the scan's + * arrays to recover. Assert that that step hasn't been missed. + */ + Assert(!pstate.forcenonrequired); + return (so->currPos.firstItem <= so->currPos.lastItem); } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 108030a8ee7..8b668102164 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -57,11 +57,11 @@ static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, IndexTuple finaltup); static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, - bool advancenonrequired, bool prechecked, bool firstmatch, + bool advancenonrequired, bool forcenonrequired, bool *continuescan, int *ikey); static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, - ScanDirection dir, bool *continuescan); + ScanDirection dir, bool forcenonrequired, bool *continuescan); static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, int tupnatts, TupleDesc tupdesc); static int _bt_keep_natts(Relation rel, IndexTuple lastleft, @@ -1422,9 +1422,10 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir) * postcondition's <= operator with a >=. In other words, just swap the * precondition with the postcondition.) * - * We also deal with "advancing" non-required arrays here. Callers whose - * sktrig scan key is non-required specify sktrig_required=false. These calls - * are the only exception to the general rule about always advancing the + * We also deal with "advancing" non-required arrays here (or arrays that are + * treated as non-required for the duration of a _bt_readpage call). Callers + * whose sktrig scan key is non-required specify sktrig_required=false. These + * calls are the only exception to the general rule about always advancing the * required array keys (the scan may not even have a required array). These * callers should just pass a NULL pstate (since there is never any question * of stopping the scan). No call to _bt_tuple_before_array_skeys is required @@ -1464,6 +1465,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, all_satisfied = true; Assert(!so->needPrimScan && !so->scanBehind && !so->oppositeDirCheck); + Assert(_bt_verify_keys_with_arraykeys(scan)); if (sktrig_required) { @@ -1474,25 +1476,32 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, tupnatts, false, 0, NULL)); /* - * Required scan key wasn't satisfied, so required arrays will have to - * advance. Invalidate page-level state that tracks whether the - * scan's required-in-opposite-direction-only keys are known to be - * satisfied by page's remaining tuples. - */ - pstate->firstmatch = false; - - /* Shouldn't have to invalidate 'prechecked', though */ - Assert(!pstate->prechecked); - - /* * Once we return we'll have a new set of required array keys, so * reset state used by "look ahead" optimization */ pstate->rechecks = 0; pstate->targetdistance = 0; } + else if (sktrig < so->numberOfKeys - 1 && + !(so->keyData[so->numberOfKeys - 1].sk_flags & SK_SEARCHARRAY)) + { + int least_sign_ikey = so->numberOfKeys - 1; + bool continuescan; - Assert(_bt_verify_keys_with_arraykeys(scan)); + /* + * Optimization: perform a precheck of the least significant key + * during !sktrig_required calls when it isn't already our sktrig + * (provided the precheck key is not itself an array). + * + * When the precheck works out we'll avoid an expensive binary search + * of sktrig's array (plus any other arrays before least_sign_ikey). + */ + Assert(so->keyData[sktrig].sk_flags & SK_SEARCHARRAY); + if (!_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, false, + false, &continuescan, + &least_sign_ikey)) + return false; + } for (int ikey = 0; ikey < so->numberOfKeys; ikey++) { @@ -1534,8 +1543,6 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) { - Assert(sktrig_required); - required = true; if (cur->sk_attno > tupnatts) @@ -1669,7 +1676,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, } else { - Assert(sktrig_required && required); + Assert(required); /* * This is a required non-array equality strategy scan key, which @@ -1711,7 +1718,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, * be eliminated by _bt_preprocess_keys. It won't matter if some of * our "true" array scan keys (or even all of them) are non-required. */ - if (required && + if (sktrig_required && required && ((ScanDirectionIsForward(dir) && result > 0) || (ScanDirectionIsBackward(dir) && result < 0))) beyond_end_advance = true; @@ -1726,7 +1733,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, * array scan keys are considered interesting.) */ all_satisfied = false; - if (required) + if (sktrig_required && required) all_required_satisfied = false; else { @@ -1786,6 +1793,12 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, * of any required scan key). All that matters is whether caller's tuple * satisfies the new qual, so it's safe to just skip the _bt_check_compare * recheck when we've already determined that it can only return 'false'. + * + * Note: In practice most scan keys are marked required by preprocessing, + * if necessary by generating a preceding skip array. We nevertheless + * often handle array keys marked required as if they were nonrequired. + * This behavior is requested by our _bt_check_compare caller, though only + * when it is passed "forcenonrequired=true" by _bt_checkkeys. */ if ((sktrig_required && all_required_satisfied) || (!sktrig_required && all_satisfied)) @@ -1796,9 +1809,9 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, Assert(all_required_satisfied); /* Recheck _bt_check_compare on behalf of caller */ - if (_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, - false, false, false, - &continuescan, &nsktrig) && + if (_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, false, + false, &continuescan, + &nsktrig) && !so->scanBehind) { /* This tuple satisfies the new qual */ @@ -2042,8 +2055,9 @@ new_prim_scan: * read at least one leaf page before the one we're reading now. This * makes primscan scheduling more efficient when scanning subsets of an * index with many distinct attribute values matching many array elements. - * It encourages fewer, larger primitive scans where that makes sense - * (where index descent costs need to be kept under control). + * It encourages fewer, larger primitive scans where that makes sense. + * This will in turn encourage _bt_readpage to apply the pstate.startikey + * optimization more often. * * Note: This heuristic isn't as aggressive as you might think. We're * conservative about allowing a primitive scan to step from the first @@ -2200,17 +2214,14 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan) * the page to the right. * * Advances the scan's array keys when necessary for arrayKeys=true callers. - * Caller can avoid all array related side-effects when calling just to do a - * page continuescan precheck -- pass arrayKeys=false for that. Scans without - * any arrays keys must always pass arrayKeys=false. + * Scans without any array keys must always pass arrayKeys=false. * * Also stops and starts primitive index scans for arrayKeys=true callers. * Scans with array keys are required to set up page state that helps us with * this. The page's finaltup tuple (the page high key for a forward scan, or * the page's first non-pivot tuple for a backward scan) must be set in - * pstate.finaltup ahead of the first call here for the page (or possibly the - * first call after an initial continuescan-setting page precheck call). Set - * this to NULL for rightmost page (or the leftmost page for backwards scans). + * pstate.finaltup ahead of the first call here for the page. Set this to + * NULL for rightmost page (or the leftmost page for backwards scans). * * scan: index scan descriptor (containing a search-type scankey) * pstate: page level input and output parameters @@ -2225,42 +2236,46 @@ _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys, TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); BTScanOpaque so = (BTScanOpaque) scan->opaque; ScanDirection dir = so->currPos.dir; - int ikey = 0; + int ikey = pstate->startikey; bool res; Assert(BTreeTupleGetNAtts(tuple, scan->indexRelation) == tupnatts); Assert(!so->needPrimScan && !so->scanBehind && !so->oppositeDirCheck); + Assert(arrayKeys || so->numArrayKeys == 0); - res = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, - arrayKeys, pstate->prechecked, pstate->firstmatch, - &pstate->continuescan, &ikey); + res = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, arrayKeys, + pstate->forcenonrequired, &pstate->continuescan, + &ikey); + /* + * If _bt_check_compare relied on the pstate.startikey optimization, call + * again (in assert-enabled builds) to verify it didn't affect our answer. + * + * Note: we can't do this when !pstate.forcenonrequired, since any arrays + * before pstate.startikey won't have advanced on this page at all. + */ + Assert(!pstate->forcenonrequired || arrayKeys); #ifdef USE_ASSERT_CHECKING - if (!arrayKeys && so->numArrayKeys) + if (pstate->startikey > 0 && !pstate->forcenonrequired) { - /* - * This is a continuescan precheck call for a scan with array keys. - * - * Assert that the scan isn't in danger of becoming confused. - */ - Assert(!so->scanBehind && !so->oppositeDirCheck); - Assert(!pstate->prechecked && !pstate->firstmatch); - Assert(!_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, - tupnatts, false, 0, NULL)); - } - if (pstate->prechecked || pstate->firstmatch) - { - bool dcontinuescan; + bool dres, + dcontinuescan; int dikey = 0; + /* Pass arrayKeys=false to avoid array side-effects */ + dres = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, false, + pstate->forcenonrequired, &dcontinuescan, + &dikey); + Assert(res == dres); + Assert(pstate->continuescan == dcontinuescan); + /* - * Call relied on continuescan/firstmatch prechecks -- assert that we - * get the same answer without those optimizations + * Should also get the same ikey result. We need a slightly weaker + * assertion during arrayKeys calls, since they might be using an + * array that couldn't be marked required during preprocessing. */ - Assert(res == _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, - false, false, false, - &dcontinuescan, &dikey)); - Assert(pstate->continuescan == dcontinuescan); + Assert(arrayKeys || ikey == dikey); + Assert(ikey <= dikey); } #endif @@ -2281,6 +2296,7 @@ _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys, * It's also possible that the scan is still _before_ the _start_ of * tuples matching the current set of array keys. Check for that first. */ + Assert(!pstate->forcenonrequired); if (_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, tupnatts, true, ikey, NULL)) { @@ -2394,8 +2410,9 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, Assert(so->numArrayKeys); - _bt_check_compare(scan, flipped, finaltup, nfinaltupatts, tupdesc, - false, false, false, &continuescan, &ikey); + _bt_check_compare(scan, flipped, finaltup, nfinaltupatts, tupdesc, false, + false, &continuescan, + &ikey); if (!continuescan && so->keyData[ikey].sk_strategy != BTEqualStrategyNumber) return false; @@ -2404,6 +2421,301 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, } /* + * Determines an offset to the first scan key (an so->keyData[]-wise offset) + * that is _not_ guaranteed to be satisfied by every tuple from pstate.page, + * which is set in pstate.startikey for _bt_checkkeys calls for the page. + * This allows caller to save cycles on comparisons of a prefix of keys while + * reading pstate.page. + * + * Also determines if later calls to _bt_checkkeys (for pstate.page) should be + * forced to treat all required scan keys >= pstate.startikey as nonrequired + * (that is, if they're to be treated as if any SK_BT_REQFWD/SK_BT_REQBKWD + * markings that were set by preprocessing were not set at all, for the + * duration of _bt_checkkeys calls prior to the call for pstate.finaltup). + * This is indicated to caller by setting pstate.forcenonrequired. + * + * Call here at the start of reading a leaf page beyond the first one for the + * primitive index scan. We consider all non-pivot tuples, so it doesn't make + * sense to call here when only a subset of those tuples can ever be read. + * This is also a good idea on performance grounds; not calling here when on + * the first page (first for the current primitive scan) avoids wasting cycles + * during selective point queries. They typically don't stand to gain as much + * when we can set pstate.startikey, and are likely to notice the overhead of + * calling here. + * + * Caller must reset startikey and forcenonrequired ahead of the _bt_checkkeys + * call for pstate.finaltup iff we set forcenonrequired=true. This will give + * _bt_checkkeys the opportunity to call _bt_advance_array_keys once more, + * with sktrig_required=true, to advance the arrays that were ignored during + * checks of all of the page's prior tuples. Caller doesn't need to do this + * on the rightmost/leftmost page in the index (where pstate.finaltup isn't + * set), since forcenonrequired won't be set here by us in the first place. + */ +void +_bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Relation rel = scan->indexRelation; + TupleDesc tupdesc = RelationGetDescr(rel); + ItemId iid; + IndexTuple firsttup, + lasttup; + int startikey = 0, + arrayidx = 0, + firstchangingattnum; + bool start_past_saop_eq = false; + + Assert(!so->scanBehind); + Assert(pstate->minoff < pstate->maxoff); + Assert(!pstate->firstpage); + Assert(pstate->startikey == 0); + Assert(!so->numArrayKeys || pstate->finaltup || + P_RIGHTMOST(BTPageGetOpaque(pstate->page)) || + P_LEFTMOST(BTPageGetOpaque(pstate->page))); + + if (so->numberOfKeys == 0) + return; + + /* minoff is an offset to the lowest non-pivot tuple on the page */ + iid = PageGetItemId(pstate->page, pstate->minoff); + firsttup = (IndexTuple) PageGetItem(pstate->page, iid); + + /* maxoff is an offset to the highest non-pivot tuple on the page */ + iid = PageGetItemId(pstate->page, pstate->maxoff); + lasttup = (IndexTuple) PageGetItem(pstate->page, iid); + + /* Determine the first attribute whose values change on caller's page */ + firstchangingattnum = _bt_keep_natts_fast(rel, firsttup, lasttup); + + for (; startikey < so->numberOfKeys; startikey++) + { + ScanKey key = so->keyData + startikey; + BTArrayKeyInfo *array; + Datum firstdatum, + lastdatum; + bool firstnull, + lastnull; + int32 result; + + /* + * Determine if it's safe to set pstate.startikey to an offset to a + * key that comes after this key, by examining this key + */ + if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) + { + /* Scan key isn't marked required (corner case) */ + Assert(!(key->sk_flags & SK_ROW_HEADER)); + break; /* unsafe */ + } + if (key->sk_flags & SK_ROW_HEADER) + { + /* + * Can't let pstate.startikey get set to an ikey beyond a + * RowCompare inequality + */ + break; /* unsafe */ + } + if (key->sk_strategy != BTEqualStrategyNumber) + { + /* + * Scalar inequality key. + * + * It's definitely safe for _bt_checkkeys to avoid assessing this + * inequality when the page's first and last non-pivot tuples both + * satisfy the inequality (since the same must also be true of all + * the tuples in between these two). + * + * Unlike the "=" case, it doesn't matter if this attribute has + * more than one distinct value (though it _is_ necessary for any + * and all _prior_ attributes to contain no more than one distinct + * value amongst all of the tuples from pstate.page). + */ + if (key->sk_attno > firstchangingattnum) /* >, not >= */ + break; /* unsafe, preceding attr has multiple + * distinct values */ + + firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc, &firstnull); + lastdatum = index_getattr(lasttup, key->sk_attno, tupdesc, &lastnull); + + if (key->sk_flags & SK_ISNULL) + { + /* IS NOT NULL key */ + Assert(key->sk_flags & SK_SEARCHNOTNULL); + + if (firstnull || lastnull) + break; /* unsafe */ + + /* Safe, IS NOT NULL key satisfied by every tuple */ + continue; + } + + /* Test firsttup */ + if (firstnull || + !DatumGetBool(FunctionCall2Coll(&key->sk_func, + key->sk_collation, firstdatum, + key->sk_argument))) + break; /* unsafe */ + + /* Test lasttup */ + if (lastnull || + !DatumGetBool(FunctionCall2Coll(&key->sk_func, + key->sk_collation, lastdatum, + key->sk_argument))) + break; /* unsafe */ + + /* Safe, scalar inequality satisfied by every tuple */ + continue; + } + + /* Some = key (could be a a scalar = key, could be an array = key) */ + Assert(key->sk_strategy == BTEqualStrategyNumber); + + if (!(key->sk_flags & SK_SEARCHARRAY)) + { + /* + * Scalar = key (possibly an IS NULL key). + * + * It is unsafe to set pstate.startikey to an ikey beyond this + * key, unless the = key is satisfied by every possible tuple on + * the page (possible only when attribute has just one distinct + * value among all tuples on the page). + */ + if (key->sk_attno >= firstchangingattnum) + break; /* unsafe, multiple distinct attr values */ + + firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc, + &firstnull); + if (key->sk_flags & SK_ISNULL) + { + /* IS NULL key */ + Assert(key->sk_flags & SK_SEARCHNULL); + + if (!firstnull) + break; /* unsafe */ + + /* Safe, IS NULL key satisfied by every tuple */ + continue; + } + if (firstnull || + !DatumGetBool(FunctionCall2Coll(&key->sk_func, + key->sk_collation, firstdatum, + key->sk_argument))) + break; /* unsafe */ + + /* Safe, scalar = key satisfied by every tuple */ + continue; + } + + /* = array key (could be a SAOP array, could be a skip array) */ + array = &so->arrayKeys[arrayidx++]; + Assert(array->scan_key == startikey); + if (array->num_elems != -1) + { + /* + * SAOP array = key. + * + * Handle this like we handle scalar = keys (though binary search + * for a matching element, to avoid relying on key's sk_argument). + */ + if (key->sk_attno >= firstchangingattnum) + break; /* unsafe, multiple distinct attr values */ + + firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc, + &firstnull); + _bt_binsrch_array_skey(&so->orderProcs[startikey], + false, NoMovementScanDirection, + firstdatum, firstnull, array, key, + &result); + if (result != 0) + break; /* unsafe */ + + /* Safe, SAOP = key satisfied by every tuple */ + start_past_saop_eq = true; + continue; + } + + /* + * Skip array = key + */ + Assert(key->sk_flags & SK_BT_SKIP); + if (array->null_elem) + { + /* + * Non-range skip array = key. + * + * Safe, non-range skip array "satisfied" by every tuple on page + * (safe even when "key->sk_attno > firstchangingattnum"). + */ + continue; + } + + /* + * Range skip array = key. + * + * Handle this like we handle scalar inequality keys (but avoid using + * key's sk_argument directly, as in the SAOP array case). + */ + if (key->sk_attno > firstchangingattnum) /* >, not >= */ + break; /* unsafe, preceding attr has multiple + * distinct values */ + + firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc, &firstnull); + lastdatum = index_getattr(lasttup, key->sk_attno, tupdesc, &lastnull); + + /* Test firsttup */ + _bt_binsrch_skiparray_skey(false, ForwardScanDirection, + firstdatum, firstnull, array, key, + &result); + if (result != 0) + break; /* unsafe */ + + /* Test lasttup */ + _bt_binsrch_skiparray_skey(false, ForwardScanDirection, + lastdatum, lastnull, array, key, + &result); + if (result != 0) + break; /* unsafe */ + + /* Safe, range skip array satisfied by every tuple on page */ + } + + /* + * Use of forcenonrequired is typically undesirable, since it'll force + * _bt_readpage caller to read every tuple on the page -- even though, in + * general, it might well be possible to end the scan on an earlier tuple. + * However, caller must use forcenonrequired when start_past_saop_eq=true, + * since the usual required array behavior might fail to roll over to the + * SAOP array. + * + * We always prefer forcenonrequired=true during scans with skip arrays + * (except on the first page of each primitive index scan), though -- even + * when "startikey == 0". That way, _bt_advance_array_keys's low-order + * key precheck optimization can always be used (unless on the first page + * of the scan). It seems slightly preferable to check more tuples when + * that allows us to do significantly less skip array maintenance. + */ + pstate->forcenonrequired = (start_past_saop_eq || so->skipScan); + pstate->startikey = startikey; + + /* + * _bt_readpage caller is required to call _bt_checkkeys against page's + * finaltup with forcenonrequired=false whenever we initially set + * forcenonrequired=true. That way the scan's arrays will reliably track + * its progress through the index's key space. + * + * We don't expect this when _bt_readpage caller has no finaltup due to + * its page being the rightmost (or the leftmost, during backwards scans). + * When we see that _bt_readpage has no finaltup, back out of everything. + */ + Assert(!pstate->forcenonrequired || so->numArrayKeys); + if (pstate->forcenonrequired && !pstate->finaltup) + { + pstate->forcenonrequired = false; + pstate->startikey = 0; + } +} + +/* * Test whether an indextuple satisfies current scan condition. * * Return true if so, false if not. If not, also sets *continuescan to false @@ -2432,23 +2744,33 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, * by the current array key, or if they're truly unsatisfied (that is, if * they're unsatisfied by every possible array key). * - * Though we advance non-required array keys on our own, that shouldn't have - * any lasting consequences for the scan. By definition, non-required arrays - * have no fixed relationship with the scan's progress. (There are delicate - * considerations for non-required arrays when the arrays need to be advanced - * following our setting continuescan to false, but that doesn't concern us.) - * * Pass advancenonrequired=false to avoid all array related side effects. * This allows _bt_advance_array_keys caller to avoid infinite recursion. + * + * Pass forcenonrequired=true to instruct us to treat all keys as nonrequired. + * This is used to make it safe to temporarily stop properly maintaining the + * scan's required arrays. _bt_checkkeys caller (_bt_readpage, actually) + * determines a prefix of keys that must satisfy every possible corresponding + * index attribute value from its page, which is passed to us via *ikey arg + * (this is the first key that might be unsatisfied by tuples on the page). + * Obviously, we won't maintain any array keys from before *ikey, so it's + * quite possible for such arrays to "fall behind" the index's keyspace. + * Caller will need to "catch up" by passing forcenonrequired=true (alongside + * an *ikey=0) once the page's finaltup is reached. + * + * Note: it's safe to pass an *ikey > 0 with forcenonrequired=false, but only + * when caller determines that it won't affect array maintenance. */ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, - bool advancenonrequired, bool prechecked, bool firstmatch, + bool advancenonrequired, bool forcenonrequired, bool *continuescan, int *ikey) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + Assert(!forcenonrequired || advancenonrequired); + *continuescan = true; /* default assumption */ for (; *ikey < so->numberOfKeys; (*ikey)++) @@ -2461,36 +2783,20 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, /* * Check if the key is required in the current scan direction, in the - * opposite scan direction _only_, or in neither direction + * opposite scan direction _only_, or in neither direction (except + * when we're forced to treat all scan keys as nonrequired) */ - if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) || - ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir))) + if (forcenonrequired) + { + /* treating scan's keys as non-required */ + } + else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) || + ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir))) requiredSameDir = true; else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) || ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir))) requiredOppositeDirOnly = true; - /* - * If the caller told us the *continuescan flag is known to be true - * for the last item on the page, then we know the keys required for - * the current direction scan should be matched. Otherwise, the - * *continuescan flag would be set for the current item and - * subsequently the last item on the page accordingly. - * - * If the key is required for the opposite direction scan, we can skip - * the check if the caller tells us there was already at least one - * matching item on the page. Also, we require the *continuescan flag - * to be true for the last item on the page to know there are no - * NULLs. - * - * Both cases above work except for the row keys, where NULLs could be - * found in the middle of matching values. - */ - if (prechecked && - (requiredSameDir || (requiredOppositeDirOnly && firstmatch)) && - !(key->sk_flags & SK_ROW_HEADER)) - continue; - if (key->sk_attno > tupnatts) { /* @@ -2512,6 +2818,16 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, { Assert(key->sk_flags & SK_SEARCHARRAY); Assert(key->sk_flags & SK_BT_SKIP); + Assert(requiredSameDir || forcenonrequired); + + /* + * Cannot fall back on _bt_tuple_before_array_skeys when we're + * treating the scan's keys as nonrequired, though. Just handle + * this like any other non-required equality-type array key. + */ + if (forcenonrequired) + return _bt_advance_array_keys(scan, NULL, tuple, tupnatts, + tupdesc, *ikey, false); *continuescan = false; return false; @@ -2521,7 +2837,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, if (key->sk_flags & SK_ROW_HEADER) { if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir, - continuescan)) + forcenonrequired, continuescan)) continue; return false; } @@ -2554,9 +2870,20 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, */ if (requiredSameDir) *continuescan = false; + else if (unlikely(key->sk_flags & SK_BT_SKIP)) + { + /* + * If we're treating scan keys as nonrequired, and encounter a + * skip array scan key whose current element is NULL, then it + * must be a non-range skip array. It must be satisfied, so + * there's no need to call _bt_advance_array_keys to check. + */ + Assert(forcenonrequired && *ikey > 0); + continue; + } /* - * In any case, this indextuple doesn't match the qual. + * This indextuple doesn't match the qual. */ return false; } @@ -2577,7 +2904,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, * (_bt_advance_array_keys also relies on this behavior during * forward scans.) */ - if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) && + if ((requiredSameDir || requiredOppositeDirOnly) && ScanDirectionIsBackward(dir)) *continuescan = false; } @@ -2595,7 +2922,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, * (_bt_advance_array_keys also relies on this behavior during * backward scans.) */ - if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) && + if ((requiredSameDir || requiredOppositeDirOnly) && ScanDirectionIsForward(dir)) *continuescan = false; } @@ -2606,15 +2933,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, return false; } - /* - * Apply the key-checking function, though only if we must. - * - * When a key is required in the opposite-of-scan direction _only_, - * then it must already be satisfied if firstmatch=true indicates that - * an earlier tuple from this same page satisfied it earlier on. - */ - if (!(requiredOppositeDirOnly && firstmatch) && - !DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, + if (!DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, datum, key->sk_argument))) { /* @@ -2664,7 +2983,8 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, */ static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, - TupleDesc tupdesc, ScanDirection dir, bool *continuescan) + TupleDesc tupdesc, ScanDirection dir, + bool forcenonrequired, bool *continuescan) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); int32 cmpresult = 0; @@ -2704,7 +3024,11 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, if (isNull) { - if (subkey->sk_flags & SK_BT_NULLS_FIRST) + if (forcenonrequired) + { + /* treating scan's keys as non-required */ + } + else if (subkey->sk_flags & SK_BT_NULLS_FIRST) { /* * Since NULLs are sorted before non-NULLs, we know we have @@ -2758,8 +3082,12 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, */ Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); subkey--; - if ((subkey->sk_flags & SK_BT_REQFWD) && - ScanDirectionIsForward(dir)) + if (forcenonrequired) + { + /* treating scan's keys as non-required */ + } + else if ((subkey->sk_flags & SK_BT_REQFWD) && + ScanDirectionIsForward(dir)) *continuescan = false; else if ((subkey->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) @@ -2811,7 +3139,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, break; } - if (!result) + if (!result && !forcenonrequired) { /* * Tuple fails this qual. If it's a required qual for the current @@ -2855,6 +3183,8 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, OffsetNumber aheadoffnum; IndexTuple ahead; + Assert(!pstate->forcenonrequired); + /* Avoid looking ahead when comparing the page high key */ if (pstate->offnum < pstate->minoff) return; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 9d9b76d5b48..ebbdd83a8c1 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -1059,6 +1059,7 @@ typedef struct BTScanOpaqueData /* workspace for SK_SEARCHARRAY support */ int numArrayKeys; /* number of equality-type array keys */ + bool skipScan; /* At least one skip array in arrayKeys[]? */ bool needPrimScan; /* New prim scan to continue in current dir? */ bool scanBehind; /* Check scan not still behind on next page? */ bool oppositeDirCheck; /* scanBehind opposite-scan-dir check? */ @@ -1105,6 +1106,8 @@ typedef struct BTReadPageState IndexTuple finaltup; /* Needed by scans with array keys */ Page page; /* Page being read */ bool firstpage; /* page is first for primitive scan? */ + bool forcenonrequired; /* treat all keys as nonrequired? */ + int startikey; /* start comparisons from this scan key */ /* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */ OffsetNumber offnum; /* current tuple's page offset number */ @@ -1114,13 +1117,6 @@ typedef struct BTReadPageState bool continuescan; /* Terminate ongoing (primitive) index scan? */ /* - * Input and output parameters, set and unset by both _bt_readpage and - * _bt_checkkeys to manage precheck optimizations - */ - bool prechecked; /* precheck set continuescan to 'true'? */ - bool firstmatch; /* at least one match so far? */ - - /* * Private _bt_checkkeys state used to manage "look ahead" optimization * (only used during scans with array keys) */ @@ -1327,6 +1323,7 @@ extern bool _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arra IndexTuple tuple, int tupnatts); extern bool _bt_scanbehind_checkkeys(IndexScanDesc scan, ScanDirection dir, IndexTuple finaltup); +extern void _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate); extern void _bt_killitems(IndexScanDesc scan); extern BTCycleId _bt_vacuum_cycleid(Relation rel); extern BTCycleId _bt_start_vacuum(Relation rel); |