Diffstat (limited to 'src/backend/access')
-rw-r--r--  src/backend/access/nbtree/nbtree.c    |  2
-rw-r--r--  src/backend/access/nbtree/nbtutils.c  | 72
-rw-r--r--  src/backend/access/transam/xact.c     | 28
3 files changed, 44 insertions(+), 58 deletions(-)
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 03a1d7b027a..fdff960c130 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -417,6 +417,8 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 	 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
 	 *
 	 * See nbtree/README section on making concurrent TID recycling safe.
+	 *
+	 * Note: so->dropPin should never change across rescans.
 	 */
 	so->dropPin = (!scan->xs_want_itup &&
 				   IsMVCCSnapshot(scan->xs_snapshot) &&
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 29f0dca1b08..c71d1b6f2e1 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -63,7 +63,7 @@ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
 							  bool *continuescan, int *ikey);
 static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple,
 								 int tupnatts, TupleDesc tupdesc,
-								 ScanDirection dir, bool *continuescan);
+								 ScanDirection dir, bool forcenonrequired, bool *continuescan);
 static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
 									 int tupnatts, TupleDesc tupdesc);
 static int	_bt_keep_natts(Relation rel, IndexTuple lastleft,
@@ -2902,10 +2902,8 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
 		/* row-comparison keys need special processing */
 		if (key->sk_flags & SK_ROW_HEADER)
 		{
-			Assert(!forcenonrequired); /* forbidden by _bt_set_startikey */
-
 			if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
-									 continuescan))
+									 forcenonrequired, continuescan))
 				continue;
 			return false;
 		}
@@ -3062,7 +3060,8 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
  */
 static bool
 _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
-					 TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
+					 TupleDesc tupdesc, ScanDirection dir,
+					 bool forcenonrequired, bool *continuescan)
 {
 	ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
 	int32		cmpresult = 0;
@@ -3102,7 +3101,11 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 
 		if (isNull)
 		{
-			if (subkey->sk_flags & SK_BT_NULLS_FIRST)
+			if (forcenonrequired)
+			{
+				/* treating scan's keys as non-required */
+			}
+			else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
 			{
 				/*
 				 * Since NULLs are sorted before non-NULLs, we know we have
@@ -3156,8 +3159,12 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			 */
 			Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
 			subkey--;
-			if ((subkey->sk_flags & SK_BT_REQFWD) &&
-				ScanDirectionIsForward(dir))
+			if (forcenonrequired)
+			{
+				/* treating scan's keys as non-required */
+			}
+			else if ((subkey->sk_flags & SK_BT_REQFWD) &&
+					 ScanDirectionIsForward(dir))
 				*continuescan = false;
 			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
 					 ScanDirectionIsBackward(dir))
@@ -3209,7 +3216,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			break;
 	}
 
-	if (!result)
+	if (!result && !forcenonrequired)
 	{
 		/*
 		 * Tuple fails this qual.  If it's a required qual for the current
@@ -3323,24 +3330,26 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
  * current page and killed tuples thereon (generally, this should only be
  * called if so->numKilled > 0).
  *
- * The caller does not have a lock on the page and may or may not have the
- * page pinned in a buffer.  Note that read-lock is sufficient for setting
- * LP_DEAD status (which is only a hint).
- *
- * We match items by heap TID before assuming they are the right ones to
- * delete.  We cope with cases where items have moved right due to insertions.
- * If an item has moved off the current page due to a split, we'll fail to
- * find it and do nothing (this is not an error case --- we assume the item
- * will eventually get marked in a future indexscan).
+ * Caller should not have a lock on the so->currPos page, but must hold a
+ * buffer pin when !so->dropPin.  When we return, it still won't be locked.
+ * It'll continue to hold whatever pins were held before calling here.
  *
- * Note that if we hold a pin on the target page continuously from initially
- * reading the items until applying this function, VACUUM cannot have deleted
- * any items on the page, so the page's TIDs can't have been recycled by now.
- * There's no risk that we'll confuse a new index tuple that happens to use a
- * recycled TID with a now-removed tuple with the same TID (that used to be on
- * this same page).  We can't rely on that during scans that drop pins eagerly
+ * We match items by heap TID before assuming they are the right ones to set
+ * LP_DEAD.  If the scan is one that holds a buffer pin on the target page
+ * continuously from initially reading the items until applying this function
+ * (if it is a !so->dropPin scan), VACUUM cannot have deleted any items on the
+ * page, so the page's TIDs can't have been recycled by now.  There's no risk
+ * that we'll confuse a new index tuple that happens to use a recycled TID
+ * with a now-removed tuple with the same TID (that used to be on this same
+ * page).  We can't rely on that during scans that drop buffer pins eagerly
 * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
 * the page LSN having not changed since back when _bt_readpage saw the page.
+ * We totally give up on setting LP_DEAD bits when the page LSN changed.
+ *
+ * We give up much less often during !so->dropPin scans, but it still happens.
+ * We cope with cases where items have moved right due to insertions.  If an
+ * item has moved off the current page due to a split, we'll fail to find it
+ * and just give up on it.
  */
 void
 _bt_killitems(IndexScanDesc scan)
@@ -3353,6 +3362,7 @@ _bt_killitems(IndexScanDesc scan)
 	OffsetNumber maxoff;
 	int			numKilled = so->numKilled;
 	bool		killedsomething = false;
+	Buffer		buf;
 
 	Assert(numKilled > 0);
 	Assert(BTScanPosIsValid(so->currPos));
@@ -3369,11 +3379,11 @@ _bt_killitems(IndexScanDesc scan)
 		 * concurrent VACUUMs from recycling any of the TIDs on the page.
 		 */
 		Assert(BTScanPosIsPinned(so->currPos));
-		_bt_lockbuf(rel, so->currPos.buf, BT_READ);
+		buf = so->currPos.buf;
+		_bt_lockbuf(rel, buf, BT_READ);
 	}
 	else
 	{
-		Buffer		buf;
 		XLogRecPtr	latestlsn;
 
 		Assert(!BTScanPosIsPinned(so->currPos));
@@ -3391,10 +3401,9 @@ _bt_killitems(IndexScanDesc scan)
 		}
 
 		/* Unmodified, hinting is safe */
-		so->currPos.buf = buf;
 	}
 
-	page = BufferGetPage(so->currPos.buf);
+	page = BufferGetPage(buf);
 	opaque = BTPageGetOpaque(page);
 	minoff = P_FIRSTDATAKEY(opaque);
 	maxoff = PageGetMaxOffsetNumber(page);
@@ -3511,10 +3520,13 @@ _bt_killitems(IndexScanDesc scan)
 	if (killedsomething)
 	{
 		opaque->btpo_flags |= BTP_HAS_GARBAGE;
-		MarkBufferDirtyHint(so->currPos.buf, true);
+		MarkBufferDirtyHint(buf, true);
 	}
 
-	_bt_unlockbuf(rel, so->currPos.buf);
+	if (!so->dropPin)
+		_bt_unlockbuf(rel, buf);
+	else
+		_bt_relbuf(rel, buf);
 }
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 2e67e998adb..b885513f765 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1045,34 +1045,6 @@ TransactionStartedDuringRecovery(void)
 }
 
 /*
- * GetTopReadOnlyTransactionNestLevel
- *
- * Note: this will return zero when not inside any transaction or when neither
- * a top-level transaction nor subtransactions are read-only, one when the
- * top-level transaction is read-only, two when one level of subtransaction is
- * read-only, etc.
- *
- * Note: subtransactions of the topmost read-only transaction are also
- * read-only, because they inherit read-only mode from the transaction, and
- * thus can't change to read-write mode.  See check_transaction_read_only().
- */
-int
-GetTopReadOnlyTransactionNestLevel(void)
-{
-	TransactionState s = CurrentTransactionState;
-
-	if (!XactReadOnly)
-		return 0;
-	while (s->nestingLevel > 1)
-	{
-		if (!s->prevXactReadOnly)
-			return s->nestingLevel;
-		s = s->parent;
-	}
-	return s->nestingLevel;
-}
-
-/*
  * EnterParallelMode
  */
 void
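The behavioral core of the nbtutils.c hunks above is that _bt_killitems() only trusts its remembered TIDs unconditionally when the scan kept its buffer pin (!so->dropPin); for so->dropPin scans it compares the page's current LSN against the LSN recorded when _bt_readpage read the items, and gives up on setting LP_DEAD hints if the page has since changed. The following standalone toy program is not PostgreSQL code: the ScanPos struct, safe_to_set_lp_dead(), and the literal LSN values are invented purely to sketch that decision rule under those assumptions.

/* toy model of the LP_DEAD safety check; not PostgreSQL code */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;    /* stand-in for the WAL position type */

typedef struct ScanPos
{
    bool        dropPin;        /* did the scan drop its pin after reading the page? */
    XLogRecPtr  readLSN;        /* page LSN remembered when the items were read */
} ScanPos;

/* Decide whether setting LP_DEAD hints on the page is still safe. */
static bool
safe_to_set_lp_dead(const ScanPos *pos, XLogRecPtr current_page_lsn)
{
    if (!pos->dropPin)
    {
        /*
         * Pin held continuously since the items were read: VACUUM cannot
         * have removed items or recycled TIDs on this page in the meantime.
         */
        return true;
    }

    /*
     * Pin was dropped eagerly, so the hints are only safe if the page was
     * not modified since we read it, detected via the page LSN.
     */
    return current_page_lsn == pos->readLSN;
}

int
main(void)
{
    ScanPos held = {.dropPin = false, .readLSN = 100};
    ScanPos dropped = {.dropPin = true, .readLSN = 100};

    printf("pin held, page changed:    %d\n", safe_to_set_lp_dead(&held, 200));
    printf("pin dropped, unchanged:    %d\n", safe_to_set_lp_dead(&dropped, 100));
    printf("pin dropped, page changed: %d\n", safe_to_set_lp_dead(&dropped, 200));
    return 0;
}

In the real function both paths still take a read lock before marking anything (a read lock is enough because LP_DEAD is only a hint); the toy models only the safety decision, not the locking.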