author     Heikki Linnakangas <heikki.linnakangas@iki.fi>    2011-03-01 19:05:16 +0200
committer  Heikki Linnakangas <heikki.linnakangas@iki.fi>    2011-03-01 19:05:16 +0200
commit     47ad79122bc099c1f0ea8a7ae413fcd8d45e26a6 (patch)
tree       b8a4ac5fa3cff5235a5693ca31825ecbcfdd97db
parent     16143d64513e4dc3c72bad7ae98d3df0b5a23013 (diff)
download   postgresql-47ad79122bc099c1f0ea8a7ae413fcd8d45e26a6.tar.gz
           postgresql-47ad79122bc099c1f0ea8a7ae413fcd8d45e26a6.zip
Fix bugs in Serializable Snapshot Isolation.
Change the way UPDATEs are handled. Instead of maintaining a chain of
tuple-level locks in shared memory, copy any existing locks on the old
tuple to the new tuple at UPDATE. Any existing page-level lock needs to
be duplicated too, as a lock on the new tuple. That was neglected
previously.
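To make the transfer rule concrete, here is a minimal, self-contained sketch (a toy model, not the patch's code; the names UpdateTargets and transfer_locks_at_update are invented for illustration). It tracks lock holders as bitmasks and shows both transfers described above: tuple-level locks are always copied to the new tuple's target, and page-level locks are copied as well unless the new tuple landed on the same page, where the existing page lock already covers it.

/*
 * Toy model of the lock-copying rule at UPDATE.  "Locks" are bitmasks
 * of transaction numbers; the real implementation hangs lock lists off
 * shared hash-table targets instead.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t LockMask;      /* bit i set => transaction i holds a lock */

typedef struct
{
    LockMask    old_tuple;      /* locks on the old tuple version */
    LockMask    old_page;       /* page-level locks covering the old tuple */
    LockMask    new_tuple;      /* locks on the new tuple version */
} UpdateTargets;

/* Copy tuple locks, plus page locks when the new tuple is on another page. */
static void
transfer_locks_at_update(UpdateTargets *t, int same_page)
{
    t->new_tuple |= t->old_tuple;
    if (!same_page)             /* same page: the old page lock still covers it */
        t->new_tuple |= t->old_page;
}

int
main(void)
{
    UpdateTargets t = {0x1, 0x2, 0x0};  /* xact 0 locks tuple, xact 1 locks page */

    transfer_locks_at_update(&t, 0);    /* new tuple went to a different page */
    printf("new tuple lock holders: 0x%x\n", (unsigned) t.new_tuple);  /* 0x3 */
    return 0;
}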
Store xmin on tuple-level predicate locks, to distinguish a lock on an old
already-recycled tuple from a new tuple at the same physical location.
Failure to distinguish them caused loops in the tuple-lock chains, as
reported by YAMAMOTO Takashi. Although we don't use the chain representation
of UPDATEs anymore, it seems like a good idea to store the xmin to avoid
some false positives, if for no other reason.
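The effect of carrying xmin in the lock tag can be seen in a small self-contained sketch (ToyTupleTag and tags_equal are invented for illustration; the real tag is PREDICATELOCKTARGETTAG): two tuples occupying the same physical slot at different times no longer compare as the same lock target.

/* Toy tuple tag: adding xmin distinguishes generations of one (block, offset) slot. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct
{
    uint32_t    blkno;      /* block number */
    uint32_t    offnum;     /* line pointer offset within the block */
    uint32_t    xmin;       /* inserting transaction; 0 plays "invalid" here */
} ToyTupleTag;

static bool
tags_equal(ToyTupleTag a, ToyTupleTag b)
{
    return a.blkno == b.blkno && a.offnum == b.offnum && a.xmin == b.xmin;
}

int
main(void)
{
    ToyTupleTag locked = {7, 3, 100};   /* lock taken on the original tuple */
    ToyTupleTag reused = {7, 3, 250};   /* later tuple recycled into the same slot */

    /* Without the xmin field these tags would collide; with it they don't. */
    printf("same lock target? %s\n", tags_equal(locked, reused) ? "yes" : "no");
    return 0;
}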
CheckSingleTargetForConflictsIn now correctly handles the case where a lock
that's being held is not reflected in the local lock table. That happens
if another backend acquires a lock on our behalf due to an UPDATE or a page
split.
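The behavioral change amounts to treating the backend-local table strictly as a hint. A minimal sketch (the table layout and release_if_tracked are invented): the release path tolerates a missing entry instead of asserting that one must exist.

/* Toy local lock table used only as a hint; shared state is authoritative. */
#include <stdbool.h>
#include <stdio.h>

#define NLOCKS 4

static bool local_table[NLOCKS];    /* this backend's guess at what it holds */

/*
 * Release path: no assertion that the entry exists.  Another backend may
 * have moved the lock (UPDATE, page split) without updating our table.
 */
static void
release_if_tracked(int lockno)
{
    if (local_table[lockno])
        local_table[lockno] = false;    /* drop the hint entry */
    /* else: fine -- the lock was transferred on our behalf */
}

int
main(void)
{
    local_table[1] = true;
    release_if_tracked(1);      /* tracked locally */
    release_if_tracked(2);      /* untracked: must not trip an assertion */
    puts("ok");
    return 0;
}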
PredicateLockPageCombine now retains locks for the page that is being
removed, rather than removing them. This prevents a potentially dangerous
false-positive inconsistency where the local lock table believes that a lock
is held, but it is actually not.
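The shape of this fix is visible in the patch itself: PredicateLockPageCombine now simply calls PredicateLockPageSplit. As a toy restatement (function bodies invented; only the delegation mirrors the commit):

#include <stdio.h>

/* Stand-in for the real transfer: duplicate locks onto the new page,
 * keeping the old page's locks so no local lock table is left stale. */
static void
page_split(int oldblkno, int newblkno)
{
    printf("copy locks from page %d to page %d; old locks retained\n",
           oldblkno, newblkno);
}

/* A combine cannot safely delete the old page's locks, because other
 * backends' local tables may still reference them, so it does the same
 * work as a split. */
static void
page_combine(int oldblkno, int newblkno)
{
    page_split(oldblkno, newblkno);
}

int
main(void)
{
    page_combine(10, 4);
    return 0;
}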
Dan Ports and Kevin Grittner
-rw-r--r--    src/backend/access/nbtree/nbtree.c            1
-rw-r--r--    src/backend/storage/lmgr/predicate.c        383
-rw-r--r--    src/include/storage/lwlock.h                  1
-rw-r--r--    src/include/storage/predicate_internals.h    54
4 files changed, 148 insertions, 291 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index ba01874469b..7a0e1a9c25e 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -824,7 +824,6 @@ restart:
 		if (_bt_page_recyclable(page))
 		{
 			/* Okay to recycle this page */
-			Assert(!PageIsPredicateLocked(rel, blkno));
 			RecordFreeIndexPage(rel, blkno);
 			vstate->totFreePages++;
 			stats->pages_deleted++;
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d660ce5af78..aa657fab579 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -124,10 +124,6 @@
  *	SerializableXactHashLock
  *		- Protects both PredXact and SerializableXidHash.
  *
- *	PredicateLockNextRowLinkLock
- *		- Protects the priorVersionOfRow and nextVersionOfRow fields of
- *		  PREDICATELOCKTARGET when linkage is being created or destroyed.
- *
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
@@ -444,8 +440,6 @@ static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
 					   bool summarize);
 static bool XidIsConcurrent(TransactionId xid);
 static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag);
-static bool CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
-								PREDICATELOCKTARGETTAG *nexttargettag);
 static void FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer);
 static void OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
 									   SERIALIZABLEXACT *writer);
@@ -1044,7 +1038,6 @@ InitPredicateLocks(void)
 	PredXact->LastSxactCommitSeqNo = FirstNormalSerCommitSeqNo - 1;
 	PredXact->CanPartialClearThrough = 0;
 	PredXact->HavePartialClearedThrough = 0;
-	PredXact->NeedTargetLinkCleanup = false;
 	requestSize = mul_size((Size) max_table_size,
 						   PredXactListElementDataSize);
 	PredXact->element = ShmemAlloc(requestSize);
@@ -1651,9 +1644,10 @@ PageIsPredicateLocked(const Relation relation, const BlockNumber blkno)
  * Important note: this function may return false even if the lock is
  * being held, because it uses the local lock table which is not
  * updated if another transaction modifies our lock list (e.g. to
- * split an index page). However, it will never return true if the
- * lock is not held. We only use this function in circumstances where
- * such false negatives are acceptable.
+ * split an index page). It can also return true when a coarser
+ * granularity lock that covers this target is being held. Be careful
+ * to only use this function in circumstances where such errors are
+ * acceptable!
  */
 static bool
 PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag)
@@ -1717,6 +1711,9 @@ GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag,
 /*
  * Check whether the lock we are considering is already covered by a
  * coarser lock for our transaction.
+ *
+ * Like PredicateLockExists, this function might return a false
+ * negative, but it will never return a false positive.
  */
 static bool
 CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag)
@@ -1747,7 +1744,6 @@ static void
 RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
 {
 	PREDICATELOCKTARGET *rmtarget;
-	PREDICATELOCKTARGET *next;

 	Assert(LWLockHeldByMe(SerializablePredicateLockListLock));

@@ -1755,33 +1751,6 @@ RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
 	if (!SHMQueueEmpty(&target->predicateLocks))
 		return;

-	/* Can't remove it if there are locks for a prior row version. */
-	LWLockAcquire(PredicateLockNextRowLinkLock, LW_EXCLUSIVE);
-	if (target->priorVersionOfRow != NULL)
-	{
-		LWLockRelease(PredicateLockNextRowLinkLock);
-		return;
-	}
-
-	/*
-	 * We are going to release this target, This requires that we let the
-	 * next version of the row (if any) know that it's previous version is
-	 * done.
-	 *
-	 * It might be that the link was all that was keeping the other target
-	 * from cleanup, but we can't clean that up here -- LW locking is all
-	 * wrong for that. We'll pass the HTAB in the general cleanup function to
-	 * get rid of such "dead" targets.
-	 */
-	next = target->nextVersionOfRow;
-	if (next != NULL)
-	{
-		next->priorVersionOfRow = NULL;
-		if (SHMQueueEmpty(&next->predicateLocks))
-			PredXact->NeedTargetLinkCleanup = true;
-	}
-	LWLockRelease(PredicateLockNextRowLinkLock);
-
 	/* Actually remove the target. */
 	rmtarget = hash_search_with_hash_value(PredicateLockTargetHash,
 										   &target->tag,
@@ -2065,11 +2034,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
 				 errmsg("out of shared memory"),
 				 errhint("You might need to increase max_pred_locks_per_transaction.")));
 	if (!found)
-	{
 		SHMQueueInit(&(target->predicateLocks));
-		target->priorVersionOfRow = NULL;
-		target->nextVersionOfRow = NULL;
-	}

 	/* We've got the sxact and target, make sure they're joined. */
 	locktag.myTarget = target;
@@ -2125,8 +2090,6 @@ PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag)
 		hash_search_with_hash_value(LocalPredicateLockHash,
 									targettag, targettaghash,
 									HASH_ENTER, &found);
-	/* We should not hold the lock (but its entry might still exist) */
-	Assert(!found || !locallock->held);
 	locallock->held = true;
 	if (!found)
 		locallock->childLocks = 0;
@@ -2215,6 +2178,7 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 {
 	PREDICATELOCKTARGETTAG tag;
 	ItemPointer tid;
+	TransactionId targetxmin;

 	if (SkipSerialization(relation))
 		return;
@@ -2224,15 +2188,16 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 	 */
 	if (relation->rd_index == NULL)
 	{
-		TransactionId myxid = GetTopTransactionIdIfAny();
+		TransactionId myxid;
+
+		targetxmin = HeapTupleHeaderGetXmin(tuple->t_data);

+		myxid = GetTopTransactionIdIfAny();
 		if (TransactionIdIsValid(myxid))
 		{
-			TransactionId xid = HeapTupleHeaderGetXmin(tuple->t_data);
-
-			if (TransactionIdFollowsOrEquals(xid, TransactionXmin))
+			if (TransactionIdFollowsOrEquals(targetxmin, TransactionXmin))
 			{
-				xid = SubTransGetTopmostTransaction(xid);
+				TransactionId xid = SubTransGetTopmostTransaction(targetxmin);
 				if (TransactionIdEquals(xid, myxid))
 				{
 					/* We wrote it; we already have a write lock. */
@@ -2241,6 +2206,8 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 			}
 		}
 	}
+	else
+		targetxmin = InvalidTransactionId;

 	/*
 	 * Do quick-but-not-definitive test for a relation lock first. This will
@@ -2259,116 +2226,78 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 									 relation->rd_node.dbNode,
 									 relation->rd_id,
 									 ItemPointerGetBlockNumber(tid),
-									 ItemPointerGetOffsetNumber(tid));
+									 ItemPointerGetOffsetNumber(tid),
+									 targetxmin);
 	PredicateLockAcquire(&tag);
 }

 /*
- * If the old tuple has any predicate locks, create a lock target for the
- * new tuple and point them at each other. Conflict detection needs to
- * look for locks against prior versions of the row.
+ * If the old tuple has any predicate locks, copy them to the new target.
+ *
+ * This is called at an UPDATE, where any predicate locks held on the old
+ * tuple need to be copied to the new tuple, because logically they both
+ * represent the same row. A lock taken before the update must conflict
+ * with anyone locking the same row after the update.
  */
 void
 PredicateLockTupleRowVersionLink(const Relation relation,
 								 const HeapTuple oldTuple,
 								 const HeapTuple newTuple)
 {
-	PREDICATELOCKTARGETTAG oldtargettag;
-	PREDICATELOCKTARGETTAG newtargettag;
-	PREDICATELOCKTARGET *oldtarget;
-	PREDICATELOCKTARGET *newtarget;
-	PREDICATELOCKTARGET *next;
-	uint32		oldtargettaghash;
-	LWLockId	oldpartitionLock;
-	uint32		newtargettaghash;
-	LWLockId	newpartitionLock;
-	bool		found;
-
-	SET_PREDICATELOCKTARGETTAG_TUPLE(oldtargettag,
+	PREDICATELOCKTARGETTAG oldtupletag;
+	PREDICATELOCKTARGETTAG oldpagetag;
+	PREDICATELOCKTARGETTAG newtupletag;
+	BlockNumber oldblk,
+				newblk;
+	OffsetNumber oldoff,
+				newoff;
+	TransactionId oldxmin,
+				newxmin;
+
+	oldblk = ItemPointerGetBlockNumber(&(oldTuple->t_self));
+	oldoff = ItemPointerGetOffsetNumber(&(oldTuple->t_self));
+	oldxmin = HeapTupleHeaderGetXmin(oldTuple->t_data);
+
+	newblk = ItemPointerGetBlockNumber(&(newTuple->t_self));
+	newoff = ItemPointerGetOffsetNumber(&(newTuple->t_self));
+	newxmin = HeapTupleHeaderGetXmin(newTuple->t_data);
+
+	SET_PREDICATELOCKTARGETTAG_TUPLE(oldtupletag,
 									 relation->rd_node.dbNode,
 									 relation->rd_id,
-									 ItemPointerGetBlockNumber(&(oldTuple->t_self)),
-									 ItemPointerGetOffsetNumber(&(oldTuple->t_self)));
-	oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
-	oldpartitionLock = PredicateLockHashPartitionLock(oldtargettaghash);
+									 oldblk,
+									 oldoff,
+									 oldxmin);

-	SET_PREDICATELOCKTARGETTAG_TUPLE(newtargettag,
+	SET_PREDICATELOCKTARGETTAG_PAGE(oldpagetag,
+									relation->rd_node.dbNode,
+									relation->rd_id,
+									oldblk);
+
+	SET_PREDICATELOCKTARGETTAG_TUPLE(newtupletag,
 									 relation->rd_node.dbNode,
 									 relation->rd_id,
-									 ItemPointerGetBlockNumber(&(newTuple->t_self)),
-									 ItemPointerGetOffsetNumber(&(newTuple->t_self)));
-	newtargettaghash = PredicateLockTargetTagHashCode(&newtargettag);
-	newpartitionLock = PredicateLockHashPartitionLock(newtargettaghash);
-
-	/* Lock lower numbered partition first. */
-	if (oldpartitionLock < newpartitionLock)
-	{
-		LWLockAcquire(oldpartitionLock, LW_SHARED);
-		LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
-	}
-	else if (newpartitionLock < oldpartitionLock)
-	{
-		LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
-		LWLockAcquire(oldpartitionLock, LW_SHARED);
-	}
-	else
-		LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
-
-	oldtarget = (PREDICATELOCKTARGET *)
-		hash_search_with_hash_value(PredicateLockTargetHash,
-									&oldtargettag, oldtargettaghash,
-									HASH_FIND, NULL);
-
-	/* Only need to link if there is an old target already. */
-	if (oldtarget)
-	{
-		LWLockAcquire(PredicateLockNextRowLinkLock, LW_EXCLUSIVE);
-
-		/* Guard against stale pointers from rollback. */
-		next = oldtarget->nextVersionOfRow;
-		if (next != NULL)
-		{
-			next->priorVersionOfRow = NULL;
-			oldtarget->nextVersionOfRow = NULL;
-		}
-
-		/* Find or create the new target, and link old and new. */
-		newtarget = (PREDICATELOCKTARGET *)
-			hash_search_with_hash_value(PredicateLockTargetHash,
-										&newtargettag, newtargettaghash,
-										HASH_ENTER, &found);
-		if (!newtarget)
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of shared memory"),
-					 errhint("You might need to increase max_pred_locks_per_transaction.")));
-		if (!found)
-		{
-			SHMQueueInit(&(newtarget->predicateLocks));
-			newtarget->nextVersionOfRow = NULL;
-		}
-		else
-			Assert(newtarget->priorVersionOfRow == NULL);
+									 newblk,
+									 newoff,
+									 newxmin);

-		newtarget->priorVersionOfRow = oldtarget;
-		oldtarget->nextVersionOfRow = newtarget;
+	/*
+	 * A page-level lock on the page containing the old tuple counts too.
+	 * Anyone holding a lock on the page is logically holding a lock on
+	 * the old tuple, so we need to acquire a lock on his behalf on the
+	 * new tuple too. However, if the new tuple is on the same page as the
+	 * old one, the old page-level lock already covers the new tuple.
+	 *
+	 * A relation-level lock always covers both tuple versions, so we don't
+	 * need to worry about those here.
+	 */
+	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);

-		LWLockRelease(PredicateLockNextRowLinkLock);
-	}
+	TransferPredicateLocksToNewTarget(oldtupletag, newtupletag, false);
+	if (newblk != oldblk)
+		TransferPredicateLocksToNewTarget(oldpagetag, newtupletag, false);

-	/* Release lower number partition last. */
-	if (oldpartitionLock < newpartitionLock)
-	{
-		LWLockRelease(newpartitionLock);
-		LWLockRelease(oldpartitionLock);
-	}
-	else if (newpartitionLock < oldpartitionLock)
-	{
-		LWLockRelease(oldpartitionLock);
-		LWLockRelease(newpartitionLock);
-	}
-	else
-		LWLockRelease(newpartitionLock);
+	LWLockRelease(SerializablePredicateLockListLock);
 }
@@ -2437,6 +2366,17 @@ DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash)
  * removeOld is set (by using the reserved entry in
  * PredicateLockTargetHash for scratch space).
  *
+ * Warning: the "removeOld" option should be used only with care,
+ * because this function does not (indeed, can not) update other
+ * backends' LocalPredicateLockHash. If we are only adding new
+ * entries, this is not a problem: the local lock table is used only
+ * as a hint, so missing entries for locks that are held are
+ * OK. Having entries for locks that are no longer held, as can happen
+ * when using "removeOld", is not in general OK. We can only use it
+ * safely when replacing a lock with a coarser-granularity lock that
+ * covers it, or if we are absolutely certain that no one will need to
+ * refer to that lock in the future.
+ *
  * Caller must hold SerializablePredicateLockListLock.
  */
 static bool
@@ -2533,11 +2473,7 @@ TransferPredicateLocksToNewTarget(const PREDICATELOCKTARGETTAG oldtargettag,

 		/* If we created a new entry, initialize it */
 		if (!found)
-		{
 			SHMQueueInit(&(newtarget->predicateLocks));
-			newtarget->priorVersionOfRow = NULL;
-			newtarget->nextVersionOfRow = NULL;
-		}

 		newpredlocktag.myTarget = newtarget;
@@ -2704,7 +2640,14 @@ PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno,
 												 &newtargettag);
 		Assert(success);

-		/* Move the locks to the parent. This shouldn't fail. */
+		/*
+		 * Move the locks to the parent. This shouldn't fail.
+		 *
+		 * Note that here we are removing locks held by other
+		 * backends, leading to a possible inconsistency in their
+		 * local lock hash table. This is OK because we're replacing
+		 * it with a lock that covers the old one.
+		 */
 		success = TransferPredicateLocksToNewTarget(oldtargettag,
 													newtargettag,
 													true);
@@ -2727,36 +2670,19 @@ void
 PredicateLockPageCombine(const Relation relation, const BlockNumber oldblkno,
 						 const BlockNumber newblkno)
 {
-	PREDICATELOCKTARGETTAG oldtargettag;
-	PREDICATELOCKTARGETTAG newtargettag;
-	bool		success;
-
-
-	if (SkipSplitTracking(relation))
-		return;
-
-	Assert(oldblkno != newblkno);
-	Assert(BlockNumberIsValid(oldblkno));
-	Assert(BlockNumberIsValid(newblkno));
-
-	SET_PREDICATELOCKTARGETTAG_PAGE(oldtargettag,
-									relation->rd_node.dbNode,
-									relation->rd_id,
-									oldblkno);
-	SET_PREDICATELOCKTARGETTAG_PAGE(newtargettag,
-									relation->rd_node.dbNode,
-									relation->rd_id,
-									newblkno);
-
-	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
-
-	/* Move the locks. This shouldn't fail. */
-	success = TransferPredicateLocksToNewTarget(oldtargettag,
-												newtargettag,
-												true);
-	Assert(success);
-
-	LWLockRelease(SerializablePredicateLockListLock);
+	/*
+	 * Page combines differ from page splits in that we ought to be
+	 * able to remove the locks on the old page after transferring
+	 * them to the new page, instead of duplicating them. However,
+	 * because we can't edit other backends' local lock tables,
+	 * removing the old lock would leave them with an entry in their
+	 * LocalPredicateLockHash for a lock they're not holding, which
+	 * isn't acceptable. So we wind up having to do the same work as a
+	 * page split, acquiring a lock on the new page and keeping the old
+	 * page locked too. That can lead to some false positives, but
+	 * should be rare in practice.
+	 */
+	PredicateLockPageSplit(relation, oldblkno, newblkno);
 }
@@ -3132,9 +3058,6 @@ ClearOldPredicateLocks(void)
 {
 	SERIALIZABLEXACT *finishedSxact;
 	PREDICATELOCK *predlock;
-	int			i;
-	HASH_SEQ_STATUS seqstat;
-	PREDICATELOCKTARGET *locktarget;

 	LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
 	finishedSxact = (SERIALIZABLEXACT *)
@@ -3232,35 +3155,6 @@ ClearOldPredicateLocks(void)

 	LWLockRelease(SerializablePredicateLockListLock);
 	LWLockRelease(SerializableFinishedListLock);
-
-	if (!PredXact->NeedTargetLinkCleanup)
-		return;
-
-	/*
-	 * Clean up any targets which were disconnected from a prior version with
-	 * no predicate locks attached.
-	 */
-	for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
-		LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE);
-	LWLockAcquire(PredicateLockNextRowLinkLock, LW_SHARED);
-
-	hash_seq_init(&seqstat, PredicateLockTargetHash);
-	while ((locktarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
-	{
-		if (SHMQueueEmpty(&locktarget->predicateLocks)
-			&& locktarget->priorVersionOfRow == NULL
-			&& locktarget->nextVersionOfRow == NULL)
-		{
-			hash_search(PredicateLockTargetHash, &locktarget->tag,
-						HASH_REMOVE, NULL);
-		}
-	}
-
-	PredXact->NeedTargetLinkCleanup = false;
-
-	LWLockRelease(PredicateLockNextRowLinkLock);
-	for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
-		LWLockRelease(FirstPredicateLockMgrLock + i);
 }
@@ -3676,38 +3570,15 @@ CheckForSerializableConflictOut(const bool visible, const Relation relation,
 }

 /*
- * Check a particular target for rw-dependency conflict in. This will
- * also check prior versions of a tuple, if any.
+ * Check a particular target for rw-dependency conflict in.
  */
 static void
 CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 {
-	PREDICATELOCKTARGETTAG nexttargettag = { 0 };
-	PREDICATELOCKTARGETTAG thistargettag;
-
-	for (;;)
-	{
-		if (!CheckSingleTargetForConflictsIn(targettag, &nexttargettag))
-			break;
-		thistargettag = nexttargettag;
-		targettag = &thistargettag;
-	}
-}
-
-/*
- * Check a particular target for rw-dependency conflict in. If the tuple
- * has prior versions, returns true and *nexttargettag is set to the tag
- * of the prior tuple version.
- */
-static bool
-CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
-								PREDICATELOCKTARGETTAG *nexttargettag)
-{
 	uint32		targettaghash;
 	LWLockId	partitionLock;
 	PREDICATELOCKTARGET *target;
 	PREDICATELOCK *predlock;
-	bool		hasnexttarget = false;

 	Assert(MySerializableXact != InvalidSerializableXact);
@@ -3717,7 +3588,6 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 	targettaghash = PredicateLockTargetTagHashCode(targettag);
 	partitionLock = PredicateLockHashPartitionLock(targettaghash);
 	LWLockAcquire(partitionLock, LW_SHARED);
-	LWLockAcquire(PredicateLockNextRowLinkLock, LW_SHARED);
 	target = (PREDICATELOCKTARGET *)
 		hash_search_with_hash_value(PredicateLockTargetHash,
 									targettag, targettaghash,
@@ -3725,21 +3595,9 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 	if (!target)
 	{
 		/* Nothing has this target locked; we're done here. */
-		LWLockRelease(PredicateLockNextRowLinkLock);
 		LWLockRelease(partitionLock);
-		return false;
-	}
-
-	/*
-	 * If the target is linked to a prior version of the row, save the tag so
-	 * that it can be used for iterative calls to this function.
-	 */
-	if (target->priorVersionOfRow != NULL)
-	{
-		*nexttargettag = target->priorVersionOfRow->tag;
-		hasnexttarget = true;
+		return;
 	}
-	LWLockRelease(PredicateLockNextRowLinkLock);

 	/*
 	 * Each lock for an overlapping transaction represents a conflict: a
@@ -3828,17 +3686,25 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 				hash_search_with_hash_value(LocalPredicateLockHash,
 											targettag, targettaghash,
 											HASH_FIND, NULL);
-			Assert(locallock != NULL);
-			Assert(locallock->held);
-			locallock->held = false;

-			if (locallock->childLocks == 0)
+			/*
+			 * Remove entry in local lock table if it exists and has
+			 * no children. It's OK if it doesn't exist; that means
+			 * the lock was transferred to a new target by a
+			 * different backend.
+			 */
+			if (locallock != NULL)
 			{
-				rmlocallock = (LOCALPREDICATELOCK *)
-					hash_search_with_hash_value(LocalPredicateLockHash,
-												targettag, targettaghash,
-												HASH_REMOVE, NULL);
-				Assert(rmlocallock == locallock);
+				locallock->held = false;
+
+				if (locallock->childLocks == 0)
+				{
+					rmlocallock = (LOCALPREDICATELOCK *)
+						hash_search_with_hash_value(LocalPredicateLockHash,
+													targettag, targettaghash,
+													HASH_REMOVE, NULL);
+					Assert(rmlocallock == locallock);
+				}
 			}

 			DecrementParentLocks(targettag);
@@ -3848,7 +3714,7 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 			 * the target, bail out before re-acquiring the locks.
 			 */
 			if (rmtarget)
-				return hasnexttarget;
+				return;

 			/*
 			 * The list has been altered. Start over at the front.
@@ -3895,8 +3761,6 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 	}
 	LWLockRelease(SerializableXactHashLock);
 	LWLockRelease(partitionLock);
-
-	return hasnexttarget;
 }

 /*
@@ -3943,7 +3807,8 @@ CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple,
 									 relation->rd_node.dbNode,
 									 relation->rd_id,
 					 ItemPointerGetBlockNumber(&(tuple->t_data->t_ctid)),
-					 ItemPointerGetOffsetNumber(&(tuple->t_data->t_ctid)));
+					 ItemPointerGetOffsetNumber(&(tuple->t_data->t_ctid)),
+					 HeapTupleHeaderGetXmin(tuple->t_data));
 		CheckTargetForConflictsIn(&targettag);
 	}
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 99fe37ef83b..ad0bcd775b0 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -78,7 +78,6 @@ typedef enum LWLockId
 	SerializableFinishedListLock,
 	SerializablePredicateLockListLock,
 	OldSerXidLock,
-	PredicateLockNextRowLinkLock,
 	/* Individual lock IDs end here */
 	FirstBufMappingLock,
 	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h
index 32e9a1bc218..05561256367 100644
--- a/src/include/storage/predicate_internals.h
+++ b/src/include/storage/predicate_internals.h
@@ -150,8 +150,6 @@ typedef struct PredXactListData
 	SerCommitSeqNo HavePartialClearedThrough;	/* have cleared through this
 												 * seq no */
 	SERIALIZABLEXACT *OldCommittedSxact;		/* shared copy of dummy sxact */
-	bool		NeedTargetLinkCleanup;	/* to save cleanup effort for rare
-										 * case */

 	PredXactListElement element;
 } PredXactListData;
@@ -231,9 +229,13 @@ typedef struct SERIALIZABLEXID

 /*
  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
- * be the target of predicate locks. It is designed to fit into 16 bytes
- * with no padding. Note that this would need adjustment if we widen Oid or
- * BlockNumber to more than 32 bits.
+ * be the target of predicate locks.
+ *
+ * Note that the hash function being used doesn't properly respect tag
+ * length -- it will go to a four byte boundary past the end of the tag.
+ * If you change this struct, make sure any slack space is initialized,
+ * so that any random bytes in the middle or at the end are not included
+ * in the hash.
  *
  * TODO SSI: If we always use the same fields for the same type of value, we
  * should rename these. Holding off until it's clear there are no exceptions.
@@ -247,8 +249,8 @@ typedef struct PREDICATELOCKTARGETTAG
 	uint32		locktag_field1; /* a 32-bit ID field */
 	uint32		locktag_field2; /* a 32-bit ID field */
 	uint32		locktag_field3; /* a 32-bit ID field */
-	uint16		locktag_field4; /* a 16-bit ID field */
-	uint16		locktag_field5; /* a 16-bit ID field */
+	uint32		locktag_field4; /* a 32-bit ID field */
+	uint32		locktag_field5; /* a 32-bit ID field */
 } PREDICATELOCKTARGETTAG;
@@ -260,12 +262,11 @@ typedef struct PREDICATELOCKTARGETTAG
  * already have one. An entry is removed when the last lock is removed from
  * its list.
  *
- * Because a check for predicate locks on a tuple target should also find
- * locks on previous versions of the same row, if there are any created by
- * overlapping transactions, we keep a pointer to the target for the prior
- * version of the row. We also keep a pointer to the next version of the
- * row, so that when we no longer have any predicate locks and the back
- * pointer is clear, we can clean up the prior pointer for the next version.
+ * Because a particular target might become obsolete, due to update to a new
+ * version, before the reading transaction is obsolete, we need some way to
+ * prevent errors from reuse of a tuple ID. Rather than attempting to clean
+ * up the targets as the related tuples are pruned or vacuumed, we check the
+ * xmin on access. This should be far less costly.
  */
 typedef struct PREDICATELOCKTARGET PREDICATELOCKTARGET;
@@ -277,15 +278,6 @@ struct PREDICATELOCKTARGET
 	/* data */
 	SHM_QUEUE	predicateLocks; /* list of PREDICATELOCK objects assoc. with
								 * predicate lock target */
-
-	/*
-	 * The following two pointers are only used for tuple locks, and are only
-	 * consulted for conflict detection and cleanup; not for granularity
-	 * promotion.
-	 */
-	PREDICATELOCKTARGET *priorVersionOfRow;	/* what other locks to check */
-	PREDICATELOCKTARGET *nextVersionOfRow;	/* who has pointer here for
-											 * more targets */
 };
@@ -387,30 +379,32 @@ typedef struct PredicateLockData
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = InvalidBlockNumber, \
 	 (locktag).locktag_field4 = InvalidOffsetNumber, \
-	 (locktag).locktag_field5 = 0)
+	 (locktag).locktag_field5 = InvalidTransactionId)

 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = (blocknum), \
 	 (locktag).locktag_field4 = InvalidOffsetNumber, \
-	 (locktag).locktag_field5 = 0)
+	 (locktag).locktag_field5 = InvalidTransactionId)

-#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
+#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = (blocknum), \
 	 (locktag).locktag_field4 = (offnum), \
-	 (locktag).locktag_field5 = 0)
+	 (locktag).locktag_field5 = (xmin))

 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
-	((locktag).locktag_field1)
+	((Oid) (locktag).locktag_field1)
 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
-	((locktag).locktag_field2)
+	((Oid) (locktag).locktag_field2)
 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
-	((locktag).locktag_field3)
+	((BlockNumber) (locktag).locktag_field3)
 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
-	((locktag).locktag_field4)
+	((OffsetNumber) (locktag).locktag_field4)
+#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
+	((TransactionId) (locktag).locktag_field5)
 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
 	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
 	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \