diff options
author | Andres Freund <andres@anarazel.de> | 2016-07-18 02:01:13 -0700 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2016-07-18 02:01:13 -0700 |
commit | eca0f1db14ac92d91d54eca8eeff2d15ccd797fa (patch) | |
tree | 293bb9becbd0ca0b53c00816fa7bdd6f08d94f02 /src/backend/access/heap/heapam.c | |
parent | 65632082b7eb3c7d56f1b42e1df452d0f66bc189 (diff) | |
download | postgresql-eca0f1db14ac92d91d54eca8eeff2d15ccd797fa.tar.gz postgresql-eca0f1db14ac92d91d54eca8eeff2d15ccd797fa.zip |
Clear all-frozen visibilitymap status when locking tuples.
Since a892234 & fd31cd265 the visibilitymap's freeze bit is used to
avoid vacuuming the whole relation in anti-wraparound vacuums. Doing so
correctly relies on not adding xids to the heap without also unsetting
the visibilitymap flag. Tuple locking related code has not done so.
To avoid pessimizing heap_lock_tuple, allow just the all-frozen bit
to be selectively reset with
visibilitymap_clear(). To avoid having to use
visibilitymap_get_status (e.g. via VM_ALL_FROZEN) inside a critical
section, have visibilitymap_clear() return whether any bits have been
reset.
There's a remaining issue (denoted by XXX): After the PageIsAllVisible()
check in heap_lock_tuple() and heap_lock_updated_tuple_rec() the page
status could theoretically change. Practically that currently seems
impossible, because updaters will hold a page level pin already. Due to
the next beta coming up, it seems better to get the required WAL magic
bump done before resolving this issue.
The flags fields added to xl_heap_lock and xl_heap_lock_updated
require bumping the WAL magic. Since there's already been a catversion
bump since the last beta, that's not an issue.
Reviewed-By: Robert Haas, Amit Kapila and Andres Freund
Author: Masahiko Sawada, heavily revised by Andres Freund
Discussion: CAEepm=3fWAbWryVW9swHyLTY4sXVf0xbLvXqOwUoDiNCx9mBjQ@mail.gmail.com
Backpatch: -
Diffstat (limited to 'src/backend/access/heap/heapam.c')
-rw-r--r-- | src/backend/access/heap/heapam.c | 207 |
1 file changed, 164 insertions, 43 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 2815d916f3d..38bba162997 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2423,7 +2423,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, PageClearAllVisible(BufferGetPage(buffer)); visibilitymap_clear(relation, ItemPointerGetBlockNumber(&(heaptup->t_self)), - vmbuffer); + vmbuffer, VISIBILITYMAP_VALID_BITS); } /* @@ -2737,7 +2737,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, PageClearAllVisible(page); visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer); + vmbuffer, VISIBILITYMAP_VALID_BITS); } /* @@ -3239,7 +3239,7 @@ l1: all_visible_cleared = true; PageClearAllVisible(page); visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer); + vmbuffer, VISIBILITYMAP_VALID_BITS); } /* store transaction information of xact deleting the tuple */ @@ -3925,6 +3925,7 @@ l2: TransactionId xmax_lock_old_tuple; uint16 infomask_lock_old_tuple, infomask2_lock_old_tuple; + bool cleared_all_frozen = false; /* * To prevent concurrent sessions from updating the tuple, we have to @@ -3968,6 +3969,17 @@ l2: /* temporarily make it look not-updated, but locked */ oldtup.t_data->t_ctid = oldtup.t_self; + /* + * Clear all-frozen bit on visibility map if needed. We could + * immediately reset ALL_VISIBLE, but given that the WAL logging + * overhead would be unchanged, that doesn't seem necessarily + * worthwhile. + */ + if (PageIsAllVisible(BufferGetPage(buffer)) && + visibilitymap_clear(relation, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN)) + cleared_all_frozen = true; + MarkBufferDirty(buffer); if (RelationNeedsWAL(relation)) @@ -3982,6 +3994,8 @@ l2: xlrec.locking_xid = xmax_lock_old_tuple; xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2); + xlrec.flags = + cleared_all_frozen ? 
XLH_LOCK_ALL_FROZEN_CLEARED : 0; XLogRegisterData((char *) &xlrec, SizeOfHeapLock); recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); PageSetLSN(page, recptr); @@ -4159,20 +4173,20 @@ l2: /* record address of new tuple in t_ctid of old one */ oldtup.t_data->t_ctid = heaptup->t_self; - /* clear PD_ALL_VISIBLE flags */ + /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */ if (PageIsAllVisible(BufferGetPage(buffer))) { all_visible_cleared = true; PageClearAllVisible(BufferGetPage(buffer)); visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer); + vmbuffer, VISIBILITYMAP_VALID_BITS); } if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf))) { all_visible_cleared_new = true; PageClearAllVisible(BufferGetPage(newbuf)); visibilitymap_clear(relation, BufferGetBlockNumber(newbuf), - vmbuffer_new); + vmbuffer_new, VISIBILITYMAP_VALID_BITS); } if (newbuf != buffer) @@ -4556,6 +4570,8 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, ItemPointer tid = &(tuple->t_self); ItemId lp; Page page; + Buffer vmbuffer = InvalidBuffer; + BlockNumber block; TransactionId xid, xmax; uint16 old_infomask, @@ -4563,8 +4579,19 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, new_infomask2; bool first_time = true; bool have_tuple_lock = false; + bool cleared_all_frozen = false; *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); + block = ItemPointerGetBlockNumber(tid); + + /* + * Before locking the buffer, pin the visibility map page if it may be + * necessary. XXX: It might be possible for this to change after acquiring + * the lock below. We don't yet deal with that case. + */ + if (PageIsAllVisible(BufferGetPage(*buffer))) + visibilitymap_pin(relation, block, &vmbuffer); + LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(*buffer); @@ -4580,15 +4607,14 @@ l3: if (result == HeapTupleInvisible) { - LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); - /* * This is possible, but only when locking a tuple for ON CONFLICT * UPDATE. 
We return this value here rather than throwing an error in * order to give that case the opportunity to throw a more specific * error. */ - return HeapTupleInvisible; + result = HeapTupleInvisible; + goto out_locked; } else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated) { @@ -4646,7 +4672,8 @@ l3: if (TUPLOCK_from_mxstatus(members[i].status) >= mode) { pfree(members); - return HeapTupleMayBeUpdated; + result = HeapTupleMayBeUpdated; + goto out_unlocked; } } @@ -4661,21 +4688,30 @@ l3: Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) || HEAP_XMAX_IS_SHR_LOCKED(infomask) || HEAP_XMAX_IS_EXCL_LOCKED(infomask)); - return HeapTupleMayBeUpdated; - break; + result = HeapTupleMayBeUpdated; + goto out_unlocked; case LockTupleShare: if (HEAP_XMAX_IS_SHR_LOCKED(infomask) || HEAP_XMAX_IS_EXCL_LOCKED(infomask)) - return HeapTupleMayBeUpdated; + { + result = HeapTupleMayBeUpdated; + goto out_unlocked; + } break; case LockTupleNoKeyExclusive: if (HEAP_XMAX_IS_EXCL_LOCKED(infomask)) - return HeapTupleMayBeUpdated; + { + result = HeapTupleMayBeUpdated; + goto out_unlocked; + } break; case LockTupleExclusive: if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) && infomask2 & HEAP_KEYS_UPDATED) - return HeapTupleMayBeUpdated; + { + result = HeapTupleMayBeUpdated; + goto out_unlocked; + } break; } } @@ -5036,10 +5072,7 @@ failed: hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); else hufd->cmax = InvalidCommandId; - LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); - if (have_tuple_lock) - UnlockTupleTuplock(relation, tid, mode); - return result; + goto out_locked; } xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); @@ -5094,6 +5127,13 @@ failed: if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) tuple->t_data->t_ctid = *tid; + /* Clear only the all-frozen bit on visibility map if needed */ + if (PageIsAllVisible(page) && + visibilitymap_clear(relation, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN)) + cleared_all_frozen = true; + + MarkBufferDirty(*buffer); /* @@ -5120,6 +5160,7 @@ failed: 
xlrec.locking_xid = xid; xlrec.infobits_set = compute_infobits(new_infomask, tuple->t_data->t_infomask2); + xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0; XLogRegisterData((char *) &xlrec, SizeOfHeapLock); /* we don't decode row locks atm, so no need to log the origin */ @@ -5131,8 +5172,15 @@ failed: END_CRIT_SECTION(); + result = HeapTupleMayBeUpdated; + +out_locked: LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); +out_unlocked: + if (BufferIsValid(vmbuffer)) + ReleaseBuffer(vmbuffer); + /* * Don't update the visibility map here. Locking a tuple doesn't change * visibility info. @@ -5145,7 +5193,7 @@ failed: if (have_tuple_lock) UnlockTupleTuplock(relation, tid, mode); - return HeapTupleMayBeUpdated; + return result; } /* @@ -5577,6 +5625,7 @@ static HTSU_Result heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, LockTupleMode mode) { + HTSU_Result result; ItemPointerData tupid; HeapTupleData mytup; Buffer buf; @@ -5587,6 +5636,9 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, TransactionId xmax, new_xmax; TransactionId priorXmax = InvalidTransactionId; + bool cleared_all_frozen = false; + Buffer vmbuffer = InvalidBuffer; + BlockNumber block; ItemPointerCopy(tid, &tupid); @@ -5594,6 +5646,7 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, { new_infomask = 0; new_xmax = InvalidTransactionId; + block = ItemPointerGetBlockNumber(&tupid); ItemPointerCopy(&tupid, &(mytup.t_self)); if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL)) @@ -5610,6 +5663,17 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, l4: CHECK_FOR_INTERRUPTS(); + + /* + * Before locking the buffer, pin the visibility map page if it may be + * necessary. XXX: It might be possible for this to change after + * acquiring the lock below. We don't yet deal with that case. 
+ */ + if (PageIsAllVisible(BufferGetPage(buf))) + visibilitymap_pin(rel, block, &vmbuffer); + else + vmbuffer = InvalidBuffer; + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); /* @@ -5620,8 +5684,8 @@ l4: !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), priorXmax)) { - UnlockReleaseBuffer(buf); - return HeapTupleMayBeUpdated; + result = HeapTupleMayBeUpdated; + goto out_locked; } old_infomask = mytup.t_data->t_infomask; @@ -5661,11 +5725,9 @@ l4: HEAP_XMAX_IS_LOCKED_ONLY(old_infomask)); for (i = 0; i < nmembers; i++) { - HTSU_Result res; - - res = test_lockmode_for_conflict(members[i].status, - members[i].xid, - mode, &needwait); + result = test_lockmode_for_conflict(members[i].status, + members[i].xid, + mode, &needwait); if (needwait) { @@ -5676,11 +5738,10 @@ l4: pfree(members); goto l4; } - if (res != HeapTupleMayBeUpdated) + if (result != HeapTupleMayBeUpdated) { - UnlockReleaseBuffer(buf); pfree(members); - return res; + goto out_locked; } } if (members) @@ -5688,7 +5749,6 @@ l4: } else { - HTSU_Result res; MultiXactStatus status; /* @@ -5727,8 +5787,8 @@ l4: status = MultiXactStatusNoKeyUpdate; } - res = test_lockmode_for_conflict(status, rawxmax, mode, - &needwait); + result = test_lockmode_for_conflict(status, rawxmax, mode, + &needwait); if (needwait) { LockBuffer(buf, BUFFER_LOCK_UNLOCK); @@ -5736,10 +5796,9 @@ l4: XLTW_LockUpdated); goto l4; } - if (res != HeapTupleMayBeUpdated) + if (result != HeapTupleMayBeUpdated) { - UnlockReleaseBuffer(buf); - return res; + goto out_locked; } } } @@ -5749,6 +5808,11 @@ l4: xid, mode, false, &new_xmax, &new_infomask, &new_infomask2); + if (PageIsAllVisible(BufferGetPage(buf)) && + visibilitymap_clear(rel, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN)) + cleared_all_frozen = true; + START_CRIT_SECTION(); /* ... 
and set them */ @@ -5773,6 +5837,8 @@ l4: xlrec.offnum = ItemPointerGetOffsetNumber(&mytup.t_self); xlrec.xmax = new_xmax; xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2); + xlrec.flags = + cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0; XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated); @@ -5788,15 +5854,28 @@ l4: ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || HeapTupleHeaderIsOnlyLocked(mytup.t_data)) { - UnlockReleaseBuffer(buf); - return HeapTupleMayBeUpdated; + result = HeapTupleMayBeUpdated; + goto out_locked; } /* tail recursion */ priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); } + + result = HeapTupleMayBeUpdated; + +out_locked: + UnlockReleaseBuffer(buf); + + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + + return result; + } /* @@ -8107,7 +8186,7 @@ heap_xlog_delete(XLogReaderState *record) Buffer vmbuffer = InvalidBuffer; visibilitymap_pin(reln, blkno, &vmbuffer); - visibilitymap_clear(reln, blkno, vmbuffer); + visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8185,7 +8264,7 @@ heap_xlog_insert(XLogReaderState *record) Buffer vmbuffer = InvalidBuffer; visibilitymap_pin(reln, blkno, &vmbuffer); - visibilitymap_clear(reln, blkno, vmbuffer); + visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8305,7 +8384,7 @@ heap_xlog_multi_insert(XLogReaderState *record) Buffer vmbuffer = InvalidBuffer; visibilitymap_pin(reln, blkno, &vmbuffer); - visibilitymap_clear(reln, blkno, vmbuffer); + visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8460,7 +8539,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) Buffer vmbuffer = 
InvalidBuffer; visibilitymap_pin(reln, oldblk, &vmbuffer); - visibilitymap_clear(reln, oldblk, vmbuffer); + visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8544,7 +8623,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) Buffer vmbuffer = InvalidBuffer; visibilitymap_pin(reln, newblk, &vmbuffer); - visibilitymap_clear(reln, newblk, vmbuffer); + visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); } @@ -8724,6 +8803,27 @@ heap_xlog_lock(XLogReaderState *record) ItemId lp = NULL; HeapTupleHeader htup; + /* + * The visibility map may need to be fixed even if the heap page is + * already up-to-date. + */ + if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED) + { + RelFileNode rnode; + Buffer vmbuffer = InvalidBuffer; + BlockNumber block; + Relation reln; + + XLogRecGetBlockTag(record, 0, &rnode, NULL, &block); + reln = CreateFakeRelcacheEntry(rnode); + + visibilitymap_pin(reln, block, &vmbuffer); + visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN); + + ReleaseBuffer(vmbuffer); + FreeFakeRelcacheEntry(reln); + } + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); @@ -8776,6 +8876,27 @@ heap_xlog_lock_updated(XLogReaderState *record) xlrec = (xl_heap_lock_updated *) XLogRecGetData(record); + /* + * The visibility map may need to be fixed even if the heap page is + * already up-to-date. 
+ */ + if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED) + { + RelFileNode rnode; + Buffer vmbuffer = InvalidBuffer; + BlockNumber block; + Relation reln; + + XLogRecGetBlockTag(record, 0, &rnode, NULL, &block); + reln = CreateFakeRelcacheEntry(rnode); + + visibilitymap_pin(reln, block, &vmbuffer); + visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN); + + ReleaseBuffer(vmbuffer); + FreeFakeRelcacheEntry(reln); + } + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); |