aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/heap
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2018-04-07 13:24:10 -0700
committerAndres Freund <andres@anarazel.de>2018-04-07 13:24:27 -0700
commitf16241bef7cc271bff60e23de2f827a10e50dde8 (patch)
tree3e35912975c9be3038ce3b35625d21763979a313 /src/backend/access/heap
parent8224de4f42ccf98e08db07b43d52fed72f962ebb (diff)
downloadpostgresql-f16241bef7cc271bff60e23de2f827a10e50dde8.tar.gz
postgresql-f16241bef7cc271bff60e23de2f827a10e50dde8.zip
Raise error when affecting tuple moved into different partition.
When an update moves a row between partitions (supported since 2f178441044b), our normal logic for following update chains in READ COMMITTED mode doesn't work anymore. Cross partition updates are modeled as an delete from the old and insert into the new partition. No ctid chain exists across partitions, and there's no convenient space to introduce that link. Not throwing an error in a partitioned context when one would have been thrown without partitioning is obviously problematic. This commit introduces infrastructure to detect when a tuple has been moved, not just plainly deleted. That allows to throw an error when encountering a deletion that's actually a move, while attempting to following a ctid chain. The row deleted as part of a cross partition update is marked by pointing it's t_ctid to an invalid block, instead of self as a normal update would. That was deemed to be the least invasive and most future proof way to represent the knowledge, given how few infomask bits are there to be recycled (there's also some locking issues with using infomask bits). External code following ctid chains should be updated to check for moved tuples. The most likely consequence of not doing so is a missed error. Author: Amul Sul, editorialized by me Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
Diffstat (limited to 'src/backend/access/heap')
-rw-r--r--src/backend/access/heap/heapam.c39
-rw-r--r--src/backend/access/heap/pruneheap.c6
-rw-r--r--src/backend/access/heap/rewriteheap.c1
3 files changed, 40 insertions, 6 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 201d1f5a1b4..fa415ab06f9 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2308,6 +2308,7 @@ heap_get_latest_tid(Relation relation,
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
+ HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
UnlockReleaseBuffer(buffer);
@@ -3041,6 +3042,8 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
* crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* hufd - output parameter, filled in failure cases (see below)
+ * changingPart - true iff the tuple is being moved to another partition
+ * table due to an update of the partition key. Otherwise, false.
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we did delete it. Failure return codes are
@@ -3056,7 +3059,7 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
HTSU_Result
heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd)
+ HeapUpdateFailureData *hufd, bool changingPart)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
@@ -3325,6 +3328,10 @@ l1:
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
+ /* Signal that this is actually a move into another partition */
+ if (changingPart)
+ HeapTupleHeaderSetMovedPartitions(tp.t_data);
+
MarkBufferDirty(buffer);
/*
@@ -3342,7 +3349,11 @@ l1:
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, &tp);
- xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0;
+ xlrec.flags = 0;
+ if (all_visible_cleared)
+ xlrec.flags |= XLH_DELETE_ALL_VISIBLE_CLEARED;
+ if (changingPart)
+ xlrec.flags |= XLH_DELETE_IS_PARTITION_MOVE;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
@@ -3450,7 +3461,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ ,
- &hufd);
+ &hufd, false /* changingPart */);
switch (result)
{
case HeapTupleSelfUpdated:
@@ -6051,6 +6062,7 @@ l4:
next:
/* if we find the end of update chain, we're done. */
if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
+ HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) ||
ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
HeapTupleHeaderIsOnlyLocked(mytup.t_data))
{
@@ -6102,7 +6114,12 @@ static HTSU_Result
heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
TransactionId xid, LockTupleMode mode)
{
- if (!ItemPointerEquals(&tuple->t_self, ctid))
+ /*
+ * If the tuple has not been updated, or has moved into another partition
+ * (effectively a delete) stop here.
+ */
+ if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) &&
+ !ItemPointerEquals(&tuple->t_self, ctid))
{
/*
* If this is the first possibly-multixact-able operation in the
@@ -8493,8 +8510,11 @@ heap_xlog_delete(XLogReaderState *record)
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
- /* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = target_tid;
+ /* Make sure t_ctid is set correctly */
+ if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
+ HeapTupleHeaderSetMovedPartitions(htup);
+ else
+ htup->t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
@@ -9422,6 +9442,13 @@ heap_mask(char *pagedata, BlockNumber blkno)
*/
if (HeapTupleHeaderIsSpeculative(page_htup))
ItemPointerSet(&page_htup->t_ctid, blkno, off);
+
+ /*
+ * NB: Not ignoring ctid changes due to the tuple having moved
+ * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
+ * important information that needs to be in-sync between primary
+ * and standby, and thus is WAL logged.
+ */
}
/*
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index f67d7d15df1..c2f5343dac8 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -552,6 +552,9 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
+ /* HOT implies it can't have moved to different partition */
+ Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
+
/*
* Advance to next chain member.
*/
@@ -823,6 +826,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
+ /* HOT implies it can't have moved to different partition */
+ Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
+
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 7d466c2588c..8d3c861a330 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -424,6 +424,7 @@ rewrite_heap_tuple(RewriteState state,
*/
if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
+ !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) &&
!(ItemPointerEquals(&(old_tuple->t_self),
&(old_tuple->t_data->t_ctid))))
{