Raise error when affecting tuple moved into different partition.

When an update moves a row between partitions (supported since 2f178441044b), our normal logic for following update chains in READ COMMITTED mode doesn't work anymore. Cross partition updates are modeled as an delete from the old and insert into the new partition. No ctid chain exists across partitions, and there's no convenient space to introduce that link. Not throwing an error in a partitioned context when one would have been thrown without partitioning is obviously problematic. This commit introduces infrastructure to detect when a tuple has been moved, not just plainly deleted. That allows to throw an error when encountering a deletion that's actually a move, while attempting to following a ctid chain. The row deleted as part of a cross partition update is marked by pointing it's t_ctid to an invalid block, instead of self as a normal update would. That was deemed to be the least invasive and most future proof way to represent the knowledge, given how few infomask bits are there to be recycled (there's also some locking issues with using infomask bits). External code following ctid chains should be updated to check for moved tuples. The most likely consequence of not doing so is a missed error. Author: Amul Sul, editorialized by me Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
author: Andres Freund <andres@anarazel.de> 2018-04-07 13:24:10 -0700
committer: Andres Freund <andres@anarazel.de> 2018-04-07 13:24:27 -0700
commit: f16241bef7cc271bff60e23de2f827a10e50dde8 (patch)
tree: 3e35912975c9be3038ce3b35625d21763979a313 /src/backend/executor
parent: 8224de4f42ccf98e08db07b43d52fed72f962ebb (diff)
download: postgresql-f16241bef7cc271bff60e23de2f827a10e50dde8.tar.gz
postgresql-f16241bef7cc271bff60e23de2f827a10e50dde8.zip
5 files changed, 58 insertions, 11 deletions
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index cc47f5df402..13ad92745e0 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2733,6 +2733,10 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 						ereport(ERROR,
 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 								 errmsg("could not serialize access due to concurrent update")));
+					if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+						ereport(ERROR,
+								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+								 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
 
 					/* Should not encounter speculative tuple on recheck */
 					Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
@@ -2801,6 +2805,14 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 		 * As above, it should be safe to examine xmax and t_ctid without the
 		 * buffer content lock, because they can't be changing.
 		 */
+
+		/* check whether next version would be in a different partition */
+		if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
+			ereport(ERROR,
+					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+					 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
+		/* check whether tuple has been deleted */
 		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
 		{
 			/* deleted, so forget about it */
diff --git a/src/backend/executor/execMerge.c b/src/backend/executor/execMerge.c
index d39ddd30345..d75d7e5ab26 100644
--- a/src/backend/executor/execMerge.c
+++ b/src/backend/executor/execMerge.c
@@ -324,7 +324,8 @@ lmerge_matched:;
 				slot = ExecDelete(mtstate, tupleid, NULL,
 								  slot, epqstate, estate,
 								  &tuple_deleted, false, &hufd, action,
-								  mtstate->canSetTag);
+								  mtstate->canSetTag,
+								  false /* changingPart */);
 
 				break;
 
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 6c5a5401c32..b66346702dc 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -191,9 +191,14 @@ retry:
 				break;
 			case HeapTupleUpdated:
 				/* XXX: Improve handling here */
-				ereport(LOG,
-						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-						 errmsg("concurrent update, retrying")));
+				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+					ereport(LOG,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
+				else
+					ereport(LOG,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("concurrent update, retrying")));
 				goto retry;
 			case HeapTupleInvisible:
 				elog(ERROR, "attempted to lock invisible tuple");
@@ -349,9 +354,14 @@ retry:
 				break;
 			case HeapTupleUpdated:
 				/* XXX: Improve handling here */
-				ereport(LOG,
-						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-						 errmsg("concurrent update, retrying")));
+				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+					ereport(LOG,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
+				else
+					ereport(LOG,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("concurrent update, retrying")));
 				goto retry;
 			case HeapTupleInvisible:
 				elog(ERROR, "attempted to lock invisible tuple");
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index b39ccf7dc13..ace126cbf24 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -218,6 +218,11 @@ lnext:
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+					ereport(ERROR,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
 				if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
 				{
 					/* Tuple was deleted, so don't return it */
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index bf4c2bf6082..f47649d0517 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -645,7 +645,8 @@ ExecDelete(ModifyTableState *mtstate,
 		   bool processReturning,
 		   HeapUpdateFailureData *hufdp,
 		   MergeActionState *actionState,
-		   bool canSetTag)
+		   bool canSetTag,
+		   bool changingPart)
 {
 	ResultRelInfo *resultRelInfo;
 	Relation	resultRelationDesc;
@@ -744,7 +745,8 @@ ldelete:;
 							 estate->es_output_cid,
 							 estate->es_crosscheck_snapshot,
 							 true /* wait for commit */ ,
-							 &hufd);
+							 &hufd,
+							 changingPart);
 
 		/*
 		 * Copy the necessary information, if the caller has asked for it. We
@@ -803,6 +805,10 @@ ldelete:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+					ereport(ERROR,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
 
 				if (!ItemPointerEquals(tupleid, &hufd.ctid))
 				{
@@ -1157,7 +1163,7 @@ lreplace:;
 			 */
 			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
 					   estate, &tuple_deleted, false, hufdp, NULL,
-					   false);
+					   false /* canSetTag */, true /* changingPart */);
 
 			/*
 			 * For some reason if DELETE didn't happen (e.g. trigger prevented
@@ -1333,6 +1339,10 @@ lreplace:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+					ereport(ERROR,
+							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+							 errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
 
 				if (!ItemPointerEquals(tupleid, &hufd.ctid))
 				{
@@ -1523,6 +1533,14 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 						 errmsg("could not serialize access due to concurrent update")));
 
 			/*
+			 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
+			 * a partitioned table we shouldn't reach to a case where tuple to
+			 * be lock is moved to another partition due to concurrent update
+			 * of the partition key.
+			 */
+			Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
+
+			/*
 			 * Tell caller to try again from the very start.
 			 *
 			 * It does not make sense to use the usual EvalPlanQual() style
@@ -2274,7 +2292,8 @@ ExecModifyTable(PlanState *pstate)
 			case CMD_DELETE:
 				slot = ExecDelete(node, tupleid, oldtuple, planSlot,
 								  &node->mt_epqstate, estate,
-								  NULL, true, NULL, NULL, node->canSetTag);
+								  NULL, true, NULL, NULL, node->canSetTag,
+								  false /* changingPart */);
 				break;
 			default:
 				elog(ERROR, "unknown operation");
author	Andres Freund <andres@anarazel.de>	2018-04-07 13:24:10 -0700
committer	Andres Freund <andres@anarazel.de>	2018-04-07 13:24:27 -0700
commit	f16241bef7cc271bff60e23de2f827a10e50dde8 (patch)
tree	3e35912975c9be3038ce3b35625d21763979a313 /src/backend/executor
parent	8224de4f42ccf98e08db07b43d52fed72f962ebb (diff)
download	postgresql-f16241bef7cc271bff60e23de2f827a10e50dde8.tar.gz postgresql-f16241bef7cc271bff60e23de2f827a10e50dde8.zip