Diffstat (limited to 'src/backend/executor')
 -rw-r--r--  src/backend/executor/execMain.c         |  43
 -rw-r--r--  src/backend/executor/nodeLockRows.c     |  27
 -rw-r--r--  src/backend/executor/nodeModifyTable.c  |  93
 3 files changed, 120 insertions, 43 deletions
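Before the per-file hunks: all three files switch from passing separate update_ctid/update_xmax out-parameters to a single HeapUpdateFailureData ("hufd") that heapam fills in when an update, delete, or lock attempt fails. The declarations below are a sketch reconstructed from the call sites and the hufd.ctid / hufd.xmax / hufd.cmax references in this diff; the authoritative versions live in src/include/access/heapam.h, which is outside this diffstat.

    /*
     * Sketch only: reconstructed from the call sites in this patch,
     * not copied from access/heapam.h (that header is not in this diff).
     */
    typedef struct HeapUpdateFailureData
    {
        ItemPointerData ctid;   /* TID of newest version, or of the tuple
                                 * itself if it was deleted */
        TransactionId   xmax;   /* xid of the updating/deleting transaction */
        CommandId       cmax;   /* its command id, when the updater is our
                                 * own transaction */
    } HeapUpdateFailureData;

    /* Revised entry points, with buffer and failure data passed last: */
    extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
                                       CommandId cid, LockTupleMode mode,
                                       bool nowait, Buffer *buffer,
                                       HeapUpdateFailureData *hufd);
    extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
                                   CommandId cid, Snapshot crosscheck,
                                   bool wait, HeapUpdateFailureData *hufd);
    extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
                                   HeapTuple newtup, CommandId cid,
                                   Snapshot crosscheck, bool wait,
                                   HeapUpdateFailureData *hufd);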
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d966be543e4..dbd3755b1b5 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1802,8 +1802,7 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 	if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
 	{
 		HTSU_Result test;
-		ItemPointerData update_ctid;
-		TransactionId update_xmax;
+		HeapUpdateFailureData hufd;
 
 		/*
 		 * If xmin isn't what we're expecting, the slot must have been
@@ -1838,13 +1837,13 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 		/*
 		 * If tuple was inserted by our own transaction, we have to check
 		 * cmin against es_output_cid: cmin >= current CID means our
-		 * command cannot see the tuple, so we should ignore it. Without
-		 * this we are open to the "Halloween problem" of indefinitely
-		 * re-updating the same tuple. (We need not check cmax because
-		 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
-		 * transaction dead, regardless of cmax.) We just checked that
-		 * priorXmax == xmin, so we can test that variable instead of
-		 * doing HeapTupleHeaderGetXmin again.
+		 * command cannot see the tuple, so we should ignore it.
+		 * Otherwise heap_lock_tuple() will throw an error, and so would
+		 * any later attempt to update or delete the tuple.  (We need not
+		 * check cmax because HeapTupleSatisfiesDirty will consider a
+		 * tuple deleted by our transaction dead, regardless of cmax.)
+		 * We just checked that priorXmax == xmin, so we can test that
+		 * variable instead of doing HeapTupleHeaderGetXmin again.
 		 */
 		if (TransactionIdIsCurrentTransactionId(priorXmax) &&
 			HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
@@ -1856,17 +1855,29 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 		/*
 		 * This is a live tuple, so now try to lock it.
 		 */
-		test = heap_lock_tuple(relation, &tuple, &buffer,
-							   &update_ctid, &update_xmax,
+		test = heap_lock_tuple(relation, &tuple,
 							   estate->es_output_cid,
-							   lockmode, false);
+							   lockmode, false /* wait */,
+							   &buffer, &hufd);
 		/* We now have two pins on the buffer, get rid of one */
 		ReleaseBuffer(buffer);
 
 		switch (test)
 		{
 			case HeapTupleSelfUpdated:
-				/* treat it as deleted; do not process */
+				/*
+				 * The target tuple was already updated or deleted by the
+				 * current command, or by a later command in the current
+				 * transaction.  We *must* ignore the tuple in the former
+				 * case, so as to avoid the "Halloween problem" of
+				 * repeated update attempts.  In the latter case it might
+				 * be sensible to fetch the updated tuple instead, but
+				 * doing so would require changing heap_lock_tuple as well
+				 * as heap_update and heap_delete to not complain about
+				 * updating "invisible" tuples, which seems pretty scary.
+				 * So for now, treat the tuple as deleted and do not
+				 * process.
+				 */
 				ReleaseBuffer(buffer);
 				return NULL;
 
@@ -1880,12 +1891,12 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
-				if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
+				if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
 				{
 					/* it was updated, so look at the updated version */
-					tuple.t_self = update_ctid;
+					tuple.t_self = hufd.ctid;
 					/* updated row should have xmin matching this xmax */
-					priorXmax = update_xmax;
+					priorXmax = hufd.xmax;
 					continue;
 				}
 				/* tuple was deleted, so give up */
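In execMain.c the interesting consumer is the HeapTupleUpdated arm: when the row turns out to have a successor version, hufd.ctid and hufd.xmax are enough to walk the update chain. A minimal sketch of that loop, with the heap_fetch and visibility checks elided (the elisions are mine, not the patch's):

    /*
     * Sketch of EvalPlanQualFetch's chain walk, assuming the heapam
     * convention that a successor version's xmin equals the xmax
     * reported for its predecessor.
     */
    for (;;)
    {
        /* ... heap_fetch and xmin == priorXmax cross-check elided ... */
        test = heap_lock_tuple(relation, &tuple,
                               estate->es_output_cid,
                               lockmode, false /* wait */,
                               &buffer, &hufd);
        if (test == HeapTupleUpdated &&
            !ItemPointerEquals(&hufd.ctid, &tuple.t_self))
        {
            tuple.t_self = hufd.ctid;   /* follow to the newer version */
            priorXmax = hufd.xmax;      /* its xmin must match this xmax */
            continue;
        }
        break;  /* locked it, self-updated, or deleted outright */
    }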
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index ec0825b460f..6474393d7f4 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -71,8 +71,7 @@ lnext:
 		bool		isNull;
 		HeapTupleData tuple;
 		Buffer		buffer;
-		ItemPointerData update_ctid;
-		TransactionId update_xmax;
+		HeapUpdateFailureData hufd;
 		LockTupleMode lockmode;
 		HTSU_Result test;
 		HeapTuple	copyTuple;
@@ -117,15 +116,26 @@ lnext:
 		else
 			lockmode = LockTupleShared;
 
-		test = heap_lock_tuple(erm->relation, &tuple, &buffer,
-							   &update_ctid, &update_xmax,
+		test = heap_lock_tuple(erm->relation, &tuple,
 							   estate->es_output_cid,
-							   lockmode, erm->noWait);
+							   lockmode, erm->noWait,
+							   &buffer, &hufd);
 		ReleaseBuffer(buffer);
 		switch (test)
 		{
 			case HeapTupleSelfUpdated:
-				/* treat it as deleted; do not process */
+				/*
+				 * The target tuple was already updated or deleted by the
+				 * current command, or by a later command in the current
+				 * transaction.  We *must* ignore the tuple in the former
+				 * case, so as to avoid the "Halloween problem" of repeated
+				 * update attempts.  In the latter case it might be sensible
+				 * to fetch the updated tuple instead, but doing so would
+				 * require changing heap_lock_tuple as well as heap_update and
+				 * heap_delete to not complain about updating "invisible"
+				 * tuples, which seems pretty scary.  So for now, treat the
+				 * tuple as deleted and do not process.
+				 */
 				goto lnext;
 
 			case HeapTupleMayBeUpdated:
@@ -137,8 +147,7 @@ lnext:
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
-				if (ItemPointerEquals(&update_ctid,
-									  &tuple.t_self))
+				if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
 				{
 					/* Tuple was deleted, so don't return it */
 					goto lnext;
@@ -146,7 +155,7 @@ lnext:
 				/* updated, so fetch and lock the updated version */
 				copyTuple = EvalPlanQualFetch(estate, erm->relation, lockmode,
-											  &update_ctid, update_xmax);
+											  &hufd.ctid, hufd.xmax);
 
 				if (copyTuple == NULL)
 				{
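nodeLockRows.c relies on the same convention to tell a delete from an update after HeapTupleUpdated: heapam reports hufd.ctid equal to the tuple's own TID when the row was deleted, and pointing at the successor version when it was updated. A hypothetical helper (the name and wrapper are mine, not the patch's) stating that test explicitly:

    /*
     * Hypothetical helper, not in the patch: true if the lock failure
     * was caused by an update (a successor version exists) rather than
     * a delete (hufd->ctid still points at the original tuple).
     */
    static bool
    tuple_was_updated(HeapUpdateFailureData *hufd, ItemPointer orig_tid)
    {
        return !ItemPointerEquals(&hufd->ctid, orig_tid);
    }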
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 26a59d0121d..d31015c654c 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -295,8 +295,7 @@ ExecDelete(ItemPointer tupleid,
 	ResultRelInfo *resultRelInfo;
 	Relation	resultRelationDesc;
 	HTSU_Result result;
-	ItemPointerData update_ctid;
-	TransactionId update_xmax;
+	HeapUpdateFailureData hufd;
 
 	/*
 	 * get information on the (current) result relation
@@ -348,14 +347,44 @@ ExecDelete(ItemPointer tupleid,
 	 */
 ldelete:;
 	result = heap_delete(resultRelationDesc, tupleid,
-						 &update_ctid, &update_xmax,
 						 estate->es_output_cid,
 						 estate->es_crosscheck_snapshot,
-						 true /* wait for commit */ );
+						 true /* wait for commit */,
+						 &hufd);
 	switch (result)
 	{
 		case HeapTupleSelfUpdated:
-			/* already deleted by self; nothing to do */
+			/*
+			 * The target tuple was already updated or deleted by the
+			 * current command, or by a later command in the current
+			 * transaction.  The former case is possible in a join DELETE
+			 * where multiple tuples join to the same target tuple.
+			 * This is somewhat questionable, but Postgres has always
+			 * allowed it: we just ignore additional deletion attempts.
+			 *
+			 * The latter case arises if the tuple is modified by a
+			 * command in a BEFORE trigger, or perhaps by a command in a
+			 * volatile function used in the query.  In such situations we
+			 * should not ignore the deletion, but it is equally unsafe to
+			 * proceed.  We don't want to discard the original DELETE
+			 * while keeping the triggered actions based on its deletion;
+			 * and it would be no better to allow the original DELETE
+			 * while discarding updates that it triggered.  The row update
+			 * carries some information that might be important according
+			 * to business rules; so throwing an error is the only safe
+			 * course.
+			 *
+			 * If a trigger actually intends this type of interaction,
+			 * it can re-execute the DELETE and then return NULL to
+			 * cancel the outer delete.
+			 */
+			if (hufd.cmax != estate->es_output_cid)
+				ereport(ERROR,
+						(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
+						 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
+						 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
+			/* Else, already deleted by self; nothing to do */
 			return NULL;
 
 		case HeapTupleMayBeUpdated:
@@ -366,7 +395,7 @@ ldelete:;
 				ereport(ERROR,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("could not serialize access due to concurrent update")));
-			if (!ItemPointerEquals(tupleid, &update_ctid))
+			if (!ItemPointerEquals(tupleid, &hufd.ctid))
 			{
 				TupleTableSlot *epqslot;
 
@@ -374,11 +403,11 @@ ldelete:;
 										   epqstate,
 										   resultRelationDesc,
 										   resultRelInfo->ri_RangeTableIndex,
-										   &update_ctid,
-										   update_xmax);
+										   &hufd.ctid,
+										   hufd.xmax);
 				if (!TupIsNull(epqslot))
 				{
-					*tupleid = update_ctid;
+					*tupleid = hufd.ctid;
 					goto ldelete;
 				}
 			}
@@ -482,8 +511,7 @@ ExecUpdate(ItemPointer tupleid,
 	ResultRelInfo *resultRelInfo;
 	Relation	resultRelationDesc;
 	HTSU_Result result;
-	ItemPointerData update_ctid;
-	TransactionId update_xmax;
+	HeapUpdateFailureData hufd;
 	List	   *recheckIndexes = NIL;
 
 	/*
@@ -564,14 +592,43 @@ lreplace:;
 		 * mode transactions.
 		 */
 		result = heap_update(resultRelationDesc, tupleid, tuple,
-							 &update_ctid, &update_xmax,
 							 estate->es_output_cid,
 							 estate->es_crosscheck_snapshot,
-							 true /* wait for commit */ );
+							 true /* wait for commit */,
+							 &hufd);
 		switch (result)
 		{
 			case HeapTupleSelfUpdated:
-				/* already deleted by self; nothing to do */
+				/*
+				 * The target tuple was already updated or deleted by the
+				 * current command, or by a later command in the current
+				 * transaction.  The former case is possible in a join UPDATE
+				 * where multiple tuples join to the same target tuple.
+				 * This is pretty questionable, but Postgres has always
+				 * allowed it: we just execute the first update action and
+				 * ignore additional update attempts.
+				 *
+				 * The latter case arises if the tuple is modified by a
+				 * command in a BEFORE trigger, or perhaps by a command in a
+				 * volatile function used in the query.  In such situations we
+				 * should not ignore the update, but it is equally unsafe to
+				 * proceed.  We don't want to discard the original UPDATE
+				 * while keeping the triggered actions based on it; and we
+				 * have no principled way to merge this update with the
+				 * previous ones.  So throwing an error is the only safe
+				 * course.
+				 *
+				 * If a trigger actually intends this type of interaction,
+				 * it can re-execute the UPDATE (assuming it can figure out
+				 * how) and then return NULL to cancel the outer update.
+				 */
+				if (hufd.cmax != estate->es_output_cid)
+					ereport(ERROR,
+							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
+							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
+							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
+				/* Else, already updated by self; nothing to do */
 				return NULL;
 
 			case HeapTupleMayBeUpdated:
@@ -582,7 +639,7 @@ lreplace:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
-				if (!ItemPointerEquals(tupleid, &update_ctid))
+				if (!ItemPointerEquals(tupleid, &hufd.ctid))
 				{
 					TupleTableSlot *epqslot;
 
@@ -590,11 +647,11 @@ lreplace:;
 											  epqstate,
 											  resultRelationDesc,
 											  resultRelInfo->ri_RangeTableIndex,
-											  &update_ctid,
-											  update_xmax);
+											  &hufd.ctid,
+											  hufd.xmax);
 					if (!TupIsNull(epqslot))
 					{
-						*tupleid = update_ctid;
+						*tupleid = hufd.ctid;
 						slot = ExecFilterJunk(resultRelInfo->ri_junkFilter,
 											  epqslot);
 						tuple = ExecMaterializeSlot(slot);
 						goto lreplace;
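The behavioral change of the patch is concentrated in the two HeapTupleSelfUpdated arms above: hufd.cmax identifies which command of our own transaction modified the tuple, so the executor can separate the long-allowed join-UPDATE/DELETE case from the newly forbidden BEFORE-trigger case. Spelled out as a two-branch sketch (a restatement of the code above, not additional patch logic):

    case HeapTupleSelfUpdated:
        if (hufd.cmax == estate->es_output_cid)
        {
            /* Modified by this very command, e.g. a join UPDATE/DELETE
             * hitting the same target row twice: historically allowed,
             * so just ignore the extra attempt. */
            return NULL;
        }
        /* Modified by a *later* command in our transaction, typically a
         * BEFORE ROW trigger fired by this command.  Proceeding could
         * silently discard that modification, so raise the new error. */
        ereport(ERROR,
                (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
                 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
                 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

As both comment blocks note, a trigger that genuinely wants this interaction can perform the UPDATE or DELETE itself and return NULL to suppress the outer action.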