| field | value | date |
|---|---|---|
| author | Andres Freund <andres@anarazel.de> | 2019-03-23 19:55:57 -0700 |
| committer | Andres Freund <andres@anarazel.de> | 2019-03-23 19:55:57 -0700 |
| commit | 5db6df0c0117ff2a4e0cd87594d2db408cd5022f (patch) | |
| tree | 7b06b96b6f8c1b7e4cdfb602af357f81e21f23b1 /src/backend/executor/nodeLockRows.c | |
| parent | f778e537a0d02d5e05016da3e6f4068914101dee (diff) | |
| download | postgresql-5db6df0c0117ff2a4e0cd87594d2db408cd5022f.tar.gz, postgresql-5db6df0c0117ff2a4e0cd87594d2db408cd5022f.zip | |

tableam: Add tuple_{insert, delete, update, lock} and use.
This adds new, required, table AM callbacks for insert/delete/update
and lock_tuple. To be able to reasonably use those, the EvalPlanQual
mechanism had to be adapted, moving more logic into the AM.
Previously both delete/update/lock call-sites and the EPQ mechanism had
to have awareness of the specific tuple format to be able to fetch the
latest version of a tuple. Obviously that needs to be abstracted
away. To do so, move the logic that finds the latest row version into
the AM. lock_tuple has a new flag argument,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION, that forces it to lock the last
version, rather than the current one. It'd have been possible to do
so via a separate callback as well, but finding the last version
usually also necessitates locking the newest version, making it
sensible to combine the two. This replaces the previous use of
EvalPlanQualFetch(). Additionally HeapTupleUpdated, which previously
signaled either a concurrent update or delete, is now split into two,
to avoid callers needing AM specific knowledge to differentiate.
The move of finding the latest row version into tuple_lock means that
encountering a row concurrently moved into another partition will now
raise an error about "tuple to be locked" rather than "tuple to be
updated/deleted" - which is accurate, as that always happens when
locking rows. While possibly slightly less helpful for users, it seems
like an acceptable trade-off.
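
To illustrate the new calling convention, here is a condensed sketch based on the
nodeLockRows.c hunk in the diff below; the variables rel, tid, slot, estate and
epq_needed stand in for the caller's own state, and error handling is elided:

```c
TM_FailureData tmfd;
TM_Result   res;
int         lockflags = TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS;

/* outside a snapshot-based isolation level, follow the update chain */
if (!IsolationUsesXactSnapshot())
    lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;

res = table_lock_tuple(rel, &tid, estate->es_snapshot,
                       slot, estate->es_output_cid,
                       LockTupleExclusive, LockWaitBlock,
                       lockflags, &tmfd);

switch (res)
{
    case TM_Ok:
        /* the locked tuple (possibly a newer version) is now in slot */
        if (tmfd.traversed)
            epq_needed = true;  /* an update chain was followed: recheck quals */
        break;

    case TM_Updated:            /* concurrently updated ... */
    case TM_Deleted:            /* ... or deleted, now reported separately */
        /* serialization failure under REPEATABLE READ or higher */
        break;

    case TM_SelfModified:
    case TM_Invisible:
    case TM_WouldBlock:
        /* self-updated, invisible, or would block under SKIP LOCKED */
        break;

    default:
        break;
}
```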
As part of this commit HTSU_Result has been renamed to TM_Result, and
its members have been expanded to differentiate between updating and
deleting. HeapUpdateFailureData has been renamed to TM_FailureData.
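
For reference, a sketch of the renamed result codes roughly as they appear in
access/tableam.h after this commit; the member list is from memory and the
comments are paraphrased:

```c
typedef enum TM_Result
{
    TM_Ok,              /* operation or lock succeeded */
    TM_Invisible,       /* tuple is not visible to the caller */
    TM_SelfModified,    /* already modified by the current command */
    TM_Updated,         /* concurrently updated (was part of HeapTupleUpdated) */
    TM_Deleted,         /* concurrently deleted (was part of HeapTupleUpdated) */
    TM_BeingModified,   /* being modified by a concurrent transaction */
    TM_WouldBlock       /* lock could not be acquired without blocking */
} TM_Result;
```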
The interface to speculative insertion is changed so nodeModifyTable.c
does not have to set the speculative token itself anymore. Instead
there's a version of tuple_insert, tuple_insert_speculative, that
performs the speculative insertion (without requiring a flag to signal
that fact), and the speculative insertion is either made permanent
with table_complete_speculative(succeeded = true) or aborted with
table_complete_speculative(succeeded = false).
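
A rough sketch of the resulting flow for an ON CONFLICT insertion in
nodeModifyTable.c; the argument order is paraphrased, index insertion is elided,
and the succeeded flag is shown with its documented sense:

```c
uint32      specToken;
bool        specConflict = false;

/* take a speculative-insertion token and insert the tuple under it */
specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
table_insert_speculative(resultRelationDesc, slot, estate->es_output_cid,
                         0, NULL, specToken);

/* insert index entries; an arbiter-index conflict sets specConflict */

/* make the insertion permanent, or kill it if a conflict was found */
table_complete_speculative(resultRelationDesc, slot, specToken,
                           !specConflict);

/* wake up anyone waiting on our speculative token */
SpeculativeInsertionLockRelease(GetCurrentTransactionId());
```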
Note that multi_insert is not yet routed through tableam, nor is
COPY. Changing multi_insert requires changes to copy.c that are large
enough to better be done separately.
Similarly, although simpler, CREATE TABLE AS and CREATE MATERIALIZED
VIEW are also only going to be adjusted in a later commit.
Author: Andres Freund and Haribabu Kommi
Discussion:
https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
https://postgr.es/m/20190313003903.nwvrxi7rw3ywhdel@alap3.anarazel.de
https://postgr.es/m/20160812231527.GA690404@alvherre.pgsql
Diffstat (limited to 'src/backend/executor/nodeLockRows.c')
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | src/backend/executor/nodeLockRows.c | 142 |

1 file changed, 44 insertions(+), 98 deletions(-)
```diff
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 76f0f9d66e5..7674ac893c2 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -21,14 +21,12 @@
 
 #include "postgres.h"
 
-#include "access/heapam.h"
-#include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "executor/executor.h"
 #include "executor/nodeLockRows.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
-#include "storage/bufmgr.h"
 #include "utils/rel.h"
 
 
@@ -82,11 +80,11 @@ lnext:
         ExecRowMark *erm = aerm->rowmark;
         Datum       datum;
         bool        isNull;
-        HeapTupleData tuple;
-        Buffer      buffer;
-        HeapUpdateFailureData hufd;
+        ItemPointerData tid;
+        TM_FailureData tmfd;
         LockTupleMode lockmode;
-        HTSU_Result test;
+        int         lockflags = 0;
+        TM_Result   test;
         TupleTableSlot *markSlot;
 
         /* clear any leftover test tuple for this rel */
@@ -112,6 +110,7 @@ lnext:
                 /* this child is inactive right now */
                 erm->ermActive = false;
                 ItemPointerSetInvalid(&(erm->curCtid));
+                ExecClearTuple(markSlot);
                 continue;
             }
         }
@@ -160,8 +159,8 @@ lnext:
             continue;
         }
 
-        /* okay, try to lock the tuple */
-        tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+        /* okay, try to lock (and fetch) the tuple */
+        tid = *((ItemPointer) DatumGetPointer(datum));
         switch (erm->markType)
         {
             case ROW_MARK_EXCLUSIVE:
@@ -182,18 +181,23 @@ lnext:
                 break;
         }
 
-        test = heap_lock_tuple(erm->relation, &tuple,
-                               estate->es_output_cid,
-                               lockmode, erm->waitPolicy, true,
-                               &buffer, &hufd);
-        ReleaseBuffer(buffer);
+        lockflags = TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS;
+        if (!IsolationUsesXactSnapshot())
+            lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
+
+        test = table_lock_tuple(erm->relation, &tid, estate->es_snapshot,
+                                markSlot, estate->es_output_cid,
+                                lockmode, erm->waitPolicy,
+                                lockflags,
+                                &tmfd);
+
         switch (test)
         {
-            case HeapTupleWouldBlock:
+            case TM_WouldBlock:
                 /* couldn't lock tuple in SKIP LOCKED mode */
                 goto lnext;
 
-            case HeapTupleSelfUpdated:
+            case TM_SelfModified:
 
                 /*
                  * The target tuple was already updated or deleted by the
@@ -204,65 +208,50 @@ lnext:
                  * to fetch the updated tuple instead, but doing so would
                  * require changing heap_update and heap_delete to not
                  * complain about updating "invisible" tuples, which seems
-                 * pretty scary (heap_lock_tuple will not complain, but few
-                 * callers expect HeapTupleInvisible, and we're not one of
-                 * them).  So for now, treat the tuple as deleted and do not
-                 * process.
+                 * pretty scary (table_lock_tuple will not complain, but few
+                 * callers expect TM_Invisible, and we're not one of them).  So
+                 * for now, treat the tuple as deleted and do not process.
                  */
                 goto lnext;
 
-            case HeapTupleMayBeUpdated:
-                /* got the lock successfully */
+            case TM_Ok:
+
+                /*
+                 * Got the lock successfully, the locked tuple saved in
+                 * markSlot for, if needed, EvalPlanQual testing below.
+                 */
+                if (tmfd.traversed)
+                    epq_needed = true;
                 break;
 
-            case HeapTupleUpdated:
+            case TM_Updated:
                 if (IsolationUsesXactSnapshot())
                     ereport(ERROR,
                             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                              errmsg("could not serialize access due to concurrent update")));
-                if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+                elog(ERROR, "unexpected table_lock_tuple status: %u",
+                     test);
+                break;
+
+            case TM_Deleted:
+                if (IsolationUsesXactSnapshot())
                     ereport(ERROR,
                             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                             errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-                if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
-                {
-                    /* Tuple was deleted, so don't return it */
-                    goto lnext;
-                }
-
-                /* updated, so fetch and lock the updated version */
-                if (!EvalPlanQualFetch(estate, erm->relation,
-                                       lockmode, erm->waitPolicy,
-                                       &hufd.ctid, hufd.xmax,
-                                       markSlot))
-                {
-                    /*
-                     * Tuple was deleted; or it's locked and we're under SKIP
-                     * LOCKED policy, so don't return it
-                     */
-                    goto lnext;
-                }
-                /* remember the actually locked tuple's TID */
-                tuple.t_self = markSlot->tts_tid;
-
-                /* Remember we need to do EPQ testing */
-                epq_needed = true;
-
-                /* Continue loop until we have all target tuples */
-                break;
+                             errmsg("could not serialize access due to concurrent update")));
+                /* tuple was deleted so don't return it */
+                goto lnext;
 
-            case HeapTupleInvisible:
+            case TM_Invisible:
                 elog(ERROR, "attempted to lock invisible tuple");
                 break;
 
             default:
-                elog(ERROR, "unrecognized heap_lock_tuple status: %u",
+                elog(ERROR, "unrecognized table_lock_tuple status: %u",
                      test);
         }
 
         /* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */
-        erm->curCtid = tuple.t_self;
+        erm->curCtid = tid;
     }
 
     /*
@@ -271,49 +260,6 @@ lnext:
     if (epq_needed)
     {
         /*
-         * Fetch a copy of any rows that were successfully locked without any
-         * update having occurred.  (We do this in a separate pass so as to
-         * avoid overhead in the common case where there are no concurrent
-         * updates.)  Make sure any inactive child rels have NULL test tuples
-         * in EPQ.
-         */
-        foreach(lc, node->lr_arowMarks)
-        {
-            ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
-            ExecRowMark *erm = aerm->rowmark;
-            TupleTableSlot *markSlot;
-            HeapTupleData tuple;
-            Buffer      buffer;
-
-            markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
-
-            /* skip non-active child tables, but clear their test tuples */
-            if (!erm->ermActive)
-            {
-                Assert(erm->rti != erm->prti);  /* check it's child table */
-                ExecClearTuple(markSlot);
-                continue;
-            }
-
-            /* was tuple updated and fetched above? */
-            if (!TupIsNull(markSlot))
-                continue;
-
-            /* foreign tables should have been fetched above */
-            Assert(erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE);
-            Assert(ItemPointerIsValid(&(erm->curCtid)));
-
-            /* okay, fetch the tuple */
-            tuple.t_self = erm->curCtid;
-            if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
-                            false, NULL))
-                elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
-            ExecStorePinnedBufferHeapTuple(&tuple, markSlot, buffer);
-            ExecMaterializeSlot(markSlot);
-            /* successful, use tuple in slot */
-        }
-
-        /*
          * Now fetch any non-locked source rows --- the EPQ logic knows how to
          * do that.
          */
```