tableam: sample scan.

This moves sample scan support to below tableam. It's not optional as there is, in contrast to e.g. bitmap heap scans, no alternative way to perform tablesample queries. If an AM can't deal with the block-based API, it will have to throw an ERROR. The tableam callbacks for this are block-based, but given the current TsmRoutine interface, that seems to be required. The new interface doesn't require TsmRoutines to perform visibility checks anymore - doing so would require the TsmRoutine to know details about the AM, which we want to avoid. To continue to allow taking the returned number of tuples into account, SampleScanState now has a donetuples field (which previously e.g. existed in SystemRowsSamplerData), which is only incremented after the visibility check succeeds. Author: Andres Freund Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
author: Andres Freund <andres@anarazel.de> 2019-03-30 20:18:53 -0700
committer: Andres Freund <andres@anarazel.de> 2019-03-31 18:37:57 -0700
commit: 73c954d24896aeb05de0f81d75e891a858e439e9 (patch)
tree: fd4ef1ff3c9b695b061ea3180ecda8fbea8c8b9b /src/backend/executor/nodeSamplescan.c
parent: 4bb50236eb561f4639e75a393a5a1c9b8681acfb (diff)
download: postgresql-73c954d24896aeb05de0f81d75e891a858e439e9.tar.gz
postgresql-73c954d24896aeb05de0f81d75e891a858e439e9.zip
1 file changed, 29 insertions, 220 deletions
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 817e4ca41fc..14a0a6357d4 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -14,7 +14,6 @@
  */
 #include "postgres.h"
 
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
@@ -29,9 +28,7 @@
 
 static TupleTableSlot *SampleNext(SampleScanState *node);
 static void tablesample_init(SampleScanState *scanstate);
-static HeapTuple tablesample_getnext(SampleScanState *scanstate);
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
-				   HeapScanDesc scan);
+static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
 
 /* ----------------------------------------------------------------
  *						Scan Support
@@ -47,10 +44,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-	HeapTuple	tuple;
-	TupleTableSlot *slot;
-	HeapScanDesc hscan;
-
 	/*
 	 * if this is first call within a scan, initialize
 	 */
@@ -60,19 +53,7 @@ SampleNext(SampleScanState *node)
 	/*
 	 * get the next tuple, and store it in our result slot
 	 */
-	tuple = tablesample_getnext(node);
-
-	slot = node->ss.ss_ScanTupleSlot;
-	hscan = (HeapScanDesc) node->ss.ss_currentScanDesc;
-
-	if (tuple)
-		ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-								 slot,	/* slot to store in */
-								 hscan->rs_cbuf); /* tuple's buffer */
-	else
-		ExecClearTuple(slot);
-
-	return slot;
+	return tablesample_getnext(node);
 }
 
 /*
@@ -237,6 +218,9 @@ ExecReScanSampleScan(SampleScanState *node)
 {
 	/* Remember we need to do BeginSampleScan again (if we did it at all) */
 	node->begun = false;
+	node->done = false;
+	node->haveblock = false;
+	node->donetuples = 0;
 
 	ExecScanReScan(&node->ss);
 }
@@ -258,6 +242,7 @@ tablesample_init(SampleScanState *scanstate)
 	int			i;
 	ListCell   *arg;
 
+	scanstate->donetuples = 0;
 	params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
 
 	i = 0;
@@ -345,225 +330,49 @@ tablesample_init(SampleScanState *scanstate)
 
 /*
  * Get next tuple from TABLESAMPLE method.
- *
- * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
- * perhaps be better to refactor to share more code.
  */
-static HeapTuple
+static TupleTableSlot *
 tablesample_getnext(SampleScanState *scanstate)
 {
-	TsmRoutine *tsm = scanstate->tsmroutine;
 	TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
-	HeapScanDesc hscan = (HeapScanDesc) scan;
-	HeapTuple	tuple = &(hscan->rs_ctup);
-	Snapshot	snapshot = scan->rs_snapshot;
-	bool		pagemode = scan->rs_pageatatime;
-	BlockNumber blockno;
-	Page		page;
-	bool		all_visible;
-	OffsetNumber maxoffset;
-
-	if (!hscan->rs_inited)
-	{
-		/*
-		 * return null immediately if relation is empty
-		 */
-		if (hscan->rs_nblocks == 0)
-		{
-			Assert(!BufferIsValid(hscan->rs_cbuf));
-			tuple->t_data = NULL;
-			return NULL;
-		}
-		if (tsm->NextSampleBlock)
-		{
-			blockno = tsm->NextSampleBlock(scanstate);
-			if (!BlockNumberIsValid(blockno))
-			{
-				tuple->t_data = NULL;
-				return NULL;
-			}
-		}
-		else
-			blockno = hscan->rs_startblock;
-		Assert(blockno < hscan->rs_nblocks);
-		heapgetpage(scan, blockno);
-		hscan->rs_inited = true;
-	}
-	else
-	{
-		/* continue from previously returned page/tuple */
-		blockno = hscan->rs_cblock;	/* current page */
-	}
+	TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
 
-	/*
-	 * When not using pagemode, we must lock the buffer during tuple
-	 * visibility checks.
-	 */
-	if (!pagemode)
-		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+	ExecClearTuple(slot);
 
-	page = (Page) BufferGetPage(hscan->rs_cbuf);
-	all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-	maxoffset = PageGetMaxOffsetNumber(page);
+	if (scanstate->done)
+		return NULL;
 
 	for (;;)
 	{
-		OffsetNumber tupoffset;
-		bool		finished;
-
-		CHECK_FOR_INTERRUPTS();
-
-		/* Ask the tablesample method which tuples to check on this page. */
-		tupoffset = tsm->NextSampleTuple(scanstate,
-										 blockno,
-										 maxoffset);
-
-		if (OffsetNumberIsValid(tupoffset))
+		if (!scanstate->haveblock)
 		{
-			ItemId		itemid;
-			bool		visible;
-
-			/* Skip invalid tuple pointers. */
-			itemid = PageGetItemId(page, tupoffset);
-			if (!ItemIdIsNormal(itemid))
-				continue;
-
-			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-			tuple->t_len = ItemIdGetLength(itemid);
-			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-			if (all_visible)
-				visible = true;
-			else
-				visible = SampleTupleVisible(tuple, tupoffset, hscan);
-
-			/* in pagemode, heapgetpage did this for us */
-			if (!pagemode)
-				CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
-												hscan->rs_cbuf, snapshot);
-
-			if (visible)
-			{
-				/* Found visible tuple, return it. */
-				if (!pagemode)
-					LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-				break;
-			}
-			else
+			if (!table_scan_sample_next_block(scan, scanstate))
 			{
-				/* Try next tuple from same page. */
-				continue;
-			}
-		}
+				scanstate->haveblock = false;
+				scanstate->done = true;
 
-		/*
-		 * if we get here, it means we've exhausted the items on this page and
-		 * it's time to move to the next.
-		 */
-		if (!pagemode)
-			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+				/* exhausted relation */
+				return NULL;
+			}
 
-		if (tsm->NextSampleBlock)
-		{
-			blockno = tsm->NextSampleBlock(scanstate);
-			Assert(!scan->rs_syncscan);
-			finished = !BlockNumberIsValid(blockno);
+			scanstate->haveblock = true;
 		}
-		else
-		{
-			/* Without NextSampleBlock, just do a plain forward seqscan. */
-			blockno++;
-			if (blockno >= hscan->rs_nblocks)
-				blockno = 0;
 
+		if (!table_scan_sample_next_tuple(scan, scanstate, slot))
+		{
 			/*
-			 * Report our new scan position for synchronization purposes.
-			 *
-			 * Note: we do this before checking for end of scan so that the
-			 * final state of the position hint is back at the start of the
-			 * rel.  That's not strictly necessary, but otherwise when you run
-			 * the same query multiple times the starting position would shift
-			 * a little bit backwards on every invocation, which is confusing.
-			 * We don't guarantee any specific ordering in general, though.
+			 * If we get here, it means we've exhausted the items on this page
+			 * and it's time to move to the next.
 			 */
-			if (scan->rs_syncscan)
-				ss_report_location(scan->rs_rd, blockno);
-
-			finished = (blockno == hscan->rs_startblock);
+			scanstate->haveblock = false;
+			continue;
 		}
 
-		/*
-		 * Reached end of scan?
-		 */
-		if (finished)
-		{
-			if (BufferIsValid(hscan->rs_cbuf))
-				ReleaseBuffer(hscan->rs_cbuf);
-			hscan->rs_cbuf = InvalidBuffer;
-			hscan->rs_cblock = InvalidBlockNumber;
-			tuple->t_data = NULL;
-			hscan->rs_inited = false;
-			return NULL;
-		}
-
-		Assert(blockno < hscan->rs_nblocks);
-		heapgetpage(scan, blockno);
-
-		/* Re-establish state for new page */
-		if (!pagemode)
-			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-		page = (Page) BufferGetPage(hscan->rs_cbuf);
-		all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-		maxoffset = PageGetMaxOffsetNumber(page);
+		/* Found visible tuple, return it. */
+		break;
 	}
 
-	/* Count successfully-fetched tuples as heap fetches */
-	pgstat_count_heap_getnext(scan->rs_rd);
-
-	return &(hscan->rs_ctup);
-}
+	scanstate->donetuples++;
 
-/*
- * Check visibility of the tuple.
- */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
-{
-	if (scan->rs_base.rs_pageatatime)
-	{
-		/*
-		 * In pageatatime mode, heapgetpage() already did visibility checks,
-		 * so just look at the info it left in rs_vistuples[].
-		 *
-		 * We use a binary search over the known-sorted array.  Note: we could
-		 * save some effort if we insisted that NextSampleTuple select tuples
-		 * in increasing order, but it's not clear that there would be enough
-		 * gain to justify the restriction.
-		 */
-		int			start = 0,
-					end = scan->rs_ntuples - 1;
-
-		while (start <= end)
-		{
-			int			mid = (start + end) / 2;
-			OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-			if (tupoffset == curoffset)
-				return true;
-			else if (tupoffset < curoffset)
-				end = mid - 1;
-			else
-				start = mid + 1;
-		}
-
-		return false;
-	}
-	else
-	{
-		/* Otherwise, we have to check the tuple individually. */
-		return HeapTupleSatisfiesVisibility(tuple,
-											scan->rs_base.rs_snapshot,
-											scan->rs_cbuf);
-	}
+	return slot;
 }
author	Andres Freund <andres@anarazel.de>	2019-03-30 20:18:53 -0700
committer	Andres Freund <andres@anarazel.de>	2019-03-31 18:37:57 -0700
commit	73c954d24896aeb05de0f81d75e891a858e439e9 (patch)
tree	fd4ef1ff3c9b695b061ea3180ecda8fbea8c8b9b /src/backend/executor/nodeSamplescan.c
parent	4bb50236eb561f4639e75a393a5a1c9b8681acfb (diff)
download	postgresql-73c954d24896aeb05de0f81d75e891a858e439e9.tar.gz postgresql-73c954d24896aeb05de0f81d75e891a858e439e9.zip