aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/tablesample
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/tablesample')
-rw-r--r--src/backend/access/tablesample/Makefile17
-rw-r--r--src/backend/access/tablesample/bernoulli.c235
-rw-r--r--src/backend/access/tablesample/system.c186
-rw-r--r--src/backend/access/tablesample/tablesample.c368
4 files changed, 806 insertions, 0 deletions
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
new file mode 100644
index 00000000000..46eeb59f9c4
--- /dev/null
+++ b/src/backend/access/tablesample/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for utils/tablesample
+#
+# IDENTIFICATION
+# src/backend/utils/tablesample/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/tablesample
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = tablesample.o system.o bernoulli.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
new file mode 100644
index 00000000000..c91f3f593e5
--- /dev/null
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -0,0 +1,235 @@
+/*-------------------------------------------------------------------------
+ *
+ * bernoulli.c
+ * interface routines for BERNOULLI tablesample method
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/bernoulli.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/* tsdesc */
+typedef struct
+{
+ uint32 seed; /* random seed */
+ BlockNumber startblock; /* starting block, we use ths for syncscan support */
+ BlockNumber nblocks; /* number of blocks */
+ BlockNumber blockno; /* current block */
+ float4 probability; /* probabilty that tuple will be returned (0.0-1.0) */
+ OffsetNumber lt; /* last tuple returned from current block */
+ SamplerRandomState randstate; /* random generator tsdesc */
+} BernoulliSamplerData;
+
+/*
+ * Initialize the state.
+ */
+Datum
+tsm_bernoulli_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ BernoulliSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(BernoulliSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->startblock = scan->rs_startblock;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->blockno = InvalidBlockNumber;
+ sampler->probability = percent / 100;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number to read or InvalidBlockNumber if we are at the
+ * end of the relation.
+ */
+Datum
+tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ /*
+ * Bernoulli sampling scans all blocks on the table and supports
+ * syncscan so loop from startblock to startblock instead of
+ * from 0 to nblocks.
+ */
+ if (sampler->blockno == InvalidBlockNumber)
+ sampler->blockno = sampler->startblock;
+ else
+ {
+ sampler->blockno++;
+
+ if (sampler->blockno >= sampler->nblocks)
+ sampler->blockno = 0;
+
+ if (sampler->blockno == sampler->startblock)
+ PG_RETURN_UINT32(InvalidBlockNumber);
+ }
+
+ PG_RETURN_UINT32(sampler->blockno);
+}
+
+/*
+ * Get next tuple from current block.
+ *
+ * This method implements the main logic in bernoulli sampling.
+ * The algorithm simply generates new random number (in 0.0-1.0 range) and if
+ * it falls within user specified probability (in the same range) return the
+ * tuple offset.
+ *
+ * It is ok here to return tuple offset without knowing if tuple is visible
+ * and not check it via examinetuple. The reason for that is that we do the
+ * coinflip (random number generation) for every tuple in the table. Since all
+ * tuples have same probability of being returned the visible and invisible
+ * tuples will be returned in same ratio as they have in the actual table.
+ * This means that there is no skew towards either visible or invisible tuples
+ * and the number returned visible tuples to from the executor node is the
+ * fraction of visible tuples which was specified in input.
+ *
+ * This is faster than doing the coinflip in the examinetuple because we don't
+ * have to do visibility checks on uninteresting tuples.
+ *
+ * If we reach end of the block return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+Datum
+tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+ float4 probability = sampler->probability;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /*
+ * Loop over tuple offsets until the random generator returns value that
+ * is within the probability of returning the tuple or until we reach
+ * end of the block.
+ *
+ * (This is our implementation of bernoulli trial)
+ */
+ while (sampler_random_fract(sampler->randstate) > probability)
+ {
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ break;
+ }
+
+ if (tupoffset > maxoffset)
+ /* Tell SampleScan that we want next block. */
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_bernoulli_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset tsdesc (called by ReScan).
+ */
+Datum
+tsm_bernoulli_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ sampler->blockno = InvalidBlockNumber;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_bernoulli_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ *pages = baserel->pages;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
new file mode 100644
index 00000000000..1412e511faf
--- /dev/null
+++ b/src/backend/access/tablesample/system.c
@@ -0,0 +1,186 @@
+/*-------------------------------------------------------------------------
+ *
+ * system.c
+ * interface routines for system tablesample method
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/system.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/*
+ * State
+ */
+typedef struct
+{
+ BlockSamplerData bs;
+ uint32 seed; /* random seed */
+ BlockNumber nblocks; /* number of block in relation */
+ int samplesize; /* number of blocks to return */
+ OffsetNumber lt; /* last tuple returned from current block */
+} SystemSamplerData;
+
+
+/*
+ * Initializes the state.
+ */
+Datum
+tsm_system_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ SystemSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(SystemSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
+ sampler->lt = InvalidOffsetNumber;
+
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number or InvalidBlockNumber when we're done.
+ *
+ * Uses the same logic as ANALYZE for picking the random blocks.
+ */
+Datum
+tsm_system_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ BlockNumber blockno;
+
+ if (!BlockSampler_HasMore(&sampler->bs))
+ PG_RETURN_UINT32(InvalidBlockNumber);
+
+ blockno = BlockSampler_Next(&sampler->bs);
+
+ PG_RETURN_UINT32(blockno);
+}
+
+/*
+ * Get next tuple offset in current block or InvalidOffsetNumber if we are done
+ * with this block.
+ */
+Datum
+tsm_system_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_system_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset state (called by ReScan).
+ */
+Datum
+tsm_system_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+
+ sampler->lt = InvalidOffsetNumber;
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_system_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *pages = baserel->pages * samplesize;
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
new file mode 100644
index 00000000000..ef55d062e75
--- /dev/null
+++ b/src/backend/access/tablesample/tablesample.c
@@ -0,0 +1,368 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.c
+ * TABLESAMPLE internal API
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/tablesample.c
+ *
+ * TABLESAMPLE is the SQL standard clause for sampling the relations.
+ *
+ * The API is interface between the Executor and the TABLESAMPLE Methods.
+ *
+ * TABLESAMPLE Methods are implementations of actual sampling algorithms which
+ * can be used for returning a sample of the source relation.
+ * Methods don't read the table directly but are asked for block number and
+ * tuple offset which they want to examine (or return) and the tablesample
+ * interface implemented here does the reading for them.
+ *
+ * We currently only support sampling of the physical relations, but in the
+ * future we might extend the API to support subqueries as well.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/tablesample.h"
+
+#include "catalog/pg_tablesample_method.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+
+
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
+
+
+/*
+ * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
+ */
+TableSampleDesc *
+tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
+{
+ FunctionCallInfoData fcinfo;
+ int i;
+ List *args = tablesample->args;
+ ListCell *arg;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
+
+ /* Load functions */
+ fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
+ fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
+ fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
+ if (OidIsValid(tablesample->tsmexaminetuple))
+ fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
+ else
+ tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
+ fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
+ fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
+
+ InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
+ list_length(args) + 2,
+ InvalidOid, NULL, NULL);
+
+ tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
+ tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
+
+ /* First argument for init function is always TableSampleDesc */
+ fcinfo.arg[0] = PointerGetDatum(tsdesc);
+ fcinfo.argnull[0] = false;
+
+ /*
+ * Second arg for init function is always REPEATABLE
+ * When tablesample->repeatable is NULL then REPEATABLE clause was not
+ * specified.
+ * When specified, the expression cannot evaluate to NULL.
+ */
+ if (tablesample->repeatable)
+ {
+ ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
+ (PlanState *) scanstate);
+ fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[1], NULL);
+ if (fcinfo.argnull[1])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value")));
+ }
+ else
+ {
+ fcinfo.arg[1] = UInt32GetDatum(random());
+ fcinfo.argnull[1] = false;
+ }
+
+ /* Rest of the arguments come from user. */
+ i = 2;
+ foreach(arg, args)
+ {
+ Expr *argexpr = (Expr *) lfirst(arg);
+ ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
+
+ if (argstate == NULL)
+ {
+ fcinfo.argnull[i] = true;
+ fcinfo.arg[i] = (Datum) 0;;
+ }
+
+ fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[i], NULL);
+ i++;
+ }
+ Assert(i == fcinfo.nargs);
+
+ (void) FunctionCallInvoke(&fcinfo);
+
+ return tsdesc;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE Method.
+ */
+HeapTuple
+tablesample_getnext(TableSampleDesc *desc)
+{
+ HeapScanDesc scan = desc->heapScan;
+ HeapTuple tuple = &(scan->rs_ctup);
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool page_all_visible;
+ ItemId itemid;
+ OffsetNumber tupoffset,
+ maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
+
+ /*
+ * When pagemode is disabled, the scan will do visibility checks for each
+ * tuple it finds so the buffer needs to be locked.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ UInt16GetDatum(maxoffset)));
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ bool visible;
+ bool found;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (page_all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /*
+ * Let the sampling method examine the actual tuple and decide if we
+ * should return it.
+ *
+ * Note that we let it examine even invisible tuples for
+ * statistical purposes, but not return them since user should
+ * never see invisible tuples.
+ */
+ if (OidIsValid(desc->tsmexaminetuple.fn_oid))
+ {
+ found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ PointerGetDatum(tuple),
+ BoolGetDatum(visible)));
+ /* Should not happen if sampling method is well written. */
+ if (found && !visible)
+ elog(ERROR, "Sampling method wanted to return invisible tuple");
+ }
+ else
+ found = visible;
+
+ /* Found visible tuple, return it. */
+ if (found)
+ {
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+
+ /*
+ * Report our new scan position for synchronization purposes. We
+ * don't do that when moving backwards, however. That would just
+ * mess up any other forward-moving scanners.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
+ blockno : scan->rs_startblock);
+
+ /*
+ * Reached end of scan.
+ */
+ if (!BlockNumberIsValid(blockno))
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
+
+/*
+ * Reset the sampling to starting state
+ */
+void
+tablesample_reset(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
+}
+
+/*
+ * Signal the sampling method that the scan has finished.
+ */
+void
+tablesample_end(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
+}
+
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ /*
+ * If this scan is reading whole pages at a time, there is already
+ * visibility info present in rs_vistuples so we can just search it
+ * for the tupoffset.
+ */
+ if (scan->rs_pageatatime)
+ {
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ /*
+ * Do the binary search over rs_vistuples, it's already sorted by
+ * OffsetNumber so we don't need to do any sorting ourselves here.
+ *
+ * We could use bsearch() here but it's slower for integers because
+ * of the function call overhead and because it needs boiler plate code
+ * it would not save us anything code-wise anyway.
+ */
+ while (start <= end)
+ {
+ int mid = start + (end - start) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (curoffset == tupoffset)
+ return true;
+ else if (curoffset > tupoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* No pagemode, we have to check the tuple itself. */
+ Snapshot snapshot = scan->rs_snapshot;
+ Buffer buffer = scan->rs_cbuf;
+
+ bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
+ snapshot);
+
+ return visible;
+ }
+}