aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeSamplescan.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor/nodeSamplescan.c')
-rw-r--r--src/backend/executor/nodeSamplescan.c437
1 files changed, 391 insertions, 46 deletions
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 4c1c5237b7d..dbe84b0baa8 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -3,7 +3,7 @@
* nodeSamplescan.c
* Support routines for sample scans of relations (table sampling).
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
@@ -14,22 +14,23 @@
*/
#include "postgres.h"
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeSamplescan.h"
#include "miscadmin.h"
-#include "parser/parsetree.h"
#include "pgstat.h"
-#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/rel.h"
-#include "utils/syscache.h"
#include "utils/tqual.h"
-static void InitScanRelation(SampleScanState *node, EState *estate,
- int eflags, TableSampleClause *tablesample);
+static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
static TupleTableSlot *SampleNext(SampleScanState *node);
-
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+ HeapScanDesc scan);
/* ----------------------------------------------------------------
* Scan Support
@@ -45,23 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node);
static TupleTableSlot *
SampleNext(SampleScanState *node)
{
- TupleTableSlot *slot;
- TableSampleDesc *tsdesc;
HeapTuple tuple;
+ TupleTableSlot *slot;
/*
- * get information from the scan state
+ * if this is first call within a scan, initialize
*/
- slot = node->ss.ss_ScanTupleSlot;
- tsdesc = node->tsdesc;
+ if (!node->begun)
+ tablesample_init(node);
+
+ /*
+ * get the next tuple, and store it in our result slot
+ */
+ tuple = tablesample_getnext(node);
- tuple = tablesample_getnext(tsdesc);
+ slot = node->ss.ss_ScanTupleSlot;
if (tuple)
ExecStoreTuple(tuple, /* tuple to store */
slot, /* slot to store in */
- tsdesc->heapScan->rs_cbuf, /* buffer associated
- * with this tuple */
+ node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
false); /* don't pfree this pointer */
else
ExecClearTuple(slot);
@@ -75,7 +79,10 @@ SampleNext(SampleScanState *node)
static bool
SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
{
- /* No need to recheck for SampleScan */
+ /*
+ * No need to recheck for SampleScan, since like SeqScan we don't pass any
+ * checkable keys to heap_beginscan.
+ */
return true;
}
@@ -103,8 +110,7 @@ ExecSampleScan(SampleScanState *node)
* ----------------------------------------------------------------
*/
static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
- TableSampleClause *tablesample)
+InitScanRelation(SampleScanState *node, EState *estate, int eflags)
{
Relation currentRelation;
@@ -113,19 +119,13 @@ InitScanRelation(SampleScanState *node, EState *estate, int eflags,
* open that relation and acquire appropriate lock on it.
*/
currentRelation = ExecOpenScanRelation(estate,
- ((SampleScan *) node->ss.ps.plan)->scanrelid,
+ ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
eflags);
node->ss.ss_currentRelation = currentRelation;
- /*
- * Even though we aren't going to do a conventional seqscan, it is useful
- * to create a HeapScanDesc --- many of the fields in it are usable.
- */
- node->ss.ss_currentScanDesc =
- heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
- tablesample->tsmseqscan,
- tablesample->tsmpagemode);
+ /* we won't set up the HeapScanDesc till later */
+ node->ss.ss_currentScanDesc = NULL;
/* and report the scan tuple slot's rowtype */
ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
@@ -140,12 +140,11 @@ SampleScanState *
ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
{
SampleScanState *scanstate;
- RangeTblEntry *rte = rt_fetch(node->scanrelid,
- estate->es_range_table);
+ TableSampleClause *tsc = node->tablesample;
+ TsmRoutine *tsm;
Assert(outerPlan(node) == NULL);
Assert(innerPlan(node) == NULL);
- Assert(rte->tablesample != NULL);
/*
* create state structure
@@ -165,10 +164,17 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
* initialize child expressions
*/
scanstate->ss.ps.targetlist = (List *)
- ExecInitExpr((Expr *) node->plan.targetlist,
+ ExecInitExpr((Expr *) node->scan.plan.targetlist,
(PlanState *) scanstate);
scanstate->ss.ps.qual = (List *)
- ExecInitExpr((Expr *) node->plan.qual,
+ ExecInitExpr((Expr *) node->scan.plan.qual,
+ (PlanState *) scanstate);
+
+ scanstate->args = (List *)
+ ExecInitExpr((Expr *) tsc->args,
+ (PlanState *) scanstate);
+ scanstate->repeatable =
+ ExecInitExpr(tsc->repeatable,
(PlanState *) scanstate);
/*
@@ -180,7 +186,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
/*
* initialize scan relation
*/
- InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+ InitScanRelation(scanstate, estate, eflags);
scanstate->ss.ps.ps_TupFromTlist = false;
@@ -190,7 +196,25 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
ExecAssignResultTypeFromTL(&scanstate->ss.ps);
ExecAssignScanProjectionInfo(&scanstate->ss);
- scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+ /*
+ * If we don't have a REPEATABLE clause, select a random seed. We want to
+ * do this just once, since the seed shouldn't change over rescans.
+ */
+ if (tsc->repeatable == NULL)
+ scanstate->seed = random();
+
+ /*
+ * Finally, initialize the TABLESAMPLE method handler.
+ */
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+ scanstate->tsmroutine = tsm;
+ scanstate->tsm_state = NULL;
+
+ if (tsm->InitSampleScan)
+ tsm->InitSampleScan(scanstate, eflags);
+
+ /* We'll do BeginSampleScan later; we can't evaluate params yet */
+ scanstate->begun = false;
return scanstate;
}
@@ -207,7 +231,8 @@ ExecEndSampleScan(SampleScanState *node)
/*
* Tell sampling function that we finished the scan.
*/
- tablesample_end(node->tsdesc);
+ if (node->tsmroutine->EndSampleScan)
+ node->tsmroutine->EndSampleScan(node);
/*
* Free the exprcontext
@@ -223,7 +248,8 @@ ExecEndSampleScan(SampleScanState *node)
/*
* close heap scan
*/
- heap_endscan(node->ss.ss_currentScanDesc);
+ if (node->ss.ss_currentScanDesc)
+ heap_endscan(node->ss.ss_currentScanDesc);
/*
* close the heap relation.
@@ -232,11 +258,6 @@ ExecEndSampleScan(SampleScanState *node)
}
/* ----------------------------------------------------------------
- * Join Support
- * ----------------------------------------------------------------
- */
-
-/* ----------------------------------------------------------------
* ExecReScanSampleScan
*
* Rescans the relation.
@@ -246,12 +267,336 @@ ExecEndSampleScan(SampleScanState *node)
void
ExecReScanSampleScan(SampleScanState *node)
{
- heap_rescan(node->ss.ss_currentScanDesc, NULL);
+ /* Remember we need to do BeginSampleScan again (if we did it at all) */
+ node->begun = false;
+
+ ExecScanReScan(&node->ss);
+}
+
+
+/*
+ * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+{
+ TsmRoutine *tsm = scanstate->tsmroutine;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ Datum *params;
+ Datum datum;
+ bool isnull;
+ uint32 seed;
+ bool allow_sync;
+ int i;
+ ListCell *arg;
+
+ params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+
+ i = 0;
+ foreach(arg, scanstate->args)
+ {
+ ExprState *argstate = (ExprState *) lfirst(arg);
+
+ params[i] = ExecEvalExprSwitchContext(argstate,
+ econtext,
+ &isnull,
+ NULL);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("TABLESAMPLE parameter cannot be null")));
+ i++;
+ }
+
+ if (scanstate->repeatable)
+ {
+ datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+ econtext,
+ &isnull,
+ NULL);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
+ errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+
+ /*
+ * The REPEATABLE parameter has been coerced to float8 by the parser.
+ * The reason for using float8 at the SQL level is that it will
+ * produce unsurprising results both for users used to databases that
+ * accept only integers in the REPEATABLE clause and for those who
+ * might expect that REPEATABLE works like setseed() (a float in the
+ * range from -1 to 1).
+ *
+ * We use hashfloat8() to convert the supplied value into a suitable
+ * seed. For regression-testing purposes, that has the convenient
+ * property that REPEATABLE(0) gives a machine-independent result.
+ */
+ seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+ }
+ else
+ {
+ /* Use the seed selected by ExecInitSampleScan */
+ seed = scanstate->seed;
+ }
+
+ /* Set default values for params that BeginSampleScan can adjust */
+ scanstate->use_bulkread = true;
+ scanstate->use_pagemode = true;
+
+ /* Let tablesample method do its thing */
+ tsm->BeginSampleScan(scanstate,
+ params,
+ list_length(scanstate->args),
+ seed);
+
+ /* We'll use syncscan if there's no NextSampleBlock function */
+ allow_sync = (tsm->NextSampleBlock == NULL);
+
+ /* Now we can create or reset the HeapScanDesc */
+ if (scanstate->ss.ss_currentScanDesc == NULL)
+ {
+ scanstate->ss.ss_currentScanDesc =
+ heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+ scanstate->ss.ps.state->es_snapshot,
+ 0, NULL,
+ scanstate->use_bulkread,
+ allow_sync,
+ scanstate->use_pagemode);
+ }
+ else
+ {
+ heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+ scanstate->use_bulkread,
+ allow_sync,
+ scanstate->use_pagemode);
+ }
+
+ pfree(params);
+
+ /* And we're initialized. */
+ scanstate->begun = true;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE method.
+ *
+ * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+{
+ TsmRoutine *tsm = scanstate->tsmroutine;
+ HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+ HeapTuple tuple = &(scan->rs_ctup);
+ Snapshot snapshot = scan->rs_snapshot;
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool all_visible;
+ OffsetNumber maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ if (tsm->NextSampleBlock)
+ {
+ blockno = tsm->NextSampleBlock(scanstate);
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ }
+ else
+ blockno = scan->rs_startblock;
+ Assert(blockno < scan->rs_nblocks);
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
/*
- * Tell sampling function to reset its state for rescan.
+ * When not using pagemode, we must lock the buffer during tuple
+ * visibility checks.
*/
- tablesample_reset(node->tsdesc);
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ OffsetNumber tupoffset;
+ bool finished;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Ask the tablesample method which tuples to check on this page. */
+ tupoffset = tsm->NextSampleTuple(scanstate,
+ blockno,
+ maxoffset);
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ ItemId itemid;
+ bool visible;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /* in pagemode, heapgetpage did this for us */
+ if (!pagemode)
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+ scan->rs_cbuf, snapshot);
+
+ if (visible)
+ {
+ /* Found visible tuple, return it. */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+ /*
+ * if we get here, it means we've exhausted the items on this page and
+ * it's time to move to the next.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ if (tsm->NextSampleBlock)
+ {
+ blockno = tsm->NextSampleBlock(scanstate);
+ Assert(!scan->rs_syncscan);
+ finished = !BlockNumberIsValid(blockno);
+ }
+ else
+ {
+ /* Without NextSampleBlock, just do a plain forward seqscan. */
+ blockno++;
+ if (blockno >= scan->rs_nblocks)
+ blockno = 0;
+
+ /*
+ * Report our new scan position for synchronization purposes.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, blockno);
+
+ finished = (blockno == scan->rs_startblock);
+ }
+
+ /*
+ * Reached end of scan?
+ */
+ if (finished)
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ Assert(blockno < scan->rs_nblocks);
+ heapgetpage(scan, blockno);
+
+ /* Re-establish state for new page */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ /* Count successfully-fetched tuples as heap fetches */
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
- ExecScanReScan(&node->ss);
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ if (scan->rs_pageatatime)
+ {
+ /*
+ * In pageatatime mode, heapgetpage() already did visibility checks,
+ * so just look at the info it left in rs_vistuples[].
+ *
+ * We use a binary search over the known-sorted array. Note: we could
+ * save some effort if we insisted that NextSampleTuple select tuples
+ * in increasing order, but it's not clear that there would be enough
+ * gain to justify the restriction.
+ */
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ while (start <= end)
+ {
+ int mid = (start + end) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (tupoffset == curoffset)
+ return true;
+ else if (tupoffset < curoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* Otherwise, we have to check the tuple individually. */
+ return HeapTupleSatisfiesVisibility(tuple,
+ scan->rs_snapshot,
+ scan->rs_cbuf);
+ }
}