diff options
author | Simon Riggs <simon@2ndQuadrant.com> | 2015-05-15 14:37:10 -0400 |
---|---|---|
committer | Simon Riggs <simon@2ndQuadrant.com> | 2015-05-15 14:37:10 -0400 |
commit | f6d208d6e51810c73f0e02c477984a6b44627f11 (patch) | |
tree | 99d540d0b7bda73ff60479f15444f554403d4679 /src/backend/executor | |
parent | 11a83bbedd73800db70f6f2af5a8eb10d15d39d7 (diff) | |
download | postgresql-f6d208d6e51810c73f0e02c477984a6b44627f11.tar.gz postgresql-f6d208d6e51810c73f0e02c477984a6b44627f11.zip |
TABLESAMPLE, SQL Standard and extensible
Add a TABLESAMPLE clause to SELECT statements that allows
the user to specify random BERNOULLI sampling or block-level
SYSTEM sampling. The implementation allows for extensible
sampling functions to be written, using a standard API.
The basic version follows the SQL Standard exactly. Usable
concrete use cases for the sampling API follow in later
commits.
Petr Jelinek
Reviewed by Michael Paquier and Simon Riggs
Diffstat (limited to 'src/backend/executor')
-rw-r--r-- | src/backend/executor/Makefile | 2 | ||||
-rw-r--r-- | src/backend/executor/execAmi.c | 8 | ||||
-rw-r--r-- | src/backend/executor/execCurrent.c | 1 | ||||
-rw-r--r-- | src/backend/executor/execProcnode.c | 14 | ||||
-rw-r--r-- | src/backend/executor/nodeSamplescan.c | 256 |
5 files changed, 280 insertions, 1 deletions
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index bc5d373d68a..08cba6fa2b5 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -21,7 +21,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \ nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ - nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ + nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \ nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \ nodeForeignscan.o nodeWindowAgg.o tstoreReceiver.o spi.o diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 6ebad2f03f0..4948a265cb2 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -39,6 +39,7 @@ #include "executor/nodeNestloop.h" #include "executor/nodeRecursiveunion.h" #include "executor/nodeResult.h" +#include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" #include "executor/nodeSort.h" @@ -155,6 +156,10 @@ ExecReScan(PlanState *node) ExecReScanSeqScan((SeqScanState *) node); break; + case T_SampleScanState: + ExecReScanSampleScan((SampleScanState *) node); + break; + case T_IndexScanState: ExecReScanIndexScan((IndexScanState *) node); break; @@ -480,6 +485,9 @@ ExecSupportsBackwardScan(Plan *node) } return false; + case T_SampleScan: + return false; + case T_Material: case T_Sort: /* these don't evaluate tlist */ diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index d87be963a95..bcd287f8742 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -261,6 +261,7 @@ search_plan_tree(PlanState *node, Oid table_oid) * Relation scan nodes can all be treated alike */ case T_SeqScanState: + case T_SampleScanState: 
case T_IndexScanState: case T_IndexOnlyScanState: case T_BitmapHeapScanState: diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 9892499fb7a..03c2febc3e1 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -102,6 +102,7 @@ #include "executor/nodeNestloop.h" #include "executor/nodeRecursiveunion.h" #include "executor/nodeResult.h" +#include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" #include "executor/nodeSort.h" @@ -190,6 +191,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_SampleScan: + result = (PlanState *) ExecInitSampleScan((SampleScan *) node, + estate, eflags); + break; + case T_IndexScan: result = (PlanState *) ExecInitIndexScan((IndexScan *) node, estate, eflags); @@ -406,6 +412,10 @@ ExecProcNode(PlanState *node) result = ExecSeqScan((SeqScanState *) node); break; + case T_SampleScanState: + result = ExecSampleScan((SampleScanState *) node); + break; + case T_IndexScanState: result = ExecIndexScan((IndexScanState *) node); break; @@ -644,6 +654,10 @@ ExecEndNode(PlanState *node) ExecEndSeqScan((SeqScanState *) node); break; + case T_SampleScanState: + ExecEndSampleScan((SampleScanState *) node); + break; + case T_IndexScanState: ExecEndIndexScan((IndexScanState *) node); break; diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c new file mode 100644 index 00000000000..fc89d1dca03 --- /dev/null +++ b/src/backend/executor/nodeSamplescan.c @@ -0,0 +1,256 @@ +/*------------------------------------------------------------------------- + * + * nodeSamplescan.c + * Support routines for sample scans of relations (table sampling). 
+ * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeSamplescan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tablesample.h" +#include "executor/executor.h" +#include "executor/nodeSamplescan.h" +#include "miscadmin.h" +#include "parser/parsetree.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/predicate.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/tqual.h" + +static void InitScanRelation(SampleScanState *node, EState *estate, + int eflags, TableSampleClause *tablesample); +static TupleTableSlot *SampleNext(SampleScanState *node); + + +/* ---------------------------------------------------------------- + * Scan Support + * ---------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------- + * SampleNext + * + * This is a workhorse for ExecSampleScan + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +SampleNext(SampleScanState *node) +{ + TupleTableSlot *slot; + TableSampleDesc *tsdesc; + HeapTuple tuple; + + /* + * get information from the scan state + */ + slot = node->ss.ss_ScanTupleSlot; + tsdesc = node->tsdesc; + + tuple = tablesample_getnext(tsdesc); + + if (tuple) + ExecStoreTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + tsdesc->heapScan->rs_cbuf, /* buffer associated with this tuple */ + false); /* don't pfree this pointer */ + else + ExecClearTuple(slot); + + return slot; +} + +/* + * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +SampleRecheck(SampleScanState *node, TupleTableSlot *slot) +{ + /* No need to recheck for SampleScan */ + return true; +} + +/* 
---------------------------------------------------------------- + * ExecSampleScan(node) + * + * Scans the relation using the sampling method and returns + * the next qualifying tuple. + * We call the ExecScan() routine and pass it the appropriate + * access method functions. + * ---------------------------------------------------------------- + */ +TupleTableSlot * +ExecSampleScan(SampleScanState *node) +{ + return ExecScan((ScanState *) node, + (ExecScanAccessMtd) SampleNext, + (ExecScanRecheckMtd) SampleRecheck); +} + +/* ---------------------------------------------------------------- + * InitScanRelation + * + * Set up to access the scan relation. + * ---------------------------------------------------------------- + */ +static void +InitScanRelation(SampleScanState *node, EState *estate, int eflags, + TableSampleClause *tablesample) +{ + Relation currentRelation; + + /* + * get the relation object id from the relid'th entry in the range table, + * open that relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, + ((SampleScan *) node->ss.ps.plan)->scanrelid, + eflags); + + node->ss.ss_currentRelation = currentRelation; + + /* + * Even though we aren't going to do a conventional seqscan, it is useful + * to create a HeapScanDesc --- many of the fields in it are usable. 
+ */ + node->ss.ss_currentScanDesc = + heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL, + tablesample->tsmseqscan, + tablesample->tsmpagemode); + + /* and report the scan tuple slot's rowtype */ + ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation)); +} + + +/* ---------------------------------------------------------------- + * ExecInitSampleScan + * ---------------------------------------------------------------- + */ +SampleScanState * +ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) +{ + SampleScanState *scanstate; + RangeTblEntry *rte = rt_fetch(node->scanrelid, + estate->es_range_table); + + Assert(outerPlan(node) == NULL); + Assert(innerPlan(node) == NULL); + Assert(rte->tablesample != NULL); + + /* + * create state structure + */ + scanstate = makeNode(SampleScanState); + scanstate->ss.ps.plan = (Plan *) node; + scanstate->ss.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &scanstate->ss.ps); + + /* + * initialize child expressions + */ + scanstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->plan.targetlist, + (PlanState *) scanstate); + scanstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->plan.qual, + (PlanState *) scanstate); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &scanstate->ss.ps); + ExecInitScanTupleSlot(estate, &scanstate->ss); + + /* + * initialize scan relation + */ + InitScanRelation(scanstate, estate, eflags, rte->tablesample); + + scanstate->ss.ps.ps_TupFromTlist = false; + + /* + * Initialize result tuple type and projection info. 
+ */ + ExecAssignResultTypeFromTL(&scanstate->ss.ps); + ExecAssignScanProjectionInfo(&scanstate->ss); + + scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample); + + return scanstate; +} + +/* ---------------------------------------------------------------- + * ExecEndSampleScan + * + * frees any storage allocated through C routines. + * ---------------------------------------------------------------- + */ +void +ExecEndSampleScan(SampleScanState *node) +{ + /* + * Tell sampling function that we finished the scan. + */ + tablesample_end(node->tsdesc); + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clean out the tuple table + */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close heap scan + */ + heap_endscan(node->ss.ss_currentScanDesc); + + /* + * close the heap relation. + */ + ExecCloseScanRelation(node->ss.ss_currentRelation); +} + +/* ---------------------------------------------------------------- + * Join Support + * ---------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------- + * ExecReScanSampleScan + * + * Rescans the relation. + * + * ---------------------------------------------------------------- + */ +void +ExecReScanSampleScan(SampleScanState *node) +{ + heap_rescan(node->ss.ss_currentScanDesc, NULL); + + /* + * Tell sampling function to reset its state for rescan. + */ + tablesample_reset(node->tsdesc); + + ExecScanReScan(&node->ss); +} |