aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/heap/heapam.c61
-rw-r--r--src/backend/access/tablesample/Makefile6
-rw-r--r--src/backend/access/tablesample/bernoulli.c326
-rw-r--r--src/backend/access/tablesample/system.c312
-rw-r--r--src/backend/access/tablesample/tablesample.c355
-rw-r--r--src/backend/catalog/Makefile5
-rw-r--r--src/backend/catalog/dependency.c8
-rw-r--r--src/backend/commands/explain.c107
-rw-r--r--src/backend/executor/execAmi.c7
-rw-r--r--src/backend/executor/nodeSamplescan.c437
-rw-r--r--src/backend/nodes/copyfuncs.c115
-rw-r--r--src/backend/nodes/equalfuncs.c64
-rw-r--r--src/backend/nodes/nodeFuncs.c75
-rw-r--r--src/backend/nodes/outfuncs.c88
-rw-r--r--src/backend/nodes/readfuncs.c61
-rw-r--r--src/backend/optimizer/path/allpaths.c100
-rw-r--r--src/backend/optimizer/path/costsize.c57
-rw-r--r--src/backend/optimizer/plan/createplan.c34
-rw-r--r--src/backend/optimizer/plan/initsplan.c4
-rw-r--r--src/backend/optimizer/plan/planner.c19
-rw-r--r--src/backend/optimizer/plan/setrefs.c18
-rw-r--r--src/backend/optimizer/plan/subselect.c7
-rw-r--r--src/backend/optimizer/prep/prepjointree.c13
-rw-r--r--src/backend/optimizer/util/pathnode.c8
-rw-r--r--src/backend/parser/gram.y27
-rw-r--r--src/backend/parser/parse_clause.c190
-rw-r--r--src/backend/parser/parse_func.c144
-rw-r--r--src/backend/rewrite/rewriteHandler.c4
-rw-r--r--src/backend/utils/adt/pseudotypes.c27
-rw-r--r--src/backend/utils/adt/ruleutils.c94
-rw-r--r--src/backend/utils/cache/lsyscache.c27
-rw-r--r--src/backend/utils/cache/syscache.c23
-rw-r--r--src/backend/utils/errcodes.txt2
-rw-r--r--src/backend/utils/misc/sampling.c2
-rw-r--r--src/bin/psql/tab-complete.c10
-rw-r--r--src/include/access/heapam.h4
-rw-r--r--src/include/access/tablesample.h61
-rw-r--r--src/include/access/tsmapi.h81
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/indexing.h5
-rw-r--r--src/include/catalog/pg_proc.h37
-rw-r--r--src/include/catalog/pg_tablesample_method.h81
-rw-r--r--src/include/catalog/pg_type.h2
-rw-r--r--src/include/executor/nodeSamplescan.h2
-rw-r--r--src/include/nodes/execnodes.h15
-rw-r--r--src/include/nodes/nodes.h9
-rw-r--r--src/include/nodes/parsenodes.h59
-rw-r--r--src/include/nodes/plannodes.h7
-rw-r--r--src/include/optimizer/cost.h3
-rw-r--r--src/include/parser/parse_func.h5
-rw-r--r--src/include/port.h4
-rw-r--r--src/include/utils/builtins.h8
-rw-r--r--src/include/utils/lsyscache.h1
-rw-r--r--src/include/utils/syscache.h2
-rw-r--r--src/port/erand48.c3
-rw-r--r--src/test/regress/expected/rowsecurity.out24
-rw-r--r--src/test/regress/expected/rules.out4
-rw-r--r--src/test/regress/expected/sanity_check.out1
-rw-r--r--src/test/regress/expected/tablesample.out284
-rw-r--r--src/test/regress/output/misc.source5
-rw-r--r--src/test/regress/serial_schedule2
-rw-r--r--src/test/regress/sql/rowsecurity.sql8
-rw-r--r--src/test/regress/sql/tablesample.sql90
63 files changed, 1943 insertions, 1703 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 6f4ff2718fe..050efdc4806 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,11 @@ bool synchronize_seqscans = true;
static HeapScanDesc heap_beginscan_internal(Relation relation,
Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync, bool allow_pagemode,
- bool is_bitmapscan, bool is_samplescan,
+ bool allow_strat,
+ bool allow_sync,
+ bool allow_pagemode,
+ bool is_bitmapscan,
+ bool is_samplescan,
bool temp_snap);
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
@@ -207,7 +210,7 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
* ----------------
*/
static void
-initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
+initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
{
bool allow_strat;
bool allow_sync;
@@ -257,12 +260,12 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
scan->rs_strategy = NULL;
}
- if (is_rescan)
+ if (keep_startblock)
{
/*
- * If rescan, keep the previous startblock setting so that rewinding a
- * cursor doesn't generate surprising results. Reset the syncscan
- * setting, though.
+ * When rescanning, we want to keep the previous startblock setting,
+ * so that rewinding a cursor doesn't generate surprising results.
+ * Reset the active syncscan setting, though.
*/
scan->rs_syncscan = (allow_sync && synchronize_seqscans);
}
@@ -1313,6 +1316,10 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
/* ----------------
* heap_beginscan - begin relation scan
*
+ * heap_beginscan is the "standard" case.
+ *
+ * heap_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
* heap_beginscan_strat offers an extended API that lets the caller control
* whether a nondefault buffer access strategy can be used, and whether
* syncscan can be chosen (possibly resulting in the scan not starting from
@@ -1323,8 +1330,11 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
* really quite unlike a standard seqscan, there is just enough commonality
* to make it worth using the same data structure.
*
- * heap_beginscan_samplingscan is alternate entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.
+ * heap_beginscan_sampling is an alternative entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by heap_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
* ----------------
*/
HeapScanDesc
@@ -1366,18 +1376,22 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
HeapScanDesc
heap_beginscan_sampling(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_pagemode)
+ bool allow_strat, bool allow_sync, bool allow_pagemode)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- allow_strat, false, allow_pagemode,
+ allow_strat, allow_sync, allow_pagemode,
false, true, false);
}
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync, bool allow_pagemode,
- bool is_bitmapscan, bool is_samplescan, bool temp_snap)
+ bool allow_strat,
+ bool allow_sync,
+ bool allow_pagemode,
+ bool is_bitmapscan,
+ bool is_samplescan,
+ bool temp_snap)
{
HeapScanDesc scan;
@@ -1462,6 +1476,27 @@ heap_rescan(HeapScanDesc scan,
}
/* ----------------
+ * heap_rescan_set_params - restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan. Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ * ----------------
+ */
+void
+heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+ bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+ /* adjust parameters */
+ scan->rs_allow_strat = allow_strat;
+ scan->rs_allow_sync = allow_sync;
+ scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
+ /* ... and rescan */
+ heap_rescan(scan, key);
+}
+
+/* ----------------
* heap_endscan - end relation scan
*
* See how to integrate with index scans.
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
index 46eeb59f9c4..68d9ab28147 100644
--- a/src/backend/access/tablesample/Makefile
+++ b/src/backend/access/tablesample/Makefile
@@ -1,10 +1,10 @@
#-------------------------------------------------------------------------
#
# Makefile--
-# Makefile for utils/tablesample
+# Makefile for access/tablesample
#
# IDENTIFICATION
-# src/backend/utils/tablesample/Makefile
+# src/backend/access/tablesample/Makefile
#
#-------------------------------------------------------------------------
@@ -12,6 +12,6 @@ subdir = src/backend/access/tablesample
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = tablesample.o system.o bernoulli.o
+OBJS = bernoulli.o system.o tablesample.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
index 0a539008221..cf88f95e757 100644
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -1,233 +1,231 @@
/*-------------------------------------------------------------------------
*
* bernoulli.c
- * interface routines for BERNOULLI tablesample method
+ * support routines for BERNOULLI tablesample method
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate TID together with
+ * the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * src/backend/utils/tablesample/bernoulli.c
+ * src/backend/access/tablesample/bernoulli.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h> /* for _isnan */
+#endif
+#include <math.h>
-#include "access/tablesample.h"
-#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/hash.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
#include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
-/* tsdesc */
+/* Private state */
typedef struct
{
+ uint64 cutoff; /* select tuples with hash less than this */
uint32 seed; /* random seed */
- BlockNumber startblock; /* starting block, we use ths for syncscan
- * support */
- BlockNumber nblocks; /* number of blocks */
- BlockNumber blockno; /* current block */
- float4 probability; /* probabilty that tuple will be returned
- * (0.0-1.0) */
OffsetNumber lt; /* last tuple returned from current block */
- SamplerRandomState randstate; /* random generator tsdesc */
} BernoulliSamplerData;
+
+static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void bernoulli_initsamplescan(SampleScanState *node,
+ int eflags);
+static void bernoulli_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+
+
/*
- * Initialize the state.
+ * Create a TsmRoutine descriptor for the BERNOULLI method.
*/
Datum
-tsm_bernoulli_init(PG_FUNCTION_ARGS)
+tsm_bernoulli_handler(PG_FUNCTION_ARGS)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- uint32 seed = PG_GETARG_UINT32(1);
- float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
- HeapScanDesc scan = tsdesc->heapScan;
- BernoulliSamplerData *sampler;
+ TsmRoutine *tsm = makeNode(TsmRoutine);
+
+ tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+ tsm->repeatable_across_queries = true;
+ tsm->repeatable_across_scans = true;
+ tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
+ tsm->InitSampleScan = bernoulli_initsamplescan;
+ tsm->BeginSampleScan = bernoulli_beginsamplescan;
+ tsm->NextSampleBlock = NULL;
+ tsm->NextSampleTuple = bernoulli_nextsampletuple;
+ tsm->EndSampleScan = NULL;
+
+ PG_RETURN_POINTER(tsm);
+}
- if (percent < 0 || percent > 100)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("invalid sample size"),
- errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+/*
+ * Sample size estimation.
+ */
+static void
+bernoulli_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
+{
+ Node *pctnode;
+ float4 samplefract;
- sampler = palloc0(sizeof(BernoulliSamplerData));
+ /* Try to extract an estimate for the sample percentage */
+ pctnode = (Node *) linitial(paramexprs);
+ pctnode = estimate_expression_value(root, pctnode);
- /* Remember initial values for reinit */
- sampler->seed = seed;
- sampler->startblock = scan->rs_startblock;
- sampler->nblocks = scan->rs_nblocks;
- sampler->blockno = InvalidBlockNumber;
- sampler->probability = percent / 100;
- sampler->lt = InvalidOffsetNumber;
- sampler_random_init_state(sampler->seed, sampler->randstate);
+ if (IsA(pctnode, Const) &&
+ !((Const *) pctnode)->constisnull)
+ {
+ samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+ samplefract /= 100.0f;
+ else
+ {
+ /* Default samplefract if the value is bogus */
+ samplefract = 0.1f;
+ }
+ }
+ else
+ {
+ /* Default samplefract if we didn't obtain a non-null Const */
+ samplefract = 0.1f;
+ }
+
+ /* We'll visit all pages of the baserel */
+ *pages = baserel->pages;
- tsdesc->tsmdata = (void *) sampler;
+ *tuples = clamp_row_est(baserel->tuples * samplefract);
+}
- PG_RETURN_VOID();
+/*
+ * Initialize during executor setup.
+ */
+static void
+bernoulli_initsamplescan(SampleScanState *node, int eflags)
+{
+ node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
}
/*
- * Get next block number to read or InvalidBlockNumber if we are at the
- * end of the relation.
+ * Examine parameters and prepare for a sample scan.
*/
-Datum
-tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+static void
+bernoulli_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+ BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+ double percent = DatumGetFloat4(params[0]);
+
+ if (percent < 0 || percent > 100 || isnan(percent))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample percentage must be between 0 and 100")));
/*
- * Bernoulli sampling scans all blocks on the table and supports syncscan
- * so loop from startblock to startblock instead of from 0 to nblocks.
+ * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+ * store that as a uint64, of course. Note that this gives strictly
+ * correct behavior at the limits of zero or one probability.
*/
- if (sampler->blockno == InvalidBlockNumber)
- sampler->blockno = sampler->startblock;
- else
- {
- sampler->blockno++;
-
- if (sampler->blockno >= sampler->nblocks)
- sampler->blockno = 0;
-
- if (sampler->blockno == sampler->startblock)
- PG_RETURN_UINT32(InvalidBlockNumber);
- }
+ sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+ sampler->seed = seed;
+ sampler->lt = InvalidOffsetNumber;
- PG_RETURN_UINT32(sampler->blockno);
+ /*
+ * Use bulkread, since we're scanning all pages. But pagemode visibility
+ * checking is a win only at larger sampling fractions. The 25% cutoff
+ * here is based on very limited experimentation.
+ */
+ node->use_bulkread = true;
+ node->use_pagemode = (percent >= 25);
}
/*
- * Get next tuple from current block.
- *
- * This method implements the main logic in bernoulli sampling.
- * The algorithm simply generates new random number (in 0.0-1.0 range) and if
- * it falls within user specified probability (in the same range) return the
- * tuple offset.
- *
- * It is ok here to return tuple offset without knowing if tuple is visible
- * and not check it via examinetuple. The reason for that is that we do the
- * coinflip (random number generation) for every tuple in the table. Since all
- * tuples have same probability of being returned the visible and invisible
- * tuples will be returned in same ratio as they have in the actual table.
- * This means that there is no skew towards either visible or invisible tuples
- * and the number of visible tuples returned from the executor node should
- * match the fraction of visible tuples which was specified by user.
+ * Select next sampled tuple in current block.
*
- * This is faster than doing the coinflip in examinetuple because we don't
- * have to do visibility checks on uninteresting tuples.
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists). The reason is that we do the coinflip for every tuple
+ * offset in the table. Since all tuples have the same probability of being
+ * returned, it doesn't matter if we do extra coinflips for invisible tuples.
*
- * If we reach end of the block return InvalidOffsetNumber which tells
+ * When we reach end of the block, return InvalidOffsetNumber which tells
* SampleScan to go to next block.
*/
-Datum
-tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+static OffsetNumber
+bernoulli_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- OffsetNumber maxoffset = PG_GETARG_UINT16(2);
- BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+ BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
OffsetNumber tupoffset = sampler->lt;
- float4 probability = sampler->probability;
+ uint32 hashinput[3];
+ /* Advance to first/next tuple in block */
if (tupoffset == InvalidOffsetNumber)
tupoffset = FirstOffsetNumber;
else
tupoffset++;
/*
- * Loop over tuple offsets until the random generator returns value that
- * is within the probability of returning the tuple or until we reach end
- * of the block.
+ * We compute the hash by applying hash_any to an array of 3 uint32's
+ * containing the block, offset, and seed. This is efficient to set up,
+ * and with the current implementation of hash_any, it gives
+ * machine-independent results, which is a nice property for regression
+ * testing.
*
- * (This is our implementation of bernoulli trial)
+ * These words in the hash input are the same throughout the block:
*/
- while (sampler_random_fract(sampler->randstate) > probability)
+ hashinput[0] = blockno;
+ hashinput[2] = sampler->seed;
+
+ /*
+ * Loop over tuple offsets until finding suitable TID or reaching end of
+ * block.
+ */
+ for (; tupoffset <= maxoffset; tupoffset++)
{
- tupoffset++;
+ uint32 hash;
- if (tupoffset > maxoffset)
+ hashinput[1] = tupoffset;
+
+ hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+ (int) sizeof(hashinput)));
+ if (hash < sampler->cutoff)
break;
}
if (tupoffset > maxoffset)
- /* Tell SampleScan that we want next block. */
tupoffset = InvalidOffsetNumber;
sampler->lt = tupoffset;
- PG_RETURN_UINT16(tupoffset);
-}
-
-/*
- * Cleanup method.
- */
-Datum
-tsm_bernoulli_end(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
- pfree(tsdesc->tsmdata);
-
- PG_RETURN_VOID();
-}
-
-/*
- * Reset tsdesc (called by ReScan).
- */
-Datum
-tsm_bernoulli_reset(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
-
- sampler->blockno = InvalidBlockNumber;
- sampler->lt = InvalidOffsetNumber;
- sampler_random_init_state(sampler->seed, sampler->randstate);
-
- PG_RETURN_VOID();
-}
-
-/*
- * Costing function.
- */
-Datum
-tsm_bernoulli_cost(PG_FUNCTION_ARGS)
-{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Path *path = (Path *) PG_GETARG_POINTER(1);
- RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
- List *args = (List *) PG_GETARG_POINTER(3);
- BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
- double *tuples = (double *) PG_GETARG_POINTER(5);
- Node *pctnode;
- float4 samplesize;
-
- *pages = baserel->pages;
-
- pctnode = linitial(args);
- pctnode = estimate_expression_value(root, pctnode);
-
- if (IsA(pctnode, RelabelType))
- pctnode = (Node *) ((RelabelType *) pctnode)->arg;
-
- if (IsA(pctnode, Const))
- {
- samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
- samplesize /= 100.0;
- }
- else
- {
- /* Default samplesize if the estimation didn't return Const. */
- samplesize = 0.1f;
- }
-
- *tuples = path->rows * samplesize;
- path->rows = *tuples;
-
- PG_RETURN_VOID();
+ return tupoffset;
}
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
index 1d834369a4b..43c5dab7161 100644
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -1,186 +1,260 @@
/*-------------------------------------------------------------------------
*
* system.c
- * interface routines for system tablesample method
+ * support routines for SYSTEM tablesample method
*
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To achieve that, we proceed by hashing each candidate block number together
+ * with the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * src/backend/utils/tablesample/system.c
+ * src/backend/access/tablesample/system.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h> /* for _isnan */
+#endif
+#include <math.h>
-#include "access/tablesample.h"
+#include "access/hash.h"
#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
#include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
-/*
- * State
- */
+/* Private state */
typedef struct
{
- BlockSamplerData bs;
+ uint64 cutoff; /* select blocks with hash less than this */
uint32 seed; /* random seed */
- BlockNumber nblocks; /* number of block in relation */
- int samplesize; /* number of blocks to return */
+ BlockNumber nextblock; /* next block to consider sampling */
OffsetNumber lt; /* last tuple returned from current block */
} SystemSamplerData;
-/*
- * Initializes the state.
- */
-Datum
-tsm_system_init(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- uint32 seed = PG_GETARG_UINT32(1);
- float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
- HeapScanDesc scan = tsdesc->heapScan;
- SystemSamplerData *sampler;
+static void system_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void system_initsamplescan(SampleScanState *node,
+ int eflags);
+static void system_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static BlockNumber system_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
- if (percent < 0 || percent > 100)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("invalid sample size"),
- errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
-
- sampler = palloc0(sizeof(SystemSamplerData));
-
- /* Remember initial values for reinit */
- sampler->seed = seed;
- sampler->nblocks = scan->rs_nblocks;
- sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
- sampler->lt = InvalidOffsetNumber;
-
- BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
- sampler->seed);
-
- tsdesc->tsmdata = (void *) sampler;
-
- PG_RETURN_VOID();
-}
/*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses the same logic as ANALYZE for picking the random blocks.
+ * Create a TsmRoutine descriptor for the SYSTEM method.
*/
Datum
-tsm_system_nextblock(PG_FUNCTION_ARGS)
+tsm_system_handler(PG_FUNCTION_ARGS)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
- BlockNumber blockno;
-
- if (!BlockSampler_HasMore(&sampler->bs))
- PG_RETURN_UINT32(InvalidBlockNumber);
-
- blockno = BlockSampler_Next(&sampler->bs);
-
- PG_RETURN_UINT32(blockno);
+ TsmRoutine *tsm = makeNode(TsmRoutine);
+
+ tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+ tsm->repeatable_across_queries = true;
+ tsm->repeatable_across_scans = true;
+ tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
+ tsm->InitSampleScan = system_initsamplescan;
+ tsm->BeginSampleScan = system_beginsamplescan;
+ tsm->NextSampleBlock = system_nextsampleblock;
+ tsm->NextSampleTuple = system_nextsampletuple;
+ tsm->EndSampleScan = NULL;
+
+ PG_RETURN_POINTER(tsm);
}
/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
+ * Sample size estimation.
*/
-Datum
-tsm_system_nexttuple(PG_FUNCTION_ARGS)
+static void
+system_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- OffsetNumber maxoffset = PG_GETARG_UINT16(2);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
- OffsetNumber tupoffset = sampler->lt;
+ Node *pctnode;
+ float4 samplefract;
- if (tupoffset == InvalidOffsetNumber)
- tupoffset = FirstOffsetNumber;
- else
- tupoffset++;
+ /* Try to extract an estimate for the sample percentage */
+ pctnode = (Node *) linitial(paramexprs);
+ pctnode = estimate_expression_value(root, pctnode);
- if (tupoffset > maxoffset)
- tupoffset = InvalidOffsetNumber;
+ if (IsA(pctnode, Const) &&
+ !((Const *) pctnode)->constisnull)
+ {
+ samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+ samplefract /= 100.0f;
+ else
+ {
+ /* Default samplefract if the value is bogus */
+ samplefract = 0.1f;
+ }
+ }
+ else
+ {
+ /* Default samplefract if we didn't obtain a non-null Const */
+ samplefract = 0.1f;
+ }
- sampler->lt = tupoffset;
+ /* We'll visit a sample of the pages ... */
+ *pages = clamp_row_est(baserel->pages * samplefract);
- PG_RETURN_UINT16(tupoffset);
+ /* ... and hopefully get a representative number of tuples from them */
+ *tuples = clamp_row_est(baserel->tuples * samplefract);
}
/*
- * Cleanup method.
+ * Initialize during executor setup.
*/
-Datum
-tsm_system_end(PG_FUNCTION_ARGS)
+static void
+system_initsamplescan(SampleScanState *node, int eflags)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
- pfree(tsdesc->tsmdata);
-
- PG_RETURN_VOID();
+ node->tsm_state = palloc0(sizeof(SystemSamplerData));
}
/*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
*/
-Datum
-tsm_system_reset(PG_FUNCTION_ARGS)
+static void
+system_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ double percent = DatumGetFloat4(params[0]);
+ if (percent < 0 || percent > 100 || isnan(percent))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample percentage must be between 0 and 100")));
+
+ /*
+ * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+ * store that as a uint64, of course. Note that this gives strictly
+ * correct behavior at the limits of zero or one probability.
+ */
+ sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+ sampler->seed = seed;
+ sampler->nextblock = 0;
sampler->lt = InvalidOffsetNumber;
- BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
- sampler->seed);
- PG_RETURN_VOID();
+ /*
+ * Bulkread buffer access strategy probably makes sense unless we're
+ * scanning a very small fraction of the table. The 1% cutoff here is a
+ * guess. We should use pagemode visibility checking, since we scan all
+ * tuples on each selected page.
+ */
+ node->use_bulkread = (percent >= 1);
+ node->use_pagemode = true;
}
/*
- * Costing function.
+ * Select next block to sample.
*/
-Datum
-tsm_system_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_nextsampleblock(SampleScanState *node)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Path *path = (Path *) PG_GETARG_POINTER(1);
- RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
- List *args = (List *) PG_GETARG_POINTER(3);
- BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
- double *tuples = (double *) PG_GETARG_POINTER(5);
- Node *pctnode;
- float4 samplesize;
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ HeapScanDesc scan = node->ss.ss_currentScanDesc;
+ BlockNumber nextblock = sampler->nextblock;
+ uint32 hashinput[2];
+
+ /*
+ * We compute the hash by applying hash_any to an array of 2 uint32's
+ * containing the block number and seed. This is efficient to set up, and
+ * with the current implementation of hash_any, it gives
+ * machine-independent results, which is a nice property for regression
+ * testing.
+ *
+ * These words in the hash input are the same throughout the block:
+ */
+ hashinput[1] = sampler->seed;
+
+ /*
+ * Loop over block numbers until finding suitable block or reaching end of
+ * relation.
+ */
+ for (; nextblock < scan->rs_nblocks; nextblock++)
+ {
+ uint32 hash;
- pctnode = linitial(args);
- pctnode = estimate_expression_value(root, pctnode);
+ hashinput[0] = nextblock;
- if (IsA(pctnode, RelabelType))
- pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+ hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+ (int) sizeof(hashinput)));
+ if (hash < sampler->cutoff)
+ break;
+ }
- if (IsA(pctnode, Const))
+ if (nextblock < scan->rs_nblocks)
{
- samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
- samplesize /= 100.0;
+ /* Found a suitable block; remember where we should start next time */
+ sampler->nextblock = nextblock + 1;
+ return nextblock;
}
+
+ /* Done, but let's reset nextblock to 0 for safety. */
+ sampler->nextblock = 0;
+ return InvalidBlockNumber;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists); nodeSamplescan.c will deal with that.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+system_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
+{
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ OffsetNumber tupoffset = sampler->lt;
+
+ /* Advance to next possible offset on page */
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
else
- {
- /* Default samplesize if the estimation didn't return Const. */
- samplesize = 0.1f;
- }
+ tupoffset++;
- *pages = baserel->pages * samplesize;
- *tuples = path->rows * samplesize;
- path->rows = *tuples;
+ /* Done? */
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
- PG_RETURN_VOID();
+ return tupoffset;
}
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
index f21d42c8e38..b8ad7ced743 100644
--- a/src/backend/access/tablesample/tablesample.c
+++ b/src/backend/access/tablesample/tablesample.c
@@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* tablesample.c
- * TABLESAMPLE internal API
+ * Support functions for TABLESAMPLE feature
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -10,356 +10,31 @@
* IDENTIFICATION
* src/backend/access/tablesample/tablesample.c
*
- * TABLESAMPLE is the SQL standard clause for sampling the relations.
- *
- * The API is interface between the Executor and the TABLESAMPLE Methods.
- *
- * TABLESAMPLE Methods are implementations of actual sampling algorithms which
- * can be used for returning a sample of the source relation.
- * Methods don't read the table directly but are asked for block number and
- * tuple offset which they want to examine (or return) and the tablesample
- * interface implemented here does the reading for them.
- *
- * We currently only support sampling of the physical relations, but in the
- * future we might extend the API to support subqueries as well.
- *
* -------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "access/tablesample.h"
-
-#include "catalog/pg_tablesample_method.h"
-#include "miscadmin.h"
-#include "pgstat.h"
-#include "storage/bufmgr.h"
-#include "storage/predicate.h"
-#include "utils/rel.h"
-#include "utils/tqual.h"
-
-
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
-
-
-/*
- * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
- */
-TableSampleDesc *
-tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
-{
- FunctionCallInfoData fcinfo;
- int i;
- List *args = tablesample->args;
- ListCell *arg;
- ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
- TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
-
- /* Load functions */
- fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
- fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
- fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
- if (OidIsValid(tablesample->tsmexaminetuple))
- fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
- else
- tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
- fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
- fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
-
- InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
- list_length(args) + 2,
- InvalidOid, NULL, NULL);
-
- tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
- tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
-
- /* First argument for init function is always TableSampleDesc */
- fcinfo.arg[0] = PointerGetDatum(tsdesc);
- fcinfo.argnull[0] = false;
+#include "access/tsmapi.h"
- /*
- * Second arg for init function is always REPEATABLE.
- *
- * If tablesample->repeatable is NULL then REPEATABLE clause was not
- * specified, and we insert a random value as default.
- *
- * When specified, the expression cannot evaluate to NULL.
- */
- if (tablesample->repeatable)
- {
- ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
- (PlanState *) scanstate);
-
- fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
- &fcinfo.argnull[1], NULL);
- if (fcinfo.argnull[1])
- ereport(ERROR,
- (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
- errmsg("REPEATABLE clause must be NOT NULL numeric value")));
- }
- else
- {
- fcinfo.arg[1] = UInt32GetDatum(random());
- fcinfo.argnull[1] = false;
- }
-
- /* Rest of the arguments come from user. */
- i = 2;
- foreach(arg, args)
- {
- Expr *argexpr = (Expr *) lfirst(arg);
- ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
-
- fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
- &fcinfo.argnull[i], NULL);
- i++;
- }
- Assert(i == fcinfo.nargs);
-
- (void) FunctionCallInvoke(&fcinfo);
-
- return tsdesc;
-}
/*
- * Get next tuple from TABLESAMPLE Method.
- */
-HeapTuple
-tablesample_getnext(TableSampleDesc *desc)
-{
- HeapScanDesc scan = desc->heapScan;
- HeapTuple tuple = &(scan->rs_ctup);
- bool pagemode = scan->rs_pageatatime;
- BlockNumber blockno;
- Page page;
- bool page_all_visible;
- ItemId itemid;
- OffsetNumber tupoffset,
- maxoffset;
-
- if (!scan->rs_inited)
- {
- /*
- * return null immediately if relation is empty
- */
- if (scan->rs_nblocks == 0)
- {
- Assert(!BufferIsValid(scan->rs_cbuf));
- tuple->t_data = NULL;
- return NULL;
- }
- blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
- PointerGetDatum(desc)));
- if (!BlockNumberIsValid(blockno))
- {
- tuple->t_data = NULL;
- return NULL;
- }
-
- heapgetpage(scan, blockno);
- scan->rs_inited = true;
- }
- else
- {
- /* continue from previously returned page/tuple */
- blockno = scan->rs_cblock; /* current page */
- }
-
- /*
- * When pagemode is disabled, the scan will do visibility checks for each
- * tuple it finds so the buffer needs to be locked.
- */
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
- page = (Page) BufferGetPage(scan->rs_cbuf);
- page_all_visible = PageIsAllVisible(page);
- maxoffset = PageGetMaxOffsetNumber(page);
-
- for (;;)
- {
- CHECK_FOR_INTERRUPTS();
-
- tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
- PointerGetDatum(desc),
- UInt32GetDatum(blockno),
- UInt16GetDatum(maxoffset)));
-
- if (OffsetNumberIsValid(tupoffset))
- {
- bool visible;
- bool found;
-
- /* Skip invalid tuple pointers. */
- itemid = PageGetItemId(page, tupoffset);
- if (!ItemIdIsNormal(itemid))
- continue;
-
- tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
- tuple->t_len = ItemIdGetLength(itemid);
- ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
- if (page_all_visible)
- visible = true;
- else
- visible = SampleTupleVisible(tuple, tupoffset, scan);
-
- /*
- * Let the sampling method examine the actual tuple and decide if
- * we should return it.
- *
- * Note that we let it examine even invisible tuples for
- * statistical purposes, but not return them since user should
- * never see invisible tuples.
- */
- if (OidIsValid(desc->tsmexaminetuple.fn_oid))
- {
- found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
- PointerGetDatum(desc),
- UInt32GetDatum(blockno),
- PointerGetDatum(tuple),
- BoolGetDatum(visible)));
- /* Should not happen if sampling method is well written. */
- if (found && !visible)
- elog(ERROR, "Sampling method wanted to return invisible tuple");
- }
- else
- found = visible;
-
- /* Found visible tuple, return it. */
- if (found)
- {
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
- break;
- }
- else
- {
- /* Try next tuple from same page. */
- continue;
- }
- }
-
-
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
- blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
- PointerGetDatum(desc)));
-
- /*
- * Report our new scan position for synchronization purposes. We don't
- * do that when moving backwards, however. That would just mess up any
- * other forward-moving scanners.
- *
- * Note: we do this before checking for end of scan so that the final
- * state of the position hint is back at the start of the rel. That's
- * not strictly necessary, but otherwise when you run the same query
- * multiple times the starting position would shift a little bit
- * backwards on every invocation, which is confusing. We don't
- * guarantee any specific ordering in general, though.
- */
- if (scan->rs_syncscan)
- ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
- blockno : scan->rs_startblock);
-
- /*
- * Reached end of scan.
- */
- if (!BlockNumberIsValid(blockno))
- {
- if (BufferIsValid(scan->rs_cbuf))
- ReleaseBuffer(scan->rs_cbuf);
- scan->rs_cbuf = InvalidBuffer;
- scan->rs_cblock = InvalidBlockNumber;
- tuple->t_data = NULL;
- scan->rs_inited = false;
- return NULL;
- }
-
- heapgetpage(scan, blockno);
-
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
- page = (Page) BufferGetPage(scan->rs_cbuf);
- page_all_visible = PageIsAllVisible(page);
- maxoffset = PageGetMaxOffsetNumber(page);
- }
-
- pgstat_count_heap_getnext(scan->rs_rd);
-
- return &(scan->rs_ctup);
-}
-
-/*
- * Reset the sampling to starting state
- */
-void
-tablesample_reset(TableSampleDesc *desc)
-{
- (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
-}
-
-/*
- * Signal the sampling method that the scan has finished.
- */
-void
-tablesample_end(TableSampleDesc *desc)
-{
- (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
-}
-
-/*
- * Check visibility of the tuple.
+ * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler.
+ *
+ * This is a convenience routine that's just meant to check for errors.
*/
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+TsmRoutine *
+GetTsmRoutine(Oid tsmhandler)
{
- /*
- * If this scan is reading whole pages at a time, there is already
- * visibility info present in rs_vistuples so we can just search it for
- * the tupoffset.
- */
- if (scan->rs_pageatatime)
- {
- int start = 0,
- end = scan->rs_ntuples - 1;
-
- /*
- * Do the binary search over rs_vistuples, it's already sorted by
- * OffsetNumber so we don't need to do any sorting ourselves here.
- *
- * We could use bsearch() here but it's slower for integers because of
- * the function call overhead and because it needs boiler plate code
- * it would not save us anything code-wise anyway.
- */
- while (start <= end)
- {
- int mid = start + (end - start) / 2;
- OffsetNumber curoffset = scan->rs_vistuples[mid];
-
- if (curoffset == tupoffset)
- return true;
- else if (curoffset > tupoffset)
- end = mid - 1;
- else
- start = mid + 1;
- }
-
- return false;
- }
- else
- {
- /* No pagemode, we have to check the tuple itself. */
- Snapshot snapshot = scan->rs_snapshot;
- Buffer buffer = scan->rs_cbuf;
+ Datum datum;
+ TsmRoutine *routine;
- bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+ datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL));
+ routine = (TsmRoutine *) DatumGetPointer(datum);
- CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
- snapshot);
+ if (routine == NULL || !IsA(routine, TsmRoutine))
+ elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct",
+ tsmhandler);
- return visible;
- }
+ return routine;
}
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 3d1139b5ba0..25130ecf124 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,8 +40,9 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_ts_parser.h pg_ts_template.h pg_extension.h \
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
- pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
- pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
+ pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+ pg_collation.h pg_range.h pg_transform.h \
+ toasting.h indexing.h \
)
# location of Catalog.pm
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 5d7c441739c..90b1cd835f8 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1911,6 +1911,14 @@ find_expr_references_walker(Node *node,
context->addrs);
}
}
+ else if (IsA(node, TableSampleClause))
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+
+ add_object_address(OCLASS_PROC, tsc->tsmhandler, 0,
+ context->addrs);
+ /* fall through to examine arguments */
+ }
return expression_tree_walker(node, find_expr_references_walker,
(void *) context);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 0d1ecc2a3ed..5d06fa4ea65 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -96,6 +96,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
List *ancestors, ExplainState *es);
static void show_sortorder_options(StringInfo buf, Node *sortexpr,
Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+ List *ancestors, ExplainState *es);
static void show_sort_info(SortState *sortstate, ExplainState *es);
static void show_hash_info(HashState *hashstate, ExplainState *es);
static void show_tidbitmap_info(BitmapHeapScanState *planstate,
@@ -116,7 +118,7 @@ static void ExplainMemberNodes(List *plans, PlanState **planstates,
static void ExplainSubPlans(List *plans, List *ancestors,
const char *relationship, ExplainState *es);
static void ExplainCustomChildren(CustomScanState *css,
- List *ancestors, ExplainState *es);
+ List *ancestors, ExplainState *es);
static void ExplainProperty(const char *qlabel, const char *value,
bool numeric, ExplainState *es);
static void ExplainOpenGroup(const char *objtype, const char *labelname,
@@ -730,6 +732,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
switch (nodeTag(plan))
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -739,7 +742,6 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
- case T_SampleScan:
*rels_used = bms_add_member(*rels_used,
((Scan *) plan)->scanrelid);
break;
@@ -935,6 +937,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_SeqScan:
pname = sname = "Seq Scan";
break;
+ case T_SampleScan:
+ pname = sname = "Sample Scan";
+ break;
case T_IndexScan:
pname = sname = "Index Scan";
break;
@@ -976,23 +981,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
else
pname = sname;
break;
- case T_SampleScan:
- {
- /*
- * Fetch the tablesample method name from RTE.
- *
- * It would be nice to also show parameters, but since we
- * support arbitrary expressions as parameter it might get
- * quite messy.
- */
- RangeTblEntry *rte;
-
- rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
- custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
- pname = psprintf("Sample Scan (%s)", custom_name);
- sname = "Sample Scan";
- }
- break;
case T_Material:
pname = sname = "Materialize";
break;
@@ -1101,6 +1089,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
switch (nodeTag(plan))
{
case T_SeqScan:
+ case T_SampleScan:
case T_BitmapHeapScan:
case T_TidScan:
case T_SubqueryScan:
@@ -1115,9 +1104,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (((Scan *) plan)->scanrelid > 0)
ExplainScanTarget((Scan *) plan, es);
break;
- case T_SampleScan:
- ExplainScanTarget((Scan *) plan, es);
- break;
case T_IndexScan:
{
IndexScan *indexscan = (IndexScan *) plan;
@@ -1363,12 +1349,15 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->analyze)
show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
break;
+ case T_SampleScan:
+ show_tablesample(((SampleScan *) plan)->tablesample,
+ planstate, ancestors, es);
+ /* FALL THRU to print additional fields the same as SeqScan */
case T_SeqScan:
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
case T_SubqueryScan:
- case T_SampleScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2110,6 +2099,72 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
}
/*
+ * Show TABLESAMPLE properties
+ */
+static void
+show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+ List *ancestors, ExplainState *es)
+{
+ List *context;
+ bool useprefix;
+ char *method_name;
+ List *params = NIL;
+ char *repeatable;
+ ListCell *lc;
+
+ /* Set up deparsing context */
+ context = set_deparse_context_planstate(es->deparse_cxt,
+ (Node *) planstate,
+ ancestors);
+ useprefix = list_length(es->rtable) > 1;
+
+ /* Get the tablesample method name */
+ method_name = get_func_name(tsc->tsmhandler);
+
+ /* Deparse parameter expressions */
+ foreach(lc, tsc->args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ params = lappend(params,
+ deparse_expression(arg, context,
+ useprefix, false));
+ }
+ if (tsc->repeatable)
+ repeatable = deparse_expression((Node *) tsc->repeatable, context,
+ useprefix, false);
+ else
+ repeatable = NULL;
+
+ /* Print results */
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ bool first = true;
+
+ appendStringInfoSpaces(es->str, es->indent * 2);
+ appendStringInfo(es->str, "Sampling: %s (", method_name);
+ foreach(lc, params)
+ {
+ if (!first)
+ appendStringInfoString(es->str, ", ");
+ appendStringInfoString(es->str, (const char *) lfirst(lc));
+ first = false;
+ }
+ appendStringInfoChar(es->str, ')');
+ if (repeatable)
+ appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
+ appendStringInfoChar(es->str, '\n');
+ }
+ else
+ {
+ ExplainPropertyText("Sampling Method", method_name, es);
+ ExplainPropertyList("Sampling Parameters", params, es);
+ if (repeatable)
+ ExplainPropertyText("Repeatable Seed", repeatable, es);
+ }
+}
+
+/*
* If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
*/
static void
@@ -2366,13 +2421,13 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
switch (nodeTag(plan))
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
case T_ForeignScan:
case T_CustomScan:
- case T_SampleScan:
case T_ModifyTable:
/* Assert it's on a real relation */
Assert(rte->rtekind == RTE_RELATION);
@@ -2663,9 +2718,9 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
{
ListCell *cell;
const char *label =
- (list_length(css->custom_ps) != 1 ? "children" : "child");
+ (list_length(css->custom_ps) != 1 ? "children" : "child");
- foreach (cell, css->custom_ps)
+ foreach(cell, css->custom_ps)
ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
}
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 04073d3f9f9..93e1e9a691c 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -463,6 +463,10 @@ ExecSupportsBackwardScan(Plan *node)
case T_CteScan:
return TargetListSupportsBackwardScan(node->targetlist);
+ case T_SampleScan:
+ /* Simplify life for tablesample methods by disallowing this */
+ return false;
+
case T_IndexScan:
return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) &&
TargetListSupportsBackwardScan(node->targetlist);
@@ -485,9 +489,6 @@ ExecSupportsBackwardScan(Plan *node)
}
return false;
- case T_SampleScan:
- return false;
-
case T_Material:
case T_Sort:
/* these don't evaluate tlist */
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 4c1c5237b7d..dbe84b0baa8 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -3,7 +3,7 @@
* nodeSamplescan.c
* Support routines for sample scans of relations (table sampling).
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
@@ -14,22 +14,23 @@
*/
#include "postgres.h"
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeSamplescan.h"
#include "miscadmin.h"
-#include "parser/parsetree.h"
#include "pgstat.h"
-#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/rel.h"
-#include "utils/syscache.h"
#include "utils/tqual.h"
-static void InitScanRelation(SampleScanState *node, EState *estate,
- int eflags, TableSampleClause *tablesample);
+static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
static TupleTableSlot *SampleNext(SampleScanState *node);
-
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+ HeapScanDesc scan);
/* ----------------------------------------------------------------
* Scan Support
@@ -45,23 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node);
static TupleTableSlot *
SampleNext(SampleScanState *node)
{
- TupleTableSlot *slot;
- TableSampleDesc *tsdesc;
HeapTuple tuple;
+ TupleTableSlot *slot;
/*
- * get information from the scan state
+ * if this is the first call within a scan, initialize
*/
- slot = node->ss.ss_ScanTupleSlot;
- tsdesc = node->tsdesc;
+ if (!node->begun)
+ tablesample_init(node);
+
+ /*
+ * get the next tuple, and store it in our result slot
+ */
+ tuple = tablesample_getnext(node);
- tuple = tablesample_getnext(tsdesc);
+ slot = node->ss.ss_ScanTupleSlot;
if (tuple)
ExecStoreTuple(tuple, /* tuple to store */
slot, /* slot to store in */
- tsdesc->heapScan->rs_cbuf, /* buffer associated
- * with this tuple */
+ node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
false); /* don't pfree this pointer */
else
ExecClearTuple(slot);
@@ -75,7 +79,10 @@ SampleNext(SampleScanState *node)
static bool
SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
{
- /* No need to recheck for SampleScan */
+ /*
+ * No need to recheck for SampleScan, since like SeqScan we don't pass any
+ * checkable keys to heap_beginscan.
+ */
return true;
}
@@ -103,8 +110,7 @@ ExecSampleScan(SampleScanState *node)
* ----------------------------------------------------------------
*/
static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
- TableSampleClause *tablesample)
+InitScanRelation(SampleScanState *node, EState *estate, int eflags)
{
Relation currentRelation;
@@ -113,19 +119,13 @@ InitScanRelation(SampleScanState *node, EState *estate, int eflags,
* open that relation and acquire appropriate lock on it.
*/
currentRelation = ExecOpenScanRelation(estate,
- ((SampleScan *) node->ss.ps.plan)->scanrelid,
+ ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
eflags);
node->ss.ss_currentRelation = currentRelation;
- /*
- * Even though we aren't going to do a conventional seqscan, it is useful
- * to create a HeapScanDesc --- many of the fields in it are usable.
- */
- node->ss.ss_currentScanDesc =
- heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
- tablesample->tsmseqscan,
- tablesample->tsmpagemode);
+ /* we won't set up the HeapScanDesc till later */
+ node->ss.ss_currentScanDesc = NULL;
/* and report the scan tuple slot's rowtype */
ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
@@ -140,12 +140,11 @@ SampleScanState *
ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
{
SampleScanState *scanstate;
- RangeTblEntry *rte = rt_fetch(node->scanrelid,
- estate->es_range_table);
+ TableSampleClause *tsc = node->tablesample;
+ TsmRoutine *tsm;
Assert(outerPlan(node) == NULL);
Assert(innerPlan(node) == NULL);
- Assert(rte->tablesample != NULL);
/*
* create state structure
@@ -165,10 +164,17 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
* initialize child expressions
*/
scanstate->ss.ps.targetlist = (List *)
- ExecInitExpr((Expr *) node->plan.targetlist,
+ ExecInitExpr((Expr *) node->scan.plan.targetlist,
(PlanState *) scanstate);
scanstate->ss.ps.qual = (List *)
- ExecInitExpr((Expr *) node->plan.qual,
+ ExecInitExpr((Expr *) node->scan.plan.qual,
+ (PlanState *) scanstate);
+
+ scanstate->args = (List *)
+ ExecInitExpr((Expr *) tsc->args,
+ (PlanState *) scanstate);
+ scanstate->repeatable =
+ ExecInitExpr(tsc->repeatable,
(PlanState *) scanstate);
/*
@@ -180,7 +186,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
/*
* initialize scan relation
*/
- InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+ InitScanRelation(scanstate, estate, eflags);
scanstate->ss.ps.ps_TupFromTlist = false;
@@ -190,7 +196,25 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
ExecAssignResultTypeFromTL(&scanstate->ss.ps);
ExecAssignScanProjectionInfo(&scanstate->ss);
- scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+ /*
+ * If we don't have a REPEATABLE clause, select a random seed. We want to
+ * do this just once, since the seed shouldn't change over rescans.
+ */
+ if (tsc->repeatable == NULL)
+ scanstate->seed = random();
+
+ /*
+ * Finally, initialize the TABLESAMPLE method handler.
+ */
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+ scanstate->tsmroutine = tsm;
+ scanstate->tsm_state = NULL;
+
+ if (tsm->InitSampleScan)
+ tsm->InitSampleScan(scanstate, eflags);
+
+ /* We'll do BeginSampleScan later; we can't evaluate params yet */
+ scanstate->begun = false;
return scanstate;
}
@@ -207,7 +231,8 @@ ExecEndSampleScan(SampleScanState *node)
/*
* Tell sampling function that we finished the scan.
*/
- tablesample_end(node->tsdesc);
+ if (node->tsmroutine->EndSampleScan)
+ node->tsmroutine->EndSampleScan(node);
/*
* Free the exprcontext
@@ -223,7 +248,8 @@ ExecEndSampleScan(SampleScanState *node)
/*
* close heap scan
*/
- heap_endscan(node->ss.ss_currentScanDesc);
+ if (node->ss.ss_currentScanDesc)
+ heap_endscan(node->ss.ss_currentScanDesc);
/*
* close the heap relation.
@@ -232,11 +258,6 @@ ExecEndSampleScan(SampleScanState *node)
}
/* ----------------------------------------------------------------
- * Join Support
- * ----------------------------------------------------------------
- */
-
-/* ----------------------------------------------------------------
* ExecReScanSampleScan
*
* Rescans the relation.
@@ -246,12 +267,336 @@ ExecEndSampleScan(SampleScanState *node)
void
ExecReScanSampleScan(SampleScanState *node)
{
- heap_rescan(node->ss.ss_currentScanDesc, NULL);
+ /* Remember we need to do BeginSampleScan again (if we did it at all) */
+ node->begun = false;
+
+ ExecScanReScan(&node->ss);
+}
+
+
+/*
+ * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+{
+ TsmRoutine *tsm = scanstate->tsmroutine;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ Datum *params;
+ Datum datum;
+ bool isnull;
+ uint32 seed;
+ bool allow_sync;
+ int i;
+ ListCell *arg;
+
+ params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+
+ i = 0;
+ foreach(arg, scanstate->args)
+ {
+ ExprState *argstate = (ExprState *) lfirst(arg);
+
+ params[i] = ExecEvalExprSwitchContext(argstate,
+ econtext,
+ &isnull,
+ NULL);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("TABLESAMPLE parameter cannot be null")));
+ i++;
+ }
+
+ if (scanstate->repeatable)
+ {
+ datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+ econtext,
+ &isnull,
+ NULL);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
+ errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+
+ /*
+ * The REPEATABLE parameter has been coerced to float8 by the parser.
+ * The reason for using float8 at the SQL level is that it will
+ * produce unsurprising results both for users used to databases that
+ * accept only integers in the REPEATABLE clause and for those who
+ * might expect that REPEATABLE works like setseed() (a float in the
+ * range from -1 to 1).
+ *
+ * We use hashfloat8() to convert the supplied value into a suitable
+ * seed. For regression-testing purposes, that has the convenient
+ * property that REPEATABLE(0) gives a machine-independent result.
+ */
+ seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+ }
+ else
+ {
+ /* Use the seed selected by ExecInitSampleScan */
+ seed = scanstate->seed;
+ }
+
+ /* Set default values for params that BeginSampleScan can adjust */
+ scanstate->use_bulkread = true;
+ scanstate->use_pagemode = true;
+
+ /* Let tablesample method do its thing */
+ tsm->BeginSampleScan(scanstate,
+ params,
+ list_length(scanstate->args),
+ seed);
+
+ /* We'll use syncscan if there's no NextSampleBlock function */
+ allow_sync = (tsm->NextSampleBlock == NULL);
+
+ /* Now we can create or reset the HeapScanDesc */
+ if (scanstate->ss.ss_currentScanDesc == NULL)
+ {
+ scanstate->ss.ss_currentScanDesc =
+ heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+ scanstate->ss.ps.state->es_snapshot,
+ 0, NULL,
+ scanstate->use_bulkread,
+ allow_sync,
+ scanstate->use_pagemode);
+ }
+ else
+ {
+ heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+ scanstate->use_bulkread,
+ allow_sync,
+ scanstate->use_pagemode);
+ }
+
+ pfree(params);
+
+ /* And we're initialized. */
+ scanstate->begun = true;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE method.
+ *
+ * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+{
+ TsmRoutine *tsm = scanstate->tsmroutine;
+ HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+ HeapTuple tuple = &(scan->rs_ctup);
+ Snapshot snapshot = scan->rs_snapshot;
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool all_visible;
+ OffsetNumber maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ if (tsm->NextSampleBlock)
+ {
+ blockno = tsm->NextSampleBlock(scanstate);
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ }
+ else
+ blockno = scan->rs_startblock;
+ Assert(blockno < scan->rs_nblocks);
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
/*
- * Tell sampling function to reset its state for rescan.
+ * When not using pagemode, we must lock the buffer during tuple
+ * visibility checks.
*/
- tablesample_reset(node->tsdesc);
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ OffsetNumber tupoffset;
+ bool finished;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Ask the tablesample method which tuples to check on this page. */
+ tupoffset = tsm->NextSampleTuple(scanstate,
+ blockno,
+ maxoffset);
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ ItemId itemid;
+ bool visible;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /* in pagemode, heapgetpage did this for us */
+ if (!pagemode)
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+ scan->rs_cbuf, snapshot);
+
+ if (visible)
+ {
+ /* Found visible tuple, return it. */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+ /*
+ * if we get here, it means we've exhausted the items on this page and
+ * it's time to move to the next.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ if (tsm->NextSampleBlock)
+ {
+ blockno = tsm->NextSampleBlock(scanstate);
+ Assert(!scan->rs_syncscan);
+ finished = !BlockNumberIsValid(blockno);
+ }
+ else
+ {
+ /* Without NextSampleBlock, just do a plain forward seqscan. */
+ blockno++;
+ if (blockno >= scan->rs_nblocks)
+ blockno = 0;
+
+ /*
+ * Report our new scan position for synchronization purposes.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, blockno);
+
+ finished = (blockno == scan->rs_startblock);
+ }
+
+ /*
+ * Reached end of scan?
+ */
+ if (finished)
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ Assert(blockno < scan->rs_nblocks);
+ heapgetpage(scan, blockno);
+
+ /* Re-establish state for new page */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ /* Count successfully-fetched tuples as heap fetches */
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
- ExecScanReScan(&node->ss);
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ if (scan->rs_pageatatime)
+ {
+ /*
+ * In pageatatime mode, heapgetpage() already did visibility checks,
+ * so just look at the info it left in rs_vistuples[].
+ *
+ * We use a binary search over the known-sorted array. Note: we could
+ * save some effort if we insisted that NextSampleTuple select tuples
+ * in increasing order, but it's not clear that there would be enough
+ * gain to justify the restriction.
+ */
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ while (start <= end)
+ {
+ int mid = (start + end) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (tupoffset == curoffset)
+ return true;
+ else if (tupoffset < curoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* Otherwise, we have to check the tuple individually. */
+ return HeapTupleSatisfiesVisibility(tuple,
+ scan->rs_snapshot,
+ scan->rs_cbuf);
+ }
}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 6a08c2db211..7248440ead3 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -360,6 +360,27 @@ _copySeqScan(const SeqScan *from)
}
 /*
+ * _copySampleScan
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+ SampleScan *newnode = makeNode(SampleScan);
+
+ /*
+ * copy node superclass fields (the node is handled as a Scan here)
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ /*
+ * copy remainder of node: tablesample is the only other field copied
+ */
+ COPY_NODE_FIELD(tablesample);
+
+ return newnode;
+}
+
+/*
* _copyIndexScan
*/
static IndexScan *
@@ -642,22 +663,6 @@ _copyCustomScan(const CustomScan *from)
}
/*
- * _copySampleScan
- */
-static SampleScan *
-_copySampleScan(const SampleScan *from)
-{
- SampleScan *newnode = makeNode(SampleScan);
-
- /*
- * copy node superclass fields
- */
- CopyScanFields((const Scan *) from, (Scan *) newnode);
-
- return newnode;
-}
-
-/*
* CopyJoinFields
*
* This function copies the fields of the Join node. It is used by
@@ -2143,6 +2148,18 @@ _copyRangeTblFunction(const RangeTblFunction *from)
return newnode;
}
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+{
+ TableSampleClause *newnode = makeNode(TableSampleClause);
+
+ /* same three fields as the equal/out/read support functions for this node */
+ COPY_SCALAR_FIELD(tsmhandler); /* written as an OID field by _outTableSampleClause */
+ COPY_NODE_FIELD(args);
+ COPY_NODE_FIELD(repeatable);
+
+ return newnode;
+}
+
static WithCheckOption *
_copyWithCheckOption(const WithCheckOption *from)
{
@@ -2271,40 +2288,6 @@ _copyCommonTableExpr(const CommonTableExpr *from)
return newnode;
}
-static RangeTableSample *
-_copyRangeTableSample(const RangeTableSample *from)
-{
- RangeTableSample *newnode = makeNode(RangeTableSample);
-
- COPY_NODE_FIELD(relation);
- COPY_STRING_FIELD(method);
- COPY_NODE_FIELD(repeatable);
- COPY_NODE_FIELD(args);
-
- return newnode;
-}
-
-static TableSampleClause *
-_copyTableSampleClause(const TableSampleClause *from)
-{
- TableSampleClause *newnode = makeNode(TableSampleClause);
-
- COPY_SCALAR_FIELD(tsmid);
- COPY_SCALAR_FIELD(tsmseqscan);
- COPY_SCALAR_FIELD(tsmpagemode);
- COPY_SCALAR_FIELD(tsminit);
- COPY_SCALAR_FIELD(tsmnextblock);
- COPY_SCALAR_FIELD(tsmnexttuple);
- COPY_SCALAR_FIELD(tsmexaminetuple);
- COPY_SCALAR_FIELD(tsmend);
- COPY_SCALAR_FIELD(tsmreset);
- COPY_SCALAR_FIELD(tsmcost);
- COPY_NODE_FIELD(repeatable);
- COPY_NODE_FIELD(args);
-
- return newnode;
-}
-
static A_Expr *
_copyAExpr(const A_Expr *from)
{
@@ -2532,6 +2515,20 @@ _copyRangeFunction(const RangeFunction *from)
return newnode;
}
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+{
+ RangeTableSample *newnode = makeNode(RangeTableSample);
+
+ /* field list mirrors _equalRangeTableSample and _outRangeTableSample */
+ COPY_NODE_FIELD(relation);
+ COPY_NODE_FIELD(method); /* copied as a node field, not as a C string */
+ COPY_NODE_FIELD(args);
+ COPY_NODE_FIELD(repeatable);
+ COPY_LOCATION_FIELD(location); /* the value exprLocation() reports for this node */
+
+ return newnode;
+}
+
static TypeCast *
_copyTypeCast(const TypeCast *from)
{
@@ -4237,6 +4234,9 @@ copyObject(const void *from)
case T_SeqScan:
retval = _copySeqScan(from);
break;
+ case T_SampleScan:
+ retval = _copySampleScan(from);
+ break;
case T_IndexScan:
retval = _copyIndexScan(from);
break;
@@ -4273,9 +4273,6 @@ copyObject(const void *from)
case T_CustomScan:
retval = _copyCustomScan(from);
break;
- case T_SampleScan:
- retval = _copySampleScan(from);
- break;
case T_Join:
retval = _copyJoin(from);
break;
@@ -4897,6 +4894,9 @@ copyObject(const void *from)
case T_RangeFunction:
retval = _copyRangeFunction(from);
break;
+ case T_RangeTableSample:
+ retval = _copyRangeTableSample(from);
+ break;
case T_TypeName:
retval = _copyTypeName(from);
break;
@@ -4921,6 +4921,9 @@ copyObject(const void *from)
case T_RangeTblFunction:
retval = _copyRangeTblFunction(from);
break;
+ case T_TableSampleClause:
+ retval = _copyTableSampleClause(from);
+ break;
case T_WithCheckOption:
retval = _copyWithCheckOption(from);
break;
@@ -4948,12 +4951,6 @@ copyObject(const void *from)
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
- case T_RangeTableSample:
- retval = _copyRangeTableSample(from);
- break;
- case T_TableSampleClause:
- retval = _copyTableSampleClause(from);
- break;
case T_FuncWithArgs:
retval = _copyFuncWithArgs(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index faf5eedab4e..6597dbc33e1 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2291,6 +2291,18 @@ _equalRangeFunction(const RangeFunction *a, const RangeFunction *b)
}
 static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+ /* same fields, in the same order, as _copyRangeTableSample */
+ COMPARE_NODE_FIELD(relation);
+ COMPARE_NODE_FIELD(method);
+ COMPARE_NODE_FIELD(args);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_LOCATION_FIELD(location); /* NOTE(review): macro defined elsewhere; confirm how locations are treated */
+
+ return true;
+}
+
+static bool
_equalIndexElem(const IndexElem *a, const IndexElem *b)
{
COMPARE_STRING_FIELD(name);
@@ -2429,6 +2441,16 @@ _equalRangeTblFunction(const RangeTblFunction *a, const RangeTblFunction *b)
}
 static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+ /* same three fields as _copyTableSampleClause */
+ COMPARE_SCALAR_FIELD(tsmhandler);
+ COMPARE_NODE_FIELD(args);
+ COMPARE_NODE_FIELD(repeatable);
+
+ return true; /* reached when none of the COMPARE_* steps above returned first */
+}
+
+static bool
_equalWithCheckOption(const WithCheckOption *a, const WithCheckOption *b)
{
COMPARE_SCALAR_FIELD(kind);
@@ -2539,36 +2561,6 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
}
static bool
-_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
-{
- COMPARE_NODE_FIELD(relation);
- COMPARE_STRING_FIELD(method);
- COMPARE_NODE_FIELD(repeatable);
- COMPARE_NODE_FIELD(args);
-
- return true;
-}
-
-static bool
-_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
-{
- COMPARE_SCALAR_FIELD(tsmid);
- COMPARE_SCALAR_FIELD(tsmseqscan);
- COMPARE_SCALAR_FIELD(tsmpagemode);
- COMPARE_SCALAR_FIELD(tsminit);
- COMPARE_SCALAR_FIELD(tsmnextblock);
- COMPARE_SCALAR_FIELD(tsmnexttuple);
- COMPARE_SCALAR_FIELD(tsmexaminetuple);
- COMPARE_SCALAR_FIELD(tsmend);
- COMPARE_SCALAR_FIELD(tsmreset);
- COMPARE_SCALAR_FIELD(tsmcost);
- COMPARE_NODE_FIELD(repeatable);
- COMPARE_NODE_FIELD(args);
-
- return true;
-}
-
-static bool
_equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
{
COMPARE_SCALAR_FIELD(xmloption);
@@ -3260,6 +3252,9 @@ equal(const void *a, const void *b)
case T_RangeFunction:
retval = _equalRangeFunction(a, b);
break;
+ case T_RangeTableSample:
+ retval = _equalRangeTableSample(a, b);
+ break;
case T_TypeName:
retval = _equalTypeName(a, b);
break;
@@ -3284,6 +3279,9 @@ equal(const void *a, const void *b)
case T_RangeTblFunction:
retval = _equalRangeTblFunction(a, b);
break;
+ case T_TableSampleClause:
+ retval = _equalTableSampleClause(a, b);
+ break;
case T_WithCheckOption:
retval = _equalWithCheckOption(a, b);
break;
@@ -3311,12 +3309,6 @@ equal(const void *a, const void *b)
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
- case T_RangeTableSample:
- retval = _equalRangeTableSample(a, b);
- break;
- case T_TableSampleClause:
- retval = _equalTableSampleClause(a, b);
- break;
case T_FuncWithArgs:
retval = _equalFuncWithArgs(a, b);
break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index b1e3e6e4893..c517dfd9d69 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1486,6 +1486,9 @@ exprLocation(const Node *expr)
case T_WindowDef:
loc = ((const WindowDef *) expr)->location;
break;
+ case T_RangeTableSample:
+ loc = ((const RangeTableSample *) expr)->location;
+ break;
case T_TypeName:
loc = ((const TypeName *) expr)->location;
break;
@@ -1995,6 +1998,17 @@ expression_tree_walker(Node *node,
return walker(((PlaceHolderInfo *) node)->ph_var, context);
case T_RangeTblFunction:
return walker(((RangeTblFunction *) node)->funcexpr, context);
+ case T_TableSampleClause:
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+
+ if (expression_tree_walker((Node *) tsc->args,
+ walker, context))
+ return true;
+ if (walker((Node *) tsc->repeatable, context))
+ return true;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
@@ -2082,13 +2096,8 @@ range_table_walker(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
- if (rte->tablesample)
- {
- if (walker(rte->tablesample->args, context))
- return true;
- if (walker(rte->tablesample->repeatable, context))
- return true;
- }
+ if (walker(rte->tablesample, context))
+ return true;
break;
case RTE_CTE:
/* nothing to do */
@@ -2782,6 +2791,17 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
+ case T_TableSampleClause:
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+ TableSampleClause *newnode;
+
+ FLATCOPY(newnode, tsc, TableSampleClause);
+ MUTATE(newnode->args, tsc->args, List *);
+ MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
+ return (Node *) newnode;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
@@ -2868,20 +2888,12 @@ range_table_mutator(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
- if (rte->tablesample)
- {
- CHECKFLATCOPY(newrte->tablesample, rte->tablesample,
- TableSampleClause);
- MUTATE(newrte->tablesample->args,
- newrte->tablesample->args,
- List *);
- MUTATE(newrte->tablesample->repeatable,
- newrte->tablesample->repeatable,
- Node *);
- }
+ MUTATE(newrte->tablesample, rte->tablesample,
+ TableSampleClause *);
+ /* we don't bother to copy eref, aliases, etc; OK? */
break;
case RTE_CTE:
- /* we don't bother to copy eref, aliases, etc; OK? */
+ /* nothing to do */
break;
case RTE_SUBQUERY:
if (!(flags & QTW_IGNORE_RT_SUBQUERIES))
@@ -3316,6 +3328,19 @@ raw_expression_tree_walker(Node *node,
return true;
}
break;
+ case T_RangeTableSample:
+ {
+ RangeTableSample *rts = (RangeTableSample *) node;
+
+ if (walker(rts->relation, context))
+ return true;
+ /* method name is deemed uninteresting */
+ if (walker(rts->args, context))
+ return true;
+ if (walker(rts->repeatable, context))
+ return true;
+ }
+ break;
case T_TypeName:
{
TypeName *tn = (TypeName *) node;
@@ -3380,18 +3405,6 @@ raw_expression_tree_walker(Node *node,
break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
- case T_RangeTableSample:
- {
- RangeTableSample *rts = (RangeTableSample *) node;
-
- if (walker(rts->relation, context))
- return true;
- if (walker(rts->repeatable, context))
- return true;
- if (walker(rts->args, context))
- return true;
- }
- break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 87304ba9bf6..81725d6e59a 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -445,6 +445,16 @@ _outSeqScan(StringInfo str, const SeqScan *node)
}
 static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+ WRITE_NODE_TYPE("SAMPLESCAN");
+
+ _outScanInfo(str, (const Scan *) node); /* node is passed down as a Scan */
+
+ WRITE_NODE_FIELD(tablesample); /* the one field _copySampleScan copies besides the Scan part */
+}
+
+static void
_outIndexScan(StringInfo str, const IndexScan *node)
{
WRITE_NODE_TYPE("INDEXSCAN");
@@ -592,14 +602,6 @@ _outCustomScan(StringInfo str, const CustomScan *node)
}
static void
-_outSampleScan(StringInfo str, const SampleScan *node)
-{
- WRITE_NODE_TYPE("SAMPLESCAN");
-
- _outScanInfo(str, (const Scan *) node);
-}
-
-static void
_outJoin(StringInfo str, const Join *node)
{
WRITE_NODE_TYPE("JOIN");
@@ -2479,36 +2481,6 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
}
static void
-_outRangeTableSample(StringInfo str, const RangeTableSample *node)
-{
- WRITE_NODE_TYPE("RANGETABLESAMPLE");
-
- WRITE_NODE_FIELD(relation);
- WRITE_STRING_FIELD(method);
- WRITE_NODE_FIELD(repeatable);
- WRITE_NODE_FIELD(args);
-}
-
-static void
-_outTableSampleClause(StringInfo str, const TableSampleClause *node)
-{
- WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
-
- WRITE_OID_FIELD(tsmid);
- WRITE_BOOL_FIELD(tsmseqscan);
- WRITE_BOOL_FIELD(tsmpagemode);
- WRITE_OID_FIELD(tsminit);
- WRITE_OID_FIELD(tsmnextblock);
- WRITE_OID_FIELD(tsmnexttuple);
- WRITE_OID_FIELD(tsmexaminetuple);
- WRITE_OID_FIELD(tsmend);
- WRITE_OID_FIELD(tsmreset);
- WRITE_OID_FIELD(tsmcost);
- WRITE_NODE_FIELD(repeatable);
- WRITE_NODE_FIELD(args);
-}
-
-static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
WRITE_NODE_TYPE("SETOPERATIONSTMT");
@@ -2595,6 +2567,16 @@ _outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
}
 static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+ WRITE_NODE_TYPE("TABLESAMPLECLAUSE"); /* label tested by MATCH("TABLESAMPLECLAUSE", 17) in parseNodeString */
+
+ /* field order must stay in step with _readTableSampleClause */
+ WRITE_OID_FIELD(tsmhandler);
+ WRITE_NODE_FIELD(args);
+ WRITE_NODE_FIELD(repeatable);
+}
+
+static void
_outAExpr(StringInfo str, const A_Expr *node)
{
WRITE_NODE_TYPE("AEXPR");
@@ -2846,6 +2828,18 @@ _outRangeFunction(StringInfo str, const RangeFunction *node)
}
 static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+ WRITE_NODE_TYPE("RANGETABLESAMPLE"); /* NOTE(review): the matching reader is removed by this patch; confirm output-only use */
+
+ /* same fields, in the same order, as _copyRangeTableSample */
+ WRITE_NODE_FIELD(relation);
+ WRITE_NODE_FIELD(method);
+ WRITE_NODE_FIELD(args);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_LOCATION_FIELD(location);
+}
+
+static void
_outConstraint(StringInfo str, const Constraint *node)
{
WRITE_NODE_TYPE("CONSTRAINT");
@@ -3002,6 +2996,9 @@ _outNode(StringInfo str, const void *obj)
case T_SeqScan:
_outSeqScan(str, obj);
break;
+ case T_SampleScan:
+ _outSampleScan(str, obj);
+ break;
case T_IndexScan:
_outIndexScan(str, obj);
break;
@@ -3038,9 +3035,6 @@ _outNode(StringInfo str, const void *obj)
case T_CustomScan:
_outCustomScan(str, obj);
break;
- case T_SampleScan:
- _outSampleScan(str, obj);
- break;
case T_Join:
_outJoin(str, obj);
break;
@@ -3393,12 +3387,6 @@ _outNode(StringInfo str, const void *obj)
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
- case T_RangeTableSample:
- _outRangeTableSample(str, obj);
- break;
- case T_TableSampleClause:
- _outTableSampleClause(str, obj);
- break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
@@ -3408,6 +3396,9 @@ _outNode(StringInfo str, const void *obj)
case T_RangeTblFunction:
_outRangeTblFunction(str, obj);
break;
+ case T_TableSampleClause:
+ _outTableSampleClause(str, obj);
+ break;
case T_A_Expr:
_outAExpr(str, obj);
break;
@@ -3450,6 +3441,9 @@ _outNode(StringInfo str, const void *obj)
case T_RangeFunction:
_outRangeFunction(str, obj);
break;
+ case T_RangeTableSample:
+ _outRangeTableSample(str, obj);
+ break;
case T_Constraint:
_outConstraint(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index f5a40fbfb44..71be840eac9 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -368,46 +368,6 @@ _readCommonTableExpr(void)
}
/*
- * _readRangeTableSample
- */
-static RangeTableSample *
-_readRangeTableSample(void)
-{
- READ_LOCALS(RangeTableSample);
-
- READ_NODE_FIELD(relation);
- READ_STRING_FIELD(method);
- READ_NODE_FIELD(repeatable);
- READ_NODE_FIELD(args);
-
- READ_DONE();
-}
-
-/*
- * _readTableSampleClause
- */
-static TableSampleClause *
-_readTableSampleClause(void)
-{
- READ_LOCALS(TableSampleClause);
-
- READ_OID_FIELD(tsmid);
- READ_BOOL_FIELD(tsmseqscan);
- READ_BOOL_FIELD(tsmpagemode);
- READ_OID_FIELD(tsminit);
- READ_OID_FIELD(tsmnextblock);
- READ_OID_FIELD(tsmnexttuple);
- READ_OID_FIELD(tsmexaminetuple);
- READ_OID_FIELD(tsmend);
- READ_OID_FIELD(tsmreset);
- READ_OID_FIELD(tsmcost);
- READ_NODE_FIELD(repeatable);
- READ_NODE_FIELD(args);
-
- READ_DONE();
-}
-
-/*
* _readSetOperationStmt
*/
static SetOperationStmt *
@@ -1391,6 +1351,21 @@ _readRangeTblFunction(void)
READ_DONE();
}
+/*
+ * _readTableSampleClause (reached from parseNodeString on "TABLESAMPLECLAUSE")
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+ READ_LOCALS(TableSampleClause);
+
+ /* fields are read in the order _outTableSampleClause writes them */
+ READ_OID_FIELD(tsmhandler);
+ READ_NODE_FIELD(args);
+ READ_NODE_FIELD(repeatable);
+
+ READ_DONE();
+}
+
/*
* parseNodeString
@@ -1426,10 +1401,6 @@ parseNodeString(void)
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
return_value = _readCommonTableExpr();
- else if (MATCH("RANGETABLESAMPLE", 16))
- return_value = _readRangeTableSample();
- else if (MATCH("TABLESAMPLECLAUSE", 17))
- return_value = _readTableSampleClause();
else if (MATCH("SETOPERATIONSTMT", 16))
return_value = _readSetOperationStmt();
else if (MATCH("ALIAS", 5))
@@ -1528,6 +1499,8 @@ parseNodeString(void)
return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16))
return_value = _readRangeTblFunction();
+ else if (MATCH("TABLESAMPLECLAUSE", 17))
+ return_value = _readTableSampleClause();
else if (MATCH("NOTIFY", 6))
return_value = _readNotifyStmt();
else if (MATCH("DECLARECURSOR", 13))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 888eeac5151..1590be11675 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -18,6 +18,7 @@
#include <math.h>
#include "access/sysattr.h"
+#include "access/tsmapi.h"
#include "catalog/pg_class.h"
#include "catalog/pg_operator.h"
#include "foreign/fdwapi.h"
@@ -390,7 +391,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
}
else if (rte->tablesample != NULL)
{
- /* Build sample scan on relation */
+ /* Sampled relation */
set_tablesample_rel_pathlist(root, rel, rte);
}
else
@@ -480,11 +481,40 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 /*
 * set_tablesample_rel_size
- * Set size estimates for a sampled relation.
+ * Set size estimates for a sampled relation
 */
 static void
 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
+ TableSampleClause *tsc = rte->tablesample;
+ TsmRoutine *tsm; /* obtained below from tsc->tsmhandler */
+ BlockNumber pages; /* output of SampleScanGetSampleSize */
+ double tuples; /* output of SampleScanGetSampleSize */
+
+ /*
+ * Test any partial indexes of rel for applicability. We must do this
+ * first since partial unique indexes can affect size estimates.
+ */
+ check_partial_indexes(root, rel);
+
+ /*
+ * Call the sampling method's estimation function to estimate the number
+ * of pages it will read and the number of tuples it will return. (Note:
+ * we assume the function returns sane values.)
+ */
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+ tsm->SampleScanGetSampleSize(root, rel, tsc->args,
+ &pages, &tuples);
+
+ /*
+ * For the moment, because we will only consider a SampleScan path for the
+ * rel, it's okay to just overwrite the pages and tuples estimates for the
+ * whole relation. If we ever consider multiple path types for sampled
+ * rels, we'll need more complication.
+ */
+ rel->pages = pages; /* cost_samplescan later reads baserel->pages for disk cost */
+ rel->tuples = tuples; /* cost_samplescan later reads baserel->tuples for CPU cost */
+
 /* Mark rel with estimated output rows, width, etc */
 set_baserel_size_estimates(root, rel);
 }
@@ -492,8 +522,6 @@ set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
* set_tablesample_rel_pathlist
* Build access paths for a sampled relation
- *
- * There is only one possible path - sampling scan
*/
static void
set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@@ -502,15 +530,41 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *
Path *path;
/*
- * We don't support pushing join clauses into the quals of a seqscan, but
- * it could still have required parameterization due to LATERAL refs in
- * its tlist.
+ * We don't support pushing join clauses into the quals of a samplescan,
+ * but it could still have required parameterization due to LATERAL refs
+ * in its tlist or TABLESAMPLE arguments.
*/
required_outer = rel->lateral_relids;
- /* We only do sample scan if it was requested */
+ /* Consider sampled scan */
path = create_samplescan_path(root, rel, required_outer);
- rel->pathlist = list_make1(path);
+
+ /*
+ * If the sampling method does not support repeatable scans, we must avoid
+ * plans that would scan the rel multiple times. Ideally, we'd simply
+ * avoid putting the rel on the inside of a nestloop join; but adding such
+ * a consideration to the planner seems like a great deal of complication
+ * to support an uncommon usage of second-rate sampling methods. Instead,
+ * if there is a risk that the query might perform an unsafe join, just
+ * wrap the SampleScan in a Materialize node. We can check for joins by
+ * counting the membership of all_baserels (note that this correctly
+ * counts inheritance trees as single rels). If we're inside a subquery,
+ * we can't easily check whether a join might occur in the outer query, so
+ * just assume one is possible.
+ *
+ * GetTsmRoutine is relatively expensive compared to the other tests here,
+ * so check repeatable_across_scans last, even though that's a bit odd.
+ */
+ if ((root->query_level > 1 ||
+ bms_membership(root->all_baserels) != BMS_SINGLETON) &&
+ !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
+ {
+ path = (Path *) create_material_path(rel, path);
+ }
+
+ add_path(rel, path);
+
+ /* For the moment, at least, there are no other paths to consider */
}
/*
@@ -2450,7 +2504,33 @@ print_path(PlannerInfo *root, Path *path, int indent)
switch (nodeTag(path))
{
case T_Path:
- ptype = "SeqScan";
+ switch (path->pathtype)
+ {
+ case T_SeqScan:
+ ptype = "SeqScan";
+ break;
+ case T_SampleScan:
+ ptype = "SampleScan";
+ break;
+ case T_SubqueryScan:
+ ptype = "SubqueryScan";
+ break;
+ case T_FunctionScan:
+ ptype = "FunctionScan";
+ break;
+ case T_ValuesScan:
+ ptype = "ValuesScan";
+ break;
+ case T_CteScan:
+ ptype = "CteScan";
+ break;
+ case T_WorkTableScan:
+ ptype = "WorkTableScan";
+ break;
+ default:
+ ptype = "???Path";
+ break;
+ }
break;
case T_IndexPath:
ptype = "IdxScan";
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0d302f66bee..7069f604110 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -74,6 +74,7 @@
#include <math.h>
#include "access/htup_details.h"
+#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeHash.h"
#include "miscadmin.h"
@@ -223,64 +224,66 @@ cost_seqscan(Path *path, PlannerInfo *root,
* cost_samplescan
* Determines and returns the cost of scanning a relation using sampling.
*
- * From planner/optimizer perspective, we don't care all that much about cost
- * itself since there is always only one scan path to consider when sampling
- * scan is present, but number of rows estimation is still important.
- *
* 'baserel' is the relation to be scanned
* 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
-cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_samplescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
+ RangeTblEntry *rte;
+ TableSampleClause *tsc;
+ TsmRoutine *tsm;
double spc_seq_page_cost,
spc_random_page_cost,
spc_page_cost;
QualCost qpqual_cost;
Cost cpu_per_tuple;
- BlockNumber pages;
- double tuples;
- RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
- TableSampleClause *tablesample = rte->tablesample;
- /* Should only be applied to base relations */
+ /* Should only be applied to base relations with tablesample clauses */
Assert(baserel->relid > 0);
- Assert(baserel->rtekind == RTE_RELATION);
+ rte = planner_rt_fetch(baserel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ tsc = rte->tablesample;
+ Assert(tsc != NULL);
+ tsm = GetTsmRoutine(tsc->tsmhandler);
/* Mark the path with the correct row estimate */
- if (path->param_info)
- path->rows = path->param_info->ppi_rows;
+ if (param_info)
+ path->rows = param_info->ppi_rows;
else
path->rows = baserel->rows;
- /* Call the sampling method's costing function. */
- OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
- PointerGetDatum(path), PointerGetDatum(baserel),
- PointerGetDatum(tablesample->args),
- PointerGetDatum(&pages), PointerGetDatum(&tuples));
-
/* fetch estimated page cost for tablespace containing table */
get_tablespace_page_costs(baserel->reltablespace,
&spc_random_page_cost,
&spc_seq_page_cost);
-
- spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
- spc_random_page_cost;
+ /* if NextSampleBlock is used, assume random access, else sequential */
+ spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+ spc_random_page_cost : spc_seq_page_cost;
/*
- * disk costs
+ * disk costs (recall that baserel->pages has already been set to the
+ * number of pages the sampling method will visit)
*/
- run_cost += spc_page_cost * pages;
+ run_cost += spc_page_cost * baserel->pages;
- /* CPU costs */
- get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+ /*
+ * CPU costs (recall that baserel->tuples has already been set to the
+ * number of tuples the sampling method will select). Note that we ignore
+ * execution cost of the TABLESAMPLE parameter expressions; they will be
+ * evaluated only once per scan, and in most usages they'll likely be
+ * simple constants anyway. We also don't charge anything for the
+ * calculations the sampling method might do internally.
+ */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
- run_cost += cpu_per_tuple * tuples;
+ run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8d15c8ede90..f461586e08c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -102,7 +102,8 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_path_costsize(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
-static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
+ TableSampleClause *tsc);
static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
Oid indexid, List *indexqual, List *indexqualorig,
List *indexorderby, List *indexorderbyorig,
@@ -1148,7 +1149,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
/*
* create_samplescan_plan
- * Returns a samplecan plan for the base relation scanned by 'best_path'
+ * Returns a samplescan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
*/
static SampleScan *
@@ -1157,11 +1158,15 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
{
SampleScan *scan_plan;
Index scan_relid = best_path->parent->relid;
+ RangeTblEntry *rte;
+ TableSampleClause *tsc;
- /* it should be a base rel with tablesample clause... */
+ /* it should be a base rel with a tablesample clause... */
Assert(scan_relid > 0);
- Assert(best_path->parent->rtekind == RTE_RELATION);
- Assert(best_path->pathtype == T_SampleScan);
+ rte = planner_rt_fetch(scan_relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ tsc = rte->tablesample;
+ Assert(tsc != NULL);
/* Sort clauses into best execution order */
scan_clauses = order_qual_clauses(root, scan_clauses);
@@ -1174,13 +1179,16 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
{
scan_clauses = (List *)
replace_nestloop_params(root, (Node *) scan_clauses);
+ tsc = (TableSampleClause *)
+ replace_nestloop_params(root, (Node *) tsc);
}
scan_plan = make_samplescan(tlist,
scan_clauses,
- scan_relid);
+ scan_relid,
+ tsc);
- copy_path_costsize(&scan_plan->plan, best_path);
+ copy_path_costsize(&scan_plan->scan.plan, best_path);
return scan_plan;
}
@@ -2161,9 +2169,9 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
ListCell *lc;
/* Recursively transform child paths. */
- foreach (lc, best_path->custom_paths)
+ foreach(lc, best_path->custom_paths)
{
- Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc));
+ Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc));
custom_plans = lappend(custom_plans, plan);
}
@@ -3437,17 +3445,19 @@ make_seqscan(List *qptlist,
 static SampleScan *
 make_samplescan(List *qptlist,
 List *qpqual,
- Index scanrelid)
+ Index scanrelid,
+ TableSampleClause *tsc) /* stored into node->tablesample below */
 {
 SampleScan *node = makeNode(SampleScan);
- Plan *plan = &node->plan;
+ Plan *plan = &node->scan.plan; /* Plan is reached through the node's scan member */
 /* cost should be inserted by caller */
 plan->targetlist = qptlist;
 plan->qual = qpqual;
 plan->lefttree = NULL;
 plan->righttree = NULL;
- node->scanrelid = scanrelid;
+ node->scan.scanrelid = scanrelid;
+ node->tablesample = tsc; /* the pointer is stored as given; no copy is made here */
 return node;
 }
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 00b2625d342..701b99254db 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -306,7 +306,9 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
return;
/* Fetch the appropriate variables */
- if (rte->rtekind == RTE_SUBQUERY)
+ if (rte->rtekind == RTE_RELATION)
+ vars = pull_vars_of_level((Node *) rte->tablesample, 0);
+ else if (rte->rtekind == RTE_SUBQUERY)
vars = pull_vars_of_level((Node *) rte->subquery, 1);
else if (rte->rtekind == RTE_FUNCTION)
vars = pull_vars_of_level((Node *) rte->functions, 0);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a6ce96efc48..b95cc95e5d9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -505,14 +505,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
if (rte->rtekind == RTE_RELATION)
{
if (rte->tablesample)
- {
- rte->tablesample->args = (List *)
- preprocess_expression(root, (Node *) rte->tablesample->args,
- EXPRKIND_TABLESAMPLE);
- rte->tablesample->repeatable = (Node *)
- preprocess_expression(root, rte->tablesample->repeatable,
+ rte->tablesample = (TableSampleClause *)
+ preprocess_expression(root,
+ (Node *) rte->tablesample,
EXPRKIND_TABLESAMPLE);
- }
}
else if (rte->rtekind == RTE_SUBQUERY)
{
@@ -697,11 +693,14 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
* If the query has any join RTEs, replace join alias variables with
* base-relation variables. We must do this before sublink processing,
* else sublinks expanded out from join aliases would not get processed.
- * We can skip it in non-lateral RTE functions and VALUES lists, however,
- * since they can't contain any Vars of the current query level.
+ * We can skip it in non-lateral RTE functions, VALUES lists, and
+ * TABLESAMPLE clauses, however, since they can't contain any Vars of the
+ * current query level.
*/
if (root->hasJoinRTEs &&
- !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES))
+ !(kind == EXPRKIND_RTFUNC ||
+ kind == EXPRKIND_VALUES ||
+ kind == EXPRKIND_TABLESAMPLE))
expr = flatten_join_alias_vars(root, expr);
/*
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 258e541754a..ea185d4b4cf 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -372,9 +372,8 @@ flatten_rtes_walker(Node *node, PlannerGlobal *glob)
*
* In the flat rangetable, we zero out substructure pointers that are not
* needed by the executor; this reduces the storage space and copying cost
- * for cached plans. We keep only the tablesample field (which we'd otherwise
- * have to put in the plan tree, anyway); the ctename, alias and eref Alias
- * fields, which are needed by EXPLAIN; and the selectedCols, insertedCols and
+ * for cached plans. We keep only the ctename, alias and eref Alias fields,
+ * which are needed by EXPLAIN, and the selectedCols, insertedCols and
* updatedCols bitmaps, which are needed for executor-startup permissions
* checking and for trigger event checking.
*/
@@ -388,6 +387,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte)
memcpy(newrte, rte, sizeof(RangeTblEntry));
/* zap unneeded sub-structure */
+ newrte->tablesample = NULL;
newrte->subquery = NULL;
newrte->joinaliasvars = NIL;
newrte->functions = NIL;
@@ -456,11 +456,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
{
SampleScan *splan = (SampleScan *) plan;
- splan->scanrelid += rtoffset;
- splan->plan.targetlist =
- fix_scan_list(root, splan->plan.targetlist, rtoffset);
- splan->plan.qual =
- fix_scan_list(root, splan->plan.qual, rtoffset);
+ splan->scan.scanrelid += rtoffset;
+ splan->scan.plan.targetlist =
+ fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
+ splan->scan.plan.qual =
+ fix_scan_list(root, splan->scan.plan.qual, rtoffset);
+ splan->tablesample = (TableSampleClause *)
+ fix_scan_expr(root, (Node *) splan->tablesample, rtoffset);
}
break;
case T_IndexScan:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 4708b87f330..f3038cdffda 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2216,7 +2216,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
break;
case T_SeqScan:
+ context.paramids = bms_add_members(context.paramids, scan_params);
+ break;
+
case T_SampleScan:
+ finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
+ &context);
context.paramids = bms_add_members(context.paramids, scan_params);
break;
@@ -2384,7 +2389,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
bms_add_members(context.paramids, scan_params);
/* child nodes if any */
- foreach (lc, cscan->custom_plans)
+ foreach(lc, cscan->custom_plans)
{
context.paramids =
bms_add_members(context.paramids,
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 92b05628434..34144ccaf0f 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1091,12 +1091,15 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
switch (child_rte->rtekind)
{
+ case RTE_RELATION:
+ if (child_rte->tablesample)
+ child_rte->lateral = true;
+ break;
case RTE_SUBQUERY:
case RTE_FUNCTION:
case RTE_VALUES:
child_rte->lateral = true;
break;
- case RTE_RELATION:
case RTE_JOIN:
case RTE_CTE:
/* these can't contain any lateral references */
@@ -1909,6 +1912,13 @@ replace_vars_in_jointree(Node *jtnode,
{
switch (rte->rtekind)
{
+ case RTE_RELATION:
+ /* shouldn't be marked LATERAL unless it has a TABLESAMPLE clause */
+ Assert(rte->tablesample);
+ rte->tablesample = (TableSampleClause *)
+ pullup_replace_vars((Node *) rte->tablesample,
+ context);
+ break;
case RTE_SUBQUERY:
rte->subquery =
pullup_replace_vars_subquery(rte->subquery,
@@ -1924,7 +1934,6 @@ replace_vars_in_jointree(Node *jtnode,
pullup_replace_vars((Node *) rte->values_lists,
context);
break;
- case RTE_RELATION:
case RTE_JOIN:
case RTE_CTE:
/* these shouldn't be marked LATERAL */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f7f33bbe772..935bc2b9667 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -713,7 +713,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
/*
* create_samplescan_path
- * Like seqscan but uses sampling function while scanning.
+ * Creates a path node for a sampled table scan.
*/
Path *
create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
@@ -726,7 +726,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
required_outer);
pathnode->pathkeys = NIL; /* samplescan has unordered result */
- cost_samplescan(pathnode, root, rel);
+ cost_samplescan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
@@ -1773,6 +1773,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
{
case T_SeqScan:
return create_seqscan_path(root, rel, required_outer);
+ case T_SampleScan:
+ return (Path *) create_samplescan_path(root, rel, required_outer);
case T_IndexScan:
case T_IndexOnlyScan:
{
@@ -1805,8 +1807,6 @@ reparameterize_path(PlannerInfo *root, Path *path,
case T_SubqueryScan:
return create_subqueryscan_path(root, rel, path->pathkeys,
required_outer);
- case T_SampleScan:
- return (Path *) create_samplescan_path(root, rel, required_outer);
default:
break;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2b02a2e5233..8f053e47e82 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -457,8 +457,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <jexpr> joined_table
%type <range> relation_expr
%type <range> relation_expr_opt_alias
+%type <node> tablesample_clause opt_repeatable_clause
%type <target> target_el single_set_clause set_target insert_column_item
-%type <node> relation_expr_tablesample tablesample_clause opt_repeatable_clause
%type <str> generic_option_name
%type <node> generic_option_arg
@@ -10491,9 +10491,13 @@ table_ref: relation_expr opt_alias_clause
$1->alias = $2;
$$ = (Node *) $1;
}
- | relation_expr_tablesample
+ | relation_expr opt_alias_clause tablesample_clause
{
- $$ = (Node *) $1;
+ RangeTableSample *n = (RangeTableSample *) $3;
+ $1->alias = $2;
+ /* relation_expr goes inside the RangeTableSample node */
+ n->relation = (Node *) $1;
+ $$ = (Node *) n;
}
| func_table func_alias_clause
{
@@ -10820,23 +10824,18 @@ relation_expr_opt_alias: relation_expr %prec UMINUS
}
;
-
-relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
- {
- RangeTableSample *n = (RangeTableSample *) $3;
- n->relation = $1;
- n->relation->alias = $2;
- $$ = (Node *) n;
- }
- ;
-
+/*
+ * TABLESAMPLE decoration in a FROM item
+ */
tablesample_clause:
- TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+ TABLESAMPLE func_name '(' expr_list ')' opt_repeatable_clause
{
RangeTableSample *n = makeNode(RangeTableSample);
+ /* n->relation will be filled in later */
n->method = $2;
n->args = $4;
n->repeatable = $6;
+ n->location = @2;
$$ = (Node *) n;
}
;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index e90e1d68e3a..4e490b23b4e 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -18,8 +18,8 @@
#include "miscadmin.h"
#include "access/heapam.h"
+#include "access/tsmapi.h"
#include "catalog/catalog.h"
-#include "access/htup_details.h"
#include "catalog/heap.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
@@ -43,7 +43,7 @@
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
-#include "utils/syscache.h"
+
/* Convenience macro for the most common makeNamespaceItem() case */
#define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true)
@@ -63,6 +63,8 @@ static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
RangeSubselect *r);
static RangeTblEntry *transformRangeFunction(ParseState *pstate,
RangeFunction *r);
+static TableSampleClause *transformRangeTableSample(ParseState *pstate,
+ RangeTableSample *rts);
static Node *transformFromClauseItem(ParseState *pstate, Node *n,
RangeTblEntry **top_rte, int *top_rti,
List **namespace);
@@ -423,40 +425,6 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
return result;
}
-static RangeTblEntry *
-transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
-{
- RangeTblEntry *rte = NULL;
- CommonTableExpr *cte = NULL;
- TableSampleClause *tablesample = NULL;
-
- /* if relation has an unqualified name, it might be a CTE reference */
- if (!rv->relation->schemaname)
- {
- Index levelsup;
-
- cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
- }
-
- /* We first need to build a range table entry */
- if (!cte)
- rte = transformTableEntry(pstate, rv->relation);
-
- if (!rte ||
- (rte->relkind != RELKIND_RELATION &&
- rte->relkind != RELKIND_MATVIEW))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
- parser_errposition(pstate, rv->relation->location)));
-
- tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
- rv->args, rv->relation->location);
- rte->tablesample = tablesample;
-
- return rte;
-}
-
/*
* transformTableEntry --- transform a RangeVar (simple relation reference)
*/
@@ -748,6 +716,109 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
return rte;
}
+/*
+ * transformRangeTableSample --- transform a TABLESAMPLE clause
+ *
+ * Caller has already transformed rts->relation; we just have to validate
+ * the remaining fields and create a TableSampleClause node.
+ */
+static TableSampleClause *
+transformRangeTableSample(ParseState *pstate, RangeTableSample *rts)
+{
+ TableSampleClause *tablesample;
+ Oid handlerOid;
+ Oid funcargtypes[1];
+ TsmRoutine *tsm;
+ List *fargs;
+ ListCell *larg,
+ *ltyp;
+
+ /*
+ * To validate the sample method name, look up the handler function, which
+ * has the same name, one dummy INTERNAL argument, and a result type of
+ * tsm_handler. (Note: tablesample method names are not schema-qualified
+ * in the SQL standard; but since they are just functions to us, we allow
+ * schema qualification to resolve any potential ambiguity.)
+ */
+ funcargtypes[0] = INTERNALOID;
+
+ handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true);
+
+ /* we want error to complain about no-such-method, not no-such-function */
+ if (!OidIsValid(handlerOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablesample method %s does not exist",
+ NameListToString(rts->method)),
+ parser_errposition(pstate, rts->location)));
+
+ /* check that handler has correct return type */
+ if (get_func_rettype(handlerOid) != TSM_HANDLEROID)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("function %s must return type \"tsm_handler\"",
+ NameListToString(rts->method)),
+ parser_errposition(pstate, rts->location)));
+
+ /* OK, run the handler to get TsmRoutine, for argument type info */
+ tsm = GetTsmRoutine(handlerOid);
+
+ tablesample = makeNode(TableSampleClause);
+ tablesample->tsmhandler = handlerOid;
+
+ /* check user provided the expected number of arguments */
+ if (list_length(rts->args) != list_length(tsm->parameterTypes))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg_plural("tablesample method %s requires %d argument, not %d",
+ "tablesample method %s requires %d arguments, not %d",
+ list_length(tsm->parameterTypes),
+ NameListToString(rts->method),
+ list_length(tsm->parameterTypes),
+ list_length(rts->args)),
+ parser_errposition(pstate, rts->location)));
+
+ /*
+ * Transform the arguments, typecasting them as needed. Note we must also
+ * assign collations now, because assign_query_collations() doesn't
+ * examine any substructure of RTEs.
+ */
+ fargs = NIL;
+ forboth(larg, rts->args, ltyp, tsm->parameterTypes)
+ {
+ Node *arg = (Node *) lfirst(larg);
+ Oid argtype = lfirst_oid(ltyp);
+
+ arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE");
+ assign_expr_collations(pstate, arg);
+ fargs = lappend(fargs, arg);
+ }
+ tablesample->args = fargs;
+
+ /* Process REPEATABLE (seed) */
+ if (rts->repeatable != NULL)
+ {
+ Node *arg;
+
+ if (!tsm->repeatable_across_queries)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablesample method %s does not support REPEATABLE",
+ NameListToString(rts->method)),
+ parser_errposition(pstate, rts->location)));
+
+ arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE");
+ assign_expr_collations(pstate, arg);
+ tablesample->repeatable = (Expr *) arg;
+ }
+ else
+ tablesample->repeatable = NULL;
+
+ return tablesample;
+}
+
/*
* transformFromClauseItem -
@@ -844,6 +915,33 @@ transformFromClauseItem(ParseState *pstate, Node *n,
rtr->rtindex = rtindex;
return (Node *) rtr;
}
+ else if (IsA(n, RangeTableSample))
+ {
+ /* TABLESAMPLE clause (wrapping some other valid FROM node) */
+ RangeTableSample *rts = (RangeTableSample *) n;
+ Node *rel;
+ RangeTblRef *rtr;
+ RangeTblEntry *rte;
+
+ /* Recursively transform the contained relation */
+ rel = transformFromClauseItem(pstate, rts->relation,
+ top_rte, top_rti, namespace);
+ /* Currently, the grammar can only produce a RangeVar as the contained rel */
+ Assert(IsA(rel, RangeTblRef));
+ rtr = (RangeTblRef *) rel;
+ rte = rt_fetch(rtr->rtindex, pstate->p_rtable);
+ /* We only support this on plain relations and matviews */
+ if (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"),
+ parser_errposition(pstate, exprLocation(rts->relation))));
+
+ /* Transform TABLESAMPLE details and attach to the RTE */
+ rte->tablesample = transformRangeTableSample(pstate, rts);
+ return (Node *) rtr;
+ }
else if (IsA(n, JoinExpr))
{
/* A newfangled join expression */
@@ -1165,26 +1263,6 @@ transformFromClauseItem(ParseState *pstate, Node *n,
return (Node *) j;
}
- else if (IsA(n, RangeTableSample))
- {
- /* Tablesample reference */
- RangeTableSample *rv = (RangeTableSample *) n;
- RangeTblRef *rtr;
- RangeTblEntry *rte = NULL;
- int rtindex;
-
- rte = transformTableSampleEntry(pstate, rv);
-
- /* assume new rte is at end */
- rtindex = list_length(pstate->p_rtable);
- Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
- *top_rte = rte;
- *top_rti = rtindex;
- *namespace = list_make1(makeDefaultNSItem(rte));
- rtr = makeNode(RangeTblRef);
- rtr->rtindex = rtindex;
- return (Node *) rtr;
- }
else
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
return NULL; /* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 430baff1165..554ca9d8c47 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,7 +18,6 @@
#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
-#include "catalog/pg_tablesample_method.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
@@ -27,7 +26,6 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
-#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parse_type.h"
@@ -769,148 +767,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
}
-/*
- * ParseTableSample
- *
- * Parse TABLESAMPLE clause and process the arguments
- */
-TableSampleClause *
-ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
- List *sampleargs, int location)
-{
- HeapTuple tuple;
- Form_pg_tablesample_method tsm;
- Form_pg_proc procform;
- TableSampleClause *tablesample;
- List *fargs;
- ListCell *larg;
- int nargs,
- initnargs;
- Oid init_arg_types[FUNC_MAX_ARGS];
-
- /* Load the tablesample method */
- tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
- if (!HeapTupleIsValid(tuple))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("tablesample method \"%s\" does not exist",
- samplemethod),
- parser_errposition(pstate, location)));
-
- tablesample = makeNode(TableSampleClause);
- tablesample->tsmid = HeapTupleGetOid(tuple);
-
- tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-
- tablesample->tsmseqscan = tsm->tsmseqscan;
- tablesample->tsmpagemode = tsm->tsmpagemode;
- tablesample->tsminit = tsm->tsminit;
- tablesample->tsmnextblock = tsm->tsmnextblock;
- tablesample->tsmnexttuple = tsm->tsmnexttuple;
- tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
- tablesample->tsmend = tsm->tsmend;
- tablesample->tsmreset = tsm->tsmreset;
- tablesample->tsmcost = tsm->tsmcost;
-
- ReleaseSysCache(tuple);
-
- /* Validate the parameters against init function definition. */
- tuple = SearchSysCache1(PROCOID,
- ObjectIdGetDatum(tablesample->tsminit));
-
- if (!HeapTupleIsValid(tuple)) /* should not happen */
- elog(ERROR, "cache lookup failed for function %u",
- tablesample->tsminit);
-
- procform = (Form_pg_proc) GETSTRUCT(tuple);
- initnargs = procform->pronargs;
- Assert(initnargs >= 3);
-
- /*
- * First parameter is used to pass the SampleScanState, second is seed
- * (REPEATABLE), skip the processing for them here, just assert that the
- * types are correct.
- */
- Assert(procform->proargtypes.values[0] == INTERNALOID);
- Assert(procform->proargtypes.values[1] == INT4OID);
- initnargs -= 2;
- memcpy(init_arg_types, procform->proargtypes.values + 2,
- initnargs * sizeof(Oid));
-
- /* Now we are done with the catalog */
- ReleaseSysCache(tuple);
-
- /* Process repeatable (seed) */
- if (repeatable != NULL)
- {
- Node *arg = repeatable;
-
- if (arg && IsA(arg, A_Const))
- {
- A_Const *con = (A_Const *) arg;
-
- if (con->val.type == T_Null)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("REPEATABLE clause must be NOT NULL numeric value"),
- parser_errposition(pstate, con->location)));
-
- }
-
- arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
- arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
- tablesample->repeatable = arg;
- }
- else
- tablesample->repeatable = NULL;
-
- /* Check user provided expected number of arguments. */
- if (list_length(sampleargs) != initnargs)
- ereport(ERROR,
- (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
- errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
- "tablesample method \"%s\" expects %d arguments got %d",
- initnargs,
- samplemethod,
- initnargs, list_length(sampleargs)),
- parser_errposition(pstate, location)));
-
- /* Transform the arguments, typecasting them as needed. */
- fargs = NIL;
- nargs = 0;
- foreach(larg, sampleargs)
- {
- Node *inarg = (Node *) lfirst(larg);
- Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
- Oid argtype = exprType(arg);
-
- if (argtype != init_arg_types[nargs])
- {
- if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
- COERCION_IMPLICIT))
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("wrong parameter %d for tablesample method \"%s\"",
- nargs + 1, samplemethod),
- errdetail("Expected type %s got %s.",
- format_type_be(init_arg_types[nargs]),
- format_type_be(argtype)),
- parser_errposition(pstate, exprLocation(inarg))));
-
- arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
- COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
- }
-
- fargs = lappend(fargs, arg);
- nargs++;
- }
-
- /* Pass the arguments down */
- tablesample->args = fargs;
-
- return tablesample;
-}
-
/* func_match_argtypes()
*
* Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index bbd6b77c5ea..1734e48241a 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -418,6 +418,10 @@ rewriteRuleAction(Query *parsetree,
switch (rte->rtekind)
{
+ case RTE_RELATION:
+ sub_action->hasSubLinks =
+ checkExprHasSubLink((Node *) rte->tablesample);
+ break;
case RTE_FUNCTION:
sub_action->hasSubLinks =
checkExprHasSubLink((Node *) rte->functions);
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c
index 9ad460abfbd..5b809aa7d49 100644
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -374,6 +374,33 @@ fdw_handler_out(PG_FUNCTION_ARGS)
/*
+ * tsm_handler_in - input routine for pseudo-type TSM_HANDLER.
+ */
+Datum
+tsm_handler_in(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot accept a value of type tsm_handler")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+/*
+ * tsm_handler_out - output routine for pseudo-type TSM_HANDLER.
+ */
+Datum
+tsm_handler_out(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot display a value of type tsm_handler")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+
+/*
* internal_in - input routine for pseudo-type INTERNAL.
*/
Datum
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 5112cac9017..51391f6a4e0 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,7 +32,6 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
-#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@@ -349,8 +348,6 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags);
static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags, int wrapColumn);
-static void get_tablesample_def(TableSampleClause *tablesample,
- deparse_context *context);
static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
TupleDesc resultDesc,
int prettyFlags, int wrapColumn, int startIndent);
@@ -416,6 +413,8 @@ static void get_column_alias_list(deparse_columns *colinfo,
static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
deparse_columns *colinfo,
deparse_context *context);
+static void get_tablesample_def(TableSampleClause *tablesample,
+ deparse_context *context);
static void get_opclass_name(Oid opclass, Oid actual_datatype,
StringInfo buf);
static Node *processIndirection(Node *node, deparse_context *context,
@@ -4235,50 +4234,6 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
heap_close(ev_relation, AccessShareLock);
}
-/* ----------
- * get_tablesample_def - Convert TableSampleClause back to SQL
- * ----------
- */
-static void
-get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
-{
- StringInfo buf = context->buf;
- HeapTuple tuple;
- Form_pg_tablesample_method tsm;
- char *tsmname;
- int nargs;
- ListCell *l;
-
- /* Load the tablesample method */
- tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
- if (!HeapTupleIsValid(tuple))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("cache lookup failed for tablesample method %u",
- tablesample->tsmid)));
-
- tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
- tsmname = NameStr(tsm->tsmname);
- appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
-
- ReleaseSysCache(tuple);
-
- nargs = 0;
- foreach(l, tablesample->args)
- {
- if (nargs++ > 0)
- appendStringInfoString(buf, ", ");
- get_rule_expr((Node *) lfirst(l), context, true);
- }
- appendStringInfoChar(buf, ')');
-
- if (tablesample->repeatable != NULL)
- {
- appendStringInfoString(buf, " REPEATABLE (");
- get_rule_expr(tablesample->repeatable, context, true);
- appendStringInfoChar(buf, ')');
- }
-}
/* ----------
* get_query_def - Parse back one query parsetree
@@ -8781,9 +8736,6 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
only_marker(rte),
generate_relation_name(rte->relid,
context->namespaces));
-
- if (rte->tablesample)
- get_tablesample_def(rte->tablesample, context);
break;
case RTE_SUBQUERY:
/* Subquery RTE */
@@ -8963,6 +8915,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
/* Else print column aliases as needed */
get_column_alias_list(colinfo, context);
}
+
+ /* Tablesample clause must go after any alias */
+ if (rte->rtekind == RTE_RELATION && rte->tablesample)
+ get_tablesample_def(rte->tablesample, context);
}
else if (IsA(jtnode, JoinExpr))
{
@@ -9163,6 +9119,44 @@ get_from_clause_coldeflist(RangeTblFunction *rtfunc,
}
/*
+ * get_tablesample_def - print a TableSampleClause
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Oid argtypes[1];
+ int nargs;
+ ListCell *l;
+
+ /*
+ * We should qualify the handler's function name if it wouldn't be
+ * resolved by lookup in the current search path.
+ */
+ argtypes[0] = INTERNALOID;
+ appendStringInfo(buf, " TABLESAMPLE %s (",
+ generate_function_name(tablesample->tsmhandler, 1,
+ NIL, argtypes,
+ false, NULL, EXPR_KIND_NONE));
+
+ nargs = 0;
+ foreach(l, tablesample->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfirst(l), context, false);
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (tablesample->repeatable != NULL)
+ {
+ appendStringInfoString(buf, " REPEATABLE (");
+ get_rule_expr((Node *) tablesample->repeatable, context, false);
+ appendStringInfoChar(buf, ')');
+ }
+}
+
+/*
* get_opclass_name - fetch name of an index operator class
*
* The opclass name is appended (after a space) to buf.
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 7b32247d34e..1dc293297d9 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,7 +32,6 @@
#include "catalog/pg_range.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_transform.h"
-#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -2997,29 +2996,3 @@ get_range_subtype(Oid rangeOid)
else
return InvalidOid;
}
-
-/* ---------- PG_TABLESAMPLE_METHOD CACHE ---------- */
-
-/*
- * get_tablesample_method_name - given a tablesample method OID,
- * look up the name or NULL if not found
- */
-char *
-get_tablesample_method_name(Oid tsmid)
-{
- HeapTuple tuple;
-
- tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
- if (HeapTupleIsValid(tuple))
- {
- Form_pg_tablesample_method tup =
- (Form_pg_tablesample_method) GETSTRUCT(tuple);
- char *result;
-
- result = pstrdup(NameStr(tup->tsmname));
- ReleaseSysCache(tuple);
- return result;
- }
- else
- return NULL;
-}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index b6333e362f0..efce7b9a3d1 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,7 +56,6 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
-#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
@@ -667,28 +666,6 @@ static const struct cachedesc cacheinfo[] = {
},
128
},
- {TableSampleMethodRelationId, /* TABLESAMPLEMETHODNAME */
- TableSampleMethodNameIndexId,
- 1,
- {
- Anum_pg_tablesample_method_tsmname,
- 0,
- 0,
- 0,
- },
- 2
- },
- {TableSampleMethodRelationId, /* TABLESAMPLEMETHODOID */
- TableSampleMethodOidIndexId,
- 1,
- {
- ObjectIdAttributeNumber,
- 0,
- 0,
- 0,
- },
- 2
- },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt
index 6cc3ed96c44..7b97d45a53a 100644
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -177,6 +177,8 @@ Section: Class 22 - Data Exception
2201B E ERRCODE_INVALID_REGULAR_EXPRESSION invalid_regular_expression
2201W E ERRCODE_INVALID_ROW_COUNT_IN_LIMIT_CLAUSE invalid_row_count_in_limit_clause
2201X E ERRCODE_INVALID_ROW_COUNT_IN_RESULT_OFFSET_CLAUSE invalid_row_count_in_result_offset_clause
+2202H E ERRCODE_INVALID_TABLESAMPLE_ARGUMENT invalid_tablesample_argument
+2202G E ERRCODE_INVALID_TABLESAMPLE_REPEAT invalid_tablesample_repeat
22009 E ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE invalid_time_zone_displacement_value
2200C E ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER invalid_use_of_escape_character
2200G E ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH most_specific_type_mismatch
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 6191f797344..4142e01123f 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -228,7 +228,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
void
sampler_random_init_state(long seed, SamplerRandomState randstate)
{
- randstate[0] = RAND48_SEED_0;
+ randstate[0] = 0x330e; /* same as pg_erand48, but could be anything */
randstate[1] = (unsigned short) seed;
randstate[2] = (unsigned short) (seed >> 16);
}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 9596af6a7b3..ece05155490 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -738,13 +738,15 @@ static const SchemaQuery Query_for_list_of_matviews = {
" WHERE substring(pg_catalog.quote_ident(evtname),1,%d)='%s'"
#define Query_for_list_of_tablesample_methods \
-" SELECT pg_catalog.quote_ident(tsmname) "\
-" FROM pg_catalog.pg_tablesample_method "\
-" WHERE substring(pg_catalog.quote_ident(tsmname),1,%d)='%s'"
+" SELECT pg_catalog.quote_ident(proname) "\
+" FROM pg_catalog.pg_proc "\
+" WHERE prorettype = 'pg_catalog.tsm_handler'::pg_catalog.regtype AND "\
+" proargtypes[0] = 'pg_catalog.internal'::pg_catalog.regtype AND "\
+" substring(pg_catalog.quote_ident(proname),1,%d)='%s'"
#define Query_for_list_of_policies \
" SELECT pg_catalog.quote_ident(polname) "\
-" FROM pg_catalog.pg_policy " \
+" FROM pg_catalog.pg_policy "\
" WHERE substring(pg_catalog.quote_ident(polname),1,%d)='%s'"
#define Query_for_list_of_tables_for_policy \
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 31139cbd0cc..75e6b72f9e0 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -116,11 +116,13 @@ extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key);
extern HeapScanDesc heap_beginscan_sampling(Relation relation,
Snapshot snapshot, int nkeys, ScanKey key,
- bool allow_strat, bool allow_pagemode);
+ bool allow_strat, bool allow_sync, bool allow_pagemode);
extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
BlockNumber endBlk);
extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
+extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+ bool allow_strat, bool allow_sync, bool allow_pagemode);
extern void heap_endscan(HeapScanDesc scan);
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h
deleted file mode 100644
index a02e93d3222..00000000000
--- a/src/include/access/tablesample.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * tablesample.h
- * Public header file for TABLESAMPLE clause interface
- *
- *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/access/tablesample.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef TABLESAMPLE_H
-#define TABLESAMPLE_H
-
-#include "access/relscan.h"
-#include "executor/executor.h"
-
-typedef struct TableSampleDesc
-{
- HeapScanDesc heapScan;
- TupleDesc tupDesc; /* Mostly useful for tsmexaminetuple */
-
- void *tsmdata; /* private method data */
-
- /* These point to he function of the TABLESAMPLE Method. */
- FmgrInfo tsminit;
- FmgrInfo tsmnextblock;
- FmgrInfo tsmnexttuple;
- FmgrInfo tsmexaminetuple;
- FmgrInfo tsmreset;
- FmgrInfo tsmend;
-} TableSampleDesc;
-
-
-extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
- TableSampleClause *tablesample);
-extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
-extern void tablesample_reset(TableSampleDesc *desc);
-extern void tablesample_end(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
- bool *visible);
-
-extern Datum tsm_system_init(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_system_end(PG_FUNCTION_ARGS);
-extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
-
-extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
-
-
-#endif
diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h
new file mode 100644
index 00000000000..4b59ffabd6e
--- /dev/null
+++ b/src/include/access/tsmapi.h
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsmapi.h
+ * API for tablesample methods
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/include/access/tsmapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TSMAPI_H
+#define TSMAPI_H
+
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+
+
+/*
+ * Callback function signatures --- see tablesample-method.sgml for more info.
+ */
+
+typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+
+typedef void (*InitSampleScan_function) (SampleScanState *node,
+ int eflags);
+
+typedef void (*BeginSampleScan_function) (SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+
+typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+
+typedef void (*EndSampleScan_function) (SampleScanState *node);
+
+/*
+ * TsmRoutine is the struct returned by a tablesample method's handler
+ * function. It provides pointers to the callback functions needed by the
+ * planner and executor, as well as additional information about the method.
+ *
+ * More function pointers are likely to be added in the future.
+ * Therefore it's recommended that the handler initialize the struct with
+ * makeNode(TsmRoutine) so that all fields are set to NULL. This will
+ * ensure that no fields are accidentally left undefined.
+ */
+typedef struct TsmRoutine
+{
+ NodeTag type;
+
+ /* List of datatype OIDs for the arguments of the TABLESAMPLE clause */
+ List *parameterTypes;
+
+ /* Can method produce repeatable samples across, or even within, queries? */
+ bool repeatable_across_queries;
+ bool repeatable_across_scans;
+
+ /* Functions for planning a SampleScan on a physical table */
+ SampleScanGetSampleSize_function SampleScanGetSampleSize;
+
+ /* Functions for executing a SampleScan on a physical table */
+ InitSampleScan_function InitSampleScan; /* can be NULL */
+ BeginSampleScan_function BeginSampleScan;
+ NextSampleBlock_function NextSampleBlock; /* can be NULL */
+ NextSampleTuple_function NextSampleTuple;
+ EndSampleScan_function EndSampleScan; /* can be NULL */
+} TsmRoutine;
+
+
+/* Functions in access/tablesample/tablesample.c */
+extern TsmRoutine *GetTsmRoutine(Oid tsmhandler);
+
+#endif /* TSMAPI_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 8f6685fd0cc..0e983279313 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201507171
+#define CATALOG_VERSION_NO 201507252
#endif
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 748aadde945..c38958d6c5e 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -316,11 +316,6 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops));
#define ReplicationOriginNameIndex 6002
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
-#define TableSampleMethodNameIndexId 3331
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
-#define TableSampleMethodOidIndexId 3332
-
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 1d68ad7209e..09bf1439c46 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3734,6 +3734,16 @@ DATA(insert OID = 3116 ( fdw_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1
DESCR("I/O");
DATA(insert OID = 3117 ( fdw_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ ));
DESCR("I/O");
+DATA(insert OID = 3311 ( tsm_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3312 ( tsm_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ ));
+DESCR("I/O");
+
+/* tablesample method handlers */
+DATA(insert OID = 3313 ( bernoulli PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ ));
+DESCR("BERNOULLI tablesample method handler");
+DATA(insert OID = 3314 ( system PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
+DESCR("SYSTEM tablesample method handler");
/* cryptographic */
DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
@@ -5321,33 +5331,6 @@ DESCR("get an individual replication origin's replication progress");
DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
DESCR("get progress for all replication origins");
-/* tablesample */
-DATA(insert OID = 3335 ( tsm_system_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
-DESCR("tsm_system_init(internal)");
-DATA(insert OID = 3336 ( tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_system_nextblock(internal)");
-DATA(insert OID = 3337 ( tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_system_nexttuple(internal)");
-DATA(insert OID = 3338 ( tsm_system_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
-DESCR("tsm_system_end(internal)");
-DATA(insert OID = 3339 ( tsm_system_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
-DESCR("tsm_system_reset(internal)");
-DATA(insert OID = 3340 ( tsm_system_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
-DESCR("tsm_system_cost(internal)");
-
-DATA(insert OID = 3341 ( tsm_bernoulli_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_init(internal)");
-DATA(insert OID = 3342 ( tsm_bernoulli_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nextblock(internal)");
-DATA(insert OID = 3343 ( tsm_bernoulli_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nexttuple(internal)");
-DATA(insert OID = 3344 ( tsm_bernoulli_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_end(internal)");
-DATA(insert OID = 3345 ( tsm_bernoulli_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_reset(internal)");
-DATA(insert OID = 3346 ( tsm_bernoulli_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_cost(internal)");
-
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h
deleted file mode 100644
index b422414d080..00000000000
--- a/src/include/catalog/pg_tablesample_method.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_tablesample_method.h
- * definition of the table scan methods.
- *
- *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/catalog/pg_tablesample_method.h
- *
- *
- *-------------------------------------------------------------------------
- */
-#ifndef PG_TABLESAMPLE_METHOD_H
-#define PG_TABLESAMPLE_METHOD_H
-
-#include "catalog/genbki.h"
-#include "catalog/objectaddress.h"
-
-/* ----------------
- * pg_tablesample_method definition. cpp turns this into
- * typedef struct FormData_pg_tablesample_method
- * ----------------
- */
-#define TableSampleMethodRelationId 3330
-
-CATALOG(pg_tablesample_method,3330)
-{
- NameData tsmname; /* tablesample method name */
- bool tsmseqscan; /* does this method scan whole table
- * sequentially? */
- bool tsmpagemode; /* does this method scan page at a time? */
- regproc tsminit; /* init scan function */
- regproc tsmnextblock; /* function returning next block to sample or
- * InvalidBlockOffset if finished */
- regproc tsmnexttuple; /* function returning next tuple offset from
- * current block or InvalidOffsetNumber if end
- * of the block was reacher */
- regproc tsmexaminetuple;/* optional function which can examine tuple
- * contents and decide if tuple should be
- * returned or not */
- regproc tsmend; /* end scan function */
- regproc tsmreset; /* reset state - used by rescan */
- regproc tsmcost; /* costing function */
-} FormData_pg_tablesample_method;
-
-/* ----------------
- * Form_pg_tablesample_method corresponds to a pointer to a tuple with
- * the format of pg_tablesample_method relation.
- * ----------------
- */
-typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
-
-/* ----------------
- * compiler constants for pg_tablesample_method
- * ----------------
- */
-#define Natts_pg_tablesample_method 10
-#define Anum_pg_tablesample_method_tsmname 1
-#define Anum_pg_tablesample_method_tsmseqscan 2
-#define Anum_pg_tablesample_method_tsmpagemode 3
-#define Anum_pg_tablesample_method_tsminit 4
-#define Anum_pg_tablesample_method_tsmnextblock 5
-#define Anum_pg_tablesample_method_tsmnexttuple 6
-#define Anum_pg_tablesample_method_tsmexaminetuple 7
-#define Anum_pg_tablesample_method_tsmend 8
-#define Anum_pg_tablesample_method_tsmreset 9
-#define Anum_pg_tablesample_method_tsmcost 10
-
-/* ----------------
- * initial contents of pg_tablesample_method
- * ----------------
- */
-
-DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
-DESCR("SYSTEM table sampling method");
-DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
-DESCR("BERNOULLI table sampling method");
-
-#endif /* PG_TABLESAMPLE_METHOD_H */
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index da123f6c495..7dc95c8d2c6 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -694,6 +694,8 @@ DATA(insert OID = 3500 ( anyenum PGNSP PGUID 4 t p P f t \054 0 0 0 anyenum_in
#define ANYENUMOID 3500
DATA(insert OID = 3115 ( fdw_handler PGNSP PGUID 4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
#define FDW_HANDLEROID 3115
+DATA(insert OID = 3310 ( tsm_handler PGNSP PGUID 4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
+#define TSM_HANDLEROID 3310
DATA(insert OID = 3831 ( anyrange PGNSP PGUID -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
#define ANYRANGEOID 3831
diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h
index 4b769daec8b..a0cc6ce467a 100644
--- a/src/include/executor/nodeSamplescan.h
+++ b/src/include/executor/nodeSamplescan.h
@@ -4,7 +4,7 @@
*
*
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/executor/nodeSamplescan.h
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 541ee187356..303fc3c1c77 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1257,13 +1257,22 @@ typedef struct ScanState
*/
typedef ScanState SeqScanState;
-/*
- * SampleScan
+/* ----------------
+ * SampleScanState information
+ * ----------------
*/
typedef struct SampleScanState
{
ScanState ss;
- struct TableSampleDesc *tsdesc;
+ List *args; /* expr states for TABLESAMPLE params */
+ ExprState *repeatable; /* expr state for REPEATABLE expr */
+ /* use struct pointer to avoid including tsmapi.h here */
+ struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */
+ void *tsm_state; /* tablesample method can keep state here */
+ bool use_bulkread; /* use bulkread buffer access strategy? */
+ bool use_pagemode; /* use page-at-a-time visibility checking? */
+ bool begun; /* false means need to call BeginSampleScan */
+ uint32 seed; /* random seed */
} SampleScanState;
/*
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f8acda4eede..748e434a27a 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -51,6 +51,7 @@ typedef enum NodeTag
T_BitmapOr,
T_Scan,
T_SeqScan,
+ T_SampleScan,
T_IndexScan,
T_IndexOnlyScan,
T_BitmapIndexScan,
@@ -61,7 +62,6 @@ typedef enum NodeTag
T_ValuesScan,
T_CteScan,
T_WorkTableScan,
- T_SampleScan,
T_ForeignScan,
T_CustomScan,
T_Join,
@@ -400,6 +400,7 @@ typedef enum NodeTag
T_WindowDef,
T_RangeSubselect,
T_RangeFunction,
+ T_RangeTableSample,
T_TypeName,
T_ColumnDef,
T_IndexElem,
@@ -407,6 +408,7 @@ typedef enum NodeTag
T_DefElem,
T_RangeTblEntry,
T_RangeTblFunction,
+ T_TableSampleClause,
T_WithCheckOption,
T_SortGroupClause,
T_GroupingSet,
@@ -425,8 +427,6 @@ typedef enum NodeTag
T_OnConflictClause,
T_CommonTableExpr,
T_RoleSpec,
- T_RangeTableSample,
- T_TableSampleClause,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
@@ -452,7 +452,8 @@ typedef enum NodeTag
T_WindowObjectData, /* private in nodeWindowAgg.c */
T_TIDBitmap, /* in nodes/tidbitmap.h */
T_InlineCodeBlock, /* in nodes/parsenodes.h */
- T_FdwRoutine /* in foreign/fdwapi.h */
+ T_FdwRoutine, /* in foreign/fdwapi.h */
+ T_TsmRoutine /* in access/tsmapi.h */
} NodeTag;
/*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index b336ff9c6ab..151c93a078e 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -338,26 +338,6 @@ typedef struct FuncCall
} FuncCall;
/*
- * TableSampleClause - a sampling method information
- */
-typedef struct TableSampleClause
-{
- NodeTag type;
- Oid tsmid;
- bool tsmseqscan;
- bool tsmpagemode;
- Oid tsminit;
- Oid tsmnextblock;
- Oid tsmnexttuple;
- Oid tsmexaminetuple;
- Oid tsmend;
- Oid tsmreset;
- Oid tsmcost;
- Node *repeatable;
- List *args;
-} TableSampleClause;
-
-/*
* A_Star - '*' representing all columns of a table or compound field
*
* This can appear within ColumnRef.fields, A_Indirection.indirection, and
@@ -558,19 +538,23 @@ typedef struct RangeFunction
} RangeFunction;
/*
- * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * RangeTableSample - TABLESAMPLE appearing in a raw FROM clause
*
- * SQL Standard specifies only one parameter which is percentage. But we allow
- * custom tablesample methods which may need different input arguments so we
- * accept list of arguments.
+ * This node, appearing only in raw parse trees, represents
+ * <relation> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * Currently, the <relation> can only be a RangeVar, but we might in future
+ * allow RangeSubselect and other options. Note that the RangeTableSample
+ * is wrapped around the node representing the <relation>, rather than being
+ * a subfield of it.
*/
typedef struct RangeTableSample
{
NodeTag type;
- RangeVar *relation;
- char *method; /* sampling method */
- Node *repeatable;
- List *args; /* arguments for sampling method */
+ Node *relation; /* relation to be sampled */
+ List *method; /* sampling method name (possibly qualified) */
+ List *args; /* argument(s) for sampling method */
+ Node *repeatable; /* REPEATABLE expression, or NULL if none */
+ int location; /* method name location, or -1 if unknown */
} RangeTableSample;
/*
@@ -810,7 +794,7 @@ typedef struct RangeTblEntry
*/
Oid relid; /* OID of the relation */
char relkind; /* relation kind (see pg_class.relkind) */
- TableSampleClause *tablesample; /* sampling method and parameters */
+ struct TableSampleClause *tablesample; /* sampling info, or NULL */
/*
* Fields valid for a subquery RTE (else NULL):
@@ -913,6 +897,19 @@ typedef struct RangeTblFunction
} RangeTblFunction;
/*
+ * TableSampleClause - TABLESAMPLE appearing in a transformed FROM clause
+ *
+ * Unlike RangeTableSample, this is a subnode of the relevant RangeTblEntry.
+ */
+typedef struct TableSampleClause
+{
+ NodeTag type;
+ Oid tsmhandler; /* OID of the tablesample handler function */
+ List *args; /* tablesample argument expression(s) */
+ Expr *repeatable; /* REPEATABLE expression, or NULL if none */
+} TableSampleClause;
+
+/*
* WithCheckOption -
* representation of WITH CHECK OPTION checks to be applied to new tuples
* when inserting/updating an auto-updatable view, or RLS WITH CHECK
@@ -2520,7 +2517,7 @@ typedef struct RenameStmt
typedef struct AlterObjectSchemaStmt
{
NodeTag type;
- ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
+ ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
RangeVar *relation; /* in case it's a table */
List *object; /* in case it's some other object */
List *objarg; /* argument types, if applicable */
@@ -2535,7 +2532,7 @@ typedef struct AlterObjectSchemaStmt
typedef struct AlterOwnerStmt
{
NodeTag type;
- ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
+ ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
RangeVar *relation; /* in case it's a table */
List *object; /* in case it's some other object */
List *objarg; /* argument types, if applicable */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 5f538f3e8cc..0654d0266cd 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -287,7 +287,12 @@ typedef Scan SeqScan;
* table sample scan node
* ----------------
*/
-typedef Scan SampleScan;
+typedef struct SampleScan
+{
+ Scan scan;
+ /* use struct pointer to avoid including parsenodes.h here */
+ struct TableSampleClause *tablesample;
+} SampleScan;
/* ----------------
* index scan node
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 24003ae3591..dd43e45d0c0 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -68,7 +68,8 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info);
-extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info);
extern void cost_index(IndexPath *path, PlannerInfo *root,
double loop_count);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h
index 3194da46394..32646918e20 100644
--- a/src/include/parser/parse_func.h
+++ b/src/include/parser/parse_func.h
@@ -33,11 +33,6 @@ typedef enum
extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
FuncCall *fn, int location);
-extern TableSampleClause *ParseTableSample(ParseState *pstate,
- char *samplemethod,
- Node *repeatable, List *args,
- int location);
-
extern FuncDetailCode func_get_detail(List *funcname,
List *fargs, List *fargnames,
int nargs, Oid *argtypes,
diff --git a/src/include/port.h b/src/include/port.h
index 71113c03944..3787cbfb761 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -357,10 +357,6 @@ extern off_t ftello(FILE *stream);
#endif
#endif
-#define RAND48_SEED_0 (0x330e)
-#define RAND48_SEED_1 (0xabcd)
-#define RAND48_SEED_2 (0x1234)
-
extern double pg_erand48(unsigned short xseed[3]);
extern long pg_lrand48(void);
extern void pg_srand48(long seed);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index fcb0bf0ce8e..49caa565574 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -566,6 +566,8 @@ extern Datum language_handler_in(PG_FUNCTION_ARGS);
extern Datum language_handler_out(PG_FUNCTION_ARGS);
extern Datum fdw_handler_in(PG_FUNCTION_ARGS);
extern Datum fdw_handler_out(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_in(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_out(PG_FUNCTION_ARGS);
extern Datum internal_in(PG_FUNCTION_ARGS);
extern Datum internal_out(PG_FUNCTION_ARGS);
extern Datum opaque_in(PG_FUNCTION_ARGS);
@@ -1213,6 +1215,12 @@ extern Datum ginqueryarrayextract(PG_FUNCTION_ARGS);
extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
extern Datum ginarraytriconsistent(PG_FUNCTION_ARGS);
+/* access/tablesample/bernoulli.c */
+extern Datum tsm_bernoulli_handler(PG_FUNCTION_ARGS);
+
+/* access/tablesample/system.c */
+extern Datum tsm_system_handler(PG_FUNCTION_ARGS);
+
/* access/transam/twophase.c */
extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index a40c9b12732..97115384329 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -156,7 +156,6 @@ extern void free_attstatsslot(Oid atttype,
extern char *get_namespace_name(Oid nspid);
extern char *get_namespace_name_or_temp(Oid nspid);
extern Oid get_range_subtype(Oid rangeOid);
-extern char *get_tablesample_method_name(Oid tsmid);
#define type_is_array(typid) (get_element_type(typid) != InvalidOid)
/* type_is_array_domain accepts both plain arrays and domains over arrays */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index f06f03a996f..18404e266eb 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -81,8 +81,6 @@ enum SysCacheIdentifier
REPLORIGNAME,
RULERELNAME,
STATRELATTINH,
- TABLESAMPLEMETHODNAME,
- TABLESAMPLEMETHODOID,
TABLESPACEOID,
TRFOID,
TRFTYPELANG,
diff --git a/src/port/erand48.c b/src/port/erand48.c
index 12efd8193c4..9d471197c35 100644
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -33,6 +33,9 @@
#include <math.h>
+#define RAND48_SEED_0 (0x330e)
+#define RAND48_SEED_1 (0xabcd)
+#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 414299a6941..e7c242cd22d 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -101,15 +101,17 @@ NOTICE: f_leak => great manga
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(4 rows)
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE: f_leak => my first novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
+NOTICE: f_leak => great manga
did | cid | dlevel | dauthor | dtitle
-----+-----+--------+-------------------+-----------------------
- 1 | 11 | 1 | rls_regress_user1 | my first novel
4 | 44 | 1 | rls_regress_user1 | my first manga
6 | 22 | 1 | rls_regress_user2 | great science fiction
+ 8 | 44 | 1 | rls_regress_user2 | great manga
(3 rows)
-- viewpoint from rls_regress_user2
@@ -156,20 +158,20 @@ NOTICE: f_leak => great manga
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(8 rows)
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE: f_leak => my first novel
-NOTICE: f_leak => my second novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first manga
+NOTICE: f_leak => my second manga
NOTICE: f_leak => great science fiction
-NOTICE: f_leak => great technology book
+NOTICE: f_leak => great manga
did | cid | dlevel | dauthor | dtitle
-----+-----+--------+-------------------+-----------------------
- 1 | 11 | 1 | rls_regress_user1 | my first novel
- 2 | 11 | 2 | rls_regress_user1 | my second novel
4 | 44 | 1 | rls_regress_user1 | my first manga
+ 5 | 44 | 2 | rls_regress_user1 | my second manga
6 | 22 | 1 | rls_regress_user2 | great science fiction
- 7 | 33 | 2 | rls_regress_user2 | great technology book
-(5 rows)
+ 8 | 44 | 1 | rls_regress_user2 | great manga
+(4 rows)
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
QUERY PLAN
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index cd5337531d4..1e5b0b9a2c4 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2202,6 +2202,10 @@ street| SELECT r.name,
FROM ONLY road r,
real_city c
WHERE (c.outline ## r.thepath);
+test_tablesample_v1| SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+test_tablesample_v2| SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system (99);
toyemp| SELECT emp.name,
emp.age,
emp.location,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index 14acd16da3b..eb0bc88ef1f 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -128,7 +128,6 @@ pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
-pg_tablesample_method|t
pg_tablespace|t
pg_transform|t
pg_trigger|t
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out
index 04e5eb8b807..727a8354397 100644
--- a/src/test/regress/expected/tablesample.out
+++ b/src/test/regress/expected/tablesample.out
@@ -1,107 +1,123 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample
+ SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
3
4
5
- 9
-(7 rows)
-
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
- id
-----
6
7
8
-(3 rows)
+(6 rows)
-SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
- count
--------
- 10
-(1 row)
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+ id
+----
+(0 rows)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
+ 3
+ 4
+ 5
6
7
8
- 9
-(7 rows)
+(6 rows)
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
id
----
- 0
- 1
- 3
4
5
+ 6
+ 7
+ 8
(5 rows)
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
id
----
- 0
- 5
-(2 rows)
+ 7
+(1 row)
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
- pg_get_viewdef
---------------------------------------------------------------------------------
- SELECT test_tablesample.id +
- FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
+-- 100% should give repeatable count results (ie, all rows) in any case
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+ count
+-------
+ 10
(1 row)
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
- pg_get_viewdef
------------------------------------------------------------
- SELECT test_tablesample.id +
- FROM test_tablesample TABLESAMPLE system ((99)::real);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+ count
+-------
+ 10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
+ count
+-------
+ 10
(1 row)
+CREATE VIEW test_tablesample_v1 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+ View "public.test_tablesample_v1"
+ Column | Type | Modifiers | Storage | Description
+--------+---------+-----------+---------+-------------
+ id | integer | | plain |
+View definition:
+ SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+
+\d+ test_tablesample_v2
+ View "public.test_tablesample_v2"
+ Column | Type | Modifiers | Storage | Description
+--------+---------+-----------+---------+-------------
+ id | integer | | plain |
+View definition:
+ SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system (99);
+
+-- check a sampled query doesn't affect cursor in progress
BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
FETCH FIRST FROM tablesample_cur;
id
----
- 0
+ 3
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 1
+ 4
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 2
+ 5
(1 row)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
3
4
5
- 9
-(7 rows)
+ 6
+ 7
+ 8
+(6 rows)
FETCH NEXT FROM tablesample_cur;
id
@@ -124,19 +140,19 @@ FETCH NEXT FROM tablesample_cur;
FETCH FIRST FROM tablesample_cur;
id
----
- 0
+ 3
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 1
+ 4
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 2
+ 5
(1 row)
FETCH NEXT FROM tablesample_cur;
@@ -159,41 +175,129 @@ FETCH NEXT FROM tablesample_cur;
CLOSE tablesample_cur;
END;
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
- QUERY PLAN
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample (cost=0.00..26.35 rows=635 width=4)
+EXPLAIN (COSTS OFF)
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+ QUERY PLAN
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system ('50'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM test_tablesample_v1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system ('20'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+-- check inheritance behavior
+explain (costs off)
+ select count(*) from person tablesample bernoulli (100);
+ QUERY PLAN
+-------------------------------------------------
+ Aggregate
+ -> Append
+ -> Sample Scan on person
+ Sampling: bernoulli ('100'::real)
+ -> Sample Scan on emp
+ Sampling: bernoulli ('100'::real)
+ -> Sample Scan on student
+ Sampling: bernoulli ('100'::real)
+ -> Sample Scan on stud_emp
+ Sampling: bernoulli ('100'::real)
+(10 rows)
+
+select count(*) from person tablesample bernoulli (100);
+ count
+-------
+ 58
(1 row)
-EXPLAIN SELECT * FROM test_tablesample_v1;
- QUERY PLAN
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample (cost=0.00..10.54 rows=254 width=4)
+select count(*) from person;
+ count
+-------
+ 58
+(1 row)
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+ count
+-------
+ 0
+(1 row)
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+ pct | count
+-----+-------
+ 0 | 0
+ 100 | 10000
+(2 rows)
+
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+ pct | count
+-----+-------
+ 0 | 0
+ 100 | 10000
+(2 rows)
+
+explain (costs off)
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+ QUERY PLAN
+--------------------------------------------------------
+ HashAggregate
+ Group Key: "*VALUES*".column1
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Sample Scan on tenk1
+ Sampling: bernoulli ("*VALUES*".column1)
+(6 rows)
+
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+ pct | count
+-----+-------
+ 100 | 10000
+(1 row)
+
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample system (pct)) ss
+ group by pct;
+ pct | count
+-----+-------
+ 100 | 10000
(1 row)
-- errors
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-ERROR: tablesample method "foobar" does not exist
+ERROR: tablesample method foobar does not exist
LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
- ^
+ ^
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
+ERROR: TABLESAMPLE parameter cannot be null
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-ERROR: REPEATABLE clause must be NOT NULL numeric value
-LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
- ^
+ERROR: TABLESAMPLE REPEATABLE parameter cannot be null
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
-ERROR: TABLESAMPLE clause can only be used on tables and materialized views
+ERROR: TABLESAMPLE clause can only be applied to tables and materialized views
LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
^
INSERT INTO test_tablesample_v1 VALUES(1);
@@ -202,30 +306,10 @@ DETAIL: Views containing TABLESAMPLE are not automatically updatable.
HINT: To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
WITH query_select AS (SELECT * FROM test_tablesample)
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
-ERROR: TABLESAMPLE clause can only be used on tables and materialized views
+ERROR: TABLESAMPLE clause can only be applied to tables and materialized views
LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA...
^
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
ERROR: syntax error at or near "TABLESAMPLE"
LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
^
--- catalog sanity
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
- OR tsmseqscan IS NULL
- OR tsmpagemode IS NULL
- OR tsmnextblock IS NULL
- OR tsmnexttuple IS NULL
- OR tsmend IS NULL
- OR tsmreset IS NULL
- OR tsmcost IS NULL;
- tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost
----------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+---------
-(0 rows)
-
--- done
-DROP TABLE test_tablesample CASCADE;
-NOTICE: drop cascades to 2 other objects
-DETAIL: drop cascades to view test_tablesample_v1
-drop cascades to view test_tablesample_v2
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source
index 70c9cc356a6..9eedb363d06 100644
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -686,6 +686,9 @@ SELECT user_relns() AS user_relns
test_range_excl
test_range_gist
test_range_spgist
+ test_tablesample
+ test_tablesample_v1
+ test_tablesample_v2
test_tsvector
testjsonb
text_tbl
@@ -705,7 +708,7 @@ SELECT user_relns() AS user_relns
tvvmv
varchar_tbl
xacttest
-(127 rows)
+(130 rows)
SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));
name
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 3a607cff46c..15d74d4e6eb 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -110,6 +110,7 @@ test: lock
test: replica_identity
test: rowsecurity
test: object_address
+test: tablesample
test: alter_generic
test: alter_operator
test: misc
@@ -156,4 +157,3 @@ test: with
test: xml
test: event_trigger
test: stats
-test: tablesample
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index 039070b85b7..e86f8143142 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -94,14 +94,18 @@ SET row_security TO ON;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
-- viewpoint from rls_regress_user2
SET SESSION AUTHORIZATION rls_regress_user2;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql
index 7b3eb9bedf7..eec97934966 100644
--- a/src/test/regress/sql/tablesample.sql
+++ b/src/test/regress/sql/tablesample.sql
@@ -1,26 +1,37 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+INSERT INTO test_tablesample
+ SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
+
+-- 100% should give repeatable count results (ie, all rows) in any case
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+CREATE VIEW test_tablesample_v1 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+\d+ test_tablesample_v2
+-- check a sampled query doesn't affect cursor in progress
BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+
FETCH FIRST FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
@@ -36,12 +47,45 @@ FETCH NEXT FROM tablesample_cur;
CLOSE tablesample_cur;
END;
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-EXPLAIN SELECT * FROM test_tablesample_v1;
+EXPLAIN (COSTS OFF)
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+EXPLAIN (COSTS OFF)
+ SELECT * FROM test_tablesample_v1;
+
+-- check inheritance behavior
+explain (costs off)
+ select count(*) from person tablesample bernoulli (100);
+select count(*) from person tablesample bernoulli (100);
+select count(*) from person;
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+explain (costs off)
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample system (pct)) ss
+ group by pct;
-- errors
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
@@ -56,19 +100,3 @@ WITH query_select AS (SELECT * FROM test_tablesample)
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
-
--- catalog sanity
-
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
- OR tsmseqscan IS NULL
- OR tsmpagemode IS NULL
- OR tsmnextblock IS NULL
- OR tsmnexttuple IS NULL
- OR tsmend IS NULL
- OR tsmreset IS NULL
- OR tsmcost IS NULL;
-
--- done
-DROP TABLE test_tablesample CASCADE;