diff options
Diffstat (limited to 'src')
63 files changed, 1943 insertions, 1703 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 6f4ff2718fe..050efdc4806 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -80,8 +80,11 @@ bool synchronize_seqscans = true; static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode, - bool is_bitmapscan, bool is_samplescan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, bool temp_snap); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); @@ -207,7 +210,7 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] = * ---------------- */ static void -initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) +initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) { bool allow_strat; bool allow_sync; @@ -257,12 +260,12 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) scan->rs_strategy = NULL; } - if (is_rescan) + if (keep_startblock) { /* - * If rescan, keep the previous startblock setting so that rewinding a - * cursor doesn't generate surprising results. Reset the syncscan - * setting, though. + * When rescanning, we want to keep the previous startblock setting, + * so that rewinding a cursor doesn't generate surprising results. + * Reset the active syncscan setting, though. */ scan->rs_syncscan = (allow_sync && synchronize_seqscans); } @@ -1313,6 +1316,10 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, /* ---------------- * heap_beginscan - begin relation scan * + * heap_beginscan is the "standard" case. + * + * heap_beginscan_catalog differs in setting up its own temporary snapshot. + * * heap_beginscan_strat offers an extended API that lets the caller control * whether a nondefault buffer access strategy can be used, and whether * syncscan can be chosen (possibly resulting in the scan not starting from @@ -1323,8 +1330,11 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, * really quite unlike a standard seqscan, there is just enough commonality * to make it worth using the same data structure. * - * heap_beginscan_samplingscan is alternate entry point for setting up a - * HeapScanDesc for a TABLESAMPLE scan. + * heap_beginscan_sampling is an alternative entry point for setting up a + * HeapScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth + * using the same data structure although the behavior is rather different. + * In addition to the options offered by heap_beginscan_strat, this call + * also allows control of whether page-mode visibility checking is used. * ---------------- */ HeapScanDesc @@ -1366,18 +1376,22 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot, HeapScanDesc heap_beginscan_sampling(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_pagemode) + bool allow_strat, bool allow_sync, bool allow_pagemode) { return heap_beginscan_internal(relation, snapshot, nkeys, key, - allow_strat, false, allow_pagemode, + allow_strat, allow_sync, allow_pagemode, false, true, false); } static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode, - bool is_bitmapscan, bool is_samplescan, bool temp_snap) + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap) { HeapScanDesc scan; @@ -1462,6 +1476,27 @@ heap_rescan(HeapScanDesc scan, } /* ---------------- + * heap_rescan_set_params - restart a relation scan after changing params + * + * This call allows changing the buffer strategy, syncscan, and pagemode + * options before starting a fresh scan. Note that although the actual use + * of syncscan might change (effectively, enabling or disabling reporting), + * the previously selected startblock will be kept. + * ---------------- + */ +void +heap_rescan_set_params(HeapScanDesc scan, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + /* adjust parameters */ + scan->rs_allow_strat = allow_strat; + scan->rs_allow_sync = allow_sync; + scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot); + /* ... and rescan */ + heap_rescan(scan, key); +} + +/* ---------------- * heap_endscan - end relation scan * * See how to integrate with index scans. diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile index 46eeb59f9c4..68d9ab28147 100644 --- a/src/backend/access/tablesample/Makefile +++ b/src/backend/access/tablesample/Makefile @@ -1,10 +1,10 @@ #------------------------------------------------------------------------- # # Makefile-- -# Makefile for utils/tablesample +# Makefile for access/tablesample # # IDENTIFICATION -# src/backend/utils/tablesample/Makefile +# src/backend/access/tablesample/Makefile # #------------------------------------------------------------------------- @@ -12,6 +12,6 @@ subdir = src/backend/access/tablesample top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = tablesample.o system.o bernoulli.o +OBJS = bernoulli.o system.o tablesample.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c index 0a539008221..cf88f95e757 100644 --- a/src/backend/access/tablesample/bernoulli.c +++ b/src/backend/access/tablesample/bernoulli.c @@ -1,233 +1,231 @@ /*------------------------------------------------------------------------- * * bernoulli.c - * interface routines for BERNOULLI tablesample method + * support routines for BERNOULLI tablesample method * - * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * To ensure repeatability of samples, it is necessary that selection of a + * given tuple be history-independent; otherwise syncscanning would break + * repeatability, to say nothing of logically-irrelevant maintenance such + * as physical extension or shortening of the relation. + * + * To achieve that, we proceed by hashing each candidate TID together with + * the active seed, and then selecting it if the hash is less than the + * cutoff value computed from the selection probability by BeginSampleScan. + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * src/backend/utils/tablesample/bernoulli.c + * src/backend/access/tablesample/bernoulli.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "fmgr.h" +#ifdef _MSC_VER +#include <float.h> /* for _isnan */ +#endif +#include <math.h> -#include "access/tablesample.h" -#include "access/relscan.h" -#include "nodes/execnodes.h" -#include "nodes/relation.h" +#include "access/hash.h" +#include "access/tsmapi.h" +#include "catalog/pg_type.h" #include "optimizer/clauses.h" -#include "storage/bufmgr.h" -#include "utils/sampling.h" +#include "optimizer/cost.h" +#include "utils/builtins.h" -/* tsdesc */ +/* Private state */ typedef struct { + uint64 cutoff; /* select tuples with hash less than this */ uint32 seed; /* random seed */ - BlockNumber startblock; /* starting block, we use ths for syncscan - * support */ - BlockNumber nblocks; /* number of blocks */ - BlockNumber blockno; /* current block */ - float4 probability; /* probabilty that tuple will be returned - * (0.0-1.0) */ OffsetNumber lt; /* last tuple returned from current block */ - SamplerRandomState randstate; /* random generator tsdesc */ } BernoulliSamplerData; + +static void bernoulli_samplescangetsamplesize(PlannerInfo *root, + RelOptInfo *baserel, + List *paramexprs, + BlockNumber *pages, + double *tuples); +static void bernoulli_initsamplescan(SampleScanState *node, + int eflags); +static void bernoulli_beginsamplescan(SampleScanState *node, + Datum *params, + int nparams, + uint32 seed); +static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node, + BlockNumber blockno, + OffsetNumber maxoffset); + + /* - * Initialize the state. + * Create a TsmRoutine descriptor for the BERNOULLI method. */ Datum -tsm_bernoulli_init(PG_FUNCTION_ARGS) +tsm_bernoulli_handler(PG_FUNCTION_ARGS) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - uint32 seed = PG_GETARG_UINT32(1); - float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2); - HeapScanDesc scan = tsdesc->heapScan; - BernoulliSamplerData *sampler; + TsmRoutine *tsm = makeNode(TsmRoutine); + + tsm->parameterTypes = list_make1_oid(FLOAT4OID); + tsm->repeatable_across_queries = true; + tsm->repeatable_across_scans = true; + tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize; + tsm->InitSampleScan = bernoulli_initsamplescan; + tsm->BeginSampleScan = bernoulli_beginsamplescan; + tsm->NextSampleBlock = NULL; + tsm->NextSampleTuple = bernoulli_nextsampletuple; + tsm->EndSampleScan = NULL; + + PG_RETURN_POINTER(tsm); +} - if (percent < 0 || percent > 100) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("invalid sample size"), - errhint("Sample size must be numeric value between 0 and 100 (inclusive)."))); +/* + * Sample size estimation. + */ +static void +bernoulli_samplescangetsamplesize(PlannerInfo *root, + RelOptInfo *baserel, + List *paramexprs, + BlockNumber *pages, + double *tuples) +{ + Node *pctnode; + float4 samplefract; - sampler = palloc0(sizeof(BernoulliSamplerData)); + /* Try to extract an estimate for the sample percentage */ + pctnode = (Node *) linitial(paramexprs); + pctnode = estimate_expression_value(root, pctnode); - /* Remember initial values for reinit */ - sampler->seed = seed; - sampler->startblock = scan->rs_startblock; - sampler->nblocks = scan->rs_nblocks; - sampler->blockno = InvalidBlockNumber; - sampler->probability = percent / 100; - sampler->lt = InvalidOffsetNumber; - sampler_random_init_state(sampler->seed, sampler->randstate); + if (IsA(pctnode, Const) && + !((Const *) pctnode)->constisnull) + { + samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue); + if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract)) + samplefract /= 100.0f; + else + { + /* Default samplefract if the value is bogus */ + samplefract = 0.1f; + } + } + else + { + /* Default samplefract if we didn't obtain a non-null Const */ + samplefract = 0.1f; + } + + /* We'll visit all pages of the baserel */ + *pages = baserel->pages; - tsdesc->tsmdata = (void *) sampler; + *tuples = clamp_row_est(baserel->tuples * samplefract); +} - PG_RETURN_VOID(); +/* + * Initialize during executor setup. + */ +static void +bernoulli_initsamplescan(SampleScanState *node, int eflags) +{ + node->tsm_state = palloc0(sizeof(BernoulliSamplerData)); } /* - * Get next block number to read or InvalidBlockNumber if we are at the - * end of the relation. + * Examine parameters and prepare for a sample scan. */ -Datum -tsm_bernoulli_nextblock(PG_FUNCTION_ARGS) +static void +bernoulli_beginsamplescan(SampleScanState *node, + Datum *params, + int nparams, + uint32 seed) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata; + BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state; + double percent = DatumGetFloat4(params[0]); + + if (percent < 0 || percent > 100 || isnan(percent)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), + errmsg("sample percentage must be between 0 and 100"))); /* - * Bernoulli sampling scans all blocks on the table and supports syncscan - * so loop from startblock to startblock instead of from 0 to nblocks. + * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to + * store that as a uint64, of course. Note that this gives strictly + * correct behavior at the limits of zero or one probability. */ - if (sampler->blockno == InvalidBlockNumber) - sampler->blockno = sampler->startblock; - else - { - sampler->blockno++; - - if (sampler->blockno >= sampler->nblocks) - sampler->blockno = 0; - - if (sampler->blockno == sampler->startblock) - PG_RETURN_UINT32(InvalidBlockNumber); - } + sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100); + sampler->seed = seed; + sampler->lt = InvalidOffsetNumber; - PG_RETURN_UINT32(sampler->blockno); + /* + * Use bulkread, since we're scanning all pages. But pagemode visibility + * checking is a win only at larger sampling fractions. The 25% cutoff + * here is based on very limited experimentation. + */ + node->use_bulkread = true; + node->use_pagemode = (percent >= 25); } /* - * Get next tuple from current block. - * - * This method implements the main logic in bernoulli sampling. - * The algorithm simply generates new random number (in 0.0-1.0 range) and if - * it falls within user specified probability (in the same range) return the - * tuple offset. - * - * It is ok here to return tuple offset without knowing if tuple is visible - * and not check it via examinetuple. The reason for that is that we do the - * coinflip (random number generation) for every tuple in the table. Since all - * tuples have same probability of being returned the visible and invisible - * tuples will be returned in same ratio as they have in the actual table. - * This means that there is no skew towards either visible or invisible tuples - * and the number of visible tuples returned from the executor node should - * match the fraction of visible tuples which was specified by user. + * Select next sampled tuple in current block. * - * This is faster than doing the coinflip in examinetuple because we don't - * have to do visibility checks on uninteresting tuples. + * It is OK here to return an offset without knowing if the tuple is visible + * (or even exists). The reason is that we do the coinflip for every tuple + * offset in the table. Since all tuples have the same probability of being + * returned, it doesn't matter if we do extra coinflips for invisible tuples. * - * If we reach end of the block return InvalidOffsetNumber which tells + * When we reach end of the block, return InvalidOffsetNumber which tells * SampleScan to go to next block. */ -Datum -tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS) +static OffsetNumber +bernoulli_nextsampletuple(SampleScanState *node, + BlockNumber blockno, + OffsetNumber maxoffset) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - OffsetNumber maxoffset = PG_GETARG_UINT16(2); - BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata; + BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state; OffsetNumber tupoffset = sampler->lt; - float4 probability = sampler->probability; + uint32 hashinput[3]; + /* Advance to first/next tuple in block */ if (tupoffset == InvalidOffsetNumber) tupoffset = FirstOffsetNumber; else tupoffset++; /* - * Loop over tuple offsets until the random generator returns value that - * is within the probability of returning the tuple or until we reach end - * of the block. + * We compute the hash by applying hash_any to an array of 3 uint32's + * containing the block, offset, and seed. This is efficient to set up, + * and with the current implementation of hash_any, it gives + * machine-independent results, which is a nice property for regression + * testing. * - * (This is our implementation of bernoulli trial) + * These words in the hash input are the same throughout the block: */ - while (sampler_random_fract(sampler->randstate) > probability) + hashinput[0] = blockno; + hashinput[2] = sampler->seed; + + /* + * Loop over tuple offsets until finding suitable TID or reaching end of + * block. + */ + for (; tupoffset <= maxoffset; tupoffset++) { - tupoffset++; + uint32 hash; - if (tupoffset > maxoffset) + hashinput[1] = tupoffset; + + hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput, + (int) sizeof(hashinput))); + if (hash < sampler->cutoff) break; } if (tupoffset > maxoffset) - /* Tell SampleScan that we want next block. */ tupoffset = InvalidOffsetNumber; sampler->lt = tupoffset; - PG_RETURN_UINT16(tupoffset); -} - -/* - * Cleanup method. - */ -Datum -tsm_bernoulli_end(PG_FUNCTION_ARGS) -{ - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - - pfree(tsdesc->tsmdata); - - PG_RETURN_VOID(); -} - -/* - * Reset tsdesc (called by ReScan). - */ -Datum -tsm_bernoulli_reset(PG_FUNCTION_ARGS) -{ - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata; - - sampler->blockno = InvalidBlockNumber; - sampler->lt = InvalidOffsetNumber; - sampler_random_init_state(sampler->seed, sampler->randstate); - - PG_RETURN_VOID(); -} - -/* - * Costing function. - */ -Datum -tsm_bernoulli_cost(PG_FUNCTION_ARGS) -{ - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - Path *path = (Path *) PG_GETARG_POINTER(1); - RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); - List *args = (List *) PG_GETARG_POINTER(3); - BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); - double *tuples = (double *) PG_GETARG_POINTER(5); - Node *pctnode; - float4 samplesize; - - *pages = baserel->pages; - - pctnode = linitial(args); - pctnode = estimate_expression_value(root, pctnode); - - if (IsA(pctnode, RelabelType)) - pctnode = (Node *) ((RelabelType *) pctnode)->arg; - - if (IsA(pctnode, Const)) - { - samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); - samplesize /= 100.0; - } - else - { - /* Default samplesize if the estimation didn't return Const. */ - samplesize = 0.1f; - } - - *tuples = path->rows * samplesize; - path->rows = *tuples; - - PG_RETURN_VOID(); + return tupoffset; } diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c index 1d834369a4b..43c5dab7161 100644 --- a/src/backend/access/tablesample/system.c +++ b/src/backend/access/tablesample/system.c @@ -1,186 +1,260 @@ /*------------------------------------------------------------------------- * * system.c - * interface routines for system tablesample method + * support routines for SYSTEM tablesample method * + * To ensure repeatability of samples, it is necessary that selection of a + * given tuple be history-independent; otherwise syncscanning would break + * repeatability, to say nothing of logically-irrelevant maintenance such + * as physical extension or shortening of the relation. * - * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * To achieve that, we proceed by hashing each candidate block number together + * with the active seed, and then selecting it if the hash is less than the + * cutoff value computed from the selection probability by BeginSampleScan. + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * src/backend/utils/tablesample/system.c + * src/backend/access/tablesample/system.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "fmgr.h" +#ifdef _MSC_VER +#include <float.h> /* for _isnan */ +#endif +#include <math.h> -#include "access/tablesample.h" +#include "access/hash.h" #include "access/relscan.h" -#include "nodes/execnodes.h" -#include "nodes/relation.h" +#include "access/tsmapi.h" +#include "catalog/pg_type.h" #include "optimizer/clauses.h" -#include "storage/bufmgr.h" -#include "utils/sampling.h" +#include "optimizer/cost.h" +#include "utils/builtins.h" -/* - * State - */ +/* Private state */ typedef struct { - BlockSamplerData bs; + uint64 cutoff; /* select blocks with hash less than this */ uint32 seed; /* random seed */ - BlockNumber nblocks; /* number of block in relation */ - int samplesize; /* number of blocks to return */ + BlockNumber nextblock; /* next block to consider sampling */ OffsetNumber lt; /* last tuple returned from current block */ } SystemSamplerData; -/* - * Initializes the state. - */ -Datum -tsm_system_init(PG_FUNCTION_ARGS) -{ - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - uint32 seed = PG_GETARG_UINT32(1); - float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2); - HeapScanDesc scan = tsdesc->heapScan; - SystemSamplerData *sampler; +static void system_samplescangetsamplesize(PlannerInfo *root, + RelOptInfo *baserel, + List *paramexprs, + BlockNumber *pages, + double *tuples); +static void system_initsamplescan(SampleScanState *node, + int eflags); +static void system_beginsamplescan(SampleScanState *node, + Datum *params, + int nparams, + uint32 seed); +static BlockNumber system_nextsampleblock(SampleScanState *node); +static OffsetNumber system_nextsampletuple(SampleScanState *node, + BlockNumber blockno, + OffsetNumber maxoffset); - if (percent < 0 || percent > 100) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("invalid sample size"), - errhint("Sample size must be numeric value between 0 and 100 (inclusive)."))); - - sampler = palloc0(sizeof(SystemSamplerData)); - - /* Remember initial values for reinit */ - sampler->seed = seed; - sampler->nblocks = scan->rs_nblocks; - sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0)); - sampler->lt = InvalidOffsetNumber; - - BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, - sampler->seed); - - tsdesc->tsmdata = (void *) sampler; - - PG_RETURN_VOID(); -} /* - * Get next block number or InvalidBlockNumber when we're done. - * - * Uses the same logic as ANALYZE for picking the random blocks. + * Create a TsmRoutine descriptor for the SYSTEM method. */ Datum -tsm_system_nextblock(PG_FUNCTION_ARGS) +tsm_system_handler(PG_FUNCTION_ARGS) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; - BlockNumber blockno; - - if (!BlockSampler_HasMore(&sampler->bs)) - PG_RETURN_UINT32(InvalidBlockNumber); - - blockno = BlockSampler_Next(&sampler->bs); - - PG_RETURN_UINT32(blockno); + TsmRoutine *tsm = makeNode(TsmRoutine); + + tsm->parameterTypes = list_make1_oid(FLOAT4OID); + tsm->repeatable_across_queries = true; + tsm->repeatable_across_scans = true; + tsm->SampleScanGetSampleSize = system_samplescangetsamplesize; + tsm->InitSampleScan = system_initsamplescan; + tsm->BeginSampleScan = system_beginsamplescan; + tsm->NextSampleBlock = system_nextsampleblock; + tsm->NextSampleTuple = system_nextsampletuple; + tsm->EndSampleScan = NULL; + + PG_RETURN_POINTER(tsm); } /* - * Get next tuple offset in current block or InvalidOffsetNumber if we are done - * with this block. + * Sample size estimation. */ -Datum -tsm_system_nexttuple(PG_FUNCTION_ARGS) +static void +system_samplescangetsamplesize(PlannerInfo *root, + RelOptInfo *baserel, + List *paramexprs, + BlockNumber *pages, + double *tuples) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - OffsetNumber maxoffset = PG_GETARG_UINT16(2); - SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; - OffsetNumber tupoffset = sampler->lt; + Node *pctnode; + float4 samplefract; - if (tupoffset == InvalidOffsetNumber) - tupoffset = FirstOffsetNumber; - else - tupoffset++; + /* Try to extract an estimate for the sample percentage */ + pctnode = (Node *) linitial(paramexprs); + pctnode = estimate_expression_value(root, pctnode); - if (tupoffset > maxoffset) - tupoffset = InvalidOffsetNumber; + if (IsA(pctnode, Const) && + !((Const *) pctnode)->constisnull) + { + samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue); + if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract)) + samplefract /= 100.0f; + else + { + /* Default samplefract if the value is bogus */ + samplefract = 0.1f; + } + } + else + { + /* Default samplefract if we didn't obtain a non-null Const */ + samplefract = 0.1f; + } - sampler->lt = tupoffset; + /* We'll visit a sample of the pages ... */ + *pages = clamp_row_est(baserel->pages * samplefract); - PG_RETURN_UINT16(tupoffset); + /* ... and hopefully get a representative number of tuples from them */ + *tuples = clamp_row_est(baserel->tuples * samplefract); } /* - * Cleanup method. + * Initialize during executor setup. */ -Datum -tsm_system_end(PG_FUNCTION_ARGS) +static void +system_initsamplescan(SampleScanState *node, int eflags) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - - pfree(tsdesc->tsmdata); - - PG_RETURN_VOID(); + node->tsm_state = palloc0(sizeof(SystemSamplerData)); } /* - * Reset state (called by ReScan). + * Examine parameters and prepare for a sample scan. */ -Datum -tsm_system_reset(PG_FUNCTION_ARGS) +static void +system_beginsamplescan(SampleScanState *node, + Datum *params, + int nparams, + uint32 seed) { - TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); - SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; + SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; + double percent = DatumGetFloat4(params[0]); + if (percent < 0 || percent > 100 || isnan(percent)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), + errmsg("sample percentage must be between 0 and 100"))); + + /* + * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to + * store that as a uint64, of course. Note that this gives strictly + * correct behavior at the limits of zero or one probability. + */ + sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100); + sampler->seed = seed; + sampler->nextblock = 0; sampler->lt = InvalidOffsetNumber; - BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, - sampler->seed); - PG_RETURN_VOID(); + /* + * Bulkread buffer access strategy probably makes sense unless we're + * scanning a very small fraction of the table. The 1% cutoff here is a + * guess. We should use pagemode visibility checking, since we scan all + * tuples on each selected page. + */ + node->use_bulkread = (percent >= 1); + node->use_pagemode = true; } /* - * Costing function. + * Select next block to sample. */ -Datum -tsm_system_cost(PG_FUNCTION_ARGS) +static BlockNumber +system_nextsampleblock(SampleScanState *node) { - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - Path *path = (Path *) PG_GETARG_POINTER(1); - RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); - List *args = (List *) PG_GETARG_POINTER(3); - BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); - double *tuples = (double *) PG_GETARG_POINTER(5); - Node *pctnode; - float4 samplesize; + SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; + HeapScanDesc scan = node->ss.ss_currentScanDesc; + BlockNumber nextblock = sampler->nextblock; + uint32 hashinput[2]; + + /* + * We compute the hash by applying hash_any to an array of 2 uint32's + * containing the block number and seed. This is efficient to set up, and + * with the current implementation of hash_any, it gives + * machine-independent results, which is a nice property for regression + * testing. + * + * These words in the hash input are the same throughout the block: + */ + hashinput[1] = sampler->seed; + + /* + * Loop over block numbers until finding suitable block or reaching end of + * relation. + */ + for (; nextblock < scan->rs_nblocks; nextblock++) + { + uint32 hash; - pctnode = linitial(args); - pctnode = estimate_expression_value(root, pctnode); + hashinput[0] = nextblock; - if (IsA(pctnode, RelabelType)) - pctnode = (Node *) ((RelabelType *) pctnode)->arg; + hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput, + (int) sizeof(hashinput))); + if (hash < sampler->cutoff) + break; + } - if (IsA(pctnode, Const)) + if (nextblock < scan->rs_nblocks) { - samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); - samplesize /= 100.0; + /* Found a suitable block; remember where we should start next time */ + sampler->nextblock = nextblock + 1; + return nextblock; } + + /* Done, but let's reset nextblock to 0 for safety. */ + sampler->nextblock = 0; + return InvalidBlockNumber; +} + +/* + * Select next sampled tuple in current block. + * + * In block sampling, we just want to sample all the tuples in each selected + * block. + * + * It is OK here to return an offset without knowing if the tuple is visible + * (or even exists); nodeSamplescan.c will deal with that. + * + * When we reach end of the block, return InvalidOffsetNumber which tells + * SampleScan to go to next block. + */ +static OffsetNumber +system_nextsampletuple(SampleScanState *node, + BlockNumber blockno, + OffsetNumber maxoffset) +{ + SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; + OffsetNumber tupoffset = sampler->lt; + + /* Advance to next possible offset on page */ + if (tupoffset == InvalidOffsetNumber) + tupoffset = FirstOffsetNumber; else - { - /* Default samplesize if the estimation didn't return Const. */ - samplesize = 0.1f; - } + tupoffset++; - *pages = baserel->pages * samplesize; - *tuples = path->rows * samplesize; - path->rows = *tuples; + /* Done? */ + if (tupoffset > maxoffset) + tupoffset = InvalidOffsetNumber; + + sampler->lt = tupoffset; - PG_RETURN_VOID(); + return tupoffset; } diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c index f21d42c8e38..b8ad7ced743 100644 --- a/src/backend/access/tablesample/tablesample.c +++ b/src/backend/access/tablesample/tablesample.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * tablesample.c - * TABLESAMPLE internal API + * Support functions for TABLESAMPLE feature * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -10,356 +10,31 @@ * IDENTIFICATION * src/backend/access/tablesample/tablesample.c * - * TABLESAMPLE is the SQL standard clause for sampling the relations. - * - * The API is interface between the Executor and the TABLESAMPLE Methods. - * - * TABLESAMPLE Methods are implementations of actual sampling algorithms which - * can be used for returning a sample of the source relation. - * Methods don't read the table directly but are asked for block number and - * tuple offset which they want to examine (or return) and the tablesample - * interface implemented here does the reading for them. - * - * We currently only support sampling of the physical relations, but in the - * future we might extend the API to support subqueries as well. - * * ------------------------------------------------------------------------- */ #include "postgres.h" -#include "access/tablesample.h" - -#include "catalog/pg_tablesample_method.h" -#include "miscadmin.h" -#include "pgstat.h" -#include "storage/bufmgr.h" -#include "storage/predicate.h" -#include "utils/rel.h" -#include "utils/tqual.h" - - -static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan); - - -/* - * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method. - */ -TableSampleDesc * -tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample) -{ - FunctionCallInfoData fcinfo; - int i; - List *args = tablesample->args; - ListCell *arg; - ExprContext *econtext = scanstate->ss.ps.ps_ExprContext; - TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc)); - - /* Load functions */ - fmgr_info(tablesample->tsminit, &(tsdesc->tsminit)); - fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock)); - fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple)); - if (OidIsValid(tablesample->tsmexaminetuple)) - fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple)); - else - tsdesc->tsmexaminetuple.fn_oid = InvalidOid; - fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset)); - fmgr_info(tablesample->tsmend, &(tsdesc->tsmend)); - - InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit, - list_length(args) + 2, - InvalidOid, NULL, NULL); - - tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor; - tsdesc->heapScan = scanstate->ss.ss_currentScanDesc; - - /* First argument for init function is always TableSampleDesc */ - fcinfo.arg[0] = PointerGetDatum(tsdesc); - fcinfo.argnull[0] = false; +#include "access/tsmapi.h" - /* - * Second arg for init function is always REPEATABLE. - * - * If tablesample->repeatable is NULL then REPEATABLE clause was not - * specified, and we insert a random value as default. - * - * When specified, the expression cannot evaluate to NULL. - */ - if (tablesample->repeatable) - { - ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable, - (PlanState *) scanstate); - - fcinfo.arg[1] = ExecEvalExpr(argstate, econtext, - &fcinfo.argnull[1], NULL); - if (fcinfo.argnull[1]) - ereport(ERROR, - (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), - errmsg("REPEATABLE clause must be NOT NULL numeric value"))); - } - else - { - fcinfo.arg[1] = UInt32GetDatum(random()); - fcinfo.argnull[1] = false; - } - - /* Rest of the arguments come from user. */ - i = 2; - foreach(arg, args) - { - Expr *argexpr = (Expr *) lfirst(arg); - ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate); - - fcinfo.arg[i] = ExecEvalExpr(argstate, econtext, - &fcinfo.argnull[i], NULL); - i++; - } - Assert(i == fcinfo.nargs); - - (void) FunctionCallInvoke(&fcinfo); - - return tsdesc; -} /* - * Get next tuple from TABLESAMPLE Method. - */ -HeapTuple -tablesample_getnext(TableSampleDesc *desc) -{ - HeapScanDesc scan = desc->heapScan; - HeapTuple tuple = &(scan->rs_ctup); - bool pagemode = scan->rs_pageatatime; - BlockNumber blockno; - Page page; - bool page_all_visible; - ItemId itemid; - OffsetNumber tupoffset, - maxoffset; - - if (!scan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (scan->rs_nblocks == 0) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return NULL; - } - blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock, - PointerGetDatum(desc))); - if (!BlockNumberIsValid(blockno)) - { - tuple->t_data = NULL; - return NULL; - } - - heapgetpage(scan, blockno); - scan->rs_inited = true; - } - else - { - /* continue from previously returned page/tuple */ - blockno = scan->rs_cblock; /* current page */ - } - - /* - * When pagemode is disabled, the scan will do visibility checks for each - * tuple it finds so the buffer needs to be locked. - */ - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - page = (Page) BufferGetPage(scan->rs_cbuf); - page_all_visible = PageIsAllVisible(page); - maxoffset = PageGetMaxOffsetNumber(page); - - for (;;) - { - CHECK_FOR_INTERRUPTS(); - - tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple, - PointerGetDatum(desc), - UInt32GetDatum(blockno), - UInt16GetDatum(maxoffset))); - - if (OffsetNumberIsValid(tupoffset)) - { - bool visible; - bool found; - - /* Skip invalid tuple pointers. */ - itemid = PageGetItemId(page, tupoffset); - if (!ItemIdIsNormal(itemid)) - continue; - - tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid); - tuple->t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple->t_self), blockno, tupoffset); - - if (page_all_visible) - visible = true; - else - visible = SampleTupleVisible(tuple, tupoffset, scan); - - /* - * Let the sampling method examine the actual tuple and decide if - * we should return it. - * - * Note that we let it examine even invisible tuples for - * statistical purposes, but not return them since user should - * never see invisible tuples. - */ - if (OidIsValid(desc->tsmexaminetuple.fn_oid)) - { - found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple, - PointerGetDatum(desc), - UInt32GetDatum(blockno), - PointerGetDatum(tuple), - BoolGetDatum(visible))); - /* Should not happen if sampling method is well written. */ - if (found && !visible) - elog(ERROR, "Sampling method wanted to return invisible tuple"); - } - else - found = visible; - - /* Found visible tuple, return it. */ - if (found) - { - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - break; - } - else - { - /* Try next tuple from same page. */ - continue; - } - } - - - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - - blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock, - PointerGetDatum(desc))); - - /* - * Report our new scan position for synchronization purposes. We don't - * do that when moving backwards, however. That would just mess up any - * other forward-moving scanners. - * - * Note: we do this before checking for end of scan so that the final - * state of the position hint is back at the start of the rel. That's - * not strictly necessary, but otherwise when you run the same query - * multiple times the starting position would shift a little bit - * backwards on every invocation, which is confusing. We don't - * guarantee any specific ordering in general, though. - */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ? - blockno : scan->rs_startblock); - - /* - * Reached end of scan. - */ - if (!BlockNumberIsValid(blockno)) - { - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); - scan->rs_cbuf = InvalidBuffer; - scan->rs_cblock = InvalidBlockNumber; - tuple->t_data = NULL; - scan->rs_inited = false; - return NULL; - } - - heapgetpage(scan, blockno); - - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - page = (Page) BufferGetPage(scan->rs_cbuf); - page_all_visible = PageIsAllVisible(page); - maxoffset = PageGetMaxOffsetNumber(page); - } - - pgstat_count_heap_getnext(scan->rs_rd); - - return &(scan->rs_ctup); -} - -/* - * Reset the sampling to starting state - */ -void -tablesample_reset(TableSampleDesc *desc) -{ - (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc)); -} - -/* - * Signal the sampling method that the scan has finished. - */ -void -tablesample_end(TableSampleDesc *desc) -{ - (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc)); -} - -/* - * Check visibility of the tuple. + * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler. + * + * This is a convenience routine that's just meant to check for errors. */ -static bool -SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) +TsmRoutine * +GetTsmRoutine(Oid tsmhandler) { - /* - * If this scan is reading whole pages at a time, there is already - * visibility info present in rs_vistuples so we can just search it for - * the tupoffset. - */ - if (scan->rs_pageatatime) - { - int start = 0, - end = scan->rs_ntuples - 1; - - /* - * Do the binary search over rs_vistuples, it's already sorted by - * OffsetNumber so we don't need to do any sorting ourselves here. - * - * We could use bsearch() here but it's slower for integers because of - * the function call overhead and because it needs boiler plate code - * it would not save us anything code-wise anyway. - */ - while (start <= end) - { - int mid = start + (end - start) / 2; - OffsetNumber curoffset = scan->rs_vistuples[mid]; - - if (curoffset == tupoffset) - return true; - else if (curoffset > tupoffset) - end = mid - 1; - else - start = mid + 1; - } - - return false; - } - else - { - /* No pagemode, we have to check the tuple itself. */ - Snapshot snapshot = scan->rs_snapshot; - Buffer buffer = scan->rs_cbuf; + Datum datum; + TsmRoutine *routine; - bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer); + datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL)); + routine = (TsmRoutine *) DatumGetPointer(datum); - CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer, - snapshot); + if (routine == NULL || !IsA(routine, TsmRoutine)) + elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct", + tsmhandler); - return visible; - } + return routine; } diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 3d1139b5ba0..25130ecf124 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -40,8 +40,9 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_ts_parser.h pg_ts_template.h pg_extension.h \ pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \ pg_foreign_table.h pg_policy.h pg_replication_origin.h \ - pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \ - pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \ + pg_default_acl.h pg_seclabel.h pg_shseclabel.h \ + pg_collation.h pg_range.h pg_transform.h \ + toasting.h indexing.h \ ) # location of Catalog.pm diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 5d7c441739c..90b1cd835f8 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1911,6 +1911,14 @@ find_expr_references_walker(Node *node, context->addrs); } } + else if (IsA(node, TableSampleClause)) + { + TableSampleClause *tsc = (TableSampleClause *) node; + + add_object_address(OCLASS_PROC, tsc->tsmhandler, 0, + context->addrs); + /* fall through to examine arguments */ + } return expression_tree_walker(node, find_expr_references_walker, (void *) context); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 0d1ecc2a3ed..5d06fa4ea65 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -96,6 +96,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel, List *ancestors, ExplainState *es); static void show_sortorder_options(StringInfo buf, Node *sortexpr, Oid sortOperator, Oid collation, bool nullsFirst); +static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, + List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, @@ -116,7 +118,7 @@ static void ExplainMemberNodes(List *plans, PlanState **planstates, static void ExplainSubPlans(List *plans, List *ancestors, const char *relationship, ExplainState *es); static void ExplainCustomChildren(CustomScanState *css, - List *ancestors, ExplainState *es); + List *ancestors, ExplainState *es); static void ExplainProperty(const char *qlabel, const char *value, bool numeric, ExplainState *es); static void ExplainOpenGroup(const char *objtype, const char *labelname, @@ -730,6 +732,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) switch (nodeTag(plan)) { case T_SeqScan: + case T_SampleScan: case T_IndexScan: case T_IndexOnlyScan: case T_BitmapHeapScan: @@ -739,7 +742,6 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case T_ValuesScan: case T_CteScan: case T_WorkTableScan: - case T_SampleScan: *rels_used = bms_add_member(*rels_used, ((Scan *) plan)->scanrelid); break; @@ -935,6 +937,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_SeqScan: pname = sname = "Seq Scan"; break; + case T_SampleScan: + pname = sname = "Sample Scan"; + break; case T_IndexScan: pname = sname = "Index Scan"; break; @@ -976,23 +981,6 @@ ExplainNode(PlanState *planstate, List *ancestors, else pname = sname; break; - case T_SampleScan: - { - /* - * Fetch the tablesample method name from RTE. - * - * It would be nice to also show parameters, but since we - * support arbitrary expressions as parameter it might get - * quite messy. - */ - RangeTblEntry *rte; - - rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable); - custom_name = get_tablesample_method_name(rte->tablesample->tsmid); - pname = psprintf("Sample Scan (%s)", custom_name); - sname = "Sample Scan"; - } - break; case T_Material: pname = sname = "Materialize"; break; @@ -1101,6 +1089,7 @@ ExplainNode(PlanState *planstate, List *ancestors, switch (nodeTag(plan)) { case T_SeqScan: + case T_SampleScan: case T_BitmapHeapScan: case T_TidScan: case T_SubqueryScan: @@ -1115,9 +1104,6 @@ ExplainNode(PlanState *planstate, List *ancestors, if (((Scan *) plan)->scanrelid > 0) ExplainScanTarget((Scan *) plan, es); break; - case T_SampleScan: - ExplainScanTarget((Scan *) plan, es); - break; case T_IndexScan: { IndexScan *indexscan = (IndexScan *) plan; @@ -1363,12 +1349,15 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->analyze) show_tidbitmap_info((BitmapHeapScanState *) planstate, es); break; + case T_SampleScan: + show_tablesample(((SampleScan *) plan)->tablesample, + planstate, ancestors, es); + /* FALL THRU to print additional fields the same as SeqScan */ case T_SeqScan: case T_ValuesScan: case T_CteScan: case T_WorkTableScan: case T_SubqueryScan: - case T_SampleScan: show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, @@ -2110,6 +2099,72 @@ show_sortorder_options(StringInfo buf, Node *sortexpr, } /* + * Show TABLESAMPLE properties + */ +static void +show_tablesample(TableSampleClause *tsc, PlanState *planstate, + List *ancestors, ExplainState *es) +{ + List *context; + bool useprefix; + char *method_name; + List *params = NIL; + char *repeatable; + ListCell *lc; + + /* Set up deparsing context */ + context = set_deparse_context_planstate(es->deparse_cxt, + (Node *) planstate, + ancestors); + useprefix = list_length(es->rtable) > 1; + + /* Get the tablesample method name */ + method_name = get_func_name(tsc->tsmhandler); + + /* Deparse parameter expressions */ + foreach(lc, tsc->args) + { + Node *arg = (Node *) lfirst(lc); + + params = lappend(params, + deparse_expression(arg, context, + useprefix, false)); + } + if (tsc->repeatable) + repeatable = deparse_expression((Node *) tsc->repeatable, context, + useprefix, false); + else + repeatable = NULL; + + /* Print results */ + if (es->format == EXPLAIN_FORMAT_TEXT) + { + bool first = true; + + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "Sampling: %s (", method_name); + foreach(lc, params) + { + if (!first) + appendStringInfoString(es->str, ", "); + appendStringInfoString(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ')'); + if (repeatable) + appendStringInfo(es->str, " REPEATABLE (%s)", repeatable); + appendStringInfoChar(es->str, '\n'); + } + else + { + ExplainPropertyText("Sampling Method", method_name, es); + ExplainPropertyList("Sampling Parameters", params, es); + if (repeatable) + ExplainPropertyText("Repeatable Seed", repeatable, es); + } +} + +/* * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node */ static void @@ -2366,13 +2421,13 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) switch (nodeTag(plan)) { case T_SeqScan: + case T_SampleScan: case T_IndexScan: case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_ForeignScan: case T_CustomScan: - case T_SampleScan: case T_ModifyTable: /* Assert it's on a real relation */ Assert(rte->rtekind == RTE_RELATION); @@ -2663,9 +2718,9 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es) { ListCell *cell; const char *label = - (list_length(css->custom_ps) != 1 ? "children" : "child"); + (list_length(css->custom_ps) != 1 ? "children" : "child"); - foreach (cell, css->custom_ps) + foreach(cell, css->custom_ps) ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es); } diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 04073d3f9f9..93e1e9a691c 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -463,6 +463,10 @@ ExecSupportsBackwardScan(Plan *node) case T_CteScan: return TargetListSupportsBackwardScan(node->targetlist); + case T_SampleScan: + /* Simplify life for tablesample methods by disallowing this */ + return false; + case T_IndexScan: return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); @@ -485,9 +489,6 @@ ExecSupportsBackwardScan(Plan *node) } return false; - case T_SampleScan: - return false; - case T_Material: case T_Sort: /* these don't evaluate tlist */ diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 4c1c5237b7d..dbe84b0baa8 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -3,7 +3,7 @@ * nodeSamplescan.c * Support routines for sample scans of relations (table sampling). * - * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -14,22 +14,23 @@ */ #include "postgres.h" -#include "access/tablesample.h" +#include "access/hash.h" +#include "access/relscan.h" +#include "access/tsmapi.h" #include "executor/executor.h" #include "executor/nodeSamplescan.h" #include "miscadmin.h" -#include "parser/parsetree.h" #include "pgstat.h" -#include "storage/bufmgr.h" #include "storage/predicate.h" #include "utils/rel.h" -#include "utils/syscache.h" #include "utils/tqual.h" -static void InitScanRelation(SampleScanState *node, EState *estate, - int eflags, TableSampleClause *tablesample); +static void InitScanRelation(SampleScanState *node, EState *estate, int eflags); static TupleTableSlot *SampleNext(SampleScanState *node); - +static void tablesample_init(SampleScanState *scanstate); +static HeapTuple tablesample_getnext(SampleScanState *scanstate); +static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, + HeapScanDesc scan); /* ---------------------------------------------------------------- * Scan Support @@ -45,23 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node); static TupleTableSlot * SampleNext(SampleScanState *node) { - TupleTableSlot *slot; - TableSampleDesc *tsdesc; HeapTuple tuple; + TupleTableSlot *slot; /* - * get information from the scan state + * if this is first call within a scan, initialize */ - slot = node->ss.ss_ScanTupleSlot; - tsdesc = node->tsdesc; + if (!node->begun) + tablesample_init(node); + + /* + * get the next tuple, and store it in our result slot + */ + tuple = tablesample_getnext(node); - tuple = tablesample_getnext(tsdesc); + slot = node->ss.ss_ScanTupleSlot; if (tuple) ExecStoreTuple(tuple, /* tuple to store */ slot, /* slot to store in */ - tsdesc->heapScan->rs_cbuf, /* buffer associated - * with this tuple */ + node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */ false); /* don't pfree this pointer */ else ExecClearTuple(slot); @@ -75,7 +79,10 @@ SampleNext(SampleScanState *node) static bool SampleRecheck(SampleScanState *node, TupleTableSlot *slot) { - /* No need to recheck for SampleScan */ + /* + * No need to recheck for SampleScan, since like SeqScan we don't pass any + * checkable keys to heap_beginscan. + */ return true; } @@ -103,8 +110,7 @@ ExecSampleScan(SampleScanState *node) * ---------------------------------------------------------------- */ static void -InitScanRelation(SampleScanState *node, EState *estate, int eflags, - TableSampleClause *tablesample) +InitScanRelation(SampleScanState *node, EState *estate, int eflags) { Relation currentRelation; @@ -113,19 +119,13 @@ InitScanRelation(SampleScanState *node, EState *estate, int eflags, * open that relation and acquire appropriate lock on it. */ currentRelation = ExecOpenScanRelation(estate, - ((SampleScan *) node->ss.ps.plan)->scanrelid, + ((SampleScan *) node->ss.ps.plan)->scan.scanrelid, eflags); node->ss.ss_currentRelation = currentRelation; - /* - * Even though we aren't going to do a conventional seqscan, it is useful - * to create a HeapScanDesc --- many of the fields in it are usable. - */ - node->ss.ss_currentScanDesc = - heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL, - tablesample->tsmseqscan, - tablesample->tsmpagemode); + /* we won't set up the HeapScanDesc till later */ + node->ss.ss_currentScanDesc = NULL; /* and report the scan tuple slot's rowtype */ ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation)); @@ -140,12 +140,11 @@ SampleScanState * ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) { SampleScanState *scanstate; - RangeTblEntry *rte = rt_fetch(node->scanrelid, - estate->es_range_table); + TableSampleClause *tsc = node->tablesample; + TsmRoutine *tsm; Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); - Assert(rte->tablesample != NULL); /* * create state structure @@ -165,10 +164,17 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) - ExecInitExpr((Expr *) node->plan.targetlist, + ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) - ExecInitExpr((Expr *) node->plan.qual, + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) scanstate); + + scanstate->args = (List *) + ExecInitExpr((Expr *) tsc->args, + (PlanState *) scanstate); + scanstate->repeatable = + ExecInitExpr(tsc->repeatable, (PlanState *) scanstate); /* @@ -180,7 +186,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) /* * initialize scan relation */ - InitScanRelation(scanstate, estate, eflags, rte->tablesample); + InitScanRelation(scanstate, estate, eflags); scanstate->ss.ps.ps_TupFromTlist = false; @@ -190,7 +196,25 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); - scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample); + /* + * If we don't have a REPEATABLE clause, select a random seed. We want to + * do this just once, since the seed shouldn't change over rescans. + */ + if (tsc->repeatable == NULL) + scanstate->seed = random(); + + /* + * Finally, initialize the TABLESAMPLE method handler. + */ + tsm = GetTsmRoutine(tsc->tsmhandler); + scanstate->tsmroutine = tsm; + scanstate->tsm_state = NULL; + + if (tsm->InitSampleScan) + tsm->InitSampleScan(scanstate, eflags); + + /* We'll do BeginSampleScan later; we can't evaluate params yet */ + scanstate->begun = false; return scanstate; } @@ -207,7 +231,8 @@ ExecEndSampleScan(SampleScanState *node) /* * Tell sampling function that we finished the scan. */ - tablesample_end(node->tsdesc); + if (node->tsmroutine->EndSampleScan) + node->tsmroutine->EndSampleScan(node); /* * Free the exprcontext @@ -223,7 +248,8 @@ ExecEndSampleScan(SampleScanState *node) /* * close heap scan */ - heap_endscan(node->ss.ss_currentScanDesc); + if (node->ss.ss_currentScanDesc) + heap_endscan(node->ss.ss_currentScanDesc); /* * close the heap relation. @@ -232,11 +258,6 @@ ExecEndSampleScan(SampleScanState *node) } /* ---------------------------------------------------------------- - * Join Support - * ---------------------------------------------------------------- - */ - -/* ---------------------------------------------------------------- * ExecReScanSampleScan * * Rescans the relation. @@ -246,12 +267,336 @@ ExecEndSampleScan(SampleScanState *node) void ExecReScanSampleScan(SampleScanState *node) { - heap_rescan(node->ss.ss_currentScanDesc, NULL); + /* Remember we need to do BeginSampleScan again (if we did it at all) */ + node->begun = false; + + ExecScanReScan(&node->ss); +} + + +/* + * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan. + */ +static void +tablesample_init(SampleScanState *scanstate) +{ + TsmRoutine *tsm = scanstate->tsmroutine; + ExprContext *econtext = scanstate->ss.ps.ps_ExprContext; + Datum *params; + Datum datum; + bool isnull; + uint32 seed; + bool allow_sync; + int i; + ListCell *arg; + + params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum)); + + i = 0; + foreach(arg, scanstate->args) + { + ExprState *argstate = (ExprState *) lfirst(arg); + + params[i] = ExecEvalExprSwitchContext(argstate, + econtext, + &isnull, + NULL); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), + errmsg("TABLESAMPLE parameter cannot be null"))); + i++; + } + + if (scanstate->repeatable) + { + datum = ExecEvalExprSwitchContext(scanstate->repeatable, + econtext, + &isnull, + NULL); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT), + errmsg("TABLESAMPLE REPEATABLE parameter cannot be null"))); + + /* + * The REPEATABLE parameter has been coerced to float8 by the parser. + * The reason for using float8 at the SQL level is that it will + * produce unsurprising results both for users used to databases that + * accept only integers in the REPEATABLE clause and for those who + * might expect that REPEATABLE works like setseed() (a float in the + * range from -1 to 1). + * + * We use hashfloat8() to convert the supplied value into a suitable + * seed. For regression-testing purposes, that has the convenient + * property that REPEATABLE(0) gives a machine-independent result. + */ + seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum)); + } + else + { + /* Use the seed selected by ExecInitSampleScan */ + seed = scanstate->seed; + } + + /* Set default values for params that BeginSampleScan can adjust */ + scanstate->use_bulkread = true; + scanstate->use_pagemode = true; + + /* Let tablesample method do its thing */ + tsm->BeginSampleScan(scanstate, + params, + list_length(scanstate->args), + seed); + + /* We'll use syncscan if there's no NextSampleBlock function */ + allow_sync = (tsm->NextSampleBlock == NULL); + + /* Now we can create or reset the HeapScanDesc */ + if (scanstate->ss.ss_currentScanDesc == NULL) + { + scanstate->ss.ss_currentScanDesc = + heap_beginscan_sampling(scanstate->ss.ss_currentRelation, + scanstate->ss.ps.state->es_snapshot, + 0, NULL, + scanstate->use_bulkread, + allow_sync, + scanstate->use_pagemode); + } + else + { + heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, + scanstate->use_bulkread, + allow_sync, + scanstate->use_pagemode); + } + + pfree(params); + + /* And we're initialized. */ + scanstate->begun = true; +} + +/* + * Get next tuple from TABLESAMPLE method. + * + * Note: an awful lot of this is copied-and-pasted from heapam.c. It would + * perhaps be better to refactor to share more code. + */ +static HeapTuple +tablesample_getnext(SampleScanState *scanstate) +{ + TsmRoutine *tsm = scanstate->tsmroutine; + HeapScanDesc scan = scanstate->ss.ss_currentScanDesc; + HeapTuple tuple = &(scan->rs_ctup); + Snapshot snapshot = scan->rs_snapshot; + bool pagemode = scan->rs_pageatatime; + BlockNumber blockno; + Page page; + bool all_visible; + OffsetNumber maxoffset; + + if (!scan->rs_inited) + { + /* + * return null immediately if relation is empty + */ + if (scan->rs_nblocks == 0) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return NULL; + } + if (tsm->NextSampleBlock) + { + blockno = tsm->NextSampleBlock(scanstate); + if (!BlockNumberIsValid(blockno)) + { + tuple->t_data = NULL; + return NULL; + } + } + else + blockno = scan->rs_startblock; + Assert(blockno < scan->rs_nblocks); + heapgetpage(scan, blockno); + scan->rs_inited = true; + } + else + { + /* continue from previously returned page/tuple */ + blockno = scan->rs_cblock; /* current page */ + } /* - * Tell sampling function to reset its state for rescan. + * When not using pagemode, we must lock the buffer during tuple + * visibility checks. */ - tablesample_reset(node->tsdesc); + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + page = (Page) BufferGetPage(scan->rs_cbuf); + all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; + maxoffset = PageGetMaxOffsetNumber(page); + + for (;;) + { + OffsetNumber tupoffset; + bool finished; + + CHECK_FOR_INTERRUPTS(); + + /* Ask the tablesample method which tuples to check on this page. */ + tupoffset = tsm->NextSampleTuple(scanstate, + blockno, + maxoffset); + + if (OffsetNumberIsValid(tupoffset)) + { + ItemId itemid; + bool visible; + + /* Skip invalid tuple pointers. */ + itemid = PageGetItemId(page, tupoffset); + if (!ItemIdIsNormal(itemid)) + continue; + + tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple->t_len = ItemIdGetLength(itemid); + ItemPointerSet(&(tuple->t_self), blockno, tupoffset); + + if (all_visible) + visible = true; + else + visible = SampleTupleVisible(tuple, tupoffset, scan); + + /* in pagemode, heapgetpage did this for us */ + if (!pagemode) + CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, + scan->rs_cbuf, snapshot); + + if (visible) + { + /* Found visible tuple, return it. */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + break; + } + else + { + /* Try next tuple from same page. */ + continue; + } + } + + /* + * if we get here, it means we've exhausted the items on this page and + * it's time to move to the next. + */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + if (tsm->NextSampleBlock) + { + blockno = tsm->NextSampleBlock(scanstate); + Assert(!scan->rs_syncscan); + finished = !BlockNumberIsValid(blockno); + } + else + { + /* Without NextSampleBlock, just do a plain forward seqscan. */ + blockno++; + if (blockno >= scan->rs_nblocks) + blockno = 0; + + /* + * Report our new scan position for synchronization purposes. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, blockno); + + finished = (blockno == scan->rs_startblock); + } + + /* + * Reached end of scan? + */ + if (finished) + { + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + tuple->t_data = NULL; + scan->rs_inited = false; + return NULL; + } + + Assert(blockno < scan->rs_nblocks); + heapgetpage(scan, blockno); + + /* Re-establish state for new page */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + page = (Page) BufferGetPage(scan->rs_cbuf); + all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; + maxoffset = PageGetMaxOffsetNumber(page); + } + + /* Count successfully-fetched tuples as heap fetches */ + pgstat_count_heap_getnext(scan->rs_rd); + + return &(scan->rs_ctup); +} - ExecScanReScan(&node->ss); +/* + * Check visibility of the tuple. + */ +static bool +SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) +{ + if (scan->rs_pageatatime) + { + /* + * In pageatatime mode, heapgetpage() already did visibility checks, + * so just look at the info it left in rs_vistuples[]. + * + * We use a binary search over the known-sorted array. Note: we could + * save some effort if we insisted that NextSampleTuple select tuples + * in increasing order, but it's not clear that there would be enough + * gain to justify the restriction. + */ + int start = 0, + end = scan->rs_ntuples - 1; + + while (start <= end) + { + int mid = (start + end) / 2; + OffsetNumber curoffset = scan->rs_vistuples[mid]; + + if (tupoffset == curoffset) + return true; + else if (tupoffset < curoffset) + end = mid - 1; + else + start = mid + 1; + } + + return false; + } + else + { + /* Otherwise, we have to check the tuple individually. */ + return HeapTupleSatisfiesVisibility(tuple, + scan->rs_snapshot, + scan->rs_cbuf); + } } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6a08c2db211..7248440ead3 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -360,6 +360,27 @@ _copySeqScan(const SeqScan *from) } /* + * _copySampleScan + */ +static SampleScan * +_copySampleScan(const SampleScan *from) +{ + SampleScan *newnode = makeNode(SampleScan); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_NODE_FIELD(tablesample); + + return newnode; +} + +/* * _copyIndexScan */ static IndexScan * @@ -642,22 +663,6 @@ _copyCustomScan(const CustomScan *from) } /* - * _copySampleScan - */ -static SampleScan * -_copySampleScan(const SampleScan *from) -{ - SampleScan *newnode = makeNode(SampleScan); - - /* - * copy node superclass fields - */ - CopyScanFields((const Scan *) from, (Scan *) newnode); - - return newnode; -} - -/* * CopyJoinFields * * This function copies the fields of the Join node. It is used by @@ -2143,6 +2148,18 @@ _copyRangeTblFunction(const RangeTblFunction *from) return newnode; } +static TableSampleClause * +_copyTableSampleClause(const TableSampleClause *from) +{ + TableSampleClause *newnode = makeNode(TableSampleClause); + + COPY_SCALAR_FIELD(tsmhandler); + COPY_NODE_FIELD(args); + COPY_NODE_FIELD(repeatable); + + return newnode; +} + static WithCheckOption * _copyWithCheckOption(const WithCheckOption *from) { @@ -2271,40 +2288,6 @@ _copyCommonTableExpr(const CommonTableExpr *from) return newnode; } -static RangeTableSample * -_copyRangeTableSample(const RangeTableSample *from) -{ - RangeTableSample *newnode = makeNode(RangeTableSample); - - COPY_NODE_FIELD(relation); - COPY_STRING_FIELD(method); - COPY_NODE_FIELD(repeatable); - COPY_NODE_FIELD(args); - - return newnode; -} - -static TableSampleClause * -_copyTableSampleClause(const TableSampleClause *from) -{ - TableSampleClause *newnode = makeNode(TableSampleClause); - - COPY_SCALAR_FIELD(tsmid); - COPY_SCALAR_FIELD(tsmseqscan); - COPY_SCALAR_FIELD(tsmpagemode); - COPY_SCALAR_FIELD(tsminit); - COPY_SCALAR_FIELD(tsmnextblock); - COPY_SCALAR_FIELD(tsmnexttuple); - COPY_SCALAR_FIELD(tsmexaminetuple); - COPY_SCALAR_FIELD(tsmend); - COPY_SCALAR_FIELD(tsmreset); - COPY_SCALAR_FIELD(tsmcost); - COPY_NODE_FIELD(repeatable); - COPY_NODE_FIELD(args); - - return newnode; -} - static A_Expr * _copyAExpr(const A_Expr *from) { @@ -2532,6 +2515,20 @@ _copyRangeFunction(const RangeFunction *from) return newnode; } +static RangeTableSample * +_copyRangeTableSample(const RangeTableSample *from) +{ + RangeTableSample *newnode = makeNode(RangeTableSample); + + COPY_NODE_FIELD(relation); + COPY_NODE_FIELD(method); + COPY_NODE_FIELD(args); + COPY_NODE_FIELD(repeatable); + COPY_LOCATION_FIELD(location); + + return newnode; +} + static TypeCast * _copyTypeCast(const TypeCast *from) { @@ -4237,6 +4234,9 @@ copyObject(const void *from) case T_SeqScan: retval = _copySeqScan(from); break; + case T_SampleScan: + retval = _copySampleScan(from); + break; case T_IndexScan: retval = _copyIndexScan(from); break; @@ -4273,9 +4273,6 @@ copyObject(const void *from) case T_CustomScan: retval = _copyCustomScan(from); break; - case T_SampleScan: - retval = _copySampleScan(from); - break; case T_Join: retval = _copyJoin(from); break; @@ -4897,6 +4894,9 @@ copyObject(const void *from) case T_RangeFunction: retval = _copyRangeFunction(from); break; + case T_RangeTableSample: + retval = _copyRangeTableSample(from); + break; case T_TypeName: retval = _copyTypeName(from); break; @@ -4921,6 +4921,9 @@ copyObject(const void *from) case T_RangeTblFunction: retval = _copyRangeTblFunction(from); break; + case T_TableSampleClause: + retval = _copyTableSampleClause(from); + break; case T_WithCheckOption: retval = _copyWithCheckOption(from); break; @@ -4948,12 +4951,6 @@ copyObject(const void *from) case T_CommonTableExpr: retval = _copyCommonTableExpr(from); break; - case T_RangeTableSample: - retval = _copyRangeTableSample(from); - break; - case T_TableSampleClause: - retval = _copyTableSampleClause(from); - break; case T_FuncWithArgs: retval = _copyFuncWithArgs(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index faf5eedab4e..6597dbc33e1 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2291,6 +2291,18 @@ _equalRangeFunction(const RangeFunction *a, const RangeFunction *b) } static bool +_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b) +{ + COMPARE_NODE_FIELD(relation); + COMPARE_NODE_FIELD(method); + COMPARE_NODE_FIELD(args); + COMPARE_NODE_FIELD(repeatable); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool _equalIndexElem(const IndexElem *a, const IndexElem *b) { COMPARE_STRING_FIELD(name); @@ -2429,6 +2441,16 @@ _equalRangeTblFunction(const RangeTblFunction *a, const RangeTblFunction *b) } static bool +_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b) +{ + COMPARE_SCALAR_FIELD(tsmhandler); + COMPARE_NODE_FIELD(args); + COMPARE_NODE_FIELD(repeatable); + + return true; +} + +static bool _equalWithCheckOption(const WithCheckOption *a, const WithCheckOption *b) { COMPARE_SCALAR_FIELD(kind); @@ -2539,36 +2561,6 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b) } static bool -_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b) -{ - COMPARE_NODE_FIELD(relation); - COMPARE_STRING_FIELD(method); - COMPARE_NODE_FIELD(repeatable); - COMPARE_NODE_FIELD(args); - - return true; -} - -static bool -_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b) -{ - COMPARE_SCALAR_FIELD(tsmid); - COMPARE_SCALAR_FIELD(tsmseqscan); - COMPARE_SCALAR_FIELD(tsmpagemode); - COMPARE_SCALAR_FIELD(tsminit); - COMPARE_SCALAR_FIELD(tsmnextblock); - COMPARE_SCALAR_FIELD(tsmnexttuple); - COMPARE_SCALAR_FIELD(tsmexaminetuple); - COMPARE_SCALAR_FIELD(tsmend); - COMPARE_SCALAR_FIELD(tsmreset); - COMPARE_SCALAR_FIELD(tsmcost); - COMPARE_NODE_FIELD(repeatable); - COMPARE_NODE_FIELD(args); - - return true; -} - -static bool _equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b) { COMPARE_SCALAR_FIELD(xmloption); @@ -3260,6 +3252,9 @@ equal(const void *a, const void *b) case T_RangeFunction: retval = _equalRangeFunction(a, b); break; + case T_RangeTableSample: + retval = _equalRangeTableSample(a, b); + break; case T_TypeName: retval = _equalTypeName(a, b); break; @@ -3284,6 +3279,9 @@ equal(const void *a, const void *b) case T_RangeTblFunction: retval = _equalRangeTblFunction(a, b); break; + case T_TableSampleClause: + retval = _equalTableSampleClause(a, b); + break; case T_WithCheckOption: retval = _equalWithCheckOption(a, b); break; @@ -3311,12 +3309,6 @@ equal(const void *a, const void *b) case T_CommonTableExpr: retval = _equalCommonTableExpr(a, b); break; - case T_RangeTableSample: - retval = _equalRangeTableSample(a, b); - break; - case T_TableSampleClause: - retval = _equalTableSampleClause(a, b); - break; case T_FuncWithArgs: retval = _equalFuncWithArgs(a, b); break; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index b1e3e6e4893..c517dfd9d69 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -1486,6 +1486,9 @@ exprLocation(const Node *expr) case T_WindowDef: loc = ((const WindowDef *) expr)->location; break; + case T_RangeTableSample: + loc = ((const RangeTableSample *) expr)->location; + break; case T_TypeName: loc = ((const TypeName *) expr)->location; break; @@ -1995,6 +1998,17 @@ expression_tree_walker(Node *node, return walker(((PlaceHolderInfo *) node)->ph_var, context); case T_RangeTblFunction: return walker(((RangeTblFunction *) node)->funcexpr, context); + case T_TableSampleClause: + { + TableSampleClause *tsc = (TableSampleClause *) node; + + if (expression_tree_walker((Node *) tsc->args, + walker, context)) + return true; + if (walker((Node *) tsc->repeatable, context)) + return true; + } + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); @@ -2082,13 +2096,8 @@ range_table_walker(List *rtable, switch (rte->rtekind) { case RTE_RELATION: - if (rte->tablesample) - { - if (walker(rte->tablesample->args, context)) - return true; - if (walker(rte->tablesample->repeatable, context)) - return true; - } + if (walker(rte->tablesample, context)) + return true; break; case RTE_CTE: /* nothing to do */ @@ -2782,6 +2791,17 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_TableSampleClause: + { + TableSampleClause *tsc = (TableSampleClause *) node; + TableSampleClause *newnode; + + FLATCOPY(newnode, tsc, TableSampleClause); + MUTATE(newnode->args, tsc->args, List *); + MUTATE(newnode->repeatable, tsc->repeatable, Expr *); + return (Node *) newnode; + } + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); @@ -2868,20 +2888,12 @@ range_table_mutator(List *rtable, switch (rte->rtekind) { case RTE_RELATION: - if (rte->tablesample) - { - CHECKFLATCOPY(newrte->tablesample, rte->tablesample, - TableSampleClause); - MUTATE(newrte->tablesample->args, - newrte->tablesample->args, - List *); - MUTATE(newrte->tablesample->repeatable, - newrte->tablesample->repeatable, - Node *); - } + MUTATE(newrte->tablesample, rte->tablesample, + TableSampleClause *); + /* we don't bother to copy eref, aliases, etc; OK? */ break; case RTE_CTE: - /* we don't bother to copy eref, aliases, etc; OK? */ + /* nothing to do */ break; case RTE_SUBQUERY: if (!(flags & QTW_IGNORE_RT_SUBQUERIES)) @@ -3316,6 +3328,19 @@ raw_expression_tree_walker(Node *node, return true; } break; + case T_RangeTableSample: + { + RangeTableSample *rts = (RangeTableSample *) node; + + if (walker(rts->relation, context)) + return true; + /* method name is deemed uninteresting */ + if (walker(rts->args, context)) + return true; + if (walker(rts->repeatable, context)) + return true; + } + break; case T_TypeName: { TypeName *tn = (TypeName *) node; @@ -3380,18 +3405,6 @@ raw_expression_tree_walker(Node *node, break; case T_CommonTableExpr: return walker(((CommonTableExpr *) node)->ctequery, context); - case T_RangeTableSample: - { - RangeTableSample *rts = (RangeTableSample *) node; - - if (walker(rts->relation, context)) - return true; - if (walker(rts->repeatable, context)) - return true; - if (walker(rts->args, context)) - return true; - } - break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 87304ba9bf6..81725d6e59a 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -445,6 +445,16 @@ _outSeqScan(StringInfo str, const SeqScan *node) } static void +_outSampleScan(StringInfo str, const SampleScan *node) +{ + WRITE_NODE_TYPE("SAMPLESCAN"); + + _outScanInfo(str, (const Scan *) node); + + WRITE_NODE_FIELD(tablesample); +} + +static void _outIndexScan(StringInfo str, const IndexScan *node) { WRITE_NODE_TYPE("INDEXSCAN"); @@ -592,14 +602,6 @@ _outCustomScan(StringInfo str, const CustomScan *node) } static void -_outSampleScan(StringInfo str, const SampleScan *node) -{ - WRITE_NODE_TYPE("SAMPLESCAN"); - - _outScanInfo(str, (const Scan *) node); -} - -static void _outJoin(StringInfo str, const Join *node) { WRITE_NODE_TYPE("JOIN"); @@ -2479,36 +2481,6 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node) } static void -_outRangeTableSample(StringInfo str, const RangeTableSample *node) -{ - WRITE_NODE_TYPE("RANGETABLESAMPLE"); - - WRITE_NODE_FIELD(relation); - WRITE_STRING_FIELD(method); - WRITE_NODE_FIELD(repeatable); - WRITE_NODE_FIELD(args); -} - -static void -_outTableSampleClause(StringInfo str, const TableSampleClause *node) -{ - WRITE_NODE_TYPE("TABLESAMPLECLAUSE"); - - WRITE_OID_FIELD(tsmid); - WRITE_BOOL_FIELD(tsmseqscan); - WRITE_BOOL_FIELD(tsmpagemode); - WRITE_OID_FIELD(tsminit); - WRITE_OID_FIELD(tsmnextblock); - WRITE_OID_FIELD(tsmnexttuple); - WRITE_OID_FIELD(tsmexaminetuple); - WRITE_OID_FIELD(tsmend); - WRITE_OID_FIELD(tsmreset); - WRITE_OID_FIELD(tsmcost); - WRITE_NODE_FIELD(repeatable); - WRITE_NODE_FIELD(args); -} - -static void _outSetOperationStmt(StringInfo str, const SetOperationStmt *node) { WRITE_NODE_TYPE("SETOPERATIONSTMT"); @@ -2595,6 +2567,16 @@ _outRangeTblFunction(StringInfo str, const RangeTblFunction *node) } static void +_outTableSampleClause(StringInfo str, const TableSampleClause *node) +{ + WRITE_NODE_TYPE("TABLESAMPLECLAUSE"); + + WRITE_OID_FIELD(tsmhandler); + WRITE_NODE_FIELD(args); + WRITE_NODE_FIELD(repeatable); +} + +static void _outAExpr(StringInfo str, const A_Expr *node) { WRITE_NODE_TYPE("AEXPR"); @@ -2846,6 +2828,18 @@ _outRangeFunction(StringInfo str, const RangeFunction *node) } static void +_outRangeTableSample(StringInfo str, const RangeTableSample *node) +{ + WRITE_NODE_TYPE("RANGETABLESAMPLE"); + + WRITE_NODE_FIELD(relation); + WRITE_NODE_FIELD(method); + WRITE_NODE_FIELD(args); + WRITE_NODE_FIELD(repeatable); + WRITE_LOCATION_FIELD(location); +} + +static void _outConstraint(StringInfo str, const Constraint *node) { WRITE_NODE_TYPE("CONSTRAINT"); @@ -3002,6 +2996,9 @@ _outNode(StringInfo str, const void *obj) case T_SeqScan: _outSeqScan(str, obj); break; + case T_SampleScan: + _outSampleScan(str, obj); + break; case T_IndexScan: _outIndexScan(str, obj); break; @@ -3038,9 +3035,6 @@ _outNode(StringInfo str, const void *obj) case T_CustomScan: _outCustomScan(str, obj); break; - case T_SampleScan: - _outSampleScan(str, obj); - break; case T_Join: _outJoin(str, obj); break; @@ -3393,12 +3387,6 @@ _outNode(StringInfo str, const void *obj) case T_CommonTableExpr: _outCommonTableExpr(str, obj); break; - case T_RangeTableSample: - _outRangeTableSample(str, obj); - break; - case T_TableSampleClause: - _outTableSampleClause(str, obj); - break; case T_SetOperationStmt: _outSetOperationStmt(str, obj); break; @@ -3408,6 +3396,9 @@ _outNode(StringInfo str, const void *obj) case T_RangeTblFunction: _outRangeTblFunction(str, obj); break; + case T_TableSampleClause: + _outTableSampleClause(str, obj); + break; case T_A_Expr: _outAExpr(str, obj); break; @@ -3450,6 +3441,9 @@ _outNode(StringInfo str, const void *obj) case T_RangeFunction: _outRangeFunction(str, obj); break; + case T_RangeTableSample: + _outRangeTableSample(str, obj); + break; case T_Constraint: _outConstraint(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index f5a40fbfb44..71be840eac9 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -368,46 +368,6 @@ _readCommonTableExpr(void) } /* - * _readRangeTableSample - */ -static RangeTableSample * -_readRangeTableSample(void) -{ - READ_LOCALS(RangeTableSample); - - READ_NODE_FIELD(relation); - READ_STRING_FIELD(method); - READ_NODE_FIELD(repeatable); - READ_NODE_FIELD(args); - - READ_DONE(); -} - -/* - * _readTableSampleClause - */ -static TableSampleClause * -_readTableSampleClause(void) -{ - READ_LOCALS(TableSampleClause); - - READ_OID_FIELD(tsmid); - READ_BOOL_FIELD(tsmseqscan); - READ_BOOL_FIELD(tsmpagemode); - READ_OID_FIELD(tsminit); - READ_OID_FIELD(tsmnextblock); - READ_OID_FIELD(tsmnexttuple); - READ_OID_FIELD(tsmexaminetuple); - READ_OID_FIELD(tsmend); - READ_OID_FIELD(tsmreset); - READ_OID_FIELD(tsmcost); - READ_NODE_FIELD(repeatable); - READ_NODE_FIELD(args); - - READ_DONE(); -} - -/* * _readSetOperationStmt */ static SetOperationStmt * @@ -1391,6 +1351,21 @@ _readRangeTblFunction(void) READ_DONE(); } +/* + * _readTableSampleClause + */ +static TableSampleClause * +_readTableSampleClause(void) +{ + READ_LOCALS(TableSampleClause); + + READ_OID_FIELD(tsmhandler); + READ_NODE_FIELD(args); + READ_NODE_FIELD(repeatable); + + READ_DONE(); +} + /* * parseNodeString @@ -1426,10 +1401,6 @@ parseNodeString(void) return_value = _readRowMarkClause(); else if (MATCH("COMMONTABLEEXPR", 15)) return_value = _readCommonTableExpr(); - else if (MATCH("RANGETABLESAMPLE", 16)) - return_value = _readRangeTableSample(); - else if (MATCH("TABLESAMPLECLAUSE", 17)) - return_value = _readTableSampleClause(); else if (MATCH("SETOPERATIONSTMT", 16)) return_value = _readSetOperationStmt(); else if (MATCH("ALIAS", 5)) @@ -1528,6 +1499,8 @@ parseNodeString(void) return_value = _readRangeTblEntry(); else if (MATCH("RANGETBLFUNCTION", 16)) return_value = _readRangeTblFunction(); + else if (MATCH("TABLESAMPLECLAUSE", 17)) + return_value = _readTableSampleClause(); else if (MATCH("NOTIFY", 6)) return_value = _readNotifyStmt(); else if (MATCH("DECLARECURSOR", 13)) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 888eeac5151..1590be11675 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -18,6 +18,7 @@ #include <math.h> #include "access/sysattr.h" +#include "access/tsmapi.h" #include "catalog/pg_class.h" #include "catalog/pg_operator.h" #include "foreign/fdwapi.h" @@ -390,7 +391,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, } else if (rte->tablesample != NULL) { - /* Build sample scan on relation */ + /* Sampled relation */ set_tablesample_rel_pathlist(root, rel, rte); } else @@ -480,11 +481,40 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* * set_tablesample_rel_size - * Set size estimates for a sampled relation. + * Set size estimates for a sampled relation */ static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { + TableSampleClause *tsc = rte->tablesample; + TsmRoutine *tsm; + BlockNumber pages; + double tuples; + + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_partial_indexes(root, rel); + + /* + * Call the sampling method's estimation function to estimate the number + * of pages it will read and the number of tuples it will return. (Note: + * we assume the function returns sane values.) + */ + tsm = GetTsmRoutine(tsc->tsmhandler); + tsm->SampleScanGetSampleSize(root, rel, tsc->args, + &pages, &tuples); + + /* + * For the moment, because we will only consider a SampleScan path for the + * rel, it's okay to just overwrite the pages and tuples estimates for the + * whole relation. If we ever consider multiple path types for sampled + * rels, we'll need more complication. + */ + rel->pages = pages; + rel->tuples = tuples; + /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); } @@ -492,8 +522,6 @@ set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* * set_tablesample_rel_pathlist * Build access paths for a sampled relation - * - * There is only one possible path - sampling scan */ static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) @@ -502,15 +530,41 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry * Path *path; /* - * We don't support pushing join clauses into the quals of a seqscan, but - * it could still have required parameterization due to LATERAL refs in - * its tlist. + * We don't support pushing join clauses into the quals of a samplescan, + * but it could still have required parameterization due to LATERAL refs + * in its tlist or TABLESAMPLE arguments. */ required_outer = rel->lateral_relids; - /* We only do sample scan if it was requested */ + /* Consider sampled scan */ path = create_samplescan_path(root, rel, required_outer); - rel->pathlist = list_make1(path); + + /* + * If the sampling method does not support repeatable scans, we must avoid + * plans that would scan the rel multiple times. Ideally, we'd simply + * avoid putting the rel on the inside of a nestloop join; but adding such + * a consideration to the planner seems like a great deal of complication + * to support an uncommon usage of second-rate sampling methods. Instead, + * if there is a risk that the query might perform an unsafe join, just + * wrap the SampleScan in a Materialize node. We can check for joins by + * counting the membership of all_baserels (note that this correctly + * counts inheritance trees as single rels). If we're inside a subquery, + * we can't easily check whether a join might occur in the outer query, so + * just assume one is possible. + * + * GetTsmRoutine is relatively expensive compared to the other tests here, + * so check repeatable_across_scans last, even though that's a bit odd. + */ + if ((root->query_level > 1 || + bms_membership(root->all_baserels) != BMS_SINGLETON) && + !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans)) + { + path = (Path *) create_material_path(rel, path); + } + + add_path(rel, path); + + /* For the moment, at least, there are no other paths to consider */ } /* @@ -2450,7 +2504,33 @@ print_path(PlannerInfo *root, Path *path, int indent) switch (nodeTag(path)) { case T_Path: - ptype = "SeqScan"; + switch (path->pathtype) + { + case T_SeqScan: + ptype = "SeqScan"; + break; + case T_SampleScan: + ptype = "SampleScan"; + break; + case T_SubqueryScan: + ptype = "SubqueryScan"; + break; + case T_FunctionScan: + ptype = "FunctionScan"; + break; + case T_ValuesScan: + ptype = "ValuesScan"; + break; + case T_CteScan: + ptype = "CteScan"; + break; + case T_WorkTableScan: + ptype = "WorkTableScan"; + break; + default: + ptype = "???Path"; + break; + } break; case T_IndexPath: ptype = "IdxScan"; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 0d302f66bee..7069f604110 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -74,6 +74,7 @@ #include <math.h> #include "access/htup_details.h" +#include "access/tsmapi.h" #include "executor/executor.h" #include "executor/nodeHash.h" #include "miscadmin.h" @@ -223,64 +224,66 @@ cost_seqscan(Path *path, PlannerInfo *root, * cost_samplescan * Determines and returns the cost of scanning a relation using sampling. * - * From planner/optimizer perspective, we don't care all that much about cost - * itself since there is always only one scan path to consider when sampling - * scan is present, but number of rows estimation is still important. - * * 'baserel' is the relation to be scanned * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL */ void -cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel) +cost_samplescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) { Cost startup_cost = 0; Cost run_cost = 0; + RangeTblEntry *rte; + TableSampleClause *tsc; + TsmRoutine *tsm; double spc_seq_page_cost, spc_random_page_cost, spc_page_cost; QualCost qpqual_cost; Cost cpu_per_tuple; - BlockNumber pages; - double tuples; - RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); - TableSampleClause *tablesample = rte->tablesample; - /* Should only be applied to base relations */ + /* Should only be applied to base relations with tablesample clauses */ Assert(baserel->relid > 0); - Assert(baserel->rtekind == RTE_RELATION); + rte = planner_rt_fetch(baserel->relid, root); + Assert(rte->rtekind == RTE_RELATION); + tsc = rte->tablesample; + Assert(tsc != NULL); + tsm = GetTsmRoutine(tsc->tsmhandler); /* Mark the path with the correct row estimate */ - if (path->param_info) - path->rows = path->param_info->ppi_rows; + if (param_info) + path->rows = param_info->ppi_rows; else path->rows = baserel->rows; - /* Call the sampling method's costing function. */ - OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root), - PointerGetDatum(path), PointerGetDatum(baserel), - PointerGetDatum(tablesample->args), - PointerGetDatum(&pages), PointerGetDatum(&tuples)); - /* fetch estimated page cost for tablespace containing table */ get_tablespace_page_costs(baserel->reltablespace, &spc_random_page_cost, &spc_seq_page_cost); - - spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost : - spc_random_page_cost; + /* if NextSampleBlock is used, assume random access, else sequential */ + spc_page_cost = (tsm->NextSampleBlock != NULL) ? + spc_random_page_cost : spc_seq_page_cost; /* - * disk costs + * disk costs (recall that baserel->pages has already been set to the + * number of pages the sampling method will visit) */ - run_cost += spc_page_cost * pages; + run_cost += spc_page_cost * baserel->pages; - /* CPU costs */ - get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost); + /* + * CPU costs (recall that baserel->tuples has already been set to the + * number of tuples the sampling method will select). Note that we ignore + * execution cost of the TABLESAMPLE parameter expressions; they will be + * evaluated only once per scan, and in most usages they'll likely be + * simple constants anyway. We also don't charge anything for the + * calculations the sampling method might do internally. + */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); startup_cost += qpqual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; - run_cost += cpu_per_tuple * tuples; + run_cost += cpu_per_tuple * baserel->tuples; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8d15c8ede90..f461586e08c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -102,7 +102,8 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses); static void copy_path_costsize(Plan *dest, Path *src); static void copy_plan_costsize(Plan *dest, Plan *src); static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); -static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid); +static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid, + TableSampleClause *tsc); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, List *indexorderby, List *indexorderbyorig, @@ -1148,7 +1149,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, /* * create_samplescan_plan - * Returns a samplecan plan for the base relation scanned by 'best_path' + * Returns a samplescan plan for the base relation scanned by 'best_path' * with restriction clauses 'scan_clauses' and targetlist 'tlist'. */ static SampleScan * @@ -1157,11 +1158,15 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, { SampleScan *scan_plan; Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + TableSampleClause *tsc; - /* it should be a base rel with tablesample clause... */ + /* it should be a base rel with a tablesample clause... */ Assert(scan_relid > 0); - Assert(best_path->parent->rtekind == RTE_RELATION); - Assert(best_path->pathtype == T_SampleScan); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + tsc = rte->tablesample; + Assert(tsc != NULL); /* Sort clauses into best execution order */ scan_clauses = order_qual_clauses(root, scan_clauses); @@ -1174,13 +1179,16 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, { scan_clauses = (List *) replace_nestloop_params(root, (Node *) scan_clauses); + tsc = (TableSampleClause *) + replace_nestloop_params(root, (Node *) tsc); } scan_plan = make_samplescan(tlist, scan_clauses, - scan_relid); + scan_relid, + tsc); - copy_path_costsize(&scan_plan->plan, best_path); + copy_path_costsize(&scan_plan->scan.plan, best_path); return scan_plan; } @@ -2161,9 +2169,9 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, ListCell *lc; /* Recursively transform child paths. */ - foreach (lc, best_path->custom_paths) + foreach(lc, best_path->custom_paths) { - Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc)); + Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc)); custom_plans = lappend(custom_plans, plan); } @@ -3437,17 +3445,19 @@ make_seqscan(List *qptlist, static SampleScan * make_samplescan(List *qptlist, List *qpqual, - Index scanrelid) + Index scanrelid, + TableSampleClause *tsc) { SampleScan *node = makeNode(SampleScan); - Plan *plan = &node->plan; + Plan *plan = &node->scan.plan; /* cost should be inserted by caller */ plan->targetlist = qptlist; plan->qual = qpqual; plan->lefttree = NULL; plan->righttree = NULL; - node->scanrelid = scanrelid; + node->scan.scanrelid = scanrelid; + node->tablesample = tsc; return node; } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 00b2625d342..701b99254db 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -306,7 +306,9 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex) return; /* Fetch the appropriate variables */ - if (rte->rtekind == RTE_SUBQUERY) + if (rte->rtekind == RTE_RELATION) + vars = pull_vars_of_level((Node *) rte->tablesample, 0); + else if (rte->rtekind == RTE_SUBQUERY) vars = pull_vars_of_level((Node *) rte->subquery, 1); else if (rte->rtekind == RTE_FUNCTION) vars = pull_vars_of_level((Node *) rte->functions, 0); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a6ce96efc48..b95cc95e5d9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -505,14 +505,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, if (rte->rtekind == RTE_RELATION) { if (rte->tablesample) - { - rte->tablesample->args = (List *) - preprocess_expression(root, (Node *) rte->tablesample->args, - EXPRKIND_TABLESAMPLE); - rte->tablesample->repeatable = (Node *) - preprocess_expression(root, rte->tablesample->repeatable, + rte->tablesample = (TableSampleClause *) + preprocess_expression(root, + (Node *) rte->tablesample, EXPRKIND_TABLESAMPLE); - } } else if (rte->rtekind == RTE_SUBQUERY) { @@ -697,11 +693,14 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind) * If the query has any join RTEs, replace join alias variables with * base-relation variables. We must do this before sublink processing, * else sublinks expanded out from join aliases would not get processed. - * We can skip it in non-lateral RTE functions and VALUES lists, however, - * since they can't contain any Vars of the current query level. + * We can skip it in non-lateral RTE functions, VALUES lists, and + * TABLESAMPLE clauses, however, since they can't contain any Vars of the + * current query level. */ if (root->hasJoinRTEs && - !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES)) + !(kind == EXPRKIND_RTFUNC || + kind == EXPRKIND_VALUES || + kind == EXPRKIND_TABLESAMPLE)) expr = flatten_join_alias_vars(root, expr); /* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 258e541754a..ea185d4b4cf 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -372,9 +372,8 @@ flatten_rtes_walker(Node *node, PlannerGlobal *glob) * * In the flat rangetable, we zero out substructure pointers that are not * needed by the executor; this reduces the storage space and copying cost - * for cached plans. We keep only the tablesample field (which we'd otherwise - * have to put in the plan tree, anyway); the ctename, alias and eref Alias - * fields, which are needed by EXPLAIN; and the selectedCols, insertedCols and + * for cached plans. We keep only the ctename, alias and eref Alias fields, + * which are needed by EXPLAIN, and the selectedCols, insertedCols and * updatedCols bitmaps, which are needed for executor-startup permissions * checking and for trigger event checking. */ @@ -388,6 +387,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte) memcpy(newrte, rte, sizeof(RangeTblEntry)); /* zap unneeded sub-structure */ + newrte->tablesample = NULL; newrte->subquery = NULL; newrte->joinaliasvars = NIL; newrte->functions = NIL; @@ -456,11 +456,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) { SampleScan *splan = (SampleScan *) plan; - splan->scanrelid += rtoffset; - splan->plan.targetlist = - fix_scan_list(root, splan->plan.targetlist, rtoffset); - splan->plan.qual = - fix_scan_list(root, splan->plan.qual, rtoffset); + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, rtoffset); + splan->tablesample = (TableSampleClause *) + fix_scan_expr(root, (Node *) splan->tablesample, rtoffset); } break; case T_IndexScan: diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 4708b87f330..f3038cdffda 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2216,7 +2216,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, break; case T_SeqScan: + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_SampleScan: + finalize_primnode((Node *) ((SampleScan *) plan)->tablesample, + &context); context.paramids = bms_add_members(context.paramids, scan_params); break; @@ -2384,7 +2389,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, bms_add_members(context.paramids, scan_params); /* child nodes if any */ - foreach (lc, cscan->custom_plans) + foreach(lc, cscan->custom_plans) { context.paramids = bms_add_members(context.paramids, diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 92b05628434..34144ccaf0f 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1091,12 +1091,15 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, switch (child_rte->rtekind) { + case RTE_RELATION: + if (child_rte->tablesample) + child_rte->lateral = true; + break; case RTE_SUBQUERY: case RTE_FUNCTION: case RTE_VALUES: child_rte->lateral = true; break; - case RTE_RELATION: case RTE_JOIN: case RTE_CTE: /* these can't contain any lateral references */ @@ -1909,6 +1912,13 @@ replace_vars_in_jointree(Node *jtnode, { switch (rte->rtekind) { + case RTE_RELATION: + /* shouldn't be marked LATERAL unless tablesample */ + Assert(rte->tablesample); + rte->tablesample = (TableSampleClause *) + pullup_replace_vars((Node *) rte->tablesample, + context); + break; case RTE_SUBQUERY: rte->subquery = pullup_replace_vars_subquery(rte->subquery, @@ -1924,7 +1934,6 @@ replace_vars_in_jointree(Node *jtnode, pullup_replace_vars((Node *) rte->values_lists, context); break; - case RTE_RELATION: case RTE_JOIN: case RTE_CTE: /* these shouldn't be marked LATERAL */ diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index f7f33bbe772..935bc2b9667 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -713,7 +713,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) /* * create_samplescan_path - * Like seqscan but uses sampling function while scanning. + * Creates a path node for a sampled table scan. */ Path * create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) @@ -726,7 +726,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer required_outer); pathnode->pathkeys = NIL; /* samplescan has unordered result */ - cost_samplescan(pathnode, root, rel); + cost_samplescan(pathnode, root, rel, pathnode->param_info); return pathnode; } @@ -1773,6 +1773,8 @@ reparameterize_path(PlannerInfo *root, Path *path, { case T_SeqScan: return create_seqscan_path(root, rel, required_outer); + case T_SampleScan: + return (Path *) create_samplescan_path(root, rel, required_outer); case T_IndexScan: case T_IndexOnlyScan: { @@ -1805,8 +1807,6 @@ reparameterize_path(PlannerInfo *root, Path *path, case T_SubqueryScan: return create_subqueryscan_path(root, rel, path->pathkeys, required_outer); - case T_SampleScan: - return (Path *) create_samplescan_path(root, rel, required_outer); default: break; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 2b02a2e5233..8f053e47e82 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -457,8 +457,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <jexpr> joined_table %type <range> relation_expr %type <range> relation_expr_opt_alias +%type <node> tablesample_clause opt_repeatable_clause %type <target> target_el single_set_clause set_target insert_column_item -%type <node> relation_expr_tablesample tablesample_clause opt_repeatable_clause %type <str> generic_option_name %type <node> generic_option_arg @@ -10491,9 +10491,13 @@ table_ref: relation_expr opt_alias_clause $1->alias = $2; $$ = (Node *) $1; } - | relation_expr_tablesample + | relation_expr opt_alias_clause tablesample_clause { - $$ = (Node *) $1; + RangeTableSample *n = (RangeTableSample *) $3; + $1->alias = $2; + /* relation_expr goes inside the RangeTableSample node */ + n->relation = (Node *) $1; + $$ = (Node *) n; } | func_table func_alias_clause { @@ -10820,23 +10824,18 @@ relation_expr_opt_alias: relation_expr %prec UMINUS } ; - -relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause - { - RangeTableSample *n = (RangeTableSample *) $3; - n->relation = $1; - n->relation->alias = $2; - $$ = (Node *) n; - } - ; - +/* + * TABLESAMPLE decoration in a FROM item + */ tablesample_clause: - TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause + TABLESAMPLE func_name '(' expr_list ')' opt_repeatable_clause { RangeTableSample *n = makeNode(RangeTableSample); + /* n->relation will be filled in later */ n->method = $2; n->args = $4; n->repeatable = $6; + n->location = @2; $$ = (Node *) n; } ; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index e90e1d68e3a..4e490b23b4e 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -18,8 +18,8 @@ #include "miscadmin.h" #include "access/heapam.h" +#include "access/tsmapi.h" #include "catalog/catalog.h" -#include "access/htup_details.h" #include "catalog/heap.h" #include "catalog/pg_constraint.h" #include "catalog/pg_type.h" @@ -43,7 +43,7 @@ #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/rel.h" -#include "utils/syscache.h" + /* Convenience macro for the most common makeNamespaceItem() case */ #define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true) @@ -63,6 +63,8 @@ static RangeTblEntry *transformRangeSubselect(ParseState *pstate, RangeSubselect *r); static RangeTblEntry *transformRangeFunction(ParseState *pstate, RangeFunction *r); +static TableSampleClause *transformRangeTableSample(ParseState *pstate, + RangeTableSample *rts); static Node *transformFromClauseItem(ParseState *pstate, Node *n, RangeTblEntry **top_rte, int *top_rti, List **namespace); @@ -423,40 +425,6 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace) return result; } -static RangeTblEntry * -transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv) -{ - RangeTblEntry *rte = NULL; - CommonTableExpr *cte = NULL; - TableSampleClause *tablesample = NULL; - - /* if relation has an unqualified name, it might be a CTE reference */ - if (!rv->relation->schemaname) - { - Index levelsup; - - cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup); - } - - /* We first need to build a range table entry */ - if (!cte) - rte = transformTableEntry(pstate, rv->relation); - - if (!rte || - (rte->relkind != RELKIND_RELATION && - rte->relkind != RELKIND_MATVIEW)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("TABLESAMPLE clause can only be used on tables and materialized views"), - parser_errposition(pstate, rv->relation->location))); - - tablesample = ParseTableSample(pstate, rv->method, rv->repeatable, - rv->args, rv->relation->location); - rte->tablesample = tablesample; - - return rte; -} - /* * transformTableEntry --- transform a RangeVar (simple relation reference) */ @@ -748,6 +716,109 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r) return rte; } +/* + * transformRangeTableSample --- transform a TABLESAMPLE clause + * + * Caller has already transformed rts->relation, we just have to validate + * the remaining fields and create a TableSampleClause node. + */ +static TableSampleClause * +transformRangeTableSample(ParseState *pstate, RangeTableSample *rts) +{ + TableSampleClause *tablesample; + Oid handlerOid; + Oid funcargtypes[1]; + TsmRoutine *tsm; + List *fargs; + ListCell *larg, + *ltyp; + + /* + * To validate the sample method name, look up the handler function, which + * has the same name, one dummy INTERNAL argument, and a result type of + * tsm_handler. (Note: tablesample method names are not schema-qualified + * in the SQL standard; but since they are just functions to us, we allow + * schema qualification to resolve any potential ambiguity.) + */ + funcargtypes[0] = INTERNALOID; + + handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true); + + /* we want error to complain about no-such-method, not no-such-function */ + if (!OidIsValid(handlerOid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tablesample method %s does not exist", + NameListToString(rts->method)), + parser_errposition(pstate, rts->location))); + + /* check that handler has correct return type */ + if (get_func_rettype(handlerOid) != TSM_HANDLEROID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type \"tsm_handler\"", + NameListToString(rts->method)), + parser_errposition(pstate, rts->location))); + + /* OK, run the handler to get TsmRoutine, for argument type info */ + tsm = GetTsmRoutine(handlerOid); + + tablesample = makeNode(TableSampleClause); + tablesample->tsmhandler = handlerOid; + + /* check user provided the expected number of arguments */ + if (list_length(rts->args) != list_length(tsm->parameterTypes)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), + errmsg_plural("tablesample method %s requires %d argument, not %d", + "tablesample method %s requires %d arguments, not %d", + list_length(tsm->parameterTypes), + NameListToString(rts->method), + list_length(tsm->parameterTypes), + list_length(rts->args)), + parser_errposition(pstate, rts->location))); + + /* + * Transform the arguments, typecasting them as needed. Note we must also + * assign collations now, because assign_query_collations() doesn't + * examine any substructure of RTEs. + */ + fargs = NIL; + forboth(larg, rts->args, ltyp, tsm->parameterTypes) + { + Node *arg = (Node *) lfirst(larg); + Oid argtype = lfirst_oid(ltyp); + + arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION); + arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE"); + assign_expr_collations(pstate, arg); + fargs = lappend(fargs, arg); + } + tablesample->args = fargs; + + /* Process REPEATABLE (seed) */ + if (rts->repeatable != NULL) + { + Node *arg; + + if (!tsm->repeatable_across_queries) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("tablesample method %s does not support REPEATABLE", + NameListToString(rts->method)), + parser_errposition(pstate, rts->location))); + + arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION); + arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE"); + assign_expr_collations(pstate, arg); + tablesample->repeatable = (Expr *) arg; + } + else + tablesample->repeatable = NULL; + + return tablesample; +} + /* * transformFromClauseItem - @@ -844,6 +915,33 @@ transformFromClauseItem(ParseState *pstate, Node *n, rtr->rtindex = rtindex; return (Node *) rtr; } + else if (IsA(n, RangeTableSample)) + { + /* TABLESAMPLE clause (wrapping some other valid FROM node) */ + RangeTableSample *rts = (RangeTableSample *) n; + Node *rel; + RangeTblRef *rtr; + RangeTblEntry *rte; + + /* Recursively transform the contained relation */ + rel = transformFromClauseItem(pstate, rts->relation, + top_rte, top_rti, namespace); + /* Currently, grammar could only return a RangeVar as contained rel */ + Assert(IsA(rel, RangeTblRef)); + rtr = (RangeTblRef *) rel; + rte = rt_fetch(rtr->rtindex, pstate->p_rtable); + /* We only support this on plain relations and matviews */ + if (rte->relkind != RELKIND_RELATION && + rte->relkind != RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"), + parser_errposition(pstate, exprLocation(rts->relation)))); + + /* Transform TABLESAMPLE details and attach to the RTE */ + rte->tablesample = transformRangeTableSample(pstate, rts); + return (Node *) rtr; + } else if (IsA(n, JoinExpr)) { /* A newfangled join expression */ @@ -1165,26 +1263,6 @@ transformFromClauseItem(ParseState *pstate, Node *n, return (Node *) j; } - else if (IsA(n, RangeTableSample)) - { - /* Tablesample reference */ - RangeTableSample *rv = (RangeTableSample *) n; - RangeTblRef *rtr; - RangeTblEntry *rte = NULL; - int rtindex; - - rte = transformTableSampleEntry(pstate, rv); - - /* assume new rte is at end */ - rtindex = list_length(pstate->p_rtable); - Assert(rte == rt_fetch(rtindex, pstate->p_rtable)); - *top_rte = rte; - *top_rti = rtindex; - *namespace = list_make1(makeDefaultNSItem(rte)); - rtr = makeNode(RangeTblRef); - rtr->rtindex = rtindex; - return (Node *) rtr; - } else elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n)); return NULL; /* can't get here, keep compiler quiet */ diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 430baff1165..554ca9d8c47 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -18,7 +18,6 @@ #include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" -#include "catalog/pg_tablesample_method.h" #include "funcapi.h" #include "lib/stringinfo.h" #include "nodes/makefuncs.h" @@ -27,7 +26,6 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" -#include "parser/parse_expr.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" #include "parser/parse_type.h" @@ -769,148 +767,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, } -/* - * ParseTableSample - * - * Parse TABLESAMPLE clause and process the arguments - */ -TableSampleClause * -ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable, - List *sampleargs, int location) -{ - HeapTuple tuple; - Form_pg_tablesample_method tsm; - Form_pg_proc procform; - TableSampleClause *tablesample; - List *fargs; - ListCell *larg; - int nargs, - initnargs; - Oid init_arg_types[FUNC_MAX_ARGS]; - - /* Load the tablesample method */ - tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod)); - if (!HeapTupleIsValid(tuple)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("tablesample method \"%s\" does not exist", - samplemethod), - parser_errposition(pstate, location))); - - tablesample = makeNode(TableSampleClause); - tablesample->tsmid = HeapTupleGetOid(tuple); - - tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple); - - tablesample->tsmseqscan = tsm->tsmseqscan; - tablesample->tsmpagemode = tsm->tsmpagemode; - tablesample->tsminit = tsm->tsminit; - tablesample->tsmnextblock = tsm->tsmnextblock; - tablesample->tsmnexttuple = tsm->tsmnexttuple; - tablesample->tsmexaminetuple = tsm->tsmexaminetuple; - tablesample->tsmend = tsm->tsmend; - tablesample->tsmreset = tsm->tsmreset; - tablesample->tsmcost = tsm->tsmcost; - - ReleaseSysCache(tuple); - - /* Validate the parameters against init function definition. */ - tuple = SearchSysCache1(PROCOID, - ObjectIdGetDatum(tablesample->tsminit)); - - if (!HeapTupleIsValid(tuple)) /* should not happen */ - elog(ERROR, "cache lookup failed for function %u", - tablesample->tsminit); - - procform = (Form_pg_proc) GETSTRUCT(tuple); - initnargs = procform->pronargs; - Assert(initnargs >= 3); - - /* - * First parameter is used to pass the SampleScanState, second is seed - * (REPEATABLE), skip the processing for them here, just assert that the - * types are correct. - */ - Assert(procform->proargtypes.values[0] == INTERNALOID); - Assert(procform->proargtypes.values[1] == INT4OID); - initnargs -= 2; - memcpy(init_arg_types, procform->proargtypes.values + 2, - initnargs * sizeof(Oid)); - - /* Now we are done with the catalog */ - ReleaseSysCache(tuple); - - /* Process repeatable (seed) */ - if (repeatable != NULL) - { - Node *arg = repeatable; - - if (arg && IsA(arg, A_Const)) - { - A_Const *con = (A_Const *) arg; - - if (con->val.type == T_Null) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("REPEATABLE clause must be NOT NULL numeric value"), - parser_errposition(pstate, con->location))); - - } - - arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION); - arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE"); - tablesample->repeatable = arg; - } - else - tablesample->repeatable = NULL; - - /* Check user provided expected number of arguments. */ - if (list_length(sampleargs) != initnargs) - ereport(ERROR, - (errcode(ERRCODE_TOO_MANY_ARGUMENTS), - errmsg_plural("tablesample method \"%s\" expects %d argument got %d", - "tablesample method \"%s\" expects %d arguments got %d", - initnargs, - samplemethod, - initnargs, list_length(sampleargs)), - parser_errposition(pstate, location))); - - /* Transform the arguments, typecasting them as needed. */ - fargs = NIL; - nargs = 0; - foreach(larg, sampleargs) - { - Node *inarg = (Node *) lfirst(larg); - Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION); - Oid argtype = exprType(arg); - - if (argtype != init_arg_types[nargs]) - { - if (!can_coerce_type(1, &argtype, &init_arg_types[nargs], - COERCION_IMPLICIT)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("wrong parameter %d for tablesample method \"%s\"", - nargs + 1, samplemethod), - errdetail("Expected type %s got %s.", - format_type_be(init_arg_types[nargs]), - format_type_be(argtype)), - parser_errposition(pstate, exprLocation(inarg)))); - - arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1, - COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1); - } - - fargs = lappend(fargs, arg); - nargs++; - } - - /* Pass the arguments down */ - tablesample->args = fargs; - - return tablesample; -} - /* func_match_argtypes() * * Given a list of candidate functions (having the right name and number diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index bbd6b77c5ea..1734e48241a 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -418,6 +418,10 @@ rewriteRuleAction(Query *parsetree, switch (rte->rtekind) { + case RTE_RELATION: + sub_action->hasSubLinks = + checkExprHasSubLink((Node *) rte->tablesample); + break; case RTE_FUNCTION: sub_action->hasSubLinks = checkExprHasSubLink((Node *) rte->functions); diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c index 9ad460abfbd..5b809aa7d49 100644 --- a/src/backend/utils/adt/pseudotypes.c +++ b/src/backend/utils/adt/pseudotypes.c @@ -374,6 +374,33 @@ fdw_handler_out(PG_FUNCTION_ARGS) /* + * tsm_handler_in - input routine for pseudo-type TSM_HANDLER. + */ +Datum +tsm_handler_in(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type tsm_handler"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * tsm_handler_out - output routine for pseudo-type TSM_HANDLER. + */ +Datum +tsm_handler_out(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot display a value of type tsm_handler"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + + +/* * internal_in - input routine for pseudo-type INTERNAL. */ Datum diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5112cac9017..51391f6a4e0 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -32,7 +32,6 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" -#include "catalog/pg_tablesample_method.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" #include "commands/defrem.h" @@ -349,8 +348,6 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, int prettyFlags); static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, int prettyFlags, int wrapColumn); -static void get_tablesample_def(TableSampleClause *tablesample, - deparse_context *context); static void get_query_def(Query *query, StringInfo buf, List *parentnamespace, TupleDesc resultDesc, int prettyFlags, int wrapColumn, int startIndent); @@ -416,6 +413,8 @@ static void get_column_alias_list(deparse_columns *colinfo, static void get_from_clause_coldeflist(RangeTblFunction *rtfunc, deparse_columns *colinfo, deparse_context *context); +static void get_tablesample_def(TableSampleClause *tablesample, + deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); static Node *processIndirection(Node *node, deparse_context *context, @@ -4235,50 +4234,6 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc, heap_close(ev_relation, AccessShareLock); } -/* ---------- - * get_tablesample_def - Convert TableSampleClause back to SQL - * ---------- - */ -static void -get_tablesample_def(TableSampleClause *tablesample, deparse_context *context) -{ - StringInfo buf = context->buf; - HeapTuple tuple; - Form_pg_tablesample_method tsm; - char *tsmname; - int nargs; - ListCell *l; - - /* Load the tablesample method */ - tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid)); - if (!HeapTupleIsValid(tuple)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("cache lookup failed for tablesample method %u", - tablesample->tsmid))); - - tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple); - tsmname = NameStr(tsm->tsmname); - appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname)); - - ReleaseSysCache(tuple); - - nargs = 0; - foreach(l, tablesample->args) - { - if (nargs++ > 0) - appendStringInfoString(buf, ", "); - get_rule_expr((Node *) lfirst(l), context, true); - } - appendStringInfoChar(buf, ')'); - - if (tablesample->repeatable != NULL) - { - appendStringInfoString(buf, " REPEATABLE ("); - get_rule_expr(tablesample->repeatable, context, true); - appendStringInfoChar(buf, ')'); - } -} /* ---------- * get_query_def - Parse back one query parsetree @@ -8781,9 +8736,6 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) only_marker(rte), generate_relation_name(rte->relid, context->namespaces)); - - if (rte->tablesample) - get_tablesample_def(rte->tablesample, context); break; case RTE_SUBQUERY: /* Subquery RTE */ @@ -8963,6 +8915,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) /* Else print column aliases as needed */ get_column_alias_list(colinfo, context); } + + /* Tablesample clause must go after any alias */ + if (rte->rtekind == RTE_RELATION && rte->tablesample) + get_tablesample_def(rte->tablesample, context); } else if (IsA(jtnode, JoinExpr)) { @@ -9163,6 +9119,44 @@ get_from_clause_coldeflist(RangeTblFunction *rtfunc, } /* + * get_tablesample_def - print a TableSampleClause + */ +static void +get_tablesample_def(TableSampleClause *tablesample, deparse_context *context) +{ + StringInfo buf = context->buf; + Oid argtypes[1]; + int nargs; + ListCell *l; + + /* + * We should qualify the handler's function name if it wouldn't be + * resolved by lookup in the current search path. + */ + argtypes[0] = INTERNALOID; + appendStringInfo(buf, " TABLESAMPLE %s (", + generate_function_name(tablesample->tsmhandler, 1, + NIL, argtypes, + false, NULL, EXPR_KIND_NONE)); + + nargs = 0; + foreach(l, tablesample->args) + { + if (nargs++ > 0) + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) lfirst(l), context, false); + } + appendStringInfoChar(buf, ')'); + + if (tablesample->repeatable != NULL) + { + appendStringInfoString(buf, " REPEATABLE ("); + get_rule_expr((Node *) tablesample->repeatable, context, false); + appendStringInfoChar(buf, ')'); + } +} + +/* * get_opclass_name - fetch name of an index operator class * * The opclass name is appended (after a space) to buf. diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 7b32247d34e..1dc293297d9 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -32,7 +32,6 @@ #include "catalog/pg_range.h" #include "catalog/pg_statistic.h" #include "catalog/pg_transform.h" -#include "catalog/pg_tablesample_method.h" #include "catalog/pg_type.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -2997,29 +2996,3 @@ get_range_subtype(Oid rangeOid) else return InvalidOid; } - -/* ---------- PG_TABLESAMPLE_METHOD CACHE ---------- */ - -/* - * get_tablesample_method_name - given a tablesample method OID, - * look up the name or NULL if not found - */ -char * -get_tablesample_method_name(Oid tsmid) -{ - HeapTuple tuple; - - tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid)); - if (HeapTupleIsValid(tuple)) - { - Form_pg_tablesample_method tup = - (Form_pg_tablesample_method) GETSTRUCT(tuple); - char *result; - - result = pstrdup(NameStr(tup->tsmname)); - ReleaseSysCache(tuple); - return result; - } - else - return NULL; -} diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index b6333e362f0..efce7b9a3d1 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -56,7 +56,6 @@ #include "catalog/pg_shseclabel.h" #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" -#include "catalog/pg_tablesample_method.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" #include "catalog/pg_ts_config.h" @@ -667,28 +666,6 @@ static const struct cachedesc cacheinfo[] = { }, 128 }, - {TableSampleMethodRelationId, /* TABLESAMPLEMETHODNAME */ - TableSampleMethodNameIndexId, - 1, - { - Anum_pg_tablesample_method_tsmname, - 0, - 0, - 0, - }, - 2 - }, - {TableSampleMethodRelationId, /* TABLESAMPLEMETHODOID */ - TableSampleMethodOidIndexId, - 1, - { - ObjectIdAttributeNumber, - 0, - 0, - 0, - }, - 2 - }, {TableSpaceRelationId, /* TABLESPACEOID */ TablespaceOidIndexId, 1, diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt index 6cc3ed96c44..7b97d45a53a 100644 --- a/src/backend/utils/errcodes.txt +++ b/src/backend/utils/errcodes.txt @@ -177,6 +177,8 @@ Section: Class 22 - Data Exception 2201B E ERRCODE_INVALID_REGULAR_EXPRESSION invalid_regular_expression 2201W E ERRCODE_INVALID_ROW_COUNT_IN_LIMIT_CLAUSE invalid_row_count_in_limit_clause 2201X E ERRCODE_INVALID_ROW_COUNT_IN_RESULT_OFFSET_CLAUSE invalid_row_count_in_result_offset_clause +2202H E ERRCODE_INVALID_TABLESAMPLE_ARGUMENT invalid_tablesample_argument +2202G E ERRCODE_INVALID_TABLESAMPLE_REPEAT invalid_tablesample_repeat 22009 E ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE invalid_time_zone_displacement_value 2200C E ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER invalid_use_of_escape_character 2200G E ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH most_specific_type_mismatch diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index 6191f797344..4142e01123f 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -228,7 +228,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) void sampler_random_init_state(long seed, SamplerRandomState randstate) { - randstate[0] = RAND48_SEED_0; + randstate[0] = 0x330e; /* same as pg_erand48, but could be anything */ randstate[1] = (unsigned short) seed; randstate[2] = (unsigned short) (seed >> 16); } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 9596af6a7b3..ece05155490 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -738,13 +738,15 @@ static const SchemaQuery Query_for_list_of_matviews = { " WHERE substring(pg_catalog.quote_ident(evtname),1,%d)='%s'" #define Query_for_list_of_tablesample_methods \ -" SELECT pg_catalog.quote_ident(tsmname) "\ -" FROM pg_catalog.pg_tablesample_method "\ -" WHERE substring(pg_catalog.quote_ident(tsmname),1,%d)='%s'" +" SELECT pg_catalog.quote_ident(proname) "\ +" FROM pg_catalog.pg_proc "\ +" WHERE prorettype = 'pg_catalog.tsm_handler'::pg_catalog.regtype AND "\ +" proargtypes[0] = 'pg_catalog.internal'::pg_catalog.regtype AND "\ +" substring(pg_catalog.quote_ident(proname),1,%d)='%s'" #define Query_for_list_of_policies \ " SELECT pg_catalog.quote_ident(polname) "\ -" FROM pg_catalog.pg_policy " \ +" FROM pg_catalog.pg_policy "\ " WHERE substring(pg_catalog.quote_ident(polname),1,%d)='%s'" #define Query_for_list_of_tables_for_policy \ diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 31139cbd0cc..75e6b72f9e0 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -116,11 +116,13 @@ extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, ScanKey key); extern HeapScanDesc heap_beginscan_sampling(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_pagemode); + bool allow_strat, bool allow_sync, bool allow_pagemode); extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber endBlk); extern void heapgetpage(HeapScanDesc scan, BlockNumber page); extern void heap_rescan(HeapScanDesc scan, ScanKey key); +extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode); extern void heap_endscan(HeapScanDesc scan); extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h deleted file mode 100644 index a02e93d3222..00000000000 --- a/src/include/access/tablesample.h +++ /dev/null @@ -1,61 +0,0 @@ -/*------------------------------------------------------------------------- - * - * tablesample.h - * Public header file for TABLESAMPLE clause interface - * - * - * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/include/access/tablesample.h - * - *------------------------------------------------------------------------- - */ -#ifndef TABLESAMPLE_H -#define TABLESAMPLE_H - -#include "access/relscan.h" -#include "executor/executor.h" - -typedef struct TableSampleDesc -{ - HeapScanDesc heapScan; - TupleDesc tupDesc; /* Mostly useful for tsmexaminetuple */ - - void *tsmdata; /* private method data */ - - /* These point to he function of the TABLESAMPLE Method. */ - FmgrInfo tsminit; - FmgrInfo tsmnextblock; - FmgrInfo tsmnexttuple; - FmgrInfo tsmexaminetuple; - FmgrInfo tsmreset; - FmgrInfo tsmend; -} TableSampleDesc; - - -extern TableSampleDesc *tablesample_init(SampleScanState *scanstate, - TableSampleClause *tablesample); -extern HeapTuple tablesample_getnext(TableSampleDesc *desc); -extern void tablesample_reset(TableSampleDesc *desc); -extern void tablesample_end(TableSampleDesc *desc); -extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc); -extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid, - bool *visible); - -extern Datum tsm_system_init(PG_FUNCTION_ARGS); -extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS); -extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS); -extern Datum tsm_system_end(PG_FUNCTION_ARGS); -extern Datum tsm_system_reset(PG_FUNCTION_ARGS); -extern Datum tsm_system_cost(PG_FUNCTION_ARGS); - -extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS); -extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS); -extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS); -extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS); -extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS); -extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS); - - -#endif diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h new file mode 100644 index 00000000000..4b59ffabd6e --- /dev/null +++ b/src/include/access/tsmapi.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * tsmapi.h + * API for tablesample methods + * + * Copyright (c) 2015, PostgreSQL Global Development Group + * + * src/include/access/tsmapi.h + * + *------------------------------------------------------------------------- + */ +#ifndef TSMAPI_H +#define TSMAPI_H + +#include "nodes/execnodes.h" +#include "nodes/relation.h" + + +/* + * Callback function signatures --- see tablesample-method.sgml for more info. + */ + +typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root, + RelOptInfo *baserel, + List *paramexprs, + BlockNumber *pages, + double *tuples); + +typedef void (*InitSampleScan_function) (SampleScanState *node, + int eflags); + +typedef void (*BeginSampleScan_function) (SampleScanState *node, + Datum *params, + int nparams, + uint32 seed); + +typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node); + +typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node, + BlockNumber blockno, + OffsetNumber maxoffset); + +typedef void (*EndSampleScan_function) (SampleScanState *node); + +/* + * TsmRoutine is the struct returned by a tablesample method's handler + * function. It provides pointers to the callback functions needed by the + * planner and executor, as well as additional information about the method. + * + * More function pointers are likely to be added in the future. + * Therefore it's recommended that the handler initialize the struct with + * makeNode(TsmRoutine) so that all fields are set to NULL. This will + * ensure that no fields are accidentally left undefined. + */ +typedef struct TsmRoutine +{ + NodeTag type; + + /* List of datatype OIDs for the arguments of the TABLESAMPLE clause */ + List *parameterTypes; + + /* Can method produce repeatable samples across, or even within, queries? */ + bool repeatable_across_queries; + bool repeatable_across_scans; + + /* Functions for planning a SampleScan on a physical table */ + SampleScanGetSampleSize_function SampleScanGetSampleSize; + + /* Functions for executing a SampleScan on a physical table */ + InitSampleScan_function InitSampleScan; /* can be NULL */ + BeginSampleScan_function BeginSampleScan; + NextSampleBlock_function NextSampleBlock; /* can be NULL */ + NextSampleTuple_function NextSampleTuple; + EndSampleScan_function EndSampleScan; /* can be NULL */ +} TsmRoutine; + + +/* Functions in access/tablesample/tablesample.c */ +extern TsmRoutine *GetTsmRoutine(Oid tsmhandler); + +#endif /* TSMAPI_H */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 8f6685fd0cc..0e983279313 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201507171 +#define CATALOG_VERSION_NO 201507252 #endif diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 748aadde945..c38958d6c5e 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -316,11 +316,6 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops)); #define ReplicationOriginNameIndex 6002 -DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops)); -#define TableSampleMethodNameIndexId 3331 -DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops)); -#define TableSampleMethodOidIndexId 3332 - /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 1d68ad7209e..09bf1439c46 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3734,6 +3734,16 @@ DATA(insert OID = 3116 ( fdw_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 DESCR("I/O"); DATA(insert OID = 3117 ( fdw_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ )); DESCR("I/O"); +DATA(insert OID = 3311 ( tsm_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3312 ( tsm_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ )); +DESCR("I/O"); + +/* tablesample method handlers */ +DATA(insert OID = 3313 ( bernoulli PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ )); +DESCR("BERNOULLI tablesample method handler"); +DATA(insert OID = 3314 ( system PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ )); +DESCR("SYSTEM tablesample method handler"); /* cryptographic */ DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ )); @@ -5321,33 +5331,6 @@ DESCR("get an individual replication origin's replication progress"); DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ )); DESCR("get progress for all replication origins"); -/* tablesample */ -DATA(insert OID = 3335 ( tsm_system_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ )); -DESCR("tsm_system_init(internal)"); -DATA(insert OID = 3336 ( tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ )); -DESCR("tsm_system_nextblock(internal)"); -DATA(insert OID = 3337 ( tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ )); -DESCR("tsm_system_nexttuple(internal)"); -DATA(insert OID = 3338 ( tsm_system_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ )); -DESCR("tsm_system_end(internal)"); -DATA(insert OID = 3339 ( tsm_system_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ )); -DESCR("tsm_system_reset(internal)"); -DATA(insert OID = 3340 ( tsm_system_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ )); -DESCR("tsm_system_cost(internal)"); - -DATA(insert OID = 3341 ( tsm_bernoulli_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ )); -DESCR("tsm_bernoulli_init(internal)"); -DATA(insert OID = 3342 ( tsm_bernoulli_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ )); -DESCR("tsm_bernoulli_nextblock(internal)"); -DATA(insert OID = 3343 ( tsm_bernoulli_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ )); -DESCR("tsm_bernoulli_nexttuple(internal)"); -DATA(insert OID = 3344 ( tsm_bernoulli_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ )); -DESCR("tsm_bernoulli_end(internal)"); -DATA(insert OID = 3345 ( tsm_bernoulli_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ )); -DESCR("tsm_bernoulli_reset(internal)"); -DATA(insert OID = 3346 ( tsm_bernoulli_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ )); -DESCR("tsm_bernoulli_cost(internal)"); - /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h deleted file mode 100644 index b422414d080..00000000000 --- a/src/include/catalog/pg_tablesample_method.h +++ /dev/null @@ -1,81 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pg_tablesample_method.h - * definition of the table scan methods. - * - * - * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/include/catalog/pg_tablesample_method.h - * - * - *------------------------------------------------------------------------- - */ -#ifndef PG_TABLESAMPLE_METHOD_H -#define PG_TABLESAMPLE_METHOD_H - -#include "catalog/genbki.h" -#include "catalog/objectaddress.h" - -/* ---------------- - * pg_tablesample_method definition. cpp turns this into - * typedef struct FormData_pg_tablesample_method - * ---------------- - */ -#define TableSampleMethodRelationId 3330 - -CATALOG(pg_tablesample_method,3330) -{ - NameData tsmname; /* tablesample method name */ - bool tsmseqscan; /* does this method scan whole table - * sequentially? */ - bool tsmpagemode; /* does this method scan page at a time? */ - regproc tsminit; /* init scan function */ - regproc tsmnextblock; /* function returning next block to sample or - * InvalidBlockOffset if finished */ - regproc tsmnexttuple; /* function returning next tuple offset from - * current block or InvalidOffsetNumber if end - * of the block was reacher */ - regproc tsmexaminetuple;/* optional function which can examine tuple - * contents and decide if tuple should be - * returned or not */ - regproc tsmend; /* end scan function */ - regproc tsmreset; /* reset state - used by rescan */ - regproc tsmcost; /* costing function */ -} FormData_pg_tablesample_method; - -/* ---------------- - * Form_pg_tablesample_method corresponds to a pointer to a tuple with - * the format of pg_tablesample_method relation. - * ---------------- - */ -typedef FormData_pg_tablesample_method *Form_pg_tablesample_method; - -/* ---------------- - * compiler constants for pg_tablesample_method - * ---------------- - */ -#define Natts_pg_tablesample_method 10 -#define Anum_pg_tablesample_method_tsmname 1 -#define Anum_pg_tablesample_method_tsmseqscan 2 -#define Anum_pg_tablesample_method_tsmpagemode 3 -#define Anum_pg_tablesample_method_tsminit 4 -#define Anum_pg_tablesample_method_tsmnextblock 5 -#define Anum_pg_tablesample_method_tsmnexttuple 6 -#define Anum_pg_tablesample_method_tsmexaminetuple 7 -#define Anum_pg_tablesample_method_tsmend 8 -#define Anum_pg_tablesample_method_tsmreset 9 -#define Anum_pg_tablesample_method_tsmcost 10 - -/* ---------------- - * initial contents of pg_tablesample_method - * ---------------- - */ - -DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost )); -DESCR("SYSTEM table sampling method"); -DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost )); -DESCR("BERNOULLI table sampling method"); - -#endif /* PG_TABLESAMPLE_METHOD_H */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index da123f6c495..7dc95c8d2c6 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -694,6 +694,8 @@ DATA(insert OID = 3500 ( anyenum PGNSP PGUID 4 t p P f t \054 0 0 0 anyenum_in #define ANYENUMOID 3500 DATA(insert OID = 3115 ( fdw_handler PGNSP PGUID 4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); #define FDW_HANDLEROID 3115 +DATA(insert OID = 3310 ( tsm_handler PGNSP PGUID 4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); +#define TSM_HANDLEROID 3310 DATA(insert OID = 3831 ( anyrange PGNSP PGUID -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); #define ANYRANGEOID 3831 diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h index 4b769daec8b..a0cc6ce467a 100644 --- a/src/include/executor/nodeSamplescan.h +++ b/src/include/executor/nodeSamplescan.h @@ -4,7 +4,7 @@ * * * - * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/include/executor/nodeSamplescan.h diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 541ee187356..303fc3c1c77 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1257,13 +1257,22 @@ typedef struct ScanState */ typedef ScanState SeqScanState; -/* - * SampleScan +/* ---------------- + * SampleScanState information + * ---------------- */ typedef struct SampleScanState { ScanState ss; - struct TableSampleDesc *tsdesc; + List *args; /* expr states for TABLESAMPLE params */ + ExprState *repeatable; /* expr state for REPEATABLE expr */ + /* use struct pointer to avoid including tsmapi.h here */ + struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */ + void *tsm_state; /* tablesample method can keep state here */ + bool use_bulkread; /* use bulkread buffer access strategy? */ + bool use_pagemode; /* use page-at-a-time visibility checking? */ + bool begun; /* false means need to call BeginSampleScan */ + uint32 seed; /* random seed */ } SampleScanState; /* diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index f8acda4eede..748e434a27a 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -51,6 +51,7 @@ typedef enum NodeTag T_BitmapOr, T_Scan, T_SeqScan, + T_SampleScan, T_IndexScan, T_IndexOnlyScan, T_BitmapIndexScan, @@ -61,7 +62,6 @@ typedef enum NodeTag T_ValuesScan, T_CteScan, T_WorkTableScan, - T_SampleScan, T_ForeignScan, T_CustomScan, T_Join, @@ -400,6 +400,7 @@ typedef enum NodeTag T_WindowDef, T_RangeSubselect, T_RangeFunction, + T_RangeTableSample, T_TypeName, T_ColumnDef, T_IndexElem, @@ -407,6 +408,7 @@ typedef enum NodeTag T_DefElem, T_RangeTblEntry, T_RangeTblFunction, + T_TableSampleClause, T_WithCheckOption, T_SortGroupClause, T_GroupingSet, @@ -425,8 +427,6 @@ typedef enum NodeTag T_OnConflictClause, T_CommonTableExpr, T_RoleSpec, - T_RangeTableSample, - T_TableSampleClause, /* * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h) @@ -452,7 +452,8 @@ typedef enum NodeTag T_WindowObjectData, /* private in nodeWindowAgg.c */ T_TIDBitmap, /* in nodes/tidbitmap.h */ T_InlineCodeBlock, /* in nodes/parsenodes.h */ - T_FdwRoutine /* in foreign/fdwapi.h */ + T_FdwRoutine, /* in foreign/fdwapi.h */ + T_TsmRoutine /* in access/tsmapi.h */ } NodeTag; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index b336ff9c6ab..151c93a078e 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -338,26 +338,6 @@ typedef struct FuncCall } FuncCall; /* - * TableSampleClause - a sampling method information - */ -typedef struct TableSampleClause -{ - NodeTag type; - Oid tsmid; - bool tsmseqscan; - bool tsmpagemode; - Oid tsminit; - Oid tsmnextblock; - Oid tsmnexttuple; - Oid tsmexaminetuple; - Oid tsmend; - Oid tsmreset; - Oid tsmcost; - Node *repeatable; - List *args; -} TableSampleClause; - -/* * A_Star - '*' representing all columns of a table or compound field * * This can appear within ColumnRef.fields, A_Indirection.indirection, and @@ -558,19 +538,23 @@ typedef struct RangeFunction } RangeFunction; /* - * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>) + * RangeTableSample - TABLESAMPLE appearing in a raw FROM clause * - * SQL Standard specifies only one parameter which is percentage. But we allow - * custom tablesample methods which may need different input arguments so we - * accept list of arguments. + * This node, appearing only in raw parse trees, represents + * <relation> TABLESAMPLE <method> (<params>) REPEATABLE (<num>) + * Currently, the <relation> can only be a RangeVar, but we might in future + * allow RangeSubselect and other options. Note that the RangeTableSample + * is wrapped around the node representing the <relation>, rather than being + * a subfield of it. */ typedef struct RangeTableSample { NodeTag type; - RangeVar *relation; - char *method; /* sampling method */ - Node *repeatable; - List *args; /* arguments for sampling method */ + Node *relation; /* relation to be sampled */ + List *method; /* sampling method name (possibly qualified) */ + List *args; /* argument(s) for sampling method */ + Node *repeatable; /* REPEATABLE expression, or NULL if none */ + int location; /* method name location, or -1 if unknown */ } RangeTableSample; /* @@ -810,7 +794,7 @@ typedef struct RangeTblEntry */ Oid relid; /* OID of the relation */ char relkind; /* relation kind (see pg_class.relkind) */ - TableSampleClause *tablesample; /* sampling method and parameters */ + struct TableSampleClause *tablesample; /* sampling info, or NULL */ /* * Fields valid for a subquery RTE (else NULL): @@ -913,6 +897,19 @@ typedef struct RangeTblFunction } RangeTblFunction; /* + * TableSampleClause - TABLESAMPLE appearing in a transformed FROM clause + * + * Unlike RangeTableSample, this is a subnode of the relevant RangeTblEntry. + */ +typedef struct TableSampleClause +{ + NodeTag type; + Oid tsmhandler; /* OID of the tablesample handler function */ + List *args; /* tablesample argument expression(s) */ + Expr *repeatable; /* REPEATABLE expression, or NULL if none */ +} TableSampleClause; + +/* * WithCheckOption - * representation of WITH CHECK OPTION checks to be applied to new tuples * when inserting/updating an auto-updatable view, or RLS WITH CHECK @@ -2520,7 +2517,7 @@ typedef struct RenameStmt typedef struct AlterObjectSchemaStmt { NodeTag type; - ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */ + ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */ RangeVar *relation; /* in case it's a table */ List *object; /* in case it's some other object */ List *objarg; /* argument types, if applicable */ @@ -2535,7 +2532,7 @@ typedef struct AlterObjectSchemaStmt typedef struct AlterOwnerStmt { NodeTag type; - ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */ + ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */ RangeVar *relation; /* in case it's a table */ List *object; /* in case it's some other object */ List *objarg; /* argument types, if applicable */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 5f538f3e8cc..0654d0266cd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -287,7 +287,12 @@ typedef Scan SeqScan; * table sample scan node * ---------------- */ -typedef Scan SampleScan; +typedef struct SampleScan +{ + Scan scan; + /* use struct pointer to avoid including parsenodes.h here */ + struct TableSampleClause *tablesample; +} SampleScan; /* ---------------- * index scan node diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 24003ae3591..dd43e45d0c0 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -68,7 +68,8 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root); extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info); -extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel); +extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info); extern void cost_index(IndexPath *path, PlannerInfo *root, double loop_count); extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h index 3194da46394..32646918e20 100644 --- a/src/include/parser/parse_func.h +++ b/src/include/parser/parse_func.h @@ -33,11 +33,6 @@ typedef enum extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, FuncCall *fn, int location); -extern TableSampleClause *ParseTableSample(ParseState *pstate, - char *samplemethod, - Node *repeatable, List *args, - int location); - extern FuncDetailCode func_get_detail(List *funcname, List *fargs, List *fargnames, int nargs, Oid *argtypes, diff --git a/src/include/port.h b/src/include/port.h index 71113c03944..3787cbfb761 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -357,10 +357,6 @@ extern off_t ftello(FILE *stream); #endif #endif -#define RAND48_SEED_0 (0x330e) -#define RAND48_SEED_1 (0xabcd) -#define RAND48_SEED_2 (0x1234) - extern double pg_erand48(unsigned short xseed[3]); extern long pg_lrand48(void); extern void pg_srand48(long seed); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index fcb0bf0ce8e..49caa565574 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -566,6 +566,8 @@ extern Datum language_handler_in(PG_FUNCTION_ARGS); extern Datum language_handler_out(PG_FUNCTION_ARGS); extern Datum fdw_handler_in(PG_FUNCTION_ARGS); extern Datum fdw_handler_out(PG_FUNCTION_ARGS); +extern Datum tsm_handler_in(PG_FUNCTION_ARGS); +extern Datum tsm_handler_out(PG_FUNCTION_ARGS); extern Datum internal_in(PG_FUNCTION_ARGS); extern Datum internal_out(PG_FUNCTION_ARGS); extern Datum opaque_in(PG_FUNCTION_ARGS); @@ -1213,6 +1215,12 @@ extern Datum ginqueryarrayextract(PG_FUNCTION_ARGS); extern Datum ginarrayconsistent(PG_FUNCTION_ARGS); extern Datum ginarraytriconsistent(PG_FUNCTION_ARGS); +/* access/tablesample/bernoulli.c */ +extern Datum tsm_bernoulli_handler(PG_FUNCTION_ARGS); + +/* access/tablesample/system.c */ +extern Datum tsm_system_handler(PG_FUNCTION_ARGS); + /* access/transam/twophase.c */ extern Datum pg_prepared_xact(PG_FUNCTION_ARGS); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index a40c9b12732..97115384329 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -156,7 +156,6 @@ extern void free_attstatsslot(Oid atttype, extern char *get_namespace_name(Oid nspid); extern char *get_namespace_name_or_temp(Oid nspid); extern Oid get_range_subtype(Oid rangeOid); -extern char *get_tablesample_method_name(Oid tsmid); #define type_is_array(typid) (get_element_type(typid) != InvalidOid) /* type_is_array_domain accepts both plain arrays and domains over arrays */ diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index f06f03a996f..18404e266eb 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -81,8 +81,6 @@ enum SysCacheIdentifier REPLORIGNAME, RULERELNAME, STATRELATTINH, - TABLESAMPLEMETHODNAME, - TABLESAMPLEMETHODOID, TABLESPACEOID, TRFOID, TRFTYPELANG, diff --git a/src/port/erand48.c b/src/port/erand48.c index 12efd8193c4..9d471197c35 100644 --- a/src/port/erand48.c +++ b/src/port/erand48.c @@ -33,6 +33,9 @@ #include <math.h> +#define RAND48_SEED_0 (0x330e) +#define RAND48_SEED_1 (0xabcd) +#define RAND48_SEED_2 (0x1234) #define RAND48_MULT_0 (0xe66d) #define RAND48_MULT_1 (0xdeec) #define RAND48_MULT_2 (0x0005) diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 414299a6941..e7c242cd22d 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -101,15 +101,17 @@ NOTICE: f_leak => great manga 44 | 8 | 1 | rls_regress_user2 | great manga | manga (4 rows) -SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel +-- try a sampled version +SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0) + WHERE f_leak(dtitle) ORDER BY did; NOTICE: f_leak => my first manga NOTICE: f_leak => great science fiction +NOTICE: f_leak => great manga did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+----------------------- - 1 | 11 | 1 | rls_regress_user1 | my first novel 4 | 44 | 1 | rls_regress_user1 | my first manga 6 | 22 | 1 | rls_regress_user2 | great science fiction + 8 | 44 | 1 | rls_regress_user2 | great manga (3 rows) -- viewpoint from rls_regress_user2 @@ -156,20 +158,20 @@ NOTICE: f_leak => great manga 44 | 8 | 1 | rls_regress_user2 | great manga | manga (8 rows) -SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did; -NOTICE: f_leak => my first novel -NOTICE: f_leak => my second novel +-- try a sampled version +SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0) + WHERE f_leak(dtitle) ORDER BY did; NOTICE: f_leak => my first manga +NOTICE: f_leak => my second manga NOTICE: f_leak => great science fiction -NOTICE: f_leak => great technology book +NOTICE: f_leak => great manga did | cid | dlevel | dauthor | dtitle -----+-----+--------+-------------------+----------------------- - 1 | 11 | 1 | rls_regress_user1 | my first novel - 2 | 11 | 2 | rls_regress_user1 | my second novel 4 | 44 | 1 | rls_regress_user1 | my first manga + 5 | 44 | 2 | rls_regress_user1 | my second manga 6 | 22 | 1 | rls_regress_user2 | great science fiction - 7 | 33 | 2 | rls_regress_user2 | great technology book -(5 rows) + 8 | 44 | 1 | rls_regress_user2 | great manga +(4 rows) EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); QUERY PLAN diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index cd5337531d4..1e5b0b9a2c4 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2202,6 +2202,10 @@ street| SELECT r.name, FROM ONLY road r, real_city c WHERE (c.outline ## r.thepath); +test_tablesample_v1| SELECT test_tablesample.id + FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2); +test_tablesample_v2| SELECT test_tablesample.id + FROM test_tablesample TABLESAMPLE system (99); toyemp| SELECT emp.name, emp.age, emp.location, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 14acd16da3b..eb0bc88ef1f 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -128,7 +128,6 @@ pg_shdepend|t pg_shdescription|t pg_shseclabel|t pg_statistic|t -pg_tablesample_method|t pg_tablespace|t pg_transform|t pg_trigger|t diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out index 04e5eb8b807..727a8354397 100644 --- a/src/test/regress/expected/tablesample.out +++ b/src/test/regress/expected/tablesample.out @@ -1,107 +1,123 @@ -CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages -INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i; -SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10); +CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); +-- use fillfactor so we don't have to load too much data to get multiple pages +INSERT INTO test_tablesample + SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i); +SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0); id ---- - 0 - 1 - 2 3 4 5 - 9 -(7 rows) - -SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999); - id ----- 6 7 8 -(3 rows) +(6 rows) -SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100); - count -------- - 10 -(1 row) +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0); + id +---- +(0 rows) -SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0); id ---- - 0 - 1 - 2 + 3 + 4 + 5 6 7 8 - 9 -(7 rows) +(6 rows) -SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100); +SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0); id ---- - 0 - 1 - 3 4 5 + 6 + 7 + 8 (5 rows) -SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); +SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0); id ---- - 0 - 5 -(2 rows) + 7 +(1 row) -CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2); -CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99); -SELECT pg_get_viewdef('test_tablesample_v1'::regclass); - pg_get_viewdef --------------------------------------------------------------------------------- - SELECT test_tablesample.id + - FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2); +-- 100% should give repeatable count results (ie, all rows) in any case +SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100); + count +------- + 10 (1 row) -SELECT pg_get_viewdef('test_tablesample_v2'::regclass); - pg_get_viewdef ------------------------------------------------------------ - SELECT test_tablesample.id + - FROM test_tablesample TABLESAMPLE system ((99)::real); +SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2); + count +------- + 10 +(1 row) + +SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4); + count +------- + 10 (1 row) +CREATE VIEW test_tablesample_v1 AS + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2); +CREATE VIEW test_tablesample_v2 AS + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99); +\d+ test_tablesample_v1 + View "public.test_tablesample_v1" + Column | Type | Modifiers | Storage | Description +--------+---------+-----------+---------+------------- + id | integer | | plain | +View definition: + SELECT test_tablesample.id + FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2); + +\d+ test_tablesample_v2 + View "public.test_tablesample_v2" + Column | Type | Modifiers | Storage | Description +--------+---------+-----------+---------+------------- + id | integer | | plain | +View definition: + SELECT test_tablesample.id + FROM test_tablesample TABLESAMPLE system (99); + +-- check a sampled query doesn't affect cursor in progress BEGIN; -DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); +DECLARE tablesample_cur CURSOR FOR + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0); FETCH FIRST FROM tablesample_cur; id ---- - 0 + 3 (1 row) FETCH NEXT FROM tablesample_cur; id ---- - 1 + 4 (1 row) FETCH NEXT FROM tablesample_cur; id ---- - 2 + 5 (1 row) -SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0); id ---- - 0 - 1 - 2 3 4 5 - 9 -(7 rows) + 6 + 7 + 8 +(6 rows) FETCH NEXT FROM tablesample_cur; id @@ -124,19 +140,19 @@ FETCH NEXT FROM tablesample_cur; FETCH FIRST FROM tablesample_cur; id ---- - 0 + 3 (1 row) FETCH NEXT FROM tablesample_cur; id ---- - 1 + 4 (1 row) FETCH NEXT FROM tablesample_cur; id ---- - 2 + 5 (1 row) FETCH NEXT FROM tablesample_cur; @@ -159,41 +175,129 @@ FETCH NEXT FROM tablesample_cur; CLOSE tablesample_cur; END; -EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); - QUERY PLAN -------------------------------------------------------------------------------- - Sample Scan (system) on test_tablesample (cost=0.00..26.35 rows=635 width=4) +EXPLAIN (COSTS OFF) + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2); + QUERY PLAN +-------------------------------------------------------------------- + Sample Scan on test_tablesample + Sampling: system ('50'::real) REPEATABLE ('2'::double precision) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM test_tablesample_v1; + QUERY PLAN +-------------------------------------------------------------------- + Sample Scan on test_tablesample + Sampling: system ('20'::real) REPEATABLE ('2'::double precision) +(2 rows) + +-- check inheritance behavior +explain (costs off) + select count(*) from person tablesample bernoulli (100); + QUERY PLAN +------------------------------------------------- + Aggregate + -> Append + -> Sample Scan on person + Sampling: bernoulli ('100'::real) + -> Sample Scan on emp + Sampling: bernoulli ('100'::real) + -> Sample Scan on student + Sampling: bernoulli ('100'::real) + -> Sample Scan on stud_emp + Sampling: bernoulli ('100'::real) +(10 rows) + +select count(*) from person tablesample bernoulli (100); + count +------- + 58 (1 row) -EXPLAIN SELECT * FROM test_tablesample_v1; - QUERY PLAN -------------------------------------------------------------------------------- - Sample Scan (system) on test_tablesample (cost=0.00..10.54 rows=254 width=4) +select count(*) from person; + count +------- + 58 +(1 row) + +-- check that collations get assigned within the tablesample arguments +SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int); + count +------- + 0 +(1 row) + +-- check behavior during rescans, as well as correct handling of min/max pct +select * from + (values (0),(100)) v(pct), + lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss; + pct | count +-----+------- + 0 | 0 + 100 | 10000 +(2 rows) + +select * from + (values (0),(100)) v(pct), + lateral (select count(*) from tenk1 tablesample system (pct)) ss; + pct | count +-----+------- + 0 | 0 + 100 | 10000 +(2 rows) + +explain (costs off) +select pct, count(unique1) from + (values (0),(100)) v(pct), + lateral (select * from tenk1 tablesample bernoulli (pct)) ss + group by pct; + QUERY PLAN +-------------------------------------------------------- + HashAggregate + Group Key: "*VALUES*".column1 + -> Nested Loop + -> Values Scan on "*VALUES*" + -> Sample Scan on tenk1 + Sampling: bernoulli ("*VALUES*".column1) +(6 rows) + +select pct, count(unique1) from + (values (0),(100)) v(pct), + lateral (select * from tenk1 tablesample bernoulli (pct)) ss + group by pct; + pct | count +-----+------- + 100 | 10000 +(1 row) + +select pct, count(unique1) from + (values (0),(100)) v(pct), + lateral (select * from tenk1 tablesample system (pct)) ss + group by pct; + pct | count +-----+------- + 100 | 10000 (1 row) -- errors SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1); -ERROR: tablesample method "foobar" does not exist +ERROR: tablesample method foobar does not exist LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1); - ^ + ^ +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL); +ERROR: TABLESAMPLE parameter cannot be null SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL); -ERROR: REPEATABLE clause must be NOT NULL numeric value -LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL); - ^ +ERROR: TABLESAMPLE REPEATABLE parameter cannot be null SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1); -ERROR: invalid sample size -HINT: Sample size must be numeric value between 0 and 100 (inclusive). +ERROR: sample percentage must be between 0 and 100 SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200); -ERROR: invalid sample size -HINT: Sample size must be numeric value between 0 and 100 (inclusive). +ERROR: sample percentage must be between 0 and 100 SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1); -ERROR: invalid sample size -HINT: Sample size must be numeric value between 0 and 100 (inclusive). +ERROR: sample percentage must be between 0 and 100 SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200); -ERROR: invalid sample size -HINT: Sample size must be numeric value between 0 and 100 (inclusive). +ERROR: sample percentage must be between 0 and 100 SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1); -ERROR: TABLESAMPLE clause can only be used on tables and materialized views +ERROR: TABLESAMPLE clause can only be applied to tables and materialized views LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)... ^ INSERT INTO test_tablesample_v1 VALUES(1); @@ -202,30 +306,10 @@ DETAIL: Views containing TABLESAMPLE are not automatically updatable. HINT: To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule. WITH query_select AS (SELECT * FROM test_tablesample) SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); -ERROR: TABLESAMPLE clause can only be used on tables and materialized views +ERROR: TABLESAMPLE clause can only be applied to tables and materialized views LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA... ^ SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5); ERROR: syntax error at or near "TABLESAMPLE" LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL... ^ --- catalog sanity -SELECT * -FROM pg_tablesample_method -WHERE tsminit IS NULL - OR tsmseqscan IS NULL - OR tsmpagemode IS NULL - OR tsmnextblock IS NULL - OR tsmnexttuple IS NULL - OR tsmend IS NULL - OR tsmreset IS NULL - OR tsmcost IS NULL; - tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost ----------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+--------- -(0 rows) - --- done -DROP TABLE test_tablesample CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to view test_tablesample_v1 -drop cascades to view test_tablesample_v2 diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index 70c9cc356a6..9eedb363d06 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -686,6 +686,9 @@ SELECT user_relns() AS user_relns test_range_excl test_range_gist test_range_spgist + test_tablesample + test_tablesample_v1 + test_tablesample_v2 test_tsvector testjsonb text_tbl @@ -705,7 +708,7 @@ SELECT user_relns() AS user_relns tvvmv varchar_tbl xacttest -(127 rows) +(130 rows) SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer'))); name diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 3a607cff46c..15d74d4e6eb 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -110,6 +110,7 @@ test: lock test: replica_identity test: rowsecurity test: object_address +test: tablesample test: alter_generic test: alter_operator test: misc @@ -156,4 +157,3 @@ test: with test: xml test: event_trigger test: stats -test: tablesample diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql index 039070b85b7..e86f8143142 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -94,14 +94,18 @@ SET row_security TO ON; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did; -SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did; +-- try a sampled version +SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0) + WHERE f_leak(dtitle) ORDER BY did; -- viewpoint from rls_regress_user2 SET SESSION AUTHORIZATION rls_regress_user2; SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did; SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did; -SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did; +-- try a sampled version +SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0) + WHERE f_leak(dtitle) ORDER BY did; EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql index 7b3eb9bedf7..eec97934966 100644 --- a/src/test/regress/sql/tablesample.sql +++ b/src/test/regress/sql/tablesample.sql @@ -1,26 +1,37 @@ -CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages +CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); +-- use fillfactor so we don't have to load too much data to get multiple pages -INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i; +INSERT INTO test_tablesample + SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i); -SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10); -SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999); +SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0); +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0); +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0); +SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0); +SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0); + +-- 100% should give repeatable count results (ie, all rows) in any case SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100); -SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); -SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100); -SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); +SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2); +SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4); -CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2); -CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99); -SELECT pg_get_viewdef('test_tablesample_v1'::regclass); -SELECT pg_get_viewdef('test_tablesample_v2'::regclass); +CREATE VIEW test_tablesample_v1 AS + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2); +CREATE VIEW test_tablesample_v2 AS + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99); +\d+ test_tablesample_v1 +\d+ test_tablesample_v2 +-- check a sampled query doesn't affect cursor in progress BEGIN; -DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100); +DECLARE tablesample_cur CURSOR FOR + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0); + FETCH FIRST FROM tablesample_cur; FETCH NEXT FROM tablesample_cur; FETCH NEXT FROM tablesample_cur; -SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0); FETCH NEXT FROM tablesample_cur; FETCH NEXT FROM tablesample_cur; @@ -36,12 +47,45 @@ FETCH NEXT FROM tablesample_cur; CLOSE tablesample_cur; END; -EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10); -EXPLAIN SELECT * FROM test_tablesample_v1; +EXPLAIN (COSTS OFF) + SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2); +EXPLAIN (COSTS OFF) + SELECT * FROM test_tablesample_v1; + +-- check inheritance behavior +explain (costs off) + select count(*) from person tablesample bernoulli (100); +select count(*) from person tablesample bernoulli (100); +select count(*) from person; + +-- check that collations get assigned within the tablesample arguments +SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int); + +-- check behavior during rescans, as well as correct handling of min/max pct +select * from + (values (0),(100)) v(pct), + lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss; +select * from + (values (0),(100)) v(pct), + lateral (select count(*) from tenk1 tablesample system (pct)) ss; +explain (costs off) +select pct, count(unique1) from + (values (0),(100)) v(pct), + lateral (select * from tenk1 tablesample bernoulli (pct)) ss + group by pct; +select pct, count(unique1) from + (values (0),(100)) v(pct), + lateral (select * from tenk1 tablesample bernoulli (pct)) ss + group by pct; +select pct, count(unique1) from + (values (0),(100)) v(pct), + lateral (select * from tenk1 tablesample system (pct)) ss + group by pct; -- errors SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1); +SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL); SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL); SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1); @@ -56,19 +100,3 @@ WITH query_select AS (SELECT * FROM test_tablesample) SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1); SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5); - --- catalog sanity - -SELECT * -FROM pg_tablesample_method -WHERE tsminit IS NULL - OR tsmseqscan IS NULL - OR tsmpagemode IS NULL - OR tsmnextblock IS NULL - OR tsmnexttuple IS NULL - OR tsmend IS NULL - OR tsmreset IS NULL - OR tsmcost IS NULL; - --- done -DROP TABLE test_tablesample CASCADE; |