diff options
Diffstat (limited to 'src/backend/access/tablesample')
-rw-r--r-- | src/backend/access/tablesample/Makefile | 17 | ||||
-rw-r--r-- | src/backend/access/tablesample/bernoulli.c | 235 | ||||
-rw-r--r-- | src/backend/access/tablesample/system.c | 186 | ||||
-rw-r--r-- | src/backend/access/tablesample/tablesample.c | 368 |
4 files changed, 806 insertions, 0 deletions
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile new file mode 100644 index 00000000000..46eeb59f9c4 --- /dev/null +++ b/src/backend/access/tablesample/Makefile @@ -0,0 +1,17 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for utils/tablesample +# +# IDENTIFICATION +# src/backend/utils/tablesample/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/tablesample +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = tablesample.o system.o bernoulli.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c new file mode 100644 index 00000000000..c91f3f593e5 --- /dev/null +++ b/src/backend/access/tablesample/bernoulli.c @@ -0,0 +1,235 @@ +/*------------------------------------------------------------------------- + * + * bernoulli.c + * interface routines for BERNOULLI tablesample method + * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/tablesample/bernoulli.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" + +#include "access/tablesample.h" +#include "access/relscan.h" +#include "nodes/execnodes.h" +#include "nodes/relation.h" +#include "optimizer/clauses.h" +#include "storage/bufmgr.h" +#include "utils/sampling.h" + + +/* tsdesc */ +typedef struct +{ + uint32 seed; /* random seed */ + BlockNumber startblock; /* starting block, we use ths for syncscan support */ + BlockNumber nblocks; /* number of blocks */ + BlockNumber blockno; /* current block */ + float4 probability; /* probabilty that tuple will be returned (0.0-1.0) */ + OffsetNumber lt; /* last tuple returned from current block */ + SamplerRandomState randstate; /* random generator tsdesc */ +} BernoulliSamplerData; + +/* + * Initialize the state. + */ +Datum +tsm_bernoulli_init(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + uint32 seed = PG_GETARG_UINT32(1); + float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2); + HeapScanDesc scan = tsdesc->heapScan; + BernoulliSamplerData *sampler; + + if (percent < 0 || percent > 100) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("invalid sample size"), + errhint("Sample size must be numeric value between 0 and 100 (inclusive)."))); + + sampler = palloc0(sizeof(BernoulliSamplerData)); + + /* Remember initial values for reinit */ + sampler->seed = seed; + sampler->startblock = scan->rs_startblock; + sampler->nblocks = scan->rs_nblocks; + sampler->blockno = InvalidBlockNumber; + sampler->probability = percent / 100; + sampler->lt = InvalidOffsetNumber; + sampler_random_init_state(sampler->seed, sampler->randstate); + + tsdesc->tsmdata = (void *) sampler; + + PG_RETURN_VOID(); +} + +/* + * Get next block number to read or InvalidBlockNumber if we are at the + * end of the relation. + */ +Datum +tsm_bernoulli_nextblock(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + BernoulliSamplerData *sampler = + (BernoulliSamplerData *) tsdesc->tsmdata; + + /* + * Bernoulli sampling scans all blocks on the table and supports + * syncscan so loop from startblock to startblock instead of + * from 0 to nblocks. + */ + if (sampler->blockno == InvalidBlockNumber) + sampler->blockno = sampler->startblock; + else + { + sampler->blockno++; + + if (sampler->blockno >= sampler->nblocks) + sampler->blockno = 0; + + if (sampler->blockno == sampler->startblock) + PG_RETURN_UINT32(InvalidBlockNumber); + } + + PG_RETURN_UINT32(sampler->blockno); +} + +/* + * Get next tuple from current block. + * + * This method implements the main logic in bernoulli sampling. + * The algorithm simply generates new random number (in 0.0-1.0 range) and if + * it falls within user specified probability (in the same range) return the + * tuple offset. + * + * It is ok here to return tuple offset without knowing if tuple is visible + * and not check it via examinetuple. The reason for that is that we do the + * coinflip (random number generation) for every tuple in the table. Since all + * tuples have same probability of being returned the visible and invisible + * tuples will be returned in same ratio as they have in the actual table. + * This means that there is no skew towards either visible or invisible tuples + * and the number returned visible tuples to from the executor node is the + * fraction of visible tuples which was specified in input. + * + * This is faster than doing the coinflip in the examinetuple because we don't + * have to do visibility checks on uninteresting tuples. + * + * If we reach end of the block return InvalidOffsetNumber which tells + * SampleScan to go to next block. + */ +Datum +tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + OffsetNumber maxoffset = PG_GETARG_UINT16(2); + BernoulliSamplerData *sampler = + (BernoulliSamplerData *) tsdesc->tsmdata; + OffsetNumber tupoffset = sampler->lt; + float4 probability = sampler->probability; + + if (tupoffset == InvalidOffsetNumber) + tupoffset = FirstOffsetNumber; + else + tupoffset++; + + /* + * Loop over tuple offsets until the random generator returns value that + * is within the probability of returning the tuple or until we reach + * end of the block. + * + * (This is our implementation of bernoulli trial) + */ + while (sampler_random_fract(sampler->randstate) > probability) + { + tupoffset++; + + if (tupoffset > maxoffset) + break; + } + + if (tupoffset > maxoffset) + /* Tell SampleScan that we want next block. */ + tupoffset = InvalidOffsetNumber; + + sampler->lt = tupoffset; + + PG_RETURN_UINT16(tupoffset); +} + +/* + * Cleanup method. + */ +Datum +tsm_bernoulli_end(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + + pfree(tsdesc->tsmdata); + + PG_RETURN_VOID(); +} + +/* + * Reset tsdesc (called by ReScan). + */ +Datum +tsm_bernoulli_reset(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + BernoulliSamplerData *sampler = + (BernoulliSamplerData *) tsdesc->tsmdata; + + sampler->blockno = InvalidBlockNumber; + sampler->lt = InvalidOffsetNumber; + sampler_random_init_state(sampler->seed, sampler->randstate); + + PG_RETURN_VOID(); +} + +/* + * Costing function. + */ +Datum +tsm_bernoulli_cost(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Path *path = (Path *) PG_GETARG_POINTER(1); + RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); + List *args = (List *) PG_GETARG_POINTER(3); + BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); + double *tuples = (double *) PG_GETARG_POINTER(5); + Node *pctnode; + float4 samplesize; + + *pages = baserel->pages; + + pctnode = linitial(args); + pctnode = estimate_expression_value(root, pctnode); + + if (IsA(pctnode, RelabelType)) + pctnode = (Node *) ((RelabelType *) pctnode)->arg; + + if (IsA(pctnode, Const)) + { + samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); + samplesize /= 100.0; + } + else + { + /* Default samplesize if the estimation didn't return Const. */ + samplesize = 0.1f; + } + + *tuples = path->rows * samplesize; + path->rows = *tuples; + + PG_RETURN_VOID(); +} diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c new file mode 100644 index 00000000000..1412e511faf --- /dev/null +++ b/src/backend/access/tablesample/system.c @@ -0,0 +1,186 @@ +/*------------------------------------------------------------------------- + * + * system.c + * interface routines for system tablesample method + * + * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/tablesample/system.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" + +#include "access/tablesample.h" +#include "access/relscan.h" +#include "nodes/execnodes.h" +#include "nodes/relation.h" +#include "optimizer/clauses.h" +#include "storage/bufmgr.h" +#include "utils/sampling.h" + + +/* + * State + */ +typedef struct +{ + BlockSamplerData bs; + uint32 seed; /* random seed */ + BlockNumber nblocks; /* number of block in relation */ + int samplesize; /* number of blocks to return */ + OffsetNumber lt; /* last tuple returned from current block */ +} SystemSamplerData; + + +/* + * Initializes the state. + */ +Datum +tsm_system_init(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + uint32 seed = PG_GETARG_UINT32(1); + float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2); + HeapScanDesc scan = tsdesc->heapScan; + SystemSamplerData *sampler; + + if (percent < 0 || percent > 100) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("invalid sample size"), + errhint("Sample size must be numeric value between 0 and 100 (inclusive)."))); + + sampler = palloc0(sizeof(SystemSamplerData)); + + /* Remember initial values for reinit */ + sampler->seed = seed; + sampler->nblocks = scan->rs_nblocks; + sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0)); + sampler->lt = InvalidOffsetNumber; + + BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, + sampler->seed); + + tsdesc->tsmdata = (void *) sampler; + + PG_RETURN_VOID(); +} + +/* + * Get next block number or InvalidBlockNumber when we're done. + * + * Uses the same logic as ANALYZE for picking the random blocks. + */ +Datum +tsm_system_nextblock(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; + BlockNumber blockno; + + if (!BlockSampler_HasMore(&sampler->bs)) + PG_RETURN_UINT32(InvalidBlockNumber); + + blockno = BlockSampler_Next(&sampler->bs); + + PG_RETURN_UINT32(blockno); +} + +/* + * Get next tuple offset in current block or InvalidOffsetNumber if we are done + * with this block. + */ +Datum +tsm_system_nexttuple(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + OffsetNumber maxoffset = PG_GETARG_UINT16(2); + SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; + OffsetNumber tupoffset = sampler->lt; + + if (tupoffset == InvalidOffsetNumber) + tupoffset = FirstOffsetNumber; + else + tupoffset++; + + if (tupoffset > maxoffset) + tupoffset = InvalidOffsetNumber; + + sampler->lt = tupoffset; + + PG_RETURN_UINT16(tupoffset); +} + +/* + * Cleanup method. + */ +Datum +tsm_system_end(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + + pfree(tsdesc->tsmdata); + + PG_RETURN_VOID(); +} + +/* + * Reset state (called by ReScan). + */ +Datum +tsm_system_reset(PG_FUNCTION_ARGS) +{ + TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); + SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; + + sampler->lt = InvalidOffsetNumber; + BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, + sampler->seed); + + PG_RETURN_VOID(); +} + +/* + * Costing function. + */ +Datum +tsm_system_cost(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Path *path = (Path *) PG_GETARG_POINTER(1); + RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); + List *args = (List *) PG_GETARG_POINTER(3); + BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); + double *tuples = (double *) PG_GETARG_POINTER(5); + Node *pctnode; + float4 samplesize; + + pctnode = linitial(args); + pctnode = estimate_expression_value(root, pctnode); + + if (IsA(pctnode, RelabelType)) + pctnode = (Node *) ((RelabelType *) pctnode)->arg; + + if (IsA(pctnode, Const)) + { + samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); + samplesize /= 100.0; + } + else + { + /* Default samplesize if the estimation didn't return Const. */ + samplesize = 0.1f; + } + + *pages = baserel->pages * samplesize; + *tuples = path->rows * samplesize; + path->rows = *tuples; + + PG_RETURN_VOID(); +} diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c new file mode 100644 index 00000000000..ef55d062e75 --- /dev/null +++ b/src/backend/access/tablesample/tablesample.c @@ -0,0 +1,368 @@ +/*------------------------------------------------------------------------- + * + * tablesample.c + * TABLESAMPLE internal API + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/tablesample/tablesample.c + * + * TABLESAMPLE is the SQL standard clause for sampling the relations. + * + * The API is interface between the Executor and the TABLESAMPLE Methods. + * + * TABLESAMPLE Methods are implementations of actual sampling algorithms which + * can be used for returning a sample of the source relation. + * Methods don't read the table directly but are asked for block number and + * tuple offset which they want to examine (or return) and the tablesample + * interface implemented here does the reading for them. + * + * We currently only support sampling of the physical relations, but in the + * future we might extend the API to support subqueries as well. + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/tablesample.h" + +#include "catalog/pg_tablesample_method.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/predicate.h" +#include "utils/rel.h" +#include "utils/tqual.h" + + +static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan); + + +/* + * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method. + */ +TableSampleDesc * +tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample) +{ + FunctionCallInfoData fcinfo; + int i; + List *args = tablesample->args; + ListCell *arg; + ExprContext *econtext = scanstate->ss.ps.ps_ExprContext; + TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc)); + + /* Load functions */ + fmgr_info(tablesample->tsminit, &(tsdesc->tsminit)); + fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock)); + fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple)); + if (OidIsValid(tablesample->tsmexaminetuple)) + fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple)); + else + tsdesc->tsmexaminetuple.fn_oid = InvalidOid; + fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset)); + fmgr_info(tablesample->tsmend, &(tsdesc->tsmend)); + + InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit, + list_length(args) + 2, + InvalidOid, NULL, NULL); + + tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor; + tsdesc->heapScan = scanstate->ss.ss_currentScanDesc; + + /* First argument for init function is always TableSampleDesc */ + fcinfo.arg[0] = PointerGetDatum(tsdesc); + fcinfo.argnull[0] = false; + + /* + * Second arg for init function is always REPEATABLE + * When tablesample->repeatable is NULL then REPEATABLE clause was not + * specified. + * When specified, the expression cannot evaluate to NULL. + */ + if (tablesample->repeatable) + { + ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable, + (PlanState *) scanstate); + fcinfo.arg[1] = ExecEvalExpr(argstate, econtext, + &fcinfo.argnull[1], NULL); + if (fcinfo.argnull[1]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("REPEATABLE clause must be NOT NULL numeric value"))); + } + else + { + fcinfo.arg[1] = UInt32GetDatum(random()); + fcinfo.argnull[1] = false; + } + + /* Rest of the arguments come from user. */ + i = 2; + foreach(arg, args) + { + Expr *argexpr = (Expr *) lfirst(arg); + ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate); + + if (argstate == NULL) + { + fcinfo.argnull[i] = true; + fcinfo.arg[i] = (Datum) 0;; + } + + fcinfo.arg[i] = ExecEvalExpr(argstate, econtext, + &fcinfo.argnull[i], NULL); + i++; + } + Assert(i == fcinfo.nargs); + + (void) FunctionCallInvoke(&fcinfo); + + return tsdesc; +} + +/* + * Get next tuple from TABLESAMPLE Method. + */ +HeapTuple +tablesample_getnext(TableSampleDesc *desc) +{ + HeapScanDesc scan = desc->heapScan; + HeapTuple tuple = &(scan->rs_ctup); + bool pagemode = scan->rs_pageatatime; + BlockNumber blockno; + Page page; + bool page_all_visible; + ItemId itemid; + OffsetNumber tupoffset, + maxoffset; + + if (!scan->rs_inited) + { + /* + * return null immediately if relation is empty + */ + if (scan->rs_nblocks == 0) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return NULL; + } + blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock, + PointerGetDatum(desc))); + if (!BlockNumberIsValid(blockno)) + { + tuple->t_data = NULL; + return NULL; + } + + heapgetpage(scan, blockno); + scan->rs_inited = true; + } + else + { + /* continue from previously returned page/tuple */ + blockno = scan->rs_cblock; /* current page */ + } + + /* + * When pagemode is disabled, the scan will do visibility checks for each + * tuple it finds so the buffer needs to be locked. + */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + page = (Page) BufferGetPage(scan->rs_cbuf); + page_all_visible = PageIsAllVisible(page); + maxoffset = PageGetMaxOffsetNumber(page); + + for (;;) + { + CHECK_FOR_INTERRUPTS(); + + tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple, + PointerGetDatum(desc), + UInt32GetDatum(blockno), + UInt16GetDatum(maxoffset))); + + if (OffsetNumberIsValid(tupoffset)) + { + bool visible; + bool found; + + /* Skip invalid tuple pointers. */ + itemid = PageGetItemId(page, tupoffset); + if (!ItemIdIsNormal(itemid)) + continue; + + tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid); + tuple->t_len = ItemIdGetLength(itemid); + ItemPointerSet(&(tuple->t_self), blockno, tupoffset); + + if (page_all_visible) + visible = true; + else + visible = SampleTupleVisible(tuple, tupoffset, scan); + + /* + * Let the sampling method examine the actual tuple and decide if we + * should return it. + * + * Note that we let it examine even invisible tuples for + * statistical purposes, but not return them since user should + * never see invisible tuples. + */ + if (OidIsValid(desc->tsmexaminetuple.fn_oid)) + { + found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple, + PointerGetDatum(desc), + UInt32GetDatum(blockno), + PointerGetDatum(tuple), + BoolGetDatum(visible))); + /* Should not happen if sampling method is well written. */ + if (found && !visible) + elog(ERROR, "Sampling method wanted to return invisible tuple"); + } + else + found = visible; + + /* Found visible tuple, return it. */ + if (found) + { + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + break; + } + else + { + /* Try next tuple from same page. */ + continue; + } + } + + + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock, + PointerGetDatum(desc))); + + /* + * Report our new scan position for synchronization purposes. We + * don't do that when moving backwards, however. That would just + * mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ? + blockno : scan->rs_startblock); + + /* + * Reached end of scan. + */ + if (!BlockNumberIsValid(blockno)) + { + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + tuple->t_data = NULL; + scan->rs_inited = false; + return NULL; + } + + heapgetpage(scan, blockno); + + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + page = (Page) BufferGetPage(scan->rs_cbuf); + page_all_visible = PageIsAllVisible(page); + maxoffset = PageGetMaxOffsetNumber(page); + } + + pgstat_count_heap_getnext(scan->rs_rd); + + return &(scan->rs_ctup); +} + +/* + * Reset the sampling to starting state + */ +void +tablesample_reset(TableSampleDesc *desc) +{ + (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc)); +} + +/* + * Signal the sampling method that the scan has finished. + */ +void +tablesample_end(TableSampleDesc *desc) +{ + (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc)); +} + +/* + * Check visibility of the tuple. + */ +static bool +SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) +{ + /* + * If this scan is reading whole pages at a time, there is already + * visibility info present in rs_vistuples so we can just search it + * for the tupoffset. + */ + if (scan->rs_pageatatime) + { + int start = 0, + end = scan->rs_ntuples - 1; + + /* + * Do the binary search over rs_vistuples, it's already sorted by + * OffsetNumber so we don't need to do any sorting ourselves here. + * + * We could use bsearch() here but it's slower for integers because + * of the function call overhead and because it needs boiler plate code + * it would not save us anything code-wise anyway. + */ + while (start <= end) + { + int mid = start + (end - start) / 2; + OffsetNumber curoffset = scan->rs_vistuples[mid]; + + if (curoffset == tupoffset) + return true; + else if (curoffset > tupoffset) + end = mid - 1; + else + start = mid + 1; + } + + return false; + } + else + { + /* No pagemode, we have to check the tuple itself. */ + Snapshot snapshot = scan->rs_snapshot; + Buffer buffer = scan->rs_cbuf; + + bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer); + + CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer, + snapshot); + + return visible; + } +} |