aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2019-05-17 18:52:01 -0700
committerAndres Freund <andres@anarazel.de>2019-05-17 18:56:55 -0700
commit147e3722f7e531f15ba389a4d518efe8cd0bd736 (patch)
treee48a71b19f0e9412b93ef15b8e40edd65f7a5415 /src
parent7f44ede5941499c4cee13b812dd93335f4005095 (diff)
downloadpostgresql-147e3722f7e531f15ba389a4d518efe8cd0bd736.tar.gz
postgresql-147e3722f7e531f15ba389a4d518efe8cd0bd736.zip
tableam: Avoid relying on relation size to determine validity of tids.
Instead add a tableam callback to do so. To avoid adding per-validation overhead, pass a scan to tuple_tid_valid. In heap's case we'd otherwise have incurred a RelationGetNumberOfBlocks() call for each tid - which would have added noticeable overhead to nodeTidscan.c. Author: Andres Freund Reviewed-By: Ashwin Agrawal Discussion: https://postgr.es/m/20190515185447.gno2jtqxyktylyvs@alap3.anarazel.de
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/heap/heapam.c26
-rw-r--r--src/backend/access/heap/heapam_handler.c10
-rw-r--r--src/backend/access/table/tableam.c27
-rw-r--r--src/backend/executor/nodeTidscan.c73
-rw-r--r--src/backend/utils/adt/tid.c10
-rw-r--r--src/include/access/heapam.h3
-rw-r--r--src/include/access/tableam.h37
7 files changed, 129 insertions, 57 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ec9853603fd..d8d4f3b1f5a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1654,8 +1654,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
/*
* heap_get_latest_tid - get the latest tid of a specified tuple
*
- * Actually, this gets the latest version that is visible according to
- * the passed snapshot. You can pass SnapshotDirty to get the very latest,
+ * Actually, this gets the latest version that is visible according to the
+ * scan's snapshot. Create a scan using SnapshotDirty to get the very latest,
* possibly uncommitted version.
*
* *tid is both an input and an output parameter: it is updated to
@@ -1663,28 +1663,20 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
* if no version of the row passes the snapshot test.
*/
void
-heap_get_latest_tid(Relation relation,
- Snapshot snapshot,
+heap_get_latest_tid(TableScanDesc sscan,
ItemPointer tid)
{
- BlockNumber blk;
+ Relation relation = sscan->rs_rd;
+ Snapshot snapshot = sscan->rs_snapshot;
ItemPointerData ctid;
TransactionId priorXmax;
- /* this is to avoid Assert failures on bad input */
- if (!ItemPointerIsValid(tid))
- return;
-
/*
- * Since this can be called with user-supplied TID, don't trust the input
- * too much. (RelationGetNumberOfBlocks is an expensive check, so we
- * don't check t_ctid links again this way. Note that it would not do to
- * call it just once and save the result, either.)
+ * table_get_latest_tid verified that the passed in tid is valid. Assume
+ * that t_ctid links are valid however - there shouldn't be invalid ones
+ * in the table.
*/
- blk = ItemPointerGetBlockNumber(tid);
- if (blk >= RelationGetNumberOfBlocks(relation))
- elog(ERROR, "block number %u is out of range for relation \"%s\"",
- blk, RelationGetRelationName(relation));
+ Assert(ItemPointerIsValid(tid));
/*
* Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 9aa468295ae..35553c7c92d 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -205,6 +205,15 @@ heapam_fetch_row_version(Relation relation,
}
static bool
+heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
+{
+ HeapScanDesc hscan = (HeapScanDesc) scan;
+ /* bound the tid's block number by the block count kept in the scan */
+ return ItemPointerIsValid(tid) &&
+ ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
+}
+
+static bool
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
Snapshot snapshot)
{
@@ -2568,6 +2577,7 @@ static const TableAmRoutine heapam_methods = {
.tuple_fetch_row_version = heapam_fetch_row_version,
.tuple_get_latest_tid = heap_get_latest_tid,
+ .tuple_tid_valid = heapam_tuple_tid_valid,
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index baba1ea699b..6e46befdfd9 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -213,6 +213,33 @@ table_index_fetch_tuple_check(Relation rel,
}
+/* ------------------------------------------------------------------------
+ * Functions for non-modifying operations on individual tuples
+ * ------------------------------------------------------------------------
+ */
+
+void
+table_get_latest_tid(TableScanDesc scan, ItemPointer tid)
+{
+	Relation	rel = scan->rs_rd;
+	const TableAmRoutine *tableam = rel->rd_tableam;
+	/*
+	 * Since this can be called with user-supplied TID, don't trust the input
+	 * too much: let the AM vet it, and raise an error if it is rejected.
+	 */
+	if (!tableam->tuple_tid_valid(scan, tid))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("tid (%u, %u) is not valid for relation \"%s\"",
+						ItemPointerGetBlockNumberNoCheck(tid),
+						ItemPointerGetOffsetNumberNoCheck(tid),
+						RelationGetRelationName(rel))));
+
+	/* the callback returns void, so invoke it as a plain statement */
+	tableam->tuple_get_latest_tid(scan, tid);
+}
+
+
/* ----------------------------------------------------------------------------
* Functions to make modifications a bit simpler.
* ----------------------------------------------------------------------------
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 156be56a57d..93335864a1e 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -129,19 +129,23 @@ static void
TidListEval(TidScanState *tidstate)
{
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
- BlockNumber nblocks;
+ TableScanDesc scan;
ItemPointerData *tidList;
int numAllocTids;
int numTids;
ListCell *l;
/*
- * We silently discard any TIDs that are out of range at the time of scan
- * start. (Since we hold at least AccessShareLock on the table, it won't
- * be possible for someone to truncate away the blocks we intend to
- * visit.)
+ * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
+ * the size of the table), so it makes sense to delay that until needed -
+ * the node might never get executed.
*/
- nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
+ if (tidstate->ss.ss_currentScanDesc == NULL)
+ tidstate->ss.ss_currentScanDesc =
+ table_beginscan(tidstate->ss.ss_currentRelation,
+ tidstate->ss.ps.state->es_snapshot,
+ 0, NULL);
+ scan = tidstate->ss.ss_currentScanDesc;
/*
* We initialize the array with enough slots for the case that all quals
@@ -165,19 +169,27 @@ TidListEval(TidScanState *tidstate)
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
econtext,
&isNull));
- if (!isNull &&
- ItemPointerIsValid(itemptr) &&
- ItemPointerGetBlockNumber(itemptr) < nblocks)
+ if (isNull)
+ continue;
+
+ /*
+ * We silently discard any TIDs that the AM considers invalid
+ * (E.g. for heap, they could be out of range at the time of scan
+ * start. Since we hold at least AccessShareLock on the table, it
+ * won't be possible for someone to truncate away the blocks we
+ * intend to visit.).
+ */
+ if (!table_tuple_tid_valid(scan, itemptr))
+ continue;
+
+ if (numTids >= numAllocTids)
{
- if (numTids >= numAllocTids)
- {
- numAllocTids *= 2;
- tidList = (ItemPointerData *)
- repalloc(tidList,
- numAllocTids * sizeof(ItemPointerData));
- }
- tidList[numTids++] = *itemptr;
+ numAllocTids *= 2;
+ tidList = (ItemPointerData *)
+ repalloc(tidList,
+ numAllocTids * sizeof(ItemPointerData));
}
+ tidList[numTids++] = *itemptr;
}
else if (tidexpr->exprstate && tidexpr->isarray)
{
@@ -206,13 +218,15 @@ TidListEval(TidScanState *tidstate)
}
for (i = 0; i < ndatums; i++)
{
- if (!ipnulls[i])
- {
- itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
- if (ItemPointerIsValid(itemptr) &&
- ItemPointerGetBlockNumber(itemptr) < nblocks)
- tidList[numTids++] = *itemptr;
- }
+ if (ipnulls[i])
+ continue;
+
+ itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
+
+ if (!table_tuple_tid_valid(scan, itemptr))
+ continue;
+
+ tidList[numTids++] = *itemptr;
}
pfree(ipdatums);
pfree(ipnulls);
@@ -306,6 +320,7 @@ TidNext(TidScanState *node)
EState *estate;
ScanDirection direction;
Snapshot snapshot;
+ TableScanDesc scan;
Relation heapRelation;
TupleTableSlot *slot;
ItemPointerData *tidList;
@@ -327,6 +342,7 @@ TidNext(TidScanState *node)
if (node->tss_TidList == NULL)
TidListEval(node);
+ scan = node->ss.ss_currentScanDesc;
tidList = node->tss_TidList;
numTids = node->tss_NumTids;
@@ -365,7 +381,7 @@ TidNext(TidScanState *node)
* current according to our snapshot.
*/
if (node->tss_isCurrentOf)
- table_get_latest_tid(heapRelation, snapshot, &tid);
+ table_get_latest_tid(scan, &tid);
if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
return slot;
@@ -442,6 +458,10 @@ ExecReScanTidScan(TidScanState *node)
node->tss_NumTids = 0;
node->tss_TidPtr = -1;
+ /* not really necessary, but seems good form */
+ if (node->ss.ss_currentScanDesc)
+ table_rescan(node->ss.ss_currentScanDesc, NULL);
+
ExecScanReScan(&node->ss);
}
@@ -455,6 +475,9 @@ ExecReScanTidScan(TidScanState *node)
void
ExecEndTidScan(TidScanState *node)
{
+ if (node->ss.ss_currentScanDesc)
+ table_endscan(node->ss.ss_currentScanDesc);
+
/*
* Free the exprcontext
*/
diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c
index 6ab26d8ea8b..1aab30b6aab 100644
--- a/src/backend/utils/adt/tid.c
+++ b/src/backend/utils/adt/tid.c
@@ -358,6 +358,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
Relation rel;
AclResult aclresult;
Snapshot snapshot;
+ TableScanDesc scan;
result = (ItemPointer) palloc(sizeof(ItemPointerData));
if (!reloid)
@@ -380,7 +381,9 @@ currtid_byreloid(PG_FUNCTION_ARGS)
ItemPointerCopy(tid, result);
snapshot = RegisterSnapshot(GetLatestSnapshot());
- table_get_latest_tid(rel, snapshot, result);
+ scan = table_beginscan(rel, snapshot, 0, NULL);
+ table_get_latest_tid(scan, result);
+ table_endscan(scan);
UnregisterSnapshot(snapshot);
table_close(rel, AccessShareLock);
@@ -398,6 +401,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
Relation rel;
AclResult aclresult;
Snapshot snapshot;
+ TableScanDesc scan;
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = table_openrv(relrv, AccessShareLock);
@@ -415,7 +419,9 @@ currtid_byrelname(PG_FUNCTION_ARGS)
ItemPointerCopy(tid, result);
snapshot = RegisterSnapshot(GetLatestSnapshot());
- table_get_latest_tid(rel, snapshot, result);
+ scan = table_beginscan(rel, snapshot, 0, NULL);
+ table_get_latest_tid(scan, result);
+ table_endscan(scan);
UnregisterSnapshot(snapshot);
table_close(rel, AccessShareLock);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 77e5e603b03..6b8c7020c8c 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -134,8 +134,7 @@ extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
bool *all_dead, bool first_call);
-extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
- ItemPointer tid);
+extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid);
extern void setLastTid(const ItemPointer tid);
extern BulkInsertState GetBulkInsertState(void);
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index c5d64602036..8fbeb020337 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -309,11 +309,16 @@ typedef struct TableAmRoutine
TupleTableSlot *slot);
/*
+ * Is tid valid for a scan of this relation.
+ */
+ bool (*tuple_tid_valid) (TableScanDesc scan,
+ ItemPointer tid);
+
+ /*
* Return the latest version of the tuple at `tid`, by updating `tid` to
* point at the newest version.
*/
- void (*tuple_get_latest_tid) (Relation rel,
- Snapshot snapshot,
+ void (*tuple_get_latest_tid) (TableScanDesc scan,
ItemPointer tid);
/*
@@ -548,10 +553,10 @@ typedef struct TableAmRoutine
/*
* See table_relation_size().
*
- * Note that currently a few callers use the MAIN_FORKNUM size to vet the
- * validity of tids (e.g. nodeTidscans.c), and others use it to figure out
- * the range of potentially interesting blocks (brin, analyze). The
- * abstraction around this will need to be improved in the near future.
+ * Note that currently a few callers use the MAIN_FORKNUM size to figure
+ * out the range of potentially interesting blocks (brin, analyze). It's
+ * probable that we'll need to revise the interface for those at some
+ * point.
*/
uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
@@ -986,16 +991,26 @@ table_fetch_row_version(Relation rel,
}
/*
- * Return the latest version of the tuple at `tid`, by updating `tid` to
- * point at the newest version.
+ * Verify that `tid` is a potentially valid tuple identifier. That doesn't
+ * mean that the pointed to row needs to exist or be visible, but that
+ * attempting to fetch the row (e.g. with table_get_latest_tid() or
+ * table_fetch_row_version()) should not error out if called with that tid.
+ *
+ * `scan` needs to have been started via table_beginscan().
*/
-static inline void
-table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
+static inline bool
+table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
- rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
+ return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}
/*
+ * Return the latest version of the tuple at `tid`, by updating `tid` to
+ * point at the newest version.
+ */
+extern void table_get_latest_tid(TableScanDesc scan, ItemPointer tid);
+
+/*
* Return true iff tuple in slot satisfies the snapshot.
*
* This assumes the slot's tuple is valid, and of the appropriate type for the