aboutsummaryrefslogtreecommitdiff
path: root/src/include/access/tableam.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/access/tableam.h')
-rw-r--r--src/include/access/tableam.h468
1 files changed, 467 insertions, 1 deletions
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index ccdc6de3ae5..f2913b8cff9 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -14,31 +14,497 @@
#ifndef TABLEAM_H
#define TABLEAM_H
+#include "access/relscan.h"
+#include "access/sdir.h"
#include "utils/guc.h"
+#include "utils/rel.h"
+#include "utils/snapshot.h"
#define DEFAULT_TABLE_ACCESS_METHOD "heap"
extern char *default_table_access_method;
-
+extern bool synchronize_seqscans;
/*
* API struct for a table AM. Note this must be allocated in a
* server-lifetime manner, typically as a static const struct, which then gets
* returned by FormData_pg_am.amhandler.
+ *
+ * In most cases it's not appropriate to call the callbacks directly;
+ * instead use the table_* wrapper functions.
+ *
+ * GetTableAmRoutine() asserts that required callbacks are filled in, remember
+ * to update when adding a callback.
*/
typedef struct TableAmRoutine
{
/* this must be set to T_TableAmRoutine */
NodeTag type;
+
+
+ /* ------------------------------------------------------------------------
+ * Slot related callbacks.
+ * ------------------------------------------------------------------------
+ */
+
+ /*
+ * Return slot implementation suitable for storing a tuple of this AM.
+ */
+ const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
+
+
+ /* ------------------------------------------------------------------------
+ * Table scan callbacks.
+ * ------------------------------------------------------------------------
+ */
+
+ /*
+ * Start a scan of `rel`. The callback has to return a TableScanDesc,
+ * which will typically be embedded in a larger, AM specific, struct.
+ *
+ * If nkeys != 0, the results need to be filtered by those scan keys.
+ *
+ * pscan, if not NULL, will have already been initialized with
+ * parallelscan_initialize(), and has to be for the same relation. Will
+ * only be set coming from table_beginscan_parallel().
+ *
+ * allow_{strat, sync, pagemode} specify whether a scan strategy,
+ * synchronized scans, or page mode may be used (although not every AM
+ * will support those).
+ *
+ * is_{bitmapscan, samplescan} specify whether the scan is intended to
+ * support those types of scans.
+ *
+ * if temp_snap is true, the snapshot will need to be deallocated at
+ * scan_end.
+ */
+ TableScanDesc (*scan_begin) (Relation rel,
+ Snapshot snapshot,
+ int nkeys, struct ScanKeyData *key,
+ ParallelTableScanDesc pscan,
+ bool allow_strat,
+ bool allow_sync,
+ bool allow_pagemode,
+ bool is_bitmapscan,
+ bool is_samplescan,
+ bool temp_snap);
+
+ /*
+ * Release resources and deallocate scan. If TableScanDesc.temp_snap is
+ * set, TableScanDesc.rs_snapshot needs to be unregistered.
+ */
+ void (*scan_end) (TableScanDesc scan);
+
+ /*
+ * Restart relation scan. If set_params is set to true, allow_{strat,
+ * sync, pagemode} (see scan_begin) changes should be taken into account.
+ */
+ void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, bool set_params,
+ bool allow_strat, bool allow_sync, bool allow_pagemode);
+
+ /*
+ * Return next tuple from `scan`, store in slot.
+ */
+ bool (*scan_getnextslot) (TableScanDesc scan,
+ ScanDirection direction, TupleTableSlot *slot);
+
+
+ /* ------------------------------------------------------------------------
+ * Parallel table scan related functions.
+ * ------------------------------------------------------------------------
+ */
+
+ /*
+ * Estimate the size of shared memory needed for a parallel scan of this
+ * relation. The snapshot does not need to be accounted for.
+ */
+ Size (*parallelscan_estimate) (Relation rel);
+
+ /*
+ * Initialize ParallelTableScanDesc for a parallel scan of this relation.
+ * pscan will be sized according to parallelscan_estimate() for the same
+ * relation.
+ */
+ Size (*parallelscan_initialize) (Relation rel, ParallelTableScanDesc pscan);
+
+ /*
+ * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
+ * when `pscan` was initialized by parallelscan_initialize.
+ */
+ void (*parallelscan_reinitialize) (Relation rel, ParallelTableScanDesc pscan);
+
+
+ /* ------------------------------------------------------------------------
+ * Index Scan Callbacks
+ * ------------------------------------------------------------------------
+ */
+
+ /*
+ * Prepare to fetch tuples from the relation, as needed when fetching
+ * tuples for an index scan. The callback has to return an
+ * IndexFetchTableData, which the AM will typically embed in a larger
+ * structure with additional information.
+ *
+ * Tuples for an index scan can then be fetched via index_fetch_tuple.
+ */
+ struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
+
+ /*
+ * Reset index fetch. Typically this will release cross index fetch
+ * resources held in IndexFetchTableData.
+ */
+ void (*index_fetch_reset) (struct IndexFetchTableData *data);
+
+ /*
+ * Release resources and deallocate index fetch.
+ */
+ void (*index_fetch_end) (struct IndexFetchTableData *data);
+
+ /*
+ * Fetch tuple at `tid` into `slot`, after doing a visibility test
+ * according to `snapshot`. If a tuple was found and passed the visibility
+ * test, return true, false otherwise.
+ *
+ * Note that AMs that do not necessarily update indexes when indexed
+ * columns do not change, need to return the current/correct version of a
+ * tuple as appropriate, even if the tid points to an older version of the
+ * tuple.
+ *
+ * *call_again is false on the first call to index_fetch_tuple for a tid.
+ * If there potentially is another tuple matching the tid, *call_again
+ * needs to be set to true by index_fetch_tuple, signalling to the caller
+ * that index_fetch_tuple should be called again for the same tid.
+ *
+ * *all_dead should be set to true by index_fetch_tuple iff it is
+ * guaranteed that no backend needs to see that tuple. Index AMs can use
+ * that to avoid returning that tid in future searches.
+ */
+ bool (*index_fetch_tuple) (struct IndexFetchTableData *scan,
+ ItemPointer tid,
+ Snapshot snapshot,
+ TupleTableSlot *slot,
+ bool *call_again, bool *all_dead);
+
+ /* ------------------------------------------------------------------------
+ * Callbacks for non-modifying operations on individual tuples
+ * ------------------------------------------------------------------------
+ */
+
+ /*
+ * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
+ * the appropriate type for the AM.
+ */
+ bool (*tuple_satisfies_snapshot) (Relation rel,
+ TupleTableSlot *slot,
+ Snapshot snapshot);
+
} TableAmRoutine;
+/* ----------------------------------------------------------------------------
+ * Slot functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * Returns slot callbacks suitable for holding tuples of the appropriate type
+ * for the relation. Works for tables, views, foreign tables and partitioned
+ * tables.
+ */
+extern const TupleTableSlotOps *table_slot_callbacks(Relation rel);
+
+/*
+ * Returns slot using the callbacks returned by table_slot_callbacks(), and
+ * registers it on *reglist.
+ */
+extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);
+
+
+/* ----------------------------------------------------------------------------
+ * Table scan functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * Begin a plain scan of `rel`.  Only tuples passing the visibility test of
+ * `snapshot` are returned; when nkeys != 0 the results are additionally
+ * filtered by the scan keys in `key`.
+ *
+ * The AM is told that a buffer access strategy, synchronized scans and page
+ * mode may all be used, and that this is neither a parallel, bitmap nor
+ * sample scan.
+ */
+static inline TableScanDesc
+table_beginscan(Relation rel, Snapshot snapshot,
+				int nkeys, struct ScanKeyData *key)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	return tableam->scan_begin(rel, snapshot, nkeys, key,
+							   NULL,	/* pscan */
+							   true,	/* allow_strat */
+							   true,	/* allow_sync */
+							   true,	/* allow_pagemode */
+							   false,	/* is_bitmapscan */
+							   false,	/* is_samplescan */
+							   false);	/* temp_snap */
+}
+
+/*
+ * Like table_beginscan(), but for scanning catalog. It'll automatically use a
+ * snapshot appropriate for scanning catalog relations.
+ */
+extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys,
+ struct ScanKeyData *key);
+
+/*
+ * Variant of table_beginscan() with an extended API: the caller decides
+ * whether a nondefault buffer access strategy may be used (`allow_strat`)
+ * and whether syncscan may be chosen (`allow_sync`), the latter possibly
+ * resulting in the scan not starting from block zero.  Plain
+ * table_beginscan() passes true for both.
+ */
+static inline TableScanDesc
+table_beginscan_strat(Relation rel, Snapshot snapshot,
+					  int nkeys, struct ScanKeyData *key,
+					  bool allow_strat, bool allow_sync)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	return tableam->scan_begin(rel, snapshot, nkeys, key,
+							   NULL,	/* pscan */
+							   allow_strat,
+							   allow_sync,
+							   true,	/* allow_pagemode */
+							   false,	/* is_bitmapscan */
+							   false,	/* is_samplescan */
+							   false);	/* temp_snap */
+}
+
+
+
+/*
+ * Alternative entry point that sets up a TableScanDesc for a bitmap heap
+ * scan.  That scan technology is really quite unlike a standard seqscan, but
+ * there is just enough commonality to make sharing the data structure
+ * worthwhile.
+ *
+ * Scan strategy and synchronized scans are disallowed, page mode is allowed.
+ */
+static inline TableScanDesc
+table_beginscan_bm(Relation rel, Snapshot snapshot,
+				   int nkeys, struct ScanKeyData *key)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	return tableam->scan_begin(rel, snapshot, nkeys, key,
+							   NULL,	/* pscan */
+							   false,	/* allow_strat */
+							   false,	/* allow_sync */
+							   true,	/* allow_pagemode */
+							   true,	/* is_bitmapscan */
+							   false,	/* is_samplescan */
+							   false);	/* temp_snap */
+}
+
+/*
+ * Alternative entry point that sets up a TableScanDesc for a TABLESAMPLE
+ * scan.  As with bitmap scans, the behavior is rather different from a plain
+ * scan but the data structure is worth sharing.
+ *
+ * On top of the options offered by table_beginscan_strat(), the caller also
+ * controls whether page-mode visibility checking is used (`allow_pagemode`).
+ */
+static inline TableScanDesc
+table_beginscan_sampling(Relation rel, Snapshot snapshot,
+						 int nkeys, struct ScanKeyData *key,
+						 bool allow_strat, bool allow_sync,
+						 bool allow_pagemode)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	return tableam->scan_begin(rel, snapshot, nkeys, key,
+							   NULL,	/* pscan */
+							   allow_strat,
+							   allow_sync,
+							   allow_pagemode,
+							   false,	/* is_bitmapscan */
+							   true,	/* is_samplescan */
+							   false);	/* temp_snap */
+}
+
+/*
+ * Alternative entry point that sets up a TableScanDesc for an ANALYZE scan.
+ * As with bitmap scans, the behavior is rather different from a plain scan
+ * but the data structure is worth sharing.
+ *
+ * No snapshot and no scan keys are passed, and the scan is flagged to the AM
+ * as a sample scan.
+ */
+static inline TableScanDesc
+table_beginscan_analyze(Relation rel)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	return tableam->scan_begin(rel,
+							   NULL,	/* snapshot */
+							   0,		/* nkeys */
+							   NULL,	/* key */
+							   NULL,	/* pscan */
+							   true,	/* allow_strat */
+							   false,	/* allow_sync */
+							   true,	/* allow_pagemode */
+							   false,	/* is_bitmapscan */
+							   true,	/* is_samplescan */
+							   false);	/* temp_snap */
+}
+
+/*
+ * Finish a relation scan by dispatching to the scan_end callback of the AM
+ * that owns the scanned relation.
+ */
+static inline void
+table_endscan(TableScanDesc scan)
+{
+	const TableAmRoutine *tableam = scan->rs_rd->rd_tableam;
+
+	tableam->scan_end(scan);
+}
+
+
+/*
+ * Restart a relation scan with the scan keys in `key`, leaving the scan's
+ * strategy / syncscan / pagemode parameters untouched (set_params is false,
+ * so the allow_* values passed here are placeholders).
+ */
+static inline void
+table_rescan(TableScanDesc scan,
+			 struct ScanKeyData *key)
+{
+	const TableAmRoutine *tableam = scan->rs_rd->rd_tableam;
+
+	tableam->scan_rescan(scan, key,
+						 false, /* set_params */
+						 false, /* allow_strat */
+						 false, /* allow_sync */
+						 false);	/* allow_pagemode */
+}
+
+/*
+ * Restart a relation scan after changing its parameters.
+ *
+ * The buffer strategy, syncscan, and pagemode options can be changed before
+ * the fresh scan starts.  Note that although the actual use of syncscan
+ * might change (effectively, enabling or disabling reporting), the
+ * previously selected startblock will be kept.
+ */
+static inline void
+table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
+						bool allow_strat, bool allow_sync,
+						bool allow_pagemode)
+{
+	const TableAmRoutine *tableam = scan->rs_rd->rd_tableam;
+
+	tableam->scan_rescan(scan, key,
+						 true,	/* set_params */
+						 allow_strat, allow_sync, allow_pagemode);
+}
+
+/*
+ * Update snapshot used by the scan.
+ */
+extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
+
+
+/*
+ * Fetch the next tuple of `sscan` in the given direction and store it in
+ * `slot`.  The return value is whatever the AM's scan_getnextslot callback
+ * returns.
+ */
+static inline bool
+table_scan_getnextslot(TableScanDesc sscan,
+					   ScanDirection direction,
+					   TupleTableSlot *slot)
+{
+	const TableAmRoutine *tableam = sscan->rs_rd->rd_tableam;
+
+	/* stamp the slot with the OID of the relation being scanned */
+	slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
+
+	return tableam->scan_getnextslot(sscan, direction, slot);
+}
+
+
+/* ----------------------------------------------------------------------------
+ * Parallel table scan related functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * Estimate the size of shared memory needed for a parallel scan of this
+ * relation.
+ */
+extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);
+
+/*
+ * Initialize ParallelTableScanDesc for a parallel scan of this
+ * relation. `pscan` needs to be sized according to parallelscan_estimate()
+ * for the same relation. Call this just once in the leader process; then,
+ * individual workers attach via table_beginscan_parallel.
+ */
+extern void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot);
+
+/*
+ * Begin a parallel scan. `pscan` needs to have been initialized with
+ * table_parallelscan_initialize(), for the same relation. The initialization
+ * does not need to have happened in this backend.
+ *
+ * Caller must hold a suitable lock on the correct relation.
+ */
+extern TableScanDesc table_beginscan_parallel(Relation rel, ParallelTableScanDesc pscan);
+
+/*
+ * Restart a parallel scan.  Call this in the leader process.  Caller is
+ * responsible for making sure that all workers have finished the scan
+ * beforehand.
+ *
+ * `rel` is the relation being scanned and `pscan` its shared parallel scan
+ * state; both are handed unchanged to the AM's parallelscan_reinitialize
+ * callback.
+ */
+static inline void
+table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
+{
+	/*
+	 * Plain call, not "return <expr>;": ISO C does not allow a return
+	 * statement with an expression in a function returning void.
+	 */
+	rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
+}
+
+
+/* ----------------------------------------------------------------------------
+ * Index scan related functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * Set up for fetching tuples from `rel` on behalf of an index scan.
+ *
+ * The returned IndexFetchTableData is what table_index_fetch_tuple() takes
+ * to actually fetch tuples.
+ */
+static inline IndexFetchTableData *
+table_index_fetch_begin(Relation rel)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	return tableam->index_fetch_begin(rel);
+}
+
+/*
+ * Reset an index fetch.  Typically this releases resources held in the
+ * IndexFetchTableData across index fetches.
+ */
+static inline void
+table_index_fetch_reset(struct IndexFetchTableData *scan)
+{
+	const TableAmRoutine *tableam = scan->rel->rd_tableam;
+
+	tableam->index_fetch_reset(scan);
+}
+
+/*
+ * Finish an index fetch: release its resources and deallocate it.
+ */
+static inline void
+table_index_fetch_end(struct IndexFetchTableData *scan)
+{
+	const TableAmRoutine *tableam = scan->rel->rd_tableam;
+
+	tableam->index_fetch_end(scan);
+}
+
+/*
+ * Fetch the tuple at `tid` into `slot`, after doing a visibility test
+ * according to `snapshot`.  Returns true if a tuple was found and passed the
+ * visibility test, false otherwise.
+ *
+ * *call_again needs to be false on the first call to
+ * table_index_fetch_tuple() for a tid.  If there potentially is another
+ * tuple matching the tid, *call_again will be set to true, signalling that
+ * table_index_fetch_tuple() should be called again for the same tid.
+ *
+ * *all_dead will be set to true by table_index_fetch_tuple() iff it is
+ * guaranteed that no backend needs to see that tuple.  Index AMs can use
+ * that to avoid returning that tid in future searches.
+ */
+static inline bool
+table_index_fetch_tuple(struct IndexFetchTableData *scan,
+						ItemPointer tid,
+						Snapshot snapshot,
+						TupleTableSlot *slot,
+						bool *call_again, bool *all_dead)
+{
+	const TableAmRoutine *tableam = scan->rel->rd_tableam;
+
+	return tableam->index_fetch_tuple(scan, tid, snapshot, slot,
+									  call_again, all_dead);
+}
+
+
+/* ------------------------------------------------------------------------
+ * Functions for non-modifying operations on individual tuples
+ * ------------------------------------------------------------------------
+ */
/*
+ * Return true iff tuple in slot satisfies the snapshot.
+ *
+ * This assumes the slot's tuple is valid, and of the appropriate type for the
+ * AM.
+ *
+ * Some AMs might modify the data underlying the tuple as a side-effect. If so
+ * they ought to mark the relevant buffer dirty.
+ */
+static inline bool
+table_tuple_satisfies_snapshot(Relation rel,
+							   TupleTableSlot *slot,
+							   Snapshot snapshot)
+{
+	const TableAmRoutine *tableam = rel->rd_tableam;
+
+	/* the visibility decision is entirely up to the relation's AM */
+	return tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
+}
+
+
+/* ----------------------------------------------------------------------------
+ * Helper functions to implement parallel scans for block oriented AMs.
+ * ----------------------------------------------------------------------------
+ */
+
+extern Size table_block_parallelscan_estimate(Relation rel);
+extern Size table_block_parallelscan_initialize(Relation rel,
+ ParallelTableScanDesc pscan);
+extern void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan);
+extern BlockNumber table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbscan);
+extern void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanDesc pbscan);
+
+
+/* ----------------------------------------------------------------------------
* Functions in tableamapi.c
+ * ----------------------------------------------------------------------------
*/
+
extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
extern const TableAmRoutine *GetTableAmRoutineByAmId(Oid amoid);
extern const TableAmRoutine *GetHeapamTableAmRoutine(void);