aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtree.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r-- src/backend/access/nbtree/nbtree.c | 196
1 files changed, 179 insertions, 17 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index a99667eb2bd..77781cb9002 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -31,6 +31,7 @@
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
+#include "utils/datum.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
@@ -76,14 +77,26 @@ typedef struct BTParallelScanDescData
/*
* btps_arrElems is used when scans need to schedule another primitive
- * index scan. Holds BTArrayKeyInfo.cur_elem offsets for scan keys.
+ * index scan with one or more SAOP arrays. Holds BTArrayKeyInfo.cur_elem
+ * offsets for each = scan key associated with a ScalarArrayOp array.
*/
int btps_arrElems[FLEXIBLE_ARRAY_MEMBER];
+
+ /*
+ * Additional space (at the end of the struct) is used when scans need to
+ * schedule another primitive index scan with one or more skip arrays.
+ * Holds the state of each = scan key associated with a skip array: its
+ * sk_flags, then (unless MINVAL/MAXVAL) a flattened datum for sk_argument.
+ */
} BTParallelScanDescData;
typedef struct BTParallelScanDescData *BTParallelScanDesc;
+static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan,
+ BTScanOpaque so);
+static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan,
+ BTScanOpaque so);
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state,
BTCycleId cycleid);
@@ -541,10 +554,167 @@ btrestrpos(IndexScanDesc scan)
* btestimateparallelscan -- estimate storage for BTParallelScanDescData
*/
Size
-btestimateparallelscan(int nkeys, int norderbys)
+btestimateparallelscan(Relation rel, int nkeys, int norderbys)
+{
+ int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
+ Size estnbtreeshared,
+ genericattrspace;
+
+ /*
+ * Pessimistically assume that every input scan key will be output with its own SAOP array (one int slot each).
+ * NOTE(review): _bt_parallel_serialize_arrays starts its datum area at btps_arrElems[so->numArrayKeys], and numArrayKeys counts skip arrays too -- confirm the space reserved here always suffices.
+ */
+ estnbtreeshared = offsetof(BTParallelScanDescData, btps_arrElems) +
+ sizeof(int) * nkeys;
+
+ /* Single column indexes cannot possibly use a skip array */
+ if (nkeyatts == 1)
+ return estnbtreeshared;
+
+ /*
+ * Pessimistically assume that all attributes prior to the least
+ * significant attribute require a skip array (and an associated key)
+ */
+ genericattrspace = datumEstimateSpace((Datum) 0, false, true,
+ sizeof(Datum)); /* only charged to pass-by-reference attributes below */
+ for (int attnum = 1; attnum < nkeyatts; attnum++)
+ {
+ CompactAttribute *attr;
+
+ /*
+ * We make the conservative assumption that every index column visited by
+ * this loop (all key columns except the last) will require a skip array.
+ *
+ * Every skip array must have space to store its scan key's sk_flags.
+ */
+ estnbtreeshared = add_size(estnbtreeshared, sizeof(int));
+
+ /* Consider space required to store a datum of opclass input type */
+ attr = TupleDescCompactAttr(rel->rd_att, attnum - 1);
+ if (attr->attbyval)
+ {
+ /* This index attribute stores pass-by-value datums */
+ Size estfixed = datumEstimateSpace((Datum) 0, false,
+ true, attr->attlen);
+
+ estnbtreeshared = add_size(estnbtreeshared, estfixed);
+ continue;
+ }
+
+ /*
+ * This index attribute stores pass-by-reference datums.
+ *
+ * Assume that serializing this array will use just as much space as a
+ * pass-by-value datum, in addition to space for the largest possible
+ * whole index tuple (this is not just a per-datum portion of the
+ * largest possible tuple because that'd be almost as large anyway).
+ *
+ * This is quite conservative, but it's not clear how we could do much
+ * better. The executor requires an up-front storage request size
+ * that reliably covers the scan's high watermark memory usage. We
+ * can't be sure of the real high watermark until the scan is over.
+ */
+ estnbtreeshared = add_size(estnbtreeshared, genericattrspace);
+ estnbtreeshared = add_size(estnbtreeshared, BTMaxItemSize);
+ }
+
+ return estnbtreeshared; /* fixed struct prefix + array slots + per-attribute skip array space */
+}
+
+/*
+ * _bt_parallel_serialize_arrays() -- Serialize parallel array state.
+ * Copies so's array keys into btscan: cur_elem for each SAOP array, sk_flags (plus any sk_argument) for each skip array.
+ * Caller must have exclusively locked btscan->btps_lock when called.
+ */
+static void
+_bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan,
+ BTScanOpaque so)
+{
+ char *datumshared; /* current write position for serialized skip array state */
+
+ /* Space for serialized skip array state begins immediately after all so->numArrayKeys slots of btps_arrElems[] */
+ datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
+ for (int i = 0; i < so->numArrayKeys; i++)
+ {
+ BTArrayKeyInfo *array = &so->arrayKeys[i];
+ ScanKey skey = &so->keyData[array->scan_key];
+
+ if (array->num_elems != -1) /* num_elems of -1 marks a skip array; anything else is a SAOP array */
+ {
+ /* Save SAOP array's cur_elem (no need to copy key/datum) */
+ Assert(!(skey->sk_flags & SK_BT_SKIP));
+ btscan->btps_arrElems[i] = array->cur_elem;
+ continue;
+ }
+
+ /* Save all mutable state associated with skip array's key, sk_flags first (slot i of btps_arrElems[] is left unused) */
+ Assert(skey->sk_flags & SK_BT_SKIP);
+ memcpy(datumshared, &skey->sk_flags, sizeof(int));
+ datumshared += sizeof(int);
+
+ if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
+ {
+ /* No sk_argument datum to serialize */
+ Assert(skey->sk_argument == 0);
+ continue;
+ }
+
+ datumSerialize(skey->sk_argument, (skey->sk_flags & SK_ISNULL) != 0,
+ array->attbyval, array->attlen, &datumshared); /* datumshared is passed by address so the call can advance it */
+ }
+}
+
+/*
+ * _bt_parallel_restore_arrays() -- Restore serialized parallel array state.
+ * Reads btscan's saved state back into so in the same order it was serialized: cur_elem for SAOP arrays, sk_flags (plus any sk_argument) for skip arrays.
+ * Caller must have exclusively locked btscan->btps_lock when called.
+ */
+static void
+_bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan,
+ BTScanOpaque so)
{
- /* Pessimistically assume all input scankeys will be output with arrays */
- return offsetof(BTParallelScanDescData, btps_arrElems) + sizeof(int) * nkeys;
+ char *datumshared; /* current read position for serialized skip array state */
+
+ /* Space for serialized skip array state begins immediately after all so->numArrayKeys slots of btps_arrElems[] */
+ datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
+ for (int i = 0; i < so->numArrayKeys; i++)
+ {
+ BTArrayKeyInfo *array = &so->arrayKeys[i];
+ ScanKey skey = &so->keyData[array->scan_key];
+ bool isnull; /* output argument of datumRestore() below */
+
+ if (array->num_elems != -1) /* num_elems of -1 marks a skip array; anything else is a SAOP array */
+ {
+ /* Restore SAOP array using its saved cur_elem */
+ Assert(!(skey->sk_flags & SK_BT_SKIP));
+ array->cur_elem = btscan->btps_arrElems[i];
+ skey->sk_argument = array->elem_values[array->cur_elem];
+ continue;
+ }
+
+ /* Restore skip array by restoring its key directly */
+ if (!array->attbyval && skey->sk_argument)
+ pfree(DatumGetPointer(skey->sk_argument)); /* free the key's previous pass-by-reference datum before replacing it */
+ skey->sk_argument = (Datum) 0;
+ memcpy(&skey->sk_flags, datumshared, sizeof(int)); /* sk_flags were serialized ahead of any datum */
+ datumshared += sizeof(int);
+
+ Assert(skey->sk_flags & SK_BT_SKIP);
+
+ if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
+ {
+ /* No sk_argument datum to restore (sk_argument stays zeroed) */
+ continue;
+ }
+
+ skey->sk_argument = datumRestore(&datumshared, &isnull); /* datumshared is passed by address so the call can advance it */
+ if (isnull)
+ {
+ Assert(skey->sk_argument == 0);
+ Assert(skey->sk_flags & SK_SEARCHNULL);
+ Assert(skey->sk_flags & SK_ISNULL);
+ }
+ }
}
/*
@@ -613,6 +783,7 @@ bool
_bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
BlockNumber *last_curr_page, bool first)
{
+ Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
bool exit_loop = false,
status = true,
@@ -679,14 +850,9 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page,
{
/* Can start scheduled primitive scan right away, so do so */
btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
- for (int i = 0; i < so->numArrayKeys; i++)
- {
- BTArrayKeyInfo *array = &so->arrayKeys[i];
- ScanKey skey = &so->keyData[array->scan_key];
- array->cur_elem = btscan->btps_arrElems[i];
- skey->sk_argument = array->elem_values[array->cur_elem];
- }
+ /* Restore scan's array keys from serialized values */
+ _bt_parallel_restore_arrays(rel, btscan, so);
exit_loop = true;
}
else
@@ -831,6 +997,7 @@ _bt_parallel_done(IndexScanDesc scan)
void
_bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page)
{
+ Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
BTParallelScanDesc btscan;
@@ -849,12 +1016,7 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page)
btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
/* Serialize scan's current array keys */
- for (int i = 0; i < so->numArrayKeys; i++)
- {
- BTArrayKeyInfo *array = &so->arrayKeys[i];
-
- btscan->btps_arrElems[i] = array->cur_elem;
- }
+ _bt_parallel_serialize_arrays(rel, btscan, so);
}
LWLockRelease(&btscan->btps_lock);
}