diff options
Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 196 |
1 files changed, 179 insertions, 17 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index a99667eb2bd..77781cb9002 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -31,6 +31,7 @@ #include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/read_stream.h" +#include "utils/datum.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" @@ -76,14 +77,26 @@ typedef struct BTParallelScanDescData /* * btps_arrElems is used when scans need to schedule another primitive - * index scan. Holds BTArrayKeyInfo.cur_elem offsets for scan keys. + * index scan with one or more SAOP arrays. Holds BTArrayKeyInfo.cur_elem + * offsets for each = scan key associated with a ScalarArrayOp array. */ int btps_arrElems[FLEXIBLE_ARRAY_MEMBER]; + + /* + * Additional space (at the end of the struct) is used when scans need to + * schedule another primitive index scan with one or more skip arrays. + * Holds a flattened datum representation for each = scan key associated + * with a skip array. + */ } BTParallelScanDescData; typedef struct BTParallelScanDescData *BTParallelScanDesc; +static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so); +static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so); static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid); @@ -541,10 +554,167 @@ btrestrpos(IndexScanDesc scan) * btestimateparallelscan -- estimate storage for BTParallelScanDescData */ Size -btestimateparallelscan(int nkeys, int norderbys) +btestimateparallelscan(Relation rel, int nkeys, int norderbys) +{ + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); + Size estnbtreeshared, + genericattrspace; + + /* + * Pessimistically assume that every input scan key will be output with + * its own SAOP array + */ + estnbtreeshared = offsetof(BTParallelScanDescData, btps_arrElems) + + sizeof(int) * nkeys; + + /* Single column indexes cannot possibly use a skip array */ + if (nkeyatts == 1) + return estnbtreeshared; + + /* + * Pessimistically assume that all attributes prior to the least + * significant attribute require a skip array (and an associated key) + */ + genericattrspace = datumEstimateSpace((Datum) 0, false, true, + sizeof(Datum)); + for (int attnum = 1; attnum < nkeyatts; attnum++) + { + CompactAttribute *attr; + + /* + * We make the conservative assumption that every index column will + * also require a skip array. + * + * Every skip array must have space to store its scan key's sk_flags. + */ + estnbtreeshared = add_size(estnbtreeshared, sizeof(int)); + + /* Consider space required to store a datum of opclass input type */ + attr = TupleDescCompactAttr(rel->rd_att, attnum - 1); + if (attr->attbyval) + { + /* This index attribute stores pass-by-value datums */ + Size estfixed = datumEstimateSpace((Datum) 0, false, + true, attr->attlen); + + estnbtreeshared = add_size(estnbtreeshared, estfixed); + continue; + } + + /* + * This index attribute stores pass-by-reference datums. + * + * Assume that serializing this array will use just as much space as a + * pass-by-value datum, in addition to space for the largest possible + * whole index tuple (this is not just a per-datum portion of the + * largest possible tuple because that'd be almost as large anyway). + * + * This is quite conservative, but it's not clear how we could do much + * better. The executor requires an up-front storage request size + * that reliably covers the scan's high watermark memory usage. We + * can't be sure of the real high watermark until the scan is over. + */ + estnbtreeshared = add_size(estnbtreeshared, genericattrspace); + estnbtreeshared = add_size(estnbtreeshared, BTMaxItemSize); + } + + return estnbtreeshared; +} + +/* + * _bt_parallel_serialize_arrays() -- Serialize parallel array state. + * + * Caller must have exclusively locked btscan->btps_lock when called. + */ +static void +_bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so) +{ + char *datumshared; + + /* Space for serialized datums begins immediately after btps_arrElems[] */ + datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]); + for (int i = 0; i < so->numArrayKeys; i++) + { + BTArrayKeyInfo *array = &so->arrayKeys[i]; + ScanKey skey = &so->keyData[array->scan_key]; + + if (array->num_elems != -1) + { + /* Save SAOP array's cur_elem (no need to copy key/datum) */ + Assert(!(skey->sk_flags & SK_BT_SKIP)); + btscan->btps_arrElems[i] = array->cur_elem; + continue; + } + + /* Save all mutable state associated with skip array's key */ + Assert(skey->sk_flags & SK_BT_SKIP); + memcpy(datumshared, &skey->sk_flags, sizeof(int)); + datumshared += sizeof(int); + + if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)) + { + /* No sk_argument datum to serialize */ + Assert(skey->sk_argument == 0); + continue; + } + + datumSerialize(skey->sk_argument, (skey->sk_flags & SK_ISNULL) != 0, + array->attbyval, array->attlen, &datumshared); + } +} + +/* + * _bt_parallel_restore_arrays() -- Restore serialized parallel array state. + * + * Caller must have exclusively locked btscan->btps_lock when called. + */ +static void +_bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so) { - /* Pessimistically assume all input scankeys will be output with arrays */ - return offsetof(BTParallelScanDescData, btps_arrElems) + sizeof(int) * nkeys; + char *datumshared; + + /* Space for serialized datums begins immediately after btps_arrElems[] */ + datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]); + for (int i = 0; i < so->numArrayKeys; i++) + { + BTArrayKeyInfo *array = &so->arrayKeys[i]; + ScanKey skey = &so->keyData[array->scan_key]; + bool isnull; + + if (array->num_elems != -1) + { + /* Restore SAOP array using its saved cur_elem */ + Assert(!(skey->sk_flags & SK_BT_SKIP)); + array->cur_elem = btscan->btps_arrElems[i]; + skey->sk_argument = array->elem_values[array->cur_elem]; + continue; + } + + /* Restore skip array by restoring its key directly */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + skey->sk_argument = (Datum) 0; + memcpy(&skey->sk_flags, datumshared, sizeof(int)); + datumshared += sizeof(int); + + Assert(skey->sk_flags & SK_BT_SKIP); + + if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)) + { + /* No sk_argument datum to restore */ + continue; + } + + skey->sk_argument = datumRestore(&datumshared, &isnull); + if (isnull) + { + Assert(skey->sk_argument == 0); + Assert(skey->sk_flags & SK_SEARCHNULL); + Assert(skey->sk_flags & SK_ISNULL); + } + } } /* @@ -613,6 +783,7 @@ bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; bool exit_loop = false, status = true, @@ -679,14 +850,9 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, { /* Can start scheduled primitive scan right away, so do so */ btscan->btps_pageStatus = BTPARALLEL_ADVANCING; - for (int i = 0; i < so->numArrayKeys; i++) - { - BTArrayKeyInfo *array = &so->arrayKeys[i]; - ScanKey skey = &so->keyData[array->scan_key]; - array->cur_elem = btscan->btps_arrElems[i]; - skey->sk_argument = array->elem_values[array->cur_elem]; - } + /* Restore scan's array keys from serialized values */ + _bt_parallel_restore_arrays(rel, btscan, so); exit_loop = true; } else @@ -831,6 +997,7 @@ _bt_parallel_done(IndexScanDesc scan) void _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; @@ -849,12 +1016,7 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page) btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN; /* Serialize scan's current array keys */ - for (int i = 0; i < so->numArrayKeys; i++) - { - BTArrayKeyInfo *array = &so->arrayKeys[i]; - - btscan->btps_arrElems[i] = array->cur_elem; - } + _bt_parallel_serialize_arrays(rel, btscan, so); } LWLockRelease(&btscan->btps_lock); } |