diff options
Diffstat (limited to 'src/backend/executor/nodeSetOp.c')
-rw-r--r-- | src/backend/executor/nodeSetOp.c | 537 |
1 files changed, 311 insertions, 226 deletions
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index b40d81f3ffa..09089204e8b 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -3,29 +3,30 @@ * nodeSetOp.c * Routines to handle INTERSECT and EXCEPT selection * - * The input of a SetOp node consists of tuples from two relations, - * which have been combined into one dataset, with a junk attribute added - * that shows which relation each tuple came from. In SETOP_SORTED mode, - * the input has furthermore been sorted according to all the grouping - * columns (ie, all the non-junk attributes). The SetOp node scans each - * group of identical tuples to determine how many came from each input - * relation. Then it is a simple matter to emit the output demanded by the - * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL. + * The input of a SetOp node consists of two relations (outer and inner) + * with identical column sets. In EXCEPT queries the outer relation is + * always the left side, while in INTERSECT cases the planner tries to + * make the outer relation be the smaller of the two inputs. * - * In SETOP_HASHED mode, the input is delivered in no particular order, - * except that we know all the tuples from one input relation will come before - * all the tuples of the other. The planner guarantees that the first input - * relation is the left-hand one for EXCEPT, and tries to make the smaller - * input relation come first for INTERSECT. We build a hash table in memory - * with one entry for each group of identical tuples, and count the number of - * tuples in the group from each relation. After seeing all the input, we - * scan the hashtable and generate the correct output using those counts. - * We can avoid making hashtable entries for any tuples appearing only in the - * second input relation, since they cannot result in any output. + * In SETOP_SORTED mode, each input has been sorted according to all the + * grouping columns. The SetOp node essentially performs a merge join on + * the grouping columns, except that it is only interested in counting how + * many tuples from each input match. Then it is a simple matter to emit + * the output demanded by the SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, + * or EXCEPT ALL. + * + * In SETOP_HASHED mode, the inputs are delivered in no particular order. + * We read the outer relation and build a hash table in memory with one entry + * for each group of identical tuples, counting the number of tuples in the + * group. Then we read the inner relation and count the number of tuples + * matching each outer group. (We can disregard any tuples appearing only + * in the inner relation, since they cannot result in any output.) After + * seeing all the input, we scan the hashtable and generate the correct + * output using those counts. * * This node type is not used for UNION or UNION ALL, since those can be - * implemented more cheaply (there's no need for the junk attribute to - * identify the source relation). + * implemented more cheaply (there's no need to count the number of + * matching tuples). * * Note that SetOp does no qual checking nor projection. The delivered * output tuples are just copies of the first-to-arrive tuple in each @@ -54,66 +55,29 @@ /* * SetOpStatePerGroupData - per-group working state * - * These values are working state that is initialized at the start of - * an input tuple group and updated for each input tuple. - * - * In SETOP_SORTED mode, we need only one of these structs, and it's kept in - * the plan state node. In SETOP_HASHED mode, the hash table contains one - * of these for each tuple group. + * In SETOP_SORTED mode, we need only one of these structs, and it's just a + * local in setop_retrieve_sorted. In SETOP_HASHED mode, the hash table + * contains one of these for each tuple group. */ typedef struct SetOpStatePerGroupData { - long numLeft; /* number of left-input dups in group */ - long numRight; /* number of right-input dups in group */ -} SetOpStatePerGroupData; + int64 numLeft; /* number of left-input dups in group */ + int64 numRight; /* number of right-input dups in group */ +} SetOpStatePerGroupData; + +typedef SetOpStatePerGroupData *SetOpStatePerGroup; -static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate); +static TupleTableSlot *setop_retrieve_sorted(SetOpState *setopstate); +static void setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan, + SetOpState *setopstate); +static int setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2, + SetOpState *setopstate); static void setop_fill_hash_table(SetOpState *setopstate); static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate); /* - * Initialize state for a new group of input values. - */ -static inline void -initialize_counts(SetOpStatePerGroup pergroup) -{ - pergroup->numLeft = pergroup->numRight = 0; -} - -/* - * Advance the appropriate counter for one input tuple. - */ -static inline void -advance_counts(SetOpStatePerGroup pergroup, int flag) -{ - if (flag) - pergroup->numRight++; - else - pergroup->numLeft++; -} - -/* - * Fetch the "flag" column from an input tuple. - * This is an integer column with value 0 for left side, 1 for right side. - */ -static int -fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot) -{ - SetOp *node = (SetOp *) setopstate->ps.plan; - int flag; - bool isNull; - - flag = DatumGetInt32(slot_getattr(inputslot, - node->flagColIdx, - &isNull)); - Assert(!isNull); - Assert(flag == 0 || flag == 1); - return flag; -} - -/* * Initialize the hash table to empty. */ static void @@ -126,14 +90,19 @@ build_hash_table(SetOpState *setopstate) Assert(node->strategy == SETOP_HASHED); Assert(node->numGroups > 0); + /* + * If both child plans deliver the same fixed tuple slot type, we can tell + * BuildTupleHashTableExt to expect that slot type as input. Otherwise, + * we'll pass NULL denoting that any slot type is possible. + */ setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps, desc, - NULL, + ExecGetCommonChildSlotOps(&setopstate->ps), node->numCols, - node->dupColIdx, + node->cmpColIdx, setopstate->eqfuncoids, setopstate->hashfunctions, - node->dupCollations, + node->cmpCollations, node->numGroups, 0, setopstate->ps.state->es_query_cxt, @@ -218,108 +187,126 @@ ExecSetOp(PlanState *pstate) return setop_retrieve_hash_table(node); } else - return setop_retrieve_direct(node); + return setop_retrieve_sorted(node); } /* * ExecSetOp for non-hashed case */ static TupleTableSlot * -setop_retrieve_direct(SetOpState *setopstate) +setop_retrieve_sorted(SetOpState *setopstate) { PlanState *outerPlan; - SetOpStatePerGroup pergroup; - TupleTableSlot *outerslot; + PlanState *innerPlan; TupleTableSlot *resultTupleSlot; - ExprContext *econtext = setopstate->ps.ps_ExprContext; /* * get state info from node */ outerPlan = outerPlanState(setopstate); - pergroup = (SetOpStatePerGroup) setopstate->pergroup; + innerPlan = innerPlanState(setopstate); resultTupleSlot = setopstate->ps.ps_ResultTupleSlot; /* - * We loop retrieving groups until we find one we should return + * If first time through, establish the invariant that setop_load_group + * expects: each side's nextTupleSlot is the next output from the child + * plan, or empty if there is no more output from it. */ - while (!setopstate->setop_done) + if (setopstate->need_init) { + setopstate->need_init = false; + + setopstate->leftInput.nextTupleSlot = ExecProcNode(outerPlan); + /* - * If we don't already have the first tuple of the new group, fetch it - * from the outer plan. + * If the outer relation is empty, then we will emit nothing, and we + * don't need to read the inner relation at all. */ - if (setopstate->grp_firstTuple == NULL) + if (TupIsNull(setopstate->leftInput.nextTupleSlot)) { - outerslot = ExecProcNode(outerPlan); - if (!TupIsNull(outerslot)) - { - /* Make a copy of the first input tuple */ - setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot); - } - else - { - /* outer plan produced no tuples at all */ - setopstate->setop_done = true; - return NULL; - } + setopstate->setop_done = true; + return NULL; } + setopstate->rightInput.nextTupleSlot = ExecProcNode(innerPlan); + + /* Set flags that we've not completed either side's group */ + setopstate->leftInput.needGroup = true; + setopstate->rightInput.needGroup = true; + } + + /* + * We loop retrieving groups until we find one we should return + */ + while (!setopstate->setop_done) + { + int cmpresult; + SetOpStatePerGroupData pergroup; + /* - * Store the copied first input tuple in the tuple table slot reserved - * for it. The tuple will be deleted when it is cleared from the - * slot. + * Fetch the rest of the current outer group, if we didn't already. */ - ExecStoreHeapTuple(setopstate->grp_firstTuple, - resultTupleSlot, - true); - setopstate->grp_firstTuple = NULL; /* don't keep two pointers */ + if (setopstate->leftInput.needGroup) + setop_load_group(&setopstate->leftInput, outerPlan, setopstate); - /* Initialize working state for a new input tuple group */ - initialize_counts(pergroup); + /* + * If no more outer groups, we're done, and don't need to look at any + * more of the inner relation. + */ + if (setopstate->leftInput.numTuples == 0) + { + setopstate->setop_done = true; + break; + } - /* Count the first input tuple */ - advance_counts(pergroup, - fetch_tuple_flag(setopstate, resultTupleSlot)); + /* + * Fetch the rest of the current inner group, if we didn't already. + */ + if (setopstate->rightInput.needGroup) + setop_load_group(&setopstate->rightInput, innerPlan, setopstate); /* - * Scan the outer plan until we exhaust it or cross a group boundary. + * Determine whether we have matching groups on both sides (this is + * basically like the core logic of a merge join). */ - for (;;) - { - outerslot = ExecProcNode(outerPlan); - if (TupIsNull(outerslot)) - { - /* no more outer-plan tuples available */ - setopstate->setop_done = true; - break; - } - - /* - * Check whether we've crossed a group boundary. - */ - econtext->ecxt_outertuple = resultTupleSlot; - econtext->ecxt_innertuple = outerslot; - - if (!ExecQualAndReset(setopstate->eqfunction, econtext)) - { - /* - * Save the first input tuple of the next group. - */ - setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot); - break; - } + if (setopstate->rightInput.numTuples == 0) + cmpresult = -1; /* as though left input is lesser */ + else + cmpresult = setop_compare_slots(setopstate->leftInput.firstTupleSlot, + setopstate->rightInput.firstTupleSlot, + setopstate); - /* Still in same group, so count this tuple */ - advance_counts(pergroup, - fetch_tuple_flag(setopstate, outerslot)); + if (cmpresult < 0) + { + /* Left group is first, and has no right matches */ + pergroup.numLeft = setopstate->leftInput.numTuples; + pergroup.numRight = 0; + /* We'll need another left group next time */ + setopstate->leftInput.needGroup = true; + } + else if (cmpresult == 0) + { + /* We have matching groups */ + pergroup.numLeft = setopstate->leftInput.numTuples; + pergroup.numRight = setopstate->rightInput.numTuples; + /* We'll need to read from both sides next time */ + setopstate->leftInput.needGroup = true; + setopstate->rightInput.needGroup = true; + } + else + { + /* Right group has no left matches, so we can ignore it */ + setopstate->rightInput.needGroup = true; + continue; } /* - * Done scanning input tuple group. See if we should emit any copies - * of result tuple, and if so return the first copy. + * Done scanning these input tuple groups. See if we should emit any + * copies of result tuple, and if so return the first copy. (Note + * that the result tuple is the same as the left input's firstTuple + * slot.) */ - set_output_count(setopstate, pergroup); + set_output_count(setopstate, &pergroup); if (setopstate->numOutput > 0) { @@ -334,84 +321,168 @@ setop_retrieve_direct(SetOpState *setopstate) } /* - * ExecSetOp for hashed case: phase 1, read input and build hash table + * Load next group of tuples from one child plan or the other. + * + * On entry, we've already read the first tuple of the next group + * (if there is one) into input->nextTupleSlot. This invariant + * is maintained on exit. + */ +static void +setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan, + SetOpState *setopstate) +{ + input->needGroup = false; + + /* If we've exhausted this child plan, report an empty group */ + if (TupIsNull(input->nextTupleSlot)) + { + ExecClearTuple(input->firstTupleSlot); + input->numTuples = 0; + return; + } + + /* Make a local copy of the first tuple for comparisons */ + ExecStoreMinimalTuple(ExecCopySlotMinimalTuple(input->nextTupleSlot), + input->firstTupleSlot, + true); + /* and count it */ + input->numTuples = 1; + + /* Scan till we find the end-of-group */ + for (;;) + { + int cmpresult; + + /* Get next input tuple, if there is one */ + input->nextTupleSlot = ExecProcNode(inputPlan); + if (TupIsNull(input->nextTupleSlot)) + break; + + /* There is; does it belong to same group as firstTuple? */ + cmpresult = setop_compare_slots(input->firstTupleSlot, + input->nextTupleSlot, + setopstate); + Assert(cmpresult <= 0); /* else input is mis-sorted */ + if (cmpresult != 0) + break; + + /* Still in same group, so count this tuple */ + input->numTuples++; + } +} + +/* + * Compare the tuples in the two given slots. + */ +static int +setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2, + SetOpState *setopstate) +{ + /* We'll often need to fetch all the columns, so just do it */ + slot_getallattrs(s1); + slot_getallattrs(s2); + for (int nkey = 0; nkey < setopstate->numCols; nkey++) + { + SortSupport sortKey = setopstate->sortKeys + nkey; + AttrNumber attno = sortKey->ssup_attno; + Datum datum1 = s1->tts_values[attno - 1], + datum2 = s2->tts_values[attno - 1]; + bool isNull1 = s1->tts_isnull[attno - 1], + isNull2 = s2->tts_isnull[attno - 1]; + int compare; + + compare = ApplySortComparator(datum1, isNull1, + datum2, isNull2, + sortKey); + if (compare != 0) + return compare; + } + return 0; +} + +/* + * ExecSetOp for hashed case: phase 1, read inputs and build hash table */ static void setop_fill_hash_table(SetOpState *setopstate) { - SetOp *node = (SetOp *) setopstate->ps.plan; PlanState *outerPlan; - int firstFlag; - bool in_first_rel PG_USED_FOR_ASSERTS_ONLY; + PlanState *innerPlan; ExprContext *econtext = setopstate->ps.ps_ExprContext; + bool have_tuples = false; /* * get state info from node */ outerPlan = outerPlanState(setopstate); - firstFlag = node->firstFlag; - /* verify planner didn't mess up */ - Assert(firstFlag == 0 || - (firstFlag == 1 && - (node->cmd == SETOPCMD_INTERSECT || - node->cmd == SETOPCMD_INTERSECT_ALL))); + innerPlan = innerPlanState(setopstate); /* * Process each outer-plan tuple, and then fetch the next one, until we * exhaust the outer plan. */ - in_first_rel = true; for (;;) { TupleTableSlot *outerslot; - int flag; TupleHashEntryData *entry; bool isnew; outerslot = ExecProcNode(outerPlan); if (TupIsNull(outerslot)) break; + have_tuples = true; - /* Identify whether it's left or right input */ - flag = fetch_tuple_flag(setopstate, outerslot); + /* Find or build hashtable entry for this tuple's group */ + entry = LookupTupleHashEntry(setopstate->hashtable, + outerslot, + &isnew, NULL); - if (flag == firstFlag) + /* If new tuple group, initialize counts to zero */ + if (isnew) { - /* (still) in first input relation */ - Assert(in_first_rel); - - /* Find or build hashtable entry for this tuple's group */ - entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, - &isnew, NULL); - - /* If new tuple group, initialize counts */ - if (isnew) - { - entry->additional = (SetOpStatePerGroup) - MemoryContextAlloc(setopstate->hashtable->tablecxt, + entry->additional = (SetOpStatePerGroup) + MemoryContextAllocZero(setopstate->hashtable->tablecxt, sizeof(SetOpStatePerGroupData)); - initialize_counts((SetOpStatePerGroup) entry->additional); - } - - /* Advance the counts */ - advance_counts((SetOpStatePerGroup) entry->additional, flag); } - else + + /* Advance the counts */ + ((SetOpStatePerGroup) entry->additional)->numLeft++; + + /* Must reset expression context after each hashtable lookup */ + ResetExprContext(econtext); + } + + /* + * If the outer relation is empty, then we will emit nothing, and we don't + * need to read the inner relation at all. + */ + if (have_tuples) + { + /* + * Process each inner-plan tuple, and then fetch the next one, until + * we exhaust the inner plan. + */ + for (;;) { - /* reached second relation */ - in_first_rel = false; + TupleTableSlot *innerslot; + TupleHashEntryData *entry; + + innerslot = ExecProcNode(innerPlan); + if (TupIsNull(innerslot)) + break; /* For tuples not seen previously, do not make hashtable entry */ - entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, + entry = LookupTupleHashEntry(setopstate->hashtable, + innerslot, NULL, NULL); /* Advance the counts if entry is already present */ if (entry) - advance_counts((SetOpStatePerGroup) entry->additional, flag); - } + ((SetOpStatePerGroup) entry->additional)->numRight++; - /* Must reset expression context after each hashtable lookup */ - ResetExprContext(econtext); + /* Must reset expression context after each hashtable lookup */ + ResetExprContext(econtext); + } } setopstate->table_filled = true; @@ -482,7 +553,6 @@ SetOpState * ExecInitSetOp(SetOp *node, EState *estate, int eflags) { SetOpState *setopstate; - TupleDesc outerDesc; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -495,14 +565,10 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) setopstate->ps.state = estate; setopstate->ps.ExecProcNode = ExecSetOp; - setopstate->eqfuncoids = NULL; - setopstate->hashfunctions = NULL; setopstate->setop_done = false; setopstate->numOutput = 0; - setopstate->pergroup = NULL; - setopstate->grp_firstTuple = NULL; - setopstate->hashtable = NULL; - setopstate->tableContext = NULL; + setopstate->numCols = node->numCols; + setopstate->need_init = true; /* * create expression context @@ -523,52 +589,72 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) /* * initialize child nodes * - * If we are hashing then the child plan does not need to handle REWIND + * If we are hashing then the child plans do not need to handle REWIND * efficiently; see ExecReScanSetOp. */ if (node->strategy == SETOP_HASHED) eflags &= ~EXEC_FLAG_REWIND; outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags); - outerDesc = ExecGetResultType(outerPlanState(setopstate)); + innerPlanState(setopstate) = ExecInitNode(innerPlan(node), estate, eflags); /* - * Initialize result slot and type. Setop nodes do no projections, so - * initialize projection info for this node appropriately. + * Initialize locally-allocated slots. In hashed mode, we just need a + * result slot. In sorted mode, we need one first-tuple-of-group slot for + * each input; we use the result slot for the left input's slot and create + * another for the right input. (Note: the nextTupleSlot slots are not + * ours, but just point to the last slot returned by the input plan node.) */ - ExecInitResultTupleSlotTL(&setopstate->ps, - node->strategy == SETOP_HASHED ? - &TTSOpsMinimalTuple : &TTSOpsHeapTuple); + ExecInitResultTupleSlotTL(&setopstate->ps, &TTSOpsMinimalTuple); + if (node->strategy != SETOP_HASHED) + { + setopstate->leftInput.firstTupleSlot = + setopstate->ps.ps_ResultTupleSlot; + setopstate->rightInput.firstTupleSlot = + ExecInitExtraTupleSlot(estate, + setopstate->ps.ps_ResultTupleDesc, + &TTSOpsMinimalTuple); + } + + /* Setop nodes do no projections. */ setopstate->ps.ps_ProjInfo = NULL; /* - * Precompute fmgr lookup data for inner loop. We need both equality and - * hashing functions to do it by hashing, but only equality if not - * hashing. + * Precompute fmgr lookup data for inner loop. We need equality and + * hashing functions to do it by hashing, while for sorting we need + * SortSupport data. */ if (node->strategy == SETOP_HASHED) execTuplesHashPrepare(node->numCols, - node->dupOperators, + node->cmpOperators, &setopstate->eqfuncoids, &setopstate->hashfunctions); else - setopstate->eqfunction = - execTuplesMatchPrepare(outerDesc, - node->numCols, - node->dupColIdx, - node->dupOperators, - node->dupCollations, - &setopstate->ps); + { + int nkeys = node->numCols; + + setopstate->sortKeys = (SortSupport) + palloc0(nkeys * sizeof(SortSupportData)); + for (int i = 0; i < nkeys; i++) + { + SortSupport sortKey = setopstate->sortKeys + i; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = node->cmpCollations[i]; + sortKey->ssup_nulls_first = node->cmpNullsFirst[i]; + sortKey->ssup_attno = node->cmpColIdx[i]; + /* abbreviated key conversion is not useful here */ + sortKey->abbreviate = false; + PrepareSortSupportFromOrderingOp(node->cmpOperators[i], sortKey); + } + } + + /* Create a hash table if needed */ if (node->strategy == SETOP_HASHED) { build_hash_table(setopstate); setopstate->table_filled = false; } - else - { - setopstate->pergroup = - (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData)); - } return setopstate; } @@ -576,7 +662,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) /* ---------------------------------------------------------------- * ExecEndSetOp * - * This shuts down the subplan and frees resources allocated + * This shuts down the subplans and frees resources allocated * to this node. * ---------------------------------------------------------------- */ @@ -588,6 +674,7 @@ ExecEndSetOp(SetOpState *node) MemoryContextDelete(node->tableContext); ExecEndNode(outerPlanState(node)); + ExecEndNode(innerPlanState(node)); } @@ -595,6 +682,7 @@ void ExecReScanSetOp(SetOpState *node) { PlanState *outerPlan = outerPlanState(node); + PlanState *innerPlan = innerPlanState(node); ExecClearTuple(node->ps.ps_ResultTupleSlot); node->setop_done = false; @@ -612,34 +700,29 @@ ExecReScanSetOp(SetOpState *node) return; /* - * If we do have the hash table and the subplan does not have any + * If we do have the hash table and the subplans do not have any * parameter changes, then we can just rescan the existing hash table; * no need to build it again. */ - if (outerPlan->chgParam == NULL) + if (outerPlan->chgParam == NULL && innerPlan->chgParam == NULL) { ResetTupleHashIterator(node->hashtable, &node->hashiter); return; } - } - - /* Release first tuple of group, if we have made a copy */ - if (node->grp_firstTuple != NULL) - { - heap_freetuple(node->grp_firstTuple); - node->grp_firstTuple = NULL; - } - /* Release any hashtable storage */ - if (node->tableContext) - MemoryContextReset(node->tableContext); + /* Release any hashtable storage */ + if (node->tableContext) + MemoryContextReset(node->tableContext); - /* And rebuild empty hashtable if needed */ - if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED) - { + /* And rebuild an empty hashtable */ ResetTupleHashTable(node->hashtable); node->table_filled = false; } + else + { + /* Need to re-read first input from each side */ + node->need_init = true; + } /* * if chgParam of subnode is not null then plan will be re-scanned by @@ -647,4 +730,6 @@ ExecReScanSetOp(SetOpState *node) */ if (outerPlan->chgParam == NULL) ExecReScan(outerPlan); + if (innerPlan->chgParam == NULL) + ExecReScan(innerPlan); } |