1 files changed, 311 insertions, 226 deletions
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index b40d81f3ffa..09089204e8b 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -3,29 +3,30 @@
  * nodeSetOp.c
  *	  Routines to handle INTERSECT and EXCEPT selection
  *
- * The input of a SetOp node consists of tuples from two relations,
- * which have been combined into one dataset, with a junk attribute added
- * that shows which relation each tuple came from.  In SETOP_SORTED mode,
- * the input has furthermore been sorted according to all the grouping
- * columns (ie, all the non-junk attributes).  The SetOp node scans each
- * group of identical tuples to determine how many came from each input
- * relation.  Then it is a simple matter to emit the output demanded by the
- * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ * The input of a SetOp node consists of two relations (outer and inner)
+ * with identical column sets.  In EXCEPT queries the outer relation is
+ * always the left side, while in INTERSECT cases the planner tries to
+ * make the outer relation be the smaller of the two inputs.
  *
- * In SETOP_HASHED mode, the input is delivered in no particular order,
- * except that we know all the tuples from one input relation will come before
- * all the tuples of the other.  The planner guarantees that the first input
- * relation is the left-hand one for EXCEPT, and tries to make the smaller
- * input relation come first for INTERSECT.  We build a hash table in memory
- * with one entry for each group of identical tuples, and count the number of
- * tuples in the group from each relation.  After seeing all the input, we
- * scan the hashtable and generate the correct output using those counts.
- * We can avoid making hashtable entries for any tuples appearing only in the
- * second input relation, since they cannot result in any output.
+ * In SETOP_SORTED mode, each input has been sorted according to all the
+ * grouping columns.  The SetOp node essentially performs a merge join on
+ * the grouping columns, except that it is only interested in counting how
+ * many tuples from each input match.  Then it is a simple matter to emit
+ * the output demanded by the SQL spec for INTERSECT, INTERSECT ALL, EXCEPT,
+ * or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the inputs are delivered in no particular order.
+ * We read the outer relation and build a hash table in memory with one entry
+ * for each group of identical tuples, counting the number of tuples in the
+ * group.  Then we read the inner relation and count the number of tuples
+ * matching each outer group.  (We can disregard any tuples appearing only
+ * in the inner relation, since they cannot result in any output.)  After
+ * seeing all the input, we scan the hashtable and generate the correct
+ * output using those counts.
  *
  * This node type is not used for UNION or UNION ALL, since those can be
- * implemented more cheaply (there's no need for the junk attribute to
- * identify the source relation).
+ * implemented more cheaply (there's no need to count the number of
+ * matching tuples).
  *
  * Note that SetOp does no qual checking nor projection.  The delivered
  * output tuples are just copies of the first-to-arrive tuple in each
@@ -54,66 +55,29 @@
 /*
  * SetOpStatePerGroupData - per-group working state
  *
- * These values are working state that is initialized at the start of
- * an input tuple group and updated for each input tuple.
- *
- * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
- * the plan state node.  In SETOP_HASHED mode, the hash table contains one
- * of these for each tuple group.
+ * In SETOP_SORTED mode, we need only one of these structs, and it's just a
+ * local in setop_retrieve_sorted.  In SETOP_HASHED mode, the hash table
+ * contains one of these for each tuple group.
  */
 typedef struct SetOpStatePerGroupData
 {
-	long		numLeft;		/* number of left-input dups in group */
-	long		numRight;		/* number of right-input dups in group */
-}			SetOpStatePerGroupData;
+	int64		numLeft;		/* number of left-input dups in group */
+	int64		numRight;		/* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+typedef SetOpStatePerGroupData *SetOpStatePerGroup;
 
 
-static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_sorted(SetOpState *setopstate);
+static void setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan,
+							 SetOpState *setopstate);
+static int	setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2,
+								SetOpState *setopstate);
 static void setop_fill_hash_table(SetOpState *setopstate);
 static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
 
 
 /*
- * Initialize state for a new group of input values.
- */
-static inline void
-initialize_counts(SetOpStatePerGroup pergroup)
-{
-	pergroup->numLeft = pergroup->numRight = 0;
-}
-
-/*
- * Advance the appropriate counter for one input tuple.
- */
-static inline void
-advance_counts(SetOpStatePerGroup pergroup, int flag)
-{
-	if (flag)
-		pergroup->numRight++;
-	else
-		pergroup->numLeft++;
-}
-
-/*
- * Fetch the "flag" column from an input tuple.
- * This is an integer column with value 0 for left side, 1 for right side.
- */
-static int
-fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
-{
-	SetOp	   *node = (SetOp *) setopstate->ps.plan;
-	int			flag;
-	bool		isNull;
-
-	flag = DatumGetInt32(slot_getattr(inputslot,
-									  node->flagColIdx,
-									  &isNull));
-	Assert(!isNull);
-	Assert(flag == 0 || flag == 1);
-	return flag;
-}
-
-/*
  * Initialize the hash table to empty.
  */
 static void
@@ -126,14 +90,19 @@ build_hash_table(SetOpState *setopstate)
 	Assert(node->strategy == SETOP_HASHED);
 	Assert(node->numGroups > 0);
 
+	/*
+	 * If both child plans deliver the same fixed tuple slot type, we can tell
+	 * BuildTupleHashTableExt to expect that slot type as input.  Otherwise,
+	 * we'll pass NULL denoting that any slot type is possible.
+	 */
 	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
 												   desc,
-												   NULL,
+												   ExecGetCommonChildSlotOps(&setopstate->ps),
 												   node->numCols,
-												   node->dupColIdx,
+												   node->cmpColIdx,
 												   setopstate->eqfuncoids,
 												   setopstate->hashfunctions,
-												   node->dupCollations,
+												   node->cmpCollations,
 												   node->numGroups,
 												   0,
 												   setopstate->ps.state->es_query_cxt,
@@ -218,108 +187,126 @@ ExecSetOp(PlanState *pstate)
 		return setop_retrieve_hash_table(node);
 	}
 	else
-		return setop_retrieve_direct(node);
+		return setop_retrieve_sorted(node);
 }
 
 /*
  * ExecSetOp for non-hashed case
  */
 static TupleTableSlot *
-setop_retrieve_direct(SetOpState *setopstate)
+setop_retrieve_sorted(SetOpState *setopstate)
 {
 	PlanState  *outerPlan;
-	SetOpStatePerGroup pergroup;
-	TupleTableSlot *outerslot;
+	PlanState  *innerPlan;
 	TupleTableSlot *resultTupleSlot;
-	ExprContext *econtext = setopstate->ps.ps_ExprContext;
 
 	/*
 	 * get state info from node
 	 */
 	outerPlan = outerPlanState(setopstate);
-	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
+	innerPlan = innerPlanState(setopstate);
 	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
 
 	/*
-	 * We loop retrieving groups until we find one we should return
+	 * If first time through, establish the invariant that setop_load_group
+	 * expects: each side's nextTupleSlot is the next output from the child
+	 * plan, or empty if there is no more output from it.
 	 */
-	while (!setopstate->setop_done)
+	if (setopstate->need_init)
 	{
+		setopstate->need_init = false;
+
+		setopstate->leftInput.nextTupleSlot = ExecProcNode(outerPlan);
+
 		/*
-		 * If we don't already have the first tuple of the new group, fetch it
-		 * from the outer plan.
+		 * If the outer relation is empty, then we will emit nothing, and we
+		 * don't need to read the inner relation at all.
 		 */
-		if (setopstate->grp_firstTuple == NULL)
+		if (TupIsNull(setopstate->leftInput.nextTupleSlot))
 		{
-			outerslot = ExecProcNode(outerPlan);
-			if (!TupIsNull(outerslot))
-			{
-				/* Make a copy of the first input tuple */
-				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
-			}
-			else
-			{
-				/* outer plan produced no tuples at all */
-				setopstate->setop_done = true;
-				return NULL;
-			}
+			setopstate->setop_done = true;
+			return NULL;
 		}
 
+		setopstate->rightInput.nextTupleSlot = ExecProcNode(innerPlan);
+
+		/* Set flags that we've not completed either side's group */
+		setopstate->leftInput.needGroup = true;
+		setopstate->rightInput.needGroup = true;
+	}
+
+	/*
+	 * We loop retrieving groups until we find one we should return
+	 */
+	while (!setopstate->setop_done)
+	{
+		int			cmpresult;
+		SetOpStatePerGroupData pergroup;
+
 		/*
-		 * Store the copied first input tuple in the tuple table slot reserved
-		 * for it.  The tuple will be deleted when it is cleared from the
-		 * slot.
+		 * Fetch the rest of the current outer group, if we didn't already.
 		 */
-		ExecStoreHeapTuple(setopstate->grp_firstTuple,
-						   resultTupleSlot,
-						   true);
-		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */
+		if (setopstate->leftInput.needGroup)
+			setop_load_group(&setopstate->leftInput, outerPlan, setopstate);
 
-		/* Initialize working state for a new input tuple group */
-		initialize_counts(pergroup);
+		/*
+		 * If no more outer groups, we're done, and don't need to look at any
+		 * more of the inner relation.
+		 */
+		if (setopstate->leftInput.numTuples == 0)
+		{
+			setopstate->setop_done = true;
+			break;
+		}
 
-		/* Count the first input tuple */
-		advance_counts(pergroup,
-					   fetch_tuple_flag(setopstate, resultTupleSlot));
+		/*
+		 * Fetch the rest of the current inner group, if we didn't already.
+		 */
+		if (setopstate->rightInput.needGroup)
+			setop_load_group(&setopstate->rightInput, innerPlan, setopstate);
 
 		/*
-		 * Scan the outer plan until we exhaust it or cross a group boundary.
+		 * Determine whether we have matching groups on both sides (this is
+		 * basically like the core logic of a merge join).
 		 */
-		for (;;)
-		{
-			outerslot = ExecProcNode(outerPlan);
-			if (TupIsNull(outerslot))
-			{
-				/* no more outer-plan tuples available */
-				setopstate->setop_done = true;
-				break;
-			}
-
-			/*
-			 * Check whether we've crossed a group boundary.
-			 */
-			econtext->ecxt_outertuple = resultTupleSlot;
-			econtext->ecxt_innertuple = outerslot;
-
-			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
-			{
-				/*
-				 * Save the first input tuple of the next group.
-				 */
-				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
-				break;
-			}
+		if (setopstate->rightInput.numTuples == 0)
+			cmpresult = -1;		/* as though left input is lesser */
+		else
+			cmpresult = setop_compare_slots(setopstate->leftInput.firstTupleSlot,
+											setopstate->rightInput.firstTupleSlot,
+											setopstate);
 
-			/* Still in same group, so count this tuple */
-			advance_counts(pergroup,
-						   fetch_tuple_flag(setopstate, outerslot));
+		if (cmpresult < 0)
+		{
+			/* Left group is first, and has no right matches */
+			pergroup.numLeft = setopstate->leftInput.numTuples;
+			pergroup.numRight = 0;
+			/* We'll need another left group next time */
+			setopstate->leftInput.needGroup = true;
+		}
+		else if (cmpresult == 0)
+		{
+			/* We have matching groups */
+			pergroup.numLeft = setopstate->leftInput.numTuples;
+			pergroup.numRight = setopstate->rightInput.numTuples;
+			/* We'll need to read from both sides next time */
+			setopstate->leftInput.needGroup = true;
+			setopstate->rightInput.needGroup = true;
+		}
+		else
+		{
+			/* Right group has no left matches, so we can ignore it */
+			setopstate->rightInput.needGroup = true;
+			continue;
 		}
 
 		/*
-		 * Done scanning input tuple group.  See if we should emit any copies
-		 * of result tuple, and if so return the first copy.
+		 * Done scanning these input tuple groups.  See if we should emit any
+		 * copies of result tuple, and if so return the first copy.  (Note
+		 * that the result tuple is the same as the left input's firstTuple
+		 * slot.)
 		 */
-		set_output_count(setopstate, pergroup);
+		set_output_count(setopstate, &pergroup);
 
 		if (setopstate->numOutput > 0)
 		{
@@ -334,84 +321,168 @@ setop_retrieve_direct(SetOpState *setopstate)
 }
 
 /*
- * ExecSetOp for hashed case: phase 1, read input and build hash table
+ * Load next group of tuples from one child plan or the other.
+ *
+ * On entry, we've already read the first tuple of the next group
+ * (if there is one) into input->nextTupleSlot.  This invariant
+ * is maintained on exit.
+ */
+static void
+setop_load_group(SetOpStatePerInput *input, PlanState *inputPlan,
+				 SetOpState *setopstate)
+{
+	input->needGroup = false;
+
+	/* If we've exhausted this child plan, report an empty group */
+	if (TupIsNull(input->nextTupleSlot))
+	{
+		ExecClearTuple(input->firstTupleSlot);
+		input->numTuples = 0;
+		return;
+	}
+
+	/* Make a local copy of the first tuple for comparisons */
+	ExecStoreMinimalTuple(ExecCopySlotMinimalTuple(input->nextTupleSlot),
+						  input->firstTupleSlot,
+						  true);
+	/* and count it */
+	input->numTuples = 1;
+
+	/* Scan till we find the end-of-group */
+	for (;;)
+	{
+		int			cmpresult;
+
+		/* Get next input tuple, if there is one */
+		input->nextTupleSlot = ExecProcNode(inputPlan);
+		if (TupIsNull(input->nextTupleSlot))
+			break;
+
+		/* There is; does it belong to same group as firstTuple? */
+		cmpresult = setop_compare_slots(input->firstTupleSlot,
+										input->nextTupleSlot,
+										setopstate);
+		Assert(cmpresult <= 0); /* else input is mis-sorted */
+		if (cmpresult != 0)
+			break;
+
+		/* Still in same group, so count this tuple */
+		input->numTuples++;
+	}
+}
+
+/*
+ * Compare the tuples in the two given slots.
+ */
+static int
+setop_compare_slots(TupleTableSlot *s1, TupleTableSlot *s2,
+					SetOpState *setopstate)
+{
+	/* We'll often need to fetch all the columns, so just do it */
+	slot_getallattrs(s1);
+	slot_getallattrs(s2);
+	for (int nkey = 0; nkey < setopstate->numCols; nkey++)
+	{
+		SortSupport sortKey = setopstate->sortKeys + nkey;
+		AttrNumber	attno = sortKey->ssup_attno;
+		Datum		datum1 = s1->tts_values[attno - 1],
+					datum2 = s2->tts_values[attno - 1];
+		bool		isNull1 = s1->tts_isnull[attno - 1],
+					isNull2 = s2->tts_isnull[attno - 1];
+		int			compare;
+
+		compare = ApplySortComparator(datum1, isNull1,
+									  datum2, isNull2,
+									  sortKey);
+		if (compare != 0)
+			return compare;
+	}
+	return 0;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read inputs and build hash table
  */
 static void
 setop_fill_hash_table(SetOpState *setopstate)
 {
-	SetOp	   *node = (SetOp *) setopstate->ps.plan;
 	PlanState  *outerPlan;
-	int			firstFlag;
-	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+	PlanState  *innerPlan;
 	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+	bool		have_tuples = false;
 
 	/*
 	 * get state info from node
 	 */
 	outerPlan = outerPlanState(setopstate);
-	firstFlag = node->firstFlag;
-	/* verify planner didn't mess up */
-	Assert(firstFlag == 0 ||
-		   (firstFlag == 1 &&
-			(node->cmd == SETOPCMD_INTERSECT ||
-			 node->cmd == SETOPCMD_INTERSECT_ALL)));
+	innerPlan = innerPlanState(setopstate);
 
 	/*
 	 * Process each outer-plan tuple, and then fetch the next one, until we
 	 * exhaust the outer plan.
 	 */
-	in_first_rel = true;
 	for (;;)
 	{
 		TupleTableSlot *outerslot;
-		int			flag;
 		TupleHashEntryData *entry;
 		bool		isnew;
 
 		outerslot = ExecProcNode(outerPlan);
 		if (TupIsNull(outerslot))
 			break;
+		have_tuples = true;
 
-		/* Identify whether it's left or right input */
-		flag = fetch_tuple_flag(setopstate, outerslot);
+		/* Find or build hashtable entry for this tuple's group */
+		entry = LookupTupleHashEntry(setopstate->hashtable,
+									 outerslot,
+									 &isnew, NULL);
 
-		if (flag == firstFlag)
+		/* If new tuple group, initialize counts to zero */
+		if (isnew)
 		{
-			/* (still) in first input relation */
-			Assert(in_first_rel);
-
-			/* Find or build hashtable entry for this tuple's group */
-			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-										 &isnew, NULL);
-
-			/* If new tuple group, initialize counts */
-			if (isnew)
-			{
-				entry->additional = (SetOpStatePerGroup)
-					MemoryContextAlloc(setopstate->hashtable->tablecxt,
+			entry->additional = (SetOpStatePerGroup)
+				MemoryContextAllocZero(setopstate->hashtable->tablecxt,
 									   sizeof(SetOpStatePerGroupData));
-				initialize_counts((SetOpStatePerGroup) entry->additional);
-			}
-
-			/* Advance the counts */
-			advance_counts((SetOpStatePerGroup) entry->additional, flag);
 		}
-		else
+
+		/* Advance the counts */
+		((SetOpStatePerGroup) entry->additional)->numLeft++;
+
+		/* Must reset expression context after each hashtable lookup */
+		ResetExprContext(econtext);
+	}
+
+	/*
+	 * If the outer relation is empty, then we will emit nothing, and we don't
+	 * need to read the inner relation at all.
+	 */
+	if (have_tuples)
+	{
+		/*
+		 * Process each inner-plan tuple, and then fetch the next one, until
+		 * we exhaust the inner plan.
+		 */
+		for (;;)
 		{
-			/* reached second relation */
-			in_first_rel = false;
+			TupleTableSlot *innerslot;
+			TupleHashEntryData *entry;
+
+			innerslot = ExecProcNode(innerPlan);
+			if (TupIsNull(innerslot))
+				break;
 
 			/* For tuples not seen previously, do not make hashtable entry */
-			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+			entry = LookupTupleHashEntry(setopstate->hashtable,
+										 innerslot,
 										 NULL, NULL);
 
 			/* Advance the counts if entry is already present */
 			if (entry)
-				advance_counts((SetOpStatePerGroup) entry->additional, flag);
-		}
+				((SetOpStatePerGroup) entry->additional)->numRight++;
 
-		/* Must reset expression context after each hashtable lookup */
-		ResetExprContext(econtext);
+			/* Must reset expression context after each hashtable lookup */
+			ResetExprContext(econtext);
+		}
 	}
 
 	setopstate->table_filled = true;
@@ -482,7 +553,6 @@ SetOpState *
 ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 {
 	SetOpState *setopstate;
-	TupleDesc	outerDesc;
 
 	/* check for unsupported flags */
 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -495,14 +565,10 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 	setopstate->ps.state = estate;
 	setopstate->ps.ExecProcNode = ExecSetOp;
 
-	setopstate->eqfuncoids = NULL;
-	setopstate->hashfunctions = NULL;
 	setopstate->setop_done = false;
 	setopstate->numOutput = 0;
-	setopstate->pergroup = NULL;
-	setopstate->grp_firstTuple = NULL;
-	setopstate->hashtable = NULL;
-	setopstate->tableContext = NULL;
+	setopstate->numCols = node->numCols;
+	setopstate->need_init = true;
 
 	/*
 	 * create expression context
@@ -523,52 +589,72 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 	/*
 	 * initialize child nodes
 	 *
-	 * If we are hashing then the child plan does not need to handle REWIND
+	 * If we are hashing then the child plans do not need to handle REWIND
 	 * efficiently; see ExecReScanSetOp.
 	 */
 	if (node->strategy == SETOP_HASHED)
 		eflags &= ~EXEC_FLAG_REWIND;
 	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
-	outerDesc = ExecGetResultType(outerPlanState(setopstate));
+	innerPlanState(setopstate) = ExecInitNode(innerPlan(node), estate, eflags);
 
 	/*
-	 * Initialize result slot and type. Setop nodes do no projections, so
-	 * initialize projection info for this node appropriately.
+	 * Initialize locally-allocated slots.  In hashed mode, we just need a
+	 * result slot.  In sorted mode, we need one first-tuple-of-group slot for
+	 * each input; we use the result slot for the left input's slot and create
+	 * another for the right input.  (Note: the nextTupleSlot slots are not
+	 * ours, but just point to the last slot returned by the input plan node.)
 	 */
-	ExecInitResultTupleSlotTL(&setopstate->ps,
-							  node->strategy == SETOP_HASHED ?
-							  &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
+	ExecInitResultTupleSlotTL(&setopstate->ps, &TTSOpsMinimalTuple);
+	if (node->strategy != SETOP_HASHED)
+	{
+		setopstate->leftInput.firstTupleSlot =
+			setopstate->ps.ps_ResultTupleSlot;
+		setopstate->rightInput.firstTupleSlot =
+			ExecInitExtraTupleSlot(estate,
+								   setopstate->ps.ps_ResultTupleDesc,
+								   &TTSOpsMinimalTuple);
+	}
+
+	/* Setop nodes do no projections. */
 	setopstate->ps.ps_ProjInfo = NULL;
 
 	/*
-	 * Precompute fmgr lookup data for inner loop. We need both equality and
-	 * hashing functions to do it by hashing, but only equality if not
-	 * hashing.
+	 * Precompute fmgr lookup data for inner loop.  We need equality and
+	 * hashing functions to do it by hashing, while for sorting we need
+	 * SortSupport data.
 	 */
 	if (node->strategy == SETOP_HASHED)
 		execTuplesHashPrepare(node->numCols,
-							  node->dupOperators,
+							  node->cmpOperators,
 							  &setopstate->eqfuncoids,
 							  &setopstate->hashfunctions);
 	else
-		setopstate->eqfunction =
-			execTuplesMatchPrepare(outerDesc,
-								   node->numCols,
-								   node->dupColIdx,
-								   node->dupOperators,
-								   node->dupCollations,
-								   &setopstate->ps);
+	{
+		int			nkeys = node->numCols;
+
+		setopstate->sortKeys = (SortSupport)
+			palloc0(nkeys * sizeof(SortSupportData));
+		for (int i = 0; i < nkeys; i++)
+		{
+			SortSupport sortKey = setopstate->sortKeys + i;
+
+			sortKey->ssup_cxt = CurrentMemoryContext;
+			sortKey->ssup_collation = node->cmpCollations[i];
+			sortKey->ssup_nulls_first = node->cmpNullsFirst[i];
+			sortKey->ssup_attno = node->cmpColIdx[i];
+			/* abbreviated key conversion is not useful here */
+			sortKey->abbreviate = false;
 
+			PrepareSortSupportFromOrderingOp(node->cmpOperators[i], sortKey);
+		}
+	}
+
+	/* Create a hash table if needed */
 	if (node->strategy == SETOP_HASHED)
 	{
 		build_hash_table(setopstate);
 		setopstate->table_filled = false;
 	}
-	else
-	{
-		setopstate->pergroup =
-			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
-	}
 
 	return setopstate;
 }
@@ -576,7 +662,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
 /* ----------------------------------------------------------------
  *		ExecEndSetOp
  *
- *		This shuts down the subplan and frees resources allocated
+ *		This shuts down the subplans and frees resources allocated
  *		to this node.
  * ----------------------------------------------------------------
  */
@@ -588,6 +674,7 @@ ExecEndSetOp(SetOpState *node)
 		MemoryContextDelete(node->tableContext);
 
 	ExecEndNode(outerPlanState(node));
+	ExecEndNode(innerPlanState(node));
 }
 
 
@@ -595,6 +682,7 @@ void
 ExecReScanSetOp(SetOpState *node)
 {
 	PlanState  *outerPlan = outerPlanState(node);
+	PlanState  *innerPlan = innerPlanState(node);
 
 	ExecClearTuple(node->ps.ps_ResultTupleSlot);
 	node->setop_done = false;
@@ -612,34 +700,29 @@ ExecReScanSetOp(SetOpState *node)
 			return;
 
 		/*
-		 * If we do have the hash table and the subplan does not have any
+		 * If we do have the hash table and the subplans do not have any
 		 * parameter changes, then we can just rescan the existing hash table;
 		 * no need to build it again.
 		 */
-		if (outerPlan->chgParam == NULL)
+		if (outerPlan->chgParam == NULL && innerPlan->chgParam == NULL)
 		{
 			ResetTupleHashIterator(node->hashtable, &node->hashiter);
 			return;
 		}
-	}
-
-	/* Release first tuple of group, if we have made a copy */
-	if (node->grp_firstTuple != NULL)
-	{
-		heap_freetuple(node->grp_firstTuple);
-		node->grp_firstTuple = NULL;
-	}
 
-	/* Release any hashtable storage */
-	if (node->tableContext)
-		MemoryContextReset(node->tableContext);
+		/* Release any hashtable storage */
+		if (node->tableContext)
+			MemoryContextReset(node->tableContext);
 
-	/* And rebuild empty hashtable if needed */
-	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
-	{
+		/* And rebuild an empty hashtable */
 		ResetTupleHashTable(node->hashtable);
 		node->table_filled = false;
 	}
+	else
+	{
+		/* Need to re-read first input from each side */
+		node->need_init = true;
+	}
 
 	/*
 	 * if chgParam of subnode is not null then plan will be re-scanned by
@@ -647,4 +730,6 @@ ExecReScanSetOp(SetOpState *node)
 	 */
 	if (outerPlan->chgParam == NULL)
 		ExecReScan(outerPlan);
+	if (innerPlan->chgParam == NULL)
+		ExecReScan(innerPlan);
 }