4 files changed, 162 insertions, 127 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 2ab4f3dbf37..f341d9f303c 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -649,7 +649,7 @@ RelOptInfo      - a relation or joined relations
   GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
   MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
   WindowAggPath - a WindowAgg plan node applied to some sub-path
-  SetOpPath     - a SetOp plan node applied to some sub-path
+  SetOpPath     - a SetOp plan node applied to two sub-paths
   RecursiveUnionPath - a RecursiveUnion plan node applied to two sub-paths
   LockRowsPath  - a LockRows plan node applied to some sub-path
   ModifyTablePath - a ModifyTable plan node applied to some sub-path(s)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 178c572b021..b3e2294e84f 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -301,9 +301,9 @@ static Unique *make_unique_from_pathkeys(Plan *lefttree,
 										 List *pathkeys, int numCols);
 static Gather *make_gather(List *qptlist, List *qpqual,
 						   int nworkers, int rescan_param, bool single_copy, Plan *subplan);
-static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-						 List *distinctList, AttrNumber flagColIdx, int firstFlag,
-						 long numGroups);
+static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+						 List *tlist, Plan *lefttree, Plan *righttree,
+						 List *groupList, long numGroups);
 static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
 static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
 static ProjectSet *make_project_set(List *tlist, Plan *subplan);
@@ -2719,25 +2719,29 @@ static SetOp *
 create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
 {
 	SetOp	   *plan;
-	Plan	   *subplan;
+	List	   *tlist = build_path_tlist(root, &best_path->path);
+	Plan	   *leftplan;
+	Plan	   *rightplan;
 	long		numGroups;
 
 	/*
 	 * SetOp doesn't project, so tlist requirements pass through; moreover we
 	 * need grouping columns to be labeled.
 	 */
-	subplan = create_plan_recurse(root, best_path->subpath,
-								  flags | CP_LABEL_TLIST);
+	leftplan = create_plan_recurse(root, best_path->leftpath,
+								   flags | CP_LABEL_TLIST);
+	rightplan = create_plan_recurse(root, best_path->rightpath,
+									flags | CP_LABEL_TLIST);
 
 	/* Convert numGroups to long int --- but 'ware overflow! */
 	numGroups = clamp_cardinality_to_long(best_path->numGroups);
 
 	plan = make_setop(best_path->cmd,
 					  best_path->strategy,
-					  subplan,
-					  best_path->distinctList,
-					  best_path->flagColIdx,
-					  best_path->firstFlag,
+					  tlist,
+					  leftplan,
+					  rightplan,
+					  best_path->groupList,
 					  numGroups);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6950,57 +6954,62 @@ make_gather(List *qptlist,
 }
 
 /*
- * distinctList is a list of SortGroupClauses, identifying the targetlist
- * items that should be considered by the SetOp filter.  The input path must
- * already be sorted accordingly.
+ * groupList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter.  The input plans must
+ * already be sorted accordingly, if we're doing SETOP_SORTED mode.
  */
 static SetOp *
-make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-		   List *distinctList, AttrNumber flagColIdx, int firstFlag,
-		   long numGroups)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+		   List *tlist, Plan *lefttree, Plan *righttree,
+		   List *groupList, long numGroups)
 {
 	SetOp	   *node = makeNode(SetOp);
 	Plan	   *plan = &node->plan;
-	int			numCols = list_length(distinctList);
+	int			numCols = list_length(groupList);
 	int			keyno = 0;
-	AttrNumber *dupColIdx;
-	Oid		   *dupOperators;
-	Oid		   *dupCollations;
+	AttrNumber *cmpColIdx;
+	Oid		   *cmpOperators;
+	Oid		   *cmpCollations;
+	bool	   *cmpNullsFirst;
 	ListCell   *slitem;
 
-	plan->targetlist = lefttree->targetlist;
+	plan->targetlist = tlist;
 	plan->qual = NIL;
 	plan->lefttree = lefttree;
-	plan->righttree = NULL;
+	plan->righttree = righttree;
 
 	/*
-	 * convert SortGroupClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and comparison
 	 * operators, as wanted by executor
 	 */
-	dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-	dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-	dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+	cmpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	cmpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+	cmpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+	cmpNullsFirst = (bool *) palloc(sizeof(bool) * numCols);
 
-	foreach(slitem, distinctList)
+	foreach(slitem, groupList)
 	{
 		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 
-		dupColIdx[keyno] = tle->resno;
-		dupOperators[keyno] = sortcl->eqop;
-		dupCollations[keyno] = exprCollation((Node *) tle->expr);
-		Assert(OidIsValid(dupOperators[keyno]));
+		cmpColIdx[keyno] = tle->resno;
+		if (strategy == SETOP_HASHED)
+			cmpOperators[keyno] = sortcl->eqop;
+		else
+			cmpOperators[keyno] = sortcl->sortop;
+		Assert(OidIsValid(cmpOperators[keyno]));
+		cmpCollations[keyno] = exprCollation((Node *) tle->expr);
+		cmpNullsFirst[keyno] = sortcl->nulls_first;
 		keyno++;
 	}
 
 	node->cmd = cmd;
 	node->strategy = strategy;
 	node->numCols = numCols;
-	node->dupColIdx = dupColIdx;
-	node->dupOperators = dupOperators;
-	node->dupCollations = dupCollations;
-	node->flagColIdx = flagColIdx;
-	node->firstFlag = firstFlag;
+	node->cmpColIdx = cmpColIdx;
+	node->cmpOperators = cmpOperators;
+	node->cmpCollations = cmpCollations;
+	node->cmpNullsFirst = cmpNullsFirst;
 	node->numGroups = numGroups;
 
 	return node;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 9c3822f19ad..1c43370005e 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root,
 								 List **istrivial_tlist);
 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-								Path *input_path,
+								Path *lpath, Path *rpath,
 								double dNumGroups, double dNumOutputRows,
 								const char *construct);
 static List *generate_setop_tlist(List *colTypes, List *colCollations,
@@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 		 * to the corresponding tlist entries of the subplan. However, since
 		 * the subplan was generated by generate_union_paths() or
 		 * generate_nonunion_paths(), and hence its tlist was generated by
-		 * generate_append_tlist(), this will work.  We just tell
-		 * generate_setop_tlist() to use varno 0.
+		 * generate_append_tlist() or generate_setop_tlist(), this will work.
+		 * We just tell generate_setop_tlist() to use varno 0.
 		 */
 		if (flag >= 0 ||
 			!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
@@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 			   *path;
 	List	   *lpath_tlist,
 			   *rpath_tlist,
-			   *tlist_list,
 			   *tlist,
-			   *groupList,
-			   *pathlist;
+			   *groupList;
 	bool		lpath_trivial_tlist,
-				rpath_trivial_tlist;
+				rpath_trivial_tlist,
+				result_trivial_tlist;
 	double		dLeftGroups,
 				dRightGroups,
 				dNumGroups,
 				dNumOutputRows;
 	bool		use_hash;
 	SetOpCmd	cmd;
-	int			firstFlag;
 
 	/*
 	 * Tell children to fetch all tuples.
 	 */
 	root->tuple_fraction = 0.0;
 
-	/* Recurse on children, ensuring their outputs are marked */
+	/* Recurse on children */
 	lrel = recurse_set_operations(op->larg, root,
 								  op->colTypes, op->colCollations,
-								  false, 0,
+								  false, -1,
 								  refnames_tlist,
 								  &lpath_tlist,
 								  &lpath_trivial_tlist);
@@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	else
 		dLeftGroups = lrel->rows;
 
-	lpath = lrel->cheapest_total_path;
 	rrel = recurse_set_operations(op->rarg, root,
 								  op->colTypes, op->colCollations,
-								  false, 1,
+								  false, -1,
 								  refnames_tlist,
 								  &rpath_tlist,
 								  &rpath_trivial_tlist);
@@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	else
 		dRightGroups = rrel->rows;
 
-	rpath = rrel->cheapest_total_path;
-
 	/* Undo effects of forcing tuple_fraction to 0 */
 	root->tuple_fraction = save_fraction;
 
 	/*
 	 * For EXCEPT, we must put the left input first.  For INTERSECT, either
 	 * order should give the same results, and we prefer to put the smaller
-	 * input first in order to minimize the size of the hash table in the
-	 * hashing case.  "Smaller" means the one with the fewer groups.
+	 * input first in order to (a) minimize the size of the hash table in the
+	 * hashing case, and (b) improve our chances of exploiting the executor's
+	 * fast path for empty left-hand input.  "Smaller" means the one with the
+	 * fewer groups.
 	 */
-	if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
-	{
-		pathlist = list_make2(lpath, rpath);
-		tlist_list = list_make2(lpath_tlist, rpath_tlist);
-		firstFlag = 0;
-	}
-	else
+	if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
 	{
-		pathlist = list_make2(rpath, lpath);
-		tlist_list = list_make2(rpath_tlist, lpath_tlist);
-		firstFlag = 1;
+		/* need to swap the two inputs */
+		RelOptInfo *tmprel;
+		List	   *tmplist;
+		double		tmpd;
+
+		tmprel = lrel;
+		lrel = rrel;
+		rrel = tmprel;
+		tmplist = lpath_tlist;
+		lpath_tlist = rpath_tlist;
+		rpath_tlist = tmplist;
+		tmpd = dLeftGroups;
+		dLeftGroups = dRightGroups;
+		dRightGroups = tmpd;
 	}
 
+	lpath = lrel->cheapest_total_path;
+	rpath = rrel->cheapest_total_path;
+
 	/*
-	 * Generate tlist for Append plan node.
+	 * Generate tlist for SetOp plan node.
 	 *
-	 * The tlist for an Append plan isn't important as far as the Append is
+	 * The tlist for a SetOp plan isn't important so far as the SetOp is
 	 * concerned, but we must make it look real anyway for the benefit of the
-	 * next plan level up.  In fact, it has to be real enough that the flag
-	 * column is shown as a variable not a constant, else setrefs.c will get
-	 * confused.
+	 * next plan level up.
 	 */
-	tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
-								  tlist_list, refnames_tlist);
+	tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
+								 0, false, lpath_tlist, refnames_tlist,
+								 &result_trivial_tlist);
+
+	/* We should not have needed any type coercions in the tlist */
+	Assert(result_trivial_tlist);
 
 	*pTargetList = tlist;
 
@@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 								 bms_union(lrel->relids, rrel->relids));
 	result_rel->reltarget = create_pathtarget(root, tlist);
 
-	/*
-	 * Append the child results together.
-	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
-									   NIL, NULL, 0, false, -1);
-
 	/* Identify the grouping semantics */
 	groupList = generate_setop_grouplist(op, tlist);
 
@@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	}
 	else
 	{
-		dNumGroups = Min(dLeftGroups, dRightGroups);
+		dNumGroups = dLeftGroups;
 		dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
 	}
 
 	/*
-	 * Decide whether to hash or sort, and add a sort node if needed.
+	 * Decide whether to hash or sort, and add sort nodes if needed.
 	 */
-	use_hash = choose_hashed_setop(root, groupList, path,
+	use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
 								   dNumGroups, dNumOutputRows,
 								   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
 
 	if (groupList && !use_hash)
-		path = (Path *) create_sort_path(root,
-										 result_rel,
-										 path,
-										 make_pathkeys_for_sortclauses(root,
-																	   groupList,
-																	   tlist),
-										 -1.0);
+	{
+		List	   *pathkeys;
+
+		pathkeys = make_pathkeys_for_sortclauses(root,
+												 groupList,
+												 lpath_tlist);
+		if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
+			lpath = (Path *) create_sort_path(root,
+											  lpath->parent,
+											  lpath,
+											  pathkeys,
+											  -1.0);
+		pathkeys = make_pathkeys_for_sortclauses(root,
+												 groupList,
+												 rpath_tlist);
+		if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
+			rpath = (Path *) create_sort_path(root,
+											  rpath->parent,
+											  rpath,
+											  pathkeys,
+											  -1.0);
+	}
 
 	/*
 	 * Finally, add a SetOp path node to generate the correct output.
@@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	}
 	path = (Path *) create_setop_path(root,
 									  result_rel,
-									  path,
+									  lpath,
+									  rpath,
 									  cmd,
 									  use_hash ? SETOP_HASHED : SETOP_SORTED,
 									  groupList,
-									  list_length(op->colTypes) + 1,
-									  use_hash ? firstFlag : -1,
 									  dNumGroups,
 									  dNumOutputRows);
 
@@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
 
 /*
  * choose_hashed_setop - should we use hashing for a set operation?
+ *
+ * XXX probably this should go away: just make both paths and let
+ * add_path sort it out.
  */
 static bool
 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-					Path *input_path,
+					Path *lpath, Path *rpath,
 					double dNumGroups, double dNumOutputRows,
 					const char *construct)
 {
@@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	 * Don't do it if it doesn't look like the hashtable will fit into
 	 * hash_mem.
 	 */
-	hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+	hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
 
 	if (hashentrysize * dNumGroups > hash_mem_limit)
 		return false;
@@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	 * See if the estimated cost is no more than doing it the other way.
 	 *
 	 * We need to consider input_plan + hashagg versus input_plan + sort +
-	 * group.  Note that the actual result plan might involve a SetOp or
-	 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
-	 * should be close enough for our purposes here.
+	 * group. XXX NOT TRUE: Note that the actual result plan might involve a
+	 * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
+	 * and Group should be close enough for our purposes here.
 	 *
 	 * These path variables are dummies that just hold cost fields; we don't
 	 * make actual Paths for these steps.
@@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	cost_agg(&hashed_p, root, AGG_HASHED, NULL,
 			 numGroupCols, dNumGroups,
 			 NIL,
-			 input_path->disabled_nodes,
-			 input_path->startup_cost, input_path->total_cost,
-			 input_path->rows, input_path->pathtarget->width);
+			 lpath->disabled_nodes + rpath->disabled_nodes,
+			 lpath->startup_cost + rpath->startup_cost,
+			 lpath->total_cost + rpath->total_cost,
+			 lpath->rows + rpath->rows,
+			 lpath->pathtarget->width);
 
 	/*
-	 * Now for the sorted case.  Note that the input is *always* unsorted,
-	 * since it was made by appending unrelated sub-relations together.
+	 * Now for the sorted case.  XXX NOT TRUE: Note that the input is *always*
+	 * unsorted, since it was made by appending unrelated sub-relations
+	 * together.
 	 */
-	sorted_p.disabled_nodes = input_path->disabled_nodes;
-	sorted_p.startup_cost = input_path->startup_cost;
-	sorted_p.total_cost = input_path->total_cost;
+	sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
+	sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
+	sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
 	/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
 	cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
 			  sorted_p.total_cost,
-			  input_path->rows, input_path->pathtarget->width,
+			  lpath->rows + rpath->rows,
+			  lpath->pathtarget->width,
 			  0.0, work_mem, -1.0);
 	cost_group(&sorted_p, root, numGroupCols, dNumGroups,
 			   NIL,
 			   sorted_p.disabled_nodes,
 			   sorted_p.startup_cost, sorted_p.total_cost,
-			   input_path->rows);
+			   lpath->rows + rpath->rows);
 
 	/*
 	 * Now make the decision using the top-level tuple fraction.  First we
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fc97bf6ee26..e52e4b1d677 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3634,25 +3634,26 @@ create_windowagg_path(PlannerInfo *root,
  *	  Creates a pathnode that represents computation of INTERSECT or EXCEPT
  *
  * 'rel' is the parent relation associated with the result
- * 'subpath' is the path representing the source of data
+ * 'leftpath' is the path representing the left-hand source of data
+ * 'rightpath' is the path representing the right-hand source of data
  * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
  * 'strategy' is the implementation strategy (sorted or hashed)
- * 'distinctList' is a list of SortGroupClause's representing the grouping
- * 'flagColIdx' is the column number where the flag column will be, if any
- * 'firstFlag' is the flag value for the first input relation when hashing;
- *		or -1 when sorting
- * 'numGroups' is the estimated number of distinct groups
+ * 'groupList' is a list of SortGroupClause's representing the grouping
+ * 'numGroups' is the estimated number of distinct groups in left-hand input
  * 'outputRows' is the estimated number of output rows
+ *
+ * leftpath and rightpath must produce the same columns.  Moreover, if
+ * strategy is SETOP_SORTED, leftpath and rightpath must both be sorted
+ * by all the grouping columns.
  */
 SetOpPath *
 create_setop_path(PlannerInfo *root,
 				  RelOptInfo *rel,
-				  Path *subpath,
+				  Path *leftpath,
+				  Path *rightpath,
 				  SetOpCmd cmd,
 				  SetOpStrategy strategy,
-				  List *distinctList,
-				  AttrNumber flagColIdx,
-				  int firstFlag,
+				  List *groupList,
 				  double numGroups,
 				  double outputRows)
 {
@@ -3660,34 +3661,37 @@ create_setop_path(PlannerInfo *root,
 
 	pathnode->path.pathtype = T_SetOp;
 	pathnode->path.parent = rel;
-	/* SetOp doesn't project, so use source path's pathtarget */
-	pathnode->path.pathtarget = subpath->pathtarget;
+	pathnode->path.pathtarget = rel->reltarget;
 	/* For now, assume we are above any joins, so no parameterization */
 	pathnode->path.param_info = NULL;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
-		subpath->parallel_safe;
-	pathnode->path.parallel_workers = subpath->parallel_workers;
+		leftpath->parallel_safe && rightpath->parallel_safe;
+	pathnode->path.parallel_workers =
+		leftpath->parallel_workers + rightpath->parallel_workers;
 	/* SetOp preserves the input sort order if in sort mode */
 	pathnode->path.pathkeys =
-		(strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
+		(strategy == SETOP_SORTED) ? leftpath->pathkeys : NIL;
 
-	pathnode->subpath = subpath;
+	pathnode->leftpath = leftpath;
+	pathnode->rightpath = rightpath;
 	pathnode->cmd = cmd;
 	pathnode->strategy = strategy;
-	pathnode->distinctList = distinctList;
-	pathnode->flagColIdx = flagColIdx;
-	pathnode->firstFlag = firstFlag;
+	pathnode->groupList = groupList;
 	pathnode->numGroups = numGroups;
 
 	/*
 	 * Charge one cpu_operator_cost per comparison per input tuple. We assume
 	 * all columns get compared at most of the tuples.
+	 *
+	 * XXX all wrong for hashing
 	 */
-	pathnode->path.disabled_nodes = subpath->disabled_nodes;
-	pathnode->path.startup_cost = subpath->startup_cost;
-	pathnode->path.total_cost = subpath->total_cost +
-		cpu_operator_cost * subpath->rows * list_length(distinctList);
+	pathnode->path.disabled_nodes =
+		leftpath->disabled_nodes + rightpath->disabled_nodes;
+	pathnode->path.startup_cost =
+		leftpath->startup_cost + rightpath->startup_cost;
+	pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost +
+		cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
 	pathnode->path.rows = outputRows;
 
 	return pathnode;