9 files changed, 147 insertions, 39 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 358bb2aed6f..028d9e16808 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2451,6 +2451,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 {
 	Query	   *parse = root->parse;
 	Query	   *subquery = rte->subquery;
+	bool		trivial_pathtarget;
 	Relids		required_outer;
 	pushdown_safety_info safetyInfo;
 	double		tuple_fraction;
@@ -2614,6 +2615,36 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	set_subquery_size_estimates(root, rel);
 
 	/*
+	 * Also detect whether the reltarget is trivial, so that we can pass that
+	 * info to cost_subqueryscan (rather than re-deriving it multiple times).
+	 * It's trivial if it fetches all the subplan output columns in order.
+	 */
+	if (list_length(rel->reltarget->exprs) != list_length(subquery->targetList))
+		trivial_pathtarget = false;
+	else
+	{
+		trivial_pathtarget = true;
+		foreach(lc, rel->reltarget->exprs)
+		{
+			Node	   *node = (Node *) lfirst(lc);
+			Var		   *var;
+
+			if (!IsA(node, Var))
+			{
+				trivial_pathtarget = false;
+				break;
+			}
+			var = (Var *) node;
+			if (var->varno != rti ||
+				var->varattno != foreach_current_index(lc) + 1)
+			{
+				trivial_pathtarget = false;
+				break;
+			}
+		}
+	}
+
+	/*
 	 * For each Path that subquery_planner produced, make a SubqueryScanPath
 	 * in the outer query.
 	 */
@@ -2631,6 +2662,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		/* Generate outer path using this subpath */
 		add_path(rel, (Path *)
 				 create_subqueryscan_path(root, rel, subpath,
+										  trivial_pathtarget,
 										  pathkeys, required_outer));
 	}
 
@@ -2656,6 +2688,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 			/* Generate outer path using this subpath */
 			add_partial_path(rel, (Path *)
 							 create_subqueryscan_path(root, rel, subpath,
+													  trivial_pathtarget,
 													  pathkeys,
 													  required_outer));
 		}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 5e5732f6e12..fb28e6411aa 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1415,10 +1415,12 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
  *
  * 'baserel' is the relation to be scanned
  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'trivial_pathtarget' is true if the pathtarget is believed to be trivial.
  */
 void
 cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
-				  RelOptInfo *baserel, ParamPathInfo *param_info)
+				  RelOptInfo *baserel, ParamPathInfo *param_info,
+				  bool trivial_pathtarget)
 {
 	Cost		startup_cost;
 	Cost		run_cost;
@@ -1458,6 +1460,22 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
 	path->path.startup_cost = path->subpath->startup_cost;
 	path->path.total_cost = path->subpath->total_cost;
 
+	/*
+	 * However, if there are no relevant restriction clauses and the
+	 * pathtarget is trivial, then we expect that setrefs.c will optimize away
+	 * the SubqueryScan plan node altogether, so we should just make its cost
+	 * and rowcount equal to the input path's.
+	 *
+	 * Note: there are some edge cases where createplan.c will apply a
+	 * different targetlist to the SubqueryScan node, thus falsifying our
+	 * current estimate of whether the target is trivial, and making the cost
+	 * estimate (though not the rowcount) wrong.  It does not seem worth the
+	 * extra complication to try to account for that exactly, especially since
+	 * that behavior falsifies other cost estimates as well.
+	 */
+	if (qpquals == NIL && trivial_pathtarget)
+		return;
+
 	get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
 
 	startup_cost = qpqual_cost.startup;
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 707c1016c25..1cb0abdbc1f 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1636,9 +1636,10 @@ set_append_references(PlannerInfo *root,
 	/*
 	 * See if it's safe to get rid of the Append entirely.  For this to be
 	 * safe, there must be only one child plan and that child plan's parallel
-	 * awareness must match that of the Append's.  The reason for the latter
-	 * is that if the Append is parallel aware and the child is not, then the
-	 * calling plan may execute the non-parallel aware child multiple times.
+	 * awareness must match the Append's.  The reason for the latter is that
+	 * if the Append is parallel aware and the child is not, then the calling
+	 * plan may execute the non-parallel aware child multiple times.  (If you
+	 * change these rules, update create_append_path to match.)
 	 */
 	if (list_length(aplan->appendplans) == 1)
 	{
@@ -1710,10 +1711,11 @@ set_mergeappend_references(PlannerInfo *root,
 	/*
 	 * See if it's safe to get rid of the MergeAppend entirely.  For this to
 	 * be safe, there must be only one child plan and that child plan's
-	 * parallel awareness must match that of the MergeAppend's.  The reason
-	 * for the latter is that if the MergeAppend is parallel aware and the
-	 * child is not then the calling plan may execute the non-parallel aware
-	 * child multiple times.
+	 * parallel awareness must match the MergeAppend's.  The reason for the
+	 * latter is that if the MergeAppend is parallel aware and the child is
+	 * not, then the calling plan may execute the non-parallel aware child
+	 * multiple times.  (If you change these rules, update
+	 * create_merge_append_path to match.)
 	 */
 	if (list_length(mplan->mergeplans) == 1)
 	{
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index f004fad1d92..2214920dea4 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -78,7 +78,8 @@ static List *generate_setop_tlist(List *colTypes, List *colCollations,
 								  Index varno,
 								  bool hack_constants,
 								  List *input_tlist,
-								  List *refnames_tlist);
+								  List *refnames_tlist,
+								  bool *trivial_tlist);
 static List *generate_append_tlist(List *colTypes, List *colCollations,
 								   bool flag,
 								   List *input_tlists,
@@ -226,6 +227,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 		Path	   *subpath;
 		Path	   *path;
 		List	   *tlist;
+		bool		trivial_tlist;
 
 		Assert(subquery != NULL);
 
@@ -254,7 +256,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 									 rtr->rtindex,
 									 true,
 									 subroot->processed_tlist,
-									 refnames_tlist);
+									 refnames_tlist,
+									 &trivial_tlist);
 		rel->reltarget = create_pathtarget(root, tlist);
 
 		/* Return the fully-fledged tlist to caller, too */
@@ -291,6 +294,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 		 * soon too, likely.)
 		 */
 		path = (Path *) create_subqueryscan_path(root, rel, subpath,
+												 trivial_tlist,
 												 NIL, NULL);
 
 		add_path(rel, path);
@@ -309,6 +313,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 			partial_subpath = linitial(final_rel->partial_pathlist);
 			partial_path = (Path *)
 				create_subqueryscan_path(root, rel, partial_subpath,
+										 trivial_tlist,
 										 NIL, NULL);
 			add_partial_path(rel, partial_path);
 		}
@@ -376,6 +381,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 			!tlist_same_collations(*pTargetList, colCollations, junkOK))
 		{
 			PathTarget *target;
+			bool		trivial_tlist;
 			ListCell   *lc;
 
 			*pTargetList = generate_setop_tlist(colTypes, colCollations,
@@ -383,7 +389,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 												0,
 												false,
 												*pTargetList,
-												refnames_tlist);
+												refnames_tlist,
+												&trivial_tlist);
 			target = create_pathtarget(root, *pTargetList);
 
 			/* Apply projection to each path */
@@ -1117,6 +1124,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
  * hack_constants: true to copy up constants (see comments in code)
  * input_tlist: targetlist of this node's input node
  * refnames_tlist: targetlist to take column names from
+ * trivial_tlist: output parameter, set to true if targetlist is trivial
  */
 static List *
 generate_setop_tlist(List *colTypes, List *colCollations,
@@ -1124,7 +1132,8 @@ generate_setop_tlist(List *colTypes, List *colCollations,
 					 Index varno,
 					 bool hack_constants,
 					 List *input_tlist,
-					 List *refnames_tlist)
+					 List *refnames_tlist,
+					 bool *trivial_tlist)
 {
 	List	   *tlist = NIL;
 	int			resno = 1;
@@ -1135,6 +1144,8 @@ generate_setop_tlist(List *colTypes, List *colCollations,
 	TargetEntry *tle;
 	Node	   *expr;
 
+	*trivial_tlist = true;		/* until proven differently */
+
 	forfour(ctlc, colTypes, cclc, colCollations,
 			itlc, input_tlist, rtlc, refnames_tlist)
 	{
@@ -1160,6 +1171,9 @@ generate_setop_tlist(List *colTypes, List *colCollations,
 		 * this only at the first level of subquery-scan plans; we don't want
 		 * phony constants appearing in the output tlists of upper-level
 		 * nodes!
+		 *
+		 * Note that copying a constant doesn't in itself require us to mark
+		 * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
 		 */
 		if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
 			expr = (Node *) inputtle->expr;
@@ -1185,6 +1199,7 @@ generate_setop_tlist(List *colTypes, List *colCollations,
 										 expr,
 										 colType,
 										 "UNION/INTERSECT/EXCEPT");
+			*trivial_tlist = false; /* the coercion makes it not trivial */
 		}
 
 		/*
@@ -1199,9 +1214,12 @@ generate_setop_tlist(List *colTypes, List *colCollations,
 		 * will reach the executor without any further processing.
 		 */
 		if (exprCollation(expr) != colColl)
+		{
 			expr = applyRelabelType(expr,
 									exprType(expr), exprTypmod(expr), colColl,
 									COERCE_IMPLICIT_CAST, -1, false);
+			*trivial_tlist = false; /* the relabel makes it not trivial */
+		}
 
 		tle = makeTargetEntry((Expr *) expr,
 							  (AttrNumber) resno++,
@@ -1234,6 +1252,7 @@ generate_setop_tlist(List *colTypes, List *colCollations,
 							  pstrdup("flag"),
 							  true);
 		tlist = lappend(tlist, tle);
+		*trivial_tlist = false; /* the extra entry makes it not trivial */
 	}
 
 	return tlist;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 483c4f41373..dd64b460865 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1326,19 +1326,28 @@ create_append_path(PlannerInfo *root,
 	Assert(!parallel_aware || pathnode->path.parallel_safe);
 
 	/*
-	 * If there's exactly one child path, the Append is a no-op and will be
-	 * discarded later (in setrefs.c); therefore, we can inherit the child's
-	 * size and cost, as well as its pathkeys if any (overriding whatever the
-	 * caller might've said).  Otherwise, we must do the normal costsize
+	 * If there's exactly one child path then the output of the Append is
+	 * necessarily ordered the same as the child's, so we can inherit the
+	 * child's pathkeys if any, overriding whatever the caller might've said.
+	 * Furthermore, if the child's parallel awareness matches the Append's,
+	 * then the Append is a no-op and will be discarded later (in setrefs.c).
+	 * Then we can inherit the child's size and cost too, effectively charging
+	 * zero for the Append.  Otherwise, we must do the normal costsize
 	 * calculation.
 	 */
 	if (list_length(pathnode->subpaths) == 1)
 	{
 		Path	   *child = (Path *) linitial(pathnode->subpaths);
 
-		pathnode->path.rows = child->rows;
-		pathnode->path.startup_cost = child->startup_cost;
-		pathnode->path.total_cost = child->total_cost;
+		if (child->parallel_aware == parallel_aware)
+		{
+			pathnode->path.rows = child->rows;
+			pathnode->path.startup_cost = child->startup_cost;
+			pathnode->path.total_cost = child->total_cost;
+		}
+		else
+			cost_append(pathnode, root);
+		/* Must do this last, else cost_append complains */
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
@@ -1476,10 +1485,13 @@ create_merge_append_path(PlannerInfo *root,
 
 	/*
 	 * Now we can compute total costs of the MergeAppend.  If there's exactly
-	 * one child path, the MergeAppend is a no-op and will be discarded later
-	 * (in setrefs.c); otherwise we do the normal cost calculation.
+	 * one child path and its parallel awareness matches that of the
+	 * MergeAppend, then the MergeAppend is a no-op and will be discarded
+	 * later (in setrefs.c); otherwise we do the normal cost calculation.
 	 */
-	if (list_length(subpaths) == 1)
+	if (list_length(subpaths) == 1 &&
+		((Path *) linitial(subpaths))->parallel_aware ==
+		pathnode->path.parallel_aware)
 	{
 		pathnode->path.startup_cost = input_startup_cost;
 		pathnode->path.total_cost = input_total_cost;
@@ -1986,9 +1998,15 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
  * create_subqueryscan_path
  *	  Creates a path corresponding to a scan of a subquery,
  *	  returning the pathnode.
+ *
+ * Caller must pass trivial_pathtarget = true if it believes rel->reltarget to
+ * be trivial, ie just a fetch of all the subquery output columns in order.
+ * While we could determine that here, the caller can usually do it more
+ * efficiently (or at least amortize it over multiple calls).
  */
 SubqueryScanPath *
 create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
+						 bool trivial_pathtarget,
 						 List *pathkeys, Relids required_outer)
 {
 	SubqueryScanPath *pathnode = makeNode(SubqueryScanPath);
@@ -2005,7 +2023,8 @@ create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->path.pathkeys = pathkeys;
 	pathnode->subpath = subpath;
 
-	cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info);
+	cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info,
+					  trivial_pathtarget);
 
 	return pathnode;
 }
@@ -3901,10 +3920,23 @@ reparameterize_path(PlannerInfo *root, Path *path,
 		case T_SubqueryScan:
 			{
 				SubqueryScanPath *spath = (SubqueryScanPath *) path;
+				Path	   *subpath = spath->subpath;
+				bool		trivial_pathtarget;
+
+				/*
+				 * If existing node has zero extra cost, we must have decided
+				 * its target is trivial.  (The converse is not true, because
+				 * it might have a trivial target but quals to enforce; but in
+				 * that case the new node will too, so it doesn't matter
+				 * whether we get the right answer here.)
+				 */
+				trivial_pathtarget =
+					(subpath->total_cost == spath->path.total_cost);
 
 				return (Path *) create_subqueryscan_path(root,
 														 rel,
-														 spath->subpath,
+														 subpath,
+														 trivial_pathtarget,
 														 spath->path.pathkeys,
 														 required_outer);
 			}
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index dc7fc174114..9e91da7114f 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -91,7 +91,8 @@ extern void cost_tidrangescan(Path *path, PlannerInfo *root,
 							  RelOptInfo *baserel, List *tidrangequals,
 							  ParamPathInfo *param_info);
 extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
-							  RelOptInfo *baserel, ParamPathInfo *param_info);
+							  RelOptInfo *baserel, ParamPathInfo *param_info,
+							  bool trivial_pathtarget);
 extern void cost_functionscan(Path *path, PlannerInfo *root,
 							  RelOptInfo *baserel, ParamPathInfo *param_info);
 extern void cost_valuesscan(Path *path, PlannerInfo *root,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 635cc0a0a66..050f00e79a4 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -103,8 +103,11 @@ extern GatherMergePath *create_gather_merge_path(PlannerInfo *root,
 												 Relids required_outer,
 												 double *rows);
 extern SubqueryScanPath *create_subqueryscan_path(PlannerInfo *root,
-												  RelOptInfo *rel, Path *subpath,
-												  List *pathkeys, Relids required_outer);
+												  RelOptInfo *rel,
+												  Path *subpath,
+												  bool trivial_pathtarget,
+												  List *pathkeys,
+												  Relids required_outer);
 extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
 									  List *pathkeys, Relids required_outer);
 extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index 32385bbb0ef..9d4f9011a8f 100644
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -1976,18 +1976,18 @@ select * from tt24v;
 ------------------------------------------------------------------------------------------
  Hash Join
    Output: (cte.r).column2, ((ROW("*VALUES*".column1, "*VALUES*".column2))).column2
-   Hash Cond: (((ROW("*VALUES*".column1, "*VALUES*".column2))).column1 = (cte.r).column1)
+   Hash Cond: ((cte.r).column1 = ((ROW("*VALUES*".column1, "*VALUES*".column2))).column1)
    CTE cte
      ->  Values Scan on "*VALUES*_1"
            Output: ROW("*VALUES*_1".column1, "*VALUES*_1".column2)
-   ->  Limit
-         Output: (ROW("*VALUES*".column1, "*VALUES*".column2))
-         ->  Values Scan on "*VALUES*"
-               Output: ROW("*VALUES*".column1, "*VALUES*".column2)
-   ->  Hash
+   ->  CTE Scan on cte
          Output: cte.r
-         ->  CTE Scan on cte
-               Output: cte.r
+   ->  Hash
+         Output: (ROW("*VALUES*".column1, "*VALUES*".column2))
+         ->  Limit
+               Output: (ROW("*VALUES*".column1, "*VALUES*".column2))
+               ->  Values Scan on "*VALUES*"
+                     Output: ROW("*VALUES*".column1, "*VALUES*".column2)
 (14 rows)
 
 explain (verbose, costs off)
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 1f0df6b7d94..e1d9d971d65 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5207,11 +5207,11 @@ explain (costs off)
    Sort Key: a.q1, a.q2, x.q1, x.q2, (a.q1)
    ->  Nested Loop
          ->  Seq Scan on int8_tbl a
-         ->  Hash Right Join
-               Hash Cond: ((a.q1) = x.q2)
-               ->  Seq Scan on int4_tbl y
+         ->  Hash Left Join
+               Hash Cond: (x.q2 = (a.q1))
+               ->  Seq Scan on int8_tbl x
                ->  Hash
-                     ->  Seq Scan on int8_tbl x
+                     ->  Seq Scan on int4_tbl y
 (9 rows)
 
 select * from int8_tbl a,