11 files changed, 584 insertions, 667 deletions
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 99ce241fed1..6af480629e2 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.69 1999/08/16 02:17:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.70 1999/08/21 03:49:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -70,6 +70,8 @@ static List *index_innerjoin(Query *root, RelOptInfo *rel, RelOptInfo *index,
 							 List *clausegroup_list, List *outerrelids_list);
 static bool useful_for_mergejoin(RelOptInfo *rel, RelOptInfo *index,
 								 List *joininfo_list);
+static bool useful_for_ordering(Query *root, RelOptInfo *rel,
+								RelOptInfo *index);
 static bool match_index_to_operand(int indexkey, Var *operand,
 								   RelOptInfo *rel, RelOptInfo *index);
 static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel, RelOptInfo *index);
@@ -86,7 +88,8 @@ static List *prefix_quals(Var *leftop, Oid expr_op,
  *	  Generate all interesting index paths for the given relation.
  *
  * To be considered for an index scan, an index must match one or more
- * restriction clauses or join clauses from the query's qual condition.
+ * restriction clauses or join clauses from the query's qual condition,
+ * or match the query's ORDER BY condition.
  *
  * There are two basic kinds of index scans.  A "plain" index scan uses
  * only restriction clauses (possibly none at all) in its indexqual,
@@ -104,14 +107,6 @@ static List *prefix_quals(Var *leftop, Oid expr_op,
  * relations.  The innerjoin paths are *not* in the return list, but are
  * appended to the "innerjoin" list of the relation itself.
  *
- * XXX An index scan might also be used simply to order the result.  We
- * probably should create an index path for any index that matches the
- * query's ORDER BY condition, even if it doesn't seem useful for join
- * or restriction clauses.  But currently, such a path would never
- * survive the path selection process, so there's no point.  The selection
- * process needs to award bonus scores to indexscans that produce a
- * suitably-ordered result...
- *
  * 'rel' is the relation for which we want to generate index paths
  * 'indices' is a list of available indexes for 'rel'
  * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel'
@@ -192,13 +187,16 @@ create_index_paths(Query *root,
 		 * index path for it even if there were no restriction clauses.
 		 * (If there were, there is no need to make another index path.)
 		 * This will allow the index to be considered as a base for a
-		 * mergejoin in later processing.
+		 * mergejoin in later processing.  Similarly, if the index matches
+		 * the ordering that is needed for the overall query result, make
+		 * an index path for it even if there is no other reason to do so.
 		 */
-		if (restrictclauses == NIL &&
-			useful_for_mergejoin(rel, index, joininfo_list))
+		if (restrictclauses == NIL)
 		{
-			retval = lappend(retval,
-							 create_index_path(root, rel, index, NIL));
+			if (useful_for_mergejoin(rel, index, joininfo_list) ||
+				useful_for_ordering(root, rel, index))
+				retval = lappend(retval,
+								 create_index_path(root, rel, index, NIL));
 		}
 
 		/*
@@ -748,6 +746,101 @@ indexable_operator(Expr *clause, int xclass, Oid relam,
 	return false;
 }
 
+/*
+ * useful_for_mergejoin
+ *	  Determine whether the given index can support a mergejoin based
+ *	  on any available join clause.
+ *
+ *	  We look to see whether the first indexkey of the index matches the
+ *	  left or right sides of any of the mergejoinable clauses and provides
+ *	  the ordering needed for that side.  If so, the index is useful.
+ *	  Matching a second or later indexkey is not useful unless there is
+ *	  also a mergeclause for the first indexkey, so we need not consider
+ *	  secondary indexkeys at this stage.
+ *
+ * 'rel' is the relation for which 'index' is defined
+ * 'joininfo_list' is the list of JoinInfo nodes for 'rel'
+ */
+static bool
+useful_for_mergejoin(RelOptInfo *rel,
+					 RelOptInfo *index,
+					 List *joininfo_list)
+{
+	int		   *indexkeys = index->indexkeys;
+	Oid		   *ordering = index->ordering;
+	List	   *i;
+
+	if (!indexkeys || indexkeys[0] == 0 ||
+		!ordering || ordering[0] == InvalidOid)
+		return false;			/* unordered index is not useful */
+
+	foreach(i, joininfo_list)
+	{
+		JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
+		List	   *j;
+
+		foreach(j, joininfo->jinfo_restrictinfo)
+		{
+			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+			if (restrictinfo->mergejoinoperator)
+			{
+				if (restrictinfo->left_sortop == ordering[0] &&
+					match_index_to_operand(indexkeys[0],
+										   get_leftop(restrictinfo->clause),
+										   rel, index))
+					return true;
+				if (restrictinfo->right_sortop == ordering[0] &&
+					match_index_to_operand(indexkeys[0],
+										   get_rightop(restrictinfo->clause),
+										   rel, index))
+					return true;
+			}
+		}
+	}
+	return false;
+}
+
+/*
+ * useful_for_ordering
+ *	  Determine whether the given index can produce an ordering matching
+ *	  the order that is wanted for the query result.
+ *
+ * We check to see whether either forward or backward scan direction can
+ * match the specified pathkeys.
+ *
+ * 'rel' is the relation for which 'index' is defined
+ */
+static bool
+useful_for_ordering(Query *root,
+					RelOptInfo *rel,
+					RelOptInfo *index)
+{
+	List	   *index_pathkeys;
+
+	if (root->query_pathkeys == NIL)
+		return false;			/* no special ordering requested */
+
+	index_pathkeys = build_index_pathkeys(root, rel, index);
+
+	if (index_pathkeys == NIL)
+		return false;			/* unordered index */
+
+	if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys))
+		return true;
+
+	/* caution: commute_pathkeys destructively modifies its argument;
+	 * safe because we just built the index_pathkeys for local use here.
+	 */
+	if (commute_pathkeys(index_pathkeys))
+	{
+		if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys))
+			return true;		/* useful as a reverse-order path */
+	}
+
+	return false;
+}
+
 /****************************************************************************
  *				----  ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS	----
  ****************************************************************************/
@@ -1285,61 +1378,6 @@ index_innerjoin(Query *root, RelOptInfo *rel, RelOptInfo *index,
 	return path_list;
 }
 
-/*
- * useful_for_mergejoin
- *	  Determine whether the given index can support a mergejoin based
- *	  on any available join clause.
- *
- *	  We look to see whether the first indexkey of the index matches the
- *	  left or right sides of any of the mergejoinable clauses and provides
- *	  the ordering needed for that side.  If so, the index is useful.
- *	  Matching a second or later indexkey is not useful unless there is
- *	  also a mergeclause for the first indexkey, so we need not consider
- *	  secondary indexkeys at this stage.
- *
- * 'rel' is the relation for which 'index' is defined
- * 'joininfo_list' is the list of JoinInfo nodes for 'rel'
- */
-static bool
-useful_for_mergejoin(RelOptInfo *rel,
-					 RelOptInfo *index,
-					 List *joininfo_list)
-{
-	int		   *indexkeys = index->indexkeys;
-	Oid		   *ordering = index->ordering;
-	List	   *i;
-
-	if (!indexkeys || indexkeys[0] == 0 ||
-		!ordering || ordering[0] == InvalidOid)
-		return false;			/* unordered index is not useful */
-
-	foreach(i, joininfo_list)
-	{
-		JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
-		List	   *j;
-
-		foreach(j, joininfo->jinfo_restrictinfo)
-		{
-			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
-
-			if (restrictinfo->mergejoinoperator)
-			{
-				if (restrictinfo->left_sortop == ordering[0] &&
-					match_index_to_operand(indexkeys[0],
-										   get_leftop(restrictinfo->clause),
-										   rel, index))
-					return true;
-				if (restrictinfo->right_sortop == ordering[0] &&
-					match_index_to_operand(indexkeys[0],
-										   get_rightop(restrictinfo->clause),
-										   rel, index))
-					return true;
-			}
-		}
-	}
-	return false;
-}
-
 /****************************************************************************
  *				----  ROUTINES TO CHECK OPERANDS  ----
  ****************************************************************************/
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 55891d87a95..c1ac6a2c4d8 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.45 1999/08/16 02:17:51 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.46 1999/08/21 03:49:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -408,7 +408,8 @@ match_unsorted_outer(RelOptInfo *joinrel,
 				trialinnerpath =
 					get_cheapest_path_for_pathkeys(innerrel->pathlist,
 												   ltruncate(clausecount,
-															 trialsortkeys));
+															 trialsortkeys),
+												   false);
 				if (trialinnerpath != NULL &&
 					trialinnerpath->path_cost < cheapest_cost)
 				{
@@ -488,7 +489,8 @@ match_unsorted_inner(RelOptInfo *joinrel,
 			/* Look for an outer path already ordered well enough to merge */
 			mergeouterpath =
 				get_cheapest_path_for_pathkeys(outerrel->pathlist,
-											   outersortkeys);
+											   outersortkeys,
+											   false);
 
 			/* Should we use the mergeouter, or sort the cheapest outer? */
 			if (mergeouterpath != NULL &&
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 44a7b614b69..41a3ff35b48 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.14 1999/08/16 02:17:52 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.15 1999/08/21 03:49:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/parsetree.h"
+#include "parser/parse_func.h"
 #include "utils/lsyscache.h"
 
 static PathKeyItem *makePathKeyItem(Node *key, Oid sortop);
@@ -89,6 +90,11 @@ static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
  *	executor might have to split the join into multiple batches.  Therefore
  *	a Hashjoin is always given NIL pathkeys.
  *
+ *	Pathkeys are also useful to represent an ordering that we wish to achieve,
+ *	since they are easily compared to the pathkeys of a potential candidate
+ *	path.  So, SortClause lists are turned into pathkeys lists for use inside
+ *	the optimizer.
+ *
  *	-- bjm & tgl
  *--------------------
  */
@@ -254,9 +260,11 @@ pathkeys_contained_in(List *keys1, List *keys2)
  *
  * 'paths' is a list of possible paths (either inner or outer)
  * 'pathkeys' represents a required ordering
+ * if 'indexpaths_only' is true, only IndexPaths will be considered.
  */
 Path *
-get_cheapest_path_for_pathkeys(List *paths, List *pathkeys)
+get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+							   bool indexpaths_only)
 {
 	Path	   *matched_path = NULL;
 	List	   *i;
@@ -265,6 +273,9 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys)
 	{
 		Path	   *path = (Path *) lfirst(i);
 
+		if (indexpaths_only && ! IsA(path, IndexPath))
+			continue;
+
 		if (pathkeys_contained_in(pathkeys, path->pathkeys))
 		{
 			if (matched_path == NULL ||
@@ -314,7 +325,8 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, RelOptInfo *index)
 		funcnode->funcisindex = false;
 		funcnode->funcsize = 0;
 		funcnode->func_fcache = NULL;
-		funcnode->func_tlist = NIL;
+		/* we assume here that the function returns a base type... */
+		funcnode->func_tlist = setup_base_tlist(funcnode->functype);
 		funcnode->func_planlist = NIL;
 
 		while (*indexkeys != 0)
@@ -516,6 +528,70 @@ build_join_pathkey(List *pathkey,
 	return new_pathkey;
 }
 
+/*
+ * commute_pathkeys
+ *		Attempt to commute the operators in a set of pathkeys, producing
+ *		pathkeys that describe the reverse sort order (DESC instead of ASC).
+ *		Returns TRUE if successful (all the operators have commutators).
+ *
+ * CAUTION: given pathkeys are modified in place, even if not successful!!
+ * Usually, caller should have just built or copied the pathkeys list to
+ * ensure there are no unwanted side-effects.
+ */
+bool
+commute_pathkeys(List *pathkeys)
+{
+	List	   *i;
+
+	foreach(i, pathkeys)
+	{
+		List	   *pathkey = lfirst(i);
+		List	   *j;
+
+		foreach(j, pathkey)
+		{
+			PathKeyItem	   *key = lfirst(j);
+
+			key->sortop = get_commutator(key->sortop);
+			if (key->sortop == InvalidOid)
+				return false;
+		}
+	}
+	return true;				/* successful */
+}
+
+/****************************************************************************
+ *		PATHKEYS AND SORT CLAUSES
+ ****************************************************************************/
+
+/*
+ * make_pathkeys_for_sortclauses
+ *		Generate a pathkeys list that represents the sort order specified
+ *		by a list of SortClauses (GroupClauses will work too!)
+ *
+ * 'sortclauses' is a list of SortClause or GroupClause nodes
+ * 'tlist' is the targetlist to find the referenced tlist entries in
+ */
+List *
+make_pathkeys_for_sortclauses(List *sortclauses, List *tlist)
+{
+	List	   *pathkeys = NIL;
+	List	   *i;
+
+	foreach(i, sortclauses)
+	{
+		SortClause	   *sortcl = (SortClause *) lfirst(i);
+		Node		   *sortkey;
+		PathKeyItem	   *pathkey;
+
+		sortkey = get_sortgroupclause_expr(sortcl, tlist);
+		pathkey = makePathKeyItem(sortkey, sortcl->sortop);
+		/* pathkey becomes a one-element sublist */
+		pathkeys = lappend(pathkeys, lcons(pathkey, NIL));
+	}
+	return pathkeys;
+}
+
 /****************************************************************************
  *		PATHKEYS AND MERGECLAUSES
  ****************************************************************************/
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 40923fe27da..d1f756fc7c1 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.73 1999/08/18 04:15:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.74 1999/08/21 03:49:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1131,7 +1131,6 @@ make_agg(List *tlist, Plan *lefttree)
 	node->plan.targetlist = tlist;
 	node->plan.lefttree = lefttree;
 	node->plan.righttree = (Plan *) NULL;
-	node->aggs = NIL;
 
 	return node;
 }
@@ -1141,15 +1140,15 @@ make_group(List *tlist,
 		   bool tuplePerGroup,
 		   int ngrp,
 		   AttrNumber *grpColIdx,
-		   Sort *lefttree)
+		   Plan *lefttree)
 {
 	Group	   *node = makeNode(Group);
 
-	copy_costsize(&node->plan, (Plan *) lefttree);
+	copy_costsize(&node->plan, lefttree);
 	node->plan.state = (EState *) NULL;
 	node->plan.qual = NULL;
 	node->plan.targetlist = tlist;
-	node->plan.lefttree = (Plan *) lefttree;
+	node->plan.lefttree = lefttree;
 	node->plan.righttree = (Plan *) NULL;
 	node->tuplePerGroup = tuplePerGroup;
 	node->numCols = ngrp;
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 76e010c9d22..f6f62abfe08 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.40 1999/07/16 04:59:19 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.41 1999/08/21 03:49:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,11 +17,13 @@
 
 
 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
 #include "optimizer/prep.h"
 #include "optimizer/subselect.h"
 #include "optimizer/tlist.h"
+#include "utils/lsyscache.h"
 
 
 static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
@@ -42,6 +44,13 @@ static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
  *	  tlist is the target list of the query
  *	  qual is the qualification of the query
  *
+ *	  Note: the Query node now also includes a query_pathkeys field, which
+ *	  signals query_planner that the indicated sort order is wanted in the
+ *	  final output plan.  If, for some reason, query_planner is unable to
+ *	  comply, it sets query_pathkeys to NIL before returning.  (The reason
+ *	  query_pathkeys is a Query field and not a passed parameter is that
+ *	  the low-level routines in indxpath.c need to see it.)
+ *
  *	  Returns a query plan.
  */
 Plan *
@@ -100,6 +109,8 @@ query_planner(Query *root,
 	 */
 	if (var_only_tlist == NULL && qual == NULL)
 	{
+		root->query_pathkeys = NIL; /* these plans make unordered results */
+
 		switch (command_type)
 		{
 			case CMD_SELECT:
@@ -152,6 +163,8 @@ query_planner(Query *root,
 		 */
 		set_tlist_references(subplan);
 
+		root->query_pathkeys = NIL; /* result is unordered, no? */
+
 		return subplan;
 	}
 
@@ -163,15 +176,15 @@ query_planner(Query *root,
 	 * responsibility to optimally push these expressions down the plan
 	 * tree.  -- Wei
 	 *
-	 * Note: formerly there was a test here to skip the flatten call if we
-	 * expected union_planner to insert a Group or Agg node above our
+	 * Note: formerly there was a test here to skip the unflatten call if
+	 * we expected union_planner to insert a Group or Agg node above our
 	 * result. However, now union_planner tells us exactly what it wants
 	 * returned, and we just do it.  Much cleaner.
 	 */
 	else
 	{
-		subplan->targetlist = flatten_tlist_vars(tlist,
-												 subplan->targetlist);
+		subplan->targetlist = unflatten_tlist(tlist,
+											  subplan->targetlist);
 		return subplan;
 	}
 
@@ -204,6 +217,8 @@ subplanner(Query *root,
 		   List *qual)
 {
 	RelOptInfo *final_rel;
+	Cost		cheapest_cost;
+	Path	   *sortedpath;
 
 	/*
 	 * Initialize the targetlist and qualification, adding entries to
@@ -244,18 +259,83 @@ subplanner(Query *root,
 	}
 #endif
 
+	if (! final_rel)
+	{
+		elog(NOTICE, "final relation is null");
+		root->query_pathkeys = NIL; /* result is unordered, no? */
+		return create_plan((Path *) NULL);
+	}
+
+	/*
+	 * Determine the cheapest path and create a subplan to execute it.
+	 *
+	 * If no special sort order is wanted, or if the cheapest path is
+	 * already appropriately ordered, just use the cheapest path.
+	 */
+	if (root->query_pathkeys == NIL ||
+		pathkeys_contained_in(root->query_pathkeys,
+							  final_rel->cheapestpath->pathkeys))
+		return create_plan(final_rel->cheapestpath);
 	/*
-	 * Determine the cheapest path and create a subplan corresponding to
-	 * it.
+	 * Otherwise, look to see if we have an already-ordered path that is
+	 * cheaper than doing an explicit sort on cheapestpath.
 	 */
-	if (final_rel)
-		return create_plan((Path *) final_rel->cheapestpath);
+	cheapest_cost = final_rel->cheapestpath->path_cost +
+		cost_sort(root->query_pathkeys, final_rel->size, final_rel->width);
+
+	sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
+												root->query_pathkeys,
+												false);
+	if (sortedpath)
+	{
+		if (sortedpath->path_cost <= cheapest_cost)
+		{
+			/* Found a better presorted path, use it */
+			return create_plan(sortedpath);
+		}
+		/* otherwise, doing it the hard way is still cheaper */
+	}
 	else
 	{
-		elog(NOTICE, "final relation is null");
-		return create_plan((Path *) NULL);
+		/*
+		 * If we found no usable presorted path at all, it is possible
+		 * that the user asked for descending sort order.  Check to see
+		 * if we can satisfy the pathkeys by using a backwards indexscan.
+		 * To do this, we commute all the operators in the pathkeys and
+		 * then look for a matching path that is an IndexPath.
+		 */
+		List	   *commuted_pathkeys = copyObject(root->query_pathkeys);
+
+		if (commute_pathkeys(commuted_pathkeys))
+		{
+			/* pass 'true' to force only IndexPaths to be considered */
+			sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
+														commuted_pathkeys,
+														true);
+			if (sortedpath && sortedpath->path_cost <= cheapest_cost)
+			{
+				/*
+				 * Kluge here: since IndexPath has no representation for
+				 * backwards scan, we have to convert to Plan format and
+				 * then poke the result.
+				 */
+				Plan	   *sortedplan = create_plan(sortedpath);
+
+				Assert(IsA(sortedplan, IndexScan));
+				((IndexScan *) sortedplan)->indxorderdir = BackwardScanDirection;
+				return sortedplan;
+			}
+		}
 	}
 
+	/* Nothing for it but to sort the cheapestpath...
+	 *
+	 * we indicate we failed to sort the plan, and let the caller
+	 * stick the appropriate sortplan on top.
+	 */
+	root->query_pathkeys = NIL; /* sorry, it ain't sorted */
+
+	return create_plan(final_rel->cheapestpath);
 }
 
 /*****************************************************************************
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 17ad449839b..b328c40f226 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.62 1999/08/09 06:20:26 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.63 1999/08/21 03:49:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,7 @@
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/internal.h"
+#include "optimizer/paths.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
 #include "optimizer/prep.h"
@@ -35,11 +36,10 @@
 #include "utils/syscache.h"
 
 static List *make_subplanTargetList(Query *parse, List *tlist,
-					   AttrNumber **groupColIdx);
+									AttrNumber **groupColIdx);
 static Plan *make_groupplan(List *group_tlist, bool tuplePerGroup,
-			   List *groupClause, AttrNumber *grpColIdx,
-			   Plan *subplan);
-static ScanDirection get_dir_to_omit_sortplan(List *sortcls, Plan *plan);
+							List *groupClause, AttrNumber *grpColIdx,
+							bool is_sorted, Plan *subplan);
 static Plan *make_sortplan(List *tlist, List *sortcls, Plan *plannode);
 
 /*****************************************************************************
@@ -59,6 +59,7 @@ planner(Query *parse)
 	PlannerPlanId = 0;
 
 	transformKeySetQuery(parse);
+
 	result_plan = union_planner(parse);
 
 	Assert(PlannerQueryLevel == 1);
@@ -88,6 +89,7 @@ union_planner(Query *parse)
 	List	   *rangetable = parse->rtable;
 	Plan	   *result_plan = (Plan *) NULL;
 	AttrNumber *groupColIdx = NULL;
+	bool		is_sorted = false;
 	Index		rt_index;
 
 	if (parse->unionClause)
@@ -180,6 +182,34 @@ union_planner(Query *parse)
 		}
 
 		/*
+		 * Figure out whether we need a sorted result from query_planner.
+		 *
+		 * If we have a GROUP BY clause, then we want a result sorted
+		 * properly for grouping.  Otherwise, if there is an ORDER BY clause
+		 * and no need for an aggregate node, we want to sort by the ORDER BY
+		 * clause.  (XXX In some cases, we could presort even when there is
+		 * an aggregate, but I'll leave that refinement for another day.)
+		 *
+		 * NOTE: the reason we put the target pathkeys into the Query node
+		 * rather than passing them as an argument to query_planner is that
+		 * the low-level routines in indxpath.c want to be able to see them.
+		 */
+		if (parse->groupClause)
+		{
+			parse->query_pathkeys =
+				make_pathkeys_for_sortclauses(parse->groupClause, tlist);
+		}
+		else if (parse->sortClause && ! parse->hasAggs)
+		{
+			parse->query_pathkeys =
+				make_pathkeys_for_sortclauses(parse->sortClause, tlist);
+		}
+		else
+		{
+			parse->query_pathkeys = NIL;
+		}
+
+		/*
 		 * Generate appropriate target list for subplan; may be different
 		 * from tlist if grouping or aggregation is needed.
 		 */
@@ -190,6 +220,12 @@ union_planner(Query *parse)
 									parse->commandType,
 									sub_tlist,
 									(List *) parse->qual);
+
+		/* query_planner sets query_pathkeys to NIL if it didn't make
+		 * a properly sorted plan
+		 */
+		if (parse->query_pathkeys)
+			is_sorted = true;
 	}
 
 	/* query_planner returns NULL if it thinks plan is bogus */
@@ -197,8 +233,8 @@ union_planner(Query *parse)
 		elog(ERROR, "union_planner: failed to create plan");
 
 	/*
-	 * If we have a GROUP BY clause, insert a group node (with the
-	 * appropriate sort node.)
+	 * If we have a GROUP BY clause, insert a group node (plus the
+	 * appropriate sort node, if necessary).
 	 */
 	if (parse->groupClause)
 	{
@@ -215,17 +251,27 @@ union_planner(Query *parse)
 
 		/*
 		 * If there are aggregates then the Group node should just return
-		 * the same (simplified) tlist as the subplan, which we indicate
-		 * to make_groupplan by passing NIL.  If there are no aggregates
+		 * the same set of vars as the subplan did (but we can exclude
+		 * any GROUP BY expressions).  If there are no aggregates
 		 * then the Group node had better compute the final tlist.
 		 */
-		group_tlist = parse->hasAggs ? NIL : tlist;
+		if (parse->hasAggs)
+			group_tlist = flatten_tlist(result_plan->targetlist);
+		else
+			group_tlist = tlist;
 
 		result_plan = make_groupplan(group_tlist,
 									 tuplePerGroup,
 									 parse->groupClause,
 									 groupColIdx,
+									 is_sorted,
 									 result_plan);
+
+		/*
+		 * Assume the result of the group step is not ordered suitably
+		 * for any ORDER BY that may exist.  XXX it might be; improve this!
+		 */
+		is_sorted = false;
 	}
 
 	/*
@@ -269,66 +315,48 @@ union_planner(Query *parse)
 		result_plan->qual = (List *) parse->havingQual;
 
 		/*
-		 * Update vars to refer to subplan result tuples, find Aggrefs,
+		 * Update vars to refer to subplan result tuples, and
 		 * make sure there is an Aggref in every HAVING clause.
 		 */
 		if (!set_agg_tlist_references((Agg *) result_plan))
 			elog(ERROR, "SELECT/HAVING requires aggregates to be valid");
 
 		/*
-		 * Check that we actually found some aggregates, else executor
-		 * will die unpleasantly.  (This defends against possible bugs in
-		 * parser or rewrite that might cause hasAggs to be incorrectly
-		 * set 'true'. It's not easy to recover here, since we've already
-		 * made decisions assuming there will be an Agg node.)
+		 * Assume result is not ordered suitably for ORDER BY.
+		 * XXX it might be; improve this!
 		 */
-		if (((Agg *) result_plan)->aggs == NIL)
-			elog(ERROR, "union_planner: query is marked hasAggs, but I don't see any");
+		is_sorted = false;
 	}
 
 	/*
-	 * For now, before we hand back the plan, check to see if there is a
-	 * user-specified sort that needs to be done.  Eventually, this will
-	 * be moved into the guts of the planner s.t. user specified sorts
-	 * will be considered as part of the planning process. Since we can
-	 * only make use of user-specified sorts in special cases, we can do
-	 * the optimization step later.
+	 * If we were not able to make the plan come out in the right order,
+	 * add an explicit sort step.
 	 */
-
-	if (parse->uniqueFlag)
+	if (parse->sortClause && ! is_sorted)
 	{
-		Plan	   *sortplan = make_sortplan(tlist, parse->sortClause, result_plan);
-
-		return ((Plan *) make_unique(tlist, sortplan, parse->uniqueFlag));
+		result_plan = make_sortplan(tlist, parse->sortClause, result_plan);
 	}
-	else
+
+	/*
+	 * Finally, if there is a UNIQUE clause, add the UNIQUE node.
+	 */
+	if (parse->uniqueFlag)
 	{
-		if (parse->sortClause)
-		{
-			ScanDirection	dir = get_dir_to_omit_sortplan(parse->sortClause, result_plan);
-			if (ScanDirectionIsNoMovement(dir))
-				return (make_sortplan(tlist, parse->sortClause, result_plan));
-			else
-			{ 
-				((IndexScan *)result_plan)->indxorderdir = dir;
-				return ((Plan *) result_plan);
-			}
-		}
-		else
-			return ((Plan *) result_plan);
+		result_plan = (Plan *) make_unique(tlist, result_plan,
+										   parse->uniqueFlag);
 	}
 
+	return result_plan;
 }
 
 /*---------------
  * make_subplanTargetList
- *	  Generate appropriate target lists when grouping is required.
+ *	  Generate appropriate target list when grouping is required.
  *
- * When union_planner inserts Aggregate and/or Group/Sort plan nodes above
- * the result of query_planner, we typically need to pass a different
+ * When union_planner inserts Aggregate and/or Group plan nodes above
+ * the result of query_planner, we typically want to pass a different
  * target list to query_planner than the outer plan nodes should have.
- * This routine generates the correct target list for the subplan, and
- * if necessary modifies the target list for the inserted nodes as well.
+ * This routine generates the correct target list for the subplan.
  *
  * The initial target list passed from the parser already contains entries
  * for all ORDER BY and GROUP BY expressions, but it will not have entries
@@ -340,26 +368,18 @@ union_planner(Query *parse)
  * given a query like
  *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
  * we want to pass this targetlist to the subplan:
- *		a+b,c,d
+ *		a,b,c,d,a+b
  * where the a+b target will be used by the Sort/Group steps, and the
- * c and d targets will be needed to compute the aggregate results.
+ * other targets will be used for computing the final results.  (In the
+ * above example we could theoretically suppress the a and b targets and
+ * use only a+b, but it's not really worth the trouble.)
  *
  * 'parse' is the query being processed.
- * 'tlist' is the query's target list.  CAUTION: list elements may be
- * modified by this routine!
+ * 'tlist' is the query's target list.
  * 'groupColIdx' receives an array of column numbers for the GROUP BY
  * expressions (if there are any) in the subplan's target list.
  *
- * The result is the targetlist to be passed to the subplan.  Also,
- * the parent tlist is modified so that any nontrivial targetlist items that
- * exactly match GROUP BY items are replaced by simple Var nodes referencing
- * those outputs of the subplan.  This avoids redundant recalculations in
- * cases like
- *		SELECT a+1, ... GROUP BY a+1
- * Note, however, that other varnodes in the parent's targetlist (and
- * havingQual, if any) will still need to be updated to refer to outputs
- * of the subplan.	This routine is quite large enough already, so we do
- * that later.
+ * The result is the targetlist to be passed to the subplan.
  *---------------
  */
 static List *
@@ -368,14 +388,8 @@ make_subplanTargetList(Query *parse,
 					   AttrNumber **groupColIdx)
 {
 	List	   *sub_tlist;
-	List	   *prnt_tlist;
-	List	   *sl,
-			   *gl;
-	List	   *glc = NIL;
-	List	   *extravars = NIL;
+	List	   *extravars;
 	int			numCols;
-	AttrNumber *grpColIdx = NULL;
-	int			next_resno = 1;
 
 	*groupColIdx = NULL;
 
@@ -387,247 +401,151 @@ make_subplanTargetList(Query *parse,
 		return tlist;
 
 	/*
-	 * If grouping, make a working copy of groupClause list (which we use
-	 * just to verify that we found all the groupClause items in tlist).
-	 * Also allocate space to remember where the group columns are in the
-	 * subplan tlist.
+	 * Otherwise, start with a "flattened" tlist (having just the vars
+	 * mentioned in the targetlist and HAVING qual).
 	 */
-	numCols = length(parse->groupClause);
-	if (numCols > 0)
-	{
-		glc = listCopy(parse->groupClause);
-		grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-		*groupColIdx = grpColIdx;
-	}
-
-	sub_tlist = new_unsorted_tlist(tlist);		/* make a modifiable copy */
+	sub_tlist = flatten_tlist(tlist);
+	extravars = pull_var_clause(parse->havingQual);
+	sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
+	freeList(extravars);
 
 	/*
-	 * Step 1: build grpColIdx by finding targetlist items that match
-	 * GroupBy entries.  If there are aggregates, remove non-GroupBy items
-	 * from sub_tlist, and reset its resnos accordingly.  When we leave an
-	 * expression in the subplan tlist, modify the parent tlist to copy
-	 * the value from the subplan output rather than re-evaluating it.
+	 * If grouping, create sub_tlist entries for all GROUP BY expressions
+	 * (GROUP BY items that are simple Vars should be in the list already),
+	 * and make an array showing where the group columns are in the sub_tlist.
 	 */
-	prnt_tlist = tlist;			/* scans parent tlist in sync with sl */
-	foreach(sl, sub_tlist)
+	numCols = length(parse->groupClause);
+	if (numCols > 0)
 	{
-		TargetEntry *te = (TargetEntry *) lfirst(sl);
-		TargetEntry *parentte = (TargetEntry *) lfirst(prnt_tlist);
-		Resdom	   *resdom = te->resdom;
-		bool		keepInSubPlan = true;
-		bool		foundGroupClause = false;
 		int			keyno = 0;
+		AttrNumber *grpColIdx;
+		List	   *gl;
+
+		grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+		*groupColIdx = grpColIdx;
 
 		foreach(gl, parse->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(gl);
+			GroupClause	   *grpcl = (GroupClause *) lfirst(gl);
+			Node		   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
+			TargetEntry	   *te = NULL;
+			List		   *sl;
 
-			keyno++;			/* sort key # for this GroupClause */
-			if (grpcl->tleGroupref == resdom->resgroupref)
+			/* Find or make a matching sub_tlist entry */
+			foreach(sl, sub_tlist)
 			{
-				/* Found a matching groupclause; record info for sorting */
-				foundGroupClause = true;
-				resdom->reskey = keyno;
-				resdom->reskeyop = get_opcode(grpcl->grpOpoid);
-				grpColIdx[keyno - 1] = next_resno;
-
-				/*
-				 * Remove groupclause from our list of unmatched
-				 * groupclauses. NB: this depends on having used a shallow
-				 * listCopy() above.
-				 */
-				glc = lremove((void *) grpcl, glc);
-				break;
+				te = (TargetEntry *) lfirst(sl);
+				if (equal(groupexpr, te->expr))
+					break;
 			}
-		}
-
-		if (!foundGroupClause)
-		{
-
-			/*
-			 * Non-GroupBy entry: remove it from subplan if there are
-			 * aggregates in query - it will be evaluated by Aggregate
-			 * plan. But do not remove simple-Var entries; we'd just have
-			 * to add them back anyway, and we risk confusing
-			 * INSERT/UPDATE.
-			 */
-			if (parse->hasAggs && !IsA(te->expr, Var))
-				keepInSubPlan = false;
-		}
-
-		if (keepInSubPlan)
-		{
-			/* Assign new sequential resnos to subplan tlist items */
-			resdom->resno = next_resno++;
-			if (!IsA(parentte->expr, Var))
+			if (! sl)
 			{
-
-				/*
-				 * Since the item is being computed in the subplan, we can
-				 * just make a Var node to reference it in the outer plan,
-				 * rather than recomputing it there. Note we use varnoold
-				 * = -1 as a flag to let replace_vars_with_subplan_refs
-				 * know it needn't change this Var node. If it's only a
-				 * Var anyway, we leave it alone for now;
-				 * replace_vars_with_subplan_refs will fix it later.
-				 */
-				parentte->expr = (Node *) makeVar(1, resdom->resno,
-												  resdom->restype,
-												  resdom->restypmod,
-												  0, -1, resdom->resno);
+				te = makeTargetEntry(makeResdom(length(sub_tlist) + 1,
+												exprType(groupexpr),
+												exprTypmod(groupexpr),
+												NULL,
+												(Index) 0,
+												(Oid) 0,
+												false),
+									 groupexpr);
+				sub_tlist = lappend(sub_tlist, te);
 			}
-		}
-		else
-		{
-
-			/*
-			 * Remove this tlist item from the subplan, but remember the
-			 * vars it needs.  The outer tlist item probably needs
-			 * changes, but that will happen later.
-			 */
-			sub_tlist = lremove(te, sub_tlist);
-			extravars = nconc(extravars, pull_var_clause(te->expr));
-		}
-
-		prnt_tlist = lnext(prnt_tlist);
-	}
-
-	/* We should have found all the GROUP BY clauses in the tlist. */
-	if (length(glc) != 0)
-		elog(ERROR, "make_subplanTargetList: GROUP BY attribute not found in target list");
 
-	/*
-	 * Add subplan targets for any variables needed by removed tlist
-	 * entries that aren't otherwise mentioned in the subplan target list.
-	 * We'll also need targets for any variables seen only in HAVING.
-	 */
-	extravars = nconc(extravars, pull_var_clause(parse->havingQual));
-
-	foreach(gl, extravars)
-	{
-		Var		   *v = (Var *) lfirst(gl);
-
-		if (tlist_member(v, sub_tlist) == NULL)
-		{
-
-			/*
-			 * Make sure sub_tlist element is a fresh object not shared
-			 * with any other structure; not sure if anything will break
-			 * if it is shared, but better to be safe...
-			 */
-			sub_tlist = lappend(sub_tlist,
-								create_tl_element((Var *) copyObject(v),
-												  next_resno));
-			next_resno++;
+			/* and save its resno */
+			grpColIdx[keyno++] = te->resdom->resno;
 		}
 	}
 
 	return sub_tlist;
 }
 
+/*
+ * make_groupplan
+ *		Add a Group node for GROUP BY processing.
+ *		If we couldn't make the subplan produce presorted output for grouping,
+ *		first add an explicit Sort node.
+ */
 static Plan *
 make_groupplan(List *group_tlist,
 			   bool tuplePerGroup,
 			   List *groupClause,
 			   AttrNumber *grpColIdx,
+			   bool is_sorted,
 			   Plan *subplan)
 {
-	List	   *sort_tlist;
-	List	   *sl;
-	Sort	   *sortplan;
-	Group	   *grpplan;
 	int			numCols = length(groupClause);
+	Group	   *grpplan;
 
-	/*
-	 * Make the targetlist for the Sort node; it always just references
-	 * each of the corresponding target items of the subplan.  We need to
-	 * ensure that simple Vars in the subplan's target list are
-	 * recognizable by replace_vars_with_subplan_refs when it's applied to
-	 * the Sort/Group target list, so copy up their varnoold/varoattno.
-	 */
-	sort_tlist = NIL;
-	foreach(sl, subplan->targetlist)
+	if (! is_sorted)
 	{
-		TargetEntry *te = (TargetEntry *) lfirst(sl);
-		Resdom	   *resdom = te->resdom;
-		Var		   *newvar;
+		/*
+		 * The Sort node always just takes a copy of the subplan's tlist
+		 * plus ordering information.  (This might seem inefficient if the
+		 * subplan contains complex GROUP BY expressions, but in fact Sort
+		 * does not evaluate its targetlist --- it only outputs the same
+		 * tuples in a new order.  So the expressions we might be copying
+		 * are just dummies with no extra execution cost.)
+		 */
+		List	   *sort_tlist = new_unsorted_tlist(subplan->targetlist);
+		int			keyno = 0;
+		List	   *gl;
 
-		if (IsA(te->expr, Var))
+		foreach(gl, groupClause)
 		{
-			Var		   *subvar = (Var *) te->expr;
+			GroupClause	   *grpcl = (GroupClause *) lfirst(gl);
+			TargetEntry	   *te = nth(grpColIdx[keyno]-1, sort_tlist);
+			Resdom		   *resdom = te->resdom;
 
-			newvar = makeVar(1, resdom->resno,
-							 resdom->restype, resdom->restypmod,
-							 0, subvar->varnoold, subvar->varoattno);
-		}
-		else
-		{
-			newvar = makeVar(1, resdom->resno,
-							 resdom->restype, resdom->restypmod,
-							 0, -1, resdom->resno);
+			/*
+			 * Check for the possibility of duplicate group-by clauses --- the
+			 * parser should have removed 'em, but the Sort executor will get
+			 * terribly confused if any get through!
+			 */
+			if (resdom->reskey == 0)
+			{
+				/* OK, insert the ordering info needed by the executor. */
+				resdom->reskey = ++keyno;
+				resdom->reskeyop = get_opcode(grpcl->sortop);
+			}
 		}
 
-		sort_tlist = lappend(sort_tlist,
-						   makeTargetEntry((Resdom *) copyObject(resdom),
-										   (Node *) newvar));
+		subplan = (Plan *) make_sort(sort_tlist,
+									 _NONAME_RELATION_ID_,
+									 subplan,
+									 keyno);
 	}
 
 	/*
-	 * Make the Sort node
+	 * Fix variables in tlist (should be done somewhere else?)
 	 */
-	sortplan = make_sort(sort_tlist,
-						 _NONAME_RELATION_ID_,
-						 subplan,
-						 numCols);
-	sortplan->plan.cost = subplan->cost;		/* XXX assume no cost */
-
-	/*
-	 * If the caller gave us a target list, use it after fixing the
-	 * variables. If not, we need the same sort of "repeater" tlist as for
-	 * the Sort node.
-	 */
-	if (group_tlist)
-	{
-		group_tlist = copyObject(group_tlist);	/* necessary?? */
-		replace_tlist_with_subplan_refs(group_tlist,
-										(Index) 0,
-										subplan->targetlist);
-	}
-	else
-		group_tlist = copyObject(sort_tlist);
+	group_tlist = copyObject(group_tlist);	/* necessary?? */
+	replace_tlist_with_subplan_refs(group_tlist,
+									(Index) 0,
+									subplan->targetlist);
 
 	/*
 	 * Make the Group node
 	 */
 	grpplan = make_group(group_tlist, tuplePerGroup, numCols,
-						 grpColIdx, sortplan);
+						 grpColIdx, subplan);
 
 	return (Plan *) grpplan;
 }
 
 /*
  * make_sortplan
- *	  Returns a sortplan which is basically a SORT node attached to the
- *	  top of the plan returned from the planner.  It also adds the
- *	   cost of sorting into the plan.
- *
- * sortkeys: ( resdom1 resdom2 resdom3 ...)
- * sortops:  (sortop1 sortop2 sortop3 ...)
+ *	  Add a Sort node to implement an explicit ORDER BY clause.
  */
 static Plan *
 make_sortplan(List *tlist, List *sortcls, Plan *plannode)
 {
-	Plan	   *sortplan = (Plan *) NULL;
-	List	   *temp_tlist = NIL;
-	List	   *i = NIL;
-	Resdom	   *resnode = (Resdom *) NULL;
-	Resdom	   *resdom = (Resdom *) NULL;
-	int			keyno = 1;
+	List	   *temp_tlist;
+	List	   *i;
+	int			keyno = 0;
 
 	/*
-	 * First make a copy of the tlist so that we don't corrupt the the
-	 * original .
+	 * First make a copy of the tlist so that we don't corrupt the
+	 * original.
 	 */
 
 	temp_tlist = new_unsorted_tlist(tlist);
@@ -635,35 +553,38 @@ make_sortplan(List *tlist, List *sortcls, Plan *plannode)
 	foreach(i, sortcls)
 	{
 		SortClause *sortcl = (SortClause *) lfirst(i);
+		Index		refnumber = sortcl->tleSortGroupRef;
+		TargetEntry *tle = NULL;
+		Resdom	   *resdom;
+		List	   *l;
 
-		resnode = sortcl->resdom;
-		resdom = tlist_resdom(temp_tlist, resnode);
+		foreach(l, temp_tlist)
+		{
+			tle = (TargetEntry *) lfirst(l);
+			if (tle->resdom->ressortgroupref == refnumber)
+				break;
+		}
+		if (l == NIL)
+			elog(ERROR, "make_sortplan: ORDER BY expression not found in targetlist");
+		resdom = tle->resdom;
 
 		/*
-		 * Order the resdom keys and replace the operator OID for each key
-		 * with the regproc OID.
+		 * Check for the possibility of duplicate order-by clauses --- the
+		 * parser should have removed 'em, but the executor will get terribly
+		 * confused if any get through!
 		 */
-		resdom->reskey = keyno;
-		resdom->reskeyop = get_opcode(sortcl->opoid);
-		keyno += 1;
+		if (resdom->reskey == 0)
+		{
+			/* OK, insert the ordering info needed by the executor. */
+			resdom->reskey = ++keyno;
+			resdom->reskeyop = get_opcode(sortcl->sortop);
+		}
 	}
 
-	sortplan = (Plan *) make_sort(temp_tlist,
-								  _NONAME_RELATION_ID_,
-								  (Plan *) plannode,
-								  length(sortcls));
-
-	/*
-	 * XXX Assuming that an internal sort has no. cost. This is wrong, but
-	 * given that at this point, we don't know the no. of tuples returned,
-	 * etc, we can't do better than to add a constant cost. This will be
-	 * fixed once we move the sort further into the planner, but for now
-	 * ... functionality....
-	 */
-
-	sortplan->cost = plannode->cost;
-
-	return sortplan;
+	return (Plan *) make_sort(temp_tlist,
+							  _NONAME_RELATION_ID_,
+							  plannode,
+							  keyno);
 }
 
 /*
@@ -828,187 +749,3 @@ pg_checkretval(Oid rettype, List *queryTreeList)
 	/* success */
 	return;
 }
-
-
-/* ----------
- * Support function for get scan direction to omit sortplan
- * ----------
- */
-static TargetEntry *
-get_matching_tle(Plan *plan, Resdom *resdom)
-{
-	List	   *i;
-	TargetEntry *tle;
-
-	foreach(i, plan->targetlist)
-	{
-		tle = (TargetEntry *) lfirst(i);
-		if (tle->resdom->resno == resdom->resno)
-			return tle;
-	}
-	return NULL;
-}
-
-
-/* ----------
- * Check if a user requested ORDER BY is already satisfied by
- * the choosen index scan.
- *
- * Returns the direction of Index scan to omit sort,
- * if sort is required returns NoMovementScanDirection  
- *
- * ----------
- */
-static ScanDirection
-get_dir_to_omit_sortplan(List *sortcls, Plan *plan)
-{
-	Relation	indexRel;
-	IndexScan  *indexScan;
-	Oid			indexId;
-	List	   *i;
-	HeapTuple	htup;
-	Form_pg_index index_tup;
-	int			key_no = 0;
-	ScanDirection   dir, nodir = NoMovementScanDirection;
-
-	dir = nodir;
-	/* ----------
-	 * Must be an IndexScan
-	 * ----------
-	 */
-	if (nodeTag(plan) != T_IndexScan)
-		return nodir;
-
-	indexScan = (IndexScan *) plan;
-
-	/* ----------
-	 * Should not have left- or righttree
-	 * ----------
-	 */
-	if (plan->lefttree != NULL)
-		return nodir;
-	if (plan->righttree != NULL)
-		return nodir;
-
-	/* ----------
-	 * Must be a single index scan
-	 * ----------
-	 */
-	if (length(indexScan->indxid) != 1)
-		return nodir;
-
-	/* ----------
-	 * Indices can only have up to 8 attributes. So an ORDER BY using
-	 * more that 8 attributes could never be satisfied by an index.
-	 * ----------
-	 */
-	if (length(sortcls) > 8)
-		return nodir;
-
-	/* ----------
-	 * The choosen Index must be a btree
-	 * ----------
-	 */
-	indexId = lfirsti(indexScan->indxid);
-
-	indexRel = index_open(indexId);
-	if (strcmp(nameout(&(indexRel->rd_am->amname)), "btree") != 0)
-	{
-		heap_close(indexRel);
-		return nodir;
-	}
-	heap_close(indexRel);
-
-	/* ----------
-	 * Fetch the index tuple
-	 * ----------
-	 */
-	htup = SearchSysCacheTuple(INDEXRELID,
-							   ObjectIdGetDatum(indexId), 0, 0, 0);
-	if (!HeapTupleIsValid(htup))
-		elog(ERROR, "cache lookup for index %u failed", indexId);
-	index_tup = (Form_pg_index) GETSTRUCT(htup);
-
-	/* ----------
-	 * Check if all the sort clauses match the attributes in the index
-	 * ----------
-	 */
-	foreach(i, sortcls)
-	{
-		SortClause *sortcl;
-		Resdom	   *resdom;
-		TargetEntry *tle;
-		Var		   *var;
-
-		sortcl = (SortClause *) lfirst(i);
-
-		resdom = sortcl->resdom;
-		tle = get_matching_tle(plan, resdom);
-		if (tle == NULL)
-		{
-			/* ----------
-			 * Could this happen?
-			 * ----------
-			 */
-			return nodir;
-		}
-		if (nodeTag(tle->expr) != T_Var)
-		{
-			/* ----------
-			 * The target list expression isn't a var, so it
-			 * cannot be the indexed attribute
-			 * ----------
-			 */
-			return nodir;
-		}
-		var = (Var *) (tle->expr);
-
-		if (var->varno != indexScan->scan.scanrelid)
-		{
-			/* ----------
-			 * This Var isn't from the scan relation. So it isn't
-			 * that of the index
-			 * ----------
-			 */
-			return nodir;
-		}
-
-		if (var->varattno != index_tup->indkey[key_no])
-		{
-			/* ----------
-			 * It isn't the indexed attribute.
-			 * ----------
-			 */
-			return nodir;
-		}
-
-		if (oprid(oper("<", resdom->restype, resdom->restype, FALSE)) != sortcl->opoid)
-		{
-			/* ----------
-			 * Sort order isn't in ascending order.
-			 * ----------
-			 */
-			if (ScanDirectionIsForward(dir))
-				return nodir;
-			dir = BackwardScanDirection;
-		}
-		else
-		{
-			/* ----------
-			 * Sort order is in ascending order.
-			 * ----------
-			*/
-			if (ScanDirectionIsBackward(dir))
-				return nodir;
-			dir = ForwardScanDirection;
-		}
-
-		key_no++;
-	}
-
-	/* ----------
-	 * Index matches ORDER BY - sort not required
-	 * ----------
-	 */
-	return dir;
-}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 6471ad4e41b..1492df8b030 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.55 1999/08/18 04:15:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.56 1999/08/21 03:49:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,7 +52,6 @@ static void replace_vars_with_subplan_refs(Node *clause,
 										   List *subplanTargetList);
 static bool replace_vars_with_subplan_refs_walker(Node *node,
 					replace_vars_with_subplan_refs_context *context);
-static List *pull_agg_clause(Node *clause);
 static bool pull_agg_clause_walker(Node *node, List **listptr);
 static bool check_having_for_ungrouped_vars_walker(Node *node,
 					check_having_for_ungrouped_vars_context *context);
@@ -416,23 +415,9 @@ replace_vars_with_subplan_refs_walker(Node *node,
 		return false;
 	if (IsA(node, Var))
 	{
-		/*
-		 * It could be that this varnode has been created by make_groupplan
-		 * and is already set up to reference the subplan target list. We
-		 * recognize that case by varno = 1, varnoold = -1, varattno =
-		 * varoattno, and varlevelsup = 0.	(Probably ought to have an
-		 * explicit flag, but this should do for now.)
-		 */
 		Var		   *var = (Var *) node;
 		TargetEntry *subplanVar;
 
-		if (var->varno == (Index) 1 &&
-			var->varnoold == ((Index) -1) &&
-			var->varattno == var->varoattno &&
-			var->varlevelsup == 0)
-			return false;		/* OK to leave it alone */
-
-		/* Otherwise it had better be in the subplan list. */
 		subplanVar = match_varid(var, context->subplanTargetList);
 		if (!subplanVar)
 			elog(ERROR, "replace_vars_with_subplan_refs: variable not in target list");
@@ -461,7 +446,6 @@ replace_vars_with_subplan_refs_walker(Node *node,
  *	  the tuples returned by its left tree subplan.
  *	* If there is a qual list (from a HAVING clause), similarly update
  *	  vars in it to point to the subplan target list.
- *	* Generate the aggNode->aggs list of Aggref nodes contained in the Agg.
  *
  * The return value is TRUE if all qual clauses include Aggrefs, or FALSE
  * if any do not (caller may choose to raise an error condition).
@@ -475,7 +459,6 @@ set_agg_tlist_references(Agg *aggNode)
 	bool		all_quals_ok;
 
 	subplanTargetList = aggNode->plan.lefttree->targetlist;
-	aggNode->aggs = NIL;
 
 	foreach(tl, aggNode->plan.targetlist)
 	{
@@ -484,24 +467,19 @@ set_agg_tlist_references(Agg *aggNode)
 		replace_vars_with_subplan_refs(tle->expr,
 									   (Index) 0,
 									   subplanTargetList);
-		aggNode->aggs = nconc(pull_agg_clause(tle->expr), aggNode->aggs);
 	}
 
 	all_quals_ok = true;
 	foreach(ql, aggNode->plan.qual)
 	{
 		Node	   *qual = lfirst(ql);
-		List	   *qualaggs;
 
 		replace_vars_with_subplan_refs(qual,
 									   (Index) 0,
 									   subplanTargetList);
-		qualaggs = pull_agg_clause(qual);
-		if (qualaggs == NIL)
+		if (pull_agg_clause(qual) == NIL)
 			all_quals_ok = false;		/* this qual clause has no agg
 										 * functions! */
-		else
-			aggNode->aggs = nconc(qualaggs, aggNode->aggs);
 	}
 
 	return all_quals_ok;
@@ -514,7 +492,7 @@ set_agg_tlist_references(Agg *aggNode)
  *	  Returns list of Aggref nodes found.  Note the nodes themselves are not
  *	  copied, only referenced.
  */
-static List *
+List *
 pull_agg_clause(Node *clause)
 {
 	List	   *result = NIL;
@@ -545,10 +523,6 @@ pull_agg_clause_walker(Node *node, List **listptr)
  * exprIsAggOrGroupCol()).	But that routine currently does not check subplans,
  * because the necessary info is not computed until the planner runs.
  * This ought to be cleaned up someday.
- *
- * NOTE: the havingClause has been cnf-ified, so AND subclauses have been
- * turned into a plain List.  Thus, this routine has to cope with List nodes
- * where the routine above does not...
  */
 
 void
@@ -588,10 +562,13 @@ check_having_for_ungrouped_vars_walker(Node *node,
 
 			foreach(gl, context->groupClause)
 			{
-				Var	   *groupexpr = get_groupclause_expr(lfirst(gl),
-														 context->targetList);
+				GroupClause	   *gcl = lfirst(gl);
+				Node		   *groupexpr;
 
-				if (var_equal((Var *) thisarg, groupexpr))
+				groupexpr = get_sortgroupclause_expr(gcl,
+													 context->targetList);
+				/* XXX is var_equal correct, or should we use equal()? */
+				if (var_equal((Var *) thisarg, (Var *) groupexpr))
 				{
 					contained_in_group_clause = true;
 					break;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d0022b4cbc0..188379c9a2d 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -6,7 +6,7 @@
  * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.21 1999/07/16 04:59:20 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.22 1999/08/21 03:49:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -552,9 +552,7 @@ SS_finalize_plan(Plan *plan)
 			break;
 
 		case T_Agg:
-			finalize_primnode_walker((Node *) ((Agg *) plan)->aggs,
-									 &results);
-			Assert(results.subplans == NIL);
+			/* XXX Code used to reject subplans in Aggref args; needed?? */
 			break;
 
 		case T_SeqScan:
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 0cc4780807e..2a9ddfc716a 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/preptlist.c,v 1.29 1999/08/09 03:13:31 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/preptlist.c,v 1.30 1999/08/21 03:49:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -163,7 +163,6 @@ replace_matching_resname(List *new_tlist, List *old_tlist)
 		{
 			TargetEntry *old_tle = (TargetEntry *) lfirst(temp);
 
-			old_tle = lfirst(temp);
 			if (!strcmp(old_tle->resdom->resname,
 						new_tle->resdom->resname))
 			{
@@ -174,6 +173,7 @@ replace_matching_resname(List *new_tlist, List *old_tlist)
 
 		if (matching_old_tl)
 		{
+			/* XXX safe to modify old resdom? */
 			matching_old_tl->resdom->resno = new_tle->resdom->resno;
 			t_list = lappend(t_list, matching_old_tl);
 		}
@@ -197,45 +197,42 @@ replace_matching_resname(List *new_tlist, List *old_tlist)
 	{
 		TargetEntry *old_tle,
 				   *new_tl;
-		Resdom	   *newresno;
 
 		old_tle = lfirst(temp);
 		if (old_tle->resdom->resno < 0)
 		{
-			newresno = (Resdom *) copyObject((Node *) old_tle->resdom);
-			newresno->resno = length(t_list) + 1;
-			newresno->resjunk = true;
-			new_tl = makeTargetEntry(newresno, old_tle->expr);
+			Resdom	   *newresdom;
+
+			newresdom = (Resdom *) copyObject((Node *) old_tle->resdom);
+			newresdom->resno = length(t_list) + 1;
+			newresdom->resjunk = true;
+			new_tl = makeTargetEntry(newresdom, old_tle->expr);
 			t_list = lappend(t_list, new_tl);
 		}
-
 		/*
 		 * Also it is possible that the parser or rewriter added some junk
-		 * attributes to hold GROUP BY expressions which are not part of
+		 * attributes to hold ORDER/GROUP BY expressions which are not part of
 		 * the result attributes. We can simply identify them by looking
-		 * at the resgroupref in the TLE's resdom, which is a unique
-		 * number telling which TLE belongs to which GroupClause.
+		 * at the ressortgroupref in the TLE's resdom, which is a unique
+		 * number telling which TLE belongs to which Sort/GroupClause.
 		 */
-		if (old_tle->resdom->resgroupref > 0)
+		else if (old_tle->resdom->ressortgroupref > 0)
 		{
-			bool		already_there = FALSE;
-			TargetEntry *new_tle;
-			Resdom	   *newresno;
+			bool		already_there = false;
 
 			/*
 			 * Check if the tle is already in the new list
 			 */
 			foreach(i, t_list)
 			{
-				new_tle = (TargetEntry *) lfirst(i);
+				TargetEntry *new_tle = (TargetEntry *) lfirst(i);
 
-				if (new_tle->resdom->resgroupref ==
-					old_tle->resdom->resgroupref)
+				if (new_tle->resdom->ressortgroupref ==
+					old_tle->resdom->ressortgroupref)
 				{
-					already_there = TRUE;
+					already_there = true;
 					break;
 				}
-
 			}
 
 			/*
@@ -243,10 +240,12 @@ replace_matching_resname(List *new_tlist, List *old_tlist)
 			 */
 			if (!already_there)
 			{
-				newresno = (Resdom *) copyObject((Node *) old_tle->resdom);
-				newresno->resno = length(t_list) + 1;
-				newresno->resjunk = true;
-				new_tl = makeTargetEntry(newresno, old_tle->expr);
+				Resdom	   *newresdom;
+
+				newresdom = (Resdom *) copyObject((Node *) old_tle->resdom);
+				newresdom->resno = length(t_list) + 1;
+				newresdom->resjunk = true;
+				new_tl = makeTargetEntry(newresdom, old_tle->expr);
 				t_list = lappend(t_list, new_tl);
 			}
 		}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 21afad92b82..54e84739d24 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.38 1999/08/16 02:17:55 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.39 1999/08/21 03:49:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -159,7 +159,7 @@ plan_union_queries(Query *parse)
 		union_plans = lcons(union_planner(parse), NIL);
 		union_rts = lcons(parse->rtable, NIL);
 
-		/* Append the remainging UNION ALLs */
+		/* Append the remaining UNION ALLs */
 		foreach(ulist, union_all_queries)
 		{
 			Query	   *union_all_query = lfirst(ulist);
@@ -172,14 +172,19 @@ plan_union_queries(Query *parse)
 	/* We have already split UNION and UNION ALL and we made it consistent */
 	if (!last_union_all_flag)
 	{
+		/* Need SELECT DISTINCT behavior to implement UNION.
+		 * Set uniqueFlag properly, put back the held sortClause,
+		 * and add any missing columns to the sort clause.
+		 */
 		parse->uniqueFlag = "*";
-		parse->sortClause = transformSortClause(NULL, NIL,
-												hold_sortClause,
-												parse->targetList, "*");
+		parse->sortClause = addAllTargetsToSortList(hold_sortClause,
+													parse->targetList);
 	}
 	else
+	{
 		/* needed so we don't take the flag from the first query */
 		parse->uniqueFlag = NULL;
+	}
 
 	/* Make sure we don't try to apply the first query's grouping stuff
 	 * to the Append node, either.  Basically we don't want union_planner
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 921bb1d671a..37a790cc3dd 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.38 1999/08/10 03:00:15 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.39 1999/08/21 03:49:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,7 +19,7 @@
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 
-static Node *flatten_tlist_vars_mutator(Node *node, List *flat_tlist);
+static Node *unflatten_tlist_mutator(Node *node, List *flat_tlist);
 
 /*****************************************************************************
  *	---------- RELATION node target list routines ----------
@@ -89,7 +89,7 @@ tlist_member(Var *var, List *tlist)
 void
 add_var_to_tlist(RelOptInfo *rel, Var *var)
 {
-	if (tlistentry_member(var, rel->targetlist) == NULL)
+	if (! tlistentry_member(var, rel->targetlist))
 	{
 		/* XXX is copyObject necessary here? */
 		rel->targetlist = lappend(rel->targetlist,
@@ -157,27 +157,6 @@ get_actual_tlist(List *tlist)
  *****************************************************************************/
 
 /*
- *	 Routine to get the resdom out of a targetlist.
- */
-Resdom *
-tlist_resdom(List *tlist, Resdom *resnode)
-{
-	List	   *i;
-
-	foreach(i, tlist)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(i);
-		Resdom	   *resdom = tle->resdom;
-
-		/* Since resnos are supposed to be unique */
-		if (resnode->resno == resdom->resno)
-			return resdom;
-	}
-	return (Resdom *) NULL;
-}
-
-
-/*
  * match_varid
  *	  Searches a target list for an entry matching a given var.
  *
@@ -276,49 +255,68 @@ copy_vars(List *target, List *source)
  * flatten_tlist
  *	  Create a target list that only contains unique variables.
  *
- *
  * 'tlist' is the current target list
  *
  * Returns the "flattened" new target list.
  *
+ * The result is entirely new structure sharing no nodes with the original.
+ * Copying the Var nodes is probably overkill, but be safe for now.
  */
 List *
 flatten_tlist(List *tlist)
 {
 	List	   *vlist = pull_var_clause((Node *) tlist);
-	int			last_resdomno = 1;
-	List	   *new_tlist = NIL;
+	List	   *new_tlist;
+
+	new_tlist = add_to_flat_tlist(NIL, vlist);
+	freeList(vlist);
+	return new_tlist;
+}
+
+/*
+ * add_to_flat_tlist
+ *		Add more vars to a flattened tlist (if they're not already in it)
+ *
+ * 'tlist' is the flattened tlist
+ * 'vars' is a list of var nodes
+ *
+ * Returns the extended tlist.
+ */
+List *
+add_to_flat_tlist(List *tlist, List *vars)
+{
+	int			next_resdomno = length(tlist) + 1;
 	List	   *v;
 
-	foreach(v, vlist)
+	foreach(v, vars)
 	{
 		Var		   *var = lfirst(v);
 
-		if (! tlistentry_member(var, new_tlist))
+		if (! tlistentry_member(var, tlist))
 		{
 			Resdom	   *r;
 
-			r = makeResdom(last_resdomno++,
+			r = makeResdom(next_resdomno++,
 						   var->vartype,
 						   var->vartypmod,
 						   NULL,
 						   (Index) 0,
 						   (Oid) 0,
 						   false);
-			new_tlist = lappend(new_tlist,
-								makeTargetEntry(r, (Node *) var));
+			tlist = lappend(tlist,
+							makeTargetEntry(r, copyObject(var)));
 		}
 	}
-	freeList(vlist);
-
-	return new_tlist;
+	return tlist;
 }
 
 /*
- * flatten_tlist_vars
- *	  Redoes the target list of a query by replacing vars within
+ * unflatten_tlist
+ *	  Reconstructs the target list of a query by replacing vars within
  *	  target expressions with vars from the 'flattened' target list.
  *
+ * XXX is this really necessary?  Why can't we just use the tlist as is?
+ *
  * 'full_tlist' is the original target list
  * 'flat_tlist' is the flattened (var-only) target list
  *
@@ -326,21 +324,21 @@ flatten_tlist(List *tlist)
  *
  */
 List *
-flatten_tlist_vars(List *full_tlist, List *flat_tlist)
+unflatten_tlist(List *full_tlist, List *flat_tlist)
 {
-	return (List *) flatten_tlist_vars_mutator((Node *) full_tlist,
-											   flat_tlist);
+	return (List *) unflatten_tlist_mutator((Node *) full_tlist,
+											flat_tlist);
 }
 
 static Node *
-flatten_tlist_vars_mutator(Node *node, List *flat_tlist)
+unflatten_tlist_mutator(Node *node, List *flat_tlist)
 {
 	if (node == NULL)
 		return NULL;
 	if (IsA(node, Var))
 		return (Node *) get_expr(match_varid((Var *) node,
 											 flat_tlist));
-	return expression_tree_mutator(node, flatten_tlist_vars_mutator,
+	return expression_tree_mutator(node, unflatten_tlist_mutator,
 								   (void *) flat_tlist);
 }
 
@@ -354,20 +352,28 @@ get_expr(TargetEntry *tle)
 	return (Var *) tle->expr;
 }
 
-
-Var *
-get_groupclause_expr(GroupClause *groupClause, List *targetList)
+/*
+ * get_sortgroupclause_expr
+ *		Find the targetlist entry matching the given SortClause
+ *		(or GroupClause) by ressortgroupref, and return its expression.
+ *
+ * Because GroupClause is typedef'd as SortClause, either kind of
+ * node can be passed without casting.
+ */
+Node *
+get_sortgroupclause_expr(SortClause *sortClause, List *targetList)
 {
+	Index		refnumber = sortClause->tleSortGroupRef;
 	List	   *l;
 
 	foreach(l, targetList)
 	{
 		TargetEntry *tle = (TargetEntry *) lfirst(l);
-		if (tle->resdom->resgroupref == groupClause->tleGroupref)
-			return get_expr(tle);
+
+		if (tle->resdom->ressortgroupref == refnumber)
+			return tle->expr;
 	}
 
-	elog(ERROR,
-	"get_groupclause_expr: GROUP BY expression not found in targetlist");
-	return NULL;
+	elog(ERROR, "get_sortgroupclause_expr: ORDER/GROUP BY expression not found in targetlist");
+	return NULL;				/* keep compiler quiet */
 }