7 files changed, 321 insertions, 139 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 69d1a890e4b..e1c003b5047 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.173 2007/01/08 16:09:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1258,8 +1258,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
 	Path	   *outer_path = path->jpath.outerjoinpath;
 	Path	   *inner_path = path->jpath.innerjoinpath;
 	List	   *mergeclauses = path->path_mergeclauses;
-	List	   *mergefamilies = path->path_mergefamilies;
-	List	   *mergestrategies = path->path_mergestrategies;
+	Oid		   *mergeFamilies = path->path_mergeFamilies;
+	int		   *mergeStrategies = path->path_mergeStrategies;
 	List	   *outersortkeys = path->outersortkeys;
 	List	   *innersortkeys = path->innersortkeys;
 	Cost		startup_cost = 0;
@@ -1357,8 +1357,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
 		firstclause = (RestrictInfo *) linitial(mergeclauses);
 		if (firstclause->left_mergescansel < 0) /* not computed yet? */
 			mergejoinscansel(root, (Node *) firstclause->clause,
-							 linitial_oid(mergefamilies),
-							 linitial_int(mergestrategies),
+							 mergeFamilies[0],
+							 mergeStrategies[0],
 							 &firstclause->left_mergescansel,
 							 &firstclause->right_mergescansel);
 
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index bde38af0ddd..2885b021546 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.109 2007/01/05 22:19:31 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,8 +40,10 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel,
 						 RelOptInfo *innerrel,
 						 List *restrictlist,
 						 JoinType jointype);
-static void build_mergejoin_strat_lists(List *mergeclauses,
-							List **mergefamilies, List **mergestrategies);
+static void build_mergejoin_strat_arrays(List *mergeclauses,
+										 Oid **mergefamilies,
+										 int **mergestrategies,
+										 bool **mergenullsfirst);
 
 
 /*
@@ -228,8 +230,9 @@ sort_inner_and_outer(PlannerInfo *root,
 		List	   *front_pathkey = (List *) lfirst(l);
 		List	   *cur_pathkeys;
 		List	   *cur_mergeclauses;
-		List	   *mergefamilies;
-		List	   *mergestrategies;
+		Oid		   *mergefamilies;
+		int		   *mergestrategies;
+		bool	   *mergenullsfirst;
 		List	   *outerkeys;
 		List	   *innerkeys;
 		List	   *merge_pathkeys;
@@ -275,8 +278,10 @@ sort_inner_and_outer(PlannerInfo *root,
 											 outerkeys);
 
 		/* Build opfamily info for execution */
-		build_mergejoin_strat_lists(cur_mergeclauses,
-									&mergefamilies, &mergestrategies);
+		build_mergejoin_strat_arrays(cur_mergeclauses,
+									 &mergefamilies,
+									 &mergestrategies,
+									 &mergenullsfirst);
 
 		/*
 		 * And now we can make the path.
@@ -292,6 +297,7 @@ sort_inner_and_outer(PlannerInfo *root,
 									   cur_mergeclauses,
 									   mergefamilies,
 									   mergestrategies,
+									   mergenullsfirst,
 									   outerkeys,
 									   innerkeys));
 	}
@@ -421,8 +427,9 @@ match_unsorted_outer(PlannerInfo *root,
 		Path	   *outerpath = (Path *) lfirst(l);
 		List	   *merge_pathkeys;
 		List	   *mergeclauses;
-		List	   *mergefamilies;
-		List	   *mergestrategies;
+		Oid		   *mergefamilies;
+		int		   *mergestrategies;
+		bool	   *mergenullsfirst;
 		List	   *innersortkeys;
 		List	   *trialsortkeys;
 		Path	   *cheapest_startup_inner;
@@ -530,8 +537,10 @@ match_unsorted_outer(PlannerInfo *root,
 													   innerrel);
 
 		/* Build opfamily info for execution */
-		build_mergejoin_strat_lists(mergeclauses,
-									&mergefamilies, &mergestrategies);
+		build_mergejoin_strat_arrays(mergeclauses,
+									 &mergefamilies,
+									 &mergestrategies,
+									 &mergenullsfirst);
 
 		/*
 		 * Generate a mergejoin on the basis of sorting the cheapest inner.
@@ -550,6 +559,7 @@ match_unsorted_outer(PlannerInfo *root,
 									   mergeclauses,
 									   mergefamilies,
 									   mergestrategies,
+									   mergenullsfirst,
 									   NIL,
 									   innersortkeys));
 
@@ -610,8 +620,10 @@ match_unsorted_outer(PlannerInfo *root,
 					newclauses = mergeclauses;
 
 				/* Build opfamily info for execution */
-				build_mergejoin_strat_lists(newclauses,
-											&mergefamilies, &mergestrategies);
+				build_mergejoin_strat_arrays(newclauses,
+											 &mergefamilies,
+											 &mergestrategies,
+											 &mergenullsfirst);
 
 				add_path(joinrel, (Path *)
 						 create_mergejoin_path(root,
@@ -624,6 +636,7 @@ match_unsorted_outer(PlannerInfo *root,
 											   newclauses,
 											   mergefamilies,
 											   mergestrategies,
+											   mergenullsfirst,
 											   NIL,
 											   NIL));
 				cheapest_total_inner = innerpath;
@@ -661,8 +674,10 @@ match_unsorted_outer(PlannerInfo *root,
 					}
 
 					/* Build opfamily info for execution */
-					build_mergejoin_strat_lists(newclauses,
-												&mergefamilies, &mergestrategies);
+					build_mergejoin_strat_arrays(newclauses,
+												 &mergefamilies,
+												 &mergestrategies,
+												 &mergenullsfirst);
 
 					add_path(joinrel, (Path *)
 							 create_mergejoin_path(root,
@@ -675,6 +690,7 @@ match_unsorted_outer(PlannerInfo *root,
 												   newclauses,
 												   mergefamilies,
 												   mergestrategies,
+												   mergenullsfirst,
 												   NIL,
 												   NIL));
 				}
@@ -981,20 +997,26 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
 }
 
 /*
- * Temporary hack to build opfamily and strategy lists needed for mergejoin
+ * Temporary hack to build opfamily and strategy info needed for mergejoin
  * by the executor.  We need to rethink the planner's handling of merge
  * planning so that it can deal with multiple possible merge orders, but
  * that's not done yet.
  */
 static void
-build_mergejoin_strat_lists(List *mergeclauses,
-							List **mergefamilies, List **mergestrategies)
+build_mergejoin_strat_arrays(List *mergeclauses,
+							 Oid **mergefamilies,
+							 int **mergestrategies,
+							 bool **mergenullsfirst)
 {
+	int			nClauses = list_length(mergeclauses);
+	int			i;
 	ListCell   *l;
 
-	*mergefamilies = NIL;
-	*mergestrategies = NIL;
+	*mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid));
+	*mergestrategies = (int *) palloc(nClauses * sizeof(int));
+	*mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
 
+	i = 0;
 	foreach(l, mergeclauses)
 	{
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
@@ -1003,11 +1025,16 @@ build_mergejoin_strat_lists(List *mergeclauses,
 		 * We do not need to worry about whether the mergeclause will be
 		 * commuted at runtime --- it's the same opfamily either way.
 		 */
-		*mergefamilies = lappend_oid(*mergefamilies, restrictinfo->mergeopfamily);
+		(*mergefamilies)[i] = restrictinfo->mergeopfamily;
 		/*
 		 * For the moment, strategy must always be LessThan --- see
 		 * hack version of get_op_mergejoin_info
 		 */
-		*mergestrategies = lappend_int(*mergestrategies, BTLessStrategyNumber);
+		(*mergestrategies)[i] = BTLessStrategyNumber;
+
+		/* And we only allow NULLS LAST, too */
+		(*mergenullsfirst)[i] = false;
+
+		i++;
 	}
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8f1d8e81cc4..9a8204392a1 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.220 2007/01/09 02:14:12 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.221 2007/01/10 18:06:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -113,7 +113,10 @@ static HashJoin *make_hashjoin(List *tlist,
 static Hash *make_hash(Plan *lefttree);
 static MergeJoin *make_mergejoin(List *tlist,
 			   List *joinclauses, List *otherclauses,
-			   List *mergeclauses, List *mergefamilies, List *mergestrategies,
+			   List *mergeclauses,
+			   Oid *mergefamilies,
+			   int *mergestrategies,
+			   bool *mergenullsfirst,
 			   Plan *lefttree, Plan *righttree,
 			   JoinType jointype);
 static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
@@ -595,6 +598,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	Plan	   *plan;
 	Plan	   *subplan;
 	List	   *uniq_exprs;
+	List	   *in_operators;
 	List	   *newtlist;
 	int			nextresno;
 	bool		newitems;
@@ -626,10 +630,12 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	 * To find the correct list of values to unique-ify, we look in the
 	 * information saved for IN expressions.  If this code is ever used in
 	 * other scenarios, some other way of finding what to unique-ify will
-	 * be needed.
+	 * be needed.  The IN clause's operators are needed too, since they
+	 * determine what the meaning of "unique" is in this context.
 	 *----------
 	 */
 	uniq_exprs = NIL;			/* just to keep compiler quiet */
+	in_operators = NIL;
 	foreach(l, root->in_info_list)
 	{
 		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
@@ -637,6 +643,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 		if (bms_equal(ininfo->righthand, best_path->path.parent->relids))
 		{
 			uniq_exprs = ininfo->sub_targetlist;
+			in_operators = ininfo->in_operators;
 			break;
 		}
 	}
@@ -687,8 +694,8 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	newtlist = subplan->targetlist;
 	numGroupCols = list_length(uniq_exprs);
 	groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
-	groupColPos = 0;
 
+	groupColPos = 0;
 	foreach(l, uniq_exprs)
 	{
 		Node	   *uniqexpr = lfirst(l);
@@ -703,10 +710,31 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	if (best_path->umethod == UNIQUE_PATH_HASH)
 	{
 		long		numGroups;
+		Oid		   *groupOperators;
 
 		numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
 
 		/*
+		 * Get the (presumed hashable) equality operators for the Agg node
+		 * to use.  Normally these are the same as the IN clause operators,
+		 * but if those are cross-type operators then the equality operators
+		 * are the ones for the IN clause operators' RHS datatype.
+		 */
+		groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid));
+		groupColPos = 0;
+		foreach(l, in_operators)
+		{
+			Oid			in_oper = lfirst_oid(l);
+			Oid			eq_oper;
+
+			eq_oper = get_compatible_hash_operator(in_oper, false);
+			if (!OidIsValid(eq_oper))		/* shouldn't happen */
+				elog(ERROR, "could not find compatible hash operator for operator %u",
+					 in_oper);
+			groupOperators[groupColPos++] = eq_oper;
+		}
+
+		/*
 		 * Since the Agg node is going to project anyway, we can give it the
 		 * minimum output tlist, without any stuff we might have added to the
 		 * subplan tlist.
@@ -717,6 +745,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 								 AGG_HASHED,
 								 numGroupCols,
 								 groupColIdx,
+								 groupOperators,
 								 numGroups,
 								 0,
 								 subplan);
@@ -725,18 +754,29 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	{
 		List	   *sortList = NIL;
 
-		for (groupColPos = 0; groupColPos < numGroupCols; groupColPos++)
+		/* Create an ORDER BY list to sort the input compatibly */
+		groupColPos = 0;
+		foreach(l, in_operators)
 		{
+			Oid			in_oper = lfirst_oid(l);
+			Oid			sortop;
 			TargetEntry *tle;
+			SortClause *sortcl;
 
+			sortop = get_ordering_op_for_equality_op(in_oper, false);
+			if (!OidIsValid(sortop))		/* shouldn't happen */
+				elog(ERROR, "could not find ordering operator for equality operator %u",
+					 in_oper);
 			tle = get_tle_by_resno(subplan->targetlist,
 								   groupColIdx[groupColPos]);
 			Assert(tle != NULL);
-			sortList = addTargetToSortList(NULL, tle,
-										   sortList, subplan->targetlist,
-										   SORTBY_DEFAULT,
-										   SORTBY_NULLS_DEFAULT,
-										   NIL, false);
+			sortcl = makeNode(SortClause);
+			sortcl->tleSortGroupRef = assignSortGroupRef(tle,
+														 subplan->targetlist);
+			sortcl->sortop = sortop;
+			sortcl->nulls_first = false;
+			sortList = lappend(sortList, sortcl);
+			groupColPos++;
 		}
 		plan = (Plan *) make_sort_from_sortclauses(root, sortList, subplan);
 		plan = (Plan *) make_unique(plan, sortList);
@@ -1542,8 +1582,9 @@ create_mergejoin_plan(PlannerInfo *root,
 							   joinclauses,
 							   otherclauses,
 							   mergeclauses,
-							   best_path->path_mergefamilies,
-							   best_path->path_mergestrategies,
+							   best_path->path_mergeFamilies,
+							   best_path->path_mergeStrategies,
+							   best_path->path_mergeNullsFirst,
 							   outer_plan,
 							   inner_plan,
 							   best_path->jpath.jointype);
@@ -2335,8 +2376,9 @@ make_mergejoin(List *tlist,
 			   List *joinclauses,
 			   List *otherclauses,
 			   List *mergeclauses,
-			   List *mergefamilies,
-			   List *mergestrategies,
+			   Oid *mergefamilies,
+			   int *mergestrategies,
+			   bool *mergenullsfirst,
 			   Plan *lefttree,
 			   Plan *righttree,
 			   JoinType jointype)
@@ -2350,8 +2392,9 @@ make_mergejoin(List *tlist,
 	plan->lefttree = lefttree;
 	plan->righttree = righttree;
 	node->mergeclauses = mergeclauses;
-	node->mergefamilies = mergefamilies;
-	node->mergestrategies = mergestrategies;
+	node->mergeFamilies = mergefamilies;
+	node->mergeStrategies = mergestrategies;
+	node->mergeNullsFirst = mergenullsfirst;
 	node->join.jointype = jointype;
 	node->join.joinqual = joinclauses;
 
@@ -2613,7 +2656,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
  * This might look like it could be merged with make_sort_from_sortclauses,
  * but presently we *must* use the grpColIdx[] array to locate sort columns,
  * because the child plan's tlist is not marked with ressortgroupref info
- * appropriate to the grouping node.  So, only the sort direction info
+ * appropriate to the grouping node.  So, only the sort ordering info
  * is used from the GroupClause entries.
  */
 Sort *
@@ -2716,7 +2759,7 @@ materialize_finished_plan(Plan *subplan)
 Agg *
 make_agg(PlannerInfo *root, List *tlist, List *qual,
 		 AggStrategy aggstrategy,
-		 int numGroupCols, AttrNumber *grpColIdx,
+		 int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
 		 long numGroups, int numAggs,
 		 Plan *lefttree)
 {
@@ -2728,6 +2771,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
 	node->aggstrategy = aggstrategy;
 	node->numCols = numGroupCols;
 	node->grpColIdx = grpColIdx;
+	node->grpOperators = grpOperators;
 	node->numGroups = numGroups;
 
 	copy_plan_costsize(plan, lefttree); /* only care about copying size */
@@ -2784,6 +2828,7 @@ make_group(PlannerInfo *root,
 		   List *qual,
 		   int numGroupCols,
 		   AttrNumber *grpColIdx,
+		   Oid *grpOperators,
 		   double numGroups,
 		   Plan *lefttree)
 {
@@ -2794,6 +2839,7 @@ make_group(PlannerInfo *root,
 
 	node->numCols = numGroupCols;
 	node->grpColIdx = grpColIdx;
+	node->grpOperators = grpOperators;
 
 	copy_plan_costsize(plan, lefttree); /* only care about copying size */
 	cost_group(&group_path, root,
@@ -2841,7 +2887,8 @@ make_group(PlannerInfo *root,
 
 /*
  * distinctList is a list of SortClauses, identifying the targetlist items
- * that should be considered by the Unique filter.
+ * that should be considered by the Unique filter.  The input path must
+ * already be sorted accordingly.
  */
 Unique *
 make_unique(Plan *lefttree, List *distinctList)
@@ -2851,6 +2898,7 @@ make_unique(Plan *lefttree, List *distinctList)
 	int			numCols = list_length(distinctList);
 	int			keyno = 0;
 	AttrNumber *uniqColIdx;
+	Oid		   *uniqOperators;
 	ListCell   *slitem;
 
 	copy_plan_costsize(plan, lefttree);
@@ -2874,28 +2922,37 @@ make_unique(Plan *lefttree, List *distinctList)
 	plan->righttree = NULL;
 
 	/*
-	 * convert SortClause list into array of attr indexes, as wanted by exec
+	 * convert SortClause list into arrays of attr indexes and equality
+	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
 	uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
 
 	foreach(slitem, distinctList)
 	{
 		SortClause *sortcl = (SortClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 
-		uniqColIdx[keyno++] = tle->resno;
+		uniqColIdx[keyno] = tle->resno;
+		uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
+		if (!OidIsValid(uniqOperators[keyno]))		/* shouldn't happen */
+			elog(ERROR, "could not find equality operator for ordering operator %u",
+				 sortcl->sortop);
+		keyno++;
 	}
 
 	node->numCols = numCols;
 	node->uniqColIdx = uniqColIdx;
+	node->uniqOperators = uniqOperators;
 
 	return node;
 }
 
 /*
  * distinctList is a list of SortClauses, identifying the targetlist items
- * that should be considered by the SetOp filter.
+ * that should be considered by the SetOp filter.  The input path must
+ * already be sorted accordingly.
  */
 SetOp *
 make_setop(SetOpCmd cmd, Plan *lefttree,
@@ -2906,6 +2963,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 	int			numCols = list_length(distinctList);
 	int			keyno = 0;
 	AttrNumber *dupColIdx;
+	Oid		   *dupOperators;
 	ListCell   *slitem;
 
 	copy_plan_costsize(plan, lefttree);
@@ -2930,22 +2988,30 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 	plan->righttree = NULL;
 
 	/*
-	 * convert SortClause list into array of attr indexes, as wanted by exec
+	 * convert SortClause list into arrays of attr indexes and equality
+	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
 	dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
 
 	foreach(slitem, distinctList)
 	{
 		SortClause *sortcl = (SortClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 
-		dupColIdx[keyno++] = tle->resno;
+		dupColIdx[keyno] = tle->resno;
+		dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
+		if (!OidIsValid(dupOperators[keyno]))		/* shouldn't happen */
+			elog(ERROR, "could not find equality operator for ordering operator %u",
+				 sortcl->sortop);
+		keyno++;
 	}
 
 	node->cmd = cmd;
 	node->numCols = numCols;
 	node->dupColIdx = dupColIdx;
+	node->dupOperators = dupOperators;
 	node->flagColIdx = flagColIdx;
 
 	return node;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a4de2dd36a5..5fcfc58bf9b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.210 2007/01/05 22:19:32 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.211 2007/01/10 18:06:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,7 @@
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
 #include "parser/parsetree.h"
+#include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
 
@@ -62,10 +63,11 @@ static bool is_dummy_plan(Plan *plan);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
+static Oid *extract_grouping_ops(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 					   Path *cheapest_path, Path *sorted_path,
-					   double dNumGroups, AggClauseCounts *agg_counts);
-static bool hash_safe_grouping(PlannerInfo *root);
+					   Oid *groupOperators, double dNumGroups,
+					   AggClauseCounts *agg_counts);
 static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
 					   AttrNumber **groupColIdx, bool *need_tlist_eval);
 static void locate_grouping_columns(PlannerInfo *root,
@@ -750,6 +752,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		List	   *sub_tlist;
 		List	   *group_pathkeys;
 		AttrNumber *groupColIdx = NULL;
+		Oid		   *groupOperators = NULL;
 		bool		need_tlist_eval = true;
 		QualCost	tlist_cost;
 		Path	   *cheapest_path;
@@ -829,14 +832,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		sort_pathkeys = root->sort_pathkeys;
 
 		/*
-		 * If grouping, decide whether we want to use hashed grouping.
+		 * If grouping, extract the grouping operators and decide whether we
+		 * want to use hashed grouping.
 		 */
 		if (parse->groupClause)
 		{
+			groupOperators = extract_grouping_ops(parse->groupClause);
 			use_hashed_grouping =
 				choose_hashed_grouping(root, tuple_fraction,
 									   cheapest_path, sorted_path,
-									   dNumGroups, &agg_counts);
+									   groupOperators, dNumGroups,
+									   &agg_counts);
 
 			/* Also convert # groups to long int --- but 'ware overflow! */
 			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
@@ -956,6 +962,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												AGG_HASHED,
 												numGroupCols,
 												groupColIdx,
+												groupOperators,
 												numGroups,
 												agg_counts.numAggs,
 												result_plan);
@@ -999,6 +1006,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												aggstrategy,
 												numGroupCols,
 												groupColIdx,
+												groupOperators,
 												numGroups,
 												agg_counts.numAggs,
 												result_plan);
@@ -1027,6 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												  (List *) parse->havingQual,
 												  numGroupCols,
 												  groupColIdx,
+												  groupOperators,
 												  dNumGroups,
 												  result_plan);
 				/* The Group node won't change sort ordering */
@@ -1338,12 +1347,41 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
 }
 
 /*
+ * extract_grouping_ops - make an array of the equality operator OIDs
+ *		for the GROUP BY clause
+ */
+static Oid *
+extract_grouping_ops(List *groupClause)
+{
+	int			numCols = list_length(groupClause);
+	int			colno = 0;
+	Oid		   *groupOperators;
+	ListCell   *glitem;
+
+	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+
+	foreach(glitem, groupClause)
+	{
+		GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+
+		groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
+		if (!OidIsValid(groupOperators[colno]))		/* shouldn't happen */
+			elog(ERROR, "could not find equality operator for ordering operator %u",
+				 groupcl->sortop);
+		colno++;
+	}
+
+	return groupOperators;
+}
+
+/*
  * choose_hashed_grouping - should we use hashed grouping?
  */
 static bool
 choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 					   Path *cheapest_path, Path *sorted_path,
-					   double dNumGroups, AggClauseCounts *agg_counts)
+					   Oid *groupOperators, double dNumGroups,
+					   AggClauseCounts *agg_counts)
 {
 	int			numGroupCols = list_length(root->parse->groupClause);
 	double		cheapest_path_rows;
@@ -1352,10 +1390,13 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 	List	   *current_pathkeys;
 	Path		hashed_p;
 	Path		sorted_p;
+	int			i;
 
 	/*
 	 * Check can't-do-it conditions, including whether the grouping operators
-	 * are hashjoinable.
+	 * are hashjoinable.  (We assume hashing is OK if they are marked
+	 * oprcanhash.  If there isn't actually a supporting hash function,
+	 * the executor will complain at runtime.)
 	 *
 	 * Executor doesn't support hashed aggregation with DISTINCT aggregates.
 	 * (Doing so would imply storing *all* the input values in the hash table,
@@ -1365,8 +1406,11 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 		return false;
 	if (agg_counts->numDistinctAggs != 0)
 		return false;
-	if (!hash_safe_grouping(root))
-		return false;
+	for (i = 0; i < numGroupCols; i++)
+	{
+		if (!op_hashjoinable(groupOperators[i]))
+			return false;
+	}
 
 	/*
 	 * Don't do it if it doesn't look like the hashtable will fit into
@@ -1471,36 +1515,6 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 	return false;
 }
 
-/*
- * hash_safe_grouping - are grouping operators hashable?
- *
- * We assume hashed aggregation will work if the datatype's equality operator
- * is marked hashjoinable.
- */
-static bool
-hash_safe_grouping(PlannerInfo *root)
-{
-	ListCell   *gl;
-
-	foreach(gl, root->parse->groupClause)
-	{
-		GroupClause *grpcl = (GroupClause *) lfirst(gl);
-		TargetEntry *tle = get_sortgroupclause_tle(grpcl,
-												   root->parse->targetList);
-		Operator	optup;
-		bool		oprcanhash;
-
-		optup = equality_oper(exprType((Node *) tle->expr), true);
-		if (!optup)
-			return false;
-		oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
-		ReleaseSysCache(optup);
-		if (!oprcanhash)
-			return false;
-	}
-	return true;
-}
-
 /*---------------
  * make_subplanTargetList
  *	  Generate appropriate target list when grouping is required.
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index ed76612dc0a..7339445e046 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.116 2007/01/05 22:19:32 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.117 2007/01/10 18:06:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -671,11 +671,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
 {
 	Query	   *parse = root->parse;
 	Query	   *subselect = (Query *) sublink->subselect;
+	List	   *in_operators;
 	Relids		left_varnos;
 	int			rtindex;
 	RangeTblEntry *rte;
 	RangeTblRef *rtr;
 	InClauseInfo *ininfo;
+	Node	   *result;
 
 	/*
 	 * The sublink type must be "= ANY" --- that is, an IN operator.  We
@@ -689,15 +691,31 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
 		return NULL;
 	if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
 	{
+		Oid			opno = ((OpExpr *) sublink->testexpr)->opno;
 		List	   *opfamilies;
 		List	   *opstrats;
 
-		get_op_btree_interpretation(((OpExpr *) sublink->testexpr)->opno,
-									&opfamilies, &opstrats);
+		get_op_btree_interpretation(opno, &opfamilies, &opstrats);
 		if (!list_member_int(opstrats, ROWCOMPARE_EQ))
 			return NULL;
+		in_operators = list_make1_oid(opno);
+	}
+	else if (and_clause(sublink->testexpr))
+	{
+		ListCell   *lc;
+
+		/* OK, but we need to extract the per-column operator OIDs */
+		in_operators = NIL;
+		foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
+		{
+			OpExpr *op = (OpExpr *) lfirst(lc);
+
+			if (!IsA(op, OpExpr))		/* probably shouldn't happen */
+				return NULL;
+			in_operators = lappend_oid(in_operators, op->opno);
+		}
 	}
-	else if (!and_clause(sublink->testexpr))
+	else
 		return NULL;
 
 	/*
@@ -745,16 +763,23 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
 	ininfo = makeNode(InClauseInfo);
 	ininfo->lefthand = left_varnos;
 	ininfo->righthand = bms_make_singleton(rtindex);
-	root->in_info_list = lappend(root->in_info_list, ininfo);
+	ininfo->in_operators = in_operators;
 
 	/*
 	 * Build the result qual expression.  As a side effect,
 	 * ininfo->sub_targetlist is filled with a list of Vars representing the
 	 * subselect outputs.
 	 */
-	return convert_testexpr(sublink->testexpr,
-							rtindex,
-							&ininfo->sub_targetlist);
+	result = convert_testexpr(sublink->testexpr,
+							  rtindex,
+							  &ininfo->sub_targetlist);
+
+	Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist));
+
+	/* Add the completed node to the query's list */
+	root->in_info_list = lappend(root->in_info_list, ininfo);
+
+	return result;
 }
 
 /*
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 3250beafb67..4858f985cff 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.228 2007/01/09 02:14:13 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.229 2007/01/10 18:06:04 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -3995,6 +3995,7 @@ expression_tree_mutator(Node *node,
 
 				FLATCOPY(newnode, ininfo, InClauseInfo);
 				MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *);
+				/* Assume we need not make a copy of in_operators list */
 				return (Node *) newnode;
 			}
 			break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e1390aa1112..8ee6346e5b1 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.135 2007/01/05 22:19:32 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,12 +28,14 @@
 #include "parser/parsetree.h"
 #include "utils/memutils.h"
 #include "utils/selfuncs.h"
+#include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
 
 static List *translate_sub_tlist(List *tlist, int relid);
-static bool query_is_distinct_for(Query *query, List *colnos);
-static bool hash_safe_tlist(List *tlist);
+static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+static Oid	distinct_col_search(int colno, List *colnos, List *opids);
+static bool hash_safe_operators(List *opids);
 
 
 /*****************************************************************************
@@ -733,6 +735,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 	Path		agg_path;		/* dummy for result of cost_agg */
 	MemoryContext oldcontext;
 	List	   *sub_targetlist;
+	List	   *in_operators;
 	ListCell   *l;
 	int			numCols;
 
@@ -769,9 +772,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 	/*
 	 * Try to identify the targetlist that will actually be unique-ified. In
 	 * current usage, this routine is only used for sub-selects of IN clauses,
-	 * so we should be able to find the tlist in in_info_list.
+	 * so we should be able to find the tlist in in_info_list.  Get the IN
+	 * clause's operators, too, because they determine what "unique" means.
 	 */
 	sub_targetlist = NIL;
+	in_operators = NIL;
 	foreach(l, root->in_info_list)
 	{
 		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
@@ -779,6 +784,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 		if (bms_equal(ininfo->righthand, rel->relids))
 		{
 			sub_targetlist = ininfo->sub_targetlist;
+			in_operators = ininfo->in_operators;
 			break;
 		}
 	}
@@ -801,7 +807,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 		sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
 
 		if (sub_tlist_colnos &&
-			query_is_distinct_for(rte->subquery, sub_tlist_colnos))
+			query_is_distinct_for(rte->subquery,
+								  sub_tlist_colnos, in_operators))
 		{
 			pathnode->umethod = UNIQUE_PATH_NOOP;
 			pathnode->rows = rel->rows;
@@ -847,11 +854,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 
 	/*
 	 * Is it safe to use a hashed implementation?  If so, estimate and compare
-	 * costs.  We only try this if we know the targetlist for sure (else we
-	 * can't be sure about the datatypes involved).
+	 * costs.  We only try this if we know the IN operators, else we can't
+	 * check their hashability.
 	 */
 	pathnode->umethod = UNIQUE_PATH_SORT;
-	if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
+	if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
 	{
 		/*
 		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
@@ -924,16 +931,25 @@ translate_sub_tlist(List *tlist, int relid)
  *
  * colnos is an integer list of output column numbers (resno's).  We are
  * interested in whether rows consisting of just these columns are certain
- * to be distinct.
+ * to be distinct.  "Distinctness" is defined according to whether the
+ * corresponding upper-level equality operators listed in opids would think
+ * the values are distinct.  (Note: the opids entries could be cross-type
+ * operators, and thus not exactly the equality operators that the subquery
+ * would use itself.  We assume that the subquery is compatible if these
+ * operators appear in the same btree opfamily as the ones the subquery uses.)
  */
 static bool
-query_is_distinct_for(Query *query, List *colnos)
+query_is_distinct_for(Query *query, List *colnos, List *opids)
 {
 	ListCell   *l;
+	Oid			opid;
+
+	Assert(list_length(colnos) == list_length(opids));
 
 	/*
 	 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
-	 * columns in the DISTINCT clause appear in colnos.
+	 * columns in the DISTINCT clause appear in colnos and operator
+	 * semantics match.
 	 */
 	if (query->distinctClause)
 	{
@@ -943,7 +959,9 @@ query_is_distinct_for(Query *query, List *colnos)
 			TargetEntry *tle = get_sortgroupclause_tle(scl,
 													   query->targetList);
 
-			if (!list_member_int(colnos, tle->resno))
+			opid = distinct_col_search(tle->resno, colnos, opids);
+			if (!OidIsValid(opid) ||
+				!ops_in_same_btree_opfamily(opid, scl->sortop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -952,7 +970,7 @@ query_is_distinct_for(Query *query, List *colnos)
 
 	/*
 	 * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
-	 * appear in colnos.
+	 * appear in colnos and operator semantics match.
 	 */
 	if (query->groupClause)
 	{
@@ -962,7 +980,9 @@ query_is_distinct_for(Query *query, List *colnos)
 			TargetEntry *tle = get_sortgroupclause_tle(grpcl,
 													   query->targetList);
 
-			if (!list_member_int(colnos, tle->resno))
+			opid = distinct_col_search(tle->resno, colnos, opids);
+			if (!OidIsValid(opid) ||
+				!ops_in_same_btree_opfamily(opid, grpcl->sortop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -972,7 +992,7 @@ query_is_distinct_for(Query *query, List *colnos)
 	{
 		/*
 		 * If we have no GROUP BY, but do have aggregates or HAVING, then the
-		 * result is at most one row so it's surely unique.
+		 * result is at most one row so it's surely unique, for any operators.
 		 */
 		if (query->hasAggs || query->havingQual)
 			return true;
@@ -980,7 +1000,13 @@ query_is_distinct_for(Query *query, List *colnos)
 
 	/*
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
-	 * except with ALL
+	 * except with ALL.
+	 *
+	 * XXX this code knows that prepunion.c will adopt the default ordering
+	 * operator for each column datatype as the sortop.  It'd probably be
+	 * better if these operators were chosen at parse time and stored into
+	 * the parsetree, instead of leaving bits of the planner to decide
+	 * semantics.
 	 */
 	if (query->setOperations)
 	{
@@ -996,8 +1022,13 @@ query_is_distinct_for(Query *query, List *colnos)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
 
-				if (!tle->resjunk &&
-					!list_member_int(colnos, tle->resno))
+				if (tle->resjunk)
+					continue;	/* ignore resjunk columns */
+
+				opid = distinct_col_search(tle->resno, colnos, opids);
+				if (!OidIsValid(opid) ||
+					!ops_in_same_btree_opfamily(opid,
+												ordering_oper_opid(exprType((Node *) tle->expr))))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */
@@ -1014,31 +1045,46 @@ query_is_distinct_for(Query *query, List *colnos)
 }
 
 /*
- * hash_safe_tlist - can datatypes of given tlist be hashed?
+ * distinct_col_search - subroutine for query_is_distinct_for
  *
- * We assume hashed aggregation will work if the datatype's equality operator
- * is marked hashjoinable.
+ * If colno is in colnos, return the corresponding element of opids,
+ * else return InvalidOid.  (We expect colnos does not contain duplicates,
+ * so the result is well-defined.)
+ */
+static Oid
+distinct_col_search(int colno, List *colnos, List *opids)
+{
+	ListCell   *lc1,
+			   *lc2;
+
+	forboth(lc1, colnos, lc2, opids)
+	{
+		if (colno == lfirst_int(lc1))
+			return lfirst_oid(lc2);
+	}
+	return InvalidOid;
+}
+
+/*
+ * hash_safe_operators - can all the specified IN operators be hashed?
  *
- * XXX this probably should be somewhere else.	See also hash_safe_grouping
- * in plan/planner.c.
+ * We assume hashed aggregation will work if each IN operator is marked
+ * hashjoinable.  If the IN operators are cross-type, this could conceivably
+ * fail: the aggregation will need a hashable equality operator for the RHS
+ * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * cross-type hashing without support for hashing the individual types, so
+ * we don't expend cycles here to support the case.  We could check
+ * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
+ * former is a significantly more expensive test.
  */
 static bool
-hash_safe_tlist(List *tlist)
+hash_safe_operators(List *opids)
 {
-	ListCell   *tl;
+	ListCell   *lc;
 
-	foreach(tl, tlist)
+	foreach(lc, opids)
 	{
-		Node	   *expr = (Node *) lfirst(tl);
-		Operator	optup;
-		bool		oprcanhash;
-
-		optup = equality_oper(exprType(expr), true);
-		if (!optup)
-			return false;
-		oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
-		ReleaseSysCache(optup);
-		if (!oprcanhash)
+		if (!op_hashjoinable(lfirst_oid(lc)))
 			return false;
 	}
 	return true;
@@ -1156,6 +1202,7 @@ create_nestloop_path(PlannerInfo *root,
  *		ordering for each merge clause
  * 'mergestrategies' are the btree operator strategies identifying the merge
  *		ordering for each merge clause
+ * 'mergenullsfirst' are the nulls first/last flags for each merge clause
  * 'outersortkeys' are the sort varkeys for the outer relation
  * 'innersortkeys' are the sort varkeys for the inner relation
  */
@@ -1168,8 +1215,9 @@ create_mergejoin_path(PlannerInfo *root,
 					  List *restrict_clauses,
 					  List *pathkeys,
 					  List *mergeclauses,
-					  List *mergefamilies,
-					  List *mergestrategies,
+					  Oid *mergefamilies,
+					  int *mergestrategies,
+					  bool *mergenullsfirst,
 					  List *outersortkeys,
 					  List *innersortkeys)
 {
@@ -1210,8 +1258,9 @@ create_mergejoin_path(PlannerInfo *root,
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
 	pathnode->jpath.path.pathkeys = pathkeys;
 	pathnode->path_mergeclauses = mergeclauses;
-	pathnode->path_mergefamilies = mergefamilies;
-	pathnode->path_mergestrategies = mergestrategies;
+	pathnode->path_mergeFamilies = mergefamilies;
+	pathnode->path_mergeStrategies = mergestrategies;
+	pathnode->path_mergeNullsFirst = mergenullsfirst;
 	pathnode->outersortkeys = outersortkeys;
 	pathnode->innersortkeys = innersortkeys;