Change the planner-to-executor API so that the planner tells the executor

which comparison operators to use for plan nodes involving tuple comparison (Agg, Group, Unique, SetOp). Formerly the executor looked up the default equality operator for the datatype, which was really pretty shaky, since it's possible that the data being fed to the node is sorted according to some nondefault operator class that could have an incompatible idea of equality. The planner knows what it has sorted by and therefore can provide the right equality operator to use. Also, this change moves a couple of catalog lookups out of the executor and into the planner, which should help startup time for pre-planned queries by some small amount. Modify the planner to remove some other cavalier assumptions about always being able to use the default operators. Also add "nulls first/last" info to the Plan node for a mergejoin --- neither the executor nor the planner can cope yet, but at least the API is in place.
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-01-10 18:06:05 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-01-10 18:06:05 +0000
commit: a191a169d6d0b9558da4519e66510c4540204a51 (patch)
tree: cd32b62bc013145015f4932fef1f7687737205b3 /src/backend/optimizer/util/pathnode.c
parent: 5f6d735356c9090d87e184c9322bfe37a165a014 (diff)
download: postgresql-a191a169d6d0b9558da4519e66510c4540204a51.tar.gz
postgresql-a191a169d6d0b9558da4519e66510c4540204a51.zip
1 files changed, 89 insertions, 40 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e1390aa1112..8ee6346e5b1 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.135 2007/01/05 22:19:32 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,12 +28,14 @@
 #include "parser/parsetree.h"
 #include "utils/memutils.h"
 #include "utils/selfuncs.h"
+#include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
 
 static List *translate_sub_tlist(List *tlist, int relid);
-static bool query_is_distinct_for(Query *query, List *colnos);
-static bool hash_safe_tlist(List *tlist);
+static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+static Oid	distinct_col_search(int colno, List *colnos, List *opids);
+static bool hash_safe_operators(List *opids);
 
 
 /*****************************************************************************
@@ -733,6 +735,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 	Path		agg_path;		/* dummy for result of cost_agg */
 	MemoryContext oldcontext;
 	List	   *sub_targetlist;
+	List	   *in_operators;
 	ListCell   *l;
 	int			numCols;
 
@@ -769,9 +772,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 	/*
 	 * Try to identify the targetlist that will actually be unique-ified. In
 	 * current usage, this routine is only used for sub-selects of IN clauses,
-	 * so we should be able to find the tlist in in_info_list.
+	 * so we should be able to find the tlist in in_info_list.  Get the IN
+	 * clause's operators, too, because they determine what "unique" means.
 	 */
 	sub_targetlist = NIL;
+	in_operators = NIL;
 	foreach(l, root->in_info_list)
 	{
 		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
@@ -779,6 +784,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 		if (bms_equal(ininfo->righthand, rel->relids))
 		{
 			sub_targetlist = ininfo->sub_targetlist;
+			in_operators = ininfo->in_operators;
 			break;
 		}
 	}
@@ -801,7 +807,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 		sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
 
 		if (sub_tlist_colnos &&
-			query_is_distinct_for(rte->subquery, sub_tlist_colnos))
+			query_is_distinct_for(rte->subquery,
+								  sub_tlist_colnos, in_operators))
 		{
 			pathnode->umethod = UNIQUE_PATH_NOOP;
 			pathnode->rows = rel->rows;
@@ -847,11 +854,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 
 	/*
 	 * Is it safe to use a hashed implementation?  If so, estimate and compare
-	 * costs.  We only try this if we know the targetlist for sure (else we
-	 * can't be sure about the datatypes involved).
+	 * costs.  We only try this if we know the IN operators, else we can't
+	 * check their hashability.
 	 */
 	pathnode->umethod = UNIQUE_PATH_SORT;
-	if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
+	if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
 	{
 		/*
 		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
@@ -924,16 +931,25 @@ translate_sub_tlist(List *tlist, int relid)
  *
  * colnos is an integer list of output column numbers (resno's).  We are
  * interested in whether rows consisting of just these columns are certain
- * to be distinct.
+ * to be distinct.  "Distinctness" is defined according to whether the
+ * corresponding upper-level equality operators listed in opids would think
+ * the values are distinct.  (Note: the opids entries could be cross-type
+ * operators, and thus not exactly the equality operators that the subquery
+ * would use itself.  We assume that the subquery is compatible if these
+ * operators appear in the same btree opfamily as the ones the subquery uses.)
  */
 static bool
-query_is_distinct_for(Query *query, List *colnos)
+query_is_distinct_for(Query *query, List *colnos, List *opids)
 {
 	ListCell   *l;
+	Oid			opid;
+
+	Assert(list_length(colnos) == list_length(opids));
 
 	/*
 	 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
-	 * columns in the DISTINCT clause appear in colnos.
+	 * columns in the DISTINCT clause appear in colnos and operator
+	 * semantics match.
 	 */
 	if (query->distinctClause)
 	{
@@ -943,7 +959,9 @@ query_is_distinct_for(Query *query, List *colnos)
 			TargetEntry *tle = get_sortgroupclause_tle(scl,
 													   query->targetList);
 
-			if (!list_member_int(colnos, tle->resno))
+			opid = distinct_col_search(tle->resno, colnos, opids);
+			if (!OidIsValid(opid) ||
+				!ops_in_same_btree_opfamily(opid, scl->sortop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -952,7 +970,7 @@ query_is_distinct_for(Query *query, List *colnos)
 
 	/*
 	 * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
-	 * appear in colnos.
+	 * appear in colnos and operator semantics match.
 	 */
 	if (query->groupClause)
 	{
@@ -962,7 +980,9 @@ query_is_distinct_for(Query *query, List *colnos)
 			TargetEntry *tle = get_sortgroupclause_tle(grpcl,
 													   query->targetList);
 
-			if (!list_member_int(colnos, tle->resno))
+			opid = distinct_col_search(tle->resno, colnos, opids);
+			if (!OidIsValid(opid) ||
+				!ops_in_same_btree_opfamily(opid, grpcl->sortop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -972,7 +992,7 @@ query_is_distinct_for(Query *query, List *colnos)
 	{
 		/*
 		 * If we have no GROUP BY, but do have aggregates or HAVING, then the
-		 * result is at most one row so it's surely unique.
+		 * result is at most one row so it's surely unique, for any operators.
 		 */
 		if (query->hasAggs || query->havingQual)
 			return true;
@@ -980,7 +1000,13 @@ query_is_distinct_for(Query *query, List *colnos)
 
 	/*
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
-	 * except with ALL
+	 * except with ALL.
+	 *
+	 * XXX this code knows that prepunion.c will adopt the default ordering
+	 * operator for each column datatype as the sortop.  It'd probably be
+	 * better if these operators were chosen at parse time and stored into
+	 * the parsetree, instead of leaving bits of the planner to decide
+	 * semantics.
 	 */
 	if (query->setOperations)
 	{
@@ -996,8 +1022,13 @@ query_is_distinct_for(Query *query, List *colnos)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
 
-				if (!tle->resjunk &&
-					!list_member_int(colnos, tle->resno))
+				if (tle->resjunk)
+					continue;	/* ignore resjunk columns */
+
+				opid = distinct_col_search(tle->resno, colnos, opids);
+				if (!OidIsValid(opid) ||
+					!ops_in_same_btree_opfamily(opid,
+												ordering_oper_opid(exprType((Node *) tle->expr))))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */
@@ -1014,31 +1045,46 @@ query_is_distinct_for(Query *query, List *colnos)
 }
 
 /*
- * hash_safe_tlist - can datatypes of given tlist be hashed?
+ * distinct_col_search - subroutine for query_is_distinct_for
  *
- * We assume hashed aggregation will work if the datatype's equality operator
- * is marked hashjoinable.
+ * If colno is in colnos, return the corresponding element of opids,
+ * else return InvalidOid.  (We expect colnos does not contain duplicates,
+ * so the result is well-defined.)
+ */
+static Oid
+distinct_col_search(int colno, List *colnos, List *opids)
+{
+	ListCell   *lc1,
+			   *lc2;
+
+	forboth(lc1, colnos, lc2, opids)
+	{
+		if (colno == lfirst_int(lc1))
+			return lfirst_oid(lc2);
+	}
+	return InvalidOid;
+}
+
+/*
+ * hash_safe_operators - can all the specified IN operators be hashed?
  *
- * XXX this probably should be somewhere else.	See also hash_safe_grouping
- * in plan/planner.c.
+ * We assume hashed aggregation will work if each IN operator is marked
+ * hashjoinable.  If the IN operators are cross-type, this could conceivably
+ * fail: the aggregation will need a hashable equality operator for the RHS
+ * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * cross-type hashing without support for hashing the individual types, so
+ * we don't expend cycles here to support the case.  We could check
+ * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
+ * former is a significantly more expensive test.
  */
 static bool
-hash_safe_tlist(List *tlist)
+hash_safe_operators(List *opids)
 {
-	ListCell   *tl;
+	ListCell   *lc;
 
-	foreach(tl, tlist)
+	foreach(lc, opids)
 	{
-		Node	   *expr = (Node *) lfirst(tl);
-		Operator	optup;
-		bool		oprcanhash;
-
-		optup = equality_oper(exprType(expr), true);
-		if (!optup)
-			return false;
-		oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
-		ReleaseSysCache(optup);
-		if (!oprcanhash)
+		if (!op_hashjoinable(lfirst_oid(lc)))
 			return false;
 	}
 	return true;
@@ -1156,6 +1202,7 @@ create_nestloop_path(PlannerInfo *root,
  *		ordering for each merge clause
  * 'mergestrategies' are the btree operator strategies identifying the merge
  *		ordering for each merge clause
+ * 'mergenullsfirst' are the nulls first/last flags for each merge clause
  * 'outersortkeys' are the sort varkeys for the outer relation
  * 'innersortkeys' are the sort varkeys for the inner relation
  */
@@ -1168,8 +1215,9 @@ create_mergejoin_path(PlannerInfo *root,
 					  List *restrict_clauses,
 					  List *pathkeys,
 					  List *mergeclauses,
-					  List *mergefamilies,
-					  List *mergestrategies,
+					  Oid *mergefamilies,
+					  int *mergestrategies,
+					  bool *mergenullsfirst,
 					  List *outersortkeys,
 					  List *innersortkeys)
 {
@@ -1210,8 +1258,9 @@ create_mergejoin_path(PlannerInfo *root,
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
 	pathnode->jpath.path.pathkeys = pathkeys;
 	pathnode->path_mergeclauses = mergeclauses;
-	pathnode->path_mergefamilies = mergefamilies;
-	pathnode->path_mergestrategies = mergestrategies;
+	pathnode->path_mergeFamilies = mergefamilies;
+	pathnode->path_mergeStrategies = mergestrategies;
+	pathnode->path_mergeNullsFirst = mergenullsfirst;
 	pathnode->outersortkeys = outersortkeys;
 	pathnode->innersortkeys = innersortkeys;
author	Tom Lane <tgl@sss.pgh.pa.us>	2007-01-10 18:06:05 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2007-01-10 18:06:05 +0000
commit	a191a169d6d0b9558da4519e66510c4540204a51 (patch)
tree	cd32b62bc013145015f4932fef1f7687737205b3 /src/backend/optimizer/util/pathnode.c
parent	5f6d735356c9090d87e184c9322bfe37a165a014 (diff)
download	postgresql-a191a169d6d0b9558da4519e66510c4540204a51.tar.gz postgresql-a191a169d6d0b9558da4519e66510c4540204a51.zip