Improve planner's estimation of the space needed for HashAgg plans:

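Look at the actual aggregate transition datatypes and the actual per-hash-entry overhead needed by nodeAgg.c, instead of using pessimistic round numbers (previously an arbitrary 100 bytes per aggregate transition value plus 64 bytes of overhead per hashtable entry). Per a discussion with Michael Tiemann.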
author: Tom Lane <tgl@sss.pgh.pa.us> 2005-01-28 19:34:28 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2005-01-28 19:34:28 +0000
commit: 0bf2587df46f3b554d62f7628d1d474bda7ddfc5 (patch)
tree: 92f670d4d386d72eab646685648d6cf3f4a546eb /src/backend/optimizer/plan/planner.c
parent: c3a4e22e826ca2cd2f1e1b6d6dd4ae953b6755f1 (diff)
download: postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.tar.gz
postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.zip
1 file changed, 22 insertions, 17 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 3a19abbc4c6..819879209b7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
 		Path	   *sorted_path;
 		double		dNumGroups = 0;
 		long		numGroups = 0;
-		int			numAggs = 0;
+		AggClauseCounts agg_counts;
 		int			numGroupCols = list_length(parse->groupClause);
 		bool		use_hashed_grouping = false;
 
+		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+
 		/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
 		tlist = preprocess_targetlist(tlist,
 									  parse->commandType,
@@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * the aggregate semantics (eg, producing only one output row).
 		 */
 		if (parse->hasAggs)
-			numAggs = count_agg_clause((Node *) tlist) +
-				count_agg_clause(parse->havingQual);
+		{
+			count_agg_clauses((Node *) tlist, &agg_counts);
+			count_agg_clauses(parse->havingQual, &agg_counts);
+		}
 
 		/*
 		 * Figure out whether we need a sorted result from query_planner.
@@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 */
 			if (!enable_hashagg || !hash_safe_grouping(parse))
 				use_hashed_grouping = false;
-			else if (parse->hasAggs &&
-					 (contain_distinct_agg_clause((Node *) tlist) ||
-					  contain_distinct_agg_clause(parse->havingQual)))
+			else if (agg_counts.numDistinctAggs != 0)
 				use_hashed_grouping = false;
 			else
 			{
@@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
 				 * the need for sorted input is usually a win, the fact
 				 * that the output won't be sorted may be a loss; so we
 				 * need to do an actual cost comparison.
-				 *
-				 * In most cases we have no good way to estimate the size of
-				 * the transition value needed by an aggregate;
-				 * arbitrarily assume it is 100 bytes.	Also set the
-				 * overhead per hashtable entry at 64 bytes.
 				 */
-				int			hashentrysize = cheapest_path_width + 64 + numAggs * 100;
+				Size		hashentrysize;
+
+				/* Estimate per-hash-entry space at tuple width... */
+				hashentrysize = cheapest_path_width;
+				/* plus space for pass-by-ref transition values... */
+				hashentrysize += agg_counts.transitionSpace;
+				/* plus the per-hash-entry overhead */
+				hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
 
 				if (hashentrysize * dNumGroups <= work_mem * 1024L)
 				{
@@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					Path		sorted_p;
 
 					cost_agg(&hashed_p, parse,
-							 AGG_HASHED, numAggs,
+							 AGG_HASHED, agg_counts.numAggs,
 							 numGroupCols, dNumGroups,
 							 cheapest_path->startup_cost,
 							 cheapest_path->total_cost,
@@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					}
 					if (parse->hasAggs)
 						cost_agg(&sorted_p, parse,
-								 AGG_SORTED, numAggs,
+								 AGG_SORTED, agg_counts.numAggs,
 								 numGroupCols, dNumGroups,
 								 sorted_p.startup_cost,
 								 sorted_p.total_cost,
@@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 			/* Hashed aggregation produces randomly-ordered results */
 			current_pathkeys = NIL;
@@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 		}
 		else
author	Tom Lane <tgl@sss.pgh.pa.us>	2005-01-28 19:34:28 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2005-01-28 19:34:28 +0000
commit	0bf2587df46f3b554d62f7628d1d474bda7ddfc5 (patch)
tree	92f670d4d386d72eab646685648d6cf3f4a546eb /src/backend/optimizer/plan/planner.c
parent	c3a4e22e826ca2cd2f1e1b6d6dd4ae953b6755f1 (diff)
download	postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.tar.gz postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.zip