aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/planner.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2005-01-28 19:34:28 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2005-01-28 19:34:28 +0000
commit: 0bf2587df46f3b554d62f7628d1d474bda7ddfc5 (patch)
tree: 92f670d4d386d72eab646685648d6cf3f4a546eb /src/backend/optimizer/plan/planner.c
parent: c3a4e22e826ca2cd2f1e1b6d6dd4ae953b6755f1 (diff)
download: postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.tar.gz
download: postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.zip
Improve planner's estimation of the space needed for HashAgg plans:
look at the actual aggregate transition datatypes and the actual overhead needed by nodeAgg.c, instead of using pessimistic round numbers. Per a discussion with Michael Tiemann.
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- src/backend/optimizer/plan/planner.c 39
1 file changed, 22 insertions, 17 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 3a19abbc4c6..819879209b7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,6 +20,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
+#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
Path *sorted_path;
double dNumGroups = 0;
long numGroups = 0;
- int numAggs = 0;
+ AggClauseCounts agg_counts;
int numGroupCols = list_length(parse->groupClause);
bool use_hashed_grouping = false;
+ MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
tlist = preprocess_targetlist(tlist,
parse->commandType,
@@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
* the aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs)
- numAggs = count_agg_clause((Node *) tlist) +
- count_agg_clause(parse->havingQual);
+ {
+ count_agg_clauses((Node *) tlist, &agg_counts);
+ count_agg_clauses(parse->havingQual, &agg_counts);
+ }
/*
* Figure out whether we need a sorted result from query_planner.
@@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
*/
if (!enable_hashagg || !hash_safe_grouping(parse))
use_hashed_grouping = false;
- else if (parse->hasAggs &&
- (contain_distinct_agg_clause((Node *) tlist) ||
- contain_distinct_agg_clause(parse->havingQual)))
+ else if (agg_counts.numDistinctAggs != 0)
use_hashed_grouping = false;
else
{
@@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
* the need for sorted input is usually a win, the fact
* that the output won't be sorted may be a loss; so we
* need to do an actual cost comparison.
- *
- * In most cases we have no good way to estimate the size of
- * the transition value needed by an aggregate;
- * arbitrarily assume it is 100 bytes. Also set the
- * overhead per hashtable entry at 64 bytes.
*/
- int hashentrysize = cheapest_path_width + 64 + numAggs * 100;
+ Size hashentrysize;
+
+ /* Estimate per-hash-entry space at tuple width... */
+ hashentrysize = cheapest_path_width;
+ /* plus space for pass-by-ref transition values... */
+ hashentrysize += agg_counts.transitionSpace;
+ /* plus the per-hash-entry overhead */
+ hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
if (hashentrysize * dNumGroups <= work_mem * 1024L)
{
@@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
Path sorted_p;
cost_agg(&hashed_p, parse,
- AGG_HASHED, numAggs,
+ AGG_HASHED, agg_counts.numAggs,
numGroupCols, dNumGroups,
cheapest_path->startup_cost,
cheapest_path->total_cost,
@@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
}
if (parse->hasAggs)
cost_agg(&sorted_p, parse,
- AGG_SORTED, numAggs,
+ AGG_SORTED, agg_counts.numAggs,
numGroupCols, dNumGroups,
sorted_p.startup_cost,
sorted_p.total_cost,
@@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
numGroupCols,
groupColIdx,
numGroups,
- numAggs,
+ agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
@@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
numGroupCols,
groupColIdx,
numGroups,
- numAggs,
+ agg_counts.numAggs,
result_plan);
}
else