diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2005-01-28 19:34:28 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2005-01-28 19:34:28 +0000 |
commit | 0bf2587df46f3b554d62f7628d1d474bda7ddfc5 (patch) | |
tree | 92f670d4d386d72eab646685648d6cf3f4a546eb /src/backend/optimizer/plan/planner.c | |
parent | c3a4e22e826ca2cd2f1e1b6d6dd4ae953b6755f1 (diff) | |
download | postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.tar.gz postgresql-0bf2587df46f3b554d62f7628d1d474bda7ddfc5.zip |
Improve planner's estimation of the space needed for HashAgg plans:
look at the actual aggregate transition datatypes and the actual overhead
needed by nodeAgg.c, instead of using pessimistic round numbers.
Per a discussion with Michael Tiemann.
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 39 |
1 file changed, 22 insertions, 17 deletions
```diff
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 3a19abbc4c6..819879209b7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
 		Path *sorted_path;
 		double dNumGroups = 0;
 		long numGroups = 0;
-		int numAggs = 0;
+		AggClauseCounts agg_counts;
 		int numGroupCols = list_length(parse->groupClause);
 		bool use_hashed_grouping = false;
 
+		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+
 		/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
 		tlist = preprocess_targetlist(tlist,
 									  parse->commandType,
@@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * the aggregate semantics (eg, producing only one output row).
 		 */
 		if (parse->hasAggs)
-			numAggs = count_agg_clause((Node *) tlist) +
-				count_agg_clause(parse->havingQual);
+		{
+			count_agg_clauses((Node *) tlist, &agg_counts);
+			count_agg_clauses(parse->havingQual, &agg_counts);
+		}
 
 		/*
 		 * Figure out whether we need a sorted result from query_planner.
@@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 */
 			if (!enable_hashagg || !hash_safe_grouping(parse))
 				use_hashed_grouping = false;
-			else if (parse->hasAggs &&
-					 (contain_distinct_agg_clause((Node *) tlist) ||
-					  contain_distinct_agg_clause(parse->havingQual)))
+			else if (agg_counts.numDistinctAggs != 0)
 				use_hashed_grouping = false;
 			else
 			{
@@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
 				 * the need for sorted input is usually a win, the fact
 				 * that the output won't be sorted may be a loss; so we
 				 * need to do an actual cost comparison.
-				 *
-				 * In most cases we have no good way to estimate the size of
-				 * the transition value needed by an aggregate;
-				 * arbitrarily assume it is 100 bytes. Also set the
-				 * overhead per hashtable entry at 64 bytes.
 				 */
-				int hashentrysize = cheapest_path_width + 64 + numAggs * 100;
+				Size hashentrysize;
+
+				/* Estimate per-hash-entry space at tuple width... */
+				hashentrysize = cheapest_path_width;
+				/* plus space for pass-by-ref transition values... */
+				hashentrysize += agg_counts.transitionSpace;
+				/* plus the per-hash-entry overhead */
+				hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
 
 				if (hashentrysize * dNumGroups <= work_mem * 1024L)
 				{
@@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					Path sorted_p;
 
 					cost_agg(&hashed_p, parse,
-							 AGG_HASHED, numAggs,
+							 AGG_HASHED, agg_counts.numAggs,
 							 numGroupCols, dNumGroups,
 							 cheapest_path->startup_cost,
 							 cheapest_path->total_cost,
@@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					}
 					if (parse->hasAggs)
 						cost_agg(&sorted_p, parse,
-								 AGG_SORTED, numAggs,
+								 AGG_SORTED, agg_counts.numAggs,
 								 numGroupCols, dNumGroups,
 								 sorted_p.startup_cost,
 								 sorted_p.total_cost,
@@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 			/* Hashed aggregation produces randomly-ordered results */
 			current_pathkeys = NIL;
@@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 			}
 			else
```