aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/planner.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r--src/backend/optimizer/plan/planner.c192
1 files changed, 130 insertions, 62 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index baccf2ffbda..9a5141ea6b9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -933,11 +933,13 @@ grouping_planner(Query *parse, double tuple_fraction)
List *sub_tlist;
List *group_pathkeys;
AttrNumber *groupColIdx = NULL;
+ double sub_tuple_fraction;
Path *cheapest_path;
Path *sorted_path;
double dNumGroups = 0;
long numGroups = 0;
int numAggs = 0;
+ int numGroupCols = length(parse->groupClause);
bool use_hashed_grouping = false;
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
@@ -1169,6 +1171,12 @@ grouping_planner(Query *parse, double tuple_fraction)
}
}
+ /*
+ * With grouping or aggregation, the tuple fraction to pass to
+ * query_planner() may be different from what it is at top level.
+ */
+ sub_tuple_fraction = tuple_fraction;
+
if (parse->groupClause)
{
/*
@@ -1182,8 +1190,8 @@ grouping_planner(Query *parse, double tuple_fraction)
* amounts to assuming that all the groups are about the same
* size).
*/
- if (tuple_fraction >= 1.0)
- tuple_fraction = 0.25;
+ if (sub_tuple_fraction >= 1.0)
+ sub_tuple_fraction = 0.25;
/*
* If both GROUP BY and ORDER BY are specified, we will need
@@ -1195,7 +1203,7 @@ grouping_planner(Query *parse, double tuple_fraction)
if (parse->groupClause && parse->sortClause &&
!noncanonical_pathkeys_contained_in(sort_pathkeys,
group_pathkeys))
- tuple_fraction = 0.0;
+ sub_tuple_fraction = 0.0;
}
else if (parse->hasAggs)
{
@@ -1203,7 +1211,7 @@ grouping_planner(Query *parse, double tuple_fraction)
* Ungrouped aggregate will certainly want all the input
* tuples.
*/
- tuple_fraction = 0.0;
+ sub_tuple_fraction = 0.0;
}
else if (parse->distinctClause)
{
@@ -1212,15 +1220,15 @@ grouping_planner(Query *parse, double tuple_fraction)
* number of input tuples per output tuple. Handle the same
* way.
*/
- if (tuple_fraction >= 1.0)
- tuple_fraction = 0.25;
+ if (sub_tuple_fraction >= 1.0)
+ sub_tuple_fraction = 0.25;
}
/*
* Generate the best unsorted and presorted paths for this Query
* (but note there may not be any presorted path).
*/
- query_planner(parse, sub_tlist, tuple_fraction,
+ query_planner(parse, sub_tlist, sub_tuple_fraction,
&cheapest_path, &sorted_path);
/*
@@ -1236,11 +1244,13 @@ grouping_planner(Query *parse, double tuple_fraction)
if (parse->groupClause)
{
/*
- * Always estimate the number of groups.
+ * Always estimate the number of groups. We can't do this until
+ * after running query_planner(), either.
*/
dNumGroups = estimate_num_groups(parse,
parse->groupClause,
cheapest_path->parent->rows);
+ /* Also want it as a long int --- but 'ware overflow! */
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
/*
@@ -1248,9 +1258,11 @@ grouping_planner(Query *parse, double tuple_fraction)
* aggregates. (Doing so would imply storing *all* the input
* values in the hash table, which seems like a certain loser.)
*/
- if (parse->hasAggs &&
- (contain_distinct_agg_clause((Node *) tlist) ||
- contain_distinct_agg_clause(parse->havingQual)))
+ if (!enable_hashagg)
+ use_hashed_grouping = false;
+ else if (parse->hasAggs &&
+ (contain_distinct_agg_clause((Node *) tlist) ||
+ contain_distinct_agg_clause(parse->havingQual)))
use_hashed_grouping = false;
else
{
@@ -1272,11 +1284,96 @@ grouping_planner(Query *parse, double tuple_fraction)
if (hashentrysize * dNumGroups <= SortMem * 1024L)
{
- /* much more to do here */
-#if 0
- /* TEMPORARY HOTWIRE FOR TESTING */
- use_hashed_grouping = true;
-#endif
+ /*
+ * Okay, do the cost comparison. We need to consider
+ * cheapest_path + hashagg [+ final sort]
+ * versus either
+ * cheapest_path [+ sort] + group or agg [+ final sort]
+ * or
+ * presorted_path + group or agg [+ final sort]
+ * where brackets indicate a step that may not be needed.
+ * We assume query_planner() will have returned a
+ * presorted path only if it's a winner compared to
+ * cheapest_path for this purpose.
+ *
+ * These path variables are dummies that just hold cost
+ * fields; we don't make actual Paths for these steps.
+ */
+ Path hashed_p;
+ Path sorted_p;
+
+ cost_agg(&hashed_p, parse,
+ AGG_HASHED, numAggs,
+ numGroupCols, dNumGroups,
+ cheapest_path->startup_cost,
+ cheapest_path->total_cost,
+ cheapest_path->parent->rows);
+ /* Result of hashed agg is always unsorted */
+ if (sort_pathkeys)
+ cost_sort(&hashed_p, parse, sort_pathkeys,
+ hashed_p.total_cost,
+ dNumGroups,
+ cheapest_path->parent->width);
+
+ if (sorted_path)
+ {
+ sorted_p.startup_cost = sorted_path->startup_cost;
+ sorted_p.total_cost = sorted_path->total_cost;
+ current_pathkeys = sorted_path->pathkeys;
+ }
+ else
+ {
+ sorted_p.startup_cost = cheapest_path->startup_cost;
+ sorted_p.total_cost = cheapest_path->total_cost;
+ current_pathkeys = cheapest_path->pathkeys;
+ }
+ if (!pathkeys_contained_in(group_pathkeys,
+ current_pathkeys))
+ {
+ cost_sort(&sorted_p, parse, group_pathkeys,
+ sorted_p.total_cost,
+ cheapest_path->parent->rows,
+ cheapest_path->parent->width);
+ current_pathkeys = group_pathkeys;
+ }
+ if (parse->hasAggs)
+ cost_agg(&sorted_p, parse,
+ AGG_SORTED, numAggs,
+ numGroupCols, dNumGroups,
+ sorted_p.startup_cost,
+ sorted_p.total_cost,
+ cheapest_path->parent->rows);
+ else
+ cost_group(&sorted_p, parse,
+ numGroupCols, dNumGroups,
+ sorted_p.startup_cost,
+ sorted_p.total_cost,
+ cheapest_path->parent->rows);
+ /* The Agg or Group node will preserve ordering */
+ if (sort_pathkeys &&
+ !pathkeys_contained_in(sort_pathkeys,
+ current_pathkeys))
+ {
+ cost_sort(&sorted_p, parse, sort_pathkeys,
+ sorted_p.total_cost,
+ dNumGroups,
+ cheapest_path->parent->width);
+ }
+
+ /*
+ * Now make the decision using the top-level tuple
+ * fraction. First we have to convert an absolute
+ * count (LIMIT) into fractional form.
+ */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= dNumGroups;
+
+ if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+ tuple_fraction) <= 0)
+ {
+ /* Hashed is cheaper, so use it */
+ use_hashed_grouping = true;
+ }
}
}
}
@@ -1284,50 +1381,17 @@ grouping_planner(Query *parse, double tuple_fraction)
/*
* Select the best path and create a plan to execute it.
*
- * If no special sort order is wanted, or if the cheapest path is
- * already appropriately ordered, use the cheapest path.
- * Otherwise, look to see if we have an already-ordered path that is
- * cheaper than doing an explicit sort on the cheapest-total-cost
- * path.
+ * If we are doing hashed grouping, we will always read all the
+ * input tuples, so use the cheapest-total path. Otherwise,
+ * trust query_planner's decision about which to use.
*/
- if (parse->query_pathkeys == NIL ||
- pathkeys_contained_in(parse->query_pathkeys,
- cheapest_path->pathkeys))
+ if (sorted_path && !use_hashed_grouping)
{
- result_plan = create_plan(parse, cheapest_path);
- current_pathkeys = cheapest_path->pathkeys;
- }
- else if (sorted_path)
- {
- Path sort_path; /* dummy for result of cost_sort */
-
- cost_sort(&sort_path, parse, parse->query_pathkeys,
- sorted_path->parent->rows, sorted_path->parent->width);
- sort_path.startup_cost += cheapest_path->total_cost;
- sort_path.total_cost += cheapest_path->total_cost;
- /* Convert absolute-count tuple_fraction into a fraction */
- if (tuple_fraction >= 1.0)
- tuple_fraction /= sorted_path->parent->rows;
- if (compare_fractional_path_costs(sorted_path, &sort_path,
- tuple_fraction) <= 0)
- {
- /* Presorted path is cheaper, use it */
- result_plan = create_plan(parse, sorted_path);
- current_pathkeys = sorted_path->pathkeys;
- }
- else
- {
- /* otherwise, doing it the hard way is still cheaper */
- result_plan = create_plan(parse, cheapest_path);
- current_pathkeys = cheapest_path->pathkeys;
- }
+ result_plan = create_plan(parse, sorted_path);
+ current_pathkeys = sorted_path->pathkeys;
}
else
{
- /*
- * No sorted path, so we must use the cheapest-total path.
- * The actual sort step will be generated below.
- */
result_plan = create_plan(parse, cheapest_path);
current_pathkeys = cheapest_path->pathkeys;
}
@@ -1362,10 +1426,11 @@ grouping_planner(Query *parse, double tuple_fraction)
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
- result_plan = (Plan *) make_agg(tlist,
+ result_plan = (Plan *) make_agg(parse,
+ tlist,
(List *) parse->havingQual,
AGG_HASHED,
- length(parse->groupClause),
+ numGroupCols,
groupColIdx,
numGroups,
numAggs,
@@ -1401,10 +1466,11 @@ grouping_planner(Query *parse, double tuple_fraction)
current_pathkeys = NIL;
}
- result_plan = (Plan *) make_agg(tlist,
+ result_plan = (Plan *) make_agg(parse,
+ tlist,
(List *) parse->havingQual,
aggstrategy,
- length(parse->groupClause),
+ numGroupCols,
groupColIdx,
numGroups,
numAggs,
@@ -1436,11 +1502,13 @@ grouping_planner(Query *parse, double tuple_fraction)
current_pathkeys = group_pathkeys;
}
- result_plan = (Plan *) make_group(tlist,
- length(parse->groupClause),
+ result_plan = (Plan *) make_group(parse,
+ tlist,
+ numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
+ /* The Group node won't change sort ordering */
}
}
} /* end of if (setOperations) */