aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/planmain.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/plan/planmain.c')
-rw-r--r--src/backend/optimizer/plan/planmain.c108
1 files changed, 98 insertions, 10 deletions
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 7038a45ac64..1aca1249d43 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.86 2005/07/02 23:00:41 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.87 2005/08/27 22:13:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -25,9 +25,11 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
+#include "optimizer/tlist.h"
+#include "utils/selfuncs.h"
-/*--------------------
+/*
* query_planner
* Generate a path (that is, a simplified plan) for a basic query,
* which may involve joins but not any fancier features.
@@ -51,6 +53,8 @@
* *cheapest_path receives the overall-cheapest path for the query
* *sorted_path receives the cheapest presorted path for the query,
* if any (NULL if there is no useful presorted path)
+ * *num_groups receives the estimated number of groups, or 1 if query
+ * does not use grouping
*
* Note: the PlannerInfo node also includes a query_pathkeys field, which is
* both an input and an output of query_planner(). The input value signals
@@ -61,17 +65,21 @@
* PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.)
*
+ * Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys,
+ * which like query_pathkeys need to be canonicalized once the info is
+ * available.
+ *
* tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case)
* 0 < tuple_fraction < 1: expect the given fraction of tuples available
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
* expected to be retrieved (ie, a LIMIT specification)
- *--------------------
*/
void
query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
- Path **cheapest_path, Path **sorted_path)
+ Path **cheapest_path, Path **sorted_path,
+ double *num_groups)
{
Query *parse = root->parse;
List *constant_quals;
@@ -82,6 +90,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
/* Make tuple_fraction accessible to lower-level routines */
root->tuple_fraction = tuple_fraction;
+ *num_groups = 1; /* default result */
+
/*
* If the query has an empty join tree, then it's something easy like
* "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly.
@@ -156,9 +166,12 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
/*
* We should now have all the pathkey equivalence sets built, so it's
* now possible to convert the requested query_pathkeys to canonical
- * form.
+ * form. Also canonicalize the groupClause and sortClause pathkeys
+ * for use later.
*/
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
+ root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
+ root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
/*
* Ready to do the primary planning.
@@ -169,12 +182,87 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
elog(ERROR, "failed to construct the join relation");
/*
- * Now that we have an estimate of the final rel's size, we can
- * convert a tuple_fraction specified as an absolute count (ie, a
- * LIMIT option) into a fraction of the total tuples.
+ * If there's grouping going on, estimate the number of result groups.
+ * We couldn't do this any earlier because it depends on relation size
+ * estimates that were set up above.
+ *
+ * Then convert tuple_fraction to fractional form if it is absolute,
+ * and adjust it based on the knowledge that grouping_planner will be
+ * doing grouping or aggregation work with our result.
+ *
+ * This introduces some undesirable coupling between this code and
+ * grouping_planner, but the alternatives seem even uglier; we couldn't
+ * pass back completed paths without making these decisions here.
*/
- if (tuple_fraction >= 1.0)
- tuple_fraction /= final_rel->rows;
+ if (parse->groupClause)
+ {
+ List *groupExprs;
+
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+ *num_groups = estimate_num_groups(root,
+ groupExprs,
+ final_rel->rows);
+
+ /*
+ * In GROUP BY mode, an absolute LIMIT is relative to the number
+ * of groups not the number of tuples. If the caller gave us
+ * a fraction, keep it as-is. (In both cases, we are effectively
+ * assuming that all the groups are about the same size.)
+ */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= *num_groups;
+
+ /*
+ * If both GROUP BY and ORDER BY are specified, we will need two
+ * levels of sort --- and, therefore, certainly need to read all
+ * the tuples --- unless ORDER BY is a subset of GROUP BY.
+ */
+ if (parse->groupClause && parse->sortClause &&
+ !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+ tuple_fraction = 0.0;
+ }
+ else if (parse->hasAggs || root->hasHavingQual)
+ {
+ /*
+ * Ungrouped aggregate will certainly want to read all the tuples,
+ * and it will deliver a single result row (so leave *num_groups 1).
+ */
+ tuple_fraction = 0.0;
+ }
+ else if (parse->distinctClause)
+ {
+ /*
+ * Since there was no grouping or aggregation, it's reasonable to
+ * assume the UNIQUE filter has effects comparable to GROUP BY.
+ * Return the estimated number of output rows for use by caller.
+ * (If DISTINCT is used with grouping, we ignore its effects for
+ * rowcount estimation purposes; this amounts to assuming the grouped
+ * rows are distinct already.)
+ */
+ List *distinctExprs;
+
+ distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
+ parse->targetList);
+ *num_groups = estimate_num_groups(root,
+ distinctExprs,
+ final_rel->rows);
+
+ /*
+ * Adjust tuple_fraction the same way as for GROUP BY, too.
+ */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= *num_groups;
+ }
+ else
+ {
+ /*
+ * Plain non-grouped, non-aggregated query: an absolute tuple
+ * fraction can be divided by the number of tuples.
+ */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= final_rel->rows;
+ }
/*
* Pick out the cheapest-total path and the cheapest presorted path