aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/planner.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r--src/backend/optimizer/plan/planner.c829
1 files changed, 744 insertions, 85 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 9ba10516bb2..d3f5a140170 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -16,13 +16,16 @@
#include "postgres.h"
#include <limits.h>
+#include <math.h>
#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
+#include "lib/bipartite_match.h"
#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
@@ -38,6 +41,7 @@
#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
+#include "parser/parse_agg.h"
#include "rewrite/rewriteManip.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
@@ -67,6 +71,7 @@ typedef struct
{
List *tlist; /* preprocessed query targetlist */
List *activeWindows; /* active windows, if any */
+ List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
/* Local functions */
@@ -79,7 +84,9 @@ static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
int64 *offset_est, int64 *count_est);
static bool limit_needed(Query *parse);
-static void preprocess_groupclause(PlannerInfo *root);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);
+static List *extract_rollup_sets(List *groupingSets);
+static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static bool choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples,
@@ -115,7 +122,16 @@ static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
int *ordNumCols,
AttrNumber **ordColIdx,
Oid **ordOperators);
-
+static Plan *build_grouping_chain(PlannerInfo *root,
+ Query *parse,
+ List *tlist,
+ bool need_sort_for_grouping,
+ List *rollup_groupclauses,
+ List *rollup_lists,
+ AttrNumber *groupColIdx,
+ AggClauseCosts *agg_costs,
+ long numGroups,
+ Plan *result_plan);
/*****************************************************************************
*
@@ -321,6 +337,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->append_rel_list = NIL;
root->rowMarks = NIL;
root->hasInheritedTarget = false;
+ root->grouping_map = NULL;
root->hasRecursion = hasRecursion;
if (hasRecursion)
@@ -559,7 +576,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
if (contain_agg_clause(havingclause) ||
contain_volatile_functions(havingclause) ||
- contain_subplans(havingclause))
+ contain_subplans(havingclause) ||
+ parse->groupingSets)
{
/* keep it in HAVING */
newHaving = lappend(newHaving, havingclause);
@@ -1248,11 +1266,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
List *sub_tlist;
AttrNumber *groupColIdx = NULL;
bool need_tlist_eval = true;
- standard_qp_extra qp_extra;
- RelOptInfo *final_rel;
- Path *cheapest_path;
- Path *sorted_path;
- Path *best_path;
long numGroups = 0;
AggClauseCosts agg_costs;
int numGroupCols;
@@ -1262,15 +1275,89 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
OnConflictExpr *onconfl;
+ int maxref = 0;
+ int *tleref_to_colnum_map;
+ List *rollup_lists = NIL;
+ List *rollup_groupclauses = NIL;
+ standard_qp_extra qp_extra;
+ RelOptInfo *final_rel;
+ Path *cheapest_path;
+ Path *sorted_path;
+ Path *best_path;
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
/* A recursive query should always have setOperations */
Assert(!root->hasRecursion);
- /* Preprocess GROUP BY clause, if any */
+ /* Preprocess Grouping set, if any */
+ if (parse->groupingSets)
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
if (parse->groupClause)
- preprocess_groupclause(root);
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ if (gc->tleSortGroupRef > maxref)
+ maxref = gc->tleSortGroupRef;
+ }
+ }
+
+ tleref_to_colnum_map = palloc((maxref + 1) * sizeof(int));
+
+ if (parse->groupingSets)
+ {
+ ListCell *lc;
+ ListCell *lc2;
+ ListCell *lc_set;
+ List *sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = reorder_grouping_sets(lfirst(lc_set),
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+ List *groupclause = preprocess_groupclause(root, linitial(current_sets));
+ int ref = 0;
+
+ /*
+ * Now that we've pinned down an order for the groupClause for
+ * this list of grouping sets, we need to remap the entries in
+ * the grouping sets from sortgrouprefs to plain indices
+ * (0-based) into the groupClause for this collection of
+ * grouping sets.
+ */
+
+ foreach(lc, groupclause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, current_sets)
+ {
+ foreach(lc2, (List *) lfirst(lc))
+ {
+ lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
+ }
+ }
+
+ rollup_lists = lcons(current_sets, rollup_lists);
+ rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
+ }
+ }
+ else
+ {
+ /* Preprocess GROUP BY clause, if any */
+ if (parse->groupClause)
+ parse->groupClause = preprocess_groupclause(root, NIL);
+ rollup_groupclauses = list_make1(parse->groupClause);
+ }
+
numGroupCols = list_length(parse->groupClause);
/* Preprocess targetlist */
@@ -1350,6 +1437,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* grouping/aggregation operations.
*/
if (parse->groupClause ||
+ parse->groupingSets ||
parse->distinctClause ||
parse->hasAggs ||
parse->hasWindowFuncs ||
@@ -1361,6 +1449,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
+ qp_extra.groupClause = llast(rollup_groupclauses);
/*
* Generate the best unsorted and presorted paths for this Query (but
@@ -1393,9 +1482,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
{
List *groupExprs;
- groupExprs = get_sortgrouplist_exprs(parse->groupClause,
- parse->targetList);
- dNumGroups = estimate_num_groups(root, groupExprs, path_rows);
+ if (parse->groupingSets)
+ {
+ ListCell *lc,
+ *lc2;
+
+ dNumGroups = 0;
+
+ forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ {
+ ListCell *lc3;
+
+ groupExprs = get_sortgrouplist_exprs(lfirst(lc),
+ parse->targetList);
+
+ foreach(lc3, lfirst(lc2))
+ {
+ List *gset = lfirst(lc3);
+
+ dNumGroups += estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+ }
+ }
+ }
+ else
+ {
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
+ NULL);
+ }
/*
* In GROUP BY mode, an absolute LIMIT is relative to the number
@@ -1407,6 +1526,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
tuple_fraction /= dNumGroups;
/*
+ * If there's more than one grouping set, we'll have to sort the
+ * entire input.
+ */
+ if (list_length(rollup_lists) > 1)
+ tuple_fraction = 0.0;
+
+ /*
* If both GROUP BY and ORDER BY are specified, we will need two
* levels of sort --- and, therefore, certainly need to read all
* the tuples --- unless ORDER BY is a subset of GROUP BY.
@@ -1421,14 +1547,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
root->group_pathkeys))
tuple_fraction = 0.0;
}
- else if (parse->hasAggs || root->hasHavingQual)
+ else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets)
{
/*
* Ungrouped aggregate will certainly want to read all the tuples,
- * and it will deliver a single result row (so leave dNumGroups
- * set to 1).
+ * and it will deliver a single result row per grouping set (or 1
+ * if no grouping sets were explicitly given, in which case leave
+ * dNumGroups as-is)
*/
tuple_fraction = 0.0;
+ if (parse->groupingSets)
+ dNumGroups = list_length(parse->groupingSets);
}
else if (parse->distinctClause)
{
@@ -1443,7 +1572,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
parse->targetList);
- dNumGroups = estimate_num_groups(root, distinctExprs, path_rows);
+ dNumGroups = estimate_num_groups(root, distinctExprs, path_rows, NULL);
/*
* Adjust tuple_fraction the same way as for GROUP BY, too.
@@ -1526,13 +1655,24 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
{
/*
* If grouping, decide whether to use sorted or hashed grouping.
+ * If grouping sets are present, we can currently do only sorted
+ * grouping.
*/
- use_hashed_grouping =
- choose_hashed_grouping(root,
- tuple_fraction, limit_tuples,
- path_rows, path_width,
- cheapest_path, sorted_path,
- dNumGroups, &agg_costs);
+
+ if (parse->groupingSets)
+ {
+ use_hashed_grouping = false;
+ }
+ else
+ {
+ use_hashed_grouping =
+ choose_hashed_grouping(root,
+ tuple_fraction, limit_tuples,
+ path_rows, path_width,
+ cheapest_path, sorted_path,
+ dNumGroups, &agg_costs);
+ }
+
/* Also convert # groups to long int --- but 'ware overflow! */
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
}
@@ -1598,7 +1738,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Detect if we'll need an explicit sort for grouping */
if (parse->groupClause && !use_hashed_grouping &&
- !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
+ !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{
need_sort_for_grouping = true;
@@ -1658,6 +1798,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
}
/*
+ * groupColIdx is now cast in stone, so record a mapping from
+ * tleSortGroupRef to column index. setrefs.c needs this to
+ * finalize GROUPING() operations.
+ */
+
+ if (parse->groupingSets)
+ {
+ AttrNumber *grouping_map = palloc0(sizeof(AttrNumber) * (maxref + 1));
+ ListCell *lc;
+ int i = 0;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ grouping_map[gc->tleSortGroupRef] = groupColIdx[i++];
+ }
+
+ root->grouping_map = grouping_map;
+ }
+
+ /*
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
@@ -1673,52 +1834,43 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
&agg_costs,
numGroupCols,
groupColIdx,
- extract_grouping_ops(parse->groupClause),
+ extract_grouping_ops(parse->groupClause),
+ NIL,
numGroups,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
- else if (parse->hasAggs)
+ else if (parse->hasAggs || (parse->groupingSets && parse->groupClause))
{
- /* Plain aggregate plan --- sort if needed */
- AggStrategy aggstrategy;
-
- if (parse->groupClause)
- {
- if (need_sort_for_grouping)
- {
- result_plan = (Plan *)
- make_sort_from_groupcols(root,
- parse->groupClause,
- groupColIdx,
- result_plan);
- current_pathkeys = root->group_pathkeys;
- }
- aggstrategy = AGG_SORTED;
-
- /*
- * The AGG node will not change the sort ordering of its
- * groups, so current_pathkeys describes the result too.
- */
- }
+ /*
+ * Output is in sorted order by group_pathkeys if, and only if,
+ * there is a single rollup operation on a non-empty list of
+ * grouping expressions.
+ */
+ if (list_length(rollup_groupclauses) == 1
+ && list_length(linitial(rollup_groupclauses)) > 0)
+ current_pathkeys = root->group_pathkeys;
else
- {
- aggstrategy = AGG_PLAIN;
- /* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
- }
- result_plan = (Plan *) make_agg(root,
- tlist,
- (List *) parse->havingQual,
- aggstrategy,
- &agg_costs,
- numGroupCols,
- groupColIdx,
- extract_grouping_ops(parse->groupClause),
- numGroups,
- result_plan);
+ result_plan = build_grouping_chain(root,
+ parse,
+ tlist,
+ need_sort_for_grouping,
+ rollup_groupclauses,
+ rollup_lists,
+ groupColIdx,
+ &agg_costs,
+ numGroups,
+ result_plan);
+
+ /*
+ * these are destroyed by build_grouping_chain, so make sure we
+ * don't try and touch them again
+ */
+ rollup_groupclauses = NIL;
+ rollup_lists = NIL;
}
else if (parse->groupClause)
{
@@ -1749,24 +1901,45 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
result_plan);
/* The Group node won't change sort ordering */
}
- else if (root->hasHavingQual)
+ else if (root->hasHavingQual || parse->groupingSets)
{
+ int nrows = list_length(parse->groupingSets);
+
/*
- * No aggregates, and no GROUP BY, but we have a HAVING qual.
+ * No aggregates, and no GROUP BY, but we have a HAVING qual or
+ * grouping sets (which by elimination of cases above must
+ * consist solely of empty grouping sets, since otherwise
+ * groupClause will be non-empty).
+ *
* This is a degenerate case in which we are supposed to emit
- * either 0 or 1 row depending on whether HAVING succeeds.
- * Furthermore, there cannot be any variables in either HAVING
- * or the targetlist, so we actually do not need the FROM
- * table at all! We can just throw away the plan-so-far and
- * generate a Result node. This is a sufficiently unusual
- * corner case that it's not worth contorting the structure of
- * this routine to avoid having to generate the plan in the
- * first place.
+ * either 0 or 1 row for each grouping set depending on whether
+ * HAVING succeeds. Furthermore, there cannot be any variables
+ * in either HAVING or the targetlist, so we actually do not
+ * need the FROM table at all! We can just throw away the
+ * plan-so-far and generate a Result node. This is a
+ * sufficiently unusual corner case that it's not worth
+ * contorting the structure of this routine to avoid having to
+ * generate the plan in the first place.
*/
result_plan = (Plan *) make_result(root,
tlist,
parse->havingQual,
NULL);
+
+ /*
+ * Doesn't seem worthwhile writing code to cons up a
+ * generate_series or a values scan to emit multiple rows.
+ * Instead just clone the result in an Append.
+ */
+ if (nrows > 1)
+ {
+ List *plans = list_make1(result_plan);
+
+ while (--nrows > 0)
+ plans = lappend(plans, copyObject(result_plan));
+
+ result_plan = (Plan *) make_append(plans, tlist);
+ }
}
} /* end of non-minmax-aggregate case */
@@ -1932,7 +2105,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* result was already mostly unique). If not, use the number of
* distinct-groups calculated previously.
*/
- if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
+ if (parse->groupClause || parse->groupingSets || root->hasHavingQual || parse->hasAggs)
dNumDistinctRows = result_plan->plan_rows;
else
dNumDistinctRows = dNumGroups;
@@ -1973,6 +2146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
extract_grouping_cols(parse->distinctClause,
result_plan->targetlist),
extract_grouping_ops(parse->distinctClause),
+ NIL,
numDistinctRows,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
@@ -2082,6 +2256,198 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
return result_plan;
}
+
+/*
+ * Given a groupclause for a collection of grouping sets, produce the
+ * corresponding groupColIdx.
+ *
+ * root->grouping_map maps the tleSortGroupRef to the actual column position in
+ * the input tuple. So we get the ref from the entries in the groupclause and
+ * look them up there.
+ */
+static AttrNumber *
+remap_groupColIdx(PlannerInfo *root, List *groupClause)
+{
+ AttrNumber *grouping_map = root->grouping_map;
+ AttrNumber *new_grpColIdx;
+ ListCell *lc;
+ int i;
+
+ Assert(grouping_map);
+
+ new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(groupClause));
+
+ i = 0;
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *clause = lfirst(lc);
+ new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef];
+ }
+
+ return new_grpColIdx;
+}
+
+/*
+ * Build Agg and Sort nodes to implement sorted grouping with one or more
+ * grouping sets. (A plain GROUP BY or just the presence of aggregates counts
+ * for this purpose as a single grouping set; the calling code is responsible
+ * for providing a non-empty rollup_groupclauses list for such cases, though
+ * rollup_lists may be null.)
+ *
+ * The last entry in rollup_groupclauses (which is the one the input is sorted
+ * on, if at all) is the one used for the returned Agg node. Any additional
+ * rollups are attached, with corresponding sort info, to subsidiary Agg and
+ * Sort nodes attached to the side of the real Agg node; these nodes don't
+ * participate in the plan directly, but they are both a convenient way to
+ * represent the required data and a convenient way to account for the costs
+ * of execution.
+ *
+ * rollup_groupclauses and rollup_lists are destroyed by this function.
+ */
+static Plan *
+build_grouping_chain(PlannerInfo *root,
+ Query *parse,
+ List *tlist,
+ bool need_sort_for_grouping,
+ List *rollup_groupclauses,
+ List *rollup_lists,
+ AttrNumber *groupColIdx,
+ AggClauseCosts *agg_costs,
+ long numGroups,
+ Plan *result_plan)
+{
+ AttrNumber *top_grpColIdx = groupColIdx;
+ List *chain = NIL;
+
+ /*
+ * Prepare the grpColIdx for the real Agg node first, because we may need
+ * it for sorting
+ */
+ if (list_length(rollup_groupclauses) > 1)
+ {
+ Assert(rollup_lists && llast(rollup_lists));
+
+ top_grpColIdx =
+ remap_groupColIdx(root, llast(rollup_groupclauses));
+ }
+
+ /*
+ * If we need a Sort operation on the input, generate that.
+ */
+ if (need_sort_for_grouping)
+ {
+ result_plan = (Plan *)
+ make_sort_from_groupcols(root,
+ llast(rollup_groupclauses),
+ top_grpColIdx,
+ result_plan);
+ }
+
+ /*
+ * Generate the side nodes that describe the other sort and group
+ * operations besides the top one.
+ */
+ while (list_length(rollup_groupclauses) > 1)
+ {
+ List *groupClause = linitial(rollup_groupclauses);
+ List *gsets = linitial(rollup_lists);
+ AttrNumber *new_grpColIdx;
+ Plan *sort_plan;
+ Plan *agg_plan;
+
+ Assert(groupClause);
+ Assert(gsets);
+
+ new_grpColIdx = remap_groupColIdx(root, groupClause);
+
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(root,
+ groupClause,
+ new_grpColIdx,
+ result_plan);
+
+ /*
+ * sort_plan includes the cost of result_plan over again, which is not
+ * what we want (since it's not actually running that plan). So correct
+ * the cost figures.
+ */
+
+ sort_plan->startup_cost -= result_plan->total_cost;
+ sort_plan->total_cost -= result_plan->total_cost;
+
+ agg_plan = (Plan *) make_agg(root,
+ tlist,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ agg_costs,
+ list_length(linitial(gsets)),
+ new_grpColIdx,
+ extract_grouping_ops(groupClause),
+ gsets,
+ numGroups,
+ sort_plan);
+
+ sort_plan->lefttree = NULL;
+
+ chain = lappend(chain, agg_plan);
+
+ if (rollup_lists)
+ rollup_lists = list_delete_first(rollup_lists);
+
+ rollup_groupclauses = list_delete_first(rollup_groupclauses);
+ }
+
+ /*
+ * Now make the final Agg node
+ */
+ {
+ List *groupClause = linitial(rollup_groupclauses);
+ List *gsets = rollup_lists ? linitial(rollup_lists) : NIL;
+ int numGroupCols;
+ ListCell *lc;
+
+ if (gsets)
+ numGroupCols = list_length(linitial(gsets));
+ else
+ numGroupCols = list_length(parse->groupClause);
+
+ result_plan = (Plan *) make_agg(root,
+ tlist,
+ (List *) parse->havingQual,
+ (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ agg_costs,
+ numGroupCols,
+ top_grpColIdx,
+ extract_grouping_ops(groupClause),
+ gsets,
+ numGroups,
+ result_plan);
+
+ ((Agg *) result_plan)->chain = chain;
+
+ /*
+ * Add the additional costs. But only the total costs count, since the
+ * additional sorts aren't run on startup.
+ */
+ foreach(lc, chain)
+ {
+ Plan *subplan = lfirst(lc);
+
+ result_plan->total_cost += subplan->total_cost;
+
+ /*
+ * Nuke stuff we don't need to avoid bloating debug output.
+ */
+
+ subplan->targetlist = NIL;
+ subplan->qual = NIL;
+ subplan->lefttree->targetlist = NIL;
+ }
+ }
+
+ return result_plan;
+}
+
/*
* add_tlist_costs_to_plan
*
@@ -2642,19 +3008,38 @@ limit_needed(Query *parse)
*
* Note: we need no comparable processing of the distinctClause because
* the parser already enforced that that matches ORDER BY.
+ *
+ * For grouping sets, the order of items is instead forced to agree with that
+ * of the grouping set (and items not in the grouping set are skipped). The
+ * work of sorting the order of grouping set elements to match the ORDER BY if
+ * possible is done elsewhere.
*/
-static void
-preprocess_groupclause(PlannerInfo *root)
+static List *
+preprocess_groupclause(PlannerInfo *root, List *force)
{
Query *parse = root->parse;
- List *new_groupclause;
+ List *new_groupclause = NIL;
bool partial_match;
ListCell *sl;
ListCell *gl;
+ /* For grouping sets, we need to force the ordering */
+ if (force)
+ {
+ foreach(sl, force)
+ {
+ Index ref = lfirst_int(sl);
+ SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+
+ new_groupclause = lappend(new_groupclause, cl);
+ }
+
+ return new_groupclause;
+ }
+
/* If no ORDER BY, nothing useful to do here */
if (parse->sortClause == NIL)
- return;
+ return parse->groupClause;
/*
* Scan the ORDER BY clause and construct a list of matching GROUP BY
@@ -2662,7 +3047,6 @@ preprocess_groupclause(PlannerInfo *root)
*
* This code assumes that the sortClause contains no duplicate items.
*/
- new_groupclause = NIL;
foreach(sl, parse->sortClause)
{
SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
@@ -2686,7 +3070,7 @@ preprocess_groupclause(PlannerInfo *root)
/* If no match at all, no point in reordering GROUP BY */
if (new_groupclause == NIL)
- return;
+ return parse->groupClause;
/*
* Add any remaining GROUP BY items to the new list, but only if we were
@@ -2703,15 +3087,290 @@ preprocess_groupclause(PlannerInfo *root)
if (list_member_ptr(new_groupclause, gc))
continue; /* it matched an ORDER BY item */
if (partial_match)
- return; /* give up, no common sort possible */
+ return parse->groupClause; /* give up, no common sort possible */
if (!OidIsValid(gc->sortop))
- return; /* give up, GROUP BY can't be sorted */
+ return parse->groupClause; /* give up, GROUP BY can't be sorted */
new_groupclause = lappend(new_groupclause, gc);
}
/* Success --- install the rearranged GROUP BY list */
Assert(list_length(parse->groupClause) == list_length(new_groupclause));
- parse->groupClause = new_groupclause;
+ return new_groupclause;
+}
+
+/*
+ * Extract lists of grouping sets that can be implemented using a single
+ * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
+ *
+ * Input must be sorted with smallest sets first. Result has each sublist
+ * sorted with smallest sets first.
+ *
+ * We want to produce the absolute minimum possible number of lists here to
+ * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
+ * of finding the minimal partition of a partially-ordered set into chains
+ * (which is what we need, taking the list of grouping sets as a poset ordered
+ * by set inclusion) can be mapped to the problem of finding the maximum
+ * cardinality matching on a bipartite graph, which is solvable in polynomial
+ * time with a worst case of no worse than O(n^2.5) and usually much
+ * better. Since our N is at most 4096, we don't need to consider fallbacks to
+ * heuristic or approximate methods. (Planning time for a 12-d cube is under
+ * half a second on my modest system even with optimization off and assertions
+ * on.)
+ */
+static List *
+extract_rollup_sets(List *groupingSets)
+{
+ int num_sets_raw = list_length(groupingSets);
+ int num_empty = 0;
+ int num_sets = 0; /* distinct sets */
+ int num_chains = 0;
+ List *result = NIL;
+ List **results;
+ List **orig_sets;
+ Bitmapset **set_masks;
+ int *chains;
+ short **adjacency;
+ short *adjacency_buf;
+ BipartiteMatchState *state;
+ int i;
+ int j;
+ int j_size;
+ ListCell *lc1 = list_head(groupingSets);
+ ListCell *lc;
+
+ /*
+ * Start by stripping out empty sets. The algorithm doesn't require this,
+ * but the planner currently needs all empty sets to be returned in the
+ * first list, so we strip them here and add them back after.
+ */
+ while (lc1 && lfirst(lc1) == NIL)
+ {
+ ++num_empty;
+ lc1 = lnext(lc1);
+ }
+
+ /* bail out now if it turns out that all we had were empty sets. */
+ if (!lc1)
+ return list_make1(groupingSets);
+
+ /*
+ * We don't strictly need to remove duplicate sets here, but if we
+ * don't, they tend to become scattered through the result, which is
+ * a bit confusing (and irritating if we ever decide to optimize them
+ * out). So we remove them here and add them back after.
+ *
+ * For each non-duplicate set, we fill in the following:
+ *
+ * orig_sets[i] = list of the original set lists
+ * set_masks[i] = bitmapset for testing inclusion
+ * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
+ *
+ * chains[i] will be the result group this set is assigned to.
+ *
+ * We index all of these from 1 rather than 0 because it is convenient
+ * to leave 0 free for the NIL node in the graph algorithm.
+ */
+ orig_sets = palloc0((num_sets_raw + 1) * sizeof(List*));
+ set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
+ adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
+ adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
+
+ j_size = 0;
+ j = 0;
+ i = 1;
+
+ for_each_cell(lc, lc1)
+ {
+ List *candidate = lfirst(lc);
+ Bitmapset *candidate_set = NULL;
+ ListCell *lc2;
+ int dup_of = 0;
+
+ foreach(lc2, candidate)
+ {
+ candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
+ }
+
+ /* we can only be a dup if we're the same length as a previous set */
+ if (j_size == list_length(candidate))
+ {
+ int k;
+ for (k = j; k < i; ++k)
+ {
+ if (bms_equal(set_masks[k], candidate_set))
+ {
+ dup_of = k;
+ break;
+ }
+ }
+ }
+ else if (j_size < list_length(candidate))
+ {
+ j_size = list_length(candidate);
+ j = i;
+ }
+
+ if (dup_of > 0)
+ {
+ orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
+ bms_free(candidate_set);
+ }
+ else
+ {
+ int k;
+ int n_adj = 0;
+
+ orig_sets[i] = list_make1(candidate);
+ set_masks[i] = candidate_set;
+
+ /* fill in adjacency list; no need to compare equal-size sets */
+
+ for (k = j - 1; k > 0; --k)
+ {
+ if (bms_is_subset(set_masks[k], candidate_set))
+ adjacency_buf[++n_adj] = k;
+ }
+
+ if (n_adj > 0)
+ {
+ adjacency_buf[0] = n_adj;
+ adjacency[i] = palloc((n_adj + 1) * sizeof(short));
+ memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
+ }
+ else
+ adjacency[i] = NULL;
+
+ ++i;
+ }
+ }
+
+ num_sets = i - 1;
+
+ /*
+ * Apply the graph matching algorithm to do the work.
+ */
+ state = BipartiteMatch(num_sets, num_sets, adjacency);
+
+ /*
+ * Now, the state->pair* fields have the info we need to assign sets to
+ * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
+ * pair_vu[v] = u (both will be true, but we check both so that we can do
+ * it in one pass)
+ */
+ chains = palloc0((num_sets + 1) * sizeof(int));
+
+ for (i = 1; i <= num_sets; ++i)
+ {
+ int u = state->pair_vu[i];
+ int v = state->pair_uv[i];
+
+ if (u > 0 && u < i)
+ chains[i] = chains[u];
+ else if (v > 0 && v < i)
+ chains[i] = chains[v];
+ else
+ chains[i] = ++num_chains;
+ }
+
+ /* build result lists. */
+ results = palloc0((num_chains + 1) * sizeof(List*));
+
+ for (i = 1; i <= num_sets; ++i)
+ {
+ int c = chains[i];
+
+ Assert(c > 0);
+
+ results[c] = list_concat(results[c], orig_sets[i]);
+ }
+
+ /* push any empty sets back on the first list. */
+ while (num_empty-- > 0)
+ results[1] = lcons(NIL, results[1]);
+
+ /* make result list */
+ for (i = 1; i <= num_chains; ++i)
+ result = lappend(result, results[i]);
+
+ /*
+ * Free all the things.
+ *
+ * (This is over-fussy for small sets but for large sets we could have
+ * tied up a nontrivial amount of memory.)
+ */
+ BipartiteMatchFree(state);
+ pfree(results);
+ pfree(chains);
+ for (i = 1; i <= num_sets; ++i)
+ if (adjacency[i])
+ pfree(adjacency[i]);
+ pfree(adjacency);
+ pfree(adjacency_buf);
+ pfree(orig_sets);
+ for (i = 1; i <= num_sets; ++i)
+ bms_free(set_masks[i]);
+ pfree(set_masks);
+
+ return result;
+}
+
+/*
+ * Reorder the elements of a list of grouping sets such that they have correct
+ * prefix relationships.
+ *
+ * The input must be ordered with smallest sets first; the result is returned
+ * with largest sets first.
+ *
+ * If we're passed in a sortclause, we follow its order of columns to the
+ * extent possible, to minimize the chance that we add unnecessary sorts.
+ * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
+ * gets implemented in one pass.)
+ */
+static List *
+reorder_grouping_sets(List *groupingsets, List *sortclause)
+{
+ ListCell *lc;
+ ListCell *lc2;
+ List *previous = NIL;
+ List *result = NIL;
+
+ foreach(lc, groupingsets)
+ {
+ List *candidate = lfirst(lc);
+ List *new_elems = list_difference_int(candidate, previous);
+
+ if (list_length(new_elems) > 0)
+ {
+ while (list_length(sortclause) > list_length(previous))
+ {
+ SortGroupClause *sc = list_nth(sortclause, list_length(previous));
+ int ref = sc->tleSortGroupRef;
+ if (list_member_int(new_elems, ref))
+ {
+ previous = lappend_int(previous, ref);
+ new_elems = list_delete_int(new_elems, ref);
+ }
+ else
+ {
+ /* diverged from the sortclause; give up on it */
+ sortclause = NIL;
+ break;
+ }
+ }
+
+ foreach(lc2, new_elems)
+ {
+ previous = lappend_int(previous, lfirst_int(lc2));
+ }
+ }
+
+ result = lcons(list_copy(previous), result);
+ list_free(new_elems);
+ }
+
+ list_free(previous);
+
+ return result;
}
/*
@@ -2730,11 +3389,11 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
* be, in which case we just leave their pathkeys empty.
*/
- if (parse->groupClause &&
- grouping_is_sortable(parse->groupClause))
+ if (qp_extra->groupClause &&
+ grouping_is_sortable(qp_extra->groupClause))
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
- parse->groupClause,
+ qp_extra->groupClause,
tlist);
else
root->group_pathkeys = NIL;
@@ -3159,7 +3818,7 @@ make_subplanTargetList(PlannerInfo *root,
* If we're not grouping or aggregating, there's nothing to do here;
* query_planner should receive the unmodified target list.
*/
- if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual &&
+ if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual &&
!parse->hasWindowFuncs)
{
*need_tlist_eval = true;