Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
 src/backend/optimizer/plan/planner.c | 861 ++++++++++++++++++++++--------
 1 file changed, 703 insertions(+), 158 deletions(-)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 90619509a2a..fa7a5f84277 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -91,12 +92,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups; /* list of RollupData */
+ List *hash_sets_idx; /* unsortable_sets remapped to groupClause indexes */
+ double dNumHashGroups; /* estimated number of groups in unsortable sets */
+ bool any_hashable; /* at least one set can be hashed? */
+ Bitmapset *unsortable_refs; /* sortgrouprefs lacking a sort operator */
+ Bitmapset *unhashable_refs; /* sortgrouprefs lacking hash support */
+ List *unsortable_sets; /* GroupingSetData for sets we cannot sort */
+ int *tleref_to_colnum_map; /* workspace: sortgroupref -> column index */
+} grouping_sets_data;
+
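(For reference: the RollupData and GroupingSetData node types used by the new code below are not defined in this file; they are added to src/include/nodes/relation.h by the same patch, and look roughly like this:)

typedef struct GroupingSetData
{
 NodeTag type;
 List *set; /* grouping set as list of sortgrouprefs */
 double numGroups; /* est. number of result groups */
} GroupingSetData;

typedef struct RollupData
{
 NodeTag type;
 List *groupClause; /* applicable subset of parse->groupClause */
 List *gsets; /* lists of integer indexes into groupClause */
 List *gsets_data; /* list of GroupingSetData */
 double numGroups; /* est. number of result groups */
 bool hashable; /* can be implemented as a hashed grouping */
 bool is_hashed; /* to be implemented as a hashagg */
} RollupData;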
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1540,8 +1567,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1550,84 +1576,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1721,8 +1670,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1922,8 +1872,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
adjust_paths_for_srfs(root, current_rel,
@@ -1960,7 +1909,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2113,6 +2061,221 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Note: currently, we leave it to callers to do set_cheapest() */
}
+/*
+ * Do preprocessing for groupingSets clause and related data. This handles the
+ * preliminary steps of expanding the grouping sets, organizing them into lists
+ * of rollups, and preparing annotations which will later be filled in with
+ * size estimates.
+ */
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through
+ * reorder_grouping_sets, so we must apply the GroupingSetData annotation
+ * here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct prefix
+ * order. If only one aggregation pass is needed, try to make the
+ * list match the ORDER BY clause; if more than one pass is needed, we
+ * don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first to
+ * largest-member-first, and applies the GroupingSetData annotations,
+ * though the data will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have
+ * to force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable.
+ * The groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets (since in that case we can't hash
+ * anything).
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * We have not yet pinned down a groupclause for this, but we will
+ * need index-based lists for estimation purposes. Construct
+ * hash_sets_idx based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
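(bms_overlap_list, used above, is also new in this patch; it lives in src/backend/nodes/bitmapset.c. A minimal behavioral sketch in terms of the existing bms_is_member — the real implementation is open-coded for speed:)

bool
bms_overlap_list(const Bitmapset *a, const List *b)
{
 ListCell *lc;

 foreach(lc, b)
 {
  /* true as soon as any list member is found in the bitmapset */
  if (bms_is_member(lfirst_int(lc), a))
   return true;
 }
 return false;
}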
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
+
+
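(To make the remapping concrete, here is a toy standalone illustration using hypothetical sortgroupref values and plain arrays in place of Lists:)

#include <stdio.h>

int
main(void)
{
 int refs[] = {4, 2, 7}; /* tleSortGroupRefs, in groupClause order */
 int map[8]; /* indexed by sortgroupref, like tleref_to_colnum_map */
 int set[] = {7, 4}; /* one grouping set, as sortgrouprefs */

 /* first loop: record each ref's 0-based position in the groupClause */
 for (int i = 0; i < 3; i++)
  map[refs[i]] = i;

 /* second loop: rewrite the set as indexes into the groupClause */
 for (int i = 0; i < 2; i++)
  printf("%d ", map[set[i]]);
 printf("\n"); /* prints "2 0" */
 return 0;
}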
/*
* Detect whether a plan node is a "dummy" plan created when a relation
@@ -3028,7 +3191,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3051,6 +3214,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3078,7 +3242,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
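(For example, given the rollup chain () ⊂ {b} ⊂ {a,b} presented smallest-first, the elements are reordered so that each smaller set is a list prefix of the next, and the result comes back largest-first: (b,a), (b), (), each wrapped in a GroupingSetData node whose numGroups is filled in later by get_number_of_groups.)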
@@ -3173,15 +3338,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gd: grouping sets data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gd data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3193,28 +3359,60 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
+
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
+
+ gd->dNumHashGroups = 0;
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3250,6 +3448,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
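(The body of this function is unchanged by the patch; for context, it computes approximately the following, which is why the XXX above notes that the full path width is charged even for columns the hash table won't actually store:)

 Size hashentrysize;

 /* per-entry size: full input tuple width plus tuple header */
 hashentrysize = MAXALIGN(path->pathtarget->width) +
  MAXALIGN(SizeofMinimalTupleHeader);

 /* plus by-ref transition values and per-entry hash overhead */
 hashentrysize += agg_costs->transitionSpace;
 hashentrysize += hash_agg_entry_size(agg_costs->numAggs);

 return hashentrysize * dNumGroups;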
@@ -3300,8 +3503,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3410,8 +3612,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3419,15 +3620,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in this case, we need can_sort to be true as long as we must
+ * consider any sorted-input plan.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3440,9 +3648,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3508,8 +3715,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3642,20 +3848,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3816,33 +4011,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not
+ * exceed work_mem, we'll generate a HashAgg Path, although if we
+ * were unable to sort above, then we'd better generate a Path, so
+ * that we at least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path,
+ * since input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3921,6 +4128,344 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashed_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
+ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashed_rollup = lfirst(l_start);
+ exclude_groups = unhashed_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ /*
+ * gd->rollups is empty if we have only unsortable columns to work
+ * with. Override work_mem in that case; otherwise, we'll rely on the
+ * sorted-input case to generate usable mixed paths.
+ */
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ *
+ * The mere presence of empty grouping sets doesn't make a rollup
+ * unhashable (see preprocess_grouping_sets); we handle those
+ * specially below.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ /* Empty grouping sets can't be hashed. */
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /*
+ * If we didn't find anything nonempty to hash, then bail. We'll
+ * generate a path from the is_sorted case.
+ */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets they should have been in the
+ * first rollup.
+ */
+ Assert(!unhashed_rollup || !empty_sets);
+
+ if (unhashed_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashed_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path,
+ agg_costs,
+ gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int num_rollups = list_length(gd->rollups);
+ int k_capacity;
+ int *k_weights = palloc(num_rollups * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * We treat this as a knapsack problem: the knapsack capacity
+ * represents work_mem, the item weights are the estimated memory
+ * usage of the hashtables needed to implement a single rollup, and
+ * we really ought to use the cost saving as the item value;
+ * however, currently the costs assigned to sort nodes don't
+ * reflect the comparison costs well, and so we treat all items as
+ * of equal value (each rollup we hash instead saves us one sort).
+ *
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will
+ * be negligible.)
+ *
+ * k_capacity is naturally bounded, but we clamp the values for
+ * scale and weight (below) to avoid overflows or underflows (or
+ * uselessly trying to use a scale factor less than 1 byte).
+ */
+ scale = Max(availspace / (20.0 * num_rollups), 1.0);
+ k_capacity = (int) floor(availspace / scale);
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i"
+ * to only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+
+ /*
+ * If sz is enormous, but work_mem (and hence scale) is
+ * small, avoid integer overflow here.
+ */
+ k_weights[i] = (int) Min(floor(sz / scale),
+ k_capacity + 1.0);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets,
+ list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
+
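(DiscreteKnapsack, from the newly added lib/knapsack.h, takes the capacity, the item count, per-item weights, and per-item values; passing NULL for the values, as above, treats every item as equally valuable, matching the "each hashed rollup saves one sort" reasoning. A toy standalone illustration of the scaling arithmetic, with hypothetical numbers:)

#include <stdio.h>
#include <math.h>

#define Max(a,b) ((a) > (b) ? (a) : (b))

int
main(void)
{
 /* hypothetical: 4MB of work_mem left after the unsortable sets */
 double availspace = 4.0 * 1024 * 1024;
 int num_rollups = 3;

 /* ~20 capacity units per rollup bounds the rounding error near 5% */
 double scale = Max(availspace / (20.0 * num_rollups), 1.0);
 int k_capacity = (int) floor(availspace / scale);

 /* prints approximately: scale = 69905, capacity = 60 */
 printf("scale = %.0f bytes/unit, capacity = %d units\n",
   scale, k_capacity);
 return 0;
}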
/*
* create_window_paths
*