aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/README1
-rw-r--r--src/backend/optimizer/geqo/geqo_eval.c2
-rw-r--r--src/backend/optimizer/path/allpaths.c26
-rw-r--r--src/backend/optimizer/plan/planner.c273
-rw-r--r--src/include/nodes/relation.h2
-rw-r--r--src/include/optimizer/paths.h3
6 files changed, 154 insertions, 153 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 84e60f7f6f0..3e254c8b2d8 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -998,6 +998,7 @@ considered useful for each step. Currently, we may create these types of
additional RelOptInfos during upper-level planning:
UPPERREL_SETOP result of UNION/INTERSECT/EXCEPT, if any
+UPPERREL_PARTIAL_GROUP_AGG result of partial grouping/aggregation, if any
UPPERREL_GROUP_AGG result of grouping/aggregation, if any
UPPERREL_WINDOW result of window functions, if any
UPPERREL_DISTINCT result of "SELECT DISTINCT", if any
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 57f0f594e5b..0be2a73e05b 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -268,7 +268,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
generate_partitionwise_join_paths(root, joinrel);
/* Create GatherPaths for any useful partial paths for rel */
- generate_gather_paths(root, joinrel);
+ generate_gather_paths(root, joinrel, false);
/* Find and save the cheapest paths for this joinrel */
set_cheapest(joinrel);
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f714247ebb0..1c792a00eb2 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -488,7 +488,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
* we'll consider gathering partial paths for the parent appendrel.)
*/
if (rel->reloptkind == RELOPT_BASEREL)
- generate_gather_paths(root, rel);
+ generate_gather_paths(root, rel, false);
/*
* Allow a plugin to editorialize on the set of Paths for this base
@@ -2444,27 +2444,42 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
* This must not be called until after we're done creating all partial paths
* for the specified relation. (Otherwise, add_partial_path might delete a
* path that some GatherPath or GatherMergePath has a reference to.)
+ *
+ * If we're generating paths for a scan or join relation, override_rows will
+ * be false, and we'll just use the relation's size estimate. When we're
+ * being called for a partially-grouped path, though, we need to override
+ * the rowcount estimate. (It's not clear that the particular value we're
+ * using here is actually best, but the underlying rel has no estimate so
+ * we must do something.)
*/
void
-generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
+generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
{
Path *cheapest_partial_path;
Path *simple_gather_path;
ListCell *lc;
+ double rows;
+ double *rowsp = NULL;
/* If there are no partial paths, there's nothing to do here. */
if (rel->partial_pathlist == NIL)
return;
+ /* Should we override the rel's rowcount estimate? */
+ if (override_rows)
+ rowsp = &rows;
+
/*
* The output of Gather is always unsorted, so there's only one partial
* path of interest: the cheapest one. That will be the one at the front
* of partial_pathlist because of the way add_partial_path works.
*/
cheapest_partial_path = linitial(rel->partial_pathlist);
+ rows =
+ cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
simple_gather_path = (Path *)
create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
- NULL, NULL);
+ NULL, rowsp);
add_path(rel, simple_gather_path);
/*
@@ -2479,8 +2494,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
if (subpath->pathkeys == NIL)
continue;
+ rows = subpath->rows * subpath->parallel_workers;
path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
- subpath->pathkeys, NULL, NULL);
+ subpath->pathkeys, NULL, rowsp);
add_path(rel, &path->path);
}
}
@@ -2653,7 +2669,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
generate_partitionwise_join_paths(root, rel);
/* Create GatherPaths for any useful partial paths for rel */
- generate_gather_paths(root, rel);
+ generate_gather_paths(root, rel, false);
/* Find and save the cheapest paths for this rel */
set_cheapest(rel);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 3e8cd1447cc..e8f6cc559b3 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -186,19 +186,17 @@ static PathTarget *make_sort_input_target(PlannerInfo *root,
static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
List *targets, List *targets_contain_srfs);
static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
- RelOptInfo *grouped_rel, PathTarget *target,
- PathTarget *partial_grouping_target,
+ RelOptInfo *grouped_rel,
+ PathTarget *target,
+ RelOptInfo *partially_grouped_rel,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual);
-static void add_partial_paths_to_grouping_rel(PlannerInfo *root,
+static void add_paths_to_partial_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
- RelOptInfo *grouped_rel,
- PathTarget *target,
- PathTarget *partial_grouping_target,
+ RelOptInfo *partial_grouped_rel,
AggClauseCosts *agg_partial_costs,
- AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
@@ -3601,6 +3599,11 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
* create_grouping_paths
*
* Build a new upperrel containing Paths for grouping and/or aggregation.
+ * Along the way, we also build an upperrel for Paths which are partially
+ * grouped and/or aggregated. A partially grouped and/or aggregated path
+ * needs a FinalizeAggregate node to complete the aggregation. Currently,
+ * the only partially grouped paths we build are also partial paths; that
+ * is, they need a Gather and then a FinalizeAggregate.
*
* input_rel: contains the source-data Paths
* target: the pathtarget for the result Paths to compute
@@ -3627,7 +3630,7 @@ create_grouping_paths(PlannerInfo *root,
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
RelOptInfo *grouped_rel;
- PathTarget *partial_grouping_target = NULL;
+ RelOptInfo *partially_grouped_rel;
AggClauseCosts agg_partial_costs; /* parallel only */
AggClauseCosts agg_final_costs; /* parallel only */
double dNumGroups;
@@ -3635,26 +3638,41 @@ create_grouping_paths(PlannerInfo *root,
bool can_sort;
bool try_parallel_aggregation;
- /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
+ /*
+ * For now, all aggregated paths are added to the (GROUP_AGG, NULL)
+ * upperrel. Paths that are only partially aggregated go into the
+ * (UPPERREL_PARTIAL_GROUP_AGG, NULL) upperrel.
+ */
grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
+ partially_grouped_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_GROUP_AGG,
+ NULL);
/*
* If the input relation is not parallel-safe, then the grouped relation
* can't be parallel-safe, either. Otherwise, it's parallel-safe if the
- * target list and HAVING quals are parallel-safe.
+ * target list and HAVING quals are parallel-safe. The partially grouped
+ * relation obeys the same rules.
*/
if (input_rel->consider_parallel &&
is_parallel_safe(root, (Node *) target->exprs) &&
is_parallel_safe(root, (Node *) parse->havingQual))
+ {
grouped_rel->consider_parallel = true;
+ partially_grouped_rel->consider_parallel = true;
+ }
/*
- * If the input rel belongs to a single FDW, so does the grouped rel.
+ * If the input rel belongs to a single FDW, so does the grouped rel. Same
+ * for the partially_grouped_rel.
*/
grouped_rel->serverid = input_rel->serverid;
grouped_rel->userid = input_rel->userid;
grouped_rel->useridiscurrent = input_rel->useridiscurrent;
grouped_rel->fdwroutine = input_rel->fdwroutine;
+ partially_grouped_rel->serverid = input_rel->serverid;
+ partially_grouped_rel->userid = input_rel->userid;
+ partially_grouped_rel->useridiscurrent = input_rel->useridiscurrent;
+ partially_grouped_rel->fdwroutine = input_rel->fdwroutine;
/*
* Check for degenerate grouping.
@@ -3778,14 +3796,13 @@ create_grouping_paths(PlannerInfo *root,
/*
* Before generating paths for grouped_rel, we first generate any possible
- * partial paths; that way, later code can easily consider both parallel
- * and non-parallel approaches to grouping. Note that the partial paths
- * we generate here are also partially aggregated, so simply pushing a
- * Gather node on top is insufficient to create a final path, as would be
- * the case for a scan/join rel.
+ * partial paths for partially_grouped_rel; that way, later code can
+ * easily consider both parallel and non-parallel approaches to grouping.
*/
if (try_parallel_aggregation)
{
+ PathTarget *partial_grouping_target;
+
/*
* Build target list for partial aggregate paths. These paths cannot
* just emit the same tlist as regular aggregate paths, because (1) we
@@ -3794,6 +3811,7 @@ create_grouping_paths(PlannerInfo *root,
* partial mode.
*/
partial_grouping_target = make_partial_grouping_target(root, target);
+ partially_grouped_rel->reltarget = partial_grouping_target;
/*
* Collect statistics about aggregates for estimating costs of
@@ -3817,16 +3835,16 @@ create_grouping_paths(PlannerInfo *root,
&agg_final_costs);
}
- add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
- partial_grouping_target,
- &agg_partial_costs, &agg_final_costs,
+ add_paths_to_partial_grouping_rel(root, input_rel,
+ partially_grouped_rel,
+ &agg_partial_costs,
gd, can_sort, can_hash,
(List *) parse->havingQual);
}
/* Build final grouping paths */
add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
- partial_grouping_target, agg_costs,
+ partially_grouped_rel, agg_costs,
&agg_final_costs, gd, can_sort, can_hash,
dNumGroups, (List *) parse->havingQual);
@@ -3854,16 +3872,6 @@ create_grouping_paths(PlannerInfo *root,
/* Now choose the best path(s) */
set_cheapest(grouped_rel);
- /*
- * We've been using the partial pathlist for the grouped relation to hold
- * partially aggregated paths, but that's actually a little bit bogus
- * because it's unsafe for later planning stages -- like ordered_rel ---
- * to get the idea that they can use these partial paths as if they didn't
- * need a FinalizeAggregate step. Zap the partial pathlist at this stage
- * so we don't get confused.
- */
- grouped_rel->partial_pathlist = NIL;
-
return grouped_rel;
}
@@ -5996,8 +6004,9 @@ get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
*/
static void
add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
- RelOptInfo *grouped_rel, PathTarget *target,
- PathTarget *partial_grouping_target,
+ RelOptInfo *grouped_rel,
+ PathTarget *target,
+ RelOptInfo *partially_grouped_rel,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
@@ -6079,32 +6088,27 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
- * Now generate a complete GroupAgg Path atop of the cheapest partial
- * path. We can do this using either Gather or Gather Merge.
+ * Instead of operating directly on the input relation, we can
+ * consider finalizing a partially aggregated path.
*/
- if (grouped_rel->partial_pathlist)
+ foreach(lc, partially_grouped_rel->pathlist)
{
- Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
- double total_groups = path->rows * path->parallel_workers;
-
- path = (Path *) create_gather_path(root,
- grouped_rel,
- path,
- partial_grouping_target,
- NULL,
- &total_groups);
+ Path *path = (Path *) lfirst(lc);
/*
- * Since Gather's output is always unsorted, we'll need to sort,
- * unless there's no GROUP BY clause or a degenerate (constant)
- * one, in which case there will only be a single group.
+ * Insert a Sort node, if required. But there's no point in
+ * sorting anything but the cheapest path.
*/
- if (root->group_pathkeys)
+ if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ if (path != partially_grouped_rel->cheapest_total_path)
+ continue;
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
+ }
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
@@ -6127,70 +6131,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
parse->groupClause,
havingQual,
dNumGroups));
-
- /*
- * The point of using Gather Merge rather than Gather is that it
- * can preserve the ordering of the input path, so there's no
- * reason to try it unless (1) it's possible to produce more than
- * one output row and (2) we want the output path to be ordered.
- */
- if (parse->groupClause != NIL && root->group_pathkeys != NIL)
- {
- foreach(lc, grouped_rel->partial_pathlist)
- {
- Path *subpath = (Path *) lfirst(lc);
- Path *gmpath;
- double total_groups;
-
- /*
- * It's useful to consider paths that are already properly
- * ordered for Gather Merge, because those don't need a
- * sort. It's also useful to consider the cheapest path,
- * because sorting it in parallel and then doing Gather
- * Merge may be better than doing an unordered Gather
- * followed by a sort. But there's no point in considering
- * non-cheapest paths that aren't already sorted
- * correctly.
- */
- if (path != subpath &&
- !pathkeys_contained_in(root->group_pathkeys,
- subpath->pathkeys))
- continue;
-
- total_groups = subpath->rows * subpath->parallel_workers;
-
- gmpath = (Path *)
- create_gather_merge_path(root,
- grouped_rel,
- subpath,
- partial_grouping_target,
- root->group_pathkeys,
- NULL,
- &total_groups);
-
- if (parse->hasAggs)
- add_path(grouped_rel, (Path *)
- create_agg_path(root,
- grouped_rel,
- gmpath,
- target,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
- AGGSPLIT_FINAL_DESERIAL,
- parse->groupClause,
- havingQual,
- agg_final_costs,
- dNumGroups));
- else
- add_path(grouped_rel, (Path *)
- create_group_path(root,
- grouped_rel,
- gmpath,
- target,
- parse->groupClause,
- havingQual,
- dNumGroups));
- }
- }
}
}
@@ -6240,29 +6180,19 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
- * Generate a HashAgg Path atop of the cheapest partial path. Once
- * again, we'll only do this if it looks as though the hash table
- * won't exceed work_mem.
+ * Generate a Finalize HashAgg Path atop of the cheapest partially
+ * grouped path. Once again, we'll only do this if it looks as though
+ * the hash table won't exceed work_mem.
*/
- if (grouped_rel->partial_pathlist)
+ if (partially_grouped_rel->pathlist)
{
- Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
+ Path *path = partially_grouped_rel->cheapest_total_path;
hashaggtablesize = estimate_hashagg_tablesize(path,
agg_final_costs,
dNumGroups);
if (hashaggtablesize < work_mem * 1024L)
- {
- double total_groups = path->rows * path->parallel_workers;
-
- path = (Path *) create_gather_path(root,
- grouped_rel,
- path,
- partial_grouping_target,
- NULL,
- &total_groups);
-
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
@@ -6274,25 +6204,24 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
havingQual,
agg_final_costs,
dNumGroups));
- }
}
}
}
/*
- * add_partial_paths_to_grouping_rel
+ * add_paths_to_partial_grouping_rel
*
- * Add partial paths to grouping relation. These paths are not fully
- * aggregated; a FinalizeAggregate step is still required.
+ * First, generate partially aggregated partial paths from the partial paths
+ * for the input relation, and then generate partially aggregated non-partial
+ * paths using Gather or Gather Merge. All paths for this relation -- both
+ * partial and non-partial -- have been partially aggregated but require a
+ * subsequent FinalizeAggregate step.
*/
static void
-add_partial_paths_to_grouping_rel(PlannerInfo *root,
+add_paths_to_partial_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
- RelOptInfo *grouped_rel,
- PathTarget *target,
- PathTarget *partial_grouping_target,
+ RelOptInfo *partially_grouped_rel,
AggClauseCosts *agg_partial_costs,
- AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
@@ -6330,17 +6259,17 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
- grouped_rel,
+ partially_grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
- add_partial_path(grouped_rel, (Path *)
+ add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
- grouped_rel,
+ partially_grouped_rel,
path,
- partial_grouping_target,
+ partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
@@ -6348,11 +6277,11 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
agg_partial_costs,
dNumPartialGroups));
else
- add_partial_path(grouped_rel, (Path *)
+ add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
- grouped_rel,
+ partially_grouped_rel,
path,
- partial_grouping_target,
+ partially_grouped_rel->reltarget,
parse->groupClause,
NIL,
dNumPartialGroups));
@@ -6376,11 +6305,11 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
*/
if (hashaggtablesize < work_mem * 1024L)
{
- add_partial_path(grouped_rel, (Path *)
+ add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
- grouped_rel,
+ partially_grouped_rel,
cheapest_partial_path,
- partial_grouping_target,
+ partially_grouped_rel->reltarget,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
@@ -6389,6 +6318,58 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
dNumPartialGroups));
}
}
+
+ /*
+ * If there is an FDW that's responsible for all baserels of the query,
+ * let it consider adding partially grouped ForeignPaths.
+ */
+ if (partially_grouped_rel->fdwroutine &&
+ partially_grouped_rel->fdwroutine->GetForeignUpperPaths)
+ {
+ FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine;
+
+ fdwroutine->GetForeignUpperPaths(root,
+ UPPERREL_PARTIAL_GROUP_AGG,
+ input_rel, partially_grouped_rel);
+ }
+
+ /*
+ * Try adding Gather or Gather Merge to partial paths to produce
+ * non-partial paths.
+ */
+ generate_gather_paths(root, partially_grouped_rel, true);
+
+ /*
+ * generate_gather_paths won't consider sorting the cheapest path to match
+ * the group keys and then applying a Gather Merge node to the result;
+ * that might be a winning strategy.
+ */
+ if (!pathkeys_contained_in(root->group_pathkeys,
+ cheapest_partial_path->pathkeys))
+ {
+ Path *path;
+ double total_groups;
+
+ total_groups =
+ cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
+ path = (Path *) create_sort_path(root, partially_grouped_rel,
+ cheapest_partial_path,
+ root->group_pathkeys,
+ -1.0);
+ path = (Path *)
+ create_gather_merge_path(root,
+ partially_grouped_rel,
+ path,
+ partially_grouped_rel->reltarget,
+ root->group_pathkeys,
+ NULL,
+ &total_groups);
+
+ add_path(partially_grouped_rel, path);
+ }
+
+ /* Now choose the best path(s) */
+ set_cheapest(partially_grouped_rel);
}
/*
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index b1c63173c22..db8de2dfd0c 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -71,6 +71,8 @@ typedef struct AggClauseCosts
typedef enum UpperRelationKind
{
UPPERREL_SETOP, /* result of UNION/INTERSECT/EXCEPT, if any */
+ UPPERREL_PARTIAL_GROUP_AGG, /* result of partial grouping/aggregation, if
+ * any */
UPPERREL_GROUP_AGG, /* result of grouping/aggregation, if any */
UPPERREL_WINDOW, /* result of window functions, if any */
UPPERREL_DISTINCT, /* result of "SELECT DISTINCT", if any */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index b2b4353eeac..94f9bb2b574 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -53,7 +53,8 @@ extern void set_dummy_rel_pathlist(RelOptInfo *rel);
extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
List *initial_rels);
-extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
+extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
+ bool override_rows);
extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
double index_pages, int max_workers);
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,