diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-03-07 15:58:22 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-03-07 15:58:22 -0500 |
commit | 3fc6e2d7f5b652b417fa6937c34de2438d60fa9f (patch) | |
tree | 8702775168bd6a68c44c398875fa0ef70d204128 /src/backend/optimizer/plan/planner.c | |
parent | b642e50aea1b966f3b78c49e806b4a2c5497a861 (diff) | |
download | postgresql-3fc6e2d7f5b652b417fa6937c34de2438d60fa9f.tar.gz postgresql-3fc6e2d7f5b652b417fa6937c34de2438d60fa9f.zip |
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 2820 |
1 file changed, 1138 insertions, 1682 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 65b99e2af38..5fc8e5bd362 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -84,8 +84,9 @@ typedef struct /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); -static Plan *inheritance_planner(PlannerInfo *root); -static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction); +static void inheritance_planner(PlannerInfo *root); +static void grouping_planner(PlannerInfo *root, bool inheritance_update, + double tuple_fraction); static void preprocess_rowmarks(PlannerInfo *root); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, @@ -96,52 +97,44 @@ static List *preprocess_groupclause(PlannerInfo *root, List *force); static List *extract_rollup_sets(List *groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); -static bool choose_hashed_grouping(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCosts *agg_costs); -static bool choose_hashed_distinct(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Cost cheapest_startup_cost, Cost cheapest_total_cost, - int cheapest_path_width, - Cost sorted_startup_cost, Cost sorted_total_cost, - int sorted_path_width, - List *sorted_pathkeys, - double dNumDistinctRows); -static List *make_subplanTargetList(PlannerInfo *root, List *tlist, - AttrNumber **groupColIdx, bool *need_tlist_eval); +static double get_number_of_groups(PlannerInfo *root, + double path_rows, + List *rollup_lists, + List *rollup_groupclauses); +static RelOptInfo *create_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget 
*target, + AttrNumber *groupColIdx, + List *rollup_lists, + List *rollup_groupclauses); +static RelOptInfo *create_window_paths(PlannerInfo *root, + RelOptInfo *input_rel, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows); +static void create_one_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows); +static RelOptInfo *create_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel); +static RelOptInfo *create_ordered_paths(PlannerInfo *root, + RelOptInfo *input_rel, + double limit_tuples); +static PathTarget *make_scanjoin_target(PlannerInfo *root, List *tlist, + AttrNumber **groupColIdx); static int get_grouping_column_index(Query *parse, TargetEntry *tle); -static void locate_grouping_columns(PlannerInfo *root, - List *tlist, - List *sub_tlist, - AttrNumber *groupColIdx); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); static List *make_windowInputTargetList(PlannerInfo *root, List *tlist, List *activeWindows); static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, List *tlist); -static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, - List *tlist, - int numSortCols, AttrNumber *sortColIdx, - int *partNumCols, - AttrNumber **partColIdx, - Oid **partOperators, - int *ordNumCols, - AttrNumber **ordColIdx, - Oid **ordOperators); -static Plan *build_grouping_chain(PlannerInfo *root, - Query *parse, - List *tlist, - bool need_sort_for_grouping, - List *rollup_groupclauses, - List *rollup_lists, - AttrNumber *groupColIdx, - AggClauseCosts *agg_costs, - long numGroups, - Plan *result_plan); + /***************************************************************************** * @@ -175,6 +168,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) PlannerGlobal 
*glob; double tuple_fraction; PlannerInfo *root; + RelOptInfo *final_rel; + Path *best_path; Plan *top_plan; ListCell *lp, *lr; @@ -292,8 +287,14 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) } /* primary planning entry point (may recurse for subqueries) */ - top_plan = subquery_planner(glob, parse, NULL, - false, tuple_fraction, &root); + root = subquery_planner(glob, parse, NULL, + false, tuple_fraction); + + /* Select best Path and turn it into a Plan */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + best_path = get_cheapest_fractional_path(final_rel, tuple_fraction); + + top_plan = create_plan(root, best_path); /* * If creating a plan for a scrollable cursor, make sure it can run @@ -407,9 +408,6 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * tuple_fraction is the fraction of tuples we expect will be retrieved. * tuple_fraction is interpreted as explained for grouping_planner, below. * - * If subroot isn't NULL, we pass back the query's final PlannerInfo struct; - * among other things this tells the output sort ordering of the plan. - * * Basically, this routine does the stuff that should only be done once * per Query object. It then calls grouping_planner. At one time, * grouping_planner could be invoked recursively on the same Query object; @@ -419,20 +417,23 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * subquery_planner will be called recursively to handle sub-Query nodes * found within the query's expressions and rangetable. * - * Returns a query plan. + * Returns the PlannerInfo struct ("root") that contains all data generated + * while planning the subquery. In particular, the Path(s) attached to + * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the + * cheapest way(s) to implement the query. The top level will select the + * best Path and pass it through createplan.c to produce a finished Plan. 
*-------------------- */ -Plan * +PlannerInfo * subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, - bool hasRecursion, double tuple_fraction, - PlannerInfo **subroot) + bool hasRecursion, double tuple_fraction) { PlannerInfo *root; - Plan *plan; List *newWithCheckOptions; List *newHaving; bool hasOuterJoins; + RelOptInfo *final_rel; ListCell *l; /* Create a PlannerInfo data structure for this subquery */ @@ -450,15 +451,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->eq_classes = NIL; root->append_rel_list = NIL; root->rowMarks = NIL; - root->hasInheritedTarget = false; + memset(root->upper_rels, 0, sizeof(root->upper_rels)); + root->processed_tlist = NIL; root->grouping_map = NULL; - + root->minmax_aggs = NIL; + root->hasInheritedTarget = false; root->hasRecursion = hasRecursion; if (hasRecursion) root->wt_param_id = SS_assign_special_param(root); else root->wt_param_id = -1; - root->non_recursive_plan = NULL; + root->non_recursive_path = NULL; /* * If there is a WITH list, process each WITH query and build an initplan @@ -732,54 +735,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, */ if (parse->resultRelation && rt_fetch(parse->resultRelation, parse->rtable)->inh) - plan = inheritance_planner(root); + inheritance_planner(root); else - { - plan = grouping_planner(root, tuple_fraction); - /* If it's not SELECT, we need a ModifyTable node */ - if (parse->commandType != CMD_SELECT) - { - List *withCheckOptionLists; - List *returningLists; - List *rowMarks; - - /* - * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if - * needed. 
- */ - if (parse->withCheckOptions) - withCheckOptionLists = list_make1(parse->withCheckOptions); - else - withCheckOptionLists = NIL; - - if (parse->returningList) - returningLists = list_make1(parse->returningList); - else - returningLists = NIL; - - /* - * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node - * will have dealt with fetching non-locked marked rows, else we - * need to have ModifyTable do that. - */ - if (parse->rowMarks) - rowMarks = NIL; - else - rowMarks = root->rowMarks; - - plan = (Plan *) make_modifytable(root, - parse->commandType, - parse->canSetTag, - parse->resultRelation, - list_make1_int(parse->resultRelation), - list_make1(plan), - withCheckOptionLists, - returningLists, - rowMarks, - parse->onConflict, - SS_assign_special_param(root)); - } - } + grouping_planner(root, false, tuple_fraction); /* * Capture the set of outer-level param IDs we have access to, for use in @@ -788,17 +746,22 @@ subquery_planner(PlannerGlobal *glob, Query *parse, SS_identify_outer_params(root); /* - * If any initPlans were created in this query level, attach them to the - * topmost plan node for the level, and increment that node's cost to - * account for them. + * If any initPlans were created in this query level, increment the + * surviving Paths' costs to account for them. They won't actually get + * attached to the plan tree till create_plan() runs, but we want to be + * sure their costs are included now. */ - SS_attach_initplans(root, plan); + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + SS_charge_for_initplans(root, final_rel); - /* Return internal info if caller wants it */ - if (subroot) - *subroot = root; + /* + * Make sure we've identified the cheapest Path for the final rel. (By + * doing this here not in grouping_planner, we include initPlan costs in + * the decision, though it's unlikely that will change anything.) 
+ */ + set_cheapest(final_rel); - return plan; + return root; } /* @@ -944,7 +907,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr) /* * inheritance_planner - * Generate a plan in the case where the result relation is an + * Generate Paths in the case where the result relation is an * inheritance set. * * We have to handle this case differently from cases where a source relation @@ -955,9 +918,13 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr) * the UPDATE/DELETE target can never be the nullable side of an outer join, * so it's OK to generate the plan this way. * - * Returns a query plan. + * Returns nothing; the useful output is in the Paths we attach to + * the (UPPERREL_FINAL, NULL) upperrel stored in *root. + * + * Note that we have not done set_cheapest() on the final rel; it's convenient + * to leave this to the caller. */ -static Plan * +static void inheritance_planner(PlannerInfo *root) { Query *parse = root->parse; @@ -969,11 +936,13 @@ inheritance_planner(PlannerInfo *root) List *final_rtable = NIL; int save_rel_array_size = 0; RelOptInfo **save_rel_array = NULL; - List *subplans = NIL; + List *subpaths = NIL; + List *subroots = NIL; List *resultRelations = NIL; List *withCheckOptionLists = NIL; List *returningLists = NIL; List *rowMarks; + RelOptInfo *final_rel; ListCell *lc; Index rti; @@ -1060,8 +1029,9 @@ inheritance_planner(PlannerInfo *root) foreach(lc, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc); - PlannerInfo subroot; - Plan *subplan; + PlannerInfo *subroot; + RelOptInfo *sub_final_rel; + Path *subpath; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != parentRTindex) @@ -1071,7 +1041,8 @@ inheritance_planner(PlannerInfo *root) * We need a working copy of the PlannerInfo so that we can control * propagation of information back to the main copy. 
*/ - memcpy(&subroot, root, sizeof(PlannerInfo)); + subroot = makeNode(PlannerInfo); + memcpy(subroot, root, sizeof(PlannerInfo)); /* * Generate modified query with this rel as target. We first apply @@ -1079,7 +1050,7 @@ inheritance_planner(PlannerInfo *root) * references to the parent RTE to refer to the current child RTE, * then fool around with subquery RTEs. */ - subroot.parse = (Query *) + subroot->parse = (Query *) adjust_appendrel_attrs(root, (Node *) parse, appinfo); @@ -1090,7 +1061,7 @@ inheritance_planner(PlannerInfo *root) * executor doesn't need to see the modified copies --- we can just * pass it the original rowMarks list.) */ - subroot.rowMarks = (List *) copyObject(root->rowMarks); + subroot->rowMarks = (List *) copyObject(root->rowMarks); /* * The append_rel_list likewise might contain references to subquery @@ -1106,7 +1077,7 @@ inheritance_planner(PlannerInfo *root) { ListCell *lc2; - subroot.append_rel_list = NIL; + subroot->append_rel_list = NIL; foreach(lc2, root->append_rel_list) { AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2); @@ -1114,8 +1085,8 @@ inheritance_planner(PlannerInfo *root) if (bms_is_member(appinfo2->child_relid, modifiableARIindexes)) appinfo2 = (AppendRelInfo *) copyObject(appinfo2); - subroot.append_rel_list = lappend(subroot.append_rel_list, - appinfo2); + subroot->append_rel_list = lappend(subroot->append_rel_list, + appinfo2); } } @@ -1125,9 +1096,9 @@ inheritance_planner(PlannerInfo *root) * These won't be referenced, so there's no need to make them very * valid-looking. 
*/ - while (list_length(subroot.parse->rtable) < list_length(final_rtable)) - subroot.parse->rtable = lappend(subroot.parse->rtable, - makeNode(RangeTblEntry)); + while (list_length(subroot->parse->rtable) < list_length(final_rtable)) + subroot->parse->rtable = lappend(subroot->parse->rtable, + makeNode(RangeTblEntry)); /* * If this isn't the first child Query, generate duplicates of all @@ -1156,15 +1127,15 @@ inheritance_planner(PlannerInfo *root) * save a few cycles by applying ChangeVarNodes before we * append the RTE to the rangetable. */ - newrti = list_length(subroot.parse->rtable) + 1; - ChangeVarNodes((Node *) subroot.parse, rti, newrti, 0); - ChangeVarNodes((Node *) subroot.rowMarks, rti, newrti, 0); + newrti = list_length(subroot->parse->rtable) + 1; + ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0); + ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0); /* Skip processing unchanging parts of append_rel_list */ if (modifiableARIindexes != NULL) { ListCell *lc2; - foreach(lc2, subroot.append_rel_list) + foreach(lc2, subroot->append_rel_list) { AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2); @@ -1175,28 +1146,28 @@ inheritance_planner(PlannerInfo *root) } rte = copyObject(rte); ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0); - subroot.parse->rtable = lappend(subroot.parse->rtable, - rte); + subroot->parse->rtable = lappend(subroot->parse->rtable, + rte); } rti++; } } /* There shouldn't be any OJ info to translate, as yet */ - Assert(subroot.join_info_list == NIL); + Assert(subroot->join_info_list == NIL); /* and we haven't created PlaceHolderInfos, either */ - Assert(subroot.placeholder_list == NIL); + Assert(subroot->placeholder_list == NIL); /* hack to mark target relation as an inheritance partition */ - subroot.hasInheritedTarget = true; + subroot->hasInheritedTarget = true; - /* Generate plan */ - subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ ); + /* Generate Path(s) for accessing this result 
relation */ + grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ ); /* * Planning may have modified the query result relation (if there were * security barrier quals on the result RTE). */ - appinfo->child_relid = subroot.parse->resultRelation; + appinfo->child_relid = subroot->parse->resultRelation; /* * We'll use the first child relation (even if it's excluded) as the @@ -1213,21 +1184,28 @@ inheritance_planner(PlannerInfo *root) nominalRelation = appinfo->child_relid; /* + * Select cheapest path in case there's more than one. We always run + * modification queries to conclusion, so we care only for the + * cheapest-total path. + */ + sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + set_cheapest(sub_final_rel); + subpath = sub_final_rel->cheapest_total_path; + + /* * If this child rel was excluded by constraint exclusion, exclude it * from the result plan. */ - if (is_dummy_plan(subplan)) + if (IS_DUMMY_PATH(subpath)) continue; - subplans = lappend(subplans, subplan); - /* * If this is the first non-excluded child, its post-planning rtable * becomes the initial contents of final_rtable; otherwise, append * just its modified subquery RTEs to final_rtable. */ if (final_rtable == NIL) - final_rtable = subroot.parse->rtable; + final_rtable = subroot->parse->rtable; else { List *tmp_rtable = NIL; @@ -1244,7 +1222,7 @@ inheritance_planner(PlannerInfo *root) * When this happens, we want to use the new subqueries in the * final rtable. 
*/ - forboth(cell1, final_rtable, cell2, subroot.parse->rtable) + forboth(cell1, final_rtable, cell2, subroot->parse->rtable) { RangeTblEntry *rte1 = (RangeTblEntry *) lfirst(cell1); RangeTblEntry *rte2 = (RangeTblEntry *) lfirst(cell2); @@ -1261,7 +1239,7 @@ inheritance_planner(PlannerInfo *root) } final_rtable = list_concat(tmp_rtable, - list_copy_tail(subroot.parse->rtable, + list_copy_tail(subroot->parse->rtable, list_length(final_rtable))); } @@ -1272,19 +1250,25 @@ inheritance_planner(PlannerInfo *root) * have to propagate forward the RelOptInfos that were already built * in previous children. */ - Assert(subroot.simple_rel_array_size >= save_rel_array_size); + Assert(subroot->simple_rel_array_size >= save_rel_array_size); for (rti = 1; rti < save_rel_array_size; rti++) { RelOptInfo *brel = save_rel_array[rti]; if (brel) - subroot.simple_rel_array[rti] = brel; + subroot->simple_rel_array[rti] = brel; } - save_rel_array_size = subroot.simple_rel_array_size; - save_rel_array = subroot.simple_rel_array; + save_rel_array_size = subroot->simple_rel_array_size; + save_rel_array = subroot->simple_rel_array; /* Make sure any initplans from this rel get into the outer list */ - root->init_plans = subroot.init_plans; + root->init_plans = subroot->init_plans; + + /* Build list of sub-paths */ + subpaths = lappend(subpaths, subpath); + + /* Build list of modified subroots, too */ + subroots = lappend(subroots, subroot); /* Build list of target-relation RT indexes */ resultRelations = lappend_int(resultRelations, appinfo->child_relid); @@ -1292,40 +1276,44 @@ inheritance_planner(PlannerInfo *root) /* Build lists of per-relation WCO and RETURNING targetlists */ if (parse->withCheckOptions) withCheckOptionLists = lappend(withCheckOptionLists, - subroot.parse->withCheckOptions); + subroot->parse->withCheckOptions); if (parse->returningList) returningLists = lappend(returningLists, - subroot.parse->returningList); + subroot->parse->returningList); Assert(!parse->onConflict); 
} - /* Mark result as unordered (probably unnecessary) */ - root->query_pathkeys = NIL; + /* Result path must go into outer query's FINAL upperrel */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); /* * If we managed to exclude every child rel, return a dummy plan; it * doesn't even need a ModifyTable node. */ - if (subplans == NIL) + if (subpaths == NIL) { - /* although dummy, it must have a valid tlist for executor */ - List *tlist; - - tlist = preprocess_targetlist(root, parse->targetList); - return (Plan *) make_result(root, - tlist, - (Node *) list_make1(makeBoolConst(false, - false)), - NULL); + set_dummy_rel_pathlist(final_rel); + return; } /* * Put back the final adjusted rtable into the master copy of the Query. + * (We mustn't do this if we found no non-excluded children.) */ parse->rtable = final_rtable; root->simple_rel_array_size = save_rel_array_size; root->simple_rel_array = save_rel_array; + /* Must reconstruct master's simple_rte_array, too */ + root->simple_rte_array = (RangeTblEntry **) + palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *)); + rti = 1; + foreach(lc, final_rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + root->simple_rte_array[rti++] = rte; + } /* * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will @@ -1337,28 +1325,35 @@ inheritance_planner(PlannerInfo *root) else rowMarks = root->rowMarks; - /* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */ - return (Plan *) make_modifytable(root, + /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */ + add_path(final_rel, (Path *) + create_modifytable_path(root, final_rel, parse->commandType, parse->canSetTag, nominalRelation, resultRelations, - subplans, + subpaths, + subroots, withCheckOptionLists, returningLists, rowMarks, NULL, - SS_assign_special_param(root)); + SS_assign_special_param(root))); } /*-------------------- * grouping_planner * Perform planning steps related to grouping, 
aggregation, etc. - * This primarily means adding top-level processing to the basic - * query plan produced by query_planner. * - * tuple_fraction is the fraction of tuples we expect will be retrieved + * This function adds all required top-level processing to the scan/join + * Path(s) produced by query_planner. + * + * If inheritance_update is true, we're being called from inheritance_planner + * and should not include a ModifyTable step in the resulting Path(s). + * (inheritance_planner will create a single ModifyTable node covering all the + * target tables.) * + * tuple_fraction is the fraction of tuples we expect will be retrieved. * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) * 0 < tuple_fraction < 1: expect the given fraction of tuples available @@ -1366,23 +1361,26 @@ inheritance_planner(PlannerInfo *root) * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples * expected to be retrieved (ie, a LIMIT specification) * - * Returns a query plan. Also, root->query_pathkeys is returned as the - * actual output ordering of the plan (in pathkey format). + * Returns nothing; the useful output is in the Paths we attach to the + * (UPPERREL_FINAL, NULL) upperrel in *root. In addition, + * root->processed_tlist contains the final processed targetlist. + * + * Note that we have not done set_cheapest() on the final rel; it's convenient + * to leave this to the caller. 
*-------------------- */ -static Plan * -grouping_planner(PlannerInfo *root, double tuple_fraction) +static void +grouping_planner(PlannerInfo *root, bool inheritance_update, + double tuple_fraction) { Query *parse = root->parse; List *tlist = parse->targetList; int64 offset_est = 0; int64 count_est = 0; double limit_tuples = -1.0; - Plan *result_plan; - List *current_pathkeys; - double dNumGroups = 0; - bool use_hashed_distinct = false; - bool tested_hashed_distinct = false; + RelOptInfo *current_rel; + RelOptInfo *final_rel; + ListCell *lc; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) @@ -1398,36 +1396,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) limit_tuples = (double) count_est + (double) offset_est; } + /* Make tuple_fraction accessible to lower-level routines */ + root->tuple_fraction = tuple_fraction; + if (parse->setOperations) { - List *set_sortclauses; - /* * If there's a top-level ORDER BY, assume we have to fetch all the * tuples. This might be too simplistic given all the hackery below * to possibly avoid the sort; but the odds of accurate estimates here - * are pretty low anyway. + * are pretty low anyway. XXX try to get rid of this in favor of + * letting plan_set_operations generate both fast-start and + * cheapest-total paths. */ if (parse->sortClause) - tuple_fraction = 0.0; + root->tuple_fraction = 0.0; /* - * Construct the plan for set operations. The result will not need - * any work except perhaps a top-level sort and/or LIMIT. Note that - * any special work for recursive unions is the responsibility of + * Construct Paths for set operations. The results will not need any + * work except perhaps a top-level sort and/or LIMIT. Note that any + * special work for recursive unions is the responsibility of * plan_set_operations. 
*/ - result_plan = plan_set_operations(root, tuple_fraction, - &set_sortclauses); - - /* - * Calculate pathkeys representing the sort order (if any) of the set - * operation's result. We have to do this before overwriting the sort - * key information... - */ - current_pathkeys = make_pathkeys_for_sortclauses(root, - set_sortclauses, - result_plan->targetlist); + current_rel = plan_set_operations(root); /* * We should not need to call preprocess_targetlist, since we must be @@ -1438,8 +1429,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ Assert(parse->commandType == CMD_SELECT); - tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist), - tlist); + tlist = root->processed_tlist; /* from plan_set_operations */ + + /* for safety, copy processed_tlist instead of modifying in-place */ + tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList); + + /* Save aside the final decorated tlist */ + root->processed_tlist = tlist; /* * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have @@ -1465,33 +1461,25 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) else { /* No set operations, do regular planning */ - long numGroups = 0; - AggClauseCosts agg_costs; - int numGroupCols; - double path_rows; - bool use_hashed_grouping = false; + PathTarget *sub_target; + AttrNumber *groupColIdx; + double tlist_rows; + List *grouping_tlist; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; - OnConflictExpr *onconfl; - int maxref = 0; List *rollup_lists = NIL; List *rollup_groupclauses = NIL; standard_qp_extra qp_extra; - RelOptInfo *final_rel; - Path *cheapest_path; - Path *sorted_path; - Path *best_path; - - MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); /* A recursive query should always have setOperations */ Assert(!root->hasRecursion); - /* Preprocess grouping sets, if any */ + /* Preprocess grouping sets and GROUP BY clause, if any */ if (parse->groupingSets) { int *tleref_to_colnum_map; List *sets; + int maxref; 
ListCell *lc; ListCell *lc2; ListCell *lc_set; @@ -1499,7 +1487,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); /* Identify max SortGroupRef in groupClause, for array sizing */ - /* (note this value will be used again later) */ + maxref = 0; foreach(lc, parse->groupClause) { SortGroupClause *gc = lfirst(lc); @@ -1570,21 +1558,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } else { - /* Preprocess GROUP BY clause, if any */ + /* Preprocess regular GROUP BY clause, if any */ if (parse->groupClause) parse->groupClause = preprocess_groupclause(root, NIL); - rollup_groupclauses = list_make1(parse->groupClause); } - numGroupCols = list_length(parse->groupClause); - /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); - onconfl = parse->onConflict; - if (onconfl) - onconfl->onConflictSet = - preprocess_onconflict_targetlist(onconfl->onConflictSet, + if (parse->onConflict) + parse->onConflict->onConflictSet = + preprocess_onconflict_targetlist(parse->onConflict->onConflictSet, parse->resultRelation, parse->rtable); @@ -1597,6 +1581,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) root->glob->hasRowSecurity = true; /* + * We are now done hacking up the query's targetlist. Most of the + * remaining planning work will be done with the PathTarget + * representation of tlists, but save aside the full representation so + * that we can transfer its decoration (resnames etc) to the topmost + * tlist of the finished Plan. + */ + root->processed_tlist = tlist; + + /* * Locate any window functions in the tlist. (We don't need to look * anywhere else, since expressions used in ORDER BY will be in there * too.) Note that they could all have been eliminated by constant @@ -1613,34 +1606,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * Do aggregate preprocessing, if the query has any aggs. 
- * - * Note: think not that we can turn off hasAggs if we find no aggs. It - * is possible for constant-expression simplification to remove all - * explicit references to aggs, but we still have to follow the - * aggregate semantics (eg, producing only one output row). + * Preprocess MIN/MAX aggregates, if any. Note: be careful about + * adding logic between here and the query_planner() call. Anything + * that is needed in MIN/MAX-optimizable cases will have to be + * duplicated in planagg.c. */ if (parse->hasAggs) - { - /* - * Collect statistics about aggregates for estimating costs. Note: - * we do not attempt to detect duplicate aggregates here; a - * somewhat-overestimated cost is okay for our present purposes. - */ - count_agg_clauses(root, (Node *) tlist, &agg_costs); - count_agg_clauses(root, parse->havingQual, &agg_costs); - - /* - * Preprocess MIN/MAX aggregates, if any. Note: be careful about - * adding logic between here and the optimize_minmax_aggregates - * call. Anything that is needed in MIN/MAX-optimizable cases - * will have to be duplicated in planagg.c. - */ preprocess_minmax_aggregates(root, tlist); - } - - /* Make tuple_fraction accessible to lower-level routines */ - root->tuple_fraction = tuple_fraction; /* * Figure out whether there's a hard limit on the number of rows that @@ -1661,1072 +1633,255 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; - qp_extra.groupClause = llast(rollup_groupclauses); + qp_extra.groupClause = + parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause; /* - * Generate the best unsorted and presorted paths for this Query (but - * note there may not be any presorted paths). We also generate (in - * standard_qp_callback) pathkey representations of the query's sort - * clause, distinct clause, etc. 
+ * Generate the best unsorted and presorted paths for the scan/join + * portion of this Query, ie the processing represented by the + * FROM/WHERE clauses. (Note there may not be any presorted paths.) + * We also generate (in standard_qp_callback) pathkey representations + * of the query's sort clause, distinct clause, etc. */ - final_rel = query_planner(root, tlist, - standard_qp_callback, &qp_extra); + current_rel = query_planner(root, tlist, + standard_qp_callback, &qp_extra); /* - * Extract rowcount estimate for use below. If final_rel has been - * proven dummy, its rows estimate will be zero; clamp it to one to - * avoid zero-divide in subsequent calculations. + * Now determine the tlist that we want the topmost scan/join plan + * node to emit; this may be different from the final tlist if + * grouping or aggregation is needed. This is also a convenient spot + * for conversion of the tlist to PathTarget format. + * + * Note: it's desirable to not do this till after query_planner(), + * because the target width estimates can use per-Var width numbers + * that were obtained within query_planner(). */ - path_rows = clamp_row_est(final_rel->rows); + sub_target = make_scanjoin_target(root, tlist, + &groupColIdx); /* - * If there's grouping going on, estimate the number of result groups. - * We couldn't do this any earlier because it depends on relation size - * estimates that are created within query_planner(). + * Forcibly apply that tlist to all the Paths for the scan/join rel. * - * Then convert tuple_fraction to fractional form if it is absolute, - * and if grouping or aggregation is involved, adjust tuple_fraction - * to describe the fraction of the underlying un-aggregated tuples - * that will be fetched. + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist + * eval costs to all the paths would change that, so we don't bother. 
+ * Instead, just assume that the cheapest-startup and cheapest-total + * paths remain so. (There should be no parameterized paths anymore, + * so we needn't worry about updating cheapest_parameterized_paths.) */ - dNumGroups = 1; /* in case not grouping */ - - if (parse->groupClause) + foreach(lc, current_rel->pathlist) { - List *groupExprs; - - if (parse->groupingSets) + Path *subpath = (Path *) lfirst(lc); + Path *path; + + Assert(subpath->param_info == NULL); + path = apply_projection_to_path(root, current_rel, + subpath, sub_target); + /* If we had to add a Result, path is different from subpath */ + if (path != subpath) { - ListCell *lc, - *lc2; - - dNumGroups = 0; - - forboth(lc, rollup_groupclauses, lc2, rollup_lists) - { - ListCell *lc3; - - groupExprs = get_sortgrouplist_exprs(lfirst(lc), - parse->targetList); - - foreach(lc3, lfirst(lc2)) - { - List *gset = lfirst(lc3); - - dNumGroups += estimate_num_groups(root, - groupExprs, - path_rows, - &gset); - } - } + lfirst(lc) = path; + if (subpath == current_rel->cheapest_startup_path) + current_rel->cheapest_startup_path = path; + if (subpath == current_rel->cheapest_total_path) + current_rel->cheapest_total_path = path; } - else - { - groupExprs = get_sortgrouplist_exprs(parse->groupClause, - parse->targetList); - - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL); - } - - /* - * In GROUP BY mode, an absolute LIMIT is relative to the number - * of groups not the number of tuples. If the caller gave us a - * fraction, keep it as-is. (In both cases, we are effectively - * assuming that all the groups are about the same size.) - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; - - /* - * If there's more than one grouping set, we'll have to sort the - * entire input. 
- */ - if (list_length(rollup_lists) > 1) - tuple_fraction = 0.0; - - /* - * If both GROUP BY and ORDER BY are specified, we will need two - * levels of sort --- and, therefore, certainly need to read all - * the tuples --- unless ORDER BY is a subset of GROUP BY. - * Likewise if we have both DISTINCT and GROUP BY, or if we have a - * window specification not compatible with the GROUP BY. - */ - if (!pathkeys_contained_in(root->sort_pathkeys, - root->group_pathkeys) || - !pathkeys_contained_in(root->distinct_pathkeys, - root->group_pathkeys) || - !pathkeys_contained_in(root->window_pathkeys, - root->group_pathkeys)) - tuple_fraction = 0.0; - } - else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets) - { - /* - * Ungrouped aggregate will certainly want to read all the tuples, - * and it will deliver a single result row per grouping set (or 1 - * if no grouping sets were explicitly given, in which case leave - * dNumGroups as-is) - */ - tuple_fraction = 0.0; - if (parse->groupingSets) - dNumGroups = list_length(parse->groupingSets); - } - else if (parse->distinctClause) - { - /* - * Since there was no grouping or aggregation, it's reasonable to - * assume the UNIQUE filter has effects comparable to GROUP BY. - * (If DISTINCT is used with grouping, we ignore its effects for - * rowcount estimation purposes; this amounts to assuming the - * grouped rows are distinct already.) - */ - List *distinctExprs; - - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, - parse->targetList); - dNumGroups = estimate_num_groups(root, distinctExprs, path_rows, NULL); - - /* - * Adjust tuple_fraction the same way as for GROUP BY, too. - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; - } - else - { - /* - * Plain non-grouped, non-aggregated query: an absolute tuple - * fraction can be divided by the number of tuples. 
- */ - if (tuple_fraction >= 1.0) - tuple_fraction /= path_rows; } /* - * Pick out the cheapest-total path as well as the cheapest presorted - * path for the requested pathkeys (if there is one). We should take - * the tuple fraction into account when selecting the cheapest - * presorted path, but not when selecting the cheapest-total path, - * since if we have to sort then we'll have to fetch all the tuples. - * (But there's a special case: if query_pathkeys is NIL, meaning - * order doesn't matter, then the "cheapest presorted" path will be - * the cheapest overall for the tuple fraction.) + * Determine the tlist we need grouping paths to emit. While we could + * skip this if we're not going to call create_grouping_paths, it's + * trivial unless we've got window functions, and then we have to do + * the work anyway. (XXX: refactor to work with PathTargets instead + * of tlists) */ - cheapest_path = final_rel->cheapest_total_path; - - sorted_path = - get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, - root->query_pathkeys, - NULL, - tuple_fraction); - - /* Don't consider same path in both guises; just wastes effort */ - if (sorted_path == cheapest_path) - sorted_path = NULL; + if (activeWindows) + grouping_tlist = make_windowInputTargetList(root, + tlist, + activeWindows); + else + grouping_tlist = tlist; /* - * Forget about the presorted path if it would be cheaper to sort the - * cheapest-total path. Here we need consider only the behavior at - * the tuple_fraction point. Also, limit_tuples is only relevant if - * not grouping/aggregating, so use root->limit_tuples in the - * cost_sort call. + * If we have grouping and/or aggregation, consider ways to implement + * that. We build a new upperrel representing the output of this + * phase. 
*/ - if (sorted_path) + if (parse->groupClause || parse->groupingSets || parse->hasAggs || + root->hasHavingQual) { - Path sort_path; /* dummy for result of cost_sort */ - - if (root->query_pathkeys == NIL || - pathkeys_contained_in(root->query_pathkeys, - cheapest_path->pathkeys)) - { - /* No sort needed for cheapest path */ - sort_path.startup_cost = cheapest_path->startup_cost; - sort_path.total_cost = cheapest_path->total_cost; - } - else - { - /* Figure cost for sorting */ - cost_sort(&sort_path, root, root->query_pathkeys, - cheapest_path->total_cost, - path_rows, cheapest_path->pathtarget->width, - 0.0, work_mem, root->limit_tuples); - } - - if (compare_fractional_path_costs(sorted_path, &sort_path, - tuple_fraction) > 0) - { - /* Presorted path is a loser */ - sorted_path = NULL; - } + current_rel = create_grouping_paths(root, + current_rel, + create_pathtarget(root, + grouping_tlist), + groupColIdx, + rollup_lists, + rollup_groupclauses); } /* - * Consider whether we want to use hashing instead of sorting. + * If we have window functions, consider ways to implement those. We + * build a new upperrel representing the output of this phase. */ - if (parse->groupClause) - { - /* - * If grouping, decide whether to use sorted or hashed grouping. - * If grouping sets are present, we can currently do only sorted - * grouping. - */ - - if (parse->groupingSets) - { - use_hashed_grouping = false; - } - else - { - use_hashed_grouping = - choose_hashed_grouping(root, - tuple_fraction, limit_tuples, - path_rows, - cheapest_path, sorted_path, - dNumGroups, &agg_costs); - } - - /* Also convert # groups to long int --- but 'ware overflow! 
*/ - numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - } - else if (parse->distinctClause && sorted_path && - !root->hasHavingQual && !parse->hasAggs && !activeWindows) + if (activeWindows) { - /* - * We'll reach the DISTINCT stage without any intermediate - * processing, so figure out whether we will want to hash or not - * so we can choose whether to use cheapest or sorted path. - */ - use_hashed_distinct = - choose_hashed_distinct(root, - tuple_fraction, limit_tuples, - path_rows, - cheapest_path->startup_cost, - cheapest_path->total_cost, - cheapest_path->pathtarget->width, - sorted_path->startup_cost, - sorted_path->total_cost, - sorted_path->pathtarget->width, - sorted_path->pathkeys, - dNumGroups); - tested_hashed_distinct = true; + current_rel = create_window_paths(root, + current_rel, + grouping_tlist, + tlist, + wflists, + activeWindows); } /* - * Select the best path. If we are doing hashed grouping, we will - * always read all the input tuples, so use the cheapest-total path. - * Otherwise, the comparison above is correct. + * If there are set-returning functions in the tlist, scale up the + * assumed output rowcounts of all surviving Paths to account for + * that. This is a bit of a kluge, but it's not clear how to account + * for it in a more principled way. We definitely don't want to apply + * the multiplier more than once, which would happen if we tried to + * fold it into PathTarget accounting. And the expansion does happen + * before any explicit DISTINCT or ORDER BY processing is done. */ - if (use_hashed_grouping || use_hashed_distinct || !sorted_path) - best_path = cheapest_path; - else - best_path = sorted_path; - - /* - * Check to see if it's possible to optimize MIN/MAX aggregates. If - * so, we will forget all the work we did so far to choose a "regular" - * path ... but we had to do it anyway to be able to tell which way is - * cheaper. 
- */ - result_plan = optimize_minmax_aggregates(root, - tlist, - &agg_costs, - best_path); - if (result_plan != NULL) - { - /* - * optimize_minmax_aggregates generated the full plan, with the - * right tlist, and it has no sort order. - */ - current_pathkeys = NIL; - } - else + tlist_rows = tlist_returns_set_rows(tlist); + if (tlist_rows > 1) { - /* - * Normal case --- create a plan according to query_planner's - * results. - */ - List *sub_tlist; - AttrNumber *groupColIdx = NULL; - bool need_tlist_eval = true; - bool need_sort_for_grouping = false; - - result_plan = create_plan(root, best_path); - current_pathkeys = best_path->pathkeys; - - /* Detect if we'll need an explicit sort for grouping */ - if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) - need_sort_for_grouping = true; - - /* - * Generate appropriate target list for scan/join subplan; may be - * different from tlist if grouping or aggregation is needed. - */ - sub_tlist = make_subplanTargetList(root, tlist, - &groupColIdx, - &need_tlist_eval); - - /* - * create_plan returns a plan with just a "flat" tlist of required - * Vars. Usually we need to insert the sub_tlist as the tlist of - * the top plan node. However, we can skip that if we determined - * that whatever create_plan chose to return will be good enough. - * - * If we need_sort_for_grouping, always override create_plan's - * tlist, so that we don't sort useless data from a "physical" - * tlist. - */ - if (need_tlist_eval || need_sort_for_grouping) + foreach(lc, current_rel->pathlist) { - /* - * If the top-level plan node is one that cannot do expression - * evaluation and its existing target list isn't already what - * we need, we must insert a Result node to project the - * desired tlist. 
- */ - if (!is_projection_capable_plan(result_plan) && - !tlist_same_exprs(sub_tlist, result_plan->targetlist)) - { - result_plan = (Plan *) make_result(root, - sub_tlist, - NULL, - result_plan); - } - else - { - /* - * Otherwise, just replace the subplan's flat tlist with - * the desired tlist. - */ - result_plan->targetlist = sub_tlist; - } + Path *path = (Path *) lfirst(lc); /* - * Also, account for the cost of evaluation of the sub_tlist. - * See comments for add_tlist_costs_to_plan() for more info. + * We assume that execution costs of the tlist as such were + * already accounted for. However, it still seems appropriate + * to charge something more for the executor's general costs + * of processing the added tuples. The cost is probably less + * than cpu_tuple_cost, though, so we arbitrarily use half of + * that. */ - add_tlist_costs_to_plan(root, result_plan, sub_tlist); - } - else - { - /* - * Since we're using create_plan's tlist and not the one - * make_subplanTargetList calculated, we have to refigure any - * grouping-column indexes make_subplanTargetList computed. - */ - locate_grouping_columns(root, tlist, result_plan->targetlist, - groupColIdx); - } + path->total_cost += path->rows * (tlist_rows - 1) * + cpu_tuple_cost / 2; - /* - * groupColIdx is now cast in stone, so record a mapping from - * tleSortGroupRef to column index. setrefs.c will need this to - * finalize GROUPING() operations. - */ - if (parse->groupingSets) - { - AttrNumber *grouping_map = palloc0(sizeof(AttrNumber) * (maxref + 1)); - ListCell *lc; - int i = 0; - - foreach(lc, parse->groupClause) - { - SortGroupClause *gc = lfirst(lc); - - grouping_map[gc->tleSortGroupRef] = groupColIdx[i++]; - } - - root->grouping_map = grouping_map; - } - - /* - * Insert AGG or GROUP node if needed, plus an explicit sort step - * if necessary. - * - * HAVING clause, if any, becomes qual of the Agg or Group node. 
- */ - if (use_hashed_grouping) - { - /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - AGG_HASHED, - &agg_costs, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - NIL, - numGroups, - false, - true, - result_plan); - /* Hashed aggregation produces randomly-ordered results */ - current_pathkeys = NIL; - } - else if (parse->hasAggs || - (parse->groupingSets && parse->groupClause)) - { - /* - * Aggregation and/or non-degenerate grouping sets. - * - * Output is in sorted order by group_pathkeys if, and only - * if, there is a single rollup operation on a non-empty list - * of grouping expressions. - */ - if (list_length(rollup_groupclauses) == 1 - && list_length(linitial(rollup_groupclauses)) > 0) - current_pathkeys = root->group_pathkeys; - else - current_pathkeys = NIL; - - result_plan = build_grouping_chain(root, - parse, - tlist, - need_sort_for_grouping, - rollup_groupclauses, - rollup_lists, - groupColIdx, - &agg_costs, - numGroups, - result_plan); + path->rows *= tlist_rows; } - else if (parse->groupClause) - { - /* - * GROUP BY without aggregation, so insert a group node (plus - * the appropriate sort node, if necessary). - * - * Add an explicit sort if we couldn't make the path come out - * the way the GROUP node needs it. 
- */ - if (need_sort_for_grouping) - { - result_plan = (Plan *) - make_sort_from_groupcols(root, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = root->group_pathkeys; - } - - result_plan = (Plan *) make_group(root, - tlist, - (List *) parse->havingQual, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - dNumGroups, - result_plan); - /* The Group node won't change sort ordering */ - } - else if (root->hasHavingQual || parse->groupingSets) - { - int nrows = list_length(parse->groupingSets); - - /* - * No aggregates, and no GROUP BY, but we have a HAVING qual - * or grouping sets (which by elimination of cases above must - * consist solely of empty grouping sets, since otherwise - * groupClause will be non-empty). - * - * This is a degenerate case in which we are supposed to emit - * either 0 or 1 row for each grouping set depending on - * whether HAVING succeeds. Furthermore, there cannot be any - * variables in either HAVING or the targetlist, so we - * actually do not need the FROM table at all! We can just - * throw away the plan-so-far and generate a Result node. This - * is a sufficiently unusual corner case that it's not worth - * contorting the structure of this routine to avoid having to - * generate the plan in the first place. - */ - result_plan = (Plan *) make_result(root, - tlist, - parse->havingQual, - NULL); - - /* - * Doesn't seem worthwhile writing code to cons up a - * generate_series or a values scan to emit multiple rows. - * Instead just clone the result in an Append. - */ - if (nrows > 1) - { - List *plans = list_make1(result_plan); - - while (--nrows > 0) - plans = lappend(plans, copyObject(result_plan)); - - result_plan = (Plan *) make_append(plans, tlist); - } - } - } /* end of non-minmax-aggregate case */ - - /* - * Since each window function could require a different sort order, we - * stack up a WindowAgg node for each window, with sort steps between - * them as needed. 
- */ - if (activeWindows) - { - List *window_tlist; - ListCell *l; - - /* - * If the top-level plan node is one that cannot do expression - * evaluation, we must insert a Result node to project the desired - * tlist. (In some cases this might not really be required, but - * it's not worth trying to avoid it. In particular, think not to - * skip adding the Result if the initial window_tlist matches the - * top-level plan node's output, because we might change the tlist - * inside the following loop.) Note that on second and subsequent - * passes through the following loop, the top-level node will be a - * WindowAgg which we know can project; so we only need to check - * once. - */ - if (!is_projection_capable_plan(result_plan)) - { - result_plan = (Plan *) make_result(root, - NIL, - NULL, - result_plan); - } - - /* - * The "base" targetlist for all steps of the windowing process is - * a flat tlist of all Vars and Aggs needed in the result. (In - * some cases we wouldn't need to propagate all of these all the - * way to the top, since they might only be needed as inputs to - * WindowFuncs. It's probably not worth trying to optimize that - * though.) We also add window partitioning and sorting - * expressions to the base tlist, to ensure they're computed only - * once at the bottom of the stack (that's critical for volatile - * functions). As we climb up the stack, we'll add outputs for - * the WindowFuncs computed at each level. - */ - window_tlist = make_windowInputTargetList(root, - tlist, - activeWindows); - - /* - * The copyObject steps here are needed to ensure that each plan - * node has a separately modifiable tlist. (XXX wouldn't a - * shallow list copy do for that?) 
- */ - result_plan->targetlist = (List *) copyObject(window_tlist); - - foreach(l, activeWindows) - { - WindowClause *wc = (WindowClause *) lfirst(l); - List *window_pathkeys; - int partNumCols; - AttrNumber *partColIdx; - Oid *partOperators; - int ordNumCols; - AttrNumber *ordColIdx; - Oid *ordOperators; - - window_pathkeys = make_pathkeys_for_window(root, - wc, - tlist); - - /* - * This is a bit tricky: we build a sort node even if we don't - * really have to sort. Even when no explicit sort is needed, - * we need to have suitable resjunk items added to the input - * plan's tlist for any partitioning or ordering columns that - * aren't plain Vars. (In theory, make_windowInputTargetList - * should have provided all such columns, but let's not assume - * that here.) Furthermore, this way we can use existing - * infrastructure to identify which input columns are the - * interesting ones. - */ - if (window_pathkeys) - { - Sort *sort_plan; - - sort_plan = make_sort_from_pathkeys(root, - result_plan, - window_pathkeys, - -1.0); - if (!pathkeys_contained_in(window_pathkeys, - current_pathkeys)) - { - /* we do indeed need to sort */ - result_plan = (Plan *) sort_plan; - current_pathkeys = window_pathkeys; - } - /* In either case, extract the per-column information */ - get_column_info_for_window(root, wc, tlist, - sort_plan->numCols, - sort_plan->sortColIdx, - &partNumCols, - &partColIdx, - &partOperators, - &ordNumCols, - &ordColIdx, - &ordOperators); - } - else - { - /* empty window specification, nothing to sort */ - partNumCols = 0; - partColIdx = NULL; - partOperators = NULL; - ordNumCols = 0; - ordColIdx = NULL; - ordOperators = NULL; - } - if (lnext(l)) - { - /* Add the current WindowFuncs to the running tlist */ - window_tlist = add_to_flat_tlist(window_tlist, - wflists->windowFuncs[wc->winref]); - } - else - { - /* Install the original tlist in the topmost WindowAgg */ - window_tlist = tlist; - } - - /* ... 
and make the WindowAgg plan node */ - result_plan = (Plan *) - make_windowagg(root, - (List *) copyObject(window_tlist), - wflists->windowFuncs[wc->winref], - wc->winref, - partNumCols, - partColIdx, - partOperators, - ordNumCols, - ordColIdx, - ordOperators, - wc->frameOptions, - wc->startOffset, - wc->endOffset, - result_plan); - } + /* There seems no need for a fresh set_cheapest comparison. */ } - } /* end of if (setOperations) */ - - /* - * If there is a DISTINCT clause, add the necessary node(s). - */ - if (parse->distinctClause) - { - double dNumDistinctRows; - long numDistinctRows; /* - * If there was grouping or aggregation, use the current number of - * rows as the estimated number of DISTINCT rows (ie, assume the - * result was already mostly unique). If not, use the number of - * distinct-groups calculated previously. + * If there is a DISTINCT clause, consider ways to implement that. We + * build a new upperrel representing the output of this phase. */ - if (parse->groupClause || parse->groupingSets || root->hasHavingQual || parse->hasAggs) - dNumDistinctRows = result_plan->plan_rows; - else - dNumDistinctRows = dNumGroups; - - /* Also convert to long int --- but 'ware overflow! */ - numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); - - /* Choose implementation method if we didn't already */ - if (!tested_hashed_distinct) + if (parse->distinctClause) { - /* - * At this point, either hashed or sorted grouping will have to - * work from result_plan, so we pass that as both "cheapest" and - * "sorted". 
- */ - use_hashed_distinct = - choose_hashed_distinct(root, - tuple_fraction, limit_tuples, - result_plan->plan_rows, - result_plan->startup_cost, - result_plan->total_cost, - result_plan->plan_width, - result_plan->startup_cost, - result_plan->total_cost, - result_plan->plan_width, - current_pathkeys, - dNumDistinctRows); + current_rel = create_distinct_paths(root, + current_rel); } - if (use_hashed_distinct) - { - /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(root, - result_plan->targetlist, - NIL, - AGG_HASHED, - NULL, - list_length(parse->distinctClause), - extract_grouping_cols(parse->distinctClause, - result_plan->targetlist), - extract_grouping_ops(parse->distinctClause), - NIL, - numDistinctRows, - false, - true, - result_plan); - /* Hashed aggregation produces randomly-ordered results */ - current_pathkeys = NIL; - } - else - { - /* - * Use a Unique node to implement DISTINCT. Add an explicit sort - * if we couldn't make the path come out the way the Unique node - * needs it. If we do have to sort, always sort by the more - * rigorous of DISTINCT and ORDER BY, to avoid a second sort - * below. However, for regular DISTINCT, don't sort now if we - * don't have to --- sorting afterwards will likely be cheaper, - * and also has the possibility of optimizing via LIMIT. But for - * DISTINCT ON, we *must* force the final sort now, else it won't - * have the desired behavior. - */ - List *needed_pathkeys; - - if (parse->hasDistinctOn && - list_length(root->distinct_pathkeys) < - list_length(root->sort_pathkeys)) - needed_pathkeys = root->sort_pathkeys; - else - needed_pathkeys = root->distinct_pathkeys; - - if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) - { - if (list_length(root->distinct_pathkeys) >= - list_length(root->sort_pathkeys)) - current_pathkeys = root->distinct_pathkeys; - else - { - current_pathkeys = root->sort_pathkeys; - /* Assert checks that parser didn't mess up... 
*/ - Assert(pathkeys_contained_in(root->distinct_pathkeys, - current_pathkeys)); - } - - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - current_pathkeys, - -1.0); - } - - result_plan = (Plan *) make_unique(result_plan, - parse->distinctClause); - result_plan->plan_rows = dNumDistinctRows; - /* The Unique node won't change sort ordering */ - } - } + } /* end of if (setOperations) */ /* - * If ORDER BY was given and we were not able to make the plan come out in - * the right order, add an explicit sort step. + * If ORDER BY was given, consider ways to implement that, and generate a + * new upperrel containing only paths that emit the correct ordering. We + * can apply the original limit_tuples limit in sorting now. */ if (parse->sortClause) { - if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - { - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - root->sort_pathkeys, - limit_tuples); - current_pathkeys = root->sort_pathkeys; - } + current_rel = create_ordered_paths(root, + current_rel, + limit_tuples); } /* - * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. - * (Note: we intentionally test parse->rowMarks not root->rowMarks here. - * If there are only non-locking rowmarks, they should be handled by the - * ModifyTable node instead.) + * Now we are prepared to build the final-output upperrel. Insert all + * surviving paths, with LockRows, Limit, and/or ModifyTable steps added + * if needed. */ - if (parse->rowMarks) + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + + foreach(lc, current_rel->pathlist) { - result_plan = (Plan *) make_lockrows(result_plan, - root->rowMarks, - SS_assign_special_param(root)); + Path *path = (Path *) lfirst(lc); /* - * The result can no longer be assumed sorted, since locking might - * cause the sort key columns to be replaced with new values. + * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. 
+ * (Note: we intentionally test parse->rowMarks not root->rowMarks + * here. If there are only non-locking rowmarks, they should be + * handled by the ModifyTable node instead. However, root->rowMarks + * is what goes into the LockRows node.) */ - current_pathkeys = NIL; - } - - /* - * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. - */ - if (limit_needed(parse)) - { - result_plan = (Plan *) make_limit(result_plan, - parse->limitOffset, - parse->limitCount, - offset_est, - count_est); - } - - /* - * Return the actual output ordering in query_pathkeys for possible use by - * an outer query level. - */ - root->query_pathkeys = current_pathkeys; - - return result_plan; -} - - -/* - * Given a groupclause for a collection of grouping sets, produce the - * corresponding groupColIdx. - * - * root->grouping_map maps the tleSortGroupRef to the actual column position in - * the input tuple. So we get the ref from the entries in the groupclause and - * look them up there. - */ -static AttrNumber * -remap_groupColIdx(PlannerInfo *root, List *groupClause) -{ - AttrNumber *grouping_map = root->grouping_map; - AttrNumber *new_grpColIdx; - ListCell *lc; - int i; - - Assert(grouping_map); - - new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(groupClause)); - - i = 0; - foreach(lc, groupClause) - { - SortGroupClause *clause = lfirst(lc); - - new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef]; - } - - return new_grpColIdx; -} - -/* - * Build Agg and Sort nodes to implement sorted grouping with one or more - * grouping sets. A plain GROUP BY or just the presence of aggregates counts - * for this purpose as a single grouping set; the calling code is responsible - * for providing a single-element rollup_groupclauses list for such cases, - * though rollup_lists may be nil. - * - * The last entry in rollup_groupclauses (which is the one the input is sorted - * on, if at all) is the one used for the returned Agg node. 
Any additional - * rollups are attached, with corresponding sort info, to subsidiary Agg and - * Sort nodes attached to the side of the real Agg node; these nodes don't - * participate in the plan directly, but they are both a convenient way to - * represent the required data and a convenient way to account for the costs - * of execution. - */ -static Plan * -build_grouping_chain(PlannerInfo *root, - Query *parse, - List *tlist, - bool need_sort_for_grouping, - List *rollup_groupclauses, - List *rollup_lists, - AttrNumber *groupColIdx, - AggClauseCosts *agg_costs, - long numGroups, - Plan *result_plan) -{ - AttrNumber *top_grpColIdx = groupColIdx; - List *chain = NIL; - - /* - * Prepare the grpColIdx for the real Agg node first, because we may need - * it for sorting - */ - if (parse->groupingSets) - top_grpColIdx = remap_groupColIdx(root, llast(rollup_groupclauses)); - - /* - * If we need a Sort operation on the input, generate that. - */ - if (need_sort_for_grouping) - { - result_plan = (Plan *) - make_sort_from_groupcols(root, - llast(rollup_groupclauses), - top_grpColIdx, - result_plan); - } - - /* - * Generate the side nodes that describe the other sort and group - * operations besides the top one. - */ - if (list_length(rollup_groupclauses) > 1) - { - ListCell *lc, - *lc2; - - Assert(list_length(rollup_groupclauses) == list_length(rollup_lists)); - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + if (parse->rowMarks) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); - AttrNumber *new_grpColIdx; - Plan *sort_plan; - Plan *agg_plan; - - /* We want to iterate over all but the last rollup list elements */ - if (lnext(lc) == NULL) - break; + path = (Path *) create_lockrows_path(root, final_rel, path, + root->rowMarks, + SS_assign_special_param(root)); + } - new_grpColIdx = remap_groupColIdx(root, groupClause); + /* + * If there is a LIMIT/OFFSET clause, add the LIMIT node. 
+ */ + if (limit_needed(parse)) + { + path = (Path *) create_limit_path(root, final_rel, path, + parse->limitOffset, + parse->limitCount, + offset_est, count_est); + } - sort_plan = (Plan *) - make_sort_from_groupcols(root, - groupClause, - new_grpColIdx, - result_plan); + /* + * If this is an INSERT/UPDATE/DELETE, and we're not being called from + * inheritance_planner, add the ModifyTable node. + */ + if (parse->commandType != CMD_SELECT && !inheritance_update) + { + List *withCheckOptionLists; + List *returningLists; + List *rowMarks; /* - * sort_plan includes the cost of result_plan, which is not what - * we want (since we'll not actually run that plan again). So - * correct the cost figures. + * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if + * needed. */ - sort_plan->startup_cost -= result_plan->total_cost; - sort_plan->total_cost -= result_plan->total_cost; - - agg_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - AGG_SORTED, - agg_costs, - list_length(linitial(gsets)), - new_grpColIdx, - extract_grouping_ops(groupClause), - gsets, - numGroups, - false, - true, - sort_plan); + if (parse->withCheckOptions) + withCheckOptionLists = list_make1(parse->withCheckOptions); + else + withCheckOptionLists = NIL; + + if (parse->returningList) + returningLists = list_make1(parse->returningList); + else + returningLists = NIL; /* - * Nuke stuff we don't need to avoid bloating debug output. + * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node + * will have dealt with fetching non-locked marked rows, else we + * need to have ModifyTable do that. 
*/ - sort_plan->targetlist = NIL; - sort_plan->lefttree = NULL; - - agg_plan->targetlist = NIL; - agg_plan->qual = NIL; + if (parse->rowMarks) + rowMarks = NIL; + else + rowMarks = root->rowMarks; - chain = lappend(chain, agg_plan); + path = (Path *) + create_modifytable_path(root, final_rel, + parse->commandType, + parse->canSetTag, + parse->resultRelation, + list_make1_int(parse->resultRelation), + list_make1(path), + list_make1(root), + withCheckOptionLists, + returningLists, + rowMarks, + parse->onConflict, + SS_assign_special_param(root)); } - } - - /* - * Now make the final Agg node - */ - { - List *groupClause = (List *) llast(rollup_groupclauses); - List *gsets = rollup_lists ? (List *) llast(rollup_lists) : NIL; - int numGroupCols; - ListCell *lc; - - if (gsets) - numGroupCols = list_length(linitial(gsets)); - else - numGroupCols = list_length(parse->groupClause); - - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, - agg_costs, - numGroupCols, - top_grpColIdx, - extract_grouping_ops(groupClause), - gsets, - numGroups, - false, - true, - result_plan); - - ((Agg *) result_plan)->chain = chain; - - /* - * Add the additional costs. But only the total costs count, since the - * additional sorts aren't run on startup. - */ - foreach(lc, chain) - { - Plan *subplan = lfirst(lc); - result_plan->total_cost += subplan->total_cost; - } + /* And shove it into final_rel */ + add_path(final_rel, path); } - return result_plan; + /* Note: currently, we leave it to callers to do set_cheapest() */ } -/* - * add_tlist_costs_to_plan - * - * Estimate the execution costs associated with evaluating the targetlist - * expressions, and add them to the cost estimates for the Plan node. - * - * If the tlist contains set-returning functions, also inflate the Plan's cost - * and plan_rows estimates accordingly. 
(Hence, this must be called *after* - * any logic that uses plan_rows to, eg, estimate qual evaluation costs.) - * - * Note: during initial stages of planning, we mostly consider plan nodes with - * "flat" tlists, containing just Vars and PlaceHolderVars. The evaluation - * cost of Vars is zero according to the model used by cost_qual_eval() (or if - * you prefer, the cost is factored into cpu_tuple_cost). The evaluation cost - * of a PHV's expression is charged as part of the scan cost of whichever plan - * node first computes it, and then subsequent references to the PHV can be - * taken as having cost zero. Thus we can avoid worrying about tlist cost - * as such throughout query_planner() and subroutines. But once we apply a - * tlist that might contain actual operators, sub-selects, etc, we'd better - * account for its cost. Any set-returning functions in the tlist must also - * affect the estimated rowcount. - * - * Once grouping_planner() has applied a general tlist to the topmost - * scan/join plan node, any tlist eval cost for added-on nodes should be - * accounted for as we create those nodes. Presently, of the node types we - * can add on later, only Agg, WindowAgg, and Group project new tlists (the - * rest just copy their input tuples) --- so make_agg(), make_windowagg() and - * make_group() are responsible for calling this function to account for their - * tlist costs. - */ -void -add_tlist_costs_to_plan(PlannerInfo *root, Plan *plan, List *tlist) -{ - QualCost tlist_cost; - double tlist_rows; - - cost_qual_eval(&tlist_cost, tlist, root); - plan->startup_cost += tlist_cost.startup; - plan->total_cost += tlist_cost.startup + - tlist_cost.per_tuple * plan->plan_rows; - - tlist_rows = tlist_returns_set_rows(tlist); - if (tlist_rows > 1) - { - /* - * We assume that execution costs of the tlist proper were all - * accounted for by cost_qual_eval. 
However, it still seems - * appropriate to charge something more for the executor's general - * costs of processing the added tuples. The cost is probably less - * than cpu_tuple_cost, though, so we arbitrarily use half of that. - */ - plan->total_cost += plan->plan_rows * (tlist_rows - 1) * - cpu_tuple_cost / 2; - - plan->plan_rows *= tlist_rows; - } -} /* * Detect whether a plan node is a "dummy" plan created when a relation @@ -2987,7 +2142,7 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) * for OFFSET but a little bit bogus for LIMIT: effectively we estimate * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's * usual practice of never estimating less than one row.) These values will - * be passed to make_limit, which see if you change this code. + * be passed to create_limit_path, which see if you change this code. * * The return value is the suitably adjusted tuple_fraction to use for * planning the query. This adjustment is not overridable, since it reflects @@ -3839,333 +2994,767 @@ standard_qp_callback(PlannerInfo *root, void *extra) } /* - * choose_hashed_grouping - should we use hashed grouping? + * Estimate number of groups produced by grouping clauses (1 if not grouping) * - * Returns TRUE to select hashing, FALSE to select sorting. 
+ * path_rows: number of output rows from scan/join step + * rollup_lists: list of grouping sets, or NIL if not doing grouping sets + * rollup_groupclauses: list of grouping clauses for grouping sets, + * or NIL if not doing grouping sets */ -static bool -choose_hashed_grouping(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCosts *agg_costs) +static double +get_number_of_groups(PlannerInfo *root, + double path_rows, + List *rollup_lists, + List *rollup_groupclauses) { Query *parse = root->parse; - int numGroupCols = list_length(parse->groupClause); - bool can_hash; - bool can_sort; - Size hashentrysize; - List *target_pathkeys; - List *current_pathkeys; - Path hashed_p; - Path sorted_p; - int sorted_p_width; - - /* - * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY - * aggregates. (Doing so would imply storing *all* the input values in - * the hash table, and/or running many sorts in parallel, either of which - * seems like a certain loser.) We similarly don't support ordered-set - * aggregates in hashed aggregation, but that case is included in the - * numOrderedAggs count. 
- */ - can_hash = (agg_costs->numOrderedAggs == 0 && - grouping_is_hashable(parse->groupClause)); - can_sort = grouping_is_sortable(parse->groupClause); + double dNumGroups; - /* Quick out if only one choice is workable */ - if (!(can_hash && can_sort)) + if (parse->groupClause) { - if (can_hash) - return true; - else if (can_sort) - return false; + List *groupExprs; + + if (parse->groupingSets) + { + /* Add up the estimates for each grouping set */ + ListCell *lc, + *lc2; + + dNumGroups = 0; + forboth(lc, rollup_groupclauses, lc2, rollup_lists) + { + List *groupClause = (List *) lfirst(lc); + List *gsets = (List *) lfirst(lc2); + ListCell *lc3; + + groupExprs = get_sortgrouplist_exprs(groupClause, + parse->targetList); + + foreach(lc3, gsets) + { + List *gset = (List *) lfirst(lc3); + + dNumGroups += estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + } + } + } else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("could not implement GROUP BY"), - errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + { + /* Plain GROUP BY */ + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + + dNumGroups = estimate_num_groups(root, groupExprs, path_rows, + NULL); + } + } + else if (parse->groupingSets) + { + /* Empty grouping sets ... one result row for each one */ + dNumGroups = list_length(parse->groupingSets); } + else if (parse->hasAggs || root->hasHavingQual) + { + /* Plain aggregation, one result row */ + dNumGroups = 1; + } + else + { + /* Not grouping */ + dNumGroups = 1; + } + + return dNumGroups; +} + +/* + * create_grouping_paths + * + * Build a new upperrel containing Paths for grouping and/or aggregation. 
+ * + * input_rel: contains the source-data Paths + * target: the pathtarget for the result Paths to compute + * groupColIdx: array of indexes of grouping columns in the source data + * rollup_lists: list of grouping sets, or NIL if not doing grouping sets + * rollup_groupclauses: list of grouping clauses for grouping sets, + * or NIL if not doing grouping sets + * + * We need to consider sorted and hashed aggregation in the same function, + * because otherwise (1) it would be harder to throw an appropriate error + * message if neither way works, and (2) we should not allow enable_hashagg or + * hashtable size considerations to dissuade us from using hashing if sorting + * is not possible. + */ +static RelOptInfo * +create_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + AttrNumber *groupColIdx, + List *rollup_lists, + List *rollup_groupclauses) +{ + Query *parse = root->parse; + Path *cheapest_path = input_rel->cheapest_total_path; + RelOptInfo *grouped_rel; + AggClauseCosts agg_costs; + double dNumGroups; + bool allow_hash; + ListCell *lc; - /* Prefer sorting when enable_hashagg is off */ - if (!enable_hashagg) - return false; + /* For now, do all work in the (GROUP_AGG, NULL) upperrel */ + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); /* - * Don't do it if it doesn't look like the hashtable will fit into - * work_mem. + * Check for degenerate grouping. */ + if ((root->hasHavingQual || parse->groupingSets) && + !parse->hasAggs && parse->groupClause == NIL) + { + /* + * We have a HAVING qual and/or grouping sets, but no aggregates and + * no GROUP BY (which implies that the grouping sets are all empty). + * + * This is a degenerate case in which we are supposed to emit either + * zero or one row for each grouping set depending on whether HAVING + * succeeds. Furthermore, there cannot be any variables in either + * HAVING or the targetlist, so we actually do not need the FROM table + * at all! 
We can just throw away the plan-so-far and generate a + * Result node. This is a sufficiently unusual corner case that it's + * not worth contorting the structure of this module to avoid having + * to generate the earlier paths in the first place. + */ + int nrows = list_length(parse->groupingSets); + Path *path; + + if (nrows > 1) + { + /* + * Doesn't seem worthwhile writing code to cons up a + * generate_series or a values scan to emit multiple rows. Instead + * just make N clones and append them. (With a volatile HAVING + * clause, this means you might get between 0 and N output rows. + * Offhand I think that's desired.) + */ + List *paths = NIL; + + while (--nrows >= 0) + { + path = (Path *) + create_result_path(grouped_rel, + target, + (List *) parse->havingQual); + paths = lappend(paths, path); + } + path = (Path *) + create_append_path(grouped_rel, + paths, + NULL, + 0); + path->pathtarget = target; + } + else + { + /* No grouping sets, or just one, so one output row */ + path = (Path *) + create_result_path(grouped_rel, + target, + (List *) parse->havingQual); + } + + add_path(grouped_rel, path); - /* Estimate per-hash-entry space at tuple width... */ - hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) + - MAXALIGN(SizeofMinimalTupleHeader); - /* plus space for pass-by-ref transition values... */ - hashentrysize += agg_costs->transitionSpace; - /* plus the per-hash-entry overhead */ - hashentrysize += hash_agg_entry_size(agg_costs->numAggs); + /* No need to consider any other alternatives. */ + set_cheapest(grouped_rel); - if (hashentrysize * dNumGroups > work_mem * 1024L) - return false; + return grouped_rel; + } /* - * When we have both GROUP BY and DISTINCT, use the more-rigorous of - * DISTINCT and ORDER BY as the assumed required output sort order. 
This - * is an oversimplification because the DISTINCT might get implemented via - * hashing, but it's not clear that the case is common enough (or that our - * estimates are good enough) to justify trying to solve it exactly. + * Collect statistics about aggregates for estimating costs. Note: we do + * not detect duplicate aggregates here; a somewhat-overestimated cost is + * okay for our purposes. */ - if (list_length(root->distinct_pathkeys) > - list_length(root->sort_pathkeys)) - target_pathkeys = root->distinct_pathkeys; - else - target_pathkeys = root->sort_pathkeys; + MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + count_agg_clauses(root, (Node *) target->exprs, &agg_costs); + count_agg_clauses(root, parse->havingQual, &agg_costs); + } + + /* + * Estimate number of groups. Note: if cheapest_path is a dummy, it will + * have zero rowcount estimate, which we don't want to use for fear of + * divide-by-zero. Hence clamp. + */ + dNumGroups = get_number_of_groups(root, + clamp_row_est(cheapest_path->rows), + rollup_lists, + rollup_groupclauses); /* - * See if the estimated cost is no more than doing it the other way. While - * avoiding the need for sorted input is usually a win, the fact that the - * output won't be sorted may be a loss; so we need to do an actual cost - * comparison. + * Consider sort-based implementations of grouping, if possible. (Note + * that if groupClause is empty, grouping_is_sortable() is trivially true, + * and all the pathkeys_contained_in() tests will succeed too, so that + * we'll consider every surviving input path.) + */ + if (grouping_is_sortable(parse->groupClause)) + { + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest-total path. 
+ */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_path || is_sorted) + { + /* Sort the cheapest-total path if it isn't already sorted */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + /* + * We have grouping sets, possibly with aggregation. Make + * a GroupingSetsPath. + */ + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + groupColIdx, + rollup_lists, + rollup_groupclauses, + &agg_costs, + dNumGroups)); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + target, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + target, + parse->groupClause, + (List *) parse->havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + } + } + + /* + * Consider hash-based implementations of grouping, if possible. * - * We need to consider cheapest_path + hashagg [+ final sort] versus - * either cheapest_path [+ sort] + group or agg [+ final sort] or - * presorted_path + group or agg [+ final sort] where brackets indicate a - * step that may not be needed. We assume grouping_planner() will have - * passed us a presorted path only if it's a winner compared to - * cheapest_path for this purpose. 
+ * Hashed aggregation only applies if we're grouping. We currently can't + * hash if there are grouping sets, though. + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY + * aggregates. (Doing so would imply storing *all* the input values in + * the hash table, and/or running many sorts in parallel, either of which + * seems like a certain loser.) We similarly don't support ordered-set + * aggregates in hashed aggregation, but that case is also included in the + * numOrderedAggs count. * - * These path variables are dummies that just hold cost fields; we don't - * make actual Paths for these steps. + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. */ - cost_agg(&hashed_p, root, AGG_HASHED, agg_costs, - numGroupCols, dNumGroups, - cheapest_path->startup_cost, cheapest_path->total_cost, - path_rows); - /* Result of hashed agg is always unsorted */ - if (target_pathkeys) - cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost, - dNumGroups, cheapest_path->pathtarget->width, - 0.0, work_mem, limit_tuples); - - if (sorted_path) + allow_hash = (parse->groupClause != NIL && + parse->groupingSets == NIL && + agg_costs.numOrderedAggs == 0); + + /* Consider reasons to disable hashing, but only if we can sort instead */ + if (allow_hash && grouped_rel->pathlist != NIL) { - sorted_p.startup_cost = sorted_path->startup_cost; - sorted_p.total_cost = sorted_path->total_cost; - sorted_p_width = sorted_path->pathtarget->width; - current_pathkeys = sorted_path->pathkeys; + if (!enable_hashagg) + allow_hash = false; + else + { + /* + * Don't hash if it doesn't look like the hashtable will fit into + * work_mem. + */ + Size hashentrysize; + + /* Estimate per-hash-entry space at tuple width... */ + hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) + + MAXALIGN(SizeofMinimalTupleHeader); + /* plus space for pass-by-ref transition values... 
*/ + hashentrysize += agg_costs.transitionSpace; + /* plus the per-hash-entry overhead */ + hashentrysize += hash_agg_entry_size(agg_costs.numAggs); + + if (hashentrysize * dNumGroups > work_mem * 1024L) + allow_hash = false; + } } - else + + if (allow_hash && grouping_is_hashable(parse->groupClause)) { - sorted_p.startup_cost = cheapest_path->startup_cost; - sorted_p.total_cost = cheapest_path->total_cost; - sorted_p_width = cheapest_path->pathtarget->width; - current_pathkeys = cheapest_path->pathkeys; + /* + * We just need an Agg over the cheapest-total input path, since input + * order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + cheapest_path, + target, + AGG_HASHED, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups)); } - if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) + + /* Give a helpful error if we failed to find any implementation */ + if (grouped_rel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* Now choose the best path(s) */ + set_cheapest(grouped_rel); + + return grouped_rel; +} + +/* + * create_window_paths + * + * Build a new upperrel containing Paths for window-function evaluation. 
+ * + * input_rel: contains the source-data Paths + * base_tlist: result of make_windowInputTargetList + * tlist: query's final target list (which is what output paths should emit) + * wflists: result of find_window_functions + * activeWindows: result of select_active_windows + */ +static RelOptInfo * +create_window_paths(PlannerInfo *root, + RelOptInfo *input_rel, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows) +{ + RelOptInfo *window_rel; + ListCell *lc; + + /* For now, do all work in the (WINDOW, NULL) upperrel */ + window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL); + + /* + * Consider computing window functions starting from the existing + * cheapest-total path (which will likely require a sort) as well as any + * existing paths that satisfy root->window_pathkeys (which won't). + */ + foreach(lc, input_rel->pathlist) { - cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost, - path_rows, sorted_p_width, - 0.0, work_mem, -1.0); - current_pathkeys = root->group_pathkeys; + Path *path = (Path *) lfirst(lc); + + if (path == input_rel->cheapest_total_path || + pathkeys_contained_in(root->window_pathkeys, path->pathkeys)) + create_one_window_path(root, + window_rel, + path, + base_tlist, + tlist, + wflists, + activeWindows); } - if (parse->hasAggs) - cost_agg(&sorted_p, root, AGG_SORTED, agg_costs, - numGroupCols, dNumGroups, - sorted_p.startup_cost, sorted_p.total_cost, - path_rows); - else - cost_group(&sorted_p, root, numGroupCols, dNumGroups, - sorted_p.startup_cost, sorted_p.total_cost, - path_rows); - /* The Agg or Group node will preserve ordering */ - if (target_pathkeys && - !pathkeys_contained_in(target_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost, - dNumGroups, sorted_p_width, - 0.0, work_mem, limit_tuples); + /* Now choose the best path(s) */ + set_cheapest(window_rel); + + return window_rel; +} + +/* + * Stack window-function implementation 
steps atop the given Path, and + * add the result to window_rel. + * + * window_rel: upperrel to contain result + * path: input Path to use + * base_tlist: result of make_windowInputTargetList + * tlist: query's final target list (which is what output paths should emit) + * wflists: result of find_window_functions + * activeWindows: result of select_active_windows + */ +static void +create_one_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows) +{ + List *window_tlist; + ListCell *l; /* - * Now make the decision using the top-level tuple fraction. + * Since each window clause could require a different sort order, we stack + * up a WindowAgg node for each clause, with sort steps between them as + * needed. (We assume that select_active_windows chose a good order for + * executing the clauses in.) + * + * The "base" targetlist for all steps of the windowing process is a flat + * tlist of all Vars and Aggs needed in the result. (In some cases we + * wouldn't need to propagate all of these all the way to the top, since + * they might only be needed as inputs to WindowFuncs. It's probably not + * worth trying to optimize that though.) We also add window partitioning + * and sorting expressions to the base tlist, to ensure they're computed + * only once at the bottom of the stack (that's critical for volatile + * functions). As we climb up the stack, we'll add outputs for the + * WindowFuncs computed at each level. + */ + window_tlist = base_tlist; + + /* + * Apply base_tlist to the given base path. If that path node is one that + * cannot do expression evaluation, we must insert a Result node to + * project the desired tlist. (In some cases this might not really be + * required, but it's not worth trying to avoid it.) 
If the query has + * both grouping and windowing, base_tlist was already applied to the + * input path, but apply_projection_to_path is smart about that. + * + * The seemingly redundant create_pathtarget() steps here are important to + * ensure that each path node has a separately modifiable tlist. */ - if (compare_fractional_path_costs(&hashed_p, &sorted_p, - tuple_fraction) < 0) + path = apply_projection_to_path(root, window_rel, + path, + create_pathtarget(root, base_tlist)); + + foreach(l, activeWindows) { - /* Hashed is cheaper, so use it */ - return true; + WindowClause *wc = (WindowClause *) lfirst(l); + List *window_pathkeys; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist); + + /* Sort if necessary */ + if (!pathkeys_contained_in(window_pathkeys, path->pathkeys)) + { + path = (Path *) create_sort_path(root, window_rel, + path, + window_pathkeys, + -1.0); + } + + if (lnext(l)) + { + /* Add the current WindowFuncs to the running tlist */ + window_tlist = add_to_flat_tlist(window_tlist, + wflists->windowFuncs[wc->winref]); + } + else + { + /* Install the final tlist in the topmost WindowAgg */ + window_tlist = tlist; + } + + path = (Path *) + create_windowagg_path(root, window_rel, path, + create_pathtarget(root, window_tlist), + wflists->windowFuncs[wc->winref], + wc, + window_pathkeys); } - return false; + + add_path(window_rel, path); } /* - * choose_hashed_distinct - should we use hashing for DISTINCT? + * create_distinct_paths * - * This is fairly similar to choose_hashed_grouping, but there are enough - * differences that it doesn't seem worth trying to unify the two functions. - * (One difference is that we sometimes apply this after forming a Plan, - * so the input alternatives can't be represented as Paths --- instead we - * pass in the costs as individual variables.) + * Build a new upperrel containing Paths for SELECT DISTINCT evaluation. * - * But note that making the two choices independently is a bit bogus in - * itself. 
If the two could be combined into a single choice operation - * it'd probably be better, but that seems far too unwieldy to be practical, - * especially considering that the combination of GROUP BY and DISTINCT - * isn't very common in real queries. By separating them, we are giving - * extra preference to using a sorting implementation when a common sort key - * is available ... and that's not necessarily wrong anyway. + * input_rel: contains the source-data Paths * - * Returns TRUE to select hashing, FALSE to select sorting. + * Note: input paths should already compute the desired pathtarget, since + * Sort/Unique won't project anything. */ -static bool -choose_hashed_distinct(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Cost cheapest_startup_cost, Cost cheapest_total_cost, - int cheapest_path_width, - Cost sorted_startup_cost, Cost sorted_total_cost, - int sorted_path_width, - List *sorted_pathkeys, - double dNumDistinctRows) +static RelOptInfo * +create_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel) { Query *parse = root->parse; - int numDistinctCols = list_length(parse->distinctClause); - bool can_sort; - bool can_hash; - Size hashentrysize; - List *current_pathkeys; - List *needed_pathkeys; - Path hashed_p; - Path sorted_p; - - /* - * If we have a sortable DISTINCT ON clause, we always use sorting. This - * enforces the expected behavior of DISTINCT ON. 
- */ - can_sort = grouping_is_sortable(parse->distinctClause); - if (can_sort && parse->hasDistinctOn) - return false; + Path *cheapest_input_path = input_rel->cheapest_total_path; + RelOptInfo *distinct_rel; + double numDistinctRows; + bool allow_hash; + Path *path; + ListCell *lc; - can_hash = grouping_is_hashable(parse->distinctClause); + /* For now, do all work in the (DISTINCT, NULL) upperrel */ + distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); - /* Quick out if only one choice is workable */ - if (!(can_hash && can_sort)) + /* Estimate number of distinct rows there will be */ + if (parse->groupClause || parse->groupingSets || parse->hasAggs || + root->hasHavingQual) { - if (can_hash) - return true; - else if (can_sort) - return false; - else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("could not implement DISTINCT"), - errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + /* + * If there was grouping or aggregation, use the number of input rows + * as the estimated number of DISTINCT rows (ie, assume the input is + * already mostly unique). + */ + numDistinctRows = cheapest_input_path->rows; } + else + { + /* + * Otherwise, the UNIQUE filter has effects comparable to GROUP BY. + */ + List *distinctExprs; - /* Prefer sorting when enable_hashagg is off */ - if (!enable_hashagg) - return false; + distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + numDistinctRows = estimate_num_groups(root, distinctExprs, + cheapest_input_path->rows, + NULL); + } /* - * Don't do it if it doesn't look like the hashtable will fit into - * work_mem. + * Consider sort-based implementations of DISTINCT, if possible. */ + if (grouping_is_sortable(parse->distinctClause)) + { + /* + * First, if we have any adequately-presorted paths, just stick a + * Unique node on those. Then consider doing an explicit sort of the + * cheapest input path and Unique'ing that. 
+ * + * When we have DISTINCT ON, we must sort by the more rigorous of + * DISTINCT and ORDER BY, else it won't have the desired behavior. + * Also, if we do have to do an explicit sort, we might as well use + * the more rigorous ordering to avoid a second sort later. (Note + * that the parser will have ensured that one clause is a prefix of + * the other.) + */ + List *needed_pathkeys; + + if (parse->hasDistinctOn && + list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + needed_pathkeys = root->sort_pathkeys; + else + needed_pathkeys = root->distinct_pathkeys; - /* Estimate per-hash-entry space at tuple width... */ - hashentrysize = MAXALIGN(cheapest_path_width) + - MAXALIGN(SizeofMinimalTupleHeader); - /* plus the per-hash-entry overhead */ - hashentrysize += hash_agg_entry_size(0); + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } + } - if (hashentrysize * dNumDistinctRows > work_mem * 1024L) - return false; + /* For explicit-sort case, always use the more rigorous clause */ + if (list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + { + needed_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + needed_pathkeys)); + } + else + needed_pathkeys = root->distinct_pathkeys; + + path = cheapest_input_path; + if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + path = (Path *) create_sort_path(root, distinct_rel, + path, + needed_pathkeys, + -1.0); + + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } /* - * See if the estimated cost is no more than doing it the other way. 
While - * avoiding the need for sorted input is usually a win, the fact that the - * output won't be sorted may be a loss; so we need to do an actual cost - * comparison. + * Consider hash-based implementations of DISTINCT, if possible. * - * We need to consider cheapest_path + hashagg [+ final sort] versus - * sorted_path [+ sort] + group [+ final sort] where brackets indicate a - * step that may not be needed. + * If we were not able to make any other types of path, we *must* hash or + * die trying. If we do have other choices, there are several things that + * should prevent selection of hashing: if the query uses DISTINCT ON + * (because it won't really have the expected behavior if we hash), or if + * enable_hashagg is off, or if it looks like the hashtable will exceed + * work_mem. * - * These path variables are dummies that just hold cost fields; we don't - * make actual Paths for these steps. + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. */ - cost_agg(&hashed_p, root, AGG_HASHED, NULL, - numDistinctCols, dNumDistinctRows, - cheapest_startup_cost, cheapest_total_cost, - path_rows); + if (distinct_rel->pathlist == NIL) + allow_hash = true; /* we have no alternatives */ + else if (parse->hasDistinctOn || !enable_hashagg) + allow_hash = false; /* policy-based decision not to hash */ + else + { + Size hashentrysize; - /* - * Result of hashed agg is always unsorted, so if ORDER BY is present we - * need to charge for the final sort. - */ - if (parse->sortClause) - cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, - dNumDistinctRows, cheapest_path_width, - 0.0, work_mem, limit_tuples); + /* Estimate per-hash-entry space at tuple width... */ + hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) + + MAXALIGN(SizeofMinimalTupleHeader); + /* plus the per-hash-entry overhead */ + hashentrysize += hash_agg_entry_size(0); - /* - * Now for the GROUP case. 
See comments in grouping_planner about the - * sorting choices here --- this code should match that code. - */ - sorted_p.startup_cost = sorted_startup_cost; - sorted_p.total_cost = sorted_total_cost; - current_pathkeys = sorted_pathkeys; - if (parse->hasDistinctOn && - list_length(root->distinct_pathkeys) < - list_length(root->sort_pathkeys)) - needed_pathkeys = root->sort_pathkeys; - else - needed_pathkeys = root->distinct_pathkeys; - if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) + /* Allow hashing only if hashtable is predicted to fit in work_mem */ + allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L); + } + + if (allow_hash && grouping_is_hashable(parse->distinctClause)) { - if (list_length(root->distinct_pathkeys) >= - list_length(root->sort_pathkeys)) - current_pathkeys = root->distinct_pathkeys; - else - current_pathkeys = root->sort_pathkeys; - cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost, - path_rows, sorted_path_width, - 0.0, work_mem, -1.0); + /* Generate hashed aggregate path --- no sort needed */ + add_path(distinct_rel, (Path *) + create_agg_path(root, + distinct_rel, + cheapest_input_path, + cheapest_input_path->pathtarget, + AGG_HASHED, + parse->distinctClause, + NIL, + NULL, + numDistinctRows)); } - cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, - sorted_p.startup_cost, sorted_p.total_cost, - path_rows); - if (parse->sortClause && - !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, - dNumDistinctRows, sorted_path_width, - 0.0, work_mem, limit_tuples); - /* - * Now make the decision using the top-level tuple fraction. 
- */ - if (compare_fractional_path_costs(&hashed_p, &sorted_p, - tuple_fraction) < 0) + /* Give a helpful error if we failed to find any implementation */ + if (distinct_rel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement DISTINCT"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* Now choose the best path(s) */ + set_cheapest(distinct_rel); + + return distinct_rel; +} + +/* + * create_ordered_paths + * + * Build a new upperrel containing Paths for ORDER BY evaluation. + * + * All paths in the result must satisfy the ORDER BY ordering. + * The only new path we need consider is an explicit sort on the + * cheapest-total existing path. + * + * input_rel: contains the source-data Paths + * limit_tuples: estimated bound on the number of output tuples, + * or -1 if no LIMIT or couldn't estimate + */ +static RelOptInfo * +create_ordered_paths(PlannerInfo *root, + RelOptInfo *input_rel, + double limit_tuples) +{ + Path *cheapest_input_path = input_rel->cheapest_total_path; + RelOptInfo *ordered_rel; + ListCell *lc; + + /* For now, do all work in the (ORDERED, NULL) upperrel */ + ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL); + + foreach(lc, input_rel->pathlist) { - /* Hashed is cheaper, so use it */ - return true; + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->sort_pathkeys, + path->pathkeys); + if (path == cheapest_input_path || is_sorted) + { + if (!is_sorted) + { + /* An explicit sort here can take advantage of LIMIT */ + path = (Path *) create_sort_path(root, + ordered_rel, + path, + root->sort_pathkeys, + limit_tuples); + } + add_path(ordered_rel, path); + } } - return false; + + /* + * No need to bother with set_cheapest here; grouping_planner does not + * need us to do it. 
+ */ + Assert(ordered_rel->pathlist != NIL); + + return ordered_rel; } + /* - * make_subplanTargetList - * Generate appropriate target list when grouping is required. + * make_scanjoin_target + * Generate appropriate PathTarget for the result of scan/join steps. * - * When grouping_planner inserts grouping or aggregation plan nodes - * above the scan/join plan constructed by query_planner+create_plan, - * we typically want the scan/join plan to emit a different target list - * than the outer plan nodes should have. This routine generates the - * correct target list for the scan/join subplan. + * If there is grouping/aggregation or window functions, we typically want the + * scan/join plan to emit a different target list than the upper plan nodes + * will (in particular, it certainly can't include any aggregate or window + * function calls). This routine generates the correct target list for the + * scan/join subplan. * * The initial target list passed from the parser already contains entries * for all ORDER BY and GROUP BY expressions, but it will not have entries * for variables used only in HAVING clauses; so we need to add those * variables to the subplan target list. Also, we flatten all expressions - * except GROUP BY items into their component variables; the other expressions - * will be computed by the inserted nodes rather than by the subplan. + * except GROUP BY items into their component variables; other expressions + * will be computed by the upper plan nodes rather than by the subplan. * For example, given a query like * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; * we want to pass this targetlist to the subplan: @@ -4173,28 +3762,20 @@ choose_hashed_distinct(PlannerInfo *root, * where the a+b target will be used by the Sort/Group steps, and the * other targets will be used for computing the final results. 
* - * If we are grouping or aggregating, *and* there are no non-Var grouping - * expressions, then the returned tlist is effectively dummy; we do not - * need to force it to be evaluated, because all the Vars it contains - * should be present in the "flat" tlist generated by create_plan, though - * possibly in a different order. In that case we'll use create_plan's tlist, - * and the tlist made here is only needed as input to query_planner to tell - * it which Vars are needed in the output of the scan/join plan. + * We also convert from targetlist format (List of TargetEntry nodes) + * into PathTarget format, which is more compact and includes cost/width. * * 'tlist' is the query's target list. * 'groupColIdx' receives an array of column numbers for the GROUP BY * expressions (if there are any) in the returned target list. - * 'need_tlist_eval' is set true if we really need to evaluate the - * returned tlist as-is. (Note: locate_grouping_columns assumes - * that if this is FALSE, all grouping columns are simple Vars.) * - * The result is the targetlist to be passed to query_planner. + * The result is the PathTarget to be applied to the Paths returned from + * query_planner(). */ -static List * -make_subplanTargetList(PlannerInfo *root, - List *tlist, - AttrNumber **groupColIdx, - bool *need_tlist_eval) +static PathTarget * +make_scanjoin_target(PlannerInfo *root, + List *tlist, + AttrNumber **groupColIdx) { Query *parse = root->parse; List *sub_tlist; @@ -4205,15 +3786,12 @@ make_subplanTargetList(PlannerInfo *root, *groupColIdx = NULL; /* - * If we're not grouping or aggregating, there's nothing to do here; - * query_planner should receive the unmodified target list. + * If we're not grouping or aggregating or windowing, there's nothing to + * do here except convert to PathTarget format. 
*/ - if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual && - !parse->hasWindowFuncs) - { - *need_tlist_eval = true; - return tlist; - } + if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && + !root->hasHavingQual && !parse->hasWindowFuncs) + return create_pathtarget(root, tlist); /* * Otherwise, we must build a tlist containing all grouping columns, plus @@ -4221,7 +3799,6 @@ make_subplanTargetList(PlannerInfo *root, */ sub_tlist = NIL; non_group_cols = NIL; - *need_tlist_eval = false; /* only eval if not flat tlist */ numCols = list_length(parse->groupClause); if (numCols > 0) @@ -4257,13 +3834,11 @@ make_subplanTargetList(PlannerInfo *root, list_length(sub_tlist) + 1, NULL, false); + newtle->ressortgroupref = tle->ressortgroupref; sub_tlist = lappend(sub_tlist, newtle); Assert(grpColIdx[colno] == 0); /* no dups expected */ grpColIdx[colno] = newtle->resno; - - if (!(newtle->expr && IsA(newtle->expr, Var))) - *need_tlist_eval = true; /* tlist contains non Vars */ } else { @@ -4308,7 +3883,7 @@ make_subplanTargetList(PlannerInfo *root, list_free(non_group_vars); list_free(non_group_cols); - return sub_tlist; + return create_pathtarget(root, sub_tlist); } /* @@ -4343,59 +3918,6 @@ get_grouping_column_index(Query *parse, TargetEntry *tle) } /* - * locate_grouping_columns - * Locate grouping columns in the tlist chosen by create_plan. - * - * This is only needed if we don't use the sub_tlist chosen by - * make_subplanTargetList. We have to forget the column indexes found - * by that routine and re-locate the grouping exprs in the real sub_tlist. - * We assume the grouping exprs are just Vars (see make_subplanTargetList). - */ -static void -locate_grouping_columns(PlannerInfo *root, - List *tlist, - List *sub_tlist, - AttrNumber *groupColIdx) -{ - int keyno = 0; - ListCell *gl; - - /* - * No work unless grouping. 
- */ - if (!root->parse->groupClause) - { - Assert(groupColIdx == NULL); - return; - } - Assert(groupColIdx != NULL); - - foreach(gl, root->parse->groupClause) - { - SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); - Var *groupexpr = (Var *) get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te; - - /* - * The grouping column returned by create_plan might not have the same - * typmod as the original Var. (This can happen in cases where a - * set-returning function has been inlined, so that we now have more - * knowledge about what it returns than we did when the original Var - * was created.) So we can't use tlist_member() to search the tlist; - * instead use tlist_member_match_var. For safety, still check that - * the vartype matches. - */ - if (!(groupexpr && IsA(groupexpr, Var))) - elog(ERROR, "grouping column is not a Var as expected"); - te = tlist_member_match_var(groupexpr, sub_tlist); - if (!te) - elog(ERROR, "failed to locate grouping columns"); - Assert(((Var *) te->expr)->vartype == groupexpr->vartype); - groupColIdx[keyno++] = te->resno; - } -} - -/* * postprocess_setop_tlist * Fix up targetlist returned by plan_set_operations(). * @@ -4506,28 +4028,31 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) * make_windowInputTargetList * Generate appropriate target list for initial input to WindowAgg nodes. * - * When grouping_planner inserts one or more WindowAgg nodes into the plan, - * this function computes the initial target list to be computed by the node - * just below the first WindowAgg. This list must contain all values needed - * to evaluate the window functions, compute the final target list, and - * perform any required final sort step. If multiple WindowAggs are needed, - * each intermediate one adds its window function results onto this tlist; - * only the topmost WindowAgg computes the actual desired target list. 
+ * When the query has window functions, this function computes the initial + * target list to be computed by the node just below the first WindowAgg. + * This list must contain all values needed to evaluate the window functions, + * compute the final target list, and perform any required final sort step. + * If multiple WindowAggs are needed, each intermediate one adds its window + * function results onto this tlist; only the topmost WindowAgg computes the + * actual desired target list. * - * This function is much like make_subplanTargetList, though not quite enough + * This function is much like make_scanjoin_target, though not quite enough * like it to share code. As in that function, we flatten most expressions * into their component variables. But we do not want to flatten window * PARTITION BY/ORDER BY clauses, since that might result in multiple * evaluations of them, which would be bad (possibly even resulting in - * inconsistent answers, if they contain volatile functions). Also, we must - * not flatten GROUP BY clauses that were left unflattened by - * make_subplanTargetList, because we may no longer have access to the + * inconsistent answers, if they contain volatile functions). + * Also, we must not flatten GROUP BY clauses that were left unflattened by + * make_scanjoin_target, because we may no longer have access to the * individual Vars in them. * - * Another key difference from make_subplanTargetList is that we don't flatten + * Another key difference from make_scanjoin_target is that we don't flatten * Aggref expressions, since those are to be computed below the window * functions and just referenced like Vars above that. * + * XXX another difference is that this produces targetlist format not a + * PathTarget, but that should change sometime soon. + * * 'tlist' is the query's final target list. * 'activeWindows' is the list of active windows previously identified by * select_active_windows. 
@@ -4651,6 +4176,8 @@ make_windowInputTargetList(PlannerInfo *root, * The required ordering is first the PARTITION keys, then the ORDER keys. * In the future we might try to implement windowing using hashing, in which * case the ordering could be relaxed, but for now we always sort. + * + * Caution: if you change this, see createplan.c's get_column_info_for_window! */ static List * make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, @@ -4681,113 +4208,42 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, return window_pathkeys; } -/*---------- - * get_column_info_for_window - * Get the partitioning/ordering column numbers and equality operators - * for a WindowAgg node. - * - * This depends on the behavior of make_pathkeys_for_window()! +/* + * get_cheapest_fractional_path + * Find the cheapest path for retrieving a specified fraction of all + * the tuples expected to be returned by the given relation. * - * We are given the target WindowClause and an array of the input column - * numbers associated with the resulting pathkeys. In the easy case, there - * are the same number of pathkey columns as partitioning + ordering columns - * and we just have to copy some data around. However, it's possible that - * some of the original partitioning + ordering columns were eliminated as - * redundant during the transformation to pathkeys. (This can happen even - * though the parser gets rid of obvious duplicates. A typical scenario is a - * window specification "PARTITION BY x ORDER BY y" coupled with a clause - * "WHERE x = y" that causes the two sort columns to be recognized as - * redundant.) In that unusual case, we have to work a lot harder to - * determine which keys are significant. + * We interpret tuple_fraction the same way as grouping_planner. * - * The method used here is a bit brute-force: add the sort columns to a list - * one at a time and note when the resulting pathkey list gets longer. 
But - * it's a sufficiently uncommon case that a faster way doesn't seem worth - * the amount of code refactoring that'd be needed. - *---------- + * We assume set_cheapest() has been run on the given rel. */ -static void -get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist, - int numSortCols, AttrNumber *sortColIdx, - int *partNumCols, - AttrNumber **partColIdx, - Oid **partOperators, - int *ordNumCols, - AttrNumber **ordColIdx, - Oid **ordOperators) +Path * +get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) { - int numPart = list_length(wc->partitionClause); - int numOrder = list_length(wc->orderClause); + Path *best_path = rel->cheapest_total_path; + ListCell *l; - if (numSortCols == numPart + numOrder) - { - /* easy case */ - *partNumCols = numPart; - *partColIdx = sortColIdx; - *partOperators = extract_grouping_ops(wc->partitionClause); - *ordNumCols = numOrder; - *ordColIdx = sortColIdx + numPart; - *ordOperators = extract_grouping_ops(wc->orderClause); - } - else + /* If all tuples will be retrieved, just return the cheapest-total path */ + if (tuple_fraction <= 0.0) + return best_path; + + /* Convert absolute # of tuples to a fraction; no need to clamp */ + if (tuple_fraction >= 1.0) + tuple_fraction /= best_path->rows; + + foreach(l, rel->pathlist) { - List *sortclauses; - List *pathkeys; - int scidx; - ListCell *lc; - - /* first, allocate what's certainly enough space for the arrays */ - *partNumCols = 0; - *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber)); - *partOperators = (Oid *) palloc(numPart * sizeof(Oid)); - *ordNumCols = 0; - *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber)); - *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid)); - sortclauses = NIL; - pathkeys = NIL; - scidx = 0; - foreach(lc, wc->partitionClause) - { - SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); - List *new_pathkeys; + Path *path = (Path *) lfirst(l); - sortclauses = lappend(sortclauses, 
sgc); - new_pathkeys = make_pathkeys_for_sortclauses(root, - sortclauses, - tlist); - if (list_length(new_pathkeys) > list_length(pathkeys)) - { - /* this sort clause is actually significant */ - (*partColIdx)[*partNumCols] = sortColIdx[scidx++]; - (*partOperators)[*partNumCols] = sgc->eqop; - (*partNumCols)++; - pathkeys = new_pathkeys; - } - } - foreach(lc, wc->orderClause) - { - SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); - List *new_pathkeys; + if (path == rel->cheapest_total_path || + compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0) + continue; - sortclauses = lappend(sortclauses, sgc); - new_pathkeys = make_pathkeys_for_sortclauses(root, - sortclauses, - tlist); - if (list_length(new_pathkeys) > list_length(pathkeys)) - { - /* this sort clause is actually significant */ - (*ordColIdx)[*ordNumCols] = sortColIdx[scidx++]; - (*ordOperators)[*ordNumCols] = sgc->eqop; - (*ordNumCols)++; - pathkeys = new_pathkeys; - } - } - /* complain if we didn't eat exactly the right number of sort cols */ - if (scidx != numSortCols) - elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators"); + best_path = path; } -} + return best_path; +} /* * expression_planner |