diff options
author | Andres Freund <andres@anarazel.de> | 2017-01-18 12:46:50 -0800 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2017-01-18 13:40:27 -0800 |
commit | 69f4b9c85f168ae006929eec44fc44d569e846b9 (patch) | |
tree | 0a7fa27d1be6e341d14f067d4942c1c0e0964729 /src/backend/optimizer/plan/planner.c | |
parent | e37360d5df240443bb6e997d26d54f59146283fc (diff) | |
download | postgresql-69f4b9c85f168ae006929eec44fc44d569e846b9.tar.gz postgresql-69f4b9c85f168ae006929eec44fc44d569e846b9.zip |
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs_ in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 219 |
1 files changed, 182 insertions, 37 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 25f2c5a6147..4b5902fc3ec 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -153,6 +153,8 @@ static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, static PathTarget *make_sort_input_target(PlannerInfo *root, PathTarget *final_target, bool *have_postponed_srfs); +static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs); /***************************************************************************** @@ -1400,8 +1402,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, int64 count_est = 0; double limit_tuples = -1.0; bool have_postponed_srfs = false; - double tlist_rows; PathTarget *final_target; + List *final_targets; + List *final_targets_contain_srfs; RelOptInfo *current_rel; RelOptInfo *final_rel; ListCell *lc; @@ -1464,6 +1467,10 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Also extract the PathTarget form of the setop result tlist */ final_target = current_rel->cheapest_total_path->pathtarget; + /* The setop result tlist couldn't contain any SRFs */ + Assert(!parse->hasTargetSRFs); + final_targets = final_targets_contain_srfs = NIL; + /* * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have * checked already, but let's make sure). @@ -1489,8 +1496,14 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, { /* No set operations, do regular planning */ PathTarget *sort_input_target; + List *sort_input_targets; + List *sort_input_targets_contain_srfs; PathTarget *grouping_target; + List *grouping_targets; + List *grouping_targets_contain_srfs; PathTarget *scanjoin_target; + List *scanjoin_targets; + List *scanjoin_targets_contain_srfs; bool have_grouping; AggClauseCosts agg_costs; WindowFuncLists *wflists = NULL; @@ -1735,8 +1748,50 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target = grouping_target; /* - * Forcibly apply scan/join target to all the Paths for the scan/join - * rel. + * If there are any SRFs in the targetlist, we must separate each of + * these PathTargets into SRF-computing and SRF-free targets. Replace + * each of the named targets with a SRF-free version, and remember the + * list of additional projection steps we need to add afterwards. + */ + if (parse->hasTargetSRFs) + { + /* final_target doesn't recompute any SRFs in sort_input_target */ + split_pathtarget_at_srfs(root, final_target, sort_input_target, + &final_targets, + &final_targets_contain_srfs); + final_target = (PathTarget *) linitial(final_targets); + Assert(!linitial_int(final_targets_contain_srfs)); + /* likewise for sort_input_target vs. grouping_target */ + split_pathtarget_at_srfs(root, sort_input_target, grouping_target, + &sort_input_targets, + &sort_input_targets_contain_srfs); + sort_input_target = (PathTarget *) linitial(sort_input_targets); + Assert(!linitial_int(sort_input_targets_contain_srfs)); + /* likewise for grouping_target vs. scanjoin_target */ + split_pathtarget_at_srfs(root, grouping_target, scanjoin_target, + &grouping_targets, + &grouping_targets_contain_srfs); + grouping_target = (PathTarget *) linitial(grouping_targets); + Assert(!linitial_int(grouping_targets_contain_srfs)); + /* scanjoin_target will not have any SRFs precomputed for it */ + split_pathtarget_at_srfs(root, scanjoin_target, NULL, + &scanjoin_targets, + &scanjoin_targets_contain_srfs); + scanjoin_target = (PathTarget *) linitial(scanjoin_targets); + Assert(!linitial_int(scanjoin_targets_contain_srfs)); + } + else + { + /* initialize lists, just to keep compiler quiet */ + final_targets = final_targets_contain_srfs = NIL; + sort_input_targets = sort_input_targets_contain_srfs = NIL; + grouping_targets = grouping_targets_contain_srfs = NIL; + scanjoin_targets = scanjoin_targets_contain_srfs = NIL; + } + + /* + * Forcibly apply SRF-free scan/join target to all the Paths for the + * scan/join rel. * * In principle we should re-run set_cheapest() here to identify the * cheapest path, but it seems unlikely that adding the same tlist @@ -1807,6 +1862,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel->partial_pathlist = NIL; } + /* Now fix things up if scan/join target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + scanjoin_targets, + scanjoin_targets_contain_srfs); + /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it @@ -1831,6 +1892,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, &agg_costs, rollup_lists, rollup_groupclauses); + /* Fix things up if grouping_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + grouping_targets, + grouping_targets_contain_srfs); } /* @@ -1846,6 +1912,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, tlist, wflists, activeWindows); + /* Fix things up if sort_input_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + sort_input_targets, + sort_input_targets_contain_srfs); } /* @@ -1874,40 +1945,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, final_target, have_postponed_srfs ? -1.0 : limit_tuples); - } - - /* - * If there are set-returning functions in the tlist, scale up the output - * rowcounts of all surviving Paths to account for that. Note that if any - * SRFs appear in sorting or grouping columns, we'll have underestimated - * the numbers of rows passing through earlier steps; but that's such a - * weird usage that it doesn't seem worth greatly complicating matters to - * account for it. - */ - if (parse->hasTargetSRFs) - tlist_rows = tlist_returns_set_rows(tlist); - else - tlist_rows = 1; - - if (tlist_rows > 1) - { - foreach(lc, current_rel->pathlist) - { - Path *path = (Path *) lfirst(lc); - - /* - * We assume that execution costs of the tlist as such were - * already accounted for. However, it still seems appropriate to - * charge something more for the executor's general costs of - * processing the added tuples. The cost is probably less than - * cpu_tuple_cost, though, so we arbitrarily use half of that. - */ - path->total_cost += path->rows * (tlist_rows - 1) * - cpu_tuple_cost / 2; - - path->rows *= tlist_rows; - } - /* No need to run set_cheapest; we're keeping all paths anyway. */ + /* Fix things up if final_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + final_targets, + final_targets_contain_srfs); } /* @@ -5102,6 +5144,109 @@ get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) } /* + * adjust_paths_for_srfs + * Fix up the Paths of the given upperrel to handle tSRFs properly. + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a ProjectSet plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * modifies each Path of an upperrel that (might) compute any SRFs in its + * output tlist to insert appropriate projection steps. + * + * The given targets and targets_contain_srfs lists are from + * split_pathtarget_at_srfs(). We assume the existing Paths emit the first + * target in targets. + */ +static void +adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs) +{ + ListCell *lc; + + Assert(list_length(targets) == list_length(targets_contain_srfs)); + Assert(!linitial_int(targets_contain_srfs)); + + /* If no SRFs appear at this plan level, nothing to do */ + if (list_length(targets) == 1) + return; + + /* + * Stack SRF-evaluation nodes atop each path for the rel. + * + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist eval + * costs to all the paths would change that, so we don't bother. Instead, + * just assume that the cheapest-startup and cheapest-total paths remain + * so. (There should be no parameterized paths anymore, so we needn't + * worry about updating cheapest_parameterized_paths.) + */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = (PathTarget *) lfirst(lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_set_projection_path(root, + rel, + newpath, + thistarget); + else + newpath = (Path *) apply_projection_to_path(root, + rel, + newpath, + thistarget); + } + lfirst(lc) = newpath; + if (subpath == rel->cheapest_startup_path) + rel->cheapest_startup_path = newpath; + if (subpath == rel->cheapest_total_path) + rel->cheapest_total_path = newpath; + } + + /* Likewise for partial paths, if any */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = (PathTarget *) lfirst(lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_set_projection_path(root, + rel, + newpath, + thistarget); + else + { + /* avoid apply_projection_to_path, in case of multiple refs */ + newpath = (Path *) create_projection_path(root, + rel, + newpath, + thistarget); + } + } + lfirst(lc) = newpath; + } +} + +/* * expression_planner * Perform planner's transformations on a standalone expression. * |