diff options
Diffstat (limited to 'src/backend/optimizer/util')
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 104 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 66 | ||||
-rw-r--r-- | src/backend/optimizer/util/tlist.c | 199 |
3 files changed, 278 insertions, 91 deletions
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 9e122e383d8..85ffa3afc7c 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -99,7 +99,6 @@ static bool contain_agg_clause_walker(Node *node, void *context); static bool get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context); static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); -static bool expression_returns_set_rows_walker(Node *node, double *count); static bool contain_subplans_walker(Node *node, void *context); static bool contain_mutable_functions_walker(Node *node, void *context); static bool contain_volatile_functions_walker(Node *node, void *context); @@ -790,114 +789,37 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists) /* * expression_returns_set_rows * Estimate the number of rows returned by a set-returning expression. - * The result is 1 if there are no set-returning functions. + * The result is 1 if it's not a set-returning expression. * - * We use the product of the rowcount estimates of all the functions in - * the given tree (this corresponds to the behavior of ExecMakeFunctionResult - * for nested set-returning functions). + * We should only examine the top-level function or operator; it used to be + * appropriate to recurse, but not anymore. (Even if there are more SRFs in + * the function's inputs, their multipliers are accounted for separately.) * * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c. */ double expression_returns_set_rows(Node *clause) { - double result = 1; - - (void) expression_returns_set_rows_walker(clause, &result); - return clamp_row_est(result); -} - -static bool -expression_returns_set_rows_walker(Node *node, double *count) -{ - if (node == NULL) - return false; - if (IsA(node, FuncExpr)) + if (clause == NULL) + return 1.0; + if (IsA(clause, FuncExpr)) { - FuncExpr *expr = (FuncExpr *) node; + FuncExpr *expr = (FuncExpr *) clause; if (expr->funcretset) - *count *= get_func_rows(expr->funcid); + return clamp_row_est(get_func_rows(expr->funcid)); } - if (IsA(node, OpExpr)) + if (IsA(clause, OpExpr)) { - OpExpr *expr = (OpExpr *) node; + OpExpr *expr = (OpExpr *) clause; if (expr->opretset) { set_opfuncid(expr); - *count *= get_func_rows(expr->opfuncid); + return clamp_row_est(get_func_rows(expr->opfuncid)); } } - - /* Avoid recursion for some cases that can't return a set */ - if (IsA(node, Aggref)) - return false; - if (IsA(node, WindowFunc)) - return false; - if (IsA(node, DistinctExpr)) - return false; - if (IsA(node, NullIfExpr)) - return false; - if (IsA(node, ScalarArrayOpExpr)) - return false; - if (IsA(node, BoolExpr)) - return false; - if (IsA(node, SubLink)) - return false; - if (IsA(node, SubPlan)) - return false; - if (IsA(node, AlternativeSubPlan)) - return false; - if (IsA(node, ArrayExpr)) - return false; - if (IsA(node, RowExpr)) - return false; - if (IsA(node, RowCompareExpr)) - return false; - if (IsA(node, CoalesceExpr)) - return false; - if (IsA(node, MinMaxExpr)) - return false; - if (IsA(node, XmlExpr)) - return false; - - return expression_tree_walker(node, expression_returns_set_rows_walker, - (void *) count); -} - -/* - * tlist_returns_set_rows - * Estimate the number of rows returned by a set-returning targetlist. - * The result is 1 if there are no set-returning functions. - * - * Here, the result is the largest rowcount estimate of any of the tlist's - * expressions, not the product as you would get from naively applying - * expression_returns_set_rows() to the whole tlist. The behavior actually - * implemented by ExecTargetList produces a number of rows equal to the least - * common multiple of the expression rowcounts, so that the product would be - * a worst-case estimate that is typically not realistic. Taking the max as - * we do here is a best-case estimate that might not be realistic either, - * but it's probably closer for typical usages. We don't try to compute the - * actual LCM because we're working with very approximate estimates, so their - * LCM would be unduly noisy. - */ -double -tlist_returns_set_rows(List *tlist) -{ - double result = 1; - ListCell *lc; - - foreach(lc, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - double colresult; - - colresult = expression_returns_set_rows((Node *) tle->expr); - if (result < colresult) - result = colresult; - } - return result; + return 1.0; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 3b7c56d3c7d..f440875ceb1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2320,6 +2320,72 @@ apply_projection_to_path(PlannerInfo *root, } /* + * create_set_projection_path + * Creates a pathnode that represents performing a projection that + * includes set-returning functions. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + */ +ProjectSetPath * +create_set_projection_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target) +{ + ProjectSetPath *pathnode = makeNode(ProjectSetPath); + double tlist_rows; + ListCell *lc; + + pathnode->path.pathtype = T_ProjectSet; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe && + is_parallel_safe(root, (Node *) target->exprs); + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Projection does not change the sort order XXX? */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + /* + * Estimate number of rows produced by SRFs for each row of input; if + * there's more than one in this node, use the maximum. + */ + tlist_rows = 1; + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + double itemrows; + + itemrows = expression_returns_set_rows(node); + if (tlist_rows < itemrows) + tlist_rows = itemrows; + } + + /* + * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost + * per input row, and half of cpu_tuple_cost for each added output row. + * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit + * this estimate later. + */ + pathnode->path.rows = subpath->rows * tlist_rows; + pathnode->path.startup_cost = subpath->startup_cost + + target->cost.startup; + pathnode->path.total_cost = subpath->total_cost + + target->cost.startup + + (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows + + (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2; + + return pathnode; +} + +/* * create_sort_path * Creates a pathnode that represents performing an explicit sort. * diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 45205a830f1..cca5db88e2f 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -16,9 +16,20 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" #include "optimizer/tlist.h" +typedef struct +{ + List *nextlevel_tlist; + bool nextlevel_contains_srfs; +} split_pathtarget_context; + +static bool split_pathtarget_walker(Node *node, + split_pathtarget_context *context); + + /***************************************************************************** * Target list creation and searching utilities *****************************************************************************/ @@ -759,3 +770,191 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) i++; } } + +/* + * split_pathtarget_at_srfs + * Split given PathTarget into multiple levels to position SRFs safely + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a ProjectSet plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * creates appropriate PathTarget(s) for each level. + * + * As an example, consider the tlist expression + * x + srf1(srf2(y + z)) + * This expression should appear as-is in the top PathTarget, but below that + * we must have a PathTarget containing + * x, srf1(srf2(y + z)) + * and below that, another PathTarget containing + * x, srf2(y + z) + * and below that, another PathTarget containing + * x, y, z + * When these tlists are processed by setrefs.c, subexpressions that match + * output expressions of the next lower tlist will be replaced by Vars, + * so that what the executor gets are tlists looking like + * Var1 + Var2 + * Var1, srf1(Var2) + * Var1, srf2(Var2 + Var3) + * x, y, z + * which satisfy the desired property. + * + * In some cases, a SRF has already been evaluated in some previous plan level + * and we shouldn't expand it again (that is, what we see in the target is + * already meant as a reference to a lower subexpression). So, don't expand + * any tlist expressions that appear in input_target, if that's not NULL. + * In principle we might need to consider matching subexpressions to + * input_target, but for now it's not necessary because only ORDER BY and + * GROUP BY expressions are at issue and those will look the same at both + * plan levels. + * + * The outputs of this function are two parallel lists, one a list of + * PathTargets and the other an integer list of bool flags indicating + * whether the corresponding PathTarget contains any top-level SRFs. + * The lists are given in the order they'd need to be evaluated in, with + * the "lowest" PathTarget first. So the last list entry is always the + * originally given PathTarget, and any entries before it indicate evaluation + * levels that must be inserted below it. The first list entry must not + * contain any SRFs, since it will typically be attached to a plan node + * that cannot evaluate SRFs. + * + * Note: using a list for the flags may seem like overkill, since there + * are only a few possible patterns for which levels contain SRFs. + * But this representation decouples callers from that knowledge. + */ +void +split_pathtarget_at_srfs(PlannerInfo *root, + PathTarget *target, PathTarget *input_target, + List **targets, List **targets_contain_srfs) +{ + /* Initialize output lists to empty; we prepend to them within loop */ + *targets = *targets_contain_srfs = NIL; + + /* Loop to consider each level of PathTarget we need */ + for (;;) + { + bool target_contains_srfs = false; + split_pathtarget_context context; + ListCell *lc; + + context.nextlevel_tlist = NIL; + context.nextlevel_contains_srfs = false; + + /* + * Scan the PathTarget looking for SRFs. Top-level SRFs are handled + * in this loop, ones lower down are found by split_pathtarget_walker. + */ + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + + /* + * A tlist item that is just a reference to an expression already + * computed in input_target need not be evaluated here, so just + * make sure it's included in the next PathTarget. + */ + if (input_target && list_member(input_target->exprs, node)) + { + context.nextlevel_tlist = lappend(context.nextlevel_tlist, node); + continue; + } + + /* Else, we need to compute this expression. */ + if (IsA(node, FuncExpr) && + ((FuncExpr *) node)->funcretset) + { + /* Top-level SRF: it can be evaluated here */ + target_contains_srfs = true; + /* Recursively examine SRF's inputs */ + split_pathtarget_walker((Node *) ((FuncExpr *) node)->args, + &context); + } + else if (IsA(node, OpExpr) && + ((OpExpr *) node)->opretset) + { + /* Same as above, but for set-returning operator */ + target_contains_srfs = true; + split_pathtarget_walker((Node *) ((OpExpr *) node)->args, + &context); + } + else + { + /* Not a top-level SRF, so recursively examine expression */ + split_pathtarget_walker(node, &context); + } + } + + /* + * Prepend current target and associated flag to output lists. + */ + *targets = lcons(target, *targets); + *targets_contain_srfs = lcons_int(target_contains_srfs, + *targets_contain_srfs); + + /* + * Done if we found no SRFs anywhere in this target; the tentative + * tlist we built for the next level can be discarded. + */ + if (!target_contains_srfs && !context.nextlevel_contains_srfs) + break; + + /* + * Else build the next PathTarget down, and loop back to process it. + * Copy the subexpressions to make sure PathTargets don't share + * substructure (might be unnecessary, but be safe); and drop any + * duplicate entries in the sub-targetlist. + */ + target = create_empty_pathtarget(); + add_new_columns_to_pathtarget(target, + (List *) copyObject(context.nextlevel_tlist)); + set_pathtarget_cost_width(root, target); + } +} + +/* Recursively examine expressions for split_pathtarget_at_srfs */ +static bool +split_pathtarget_walker(Node *node, split_pathtarget_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var) || + IsA(node, PlaceHolderVar) || + IsA(node, Aggref) || + IsA(node, GroupingFunc) || + IsA(node, WindowFunc)) + { + /* + * Pass these items down to the child plan level for evaluation. + * + * We assume that these constructs cannot contain any SRFs (if one + * does, there will be an executor failure from a misplaced SRF). + */ + context->nextlevel_tlist = lappend(context->nextlevel_tlist, node); + + /* Having done that, we need not examine their sub-structure */ + return false; + } + else if ((IsA(node, FuncExpr) && + ((FuncExpr *) node)->funcretset) || + (IsA(node, OpExpr) && + ((OpExpr *) node)->opretset)) + { + /* + * Pass SRFs down to the child plan level for evaluation, and mark + * that it contains SRFs. (We are not at top level of our own tlist, + * else this would have been picked up by split_pathtarget_at_srfs.) + */ + context->nextlevel_tlist = lappend(context->nextlevel_tlist, node); + context->nextlevel_contains_srfs = true; + + /* Inputs to the SRF need not be considered here, so we're done */ + return false; + } + + /* + * Otherwise, the node is evaluatable within the current PathTarget, so + * recurse to examine its inputs. + */ + return expression_tree_walker(node, split_pathtarget_walker, + (void *) context); +} |