aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/util')
-rw-r--r--src/backend/optimizer/util/clauses.c104
-rw-r--r--src/backend/optimizer/util/pathnode.c66
-rw-r--r--src/backend/optimizer/util/tlist.c199
3 files changed, 278 insertions, 91 deletions
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 9e122e383d8..85ffa3afc7c 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -99,7 +99,6 @@ static bool contain_agg_clause_walker(Node *node, void *context);
static bool get_agg_clause_costs_walker(Node *node,
get_agg_clause_costs_context *context);
static bool find_window_functions_walker(Node *node, WindowFuncLists *lists);
-static bool expression_returns_set_rows_walker(Node *node, double *count);
static bool contain_subplans_walker(Node *node, void *context);
static bool contain_mutable_functions_walker(Node *node, void *context);
static bool contain_volatile_functions_walker(Node *node, void *context);
@@ -790,114 +789,37 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
/*
* expression_returns_set_rows
* Estimate the number of rows returned by a set-returning expression.
- * The result is 1 if there are no set-returning functions.
+ * The result is 1 if it's not a set-returning expression.
*
- * We use the product of the rowcount estimates of all the functions in
- * the given tree (this corresponds to the behavior of ExecMakeFunctionResult
- * for nested set-returning functions).
+ * We should only examine the top-level function or operator; it used to be
+ * appropriate to recurse, but not anymore. (Even if there are more SRFs in
+ * the function's inputs, their multipliers are accounted for separately.)
*
* Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
*/
double
expression_returns_set_rows(Node *clause)
{
- double result = 1;
-
- (void) expression_returns_set_rows_walker(clause, &result);
- return clamp_row_est(result);
-}
-
-static bool
-expression_returns_set_rows_walker(Node *node, double *count)
-{
- if (node == NULL)
- return false;
- if (IsA(node, FuncExpr))
+ if (clause == NULL)
+ return 1.0;
+ if (IsA(clause, FuncExpr))
{
- FuncExpr *expr = (FuncExpr *) node;
+ FuncExpr *expr = (FuncExpr *) clause;
if (expr->funcretset)
- *count *= get_func_rows(expr->funcid);
+ return clamp_row_est(get_func_rows(expr->funcid));
}
- if (IsA(node, OpExpr))
+ if (IsA(clause, OpExpr))
{
- OpExpr *expr = (OpExpr *) node;
+ OpExpr *expr = (OpExpr *) clause;
if (expr->opretset)
{
set_opfuncid(expr);
- *count *= get_func_rows(expr->opfuncid);
+ return clamp_row_est(get_func_rows(expr->opfuncid));
}
}
-
- /* Avoid recursion for some cases that can't return a set */
- if (IsA(node, Aggref))
- return false;
- if (IsA(node, WindowFunc))
- return false;
- if (IsA(node, DistinctExpr))
- return false;
- if (IsA(node, NullIfExpr))
- return false;
- if (IsA(node, ScalarArrayOpExpr))
- return false;
- if (IsA(node, BoolExpr))
- return false;
- if (IsA(node, SubLink))
- return false;
- if (IsA(node, SubPlan))
- return false;
- if (IsA(node, AlternativeSubPlan))
- return false;
- if (IsA(node, ArrayExpr))
- return false;
- if (IsA(node, RowExpr))
- return false;
- if (IsA(node, RowCompareExpr))
- return false;
- if (IsA(node, CoalesceExpr))
- return false;
- if (IsA(node, MinMaxExpr))
- return false;
- if (IsA(node, XmlExpr))
- return false;
-
- return expression_tree_walker(node, expression_returns_set_rows_walker,
- (void *) count);
-}
-
-/*
- * tlist_returns_set_rows
- * Estimate the number of rows returned by a set-returning targetlist.
- * The result is 1 if there are no set-returning functions.
- *
- * Here, the result is the largest rowcount estimate of any of the tlist's
- * expressions, not the product as you would get from naively applying
- * expression_returns_set_rows() to the whole tlist. The behavior actually
- * implemented by ExecTargetList produces a number of rows equal to the least
- * common multiple of the expression rowcounts, so that the product would be
- * a worst-case estimate that is typically not realistic. Taking the max as
- * we do here is a best-case estimate that might not be realistic either,
- * but it's probably closer for typical usages. We don't try to compute the
- * actual LCM because we're working with very approximate estimates, so their
- * LCM would be unduly noisy.
- */
-double
-tlist_returns_set_rows(List *tlist)
-{
- double result = 1;
- ListCell *lc;
-
- foreach(lc, tlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(lc);
- double colresult;
-
- colresult = expression_returns_set_rows((Node *) tle->expr);
- if (result < colresult)
- result = colresult;
- }
- return result;
+ return 1.0;
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 3b7c56d3c7d..f440875ceb1 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2320,6 +2320,72 @@ apply_projection_to_path(PlannerInfo *root,
}
/*
+ * create_set_projection_path
+ * Creates a pathnode that represents performing a projection that
+ * includes set-returning functions.
+ *
+ * 'rel' is the parent relation associated with the result
+ * 'subpath' is the path representing the source of data
+ * 'target' is the PathTarget to be computed
+ */
+ProjectSetPath *
+create_set_projection_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *subpath,
+ PathTarget *target)
+{
+ ProjectSetPath *pathnode = makeNode(ProjectSetPath);
+ double tlist_rows;
+ ListCell *lc;
+
+ pathnode->path.pathtype = T_ProjectSet;
+ pathnode->path.parent = rel;
+ pathnode->path.pathtarget = target;
+ /* For now, assume we are above any joins, so no parameterization */
+ pathnode->path.param_info = NULL;
+ pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel &&
+ subpath->parallel_safe &&
+ is_parallel_safe(root, (Node *) target->exprs);
+ pathnode->path.parallel_workers = subpath->parallel_workers;
+ /* Projection does not change the sort order XXX? */
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ pathnode->subpath = subpath;
+
+ /*
+ * Estimate number of rows produced by SRFs for each row of input; if
+ * there's more than one in this node, use the maximum.
+ */
+ tlist_rows = 1;
+ foreach(lc, target->exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+ double itemrows;
+
+ itemrows = expression_returns_set_rows(node);
+ if (tlist_rows < itemrows)
+ tlist_rows = itemrows;
+ }
+
+ /*
+ * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost
+ * per input row, and half of cpu_tuple_cost for each added output row.
+ * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit
+ * this estimate later.
+ */
+ pathnode->path.rows = subpath->rows * tlist_rows;
+ pathnode->path.startup_cost = subpath->startup_cost +
+ target->cost.startup;
+ pathnode->path.total_cost = subpath->total_cost +
+ target->cost.startup +
+ (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows +
+ (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2;
+
+ return pathnode;
+}
+
+/*
* create_sort_path
* Creates a pathnode that represents performing an explicit sort.
*
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 45205a830f1..cca5db88e2f 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -16,9 +16,20 @@
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
+#include "optimizer/cost.h"
#include "optimizer/tlist.h"
+typedef struct
+{
+ List *nextlevel_tlist;
+ bool nextlevel_contains_srfs;
+} split_pathtarget_context;
+
+static bool split_pathtarget_walker(Node *node,
+ split_pathtarget_context *context);
+
+
/*****************************************************************************
* Target list creation and searching utilities
*****************************************************************************/
@@ -759,3 +770,191 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
i++;
}
}
+
+/*
+ * split_pathtarget_at_srfs
+ * Split given PathTarget into multiple levels to position SRFs safely
+ *
+ * The executor can only handle set-returning functions that appear at the
+ * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
+ * that are not at top level, we need to split up the evaluation into multiple
+ * plan levels in which each level satisfies this constraint. This function
+ * creates appropriate PathTarget(s) for each level.
+ *
+ * As an example, consider the tlist expression
+ * x + srf1(srf2(y + z))
+ * This expression should appear as-is in the top PathTarget, but below that
+ * we must have a PathTarget containing
+ * x, srf1(srf2(y + z))
+ * and below that, another PathTarget containing
+ * x, srf2(y + z)
+ * and below that, another PathTarget containing
+ * x, y, z
+ * When these tlists are processed by setrefs.c, subexpressions that match
+ * output expressions of the next lower tlist will be replaced by Vars,
+ * so that what the executor gets are tlists looking like
+ * Var1 + Var2
+ * Var1, srf1(Var2)
+ * Var1, srf2(Var2 + Var3)
+ * x, y, z
+ * which satisfy the desired property.
+ *
+ * In some cases, a SRF has already been evaluated in some previous plan level
+ * and we shouldn't expand it again (that is, what we see in the target is
+ * already meant as a reference to a lower subexpression). So, don't expand
+ * any tlist expressions that appear in input_target, if that's not NULL.
+ * In principle we might need to consider matching subexpressions to
+ * input_target, but for now it's not necessary because only ORDER BY and
+ * GROUP BY expressions are at issue and those will look the same at both
+ * plan levels.
+ *
+ * The outputs of this function are two parallel lists, one a list of
+ * PathTargets and the other an integer list of bool flags indicating
+ * whether the corresponding PathTarget contains any top-level SRFs.
+ * The lists are given in the order they'd need to be evaluated in, with
+ * the "lowest" PathTarget first. So the last list entry is always the
+ * originally given PathTarget, and any entries before it indicate evaluation
+ * levels that must be inserted below it. The first list entry must not
+ * contain any SRFs, since it will typically be attached to a plan node
+ * that cannot evaluate SRFs.
+ *
+ * Note: using a list for the flags may seem like overkill, since there
+ * are only a few possible patterns for which levels contain SRFs.
+ * But this representation decouples callers from that knowledge.
+ */
+void
+split_pathtarget_at_srfs(PlannerInfo *root,
+ PathTarget *target, PathTarget *input_target,
+ List **targets, List **targets_contain_srfs)
+{
+ /* Initialize output lists to empty; we prepend to them within loop */
+ *targets = *targets_contain_srfs = NIL;
+
+ /* Loop to consider each level of PathTarget we need */
+ for (;;)
+ {
+ bool target_contains_srfs = false;
+ split_pathtarget_context context;
+ ListCell *lc;
+
+ context.nextlevel_tlist = NIL;
+ context.nextlevel_contains_srfs = false;
+
+ /*
+ * Scan the PathTarget looking for SRFs. Top-level SRFs are handled
+ * in this loop, ones lower down are found by split_pathtarget_walker.
+ */
+ foreach(lc, target->exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ /*
+ * A tlist item that is just a reference to an expression already
+ * computed in input_target need not be evaluated here, so just
+ * make sure it's included in the next PathTarget.
+ */
+ if (input_target && list_member(input_target->exprs, node))
+ {
+ context.nextlevel_tlist = lappend(context.nextlevel_tlist, node);
+ continue;
+ }
+
+ /* Else, we need to compute this expression. */
+ if (IsA(node, FuncExpr) &&
+ ((FuncExpr *) node)->funcretset)
+ {
+ /* Top-level SRF: it can be evaluated here */
+ target_contains_srfs = true;
+ /* Recursively examine SRF's inputs */
+ split_pathtarget_walker((Node *) ((FuncExpr *) node)->args,
+ &context);
+ }
+ else if (IsA(node, OpExpr) &&
+ ((OpExpr *) node)->opretset)
+ {
+ /* Same as above, but for set-returning operator */
+ target_contains_srfs = true;
+ split_pathtarget_walker((Node *) ((OpExpr *) node)->args,
+ &context);
+ }
+ else
+ {
+ /* Not a top-level SRF, so recursively examine expression */
+ split_pathtarget_walker(node, &context);
+ }
+ }
+
+ /*
+ * Prepend current target and associated flag to output lists.
+ */
+ *targets = lcons(target, *targets);
+ *targets_contain_srfs = lcons_int(target_contains_srfs,
+ *targets_contain_srfs);
+
+ /*
+ * Done if we found no SRFs anywhere in this target; the tentative
+ * tlist we built for the next level can be discarded.
+ */
+ if (!target_contains_srfs && !context.nextlevel_contains_srfs)
+ break;
+
+ /*
+ * Else build the next PathTarget down, and loop back to process it.
+ * Copy the subexpressions to make sure PathTargets don't share
+ * substructure (might be unnecessary, but be safe); and drop any
+ * duplicate entries in the sub-targetlist.
+ */
+ target = create_empty_pathtarget();
+ add_new_columns_to_pathtarget(target,
+ (List *) copyObject(context.nextlevel_tlist));
+ set_pathtarget_cost_width(root, target);
+ }
+}
+
+/* Recursively examine expressions for split_pathtarget_at_srfs */
+static bool
+split_pathtarget_walker(Node *node, split_pathtarget_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, Var) ||
+ IsA(node, PlaceHolderVar) ||
+ IsA(node, Aggref) ||
+ IsA(node, GroupingFunc) ||
+ IsA(node, WindowFunc))
+ {
+ /*
+ * Pass these items down to the child plan level for evaluation.
+ *
+ * We assume that these constructs cannot contain any SRFs (if one
+ * does, there will be an executor failure from a misplaced SRF).
+ */
+ context->nextlevel_tlist = lappend(context->nextlevel_tlist, node);
+
+ /* Having done that, we need not examine their sub-structure */
+ return false;
+ }
+ else if ((IsA(node, FuncExpr) &&
+ ((FuncExpr *) node)->funcretset) ||
+ (IsA(node, OpExpr) &&
+ ((OpExpr *) node)->opretset))
+ {
+ /*
+ * Pass SRFs down to the child plan level for evaluation, and mark
+ * that it contains SRFs. (We are not at top level of our own tlist,
+ * else this would have been picked up by split_pathtarget_at_srfs.)
+ */
+ context->nextlevel_tlist = lappend(context->nextlevel_tlist, node);
+ context->nextlevel_contains_srfs = true;
+
+ /* Inputs to the SRF need not be considered here, so we're done */
+ return false;
+ }
+
+ /*
+ * Otherwise, the node is evaluatable within the current PathTarget, so
+ * recurse to examine its inputs.
+ */
+ return expression_tree_walker(node, split_pathtarget_walker,
+ (void *) context);
+}