diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2013-11-21 19:37:02 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2013-11-21 19:37:20 -0500 |
commit | 784e762e886e6f72f548da86a27cd2ead87dbd1c (patch) | |
tree | 9c21fc1545c96a655ec4591e1ba3c8d99cdfccf8 /src/backend/optimizer/path | |
parent | 38f432898131270e5b64245786cb67f322538bae (diff) | |
download | postgresql-784e762e886e6f72f548da86a27cd2ead87dbd1c.tar.gz postgresql-784e762e886e6f72f548da86a27cd2ead87dbd1c.zip |
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 51 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 22 | ||||
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 51 |
3 files changed, 118 insertions, 6 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index bfd3809a007..96fe50f0b27 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -18,6 +18,7 @@ #include <math.h> #include "catalog/pg_class.h" +#include "catalog/pg_operator.h" #include "foreign/fdwapi.h" #include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG @@ -1258,6 +1259,7 @@ static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { Relids required_outer; + List *pathkeys = NIL; /* * We don't support pushing join clauses into the quals of a function @@ -1266,8 +1268,55 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) */ required_outer = rel->lateral_relids; + /* + * The result is considered unordered unless ORDINALITY was used, in which + * case it is ordered by the ordinal column (the last one). See if we + * care, by checking for uses of that Var in equivalence classes. + */ + if (rte->funcordinality) + { + AttrNumber ordattno = rel->max_attr; + Var *var = NULL; + ListCell *lc; + + /* + * Is there a Var for it in reltargetlist? If not, the query did not + * reference the ordinality column, or at least not in any way that + * would be interesting for sorting. + */ + foreach(lc, rel->reltargetlist) + { + Var *node = (Var *) lfirst(lc); + + /* checking varno/varlevelsup is just paranoia */ + if (IsA(node, Var) && + node->varattno == ordattno && + node->varno == rel->relid && + node->varlevelsup == 0) + { + var = node; + break; + } + } + + /* + * Try to build pathkeys for this Var with int8 sorting. We tell + * build_expression_pathkey not to build any new equivalence class; if + * the Var isn't already mentioned in some EC, it means that nothing + * cares about the ordering. + */ + if (var) + pathkeys = build_expression_pathkey(root, + (Expr *) var, + NULL, /* below outer joins */ + Int8LessOperator, + rel->relids, + false); + } + /* Generate appropriate path */ - add_path(rel, create_functionscan_path(root, rel, required_outer)); + add_path(rel, create_functionscan_path(root, rel, + pathkeys, required_outer)); /* Select cheapest path (pretty easy in this case...) */ set_cheapest(rel); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e7f8cec0fed..50f08521bfe 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1076,9 +1076,9 @@ cost_functionscan(Path *path, PlannerInfo *root, path->rows = baserel->rows; /* - * Estimate costs of executing the function expression. + * Estimate costs of executing the function expression(s). * - * Currently, nodeFunctionscan.c always executes the function to + * Currently, nodeFunctionscan.c always executes the functions to * completion before returning any rows, and caches the results in a * tuplestore. So the function eval cost is all startup cost, and per-row * costs are minimal. @@ -1088,7 +1088,7 @@ cost_functionscan(Path *path, PlannerInfo *root, * estimates for functions tend to be, there's not a lot of point in that * refinement right now. */ - cost_qual_eval_node(&exprcost, rte->funcexpr, root); + cost_qual_eval_node(&exprcost, (Node *) rte->functions, root); startup_cost += exprcost.startup + exprcost.per_tuple; @@ -3845,14 +3845,26 @@ void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel) { RangeTblEntry *rte; + ListCell *lc; /* Should only be applied to base relations that are functions */ Assert(rel->relid > 0); rte = planner_rt_fetch(rel->relid, root); Assert(rte->rtekind == RTE_FUNCTION); - /* Estimate number of rows the function itself will return */ - rel->tuples = expression_returns_set_rows(rte->funcexpr); + /* + * Estimate number of rows the functions will return. The rowcount of the + * node is that of the largest function result. + */ + rel->tuples = 0; + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + double ntup = expression_returns_set_rows(rtfunc->funcexpr); + + if (ntup > rel->tuples) + rel->tuples = ntup; + } /* Now estimate number of output rows, etc */ set_baserel_size_estimates(root, rel); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 032b2cdc133..9c8ede658f4 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -502,6 +502,57 @@ build_index_pathkeys(PlannerInfo *root, } /* + * build_expression_pathkey + * Build a pathkeys list that describes an ordering by a single expression + * using the given sort operator. + * + * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo. + * We induce the other arguments assuming default sort order for the operator. + * + * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it + * is false and the expression isn't already in some EquivalenceClass. + */ +List * +build_expression_pathkey(PlannerInfo *root, + Expr *expr, + Relids nullable_relids, + Oid opno, + Relids rel, + bool create_it) +{ + List *pathkeys; + Oid opfamily, + opcintype; + int16 strategy; + PathKey *cpathkey; + + /* Find the operator in pg_amop --- failure shouldn't happen */ + if (!get_ordering_op_properties(opno, + &opfamily, &opcintype, &strategy)) + elog(ERROR, "operator %u is not a valid ordering operator", + opno); + + cpathkey = make_pathkey_from_sortinfo(root, + expr, + nullable_relids, + opfamily, + opcintype, + exprCollation((Node *) expr), + (strategy == BTGreaterStrategyNumber), + (strategy == BTGreaterStrategyNumber), + 0, + rel, + create_it); + + if (cpathkey) + pathkeys = list_make1(cpathkey); + else + pathkeys = NIL; + + return pathkeys; +} + +/* * convert_subquery_pathkeys * Build a pathkeys list that describes the ordering of a subquery's * result, in the terms of the outer query. This is essentially a |