Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.

This patch adds the ability to write TABLE( function1(), function2(), ...) as a single FROM-clause entry. The result is the concatenation of the first row from each function, followed by the second row from each function, etc; with NULLs inserted if any function produces fewer rows than others. This is believed to be a much more useful behavior than what Postgres currently does with multiple SRFs in a SELECT list. This syntax also provides a reasonable way to combine use of column definition lists with WITH ORDINALITY: put the column definition list inside TABLE(), where it's clear that it doesn't control the ordinality column as well. Also implement SQL-compliant multiple-argument UNNEST(), by turning UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)). The SQL standard specifies TABLE() with only a single function, not multiple functions, and it seems to require an implicit UNNEST() which is not what this patch does. There may be something wrong with that reading of the spec, though, because if it's right then the spec's TABLE() is just a pointless alternative spelling of UNNEST(). After further review of that, we might choose to adopt a different syntax for what this patch does, but in any case this functionality seems clearly worthwhile. Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and significantly revised by me
author: Tom Lane <tgl@sss.pgh.pa.us> 2013-11-21 19:37:02 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2013-11-21 19:37:20 -0500
commit: 784e762e886e6f72f548da86a27cd2ead87dbd1c (patch)
tree: 9c21fc1545c96a655ec4591e1ba3c8d99cdfccf8 /src/backend/optimizer/path
parent: 38f432898131270e5b64245786cb67f322538bae (diff)
download: postgresql-784e762e886e6f72f548da86a27cd2ead87dbd1c.tar.gz
postgresql-784e762e886e6f72f548da86a27cd2ead87dbd1c.zip
3 files changed, 118 insertions, 6 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index bfd3809a007..96fe50f0b27 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -18,6 +18,7 @@
 #include <math.h>
 
 #include "catalog/pg_class.h"
+#include "catalog/pg_operator.h"
 #include "foreign/fdwapi.h"
 #include "nodes/nodeFuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -1258,6 +1259,7 @@ static void
 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
 	Relids		required_outer;
+	List	   *pathkeys = NIL;
 
 	/*
 	 * We don't support pushing join clauses into the quals of a function
@@ -1266,8 +1268,55 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 */
 	required_outer = rel->lateral_relids;
 
+	/*
+	 * The result is considered unordered unless ORDINALITY was used, in which
+	 * case it is ordered by the ordinal column (the last one).  See if we
+	 * care, by checking for uses of that Var in equivalence classes.
+	 */
+	if (rte->funcordinality)
+	{
+		AttrNumber	ordattno = rel->max_attr;
+		Var		   *var = NULL;
+		ListCell   *lc;
+
+		/*
+		 * Is there a Var for it in reltargetlist?	If not, the query did not
+		 * reference the ordinality column, or at least not in any way that
+		 * would be interesting for sorting.
+		 */
+		foreach(lc, rel->reltargetlist)
+		{
+			Var		   *node = (Var *) lfirst(lc);
+
+			/* checking varno/varlevelsup is just paranoia */
+			if (IsA(node, Var) &&
+				node->varattno == ordattno &&
+				node->varno == rel->relid &&
+				node->varlevelsup == 0)
+			{
+				var = node;
+				break;
+			}
+		}
+
+		/*
+		 * Try to build pathkeys for this Var with int8 sorting.  We tell
+		 * build_expression_pathkey not to build any new equivalence class; if
+		 * the Var isn't already mentioned in some EC, it means that nothing
+		 * cares about the ordering.
+		 */
+		if (var)
+			pathkeys = build_expression_pathkey(root,
+												(Expr *) var,
+												NULL,	/* below outer joins */
+												Int8LessOperator,
+												rel->relids,
+												false);
+	}
+
 	/* Generate appropriate path */
-	add_path(rel, create_functionscan_path(root, rel, required_outer));
+	add_path(rel, create_functionscan_path(root, rel,
+										   pathkeys, required_outer));
 
 	/* Select cheapest path (pretty easy in this case...) */
 	set_cheapest(rel);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index e7f8cec0fed..50f08521bfe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1076,9 +1076,9 @@ cost_functionscan(Path *path, PlannerInfo *root,
 		path->rows = baserel->rows;
 
 	/*
-	 * Estimate costs of executing the function expression.
+	 * Estimate costs of executing the function expression(s).
 	 *
-	 * Currently, nodeFunctionscan.c always executes the function to
+	 * Currently, nodeFunctionscan.c always executes the functions to
 	 * completion before returning any rows, and caches the results in a
 	 * tuplestore.	So the function eval cost is all startup cost, and per-row
 	 * costs are minimal.
@@ -1088,7 +1088,7 @@ cost_functionscan(Path *path, PlannerInfo *root,
 	 * estimates for functions tend to be, there's not a lot of point in that
 	 * refinement right now.
 	 */
-	cost_qual_eval_node(&exprcost, rte->funcexpr, root);
+	cost_qual_eval_node(&exprcost, (Node *) rte->functions, root);
 
 	startup_cost += exprcost.startup + exprcost.per_tuple;
 
@@ -3845,14 +3845,26 @@ void
 set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 {
 	RangeTblEntry *rte;
+	ListCell   *lc;
 
 	/* Should only be applied to base relations that are functions */
 	Assert(rel->relid > 0);
 	rte = planner_rt_fetch(rel->relid, root);
 	Assert(rte->rtekind == RTE_FUNCTION);
 
-	/* Estimate number of rows the function itself will return */
-	rel->tuples = expression_returns_set_rows(rte->funcexpr);
+	/*
+	 * Estimate number of rows the functions will return. The rowcount of the
+	 * node is that of the largest function result.
+	 */
+	rel->tuples = 0;
+	foreach(lc, rte->functions)
+	{
+		RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+		double		ntup = expression_returns_set_rows(rtfunc->funcexpr);
+
+		if (ntup > rel->tuples)
+			rel->tuples = ntup;
+	}
 
 	/* Now estimate number of output rows, etc */
 	set_baserel_size_estimates(root, rel);
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 032b2cdc133..9c8ede658f4 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -502,6 +502,57 @@ build_index_pathkeys(PlannerInfo *root,
 }
 
 /*
+ * build_expression_pathkey
+ *	  Build a pathkeys list that describes an ordering by a single expression
+ *	  using the given sort operator.
+ *
+ * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
+ * We induce the other arguments assuming default sort order for the operator.
+ *
+ * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
+ * is false and the expression isn't already in some EquivalenceClass.
+ */
+List *
+build_expression_pathkey(PlannerInfo *root,
+						 Expr *expr,
+						 Relids nullable_relids,
+						 Oid opno,
+						 Relids rel,
+						 bool create_it)
+{
+	List	   *pathkeys;
+	Oid			opfamily,
+				opcintype;
+	int16		strategy;
+	PathKey    *cpathkey;
+
+	/* Find the operator in pg_amop --- failure shouldn't happen */
+	if (!get_ordering_op_properties(opno,
+									&opfamily, &opcintype, &strategy))
+		elog(ERROR, "operator %u is not a valid ordering operator",
+			 opno);
+
+	cpathkey = make_pathkey_from_sortinfo(root,
+										  expr,
+										  nullable_relids,
+										  opfamily,
+										  opcintype,
+										  exprCollation((Node *) expr),
+									   (strategy == BTGreaterStrategyNumber),
+									   (strategy == BTGreaterStrategyNumber),
+										  0,
+										  rel,
+										  create_it);
+
+	if (cpathkey)
+		pathkeys = list_make1(cpathkey);
+	else
+		pathkeys = NIL;
+
+	return pathkeys;
+}
+
+/*
  * convert_subquery_pathkeys
  *	  Build a pathkeys list that describes the ordering of a subquery's
  *	  result, in the terms of the outer query.	This is essentially a
author	Tom Lane <tgl@sss.pgh.pa.us>	2013-11-21 19:37:02 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2013-11-21 19:37:20 -0500
commit	784e762e886e6f72f548da86a27cd2ead87dbd1c (patch)
tree	9c21fc1545c96a655ec4591e1ba3c8d99cdfccf8 /src/backend/optimizer/path
parent	38f432898131270e5b64245786cb67f322538bae (diff)
download	postgresql-784e762e886e6f72f548da86a27cd2ead87dbd1c.tar.gz postgresql-784e762e886e6f72f548da86a27cd2ead87dbd1c.zip