aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2012-09-13 11:31:40 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2012-09-13 11:32:25 -0400
commita20993608a65b9896b4c05cb7061bc60d6f1840a (patch)
treece1d91f8b25a661bd28ef85f19648f0bbd3a9c27 /src
parent6b896f511f8dbeac9f09cc54c6cb62bdff25e501 (diff)
downloadpostgresql-a20993608a65b9896b4c05cb7061bc60d6f1840a.tar.gz
postgresql-a20993608a65b9896b4c05cb7061bc60d6f1840a.zip
Fix case of window function + aggregate + GROUP BY expression.
In commit 1bc16a946008a7cbb33a9a06a7c6765a807d7f59 I added a minor optimization to drop the component variables of a GROUP BY expression from the target list computed at the aggregation level of a query, if those Vars weren't referenced elsewhere in the tlist. However, I overlooked that the window-function planning code would deconstruct such expressions and thus need to have access to their component variables. Fix it to not do that. While at it, I removed the distinction between volatile and nonvolatile window partition/order expressions: the code now computes all of them at the aggregation level. This saves a relatively expensive check for volatility, and it's unclear that the resulting plan isn't better anyway. Per bug #7535 from Louis-David Mitterrand. Back-patch to 9.2.
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/planner.c163
-rw-r--r--src/test/regress/expected/window.out12
-rw-r--r--src/test/regress/sql/window.sql6
3 files changed, 137 insertions, 44 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 385e64647ea..b61005f98fe 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -90,8 +90,8 @@ static void locate_grouping_columns(PlannerInfo *root,
AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
-static List *add_volatile_sort_exprs(List *window_tlist, List *tlist,
- List *activeWindows);
+static List *make_windowInputTargetList(PlannerInfo *root,
+ List *tlist, List *activeWindows);
static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
List *tlist, bool canonicalize);
static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
@@ -1555,31 +1555,25 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/*
* The "base" targetlist for all steps of the windowing process is
- * a flat tlist of all Vars and Aggs needed in the result. (In
+ * a flat tlist of all Vars and Aggs needed in the result. (In
* some cases we wouldn't need to propagate all of these all the
* way to the top, since they might only be needed as inputs to
* WindowFuncs. It's probably not worth trying to optimize that
- * though.) We also need any volatile sort expressions, because
- * make_sort_from_pathkeys won't add those on its own, and anyway
- * we want them evaluated only once at the bottom of the stack. As
- * we climb up the stack, we add outputs for the WindowFuncs
- * computed at each level. Also, each input tlist has to present
- * all the columns needed to sort the data for the next WindowAgg
- * step. That's handled internally by make_sort_from_pathkeys,
- * but we need the copyObject steps here to ensure that each plan
- * node has a separately modifiable tlist.
- *
- * Note: it's essential here to use PVC_INCLUDE_AGGREGATES so that
- * Vars mentioned only in aggregate expressions aren't pulled out
- * as separate targetlist entries. Otherwise we could be putting
- * ungrouped Vars directly into an Agg node's tlist, resulting in
- * undefined behavior.
+ * though.) We also add window partitioning and sorting
+ * expressions to the base tlist, to ensure they're computed only
+ * once at the bottom of the stack (that's critical for volatile
+ * functions). As we climb up the stack, we'll add outputs for
+ * the WindowFuncs computed at each level.
+ */
+ window_tlist = make_windowInputTargetList(root,
+ tlist,
+ activeWindows);
+
+ /*
+ * The copyObject steps here are needed to ensure that each plan
+ * node has a separately modifiable tlist. (XXX wouldn't a
+ * shallow list copy do for that?)
*/
- window_tlist = flatten_tlist(tlist,
- PVC_INCLUDE_AGGREGATES,
- PVC_INCLUDE_PLACEHOLDERS);
- window_tlist = add_volatile_sort_exprs(window_tlist, tlist,
- activeWindows);
result_plan->targetlist = (List *) copyObject(window_tlist);
foreach(l, activeWindows)
@@ -1603,9 +1597,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* really have to sort. Even when no explicit sort is needed,
* we need to have suitable resjunk items added to the input
* plan's tlist for any partitioning or ordering columns that
- * aren't plain Vars. Furthermore, this way we can use
- * existing infrastructure to identify which input columns are
- * the interesting ones.
+ * aren't plain Vars. (In theory, make_windowInputTargetList
+ * should have provided all such columns, but let's not assume
+ * that here.) Furthermore, this way we can use existing
+ * infrastructure to identify which input columns are the
+ * interesting ones.
*/
if (window_pathkeys)
{
@@ -3006,18 +3002,57 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
}
/*
- * add_volatile_sort_exprs
- * Identify any volatile sort/group expressions used by the active
- * windows, and add them to window_tlist if not already present.
- * Return the modified window_tlist.
+ * make_windowInputTargetList
+ * Generate appropriate target list for initial input to WindowAgg nodes.
+ *
+ * When grouping_planner inserts one or more WindowAgg nodes into the plan,
+ * this function computes the initial target list to be computed by the node
+ * just below the first WindowAgg. This list must contain all values needed
+ * to evaluate the window functions, compute the final target list, and
+ * perform any required final sort step. If multiple WindowAggs are needed,
+ * each intermediate one adds its window function results onto this tlist;
+ * only the topmost WindowAgg computes the actual desired target list.
+ *
+ * This function is much like make_subplanTargetList, though not quite enough
+ * like it to share code. As in that function, we flatten most expressions
+ * into their component variables. But we do not want to flatten window
+ * PARTITION BY/ORDER BY clauses, since that might result in multiple
+ * evaluations of them, which would be bad (possibly even resulting in
+ * inconsistent answers, if they contain volatile functions). Also, we must
+ * not flatten GROUP BY clauses that were left unflattened by
+ * make_subplanTargetList, because we may no longer have access to the
+ * individual Vars in them.
+ *
+ * Another key difference from make_subplanTargetList is that we don't flatten
+ * Aggref expressions, since those are to be computed below the window
+ * functions and just referenced like Vars above that.
+ *
+ * 'tlist' is the query's final target list.
+ * 'activeWindows' is the list of active windows previously identified by
+ * select_active_windows.
+ *
+ * The result is the targetlist to be computed by the plan node immediately
+ * below the first WindowAgg node.
*/
static List *
-add_volatile_sort_exprs(List *window_tlist, List *tlist, List *activeWindows)
+make_windowInputTargetList(PlannerInfo *root,
+ List *tlist,
+ List *activeWindows)
{
- Bitmapset *sgrefs = NULL;
+ Query *parse = root->parse;
+ Bitmapset *sgrefs;
+ List *new_tlist;
+ List *flattenable_cols;
+ List *flattenable_vars;
ListCell *lc;
- /* First, collect the sortgrouprefs of the windows into a bitmapset */
+ Assert(parse->hasWindowFuncs);
+
+ /*
+ * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
+ * into a bitmapset for convenient reference below.
+ */
+ sgrefs = NULL;
foreach(lc, activeWindows)
{
WindowClause *wc = (WindowClause *) lfirst(lc);
@@ -3037,34 +3072,74 @@ add_volatile_sort_exprs(List *window_tlist, List *tlist, List *activeWindows)
}
}
+ /* Add in sortgroupref numbers of GROUP BY clauses, too */
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
+
+ sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
+ }
+
/*
- * Now scan the original tlist to find the referenced expressions. Any
- * that are volatile must be added to window_tlist.
- *
- * Note: we know that the input window_tlist contains no items marked with
- * ressortgrouprefs, so we don't have to worry about collisions of the
- * reference numbers.
+ * Construct a tlist containing all the non-flattenable tlist items, and
+ * save aside the others for a moment.
*/
+ new_tlist = NIL;
+ flattenable_cols = NIL;
+
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
+ /*
+ * Don't want to deconstruct window clauses or GROUP BY items. (Note
+ * that such items can't contain window functions, so it's okay to
+ * compute them below the WindowAgg nodes.)
+ */
if (tle->ressortgroupref != 0 &&
- bms_is_member(tle->ressortgroupref, sgrefs) &&
- contain_volatile_functions((Node *) tle->expr))
+ bms_is_member(tle->ressortgroupref, sgrefs))
{
+ /* Don't want to deconstruct this value, so add to new_tlist */
TargetEntry *newtle;
newtle = makeTargetEntry(tle->expr,
- list_length(window_tlist) + 1,
+ list_length(new_tlist) + 1,
NULL,
false);
+ /* Preserve its sortgroupref marking, in case it's volatile */
newtle->ressortgroupref = tle->ressortgroupref;
- window_tlist = lappend(window_tlist, newtle);
+ new_tlist = lappend(new_tlist, newtle);
+ }
+ else
+ {
+ /*
+ * Column is to be flattened, so just remember the expression for
+ * later call to pull_var_clause. There's no need for
+ * pull_var_clause to examine the TargetEntry node itself.
+ */
+ flattenable_cols = lappend(flattenable_cols, tle->expr);
}
}
- return window_tlist;
+ /*
+ * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
+ * add them to the result tlist if not already present. (Some might be
+ * there already because they're used directly as window/group clauses.)
+ *
+ * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that the
+ * Aggrefs are placed in the Agg node's tlist and not left to be computed
+ * at higher levels.
+ */
+ flattenable_vars = pull_var_clause((Node *) flattenable_cols,
+ PVC_INCLUDE_AGGREGATES,
+ PVC_INCLUDE_PLACEHOLDERS);
+ new_tlist = add_to_flat_tlist(new_tlist, flattenable_vars);
+
+ /* clean up cruft */
+ list_free(flattenable_vars);
+ list_free(flattenable_cols);
+
+ return new_tlist;
}
/*
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 77786266255..49252c82ae3 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -614,6 +614,18 @@ group by ten order by ten;
9 | 10040184 | 7
(10 rows)
+-- window and aggregate with GROUP BY expression (9.2 bug)
+explain (costs off)
+select first_value(max(x)) over (), y
+ from (select unique1 as x, ten+four as y from tenk1) ss
+ group by y;
+ QUERY PLAN
+-------------------------------
+ WindowAgg
+ -> HashAggregate
+ -> Seq Scan on tenk1
+(3 rows)
+
-- test non-default frame specifications
SELECT four, ten,
sum(ten) over (partition by four order by ten),
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index d8e9e7e3b1c..769be0fdc61 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -145,6 +145,12 @@ select ten,
from tenk1
group by ten order by ten;
+-- window and aggregate with GROUP BY expression (9.2 bug)
+explain (costs off)
+select first_value(max(x)) over (), y
+ from (select unique1 as x, ten+four as y from tenk1) ss
+ group by y;
+
-- test non-default frame specifications
SELECT four, ten,
sum(ten) over (partition by four order by ten),