aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/costsize.c249
-rw-r--r--src/backend/optimizer/util/pathnode.c3
-rw-r--r--src/include/optimizer/cost.h2
-rw-r--r--src/test/regress/expected/window.out74
-rw-r--r--src/test/regress/sql/window.sql34
5 files changed, 358 insertions, 4 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ef475d95a18..8b797e3b462 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2810,6 +2810,226 @@ cost_agg(Path *path, PlannerInfo *root,
}
/*
+ * get_windowclause_startup_tuples
+ * Estimate how many tuples we'll need to fetch from a WindowAgg's
+ * subnode before we can output the first WindowAgg tuple.
+ *
+ * How many tuples need to be read depends on the WindowClause. For example,
+ * a WindowClause with no PARTITION BY and no ORDER BY requires that all
+ * subnode tuples are read and aggregated before the WindowAgg can output
+ * anything. If there's a PARTITION BY, then we only need to look at tuples
+ * in the first partition. Here we attempt to estimate just how many
+ * 'input_tuples' the WindowAgg will need to read for the given WindowClause
+ * before the first tuple can be output.
+ */
+static double
+get_windowclause_startup_tuples(PlannerInfo *root, WindowClause *wc,
+ double input_tuples)
+{
+ int frameOptions = wc->frameOptions;
+ double partition_tuples;
+ double return_tuples;
+ double peer_tuples;
+
+ /*
+ * First, figure out how many partitions there are likely to be and set
+ * partition_tuples according to that estimate.
+ */
+ if (wc->partitionClause != NIL)
+ {
+ double num_partitions;
+ List *partexprs = get_sortgrouplist_exprs(wc->partitionClause,
+ root->parse->targetList);
+
+ num_partitions = estimate_num_groups(root, partexprs, input_tuples,
+ NULL, NULL);
+ list_free(partexprs);
+
+ partition_tuples = input_tuples / num_partitions;
+ }
+ else
+ {
+ /* all tuples belong to the same partition */
+ partition_tuples = input_tuples;
+ }
+
+ /* estimate the number of tuples in each peer group */
+ if (wc->orderClause != NIL)
+ {
+ double num_groups;
+ List *orderexprs;
+
+ orderexprs = get_sortgrouplist_exprs(wc->orderClause,
+ root->parse->targetList);
+
+ /* estimate out how many peer groups there are in the partition */
+ num_groups = estimate_num_groups(root, orderexprs,
+ partition_tuples, NULL,
+ NULL);
+ list_free(orderexprs);
+ peer_tuples = partition_tuples / num_groups;
+ }
+ else
+ {
+ /* no ORDER BY so only 1 tuple belongs in each peer group */
+ peer_tuples = 1.0;
+ }
+
+ if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
+ {
+ /* include all partition rows */
+ return_tuples = partition_tuples;
+ }
+ else if (frameOptions & FRAMEOPTION_END_CURRENT_ROW)
+ {
+ if (frameOptions & FRAMEOPTION_ROWS)
+ {
+ /* just count the current row */
+ return_tuples = 1.0;
+ }
+ else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS))
+ {
+ /*
+ * When in RANGE/GROUPS mode, it's more complex. If there's no
+ * ORDER BY, then all rows in the partition are peers, otherwise
+ * we'll need to read the first group of peers.
+ */
+ if (wc->orderClause == NIL)
+ return_tuples = partition_tuples;
+ else
+ return_tuples = peer_tuples;
+ }
+ else
+ {
+ /*
+ * Something new we don't support yet? This needs attention.
+ * We'll just return 1.0 in the meantime.
+ */
+ Assert(false);
+ return_tuples = 1.0;
+ }
+ }
+ else if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING)
+ {
+ /*
+ * BETWEEN ... AND N PRECEDING will only need to read the WindowAgg's
+ * subnode after N ROWS/RANGES/GROUPS. N can be 0, but not negative,
+ * so we'll just assume only the current row needs to be read to fetch
+ * the first WindowAgg row.
+ */
+ return_tuples = 1.0;
+ }
+ else if (frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING)
+ {
+ Const *endOffset = (Const *) wc->endOffset;
+ double end_offset_value;
+
+ /* try and figure out the value specified in the endOffset. */
+ if (IsA(endOffset, Const))
+ {
+ if (endOffset->constisnull)
+ {
+ /*
+ * NULLs are not allowed, but currently, there's no code to
+ * error out if there's a NULL Const. We'll only discover
+ * this during execution. For now, just pretend everything is
+ * fine and assume that just the current row/range/group will
+ * be needed.
+ */
+ end_offset_value = 1.0;
+ }
+ else
+ {
+ switch (endOffset->consttype)
+ {
+ case INT2OID:
+ end_offset_value =
+ (double) DatumGetInt16(endOffset->constvalue);
+ break;
+ case INT4OID:
+ end_offset_value =
+ (double) DatumGetInt32(endOffset->constvalue);
+ break;
+ case INT8OID:
+ end_offset_value =
+ (double) DatumGetInt64(endOffset->constvalue);
+ break;
+ default:
+ end_offset_value =
+ partition_tuples / peer_tuples *
+ DEFAULT_INEQ_SEL;
+ break;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * When the end bound is not a Const, we'll just need to guess. We
+ * just make use of DEFAULT_INEQ_SEL.
+ */
+ end_offset_value =
+ partition_tuples / peer_tuples * DEFAULT_INEQ_SEL;
+ }
+
+ if (frameOptions & FRAMEOPTION_ROWS)
+ {
+ /* include the N FOLLOWING and the current row */
+ return_tuples = end_offset_value + 1.0;
+ }
+ else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS))
+ {
+ /* include N FOLLOWING ranges/group and the initial range/group */
+ return_tuples = peer_tuples * (end_offset_value + 1.0);
+ }
+ else
+ {
+ /*
+ * Something new we don't support yet? This needs attention.
+ * We'll just return 1.0 in the meantime.
+ */
+ Assert(false);
+ return_tuples = 1.0;
+ }
+ }
+ else
+ {
+ /*
+ * Something new we don't support yet? This needs attention. We'll
+ * just return 1.0 in the meantime.
+ */
+ Assert(false);
+ return_tuples = 1.0;
+ }
+
+ if (wc->partitionClause != NIL || wc->orderClause != NIL)
+ {
+ /*
+ * Cap the return value to the estimated partition tuples and account
+ * for the extra tuple WindowAgg will need to read to confirm the next
+ * tuple does not belong to the same partition or peer group.
+ */
+ return_tuples = Min(return_tuples + 1.0, partition_tuples);
+ }
+ else
+ {
+ /*
+ * Cap the return value so it's never higher than the expected tuples
+ * in the partition.
+ */
+ return_tuples = Min(return_tuples, partition_tuples);
+ }
+
+ /*
+ * We needn't worry about any EXCLUDE options as those only exclude rows
+ * from being aggregated, not from being read from the WindowAgg's
+ * subnode.
+ */
+
+ return clamp_row_est(return_tuples);
+}
+
+/*
* cost_windowagg
* Determines and returns the cost of performing a WindowAgg plan node,
* including the cost of its input.
@@ -2818,18 +3038,33 @@ cost_agg(Path *path, PlannerInfo *root,
*/
void
cost_windowagg(Path *path, PlannerInfo *root,
- List *windowFuncs, int numPartCols, int numOrderCols,
+ List *windowFuncs, WindowClause *winclause,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples)
{
Cost startup_cost;
Cost total_cost;
+ double startup_tuples;
+ int numPartCols;
+ int numOrderCols;
ListCell *lc;
+ numPartCols = list_length(winclause->partitionClause);
+ numOrderCols = list_length(winclause->orderClause);
+
startup_cost = input_startup_cost;
total_cost = input_total_cost;
/*
+ * Estimate how many tuples we'll need to read from the subnode before we
+ * can output the first WindowAgg row.
+ */
+ startup_tuples = get_windowclause_startup_tuples(root, winclause,
+ input_tuples);
+
+ elog(DEBUG1, "startup_tuples = %g", startup_tuples); /* XXX not for commit */
+
+ /*
* Window functions are assumed to cost their stated execution cost, plus
* the cost of evaluating their input expressions, per tuple. Since they
* may in fact evaluate their inputs at multiple rows during each cycle,
@@ -2880,6 +3115,18 @@ cost_windowagg(Path *path, PlannerInfo *root,
path->rows = input_tuples;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
+
+ /*
+ * Also, take into account how many tuples we need to read from the
+ * subnode in order to produce the first tuple from the WindowAgg. To do
+ * this we proportion the run cost (total cost not including startup cost)
+ * over the estimated startup tuples. We already included the startup
+ * cost of the subnode, so we only need to do this when the estimated
+ * startup tuples is above 1.0.
+ */
+ if (startup_tuples > 1.0)
+ path->startup_cost += (total_cost - startup_cost) / input_tuples *
+ (startup_tuples - 1.0);
}
/*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f123fcb41e3..754f0b9f34c 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3457,8 +3457,7 @@ create_windowagg_path(PlannerInfo *root,
*/
cost_windowagg(&pathnode->path, root,
windowFuncs,
- list_length(winclause->partitionClause),
- list_length(winclause->orderClause),
+ winclause,
subpath->startup_cost,
subpath->total_cost,
subpath->rows);
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 6cf49705d3a..bee090ffc2e 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -131,7 +131,7 @@ extern void cost_agg(Path *path, PlannerInfo *root,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, double input_width);
extern void cost_windowagg(Path *path, PlannerInfo *root,
- List *windowFuncs, int numPartCols, int numOrderCols,
+ List *windowFuncs, WindowClause *winclause,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples);
extern void cost_group(Path *path, PlannerInfo *root,
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 1d4b78b9b27..2628033327d 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -4808,6 +4808,80 @@ SELECT i, b, bool_and(b) OVER w, bool_or(b) OVER w
5 | t | t | t
(5 rows)
+--
+-- Test WindowAgg costing takes into account the number of rows that need to
+-- be fetched before the first row can be output.
+--
+-- Ensure we get a cheap start up plan as the WindowAgg can output the first
+-- row after reading 1 row from the join.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER (ORDER BY t1.unique1)
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+ QUERY PLAN
+--------------------------------------------------------------------------
+ Limit
+ -> WindowAgg
+ -> Nested Loop
+ -> Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
+ Index Cond: (tenthous = t1.unique1)
+(6 rows)
+
+-- Ensure we get a cheap total plan. Lack of ORDER BY in the WindowClause
+-- means that all rows must be read from the join, so a cheap startup plan
+-- isn't a good choice.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER ()
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Limit
+ -> WindowAgg
+ -> Hash Join
+ Hash Cond: (t1.unique1 = t2.tenthous)
+ -> Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Hash
+ -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
+(7 rows)
+
+-- Ensure we get a cheap total plan. This time use UNBOUNDED FOLLOWING, which
+-- needs to read all join rows to output the first WindowAgg row.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Limit
+ -> WindowAgg
+ -> Merge Join
+ Merge Cond: (t1.unique1 = t2.tenthous)
+ -> Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Sort
+ Sort Key: t2.tenthous
+ -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
+(8 rows)
+
+-- Ensure we get a cheap total plan. This time use 10000 FOLLOWING so we need
+-- to read all join rows.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND 10000 FOLLOWING)
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Limit
+ -> WindowAgg
+ -> Merge Join
+ Merge Cond: (t1.unique1 = t2.tenthous)
+ -> Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Sort
+ Sort Key: t2.tenthous
+ -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
+(8 rows)
+
-- Tests for problems with failure to walk or mutate expressions
-- within window frame clauses.
-- test walker (fails with collation error if expressions are not walked)
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index 3ab6ac715d0..4789de09376 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -1716,6 +1716,40 @@ SELECT i, b, bool_and(b) OVER w, bool_or(b) OVER w
FROM (VALUES (1,true), (2,true), (3,false), (4,false), (5,true)) v(i,b)
WINDOW w AS (ORDER BY i ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING);
+--
+-- Test WindowAgg costing takes into account the number of rows that need to
+-- be fetched before the first row can be output.
+--
+
+-- Ensure we get a cheap start up plan as the WindowAgg can output the first
+-- row after reading 1 row from the join.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER (ORDER BY t1.unique1)
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+
+-- Ensure we get a cheap total plan. Lack of ORDER BY in the WindowClause
+-- means that all rows must be read from the join, so a cheap startup plan
+-- isn't a good choice.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER ()
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+
+-- Ensure we get a cheap total plan. This time use UNBOUNDED FOLLOWING, which
+-- needs to read all join rows to output the first WindowAgg row.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+
+-- Ensure we get a cheap total plan. This time use 10000 FOLLOWING so we need
+-- to read all join rows.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND 10000 FOLLOWING)
+FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous
+LIMIT 1;
+
-- Tests for problems with failure to walk or mutate expressions
-- within window frame clauses.