aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/planner.c222
-rw-r--r--src/test/regress/expected/incremental_sort.out13
-rw-r--r--src/test/regress/expected/select_distinct.out31
-rw-r--r--src/test/regress/expected/window.out10
-rw-r--r--src/test/regress/sql/select_distinct.sql8
5 files changed, 173 insertions, 111 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 000d757bdd8..044fb246665 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4654,22 +4654,63 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
cheapest_partial_path->rows,
NULL, NULL);
- /* first try adding unique paths atop of sorted paths */
+ /*
+ * Try sorting the cheapest path and incrementally sorting any paths with
+ * presorted keys and put unique paths atop of those.
+ */
if (grouping_is_sortable(parse->distinctClause))
{
foreach(lc, input_rel->partial_pathlist)
{
- Path *path = (Path *) lfirst(lc);
+ Path *input_path = (Path *) lfirst(lc);
+ Path *sorted_path;
+ bool is_sorted;
+ int presorted_keys;
- if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys))
+ is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
+ input_path->pathkeys,
+ &presorted_keys);
+
+ if (is_sorted)
+ sorted_path = input_path;
+ else
{
- add_partial_path(partial_distinct_rel, (Path *)
- create_upper_unique_path(root,
- partial_distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted
+ * keys when incremental sort is disabled unless it's the
+ * cheapest partial path).
+ */
+ if (input_path != cheapest_partial_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
+
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ sorted_path = (Path *) create_sort_path(root,
+ partial_distinct_rel,
+ input_path,
+ root->distinct_pathkeys,
+ -1.0);
+ else
+ sorted_path = (Path *) create_incremental_sort_path(root,
+ partial_distinct_rel,
+ input_path,
+ root->distinct_pathkeys,
+ presorted_keys,
+ -1.0);
}
+
+ add_partial_path(partial_distinct_rel, (Path *)
+ create_upper_unique_path(root, partial_distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
}
}
@@ -4773,9 +4814,11 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
if (grouping_is_sortable(parse->distinctClause))
{
/*
- * First, if we have any adequately-presorted paths, just stick a
- * Unique node on those. Then consider doing an explicit sort of the
- * cheapest input path and Unique'ing that.
+ * Firstly, if we have any adequately-presorted paths, just stick a
+ * Unique node on those. We also consider doing an explicit sort of
+ * the cheapest input path and Unique'ing that. If any paths have
+ * presorted keys then we'll create an incremental sort atop of those
+ * before adding a unique node on the top.
*
* When we have DISTINCT ON, we must sort by the more rigorous of
* DISTINCT and ORDER BY, else it won't have the desired behavior.
@@ -4785,8 +4828,8 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
* the other.)
*/
List *needed_pathkeys;
- Path *path;
ListCell *lc;
+ double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
if (parse->hasDistinctOn &&
list_length(root->distinct_pathkeys) <
@@ -4797,96 +4840,89 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
foreach(lc, input_rel->pathlist)
{
- path = (Path *) lfirst(lc);
+ Path *input_path = (Path *) lfirst(lc);
+ Path *sorted_path;
+ bool is_sorted;
+ int presorted_keys;
- if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
+ is_sorted = pathkeys_count_contained_in(needed_pathkeys,
+ input_path->pathkeys,
+ &presorted_keys);
+
+ if (is_sorted)
+ sorted_path = input_path;
+ else
{
/*
- * distinct_pathkeys may have become empty if all of the
- * pathkeys were determined to be redundant. If all of the
- * pathkeys are redundant then each DISTINCT target must only
- * allow a single value, therefore all resulting tuples must
- * be identical (or at least indistinguishable by an equality
- * check). We can uniquify these tuples simply by just taking
- * the first tuple. All we do here is add a path to do "LIMIT
- * 1" atop of 'path'. When doing a DISTINCT ON we may still
- * have a non-NIL sort_pathkeys list, so we must still only do
- * this with paths which are correctly sorted by
- * sort_pathkeys.
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted
+ * keys when incremental sort is disabled unless it's the
+ * cheapest input path).
*/
- if (root->distinct_pathkeys == NIL)
- {
- Node *limitCount;
-
- limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
- sizeof(int64),
- Int64GetDatum(1), false,
- FLOAT8PASSBYVAL);
+ if (input_path != cheapest_input_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
- /*
- * If the query already has a LIMIT clause, then we could
- * end up with a duplicate LimitPath in the final plan.
- * That does not seem worth troubling over too much.
- */
- add_path(distinct_rel, (Path *)
- create_limit_path(root, distinct_rel, path, NULL,
- limitCount, LIMIT_OPTION_COUNT,
- 0, 1));
- }
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ sorted_path = (Path *) create_sort_path(root,
+ distinct_rel,
+ input_path,
+ needed_pathkeys,
+ limittuples);
else
- {
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
- }
+ sorted_path = (Path *) create_incremental_sort_path(root,
+ distinct_rel,
+ input_path,
+ needed_pathkeys,
+ presorted_keys,
+ limittuples);
}
- }
- /* For explicit-sort case, always use the more rigorous clause */
- if (list_length(root->distinct_pathkeys) <
- list_length(root->sort_pathkeys))
- {
- needed_pathkeys = root->sort_pathkeys;
- /* Assert checks that parser didn't mess up... */
- Assert(pathkeys_contained_in(root->distinct_pathkeys,
- needed_pathkeys));
- }
- else
- needed_pathkeys = root->distinct_pathkeys;
+ /*
+ * distinct_pathkeys may have become empty if all of the pathkeys
+ * were determined to be redundant. If all of the pathkeys are
+ * redundant then each DISTINCT target must only allow a single
+ * value, therefore all resulting tuples must be identical (or at
+ * least indistinguishable by an equality check). We can uniquify
+ * these tuples simply by just taking the first tuple. All we do
+ * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
+ * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
+ * list, so we must still only do this with paths which are
+ * correctly sorted by sort_pathkeys.
+ */
+ if (root->distinct_pathkeys == NIL)
+ {
+ Node *limitCount;
- path = cheapest_input_path;
- if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
- path = (Path *) create_sort_path(root, distinct_rel,
- path,
- needed_pathkeys,
- root->distinct_pathkeys == NIL ?
- 1.0 : -1.0);
+ limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
- /*
- * As above, use a LimitPath instead of a UniquePath when all of the
- * distinct_pathkeys are redundant and we're only going to get a
- * series of tuples all with the same values anyway.
- */
- if (root->distinct_pathkeys == NIL)
- {
- Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
- sizeof(int64),
- Int64GetDatum(1), false,
- FLOAT8PASSBYVAL);
-
- add_path(distinct_rel, (Path *)
- create_limit_path(root, distinct_rel, path, NULL,
- limitCount, LIMIT_OPTION_COUNT, 0, 1));
- }
- else
- {
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * If the query already has a LIMIT clause, then we could end
+ * up with a duplicate LimitPath in the final plan. That does
+ * not seem worth troubling over too much.
+ */
+ add_path(distinct_rel, (Path *)
+ create_limit_path(root, distinct_rel, sorted_path,
+ NULL, limitCount,
+ LIMIT_OPTION_COUNT, 0, 1));
+ }
+ else
+ {
+ add_path(distinct_rel, (Path *)
+ create_upper_unique_path(root, distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
+ }
}
}
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 1a1e8b2365b..0c3433f8e58 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1484,15 +1484,16 @@ explain (costs off) select * from t union select * from t order by 1,3;
-- Full sort, not just incremental sort can be pushed below a gather merge path
-- by generate_useful_gather_paths.
explain (costs off) select distinct a,b from t;
- QUERY PLAN
-------------------------------------------
+ QUERY PLAN
+------------------------------------------------
Unique
-> Gather Merge
Workers Planned: 2
- -> Sort
- Sort Key: a, b
- -> Parallel Seq Scan on t
-(6 rows)
+ -> Unique
+ -> Sort
+ Sort Key: a, b
+ -> Parallel Seq Scan on t
+(7 rows)
drop table t;
-- Sort pushdown can't go below where expressions are part of the rel target.
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 6ce889d87c1..1fc07f220fb 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -171,6 +171,20 @@ SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+-- Check to see we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+ QUERY PLAN
+-----------------------------------------------------
+ Unique
+ -> Incremental Sort
+ Sort Key: hundred, two
+ Presorted Key: hundred
+ -> Index Scan using tenk1_hundred on tenk1
+(5 rows)
+
+RESET enable_seqscan;
SET enable_hashagg=TRUE;
-- Produce results with hash aggregation.
SET enable_sort=FALSE;
@@ -265,15 +279,16 @@ $$ LANGUAGE plpgsql PARALLEL SAFE;
-- Ensure we do parallel distinct now that the function is parallel safe
EXPLAIN (COSTS OFF)
SELECT DISTINCT distinct_func(1) FROM tenk1;
- QUERY PLAN
-----------------------------------------------
+ QUERY PLAN
+----------------------------------------------------
Unique
- -> Sort
- Sort Key: (distinct_func(1))
- -> Gather
- Workers Planned: 2
- -> Parallel Seq Scan on tenk1
-(6 rows)
+ -> Gather Merge
+ Workers Planned: 2
+ -> Unique
+ -> Sort
+ Sort Key: (distinct_func(1))
+ -> Parallel Seq Scan on tenk1
+(7 rows)
RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 90e89fb5b68..b2c6605e60c 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3944,8 +3944,9 @@ ORDER BY depname, enroll_date;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Unique
- -> Sort
+ -> Incremental Sort
Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
+ Presorted Key: depname, enroll_date
-> WindowAgg
-> Incremental Sort
Sort Key: depname, enroll_date
@@ -3954,7 +3955,7 @@ ORDER BY depname, enroll_date;
-> Sort
Sort Key: depname, empno
-> Seq Scan on empsalary
-(11 rows)
+(12 rows)
-- As above but adjust the ORDER BY clause to help ensure the plan with the
-- minimum amount of sorting wasn't a fluke.
@@ -3970,8 +3971,9 @@ ORDER BY depname, empno;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Unique
- -> Sort
+ -> Incremental Sort
Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
+ Presorted Key: depname, empno
-> WindowAgg
-> Incremental Sort
Sort Key: depname, empno
@@ -3980,7 +3982,7 @@ ORDER BY depname, empno;
-> Sort
Sort Key: depname, enroll_date
-> Seq Scan on empsalary
-(11 rows)
+(12 rows)
RESET enable_hashagg;
-- Test Sort node reordering
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 34020adad1d..1643526d991 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -69,6 +69,14 @@ SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+
+-- Check to see we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+
+RESET enable_seqscan;
+
SET enable_hashagg=TRUE;
-- Produce results with hash aggregation.