aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/planner.c222
-rw-r--r--src/test/regress/expected/incremental_sort.out13
-rw-r--r--src/test/regress/expected/select_distinct.out31
-rw-r--r--src/test/regress/expected/window.out10
-rw-r--r--src/test/regress/sql/select_distinct.sql8
5 files changed, 173 insertions, 111 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 000d757bdd8..044fb246665 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4654,22 +4654,63 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
cheapest_partial_path->rows,
NULL, NULL);
- /* first try adding unique paths atop of sorted paths */
+ /*
+ * Try sorting the cheapest path and incrementally sorting any paths with
+ * presorted keys and put unique paths atop of those.
+ */
if (grouping_is_sortable(parse->distinctClause))
{
foreach(lc, input_rel->partial_pathlist)
{
- Path *path = (Path *) lfirst(lc);
+ Path *input_path = (Path *) lfirst(lc);
+ Path *sorted_path;
+ bool is_sorted;
+ int presorted_keys;
- if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys))
+ is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
+ input_path->pathkeys,
+ &presorted_keys);
+
+ if (is_sorted)
+ sorted_path = input_path;
+ else
{
- add_partial_path(partial_distinct_rel, (Path *)
- create_upper_unique_path(root,
- partial_distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted
+ * keys when incremental sort is disabled unless it's the
+ * cheapest partial path).
+ */
+ if (input_path != cheapest_partial_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
+
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ sorted_path = (Path *) create_sort_path(root,
+ partial_distinct_rel,
+ input_path,
+ root->distinct_pathkeys,
+ -1.0);
+ else
+ sorted_path = (Path *) create_incremental_sort_path(root,
+ partial_distinct_rel,
+ input_path,
+ root->distinct_pathkeys,
+ presorted_keys,
+ -1.0);
}
+
+ add_partial_path(partial_distinct_rel, (Path *)
+ create_upper_unique_path(root, partial_distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
}
}
@@ -4773,9 +4814,11 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
if (grouping_is_sortable(parse->distinctClause))
{
/*
- * First, if we have any adequately-presorted paths, just stick a
- * Unique node on those. Then consider doing an explicit sort of the
- * cheapest input path and Unique'ing that.
+ * Firstly, if we have any adequately-presorted paths, just stick a
+ * Unique node on those. We also consider doing an explicit sort of
+ * the cheapest input path and Unique'ing that. If any paths have
+ * presorted keys then we'll create an incremental sort atop of those
+ * before adding a unique node on the top.
*
* When we have DISTINCT ON, we must sort by the more rigorous of
* DISTINCT and ORDER BY, else it won't have the desired behavior.
@@ -4785,8 +4828,8 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
* the other.)
*/
List *needed_pathkeys;
- Path *path;
ListCell *lc;
+ double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
if (parse->hasDistinctOn &&
list_length(root->distinct_pathkeys) <
@@ -4797,96 +4840,89 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
foreach(lc, input_rel->pathlist)
{
- path = (Path *) lfirst(lc);
+ Path *input_path = (Path *) lfirst(lc);
+ Path *sorted_path;
+ bool is_sorted;
+ int presorted_keys;
- if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
+ is_sorted = pathkeys_count_contained_in(needed_pathkeys,
+ input_path->pathkeys,
+ &presorted_keys);
+
+ if (is_sorted)
+ sorted_path = input_path;
+ else
{
/*
- * distinct_pathkeys may have become empty if all of the
- * pathkeys were determined to be redundant. If all of the
- * pathkeys are redundant then each DISTINCT target must only
- * allow a single value, therefore all resulting tuples must
- * be identical (or at least indistinguishable by an equality
- * check). We can uniquify these tuples simply by just taking
- * the first tuple. All we do here is add a path to do "LIMIT
- * 1" atop of 'path'. When doing a DISTINCT ON we may still
- * have a non-NIL sort_pathkeys list, so we must still only do
- * this with paths which are correctly sorted by
- * sort_pathkeys.
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted
+ * keys when incremental sort is disabled unless it's the
+ * cheapest input path).
*/
- if (root->distinct_pathkeys == NIL)
- {
- Node *limitCount;
-
- limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
- sizeof(int64),
- Int64GetDatum(1), false,
- FLOAT8PASSBYVAL);
+ if (input_path != cheapest_input_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
- /*
- * If the query already has a LIMIT clause, then we could
- * end up with a duplicate LimitPath in the final plan.
- * That does not seem worth troubling over too much.
- */
- add_path(distinct_rel, (Path *)
- create_limit_path(root, distinct_rel, path, NULL,
- limitCount, LIMIT_OPTION_COUNT,
- 0, 1));
- }
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ sorted_path = (Path *) create_sort_path(root,
+ distinct_rel,
+ input_path,
+ needed_pathkeys,
+ limittuples);
else
- {
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
- }
+ sorted_path = (Path *) create_incremental_sort_path(root,
+ distinct_rel,
+ input_path,
+ needed_pathkeys,
+ presorted_keys,
+ limittuples);
}
- }
- /* For explicit-sort case, always use the more rigorous clause */
- if (list_length(root->distinct_pathkeys) <
- list_length(root->sort_pathkeys))
- {
- needed_pathkeys = root->sort_pathkeys;
- /* Assert checks that parser didn't mess up... */
- Assert(pathkeys_contained_in(root->distinct_pathkeys,
- needed_pathkeys));
- }
- else
- needed_pathkeys = root->distinct_pathkeys;
+ /*
+ * distinct_pathkeys may have become empty if all of the pathkeys
+ * were determined to be redundant. If all of the pathkeys are
+ * redundant then each DISTINCT target must only allow a single
+ * value, therefore all resulting tuples must be identical (or at
+ * least indistinguishable by an equality check). We can uniquify
+ * these tuples simply by just taking the first tuple. All we do
+ * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
+ * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
+ * list, so we must still only do this with paths which are
+ * correctly sorted by sort_pathkeys.
+ */
+ if (root->distinct_pathkeys == NIL)
+ {
+ Node *limitCount;
- path = cheapest_input_path;
- if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
- path = (Path *) create_sort_path(root, distinct_rel,
- path,
- needed_pathkeys,
- root->distinct_pathkeys == NIL ?
- 1.0 : -1.0);
+ limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
- /*
- * As above, use a LimitPath instead of a UniquePath when all of the
- * distinct_pathkeys are redundant and we're only going to get a
- * series of tuples all with the same values anyway.
- */
- if (root->distinct_pathkeys == NIL)
- {
- Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
- sizeof(int64),
- Int64GetDatum(1), false,
- FLOAT8PASSBYVAL);
-
- add_path(distinct_rel, (Path *)
- create_limit_path(root, distinct_rel, path, NULL,
- limitCount, LIMIT_OPTION_COUNT, 0, 1));
- }
- else
- {
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * If the query already has a LIMIT clause, then we could end
+ * up with a duplicate LimitPath in the final plan. That does
+ * not seem worth troubling over too much.
+ */
+ add_path(distinct_rel, (Path *)
+ create_limit_path(root, distinct_rel, sorted_path,
+ NULL, limitCount,
+ LIMIT_OPTION_COUNT, 0, 1));
+ }
+ else
+ {
+ add_path(distinct_rel, (Path *)
+ create_upper_unique_path(root, distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
+ }
}
}
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 1a1e8b2365b..0c3433f8e58 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1484,15 +1484,16 @@ explain (costs off) select * from t union select * from t order by 1,3;
-- Full sort, not just incremental sort can be pushed below a gather merge path
-- by generate_useful_gather_paths.
explain (costs off) select distinct a,b from t;
- QUERY PLAN
-------------------------------------------
+ QUERY PLAN
+------------------------------------------------
Unique
-> Gather Merge
Workers Planned: 2
- -> Sort
- Sort Key: a, b
- -> Parallel Seq Scan on t
-(6 rows)
+ -> Unique
+ -> Sort
+ Sort Key: a, b
+ -> Parallel Seq Scan on t
+(7 rows)
drop table t;
-- Sort pushdown can't go below where expressions are part of the rel target.
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 6ce889d87c1..1fc07f220fb 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -171,6 +171,20 @@ SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+-- Check to see we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+ QUERY PLAN
+-----------------------------------------------------
+ Unique
+ -> Incremental Sort
+ Sort Key: hundred, two
+ Presorted Key: hundred
+ -> Index Scan using tenk1_hundred on tenk1
+(5 rows)
+
+RESET enable_seqscan;
SET enable_hashagg=TRUE;
-- Produce results with hash aggregation.
SET enable_sort=FALSE;
@@ -265,15 +279,16 @@ $$ LANGUAGE plpgsql PARALLEL SAFE;
-- Ensure we do parallel distinct now that the function is parallel safe
EXPLAIN (COSTS OFF)
SELECT DISTINCT distinct_func(1) FROM tenk1;
- QUERY PLAN
-----------------------------------------------
+ QUERY PLAN
+----------------------------------------------------
Unique
- -> Sort
- Sort Key: (distinct_func(1))
- -> Gather
- Workers Planned: 2
- -> Parallel Seq Scan on tenk1
-(6 rows)
+ -> Gather Merge
+ Workers Planned: 2
+ -> Unique
+ -> Sort
+ Sort Key: (distinct_func(1))
+ -> Parallel Seq Scan on tenk1
+(7 rows)
RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 90e89fb5b68..b2c6605e60c 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3944,8 +3944,9 @@ ORDER BY depname, enroll_date;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Unique
- -> Sort
+ -> Incremental Sort
Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
+ Presorted Key: depname, enroll_date
-> WindowAgg
-> Incremental Sort
Sort Key: depname, enroll_date
@@ -3954,7 +3955,7 @@ ORDER BY depname, enroll_date;
-> Sort
Sort Key: depname, empno
-> Seq Scan on empsalary
-(11 rows)
+(12 rows)
-- As above but adjust the ORDER BY clause to help ensure the plan with the
-- minimum amount of sorting wasn't a fluke.
@@ -3970,8 +3971,9 @@ ORDER BY depname, empno;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Unique
- -> Sort
+ -> Incremental Sort
Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
+ Presorted Key: depname, empno
-> WindowAgg
-> Incremental Sort
Sort Key: depname, empno
@@ -3980,7 +3982,7 @@ ORDER BY depname, empno;
-> Sort
Sort Key: depname, enroll_date
-> Seq Scan on empsalary
-(11 rows)
+(12 rows)
RESET enable_hashagg;
-- Test Sort node reordering
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 34020adad1d..1643526d991 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -69,6 +69,14 @@ SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+
+-- Check to see we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+
+RESET enable_seqscan;
+
SET enable_hashagg=TRUE;
-- Produce results with hash aggregation.