aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/pathkeys.c38
-rw-r--r--src/backend/optimizer/plan/planner.c456
-rw-r--r--src/backend/utils/misc/guc_tables.c10
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample1
-rw-r--r--src/include/optimizer/optimizer.h1
-rw-r--r--src/test/regress/expected/select_distinct.out132
-rw-r--r--src/test/regress/expected/select_distinct_on.out125
-rw-r--r--src/test/regress/expected/sysviews.out3
-rw-r--r--src/test/regress/sql/select_distinct.sql53
-rw-r--r--src/test/regress/sql/select_distinct_on.sql40
10 files changed, 658 insertions, 201 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 7cf15e89b63..03c2ac36e05 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -2208,6 +2208,41 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
}
/*
+ * pathkeys_useful_for_distinct
+ * Count the number of pathkeys that are useful for DISTINCT or DISTINCT
+ * ON clause.
+ *
+ * DISTINCT keys could be reordered to benefit from the given pathkey list. As
+ * with pathkeys_useful_for_grouping, we return the number of leading keys in
+ * the list that are shared by the distinctClause pathkeys.
+ */
+static int
+pathkeys_useful_for_distinct(PlannerInfo *root, List *pathkeys)
+{
+ int n_common_pathkeys;
+
+ /*
+ * distinct_pathkeys may have become empty if all of the pathkeys were
+ * determined to be redundant. Return 0 in this case.
+ */
+ if (root->distinct_pathkeys == NIL)
+ return 0;
+
+ /* walk the pathkeys and search for matching DISTINCT key */
+ n_common_pathkeys = 0;
+ foreach_node(PathKey, pathkey, pathkeys)
+ {
+ /* no matching DISTINCT key, we're done */
+ if (!list_member_ptr(root->distinct_pathkeys, pathkey))
+ break;
+
+ n_common_pathkeys++;
+ }
+
+ return n_common_pathkeys;
+}
+
+/*
* pathkeys_useful_for_setop
* Count the number of leading common pathkeys root's 'setop_pathkeys' in
* 'pathkeys'.
@@ -2242,6 +2277,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
+ nuseful2 = pathkeys_useful_for_distinct(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1f78dc3d530..b665a7762ec 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -67,6 +67,7 @@
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
int debug_parallel_query = DEBUG_PARALLEL_OFF;
bool parallel_leader_participation = true;
+bool enable_distinct_reordering = true;
/* Hook for plugins to get control in planner() */
planner_hook_type planner_hook = NULL;
@@ -198,6 +199,9 @@ static void create_partial_distinct_paths(PlannerInfo *root,
static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *distinct_rel);
+static List *get_useful_pathkeys_for_distinct(PlannerInfo *root,
+ List *needed_pathkeys,
+ List *path_pathkeys);
static RelOptInfo *create_ordered_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
@@ -235,6 +239,12 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
grouping_sets_data *gd,
GroupPathExtraData *extra,
bool force_rel_creation);
+static Path *make_ordered_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *path,
+ Path *cheapest_path,
+ List *pathkeys,
+ double limit_tuples);
static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
static bool can_partial_agg(PlannerInfo *root);
static void apply_scanjoin_target_to_paths(PlannerInfo *root,
@@ -4938,7 +4948,10 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
/*
* Try sorting the cheapest path and incrementally sorting any paths with
- * presorted keys and put a unique paths atop of those.
+ * presorted keys and put a unique paths atop of those. We'll also
+ * attempt to reorder the required pathkeys to match the input path's
+ * pathkeys as much as possible, in hopes of avoiding a possible need to
+ * re-sort.
*/
if (grouping_is_sortable(root->processed_distinctClause))
{
@@ -4946,86 +4959,67 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
{
Path *input_path = (Path *) lfirst(lc);
Path *sorted_path;
- bool is_sorted;
- int presorted_keys;
+ List *useful_pathkeys_list = NIL;
- is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
- input_path->pathkeys,
- &presorted_keys);
+ useful_pathkeys_list =
+ get_useful_pathkeys_for_distinct(root,
+ root->distinct_pathkeys,
+ input_path->pathkeys);
+ Assert(list_length(useful_pathkeys_list) > 0);
- if (is_sorted)
- sorted_path = input_path;
- else
+ foreach_node(List, useful_pathkeys, useful_pathkeys_list)
{
- /*
- * Try at least sorting the cheapest path and also try
- * incrementally sorting any path which is partially sorted
- * already (no need to deal with paths which have presorted
- * keys when incremental sort is disabled unless it's the
- * cheapest partial path).
- */
- if (input_path != cheapest_partial_path &&
- (presorted_keys == 0 || !enable_incremental_sort))
+ sorted_path = make_ordered_path(root,
+ partial_distinct_rel,
+ input_path,
+ cheapest_partial_path,
+ useful_pathkeys,
+ -1.0);
+
+ if (sorted_path == NULL)
continue;
/*
- * We've no need to consider both a sort and incremental sort.
- * We'll just do a sort if there are no presorted keys and an
- * incremental sort when there are presorted keys.
+ * An empty distinct_pathkeys means all tuples have the same
+ * value for the DISTINCT clause. See
+ * create_final_distinct_paths()
*/
- if (presorted_keys == 0 || !enable_incremental_sort)
- sorted_path = (Path *) create_sort_path(root,
- partial_distinct_rel,
- input_path,
- root->distinct_pathkeys,
- -1.0);
- else
- sorted_path = (Path *) create_incremental_sort_path(root,
- partial_distinct_rel,
- input_path,
- root->distinct_pathkeys,
- presorted_keys,
- -1.0);
- }
-
- /*
- * An empty distinct_pathkeys means all tuples have the same value
- * for the DISTINCT clause. See create_final_distinct_paths()
- */
- if (root->distinct_pathkeys == NIL)
- {
- Node *limitCount;
+ if (root->distinct_pathkeys == NIL)
+ {
+ Node *limitCount;
- limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
- sizeof(int64),
- Int64GetDatum(1), false,
- FLOAT8PASSBYVAL);
+ limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
- /*
- * Apply a LimitPath onto the partial path to restrict the
- * tuples from each worker to 1. create_final_distinct_paths
- * will need to apply an additional LimitPath to restrict this
- * to a single row after the Gather node. If the query
- * already has a LIMIT clause, then we could end up with three
- * Limit nodes in the final plan. Consolidating the top two
- * of these could be done, but does not seem worth troubling
- * over.
- */
- add_partial_path(partial_distinct_rel, (Path *)
- create_limit_path(root, partial_distinct_rel,
- sorted_path,
- NULL,
- limitCount,
- LIMIT_OPTION_COUNT,
- 0, 1));
- }
- else
- {
- add_partial_path(partial_distinct_rel, (Path *)
- create_upper_unique_path(root, partial_distinct_rel,
- sorted_path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * Apply a LimitPath onto the partial path to restrict the
+ * tuples from each worker to 1.
+ * create_final_distinct_paths will need to apply an
+ * additional LimitPath to restrict this to a single row
+ * after the Gather node. If the query already has a
+ * LIMIT clause, then we could end up with three Limit
+ * nodes in the final plan. Consolidating the top two of
+ * these could be done, but does not seem worth troubling
+ * over.
+ */
+ add_partial_path(partial_distinct_rel, (Path *)
+ create_limit_path(root, partial_distinct_rel,
+ sorted_path,
+ NULL,
+ limitCount,
+ LIMIT_OPTION_COUNT,
+ 0, 1));
+ }
+ else
+ {
+ add_partial_path(partial_distinct_rel, (Path *)
+ create_upper_unique_path(root, partial_distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
+ }
}
}
}
@@ -5134,7 +5128,9 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
* Unique node on those. We also, consider doing an explicit sort of
* the cheapest input path and Unique'ing that. If any paths have
* presorted keys then we'll create an incremental sort atop of those
- * before adding a unique node on the top.
+ * before adding a unique node on the top. We'll also attempt to
+ * reorder the required pathkeys to match the input path's pathkeys as
+ * much as possible, in hopes of avoiding a possible need to re-sort.
*
* When we have DISTINCT ON, we must sort by the more rigorous of
* DISTINCT and ORDER BY, else it won't have the desired behavior.
@@ -5158,86 +5154,66 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
{
Path *input_path = (Path *) lfirst(lc);
Path *sorted_path;
- bool is_sorted;
- int presorted_keys;
+ List *useful_pathkeys_list = NIL;
- is_sorted = pathkeys_count_contained_in(needed_pathkeys,
- input_path->pathkeys,
- &presorted_keys);
+ useful_pathkeys_list =
+ get_useful_pathkeys_for_distinct(root,
+ needed_pathkeys,
+ input_path->pathkeys);
+ Assert(list_length(useful_pathkeys_list) > 0);
- if (is_sorted)
- sorted_path = input_path;
- else
+ foreach_node(List, useful_pathkeys, useful_pathkeys_list)
{
- /*
- * Try at least sorting the cheapest path and also try
- * incrementally sorting any path which is partially sorted
- * already (no need to deal with paths which have presorted
- * keys when incremental sort is disabled unless it's the
- * cheapest input path).
- */
- if (input_path != cheapest_input_path &&
- (presorted_keys == 0 || !enable_incremental_sort))
+ sorted_path = make_ordered_path(root,
+ distinct_rel,
+ input_path,
+ cheapest_input_path,
+ useful_pathkeys,
+ limittuples);
+
+ if (sorted_path == NULL)
continue;
/*
- * We've no need to consider both a sort and incremental sort.
- * We'll just do a sort if there are no presorted keys and an
- * incremental sort when there are presorted keys.
+ * distinct_pathkeys may have become empty if all of the
+ * pathkeys were determined to be redundant. If all of the
+ * pathkeys are redundant then each DISTINCT target must only
+ * allow a single value, therefore all resulting tuples must
+ * be identical (or at least indistinguishable by an equality
+ * check). We can uniquify these tuples simply by just taking
+ * the first tuple. All we do here is add a path to do "LIMIT
+ * 1" atop of 'sorted_path'. When doing a DISTINCT ON we may
+ * still have a non-NIL sort_pathkeys list, so we must still
+ * only do this with paths which are correctly sorted by
+ * sort_pathkeys.
*/
- if (presorted_keys == 0 || !enable_incremental_sort)
- sorted_path = (Path *) create_sort_path(root,
- distinct_rel,
- input_path,
- needed_pathkeys,
- limittuples);
- else
- sorted_path = (Path *) create_incremental_sort_path(root,
- distinct_rel,
- input_path,
- needed_pathkeys,
- presorted_keys,
- limittuples);
- }
-
- /*
- * distinct_pathkeys may have become empty if all of the pathkeys
- * were determined to be redundant. If all of the pathkeys are
- * redundant then each DISTINCT target must only allow a single
- * value, therefore all resulting tuples must be identical (or at
- * least indistinguishable by an equality check). We can uniquify
- * these tuples simply by just taking the first tuple. All we do
- * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
- * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
- * list, so we must still only do this with paths which are
- * correctly sorted by sort_pathkeys.
- */
- if (root->distinct_pathkeys == NIL)
- {
- Node *limitCount;
+ if (root->distinct_pathkeys == NIL)
+ {
+ Node *limitCount;
- limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
- sizeof(int64),
- Int64GetDatum(1), false,
- FLOAT8PASSBYVAL);
+ limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
- /*
- * If the query already has a LIMIT clause, then we could end
- * up with a duplicate LimitPath in the final plan. That does
- * not seem worth troubling over too much.
- */
- add_path(distinct_rel, (Path *)
- create_limit_path(root, distinct_rel, sorted_path,
- NULL, limitCount,
- LIMIT_OPTION_COUNT, 0, 1));
- }
- else
- {
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- sorted_path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * If the query already has a LIMIT clause, then we could
+ * end up with a duplicate LimitPath in the final plan.
+ * That does not seem worth troubling over too much.
+ */
+ add_path(distinct_rel, (Path *)
+ create_limit_path(root, distinct_rel, sorted_path,
+ NULL, limitCount,
+ LIMIT_OPTION_COUNT, 0, 1));
+ }
+ else
+ {
+ add_path(distinct_rel, (Path *)
+ create_upper_unique_path(root, distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
+ }
}
}
}
@@ -5281,6 +5257,82 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
+ * get_useful_pathkeys_for_distinct
+ * Get useful orderings of pathkeys for distinctClause by reordering
+ * 'needed_pathkeys' to match the given 'path_pathkeys' as much as possible.
+ *
+ * This returns a list of pathkeys that can be useful for DISTINCT or DISTINCT
+ * ON clause. For convenience, it always includes the given 'needed_pathkeys'.
+ */
+static List *
+get_useful_pathkeys_for_distinct(PlannerInfo *root, List *needed_pathkeys,
+ List *path_pathkeys)
+{
+ List *useful_pathkeys_list = NIL;
+ List *useful_pathkeys = NIL;
+
+ /* always include the given 'needed_pathkeys' */
+ useful_pathkeys_list = lappend(useful_pathkeys_list,
+ needed_pathkeys);
+
+ if (!enable_distinct_reordering)
+ return useful_pathkeys_list;
+
+ /*
+ * Scan the given 'path_pathkeys' and construct a list of PathKey nodes
+ * that match 'needed_pathkeys', but only up to the longest matching
+ * prefix.
+ *
+ * When we have DISTINCT ON, we must ensure that the resulting pathkey
+ * list matches initial distinctClause pathkeys; otherwise, it won't have
+ * the desired behavior.
+ */
+ foreach_node(PathKey, pathkey, path_pathkeys)
+ {
+ /*
+ * The PathKey nodes are canonical, so they can be checked for
+ * equality by simple pointer comparison.
+ */
+ if (!list_member_ptr(needed_pathkeys, pathkey))
+ break;
+ if (root->parse->hasDistinctOn &&
+ !list_member_ptr(root->distinct_pathkeys, pathkey))
+ break;
+
+ useful_pathkeys = lappend(useful_pathkeys, pathkey);
+ }
+
+ /* If no match at all, no point in reordering needed_pathkeys */
+ if (useful_pathkeys == NIL)
+ return useful_pathkeys_list;
+
+ /*
+ * If not full match, the resulting pathkey list is not useful without
+ * incremental sort.
+ */
+ if (list_length(useful_pathkeys) < list_length(needed_pathkeys) &&
+ !enable_incremental_sort)
+ return useful_pathkeys_list;
+
+ /* Append the remaining PathKey nodes in needed_pathkeys */
+ useful_pathkeys = list_concat_unique_ptr(useful_pathkeys,
+ needed_pathkeys);
+
+ /*
+ * If the resulting pathkey list is the same as the 'needed_pathkeys',
+ * just drop it.
+ */
+ if (compare_pathkeys(needed_pathkeys,
+ useful_pathkeys) == PATHKEYS_EQUAL)
+ return useful_pathkeys_list;
+
+ useful_pathkeys_list = lappend(useful_pathkeys_list,
+ useful_pathkeys);
+
+ return useful_pathkeys_list;
+}
+
+/*
* create_ordered_paths
*
* Build a new upperrel containing Paths for ORDER BY evaluation.
@@ -7009,58 +7061,6 @@ done:
}
/*
- * make_ordered_path
- * Return a path ordered by 'pathkeys' based on the given 'path'. May
- * return NULL if it doesn't make sense to generate an ordered path in
- * this case.
- */
-static Path *
-make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
- Path *cheapest_path, List *pathkeys)
-{
- bool is_sorted;
- int presorted_keys;
-
- is_sorted = pathkeys_count_contained_in(pathkeys,
- path->pathkeys,
- &presorted_keys);
-
- if (!is_sorted)
- {
- /*
- * Try at least sorting the cheapest path and also try incrementally
- * sorting any path which is partially sorted already (no need to deal
- * with paths which have presorted keys when incremental sort is
- * disabled unless it's the cheapest input path).
- */
- if (path != cheapest_path &&
- (presorted_keys == 0 || !enable_incremental_sort))
- return NULL;
-
- /*
- * We've no need to consider both a sort and incremental sort. We'll
- * just do a sort if there are no presorted keys and an incremental
- * sort when there are presorted keys.
- */
- if (presorted_keys == 0 || !enable_incremental_sort)
- path = (Path *) create_sort_path(root,
- rel,
- path,
- pathkeys,
- -1.0);
- else
- path = (Path *) create_incremental_sort_path(root,
- rel,
- path,
- pathkeys,
- presorted_keys,
- -1.0);
- }
-
- return path;
-}
-
-/*
* add_paths_to_grouping_rel
*
* Add non-partial paths to grouping relation.
@@ -7111,7 +7111,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel,
path,
cheapest_path,
- info->pathkeys);
+ info->pathkeys,
+ -1.0);
if (path == NULL)
continue;
@@ -7192,7 +7193,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel,
path,
partially_grouped_rel->cheapest_total_path,
- info->pathkeys);
+ info->pathkeys,
+ -1.0);
if (path == NULL)
continue;
@@ -7443,7 +7445,8 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel,
path,
cheapest_total_path,
- info->pathkeys);
+ info->pathkeys,
+ -1.0);
if (path == NULL)
continue;
@@ -7500,7 +7503,8 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel,
path,
cheapest_partial_path,
- info->pathkeys);
+ info->pathkeys,
+ -1.0);
if (path == NULL)
continue;
@@ -7587,6 +7591,58 @@ create_partial_grouping_paths(PlannerInfo *root,
}
/*
+ * make_ordered_path
+ * Return a path ordered by 'pathkeys' based on the given 'path'. May
+ * return NULL if it doesn't make sense to generate an ordered path in
+ * this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+ Path *cheapest_path, List *pathkeys, double limit_tuples)
+{
+ bool is_sorted;
+ int presorted_keys;
+
+ is_sorted = pathkeys_count_contained_in(pathkeys,
+ path->pathkeys,
+ &presorted_keys);
+
+ if (!is_sorted)
+ {
+ /*
+ * Try at least sorting the cheapest path and also try incrementally
+ * sorting any path which is partially sorted already (no need to deal
+ * with paths which have presorted keys when incremental sort is
+ * disabled unless it's the cheapest input path).
+ */
+ if (path != cheapest_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ return NULL;
+
+ /*
+ * We've no need to consider both a sort and incremental sort. We'll
+ * just do a sort if there are no presorted keys and an incremental
+ * sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ path = (Path *) create_sort_path(root,
+ rel,
+ path,
+ pathkeys,
+ limit_tuples);
+ else
+ path = (Path *) create_incremental_sort_path(root,
+ rel,
+ path,
+ pathkeys,
+ presorted_keys,
+ limit_tuples);
+ }
+
+ return path;
+}
+
+/*
* Generate Gather and Gather Merge paths for a grouping relation or partial
* grouping relation.
*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 8a67f01200c..9845abd6932 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -997,6 +997,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
+ {"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables reordering of DISTINCT pathkeys."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_distinct_reordering,
+ true,
+ NULL, NULL, NULL
+ },
+ {
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
gettext_noop("This algorithm attempts to do planning without "
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 39a3ac23127..407cd1e08ca 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -414,6 +414,7 @@
#enable_sort = on
#enable_tidscan = on
#enable_group_by_reordering = on
+#enable_distinct_reordering = on
# - Planner Cost Constants -
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 93e3dc719da..2e123e08b79 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -111,6 +111,7 @@ typedef enum
/* GUC parameters */
extern PGDLLIMPORT int debug_parallel_query;
extern PGDLLIMPORT bool parallel_leader_participation;
+extern PGDLLIMPORT bool enable_distinct_reordering;
extern struct PlannedStmt *planner(Query *parse, const char *query_string,
int cursorOptions,
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 82b8e54f5f1..379ba0bc9fa 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -464,3 +464,135 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
t
(1 row)
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+CREATE TABLE distinct_tbl (x int, y int);
+INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl;
+ QUERY PLAN
+------------------------------------------------------------------
+ Unique
+ -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(2 rows)
+
+SELECT DISTINCT y, x FROM distinct_tbl;
+ y | x
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Unique
+ -> Incremental Sort
+ Sort Key: s.x, s.y
+ Presorted Key: s.x
+ -> Subquery Scan on s
+ -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(6 rows)
+
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+ y | x
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we avoid the need to re-sort in partial distinct by reordering the
+-- distinctClause Pathkeys to match the ordering of the input path
+SET parallel_tuple_cost=0;
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET min_parallel_index_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Limit
+ -> Unique
+ -> Gather Merge
+ Workers Planned: 1
+ -> Unique
+ -> Parallel Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(6 rows)
+
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+ y | x
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_index_scan_size;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+RESET parallel_tuple_cost;
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Sort
+ Sort Key: y
+ -> Unique
+ -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(4 rows)
+
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+ y | x
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+RESET enable_hashagg;
+DROP TABLE distinct_tbl;
diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out
index 958381afe5e..69e0cf3959c 100644
--- a/src/test/regress/expected/select_distinct_on.out
+++ b/src/test/regress/expected/select_distinct_on.out
@@ -121,3 +121,128 @@ SELECT DISTINCT ON (four) four,hundred
Filter: (four = 0)
(3 rows)
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+CREATE TABLE distinct_tbl (x int, y int, z int);
+INSERT INTO distinct_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+ QUERY PLAN
+------------------------------------------------------------------
+ Unique
+ -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(2 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+ x | y
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Unique
+ -> Incremental Sort
+ Sort Key: s.x, s.y
+ Presorted Key: s.x
+ -> Subquery Scan on s
+ -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(6 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+ x | y
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Sort
+ Sort Key: y
+ -> Unique
+ -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(4 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+ x | y
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Sort
+ Sort Key: s.y, s.x, s.z
+ -> Unique
+ -> Incremental Sort
+ Sort Key: s.x, s.y, s.z
+ Presorted Key: s.x
+ -> Subquery Scan on s
+ -> Sort
+ Sort Key: distinct_tbl.x, distinct_tbl.z, distinct_tbl.y
+ -> Seq Scan on distinct_tbl
+(10 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+ x | y
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+RESET enable_hashagg;
+DROP TABLE distinct_tbl;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index fad7fc3a7e0..91089ac215f 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -150,6 +150,7 @@ select name, setting from pg_settings where name like 'enable%';
--------------------------------+---------
enable_async_append | on
enable_bitmapscan | on
+ enable_distinct_reordering | on
enable_gathermerge | on
enable_group_by_reordering | on
enable_hashagg | on
@@ -170,7 +171,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(22 rows)
+(23 rows)
-- There are always wait event descriptions for various types. InjectionPoint
-- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index da92c197aba..50ac7dde396 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -221,3 +221,56 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
SELECT 2 IS NOT DISTINCT FROM null as "no";
SELECT null IS NOT DISTINCT FROM null as "yes";
+
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+
+CREATE TABLE distinct_tbl (x int, y int);
+INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl;
+SELECT DISTINCT y, x FROM distinct_tbl;
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+
+-- Ensure we avoid the need to re-sort in partial distinct by reordering the
+-- distinctClause Pathkeys to match the ordering of the input path
+SET parallel_tuple_cost=0;
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET min_parallel_index_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_index_scan_size;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+RESET parallel_tuple_cost;
+
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+
+RESET enable_hashagg;
+
+DROP TABLE distinct_tbl;
diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql
index 261e6140feb..d79dace94c4 100644
--- a/src/test/regress/sql/select_distinct_on.sql
+++ b/src/test/regress/sql/select_distinct_on.sql
@@ -42,3 +42,43 @@ SELECT DISTINCT ON (four) four,two
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (four) four,hundred
FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+
+CREATE TABLE distinct_tbl (x int, y int, z int);
+INSERT INTO distinct_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+
+-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+
+RESET enable_hashagg;
+
+DROP TABLE distinct_tbl;