diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 38 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 456 | ||||
-rw-r--r-- | src/backend/utils/misc/guc_tables.c | 10 | ||||
-rw-r--r-- | src/backend/utils/misc/postgresql.conf.sample | 1 | ||||
-rw-r--r-- | src/include/optimizer/optimizer.h | 1 | ||||
-rw-r--r-- | src/test/regress/expected/select_distinct.out | 132 | ||||
-rw-r--r-- | src/test/regress/expected/select_distinct_on.out | 125 | ||||
-rw-r--r-- | src/test/regress/expected/sysviews.out | 3 | ||||
-rw-r--r-- | src/test/regress/sql/select_distinct.sql | 53 | ||||
-rw-r--r-- | src/test/regress/sql/select_distinct_on.sql | 40 |
10 files changed, 658 insertions, 201 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 7cf15e89b63..03c2ac36e05 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -2208,6 +2208,41 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) } /* + * pathkeys_useful_for_distinct + * Count the number of pathkeys that are useful for DISTINCT or DISTINCT + * ON clause. + * + * DISTINCT keys could be reordered to benefit from the given pathkey list. As + * with pathkeys_useful_for_grouping, we return the number of leading keys in + * the list that are shared by the distinctClause pathkeys. + */ +static int +pathkeys_useful_for_distinct(PlannerInfo *root, List *pathkeys) +{ + int n_common_pathkeys; + + /* + * distinct_pathkeys may have become empty if all of the pathkeys were + * determined to be redundant. Return 0 in this case. + */ + if (root->distinct_pathkeys == NIL) + return 0; + + /* walk the pathkeys and search for matching DISTINCT key */ + n_common_pathkeys = 0; + foreach_node(PathKey, pathkey, pathkeys) + { + /* no matching DISTINCT key, we're done */ + if (!list_member_ptr(root->distinct_pathkeys, pathkey)) + break; + + n_common_pathkeys++; + } + + return n_common_pathkeys; +} + +/* * pathkeys_useful_for_setop * Count the number of leading common pathkeys root's 'setop_pathkeys' in * 'pathkeys'. @@ -2242,6 +2277,9 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_distinct(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; nuseful2 = pathkeys_useful_for_setop(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1f78dc3d530..b665a7762ec 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -67,6 +67,7 @@ double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION; int debug_parallel_query = DEBUG_PARALLEL_OFF; bool parallel_leader_participation = true; +bool enable_distinct_reordering = true; /* Hook for plugins to get control in planner() */ planner_hook_type planner_hook = NULL; @@ -198,6 +199,9 @@ static void create_partial_distinct_paths(PlannerInfo *root, static RelOptInfo *create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *distinct_rel); +static List *get_useful_pathkeys_for_distinct(PlannerInfo *root, + List *needed_pathkeys, + List *path_pathkeys); static RelOptInfo *create_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, @@ -235,6 +239,12 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, grouping_sets_data *gd, GroupPathExtraData *extra, bool force_rel_creation); +static Path *make_ordered_path(PlannerInfo *root, + RelOptInfo *rel, + Path *path, + Path *cheapest_path, + List *pathkeys, + double limit_tuples); static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); static bool can_partial_agg(PlannerInfo *root); static void apply_scanjoin_target_to_paths(PlannerInfo *root, @@ -4938,7 +4948,10 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, /* * Try sorting the cheapest path and incrementally sorting any paths with - * presorted keys and put a unique paths atop of those. + * presorted keys and put a unique paths atop of those. We'll also + * attempt to reorder the required pathkeys to match the input path's + * pathkeys as much as possible, in hopes of avoiding a possible need to + * re-sort. */ if (grouping_is_sortable(root->processed_distinctClause)) { @@ -4946,86 +4959,67 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, { Path *input_path = (Path *) lfirst(lc); Path *sorted_path; - bool is_sorted; - int presorted_keys; + List *useful_pathkeys_list = NIL; - is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys, - input_path->pathkeys, - &presorted_keys); + useful_pathkeys_list = + get_useful_pathkeys_for_distinct(root, + root->distinct_pathkeys, + input_path->pathkeys); + Assert(list_length(useful_pathkeys_list) > 0); - if (is_sorted) - sorted_path = input_path; - else + foreach_node(List, useful_pathkeys, useful_pathkeys_list) { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially sorted - * already (no need to deal with paths which have presorted - * keys when incremental sort is disabled unless it's the - * cheapest partial path). - */ - if (input_path != cheapest_partial_path && - (presorted_keys == 0 || !enable_incremental_sort)) + sorted_path = make_ordered_path(root, + partial_distinct_rel, + input_path, + cheapest_partial_path, + useful_pathkeys, + -1.0); + + if (sorted_path == NULL) continue; /* - * We've no need to consider both a sort and incremental sort. - * We'll just do a sort if there are no presorted keys and an - * incremental sort when there are presorted keys. + * An empty distinct_pathkeys means all tuples have the same + * value for the DISTINCT clause. See + * create_final_distinct_paths() */ - if (presorted_keys == 0 || !enable_incremental_sort) - sorted_path = (Path *) create_sort_path(root, - partial_distinct_rel, - input_path, - root->distinct_pathkeys, - -1.0); - else - sorted_path = (Path *) create_incremental_sort_path(root, - partial_distinct_rel, - input_path, - root->distinct_pathkeys, - presorted_keys, - -1.0); - } - - /* - * An empty distinct_pathkeys means all tuples have the same value - * for the DISTINCT clause. See create_final_distinct_paths() - */ - if (root->distinct_pathkeys == NIL) - { - Node *limitCount; + if (root->distinct_pathkeys == NIL) + { + Node *limitCount; - limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, - sizeof(int64), - Int64GetDatum(1), false, - FLOAT8PASSBYVAL); + limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); - /* - * Apply a LimitPath onto the partial path to restrict the - * tuples from each worker to 1. create_final_distinct_paths - * will need to apply an additional LimitPath to restrict this - * to a single row after the Gather node. If the query - * already has a LIMIT clause, then we could end up with three - * Limit nodes in the final plan. Consolidating the top two - * of these could be done, but does not seem worth troubling - * over. - */ - add_partial_path(partial_distinct_rel, (Path *) - create_limit_path(root, partial_distinct_rel, - sorted_path, - NULL, - limitCount, - LIMIT_OPTION_COUNT, - 0, 1)); - } - else - { - add_partial_path(partial_distinct_rel, (Path *) - create_upper_unique_path(root, partial_distinct_rel, - sorted_path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * Apply a LimitPath onto the partial path to restrict the + * tuples from each worker to 1. + * create_final_distinct_paths will need to apply an + * additional LimitPath to restrict this to a single row + * after the Gather node. If the query already has a + * LIMIT clause, then we could end up with three Limit + * nodes in the final plan. Consolidating the top two of + * these could be done, but does not seem worth troubling + * over. + */ + add_partial_path(partial_distinct_rel, (Path *) + create_limit_path(root, partial_distinct_rel, + sorted_path, + NULL, + limitCount, + LIMIT_OPTION_COUNT, + 0, 1)); + } + else + { + add_partial_path(partial_distinct_rel, (Path *) + create_upper_unique_path(root, partial_distinct_rel, + sorted_path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } } } @@ -5134,7 +5128,9 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, * Unique node on those. We also, consider doing an explicit sort of * the cheapest input path and Unique'ing that. If any paths have * presorted keys then we'll create an incremental sort atop of those - * before adding a unique node on the top. + * before adding a unique node on the top. We'll also attempt to + * reorder the required pathkeys to match the input path's pathkeys as + * much as possible, in hopes of avoiding a possible need to re-sort. * * When we have DISTINCT ON, we must sort by the more rigorous of * DISTINCT and ORDER BY, else it won't have the desired behavior. @@ -5158,86 +5154,66 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, { Path *input_path = (Path *) lfirst(lc); Path *sorted_path; - bool is_sorted; - int presorted_keys; + List *useful_pathkeys_list = NIL; - is_sorted = pathkeys_count_contained_in(needed_pathkeys, - input_path->pathkeys, - &presorted_keys); + useful_pathkeys_list = + get_useful_pathkeys_for_distinct(root, + needed_pathkeys, + input_path->pathkeys); + Assert(list_length(useful_pathkeys_list) > 0); - if (is_sorted) - sorted_path = input_path; - else + foreach_node(List, useful_pathkeys, useful_pathkeys_list) { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially sorted - * already (no need to deal with paths which have presorted - * keys when incremental sort is disabled unless it's the - * cheapest input path). - */ - if (input_path != cheapest_input_path && - (presorted_keys == 0 || !enable_incremental_sort)) + sorted_path = make_ordered_path(root, + distinct_rel, + input_path, + cheapest_input_path, + useful_pathkeys, + limittuples); + + if (sorted_path == NULL) continue; /* - * We've no need to consider both a sort and incremental sort. - * We'll just do a sort if there are no presorted keys and an - * incremental sort when there are presorted keys. + * distinct_pathkeys may have become empty if all of the + * pathkeys were determined to be redundant. If all of the + * pathkeys are redundant then each DISTINCT target must only + * allow a single value, therefore all resulting tuples must + * be identical (or at least indistinguishable by an equality + * check). We can uniquify these tuples simply by just taking + * the first tuple. All we do here is add a path to do "LIMIT + * 1" atop of 'sorted_path'. When doing a DISTINCT ON we may + * still have a non-NIL sort_pathkeys list, so we must still + * only do this with paths which are correctly sorted by + * sort_pathkeys. */ - if (presorted_keys == 0 || !enable_incremental_sort) - sorted_path = (Path *) create_sort_path(root, - distinct_rel, - input_path, - needed_pathkeys, - limittuples); - else - sorted_path = (Path *) create_incremental_sort_path(root, - distinct_rel, - input_path, - needed_pathkeys, - presorted_keys, - limittuples); - } - - /* - * distinct_pathkeys may have become empty if all of the pathkeys - * were determined to be redundant. If all of the pathkeys are - * redundant then each DISTINCT target must only allow a single - * value, therefore all resulting tuples must be identical (or at - * least indistinguishable by an equality check). We can uniquify - * these tuples simply by just taking the first tuple. All we do - * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When - * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys - * list, so we must still only do this with paths which are - * correctly sorted by sort_pathkeys. - */ - if (root->distinct_pathkeys == NIL) - { - Node *limitCount; + if (root->distinct_pathkeys == NIL) + { + Node *limitCount; - limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, - sizeof(int64), - Int64GetDatum(1), false, - FLOAT8PASSBYVAL); + limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); - /* - * If the query already has a LIMIT clause, then we could end - * up with a duplicate LimitPath in the final plan. That does - * not seem worth troubling over too much. - */ - add_path(distinct_rel, (Path *) - create_limit_path(root, distinct_rel, sorted_path, - NULL, limitCount, - LIMIT_OPTION_COUNT, 0, 1)); - } - else - { - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - sorted_path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * If the query already has a LIMIT clause, then we could + * end up with a duplicate LimitPath in the final plan. + * That does not seem worth troubling over too much. + */ + add_path(distinct_rel, (Path *) + create_limit_path(root, distinct_rel, sorted_path, + NULL, limitCount, + LIMIT_OPTION_COUNT, 0, 1)); + } + else + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + sorted_path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } } } @@ -5281,6 +5257,82 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, } /* + * get_useful_pathkeys_for_distinct + * Get useful orderings of pathkeys for distinctClause by reordering + * 'needed_pathkeys' to match the given 'path_pathkeys' as much as possible. + * + * This returns a list of pathkeys that can be useful for DISTINCT or DISTINCT + * ON clause. For convenience, it always includes the given 'needed_pathkeys'. + */ +static List * +get_useful_pathkeys_for_distinct(PlannerInfo *root, List *needed_pathkeys, + List *path_pathkeys) +{ + List *useful_pathkeys_list = NIL; + List *useful_pathkeys = NIL; + + /* always include the given 'needed_pathkeys' */ + useful_pathkeys_list = lappend(useful_pathkeys_list, + needed_pathkeys); + + if (!enable_distinct_reordering) + return useful_pathkeys_list; + + /* + * Scan the given 'path_pathkeys' and construct a list of PathKey nodes + * that match 'needed_pathkeys', but only up to the longest matching + * prefix. + * + * When we have DISTINCT ON, we must ensure that the resulting pathkey + * list matches initial distinctClause pathkeys; otherwise, it won't have + * the desired behavior. + */ + foreach_node(PathKey, pathkey, path_pathkeys) + { + /* + * The PathKey nodes are canonical, so they can be checked for + * equality by simple pointer comparison. + */ + if (!list_member_ptr(needed_pathkeys, pathkey)) + break; + if (root->parse->hasDistinctOn && + !list_member_ptr(root->distinct_pathkeys, pathkey)) + break; + + useful_pathkeys = lappend(useful_pathkeys, pathkey); + } + + /* If no match at all, no point in reordering needed_pathkeys */ + if (useful_pathkeys == NIL) + return useful_pathkeys_list; + + /* + * If not full match, the resulting pathkey list is not useful without + * incremental sort. + */ + if (list_length(useful_pathkeys) < list_length(needed_pathkeys) && + !enable_incremental_sort) + return useful_pathkeys_list; + + /* Append the remaining PathKey nodes in needed_pathkeys */ + useful_pathkeys = list_concat_unique_ptr(useful_pathkeys, + needed_pathkeys); + + /* + * If the resulting pathkey list is the same as the 'needed_pathkeys', + * just drop it. + */ + if (compare_pathkeys(needed_pathkeys, + useful_pathkeys) == PATHKEYS_EQUAL) + return useful_pathkeys_list; + + useful_pathkeys_list = lappend(useful_pathkeys_list, + useful_pathkeys); + + return useful_pathkeys_list; +} + +/* * create_ordered_paths * * Build a new upperrel containing Paths for ORDER BY evaluation. @@ -7009,58 +7061,6 @@ done: } /* - * make_ordered_path - * Return a path ordered by 'pathkeys' based on the given 'path'. May - * return NULL if it doesn't make sense to generate an ordered path in - * this case. - */ -static Path * -make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - Path *cheapest_path, List *pathkeys) -{ - bool is_sorted; - int presorted_keys; - - is_sorted = pathkeys_count_contained_in(pathkeys, - path->pathkeys, - &presorted_keys); - - if (!is_sorted) - { - /* - * Try at least sorting the cheapest path and also try incrementally - * sorting any path which is partially sorted already (no need to deal - * with paths which have presorted keys when incremental sort is - * disabled unless it's the cheapest input path). - */ - if (path != cheapest_path && - (presorted_keys == 0 || !enable_incremental_sort)) - return NULL; - - /* - * We've no need to consider both a sort and incremental sort. We'll - * just do a sort if there are no presorted keys and an incremental - * sort when there are presorted keys. - */ - if (presorted_keys == 0 || !enable_incremental_sort) - path = (Path *) create_sort_path(root, - rel, - path, - pathkeys, - -1.0); - else - path = (Path *) create_incremental_sort_path(root, - rel, - path, - pathkeys, - presorted_keys, - -1.0); - } - - return path; -} - -/* * add_paths_to_grouping_rel * * Add non-partial paths to grouping relation. @@ -7111,7 +7111,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, path, cheapest_path, - info->pathkeys); + info->pathkeys, + -1.0); if (path == NULL) continue; @@ -7192,7 +7193,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, path, partially_grouped_rel->cheapest_total_path, - info->pathkeys); + info->pathkeys, + -1.0); if (path == NULL) continue; @@ -7443,7 +7445,8 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, cheapest_total_path, - info->pathkeys); + info->pathkeys, + -1.0); if (path == NULL) continue; @@ -7500,7 +7503,8 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, cheapest_partial_path, - info->pathkeys); + info->pathkeys, + -1.0); if (path == NULL) continue; @@ -7587,6 +7591,58 @@ create_partial_grouping_paths(PlannerInfo *root, } /* + * make_ordered_path + * Return a path ordered by 'pathkeys' based on the given 'path'. May + * return NULL if it doesn't make sense to generate an ordered path in + * this case. + */ +static Path * +make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path, + Path *cheapest_path, List *pathkeys, double limit_tuples) +{ + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(pathkeys, + path->pathkeys, + &presorted_keys); + + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try incrementally + * sorting any path which is partially sorted already (no need to deal + * with paths which have presorted keys when incremental sort is + * disabled unless it's the cheapest input path). + */ + if (path != cheapest_path && + (presorted_keys == 0 || !enable_incremental_sort)) + return NULL; + + /* + * We've no need to consider both a sort and incremental sort. We'll + * just do a sort if there are no presorted keys and an incremental + * sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + rel, + path, + pathkeys, + limit_tuples); + else + path = (Path *) create_incremental_sort_path(root, + rel, + path, + pathkeys, + presorted_keys, + limit_tuples); + } + + return path; +} + +/* * Generate Gather and Gather Merge paths for a grouping relation or partial * grouping relation. * diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 8a67f01200c..9845abd6932 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -997,6 +997,16 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { + {"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables reordering of DISTINCT pathkeys."), + NULL, + GUC_EXPLAIN + }, + &enable_distinct_reordering, + true, + NULL, NULL, NULL + }, + { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), gettext_noop("This algorithm attempts to do planning without " diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 39a3ac23127..407cd1e08ca 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -414,6 +414,7 @@ #enable_sort = on #enable_tidscan = on #enable_group_by_reordering = on +#enable_distinct_reordering = on # - Planner Cost Constants - diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 93e3dc719da..2e123e08b79 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -111,6 +111,7 @@ typedef enum /* GUC parameters */ extern PGDLLIMPORT int debug_parallel_query; extern PGDLLIMPORT bool parallel_leader_participation; +extern PGDLLIMPORT bool enable_distinct_reordering; extern struct PlannedStmt *planner(Query *parse, const char *query_string, int cursorOptions, diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 82b8e54f5f1..379ba0bc9fa 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -464,3 +464,135 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +-- +-- Test the planner's ability to reorder the distinctClause Pathkeys to match +-- the input path's ordering +-- +CREATE TABLE distinct_tbl (x int, y int); +INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i; +CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y); +ANALYZE distinct_tbl; +-- Produce results with sorting. +SET enable_hashagg TO OFF; +-- Ensure we avoid the need to re-sort by reordering the distinctClause +-- Pathkeys to match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM distinct_tbl; + QUERY PLAN +------------------------------------------------------------------ + Unique + -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(2 rows) + +SELECT DISTINCT y, x FROM distinct_tbl; + y | x +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +-- Ensure we leverage incremental-sort by reordering the distinctClause +-- Pathkeys to partially match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s; + QUERY PLAN +------------------------------------------------------------------------------ + Unique + -> Incremental Sort + Sort Key: s.x, s.y + Presorted Key: s.x + -> Subquery Scan on s + -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(6 rows) + +SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s; + y | x +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +-- Ensure we avoid the need to re-sort in partial distinct by reordering the +-- distinctClause Pathkeys to match the ordering of the input path +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET min_parallel_table_scan_size=0; +SET min_parallel_index_scan_size=0; +SET max_parallel_workers_per_gather=2; +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM distinct_tbl limit 10; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Limit + -> Unique + -> Gather Merge + Workers Planned: 1 + -> Unique + -> Parallel Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(6 rows) + +SELECT DISTINCT y, x FROM distinct_tbl limit 10; + y | x +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +RESET max_parallel_workers_per_gather; +RESET min_parallel_index_scan_size; +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the +-- input path even if there is ORDER BY clause +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Sort Key: y + -> Unique + -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(4 rows) + +SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y; + y | x +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +RESET enable_hashagg; +DROP TABLE distinct_tbl; diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out index 958381afe5e..69e0cf3959c 100644 --- a/src/test/regress/expected/select_distinct_on.out +++ b/src/test/regress/expected/select_distinct_on.out @@ -121,3 +121,128 @@ SELECT DISTINCT ON (four) four,hundred Filter: (four = 0) (3 rows) +-- +-- Test the planner's ability to reorder the distinctClause Pathkeys to match +-- the input path's ordering +-- +CREATE TABLE distinct_tbl (x int, y int, z int); +INSERT INTO distinct_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i; +CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y); +ANALYZE distinct_tbl; +-- Produce results with sorting. +SET enable_hashagg TO OFF; +-- Ensure we avoid the need to re-sort by reordering the distinctClause +-- Pathkeys to match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl; + QUERY PLAN +------------------------------------------------------------------ + Unique + -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(2 rows) + +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl; + x | y +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +-- Ensure we leverage incremental-sort by reordering the distinctClause +-- Pathkeys to partially match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s; + QUERY PLAN +------------------------------------------------------------------------------ + Unique + -> Incremental Sort + Sort Key: s.x, s.y + Presorted Key: s.x + -> Subquery Scan on s + -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(6 rows) + +SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s; + x | y +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the +-- input path even if there is ORDER BY clause +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Sort Key: y + -> Unique + -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl +(4 rows) + +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y; + x | y +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z; + QUERY PLAN +------------------------------------------------------------------------------------ + Sort + Sort Key: s.y, s.x, s.z + -> Unique + -> Incremental Sort + Sort Key: s.x, s.y, s.z + Presorted Key: s.x + -> Subquery Scan on s + -> Sort + Sort Key: distinct_tbl.x, distinct_tbl.z, distinct_tbl.y + -> Seq Scan on distinct_tbl +(10 rows) + +SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z; + x | y +---+--- + 0 | 0 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(10 rows) + +RESET enable_hashagg; +DROP TABLE distinct_tbl; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index fad7fc3a7e0..91089ac215f 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -150,6 +150,7 @@ select name, setting from pg_settings where name like 'enable%'; --------------------------------+--------- enable_async_append | on enable_bitmapscan | on + enable_distinct_reordering | on enable_gathermerge | on enable_group_by_reordering | on enable_hashagg | on @@ -170,7 +171,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(22 rows) +(23 rows) -- There are always wait event descriptions for various types. InjectionPoint -- may be present or absent, depending on history since last postmaster start. diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index da92c197aba..50ac7dde396 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -221,3 +221,56 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; + +-- +-- Test the planner's ability to reorder the distinctClause Pathkeys to match +-- the input path's ordering +-- + +CREATE TABLE distinct_tbl (x int, y int); +INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i; +CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y); +ANALYZE distinct_tbl; + +-- Produce results with sorting. +SET enable_hashagg TO OFF; + +-- Ensure we avoid the need to re-sort by reordering the distinctClause +-- Pathkeys to match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM distinct_tbl; +SELECT DISTINCT y, x FROM distinct_tbl; + +-- Ensure we leverage incremental-sort by reordering the distinctClause +-- Pathkeys to partially match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s; +SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s; + +-- Ensure we avoid the need to re-sort in partial distinct by reordering the +-- distinctClause Pathkeys to match the ordering of the input path +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET min_parallel_table_scan_size=0; +SET min_parallel_index_scan_size=0; +SET max_parallel_workers_per_gather=2; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM distinct_tbl limit 10; +SELECT DISTINCT y, x FROM distinct_tbl limit 10; + +RESET max_parallel_workers_per_gather; +RESET min_parallel_index_scan_size; +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; + +-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the +-- input path even if there is ORDER BY clause +EXPLAIN (COSTS OFF) +SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y; +SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y; + +RESET enable_hashagg; + +DROP TABLE distinct_tbl; diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql index 261e6140feb..d79dace94c4 100644 --- a/src/test/regress/sql/select_distinct_on.sql +++ b/src/test/regress/sql/select_distinct_on.sql @@ -42,3 +42,43 @@ SELECT DISTINCT ON (four) four,two EXPLAIN (COSTS OFF) SELECT DISTINCT ON (four) four,hundred FROM tenk1 WHERE four = 0 ORDER BY 1,2; + +-- +-- Test the planner's ability to reorder the distinctClause Pathkeys to match +-- the input path's ordering +-- + +CREATE TABLE distinct_tbl (x int, y int, z int); +INSERT INTO distinct_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i; +CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y); +ANALYZE distinct_tbl; + +-- Produce results with sorting. +SET enable_hashagg TO OFF; + +-- Ensure we avoid the need to re-sort by reordering the distinctClause +-- Pathkeys to match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl; +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl; + +-- Ensure we leverage incremental-sort by reordering the distinctClause +-- Pathkeys to partially match the ordering of the input path +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s; +SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s; + +-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the +-- input path even if there is ORDER BY clause +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y; +SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y; + +-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z; +SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z; + +RESET enable_hashagg; + +DROP TABLE distinct_tbl; |