aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2020-11-03 20:07:23 +0100
committerTomas Vondra <tomas.vondra@postgresql.org>2020-11-03 22:31:57 +0100
commitebb7ae839d033d0f279670e249f54646a08b8c48 (patch)
tree96921a9a470206f6df84d4072f6d0fc2dc18b7fe /src
parent92f87182f2c617fd420832972b6d0ae4527301c8 (diff)
downloadpostgresql-ebb7ae839d033d0f279670e249f54646a08b8c48.tar.gz
postgresql-ebb7ae839d033d0f279670e249f54646a08b8c48.zip
Fix get_useful_pathkeys_for_relation for volatile expressions
When considering Incremental Sort below a Gather Merge, we need to be a bit more careful when matching pathkeys to EC members. It's not enough to find a member whose Vars are all in the current relation's target; volatile expressions in particular need to be contained in the target, otherwise it's too early to use the pathkey. Reported-by: Jaime Casanova Author: James Coleman Reviewed-by: Tomas Vondra Backpatch-through: 13, where the incremental sort code was added Discussion: https://postgr.es/m/CAJGNTeNaxpXgBVcRhJX%2B2vSbq%2BF2kJqGBcvompmpvXb7pq%2BoFA%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/allpaths.c13
-rw-r--r--src/backend/optimizer/path/equivclass.c70
-rw-r--r--src/include/optimizer/paths.h1
-rw-r--r--src/test/regress/expected/incremental_sort.out98
-rw-r--r--src/test/regress/sql/incremental_sort.sql31
5 files changed, 207 insertions, 6 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 8ad6384c6ae..84a69b064a9 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2816,7 +2816,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
/*
* Considering query_pathkeys is always worth it, because it might allow
* us to avoid a total sort when we have a partially presorted path
- * available.
+ * available or to push the total sort into the parallel portion of the
+ * query.
*/
if (root->query_pathkeys)
{
@@ -2829,17 +2830,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
/*
- * We can only build an Incremental Sort for pathkeys which
- * contain an EC member in the current relation, so ignore any
- * suffix of the list as soon as we find a pathkey without an EC
- * member the relation.
+ * We can only build a sort for pathkeys which contain an EC
+ * member in the current relation's target, so ignore any suffix
+ * of the list as soon as we find a pathkey without an EC member
+ * in the relation.
*
* By still returning the prefix of the pathkeys list that does
* meet criteria of EC membership in the current relation, we
* enable not just an incremental sort on the entirety of
* query_pathkeys but also incremental sort below a JOIN.
*/
- if (!find_em_expr_for_rel(pathkey_ec, rel))
+ if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel))
break;
npathkeys++;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index a21b3b4756c..f98fd7b3eb8 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -798,6 +798,76 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
}
/*
+ * Find an equivalence class member expression that can be safely used by a
+ * sort node on top of the provided relation. The rules here must match those
+ * applied in prepare_sort_from_pathkeys.
+ */
+Expr *
+find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+ ListCell *lc_em;
+
+ /*
+ * If there is more than one equivalence member matching these
+ * requirements we'll be content to choose any one of them.
+ */
+ foreach(lc_em, ec->ec_members)
+ {
+ EquivalenceMember *em = lfirst(lc_em);
+ Expr *em_expr = em->em_expr;
+ PathTarget *target = rel->reltarget;
+ ListCell *lc_target_expr;
+
+ /*
+ * We shouldn't be trying to sort by an equivalence class that
+ * contains a constant, so no need to consider such cases any further.
+ */
+ if (em->em_is_const)
+ continue;
+
+ /*
+ * Any Vars in the equivalence class member need to come from this
+ * relation. This is a superset of prepare_sort_from_pathkeys ignoring
+ * child members unless they belong to the rel being sorted.
+ */
+ if (!bms_is_subset(em->em_relids, rel->relids))
+ continue;
+
+ /*
+ * As long as the expression isn't volatile then
+ * prepare_sort_from_pathkeys is able to generate a new target entry,
+ * so there's no need to verify that one already exists.
+ */
+ if (!ec->ec_has_volatile)
+ return em->em_expr;
+
+ /*
+ * If, however, it's volatile, we have to verify that the
+ * equivalence member's expr is already generated in the
+ * relation's target (we do strip relabels first from both
+ * expressions, which is cheap and might allow us to match
+ * more expressions).
+ */
+ while (em_expr && IsA(em_expr, RelabelType))
+ em_expr = ((RelabelType *) em_expr)->arg;
+
+ foreach(lc_target_expr, target->exprs)
+ {
+ Expr *target_expr = lfirst(lc_target_expr);
+
+ while (target_expr && IsA(target_expr, RelabelType))
+ target_expr = ((RelabelType *) target_expr)->arg;
+
+ if (equal(target_expr, em_expr))
+ return em->em_expr;
+ }
+ }
+
+ /* We didn't find any suitable equivalence class expression */
+ return NULL;
+}
+
+/*
* generate_base_implied_equalities
* Generate any restriction clauses that we can deduce from equivalence
* classes.
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 2134227ebcb..8a4c6f8b59c 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
Relids rel,
bool create_it);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern void generate_base_implied_equalities(PlannerInfo *root);
extern List *generate_join_implied_equalities(PlannerInfo *root,
Relids join_relids,
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index e376ea12761..7cf2eee7c14 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1469,3 +1469,101 @@ explain (costs off) select * from t union select * from t order by 1,3;
(13 rows)
drop table t;
+-- Sort pushdown can't go below where expressions are part of the rel target.
+-- In particular this is interesting for volatile expressions which have to
+-- go above joins since otherwise we'll incorrectly use expression evaluations
+-- across multiple rows.
+set enable_hashagg=off;
+set enable_seqscan=off;
+set enable_incremental_sort = off;
+set parallel_tuple_cost=0;
+set parallel_setup_cost=0;
+set min_parallel_table_scan_size = 0;
+set min_parallel_index_scan_size = 0;
+-- Parallel sort below join.
+explain (costs off) select distinct sub.unique1, stringu1
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
+ QUERY PLAN
+--------------------------------------------------------------------------
+ Unique
+ -> Nested Loop
+ -> Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: tenk1.unique1, tenk1.stringu1
+ -> Parallel Index Scan using tenk1_unique1 on tenk1
+ -> Function Scan on generate_series
+(8 rows)
+
+explain (costs off) select sub.unique1, stringu1
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
+order by 1, 2;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Nested Loop
+ -> Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: tenk1.unique1, tenk1.stringu1
+ -> Parallel Index Scan using tenk1_unique1 on tenk1
+ -> Function Scan on generate_series
+(7 rows)
+
+-- Parallel sort but with expression that can be safely generated at the base rel.
+explain (costs off) select distinct sub.unique1, md5(stringu1)
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
+ QUERY PLAN
+----------------------------------------------------------------------------------------
+ Unique
+ -> Nested Loop
+ -> Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
+ -> Parallel Index Scan using tenk1_unique1 on tenk1
+ -> Function Scan on generate_series
+(8 rows)
+
+explain (costs off) select sub.unique1, md5(stringu1)
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
+order by 1, 2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Nested Loop
+ -> Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
+ -> Parallel Index Scan using tenk1_unique1 on tenk1
+ -> Function Scan on generate_series
+(7 rows)
+
+-- Parallel sort but with expression not available until the upper rel.
+explain (costs off) select distinct sub.unique1, stringu1 || random()::text
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Unique
+ -> Sort
+ Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
+ -> Gather
+ Workers Planned: 2
+ -> Nested Loop
+ -> Parallel Index Scan using tenk1_unique1 on tenk1
+ -> Function Scan on generate_series
+(8 rows)
+
+explain (costs off) select sub.unique1, stringu1 || random()::text
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
+order by 1, 2;
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Sort
+ Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
+ -> Gather
+ Workers Planned: 2
+ -> Nested Loop
+ -> Parallel Index Scan using tenk1_unique1 on tenk1
+ -> Function Scan on generate_series
+(7 rows)
+
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 9c040c90e62..3ee11c394b9 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -221,3 +221,34 @@ set enable_hashagg to off;
explain (costs off) select * from t union select * from t order by 1,3;
drop table t;
+
+-- Sort pushdown can't go below where expressions are part of the rel target.
+-- In particular this is interesting for volatile expressions which have to
+-- go above joins since otherwise we'll incorrectly use expression evaluations
+-- across multiple rows.
+set enable_hashagg=off;
+set enable_seqscan=off;
+set enable_incremental_sort = off;
+set parallel_tuple_cost=0;
+set parallel_setup_cost=0;
+set min_parallel_table_scan_size = 0;
+set min_parallel_index_scan_size = 0;
+
+-- Parallel sort below join.
+explain (costs off) select distinct sub.unique1, stringu1
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
+explain (costs off) select sub.unique1, stringu1
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
+order by 1, 2;
+-- Parallel sort but with expression that can be safely generated at the base rel.
+explain (costs off) select distinct sub.unique1, md5(stringu1)
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
+explain (costs off) select sub.unique1, md5(stringu1)
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
+order by 1, 2;
+-- Parallel sort but with expression not available until the upper rel.
+explain (costs off) select distinct sub.unique1, stringu1 || random()::text
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
+explain (costs off) select sub.unique1, stringu1 || random()::text
+from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
+order by 1, 2;