diff options
author | Alexander Korotkov <akorotkov@postgresql.org> | 2024-01-06 14:09:39 +0200 |
---|---|---|
committer | Alexander Korotkov <akorotkov@postgresql.org> | 2024-01-06 14:10:00 +0200 |
commit | 5ef34a8fc3899a306fbc907a762fee0ba3782462 (patch) | |
tree | d4002b3e7d1c69c37f2d1c89f68bd3c814fc077e /src | |
parent | 43b46aae12b220b7eca8250c6c361083f35985a0 (diff) | |
download | postgresql-5ef34a8fc3899a306fbc907a762fee0ba3782462.tar.gz postgresql-5ef34a8fc3899a306fbc907a762fee0ba3782462.zip |
Fix the issue that SJE mistakenly omits qual clauses
When the SJE code handles the transfer of qual clauses from the removed
relation to the remaining one, it replaces the Vars of the removed
relation with the Vars of the remaining relation for each clause, and
then reintegrates these clauses into the appropriate restriction or join
clause lists, while attempting to avoid duplicates.
However, the code compares RestrictInfo->clause to determine if two
clauses are duplicates. This is just flat wrong. Two RestrictInfos
with the same clause can have different required_relids,
incompatible_relids, is_pushed_down, and so on. This can cause qual
clauses to be mistakenly omitted, leading to wrong results.
This patch fixes it by comparing the entire RestrictInfos not just their
clauses ignoring 'rinfo_serial' field (otherwise almost all RestrictInfos will
be unique). Making 'rinfo_serial' equal_ignore would break other code. This
is why this commit implements our own comparison function for checking the
equality of RestrictInfos.
Reported-by: Zuming Jiang
Bug: #18261
Discussion: https://postgr.es/m/18261-2a75d748c928609b%40postgresql.org
Author: Richard Guo
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/plan/analyzejoins.c | 26 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 26 | ||||
-rw-r--r-- | src/test/regress/sql/join.sql | 17 |
3 files changed, 65 insertions, 4 deletions
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index c053e32e54d..fb01fbe357a 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -1658,6 +1658,28 @@ update_eclasses(EquivalenceClass *ec, int from, int to) } /* + * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field, + * which makes almost every RestrictInfo unique. This type of comparison is + * useful when removing duplicates while moving RestrictInfo's from removed + * relation to remaining relation during self-join elimination. + * + * XXX: In the future, we might remove the 'rinfo_serial' field completely and + * get rid of this function. + */ +static bool +restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) +{ + int saved_rinfo_serial = a->rinfo_serial; + bool result; + + a->rinfo_serial = b->rinfo_serial; + result = equal(a, b); + a->rinfo_serial = saved_rinfo_serial; + + return result; +} + +/* * Remove a relation after we have proven that it participates only in an * unneeded unique self join. * @@ -1760,7 +1782,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, if (src == rinfo || (rinfo->parent_ec != NULL && src->parent_ec == rinfo->parent_ec) - || equal(rinfo->clause, src->clause)) + || restrict_infos_logically_equal(rinfo, src)) { is_redundant = true; break; @@ -1788,7 +1810,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, if (src == rinfo || (rinfo->parent_ec != NULL && src->parent_ec == rinfo->parent_ec) - || equal(rinfo->clause, src->clause)) + || restrict_infos_logically_equal(rinfo, src)) { is_redundant = true; break; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index ffc60eb21d0..063396ab522 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6742,7 +6742,7 @@ explain (costs off) select 1 from reset join_collapse_limit; reset enable_seqscan; -- Check that clauses from the join filter list is not lost on the self-join removal -CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int); +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); explain (verbose, costs off) SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; QUERY PLAN @@ -6880,6 +6880,30 @@ WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code; Filter: (id IS NOT NULL) (3 rows) +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + ?column? +---------- +(0 rows) + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index f51f39eee79..5c420a7c6d6 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2568,7 +2568,7 @@ reset join_collapse_limit; reset enable_seqscan; -- Check that clauses from the join filter list is not lost on the self-join removal -CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int); +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); explain (verbose, costs off) SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; @@ -2622,6 +2622,21 @@ WITH t1 AS (SELECT * FROM emp1) UPDATE emp1 SET code = t1.code + 1 FROM t1 WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code; +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. |