diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2018-02-23 13:47:33 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2018-02-23 13:47:33 -0500 |
commit | 9afd513df042b22b98bb9b55f27265e95d34f9e6 (patch) | |
tree | c504ce00c4c8de02ebf0b105330ff0350b3c8c24 /src/backend/optimizer/plan/createplan.c | |
parent | f724022d0ae04e687c309f99df27b7ce64d19761 (diff) | |
download | postgresql-9afd513df042b22b98bb9b55f27265e95d34f9e6.tar.gz postgresql-9afd513df042b22b98bb9b55f27265e95d34f9e6.zip |
Fix planner failures with overlapping mergejoin clauses in an outer join.
Given overlapping or partially redundant join clauses, for example
t1 JOIN t2 ON t1.a = t2.x AND t1.b = t2.x
the planner's EquivalenceClass machinery will ordinarily refactor the
clauses as "t1.a = t1.b AND t1.a = t2.x", so that join processing doesn't
see multiple references to the same EquivalenceClass in a list of join
equality clauses. However, if the join is outer, it's incorrect to derive
a restriction clause on the outer side from the join conditions, so the
clause refactoring does not happen and we end up with overlapping join
conditions. The code that attempted to deal with such cases had several
subtle bugs, which could result in "left and right pathkeys do not match in
mergejoin" or "outer pathkeys do not match mergeclauses" planner errors,
if the selected join plan type was a mergejoin. (It does not appear that
any actually incorrect plan could have been emitted.)
The core of the problem really was failure to recognize that the outer and
inner relations' pathkeys have different relationships to the mergeclause
list. A join's mergeclause list is constructed by reference to the outer
pathkeys, so it will always be ordered the same as the outer pathkeys, but
this cannot be presumed true for the inner pathkeys. If the inner sides of
the mergeclauses contain multiple references to the same EquivalenceClass
({t2.x} in the above example) then a simplistic rendering of the required
inner sort order is like "ORDER BY t2.x, t2.x", but the pathkey machinery
recognizes that the second sort column is redundant and throws it away.
The mergejoin planning code failed to account for that behavior properly.
One error was to try to generate cut-down versions of the mergeclause list
from cut-down versions of the inner pathkeys in the same way as the initial
construction of the mergeclause list from the outer pathkeys was done; this
could lead to choosing a mergeclause list that fails to match the outer
pathkeys. The other problem was that the pathkey cross-checking code in
create_mergejoin_plan treated the inner and outer pathkey lists
identically, whereas actually the expectations for them must be different.
That led to false "pathkeys do not match" failures in some cases, and in
principle could have led to failure to detect bogus plans in other cases,
though there is no indication that such bogus plans could be generated.
Reported by Alexander Kuzmenkov, who also reviewed this patch. This has
been broken for years (back to around 8.3 according to my testing), so
back-patch to all supported branches.
Discussion: https://postgr.es/m/5dad9160-4632-0e47-e120-8e2082000c01@postgrespro.ru
Diffstat (limited to 'src/backend/optimizer/plan/createplan.c')
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 145 |
1 files changed, 70 insertions, 75 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index da0cc7f266c..9ae1bf31d5e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -3791,6 +3791,8 @@ create_mergejoin_plan(PlannerInfo *root, Oid *mergecollations; int *mergestrategies; bool *mergenullsfirst; + PathKey *opathkey; + EquivalenceClass *opeclass; int i; ListCell *lc; ListCell *lop; @@ -3909,7 +3911,8 @@ create_mergejoin_plan(PlannerInfo *root, * Compute the opfamily/collation/strategy/nullsfirst arrays needed by the * executor. The information is in the pathkeys for the two inputs, but * we need to be careful about the possibility of mergeclauses sharing a - * pathkey (compare find_mergeclauses_for_pathkeys()). + * pathkey, as well as the possibility that the inner pathkeys are not in + * an order matching the mergeclauses. */ nClauses = list_length(mergeclauses); Assert(nClauses == list_length(best_path->path_mergeclauses)); @@ -3918,6 +3921,8 @@ create_mergejoin_plan(PlannerInfo *root, mergestrategies = (int *) palloc(nClauses * sizeof(int)); mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); + opathkey = NULL; + opeclass = NULL; lop = list_head(outerpathkeys); lip = list_head(innerpathkeys); i = 0; @@ -3926,11 +3931,9 @@ create_mergejoin_plan(PlannerInfo *root, RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); EquivalenceClass *oeclass; EquivalenceClass *ieclass; - PathKey *opathkey; - PathKey *ipathkey; - EquivalenceClass *opeclass; - EquivalenceClass *ipeclass; - ListCell *l2; + PathKey *ipathkey = NULL; + EquivalenceClass *ipeclass = NULL; + bool first_inner_match = false; /* fetch outer/inner eclass from mergeclause */ if (rinfo->outer_is_left) @@ -3947,104 +3950,96 @@ create_mergejoin_plan(PlannerInfo *root, Assert(ieclass != NULL); /* - * For debugging purposes, we check that the eclasses match the paths' - * pathkeys. In typical cases the merge clauses are one-to-one with - * the pathkeys, but when dealing with partially redundant query - * conditions, we might have clauses that re-reference earlier path - * keys. The case that we need to reject is where a pathkey is - * entirely skipped over. + * We must identify the pathkey elements associated with this clause + * by matching the eclasses (which should give a unique match, since + * the pathkey lists should be canonical). In typical cases the merge + * clauses are one-to-one with the pathkeys, but when dealing with + * partially redundant query conditions, things are more complicated. * - * lop and lip reference the first as-yet-unused pathkey elements; - * it's okay to match them, or any element before them. If they're - * NULL then we have found all pathkey elements to be used. + * lop and lip reference the first as-yet-unmatched pathkey elements. + * If they're NULL then all pathkey elements have been matched. + * + * The ordering of the outer pathkeys should match the mergeclauses, + * by construction (see find_mergeclauses_for_outer_pathkeys()). There + * could be more than one mergeclause for the same outer pathkey, but + * no pathkey may be entirely skipped over. */ - if (lop) + if (oeclass != opeclass) /* multiple matches are not interesting */ { + /* doesn't match the current opathkey, so must match the next */ + if (lop == NULL) + elog(ERROR, "outer pathkeys do not match mergeclauses"); opathkey = (PathKey *) lfirst(lop); opeclass = opathkey->pk_eclass; - if (oeclass == opeclass) - { - /* fast path for typical case */ - lop = lnext(lop); - } - else - { - /* redundant clauses ... must match something before lop */ - foreach(l2, outerpathkeys) - { - if (l2 == lop) - break; - opathkey = (PathKey *) lfirst(l2); - opeclass = opathkey->pk_eclass; - if (oeclass == opeclass) - break; - } - if (oeclass != opeclass) - elog(ERROR, "outer pathkeys do not match mergeclauses"); - } - } - else - { - /* redundant clauses ... must match some already-used pathkey */ - opathkey = NULL; - opeclass = NULL; - foreach(l2, outerpathkeys) - { - opathkey = (PathKey *) lfirst(l2); - opeclass = opathkey->pk_eclass; - if (oeclass == opeclass) - break; - } - if (l2 == NULL) + lop = lnext(lop); + if (oeclass != opeclass) elog(ERROR, "outer pathkeys do not match mergeclauses"); } + /* + * The inner pathkeys likewise should not have skipped-over keys, but + * it's possible for a mergeclause to reference some earlier inner + * pathkey if we had redundant pathkeys. For example we might have + * mergeclauses like "o.a = i.x AND o.b = i.y AND o.c = i.x". The + * implied inner ordering is then "ORDER BY x, y, x", but the pathkey + * mechanism drops the second sort by x as redundant, and this code + * must cope. + * + * It's also possible for the implied inner-rel ordering to be like + * "ORDER BY x, y, x DESC". We still drop the second instance of x as + * redundant; but this means that the sort ordering of a redundant + * inner pathkey should not be considered significant. So we must + * detect whether this is the first clause matching an inner pathkey. + */ if (lip) { ipathkey = (PathKey *) lfirst(lip); ipeclass = ipathkey->pk_eclass; if (ieclass == ipeclass) { - /* fast path for typical case */ + /* successful first match to this inner pathkey */ lip = lnext(lip); - } - else - { - /* redundant clauses ... must match something before lip */ - foreach(l2, innerpathkeys) - { - if (l2 == lip) - break; - ipathkey = (PathKey *) lfirst(l2); - ipeclass = ipathkey->pk_eclass; - if (ieclass == ipeclass) - break; - } - if (ieclass != ipeclass) - elog(ERROR, "inner pathkeys do not match mergeclauses"); + first_inner_match = true; } } - else + if (!first_inner_match) { - /* redundant clauses ... must match some already-used pathkey */ - ipathkey = NULL; - ipeclass = NULL; + /* redundant clause ... must match something before lip */ + ListCell *l2; + foreach(l2, innerpathkeys) { + if (l2 == lip) + break; ipathkey = (PathKey *) lfirst(l2); ipeclass = ipathkey->pk_eclass; if (ieclass == ipeclass) break; } - if (l2 == NULL) + if (ieclass != ipeclass) elog(ERROR, "inner pathkeys do not match mergeclauses"); } - /* pathkeys should match each other too (more debugging) */ + /* + * The pathkeys should always match each other as to opfamily and + * collation (which affect equality), but if we're considering a + * redundant inner pathkey, its sort ordering might not match. In + * such cases we may ignore the inner pathkey's sort ordering and use + * the outer's. (In effect, we're lying to the executor about the + * sort direction of this inner column, but it does not matter since + * the run-time row comparisons would only reach this column when + * there's equality for the earlier column containing the same eclass. + * There could be only one value in this column for the range of inner + * rows having a given value in the earlier column, so it does not + * matter which way we imagine this column to be ordered.) But a + * non-redundant inner pathkey had better match outer's ordering too. + */ if (opathkey->pk_opfamily != ipathkey->pk_opfamily || - opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation || - opathkey->pk_strategy != ipathkey->pk_strategy || - opathkey->pk_nulls_first != ipathkey->pk_nulls_first) + opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + if (first_inner_match && + (opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first)) elog(ERROR, "left and right pathkeys do not match in mergejoin"); /* OK, save info for executor */ |