From 5b1f9ce1d9e8dcae2bcd93b2becffaba5e4f3049 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 20 Apr 2016 23:34:07 -0400 Subject: postgres_fdw: Don't push down certain full joins. If there's a filter condition on either side of a full outer join, it is neither correct to attach it to the join's ON clause nor to throw it into the toplevel WHERE clause. Just don't push down the join in that case. To maximize the number of cases where we can still push down full joins, push inner join conditions into the ON clause at the first opportunity rather than postponing them to the top-level WHERE clause. This produces nicer SQL, anyway. This bug was introduced in e4106b2528727c4b48639c0e12bf2f70a766b910. Ashutosh Bapat, per report from Rajkumar Raghuwanshi. --- contrib/postgres_fdw/postgres_fdw.c | 108 +++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 46 deletions(-) (limited to 'contrib/postgres_fdw/postgres_fdw.c') diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 066cffba955..28093e54562 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3998,54 +3998,23 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, fpinfo->innerrel = innerrel; fpinfo->jointype = jointype; - /* - * If user is willing to estimate cost for a scan of either of the joining - * relations using EXPLAIN, he intends to estimate scans on that relation - * more accurately. Then, it makes sense to estimate the cost the join - * with that relation more accurately using EXPLAIN. - */ - fpinfo->use_remote_estimate = fpinfo_o->use_remote_estimate || - fpinfo_i->use_remote_estimate; - - /* - * Since both the joining relations come from the same server, the server - * level options should have same value for both the relations. Pick from - * any side. - */ - fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; - fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; - - /* - * Set cached relation costs to some negative value, so that we can detect - * when they are set to some sensible costs, during one (usually the - * first) of the calls to estimate_path_cost_size(). - */ - fpinfo->rel_startup_cost = -1; - fpinfo->rel_total_cost = -1; - - /* Mark that this join can be pushed down safely */ - fpinfo->pushdown_safe = true; - - /* - * Set fetch size to maximum of the joining sides, since we are expecting - * the rows returned by the join to be proportional to the relation sizes. - */ - if (fpinfo_o->fetch_size > fpinfo_i->fetch_size) - fpinfo->fetch_size = fpinfo_o->fetch_size; - else - fpinfo->fetch_size = fpinfo_i->fetch_size; - /* * Pull the other remote conditions from the joining relations into join - * clauses or other remote clauses (remote_conds) of this relation. This - * avoids building subqueries at every join step. + * clauses or other remote clauses (remote_conds) of this relation wherever + * possible. This avoids building subqueries at every join step, which is + * not currently supported by the deparser logic. * * For an inner join, clauses from both the relations are added to the - * other remote clauses. For an OUTER join, the clauses from the outer - * side are added to remote_conds since those can be evaluated after the - * join is evaluated. The clauses from inner side are added to the + * other remote clauses. For LEFT and RIGHT OUTER join, the clauses from the + * outer side are added to remote_conds since those can be evaluated after + * the join is evaluated. The clauses from inner side are added to the * joinclauses, since they need to evaluated while constructing the join. * + * For a FULL OUTER JOIN, the other clauses from either relation can not be + * added to the joinclauses or remote_conds, since each relation acts as an + * outer relation for the other. Consider such full outer join as + * unshippable because of the reasons mentioned above in this comment. + * * The joining sides can not have local conditions, thus no need to test * shippability of the clauses being pulled up. */ @@ -4073,10 +4042,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, break; case JOIN_FULL: - fpinfo->joinclauses = list_concat(fpinfo->joinclauses, - list_copy(fpinfo_i->remote_conds)); - fpinfo->joinclauses = list_concat(fpinfo->joinclauses, - list_copy(fpinfo_o->remote_conds)); + if (fpinfo_i->remote_conds || fpinfo_o->remote_conds) + return false; break; default: @@ -4084,6 +4051,55 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, elog(ERROR, "unsupported join type %d", jointype); } + /* + * For an inner join, as explained above all restrictions can be treated + * alike. Treating the pushed down conditions as join conditions allows a + * top level full outer join to be deparsed without requiring subqueries. + */ + if (jointype == JOIN_INNER) + { + Assert(!fpinfo->joinclauses); + fpinfo->joinclauses = fpinfo->remote_conds; + fpinfo->remote_conds = NIL; + } + + /* Mark that this join can be pushed down safely */ + fpinfo->pushdown_safe = true; + + /* + * If user is willing to estimate cost for a scan of either of the joining + * relations using EXPLAIN, he intends to estimate scans on that relation + * more accurately. Then, it makes sense to estimate the cost the join + * with that relation more accurately using EXPLAIN. + */ + fpinfo->use_remote_estimate = fpinfo_o->use_remote_estimate || + fpinfo_i->use_remote_estimate; + + /* + * Since both the joining relations come from the same server, the server + * level options should have same value for both the relations. Pick from + * any side. + */ + fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; + fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; + + /* + * Set cached relation costs to some negative value, so that we can detect + * when they are set to some sensible costs, during one (usually the + * first) of the calls to estimate_path_cost_size(). + */ + fpinfo->rel_startup_cost = -1; + fpinfo->rel_total_cost = -1; + + /* + * Set fetch size to maximum of the joining sides, since we are expecting + * the rows returned by the join to be proportional to the relation sizes. + */ + if (fpinfo_o->fetch_size > fpinfo_i->fetch_size) + fpinfo->fetch_size = fpinfo_o->fetch_size; + else + fpinfo->fetch_size = fpinfo_i->fetch_size; + /* * Set the string describing this join relation to be used in EXPLAIN * output of corresponding ForeignScan. -- cgit v1.2.3