aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/plan')
-rw-r--r--src/backend/optimizer/plan/analyzejoins.c1240
-rw-r--r--src/backend/optimizer/plan/planmain.c5
2 files changed, 1128 insertions, 117 deletions
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index b33fc671775..3aa04d0d4e1 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,6 +22,7 @@
*/
#include "postgres.h"
+#include "catalog/pg_class.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/joininfo.h"
#include "optimizer/optimizer.h"
@@ -30,27 +31,48 @@
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
+#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
+/*
+ * Utility structure. A sorting procedure is needed to simplify the search
+ * of SJE-candidate baserels referencing the same database relation. Having
+ * collected all baserels from the query jointree, the planner sorts them
+ * according to the reloid value, groups them with the next pass and attempts
+ * to remove self-joins.
+ *
+ * Preliminary sorting prevents quadratic behavior that can be harmful in the
+ * case of numerous joins.
+ */
+typedef struct
+{
+ int relid;
+ Oid reloid;
+} SelfJoinCandidate;
+
+bool enable_self_join_elimination;
+
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
-static void remove_rel_from_query(PlannerInfo *root, int relid,
- SpecialJoinInfo *sjinfo);
+static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
+ SpecialJoinInfo *sjinfo);
static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
int relid, int ojrelid);
static void remove_rel_from_eclass(EquivalenceClass *ec,
- int relid, int ojrelid);
+ int relid, int ojrelid, int subst);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
- List *clause_list);
+ List *clause_list, List **extra_clauses);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
Relids joinrelids,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist);
+ List *restrictlist,
+ List **extra_clauses);
+static int self_join_candidates_cmp(const void *a, const void *b);
/*
@@ -88,7 +110,7 @@ restart:
*/
innerrelid = bms_singleton_member(sjinfo->min_righthand);
- remove_rel_from_query(root, innerrelid, sjinfo);
+ remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
/* We verify that exactly one reference gets removed from joinlist */
nremoved = 0;
@@ -276,7 +298,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
* Now that we have the relevant equality join clauses, try to prove the
* innerrel distinct.
*/
- if (rel_is_distinct_for(root, innerrel, clause_list))
+ if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
return true;
/*
@@ -288,36 +310,31 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
/*
- * Remove the target relid and references to the target join from the
+ * Remove the target rel->relid and references to the target join from the
* planner's data structures, having determined that there is no need
- * to include them in the query.
+ * to include them in the query. Optionally replace them with subst if subst
+ * is non-negative.
*
- * We are not terribly thorough here. We only bother to update parts of
- * the planner's data structures that will actually be consulted later.
+ * This function updates only parts needed for both left-join removal and
+ * self-join removal.
*/
static void
-remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
+remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
+ int subst, SpecialJoinInfo *sjinfo,
+ Relids joinrelids)
{
- RelOptInfo *rel = find_base_rel(root, relid);
- int ojrelid = sjinfo->ojrelid;
- Relids joinrelids;
- Relids join_plus_commute;
- List *joininfos;
+ int relid = rel->relid;
+ int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
Index rti;
ListCell *l;
- /* Compute the relid set for the join we are considering */
- joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
- Assert(ojrelid != 0);
- joinrelids = bms_add_member(joinrelids, ojrelid);
-
/*
* Update all_baserels and related relid sets.
*/
- root->all_baserels = bms_del_member(root->all_baserels, relid);
- root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
- root->all_query_rels = bms_del_member(root->all_query_rels, relid);
- root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
+ root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
+ root->outer_join_rels = adjust_relid_set(root->outer_join_rels, ojrelid, subst);
+ root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
+ root->all_query_rels = adjust_relid_set(root->all_query_rels, ojrelid, subst);
/*
* Likewise remove references from SpecialJoinInfo data structures.
@@ -341,20 +358,33 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
sjinf->min_righthand = bms_copy(sjinf->min_righthand);
sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
- /* Now remove relid and ojrelid bits from the sets: */
- sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
- sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
- sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
- sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
- sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
- sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
- sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
- sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
- /* relid cannot appear in these fields, but ojrelid can: */
- sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
- sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
- sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
- sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
+ /* Now remove relid from the sets: */
+ sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
+ sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
+ sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
+ sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
+
+ if (sjinfo != NULL)
+ {
+ Assert(subst <= 0 && ojrelid > 0);
+
+ /* Remove ojrelid bits from the sets: */
+ sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
+ sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
+ sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
+ sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
+ /* relid cannot appear in these fields, but ojrelid can: */
+ sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
+ sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
+ sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
+ sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
+ }
+ else
+ {
+ Assert(subst > 0 && ojrelid == -1);
+
+ ChangeVarNodes((Node *) sjinf->semi_rhs_exprs, relid, subst, 0);
+ }
}
/*
@@ -375,10 +405,10 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
- Assert(!bms_is_member(relid, phinfo->ph_lateral));
+ Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
bms_is_member(relid, phinfo->ph_eval_at) &&
- !bms_is_member(ojrelid, phinfo->ph_eval_at))
+ (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
{
root->placeholder_list = foreach_delete_current(root->placeholder_list,
l);
@@ -388,22 +418,113 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
{
PlaceHolderVar *phv = phinfo->ph_var;
- phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
- phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
+ phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
+ phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, ojrelid, subst);
Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
/* Reduce ph_needed to contain only "relation 0"; see below */
if (bms_is_member(0, phinfo->ph_needed))
phinfo->ph_needed = bms_make_singleton(0);
else
phinfo->ph_needed = NULL;
- phv->phrels = bms_del_member(phv->phrels, relid);
- phv->phrels = bms_del_member(phv->phrels, ojrelid);
+
+ phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
+
+ /*
+ * ph_lateral might contain rels mentioned in ph_eval_at after the
+ * replacement, remove them.
+ */
+ phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
+ /* ph_lateral might or might not be empty */
+
+ phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
+ phv->phrels = adjust_relid_set(phv->phrels, ojrelid, subst);
Assert(!bms_is_empty(phv->phrels));
+
+ ChangeVarNodes((Node *) phv->phexpr, relid, subst, 0);
+
Assert(phv->phnullingrels == NULL); /* no need to adjust */
}
}
/*
+ * Likewise remove references from EquivalenceClasses.
+ */
+ foreach(l, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
+
+ if (bms_is_member(relid, ec->ec_relids) ||
+ (sjinfo == NULL || bms_is_member(ojrelid, ec->ec_relids)))
+ remove_rel_from_eclass(ec, relid, ojrelid, subst);
+ }
+
+ /*
+ * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
+ * ph_needed relid sets. These have to be known accurately, else we may
+ * fail to remove other now-removable outer joins. And our removal of the
+ * join clause(s) for this outer join may mean that Vars that were
+ * formerly needed no longer are. So we have to do this honestly by
+ * repeating the construction of those relid sets. We can cheat to one
+ * small extent: we can avoid re-examining the targetlist and HAVING qual
+ * by preserving "relation 0" bits from the existing relid sets. This is
+ * safe because we'd never remove such references.
+ *
+ * So, start by removing all other bits from attr_needed sets and
+ * lateral_vars lists. (We already did this above for ph_needed.)
+ */
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *otherrel = root->simple_rel_array[rti];
+ int attroff;
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (otherrel == NULL)
+ continue;
+
+ Assert(otherrel->relid == rti); /* sanity check on array */
+
+ for (attroff = otherrel->max_attr - otherrel->min_attr;
+ attroff >= 0;
+ attroff--)
+ {
+ if (bms_is_member(0, otherrel->attr_needed[attroff]))
+ otherrel->attr_needed[attroff] = bms_make_singleton(0);
+ else
+ otherrel->attr_needed[attroff] = NULL;
+ }
+
+ if (subst > 0)
+ ChangeVarNodes((Node *) otherrel->lateral_vars, relid, subst, 0);
+ }
+}
+
+/*
+ * Remove the target relid and references to the target join from the
+ * planner's data structures, having determined that there is no need
+ * to include them in the query.
+ *
+ * We are not terribly thorough here. We only bother to update parts of
+ * the planner's data structures that will actually be consulted later.
+ */
+static void
+remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
+ SpecialJoinInfo *sjinfo)
+{
+ RelOptInfo *rel = find_base_rel(root, relid);
+ int ojrelid = sjinfo->ojrelid;
+ Relids joinrelids;
+ Relids join_plus_commute;
+ List *joininfos;
+ ListCell *l;
+
+ /* Compute the relid set for the join we are considering */
+ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+ Assert(ojrelid != 0);
+ joinrelids = bms_add_member(joinrelids, ojrelid);
+
+ remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
+
+ /*
* Remove any joinquals referencing the rel from the joininfo lists.
*
* In some cases, a joinqual has to be put back after deleting its
@@ -466,18 +587,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
}
/*
- * Likewise remove references from EquivalenceClasses.
- */
- foreach(l, root->eq_classes)
- {
- EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
-
- if (bms_is_member(relid, ec->ec_relids) ||
- bms_is_member(ojrelid, ec->ec_relids))
- remove_rel_from_eclass(ec, relid, ojrelid);
- }
-
- /*
* There may be references to the rel in root->fkey_list, but if so,
* match_foreign_keys_to_quals() will get rid of them.
*/
@@ -493,42 +602,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
pfree(rel);
/*
- * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
- * ph_needed relid sets. These have to be known accurately, else we may
- * fail to remove other now-removable outer joins. And our removal of the
- * join clause(s) for this outer join may mean that Vars that were
- * formerly needed no longer are. So we have to do this honestly by
- * repeating the construction of those relid sets. We can cheat to one
- * small extent: we can avoid re-examining the targetlist and HAVING qual
- * by preserving "relation 0" bits from the existing relid sets. This is
- * safe because we'd never remove such references.
- *
- * So, start by removing all other bits from attr_needed sets. (We
- * already did this above for ph_needed.)
- */
- for (rti = 1; rti < root->simple_rel_array_size; rti++)
- {
- RelOptInfo *otherrel = root->simple_rel_array[rti];
- int attroff;
-
- /* there may be empty slots corresponding to non-baserel RTEs */
- if (otherrel == NULL)
- continue;
-
- Assert(otherrel->relid == rti); /* sanity check on array */
-
- for (attroff = otherrel->max_attr - otherrel->min_attr;
- attroff >= 0;
- attroff--)
- {
- if (bms_is_member(0, otherrel->attr_needed[attroff]))
- otherrel->attr_needed[attroff] = bms_make_singleton(0);
- else
- otherrel->attr_needed[attroff] = NULL;
- }
- }
-
- /*
* Now repeat construction of attr_needed bits coming from all other
* sources.
*/
@@ -607,13 +680,13 @@ remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
* level(s).
*/
static void
-remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
+remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid, int subst)
{
ListCell *lc;
/* Fix up the EC's overall relids */
- ec->ec_relids = bms_del_member(ec->ec_relids, relid);
- ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
+ ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
+ ec->ec_relids = adjust_relid_set(ec->ec_relids, ojrelid, subst);
/*
* Fix up the member expressions. Any non-const member that ends with
@@ -625,11 +698,11 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
if (bms_is_member(relid, cur_em->em_relids) ||
- bms_is_member(ojrelid, cur_em->em_relids))
+ (ojrelid != -1 && bms_is_member(ojrelid, cur_em->em_relids)))
{
Assert(!cur_em->em_is_const);
- cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
- cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
+ cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
+ cur_em->em_relids = adjust_relid_set(cur_em->em_relids, ojrelid, subst);
if (bms_is_empty(cur_em->em_relids))
ec->ec_members = foreach_delete_current(ec->ec_members, lc);
}
@@ -640,7 +713,10 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
- remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
+ if (ojrelid == -1)
+ ChangeVarNodes((Node *) rinfo, relid, subst, 0);
+ else
+ remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
}
/*
@@ -844,9 +920,15 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
* Note that the passed-in clause_list may be destructively modified! This
* is OK for current uses, because the clause_list is built by the caller for
* the sole purpose of passing to this function.
+ *
+ * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
+ * looking like "x = const" if distinctness is derived from such clauses, not
+ * joininfo clauses. Pass NULL to the extra_clauses if this value is not
+ * needed.
*/
static bool
-rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
+ List **extra_clauses)
{
/*
* We could skip a couple of tests here if we assume all callers checked
@@ -859,10 +941,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
{
/*
* Examine the indexes to see if we have a matching unique index.
- * relation_has_unique_index_for automatically adds any usable
+ * relation_has_unique_index_ext automatically adds any usable
* restriction clauses for the rel, so we needn't do that here.
*/
- if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
+ if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
+ extra_clauses))
return true;
}
else if (rel->rtekind == RTE_SUBQUERY)
@@ -1177,8 +1260,34 @@ innerrel_is_unique(PlannerInfo *root,
List *restrictlist,
bool force_cache)
{
+ return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
+ jointype, restrictlist, force_cache, NULL);
+}
+
+/*
+ * innerrel_is_unique_ext
+ * Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
+ * additional clauses from a baserestrictinfo list used to prove the
+ * uniqueness.
+ *
+ * A non-NULL extra_clauses indicates that we're checking for self-join and
+ * correspondingly dealing with filtered clauses.
+ */
+bool
+innerrel_is_unique_ext(PlannerInfo *root,
+ Relids joinrelids,
+ Relids outerrelids,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ List *restrictlist,
+ bool force_cache,
+ List **extra_clauses)
+{
MemoryContext old_context;
ListCell *lc;
+ UniqueRelInfo *uniqueRelInfo;
+ List *outer_exprs = NIL;
+ bool self_join = (extra_clauses != NULL);
/* Certainly can't prove uniqueness when there are no joinclauses */
if (restrictlist == NIL)
@@ -1193,17 +1302,28 @@ innerrel_is_unique(PlannerInfo *root,
/*
* Query the cache to see if we've managed to prove that innerrel is
- * unique for any subset of this outerrel. We don't need an exact match,
- * as extra outerrels can't make the innerrel any less unique (or more
- * formally, the restrictlist for a join to a superset outerrel must be a
- * superset of the conditions we successfully used before).
+ * unique for any subset of this outerrel. For non-self-join search, we
+ * don't need an exact match, as extra outerrels can't make the innerrel
+ * any less unique (or more formally, the restrictlist for a join to a
+ * superset outerrel must be a superset of the conditions we successfully
+ * used before). For self-join search, we require an exact match of
+ * outerrels because we need extra clauses to be valid for our case. Also,
+ * for self-join checking we've filtered the clauses list. Thus, we can
+ * match only the result cached for a self-join search for another
+ * self-join check.
*/
foreach(lc, innerrel->unique_for_rels)
{
- Relids unique_for_rels = (Relids) lfirst(lc);
+ uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
- if (bms_is_subset(unique_for_rels, outerrelids))
+ if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
+ (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
+ uniqueRelInfo->self_join))
+ {
+ if (extra_clauses)
+ *extra_clauses = uniqueRelInfo->extra_clauses;
return true; /* Success! */
+ }
}
/*
@@ -1220,7 +1340,8 @@ innerrel_is_unique(PlannerInfo *root,
/* No cached information, so try to make the proof. */
if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
- jointype, restrictlist))
+ jointype, restrictlist,
+ self_join ? &outer_exprs : NULL))
{
/*
* Cache the positive result for future probes, being sure to keep it
@@ -1233,10 +1354,16 @@ innerrel_is_unique(PlannerInfo *root,
* supersets of them anyway.
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
+ uniqueRelInfo = makeNode(UniqueRelInfo);
+ uniqueRelInfo->outerrelids = bms_copy(outerrelids);
+ uniqueRelInfo->self_join = self_join;
+ uniqueRelInfo->extra_clauses = outer_exprs;
innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
- bms_copy(outerrelids));
+ uniqueRelInfo);
MemoryContextSwitchTo(old_context);
+ if (extra_clauses)
+ *extra_clauses = outer_exprs;
return true; /* Success! */
}
else
@@ -1282,7 +1409,8 @@ is_innerrel_unique_for(PlannerInfo *root,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist)
+ List *restrictlist,
+ List **extra_clauses)
{
List *clause_list = NIL;
ListCell *lc;
@@ -1312,17 +1440,895 @@ is_innerrel_unique_for(PlannerInfo *root,
continue; /* not mergejoinable */
/*
- * Check if clause has the form "outer op inner" or "inner op outer",
- * and if so mark which side is inner.
+ * Check if the clause has the form "outer op inner" or "inner op
+ * outer", and if so mark which side is inner.
*/
if (!clause_sides_match_join(restrictinfo, outerrelids,
innerrel->relids))
continue; /* no good for these input relations */
- /* OK, add to list */
+ /* OK, add to the list */
clause_list = lappend(clause_list, restrictinfo);
}
/* Let rel_is_distinct_for() do the hard work */
- return rel_is_distinct_for(root, innerrel, clause_list);
+ return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
+}
+
+/*
+ * Update EC members to point to the remaining relation instead of the removed
+ * one, removing duplicates.
+ *
+ * Restriction clauses for base relations are already distributed to
+ * the respective baserestrictinfo lists (see
+ * generate_implied_equalities_for_column). The above code has already processed
+ * this list and updated these clauses to reference the remaining
+ * relation, so that we can skip them here based on their relids.
+ *
+ * Likewise, we have already processed the join clauses that join the
+ * removed relation to the remaining one.
+ *
+ * Finally, there might be join clauses tying the removed relation to
+ * some third relation. We can't just delete the source clauses and
+ * regenerate them from the EC because the corresponding equality
+ * operators might be missing (see the handling of ec_broken).
+ * Therefore, we will update the references in the source clauses.
+ *
+ * Derived clauses can be generated again, so it is simpler just to
+ * delete them.
+ */
+static void
+update_eclasses(EquivalenceClass *ec, int from, int to)
+{
+ List *new_members = NIL;
+ List *new_sources = NIL;
+
+ foreach_node(EquivalenceMember, em, ec->ec_members)
+ {
+ bool is_redundant = false;
+
+ if (!bms_is_member(from, em->em_relids))
+ {
+ new_members = lappend(new_members, em);
+ continue;
+ }
+
+ em->em_relids = adjust_relid_set(em->em_relids, from, to);
+ em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
+
+ /* We only process inner joins */
+ ChangeVarNodes((Node *) em->em_expr, from, to, 0);
+
+ foreach_node(EquivalenceMember, other, new_members)
+ {
+ if (!equal(em->em_relids, other->em_relids))
+ continue;
+
+ if (equal(em->em_expr, other->em_expr))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+
+ if (!is_redundant)
+ new_members = lappend(new_members, em);
+ }
+
+ list_free(ec->ec_members);
+ ec->ec_members = new_members;
+
+ list_free(ec->ec_derives);
+ ec->ec_derives = NULL;
+
+ /* Update EC source expressions */
+ foreach_node(RestrictInfo, rinfo, ec->ec_sources)
+ {
+ bool is_redundant = false;
+
+ if (!bms_is_member(from, rinfo->required_relids))
+ {
+ new_sources = lappend(new_sources, rinfo);
+ continue;
+ }
+
+ ChangeVarNodes((Node *) rinfo, from, to, 0);
+
+ /*
+ * After switching the clause to the remaining relation, check it for
+ * redundancy with existing ones. We don't have to check for
+ * redundancy with derived clauses, because we've just deleted them.
+ */
+ foreach_node(RestrictInfo, other, new_sources)
+ {
+ if (!equal(rinfo->clause_relids, other->clause_relids))
+ continue;
+
+ if (equal(rinfo->clause, other->clause))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+
+ if (!is_redundant)
+ new_sources = lappend(new_sources, rinfo);
+ }
+
+ list_free(ec->ec_sources);
+ ec->ec_sources = new_sources;
+ ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
+}
+
+/*
+ * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
+ * which makes almost every RestrictInfo unique. This type of comparison is
+ * useful when removing duplicates while moving RestrictInfo's from removed
+ * relation to remaining relation during self-join elimination.
+ *
+ * XXX: In the future, we might remove the 'rinfo_serial' field completely and
+ * get rid of this function.
+ */
+static bool
+restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
+{
+ int saved_rinfo_serial = a->rinfo_serial;
+ bool result;
+
+ a->rinfo_serial = b->rinfo_serial;
+ result = equal(a, b);
+ a->rinfo_serial = saved_rinfo_serial;
+
+ return result;
+}
+
+/*
+ * This function adds all non-redundant clauses to the keeping relation
+ * during self-join elimination. That is a contradictory operation. On the
+ * one hand, we reduce the length of the `restrict` lists, which can
+ * impact planning or executing time. Additionally, we improve the
+ * accuracy of cardinality estimation. On the other hand, it is one more
+ * place that can make planning time much longer in specific cases. It
+ * would have been better to avoid calling the equal() function here, but
+ * it's the only way to detect duplicated inequality expressions.
+ *
+ * (*keep_rinfo_list) is given by pointer because it might be altered by
+ * distribute_restrictinfo_to_rels().
+ */
+static void
+add_non_redundant_clauses(PlannerInfo *root,
+ List *rinfo_candidates,
+ List **keep_rinfo_list,
+ Index removed_relid)
+{
+ foreach_node(RestrictInfo, rinfo, rinfo_candidates)
+ {
+ bool is_redundant = false;
+
+ Assert(!bms_is_member(removed_relid, rinfo->required_relids));
+
+ foreach_node(RestrictInfo, src, (*keep_rinfo_list))
+ {
+ if (!bms_equal(src->clause_relids, rinfo->clause_relids))
+ /* Can't compare trivially different clauses */
+ continue;
+
+ if (src == rinfo ||
+ (rinfo->parent_ec != NULL &&
+ src->parent_ec == rinfo->parent_ec) ||
+ restrict_infos_logically_equal(rinfo, src))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+ if (!is_redundant)
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+}
+
+/*
+ * Remove a relation after we have proven that it participates only in an
+ * unneeded unique self-join.
+ *
+ * Replace any links in planner info structures.
+ *
+ * Transfer join and restriction clauses from the removed relation to the
+ * remaining one. We change the Vars of the clause to point to the
+ * remaining relation instead of the removed one. The clauses that require
+ * a subset of joinrelids become restriction clauses of the remaining
+ * relation, and others remain join clauses. We append them to
+ * baserestrictinfo and joininfo, respectively, trying not to introduce
+ * duplicates.
+ *
+ * We also have to process the 'joinclauses' list here, because it
+ * contains EC-derived join clauses which must become filter clauses. It
+ * is not enough to just correct the ECs because the EC-derived
+ * restrictions are generated before join removal (see
+ * generate_base_implied_equalities).
+ *
+ * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
+ * cached relids and relid bitmapsets can be correctly cleaned during the
+ * self-join elimination procedure.
+ */
+static void
+remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
+ RelOptInfo *toKeep, RelOptInfo *toRemove,
+ List *restrictlist)
+{
+ List *joininfos;
+ ListCell *lc;
+ int i;
+ List *jinfo_candidates = NIL;
+ List *binfo_candidates = NIL;
+
+ Assert(toKeep->relid > 0);
+ Assert(toRemove->relid > 0);
+
+ /*
+ * Replace the index of the removing table with the keeping one. The
+ * technique of removing/distributing restrictinfo is used here to attach
+ * just appeared (for keeping relation) join clauses and avoid adding
+ * duplicates of those that already exist in the joininfo list.
+ */
+ joininfos = list_copy(toRemove->joininfo);
+ foreach_node(RestrictInfo, rinfo, joininfos)
+ {
+ remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
+ ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
+
+ if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
+ jinfo_candidates = lappend(jinfo_candidates, rinfo);
+ else
+ binfo_candidates = lappend(binfo_candidates, rinfo);
+ }
+
+ /*
+ * Concatenate restrictlist to the list of base restrictions of the
+ * removing table just to simplify the replacement procedure: all of them
+ * weren't connected to any keeping relations and need to be added to some
+ * rels.
+ */
+ toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
+ restrictlist);
+ foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
+ {
+ ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
+
+ if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
+ jinfo_candidates = lappend(jinfo_candidates, rinfo);
+ else
+ binfo_candidates = lappend(binfo_candidates, rinfo);
+ }
+
+ /*
+ * Now, add all non-redundant clauses to the keeping relation.
+ */
+ add_non_redundant_clauses(root, binfo_candidates,
+ &toKeep->baserestrictinfo, toRemove->relid);
+ add_non_redundant_clauses(root, jinfo_candidates,
+ &toKeep->joininfo, toRemove->relid);
+
+ list_free(binfo_candidates);
+ list_free(jinfo_candidates);
+
+ /*
+ * Arrange equivalence classes, mentioned removing a table, with the
+ * keeping one: varno of removing table should be replaced in members and
+ * sources lists. Also, remove duplicated elements if this replacement
+ * procedure created them.
+ */
+ i = -1;
+ while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+
+ update_eclasses(ec, toRemove->relid, toKeep->relid);
+ toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
+ }
+
+ /*
+ * Transfer the targetlist and attr_needed flags.
+ */
+
+ foreach(lc, toRemove->reltarget->exprs)
+ {
+ Node *node = lfirst(lc);
+
+ ChangeVarNodes(node, toRemove->relid, toKeep->relid, 0);
+ if (!list_member(toKeep->reltarget->exprs, node))
+ toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
+ }
+
+ for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
+ {
+ int attno = i - toKeep->min_attr;
+
+ toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
+ toRemove->relid, toKeep->relid);
+ toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
+ toRemove->attr_needed[attno]);
+ }
+
+ /*
+ * If the removed relation has a row mark, transfer it to the remaining
+ * one.
+ *
+ * If both rels have row marks, just keep the one corresponding to the
+ * remaining relation because we verified earlier that they have the same
+ * strength.
+ */
+ if (rmark)
+ {
+ if (kmark)
+ {
+ Assert(kmark->markType == rmark->markType);
+
+ root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
+ }
+ else
+ {
+ /* Shouldn't have inheritance children here. */
+ Assert(rmark->rti == rmark->prti);
+
+ rmark->rti = rmark->prti = toKeep->relid;
+ }
+ }
+
+ /*
+ * Replace varno in all the query structures, except nodes RangeTblRef
+ * otherwise later remove_rel_from_joinlist will yield errors.
+ */
+ ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, 0, false);
+
+ /* Replace links in the planner info */
+ remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
+
+ /* At last, replace varno in root targetlist and HAVING clause */
+ ChangeVarNodes((Node *) root->processed_tlist, toRemove->relid, toKeep->relid, 0);
+ ChangeVarNodes((Node *) root->processed_groupClause, toRemove->relid, toKeep->relid, 0);
+
+ adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
+ adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
+
+ /*
+ * There may be references to the rel in root->fkey_list, but if so,
+ * match_foreign_keys_to_quals() will get rid of them.
+ */
+
+ /*
+ * Finally, remove the rel from the baserel array to prevent it from being
+ * referenced again. (We can't do this earlier because
+ * remove_join_clause_from_rels will touch it.)
+ */
+ root->simple_rel_array[toRemove->relid] = NULL;
+
+ /* And nuke the RelOptInfo, just in case there's another access path. */
+ pfree(toRemove);
+
+ /*
+ * Now repeat construction of attr_needed bits coming from all other
+ * sources.
+ */
+ rebuild_placeholder_attr_needed(root);
+ rebuild_joinclause_attr_needed(root);
+ rebuild_eclass_attr_needed(root);
+ rebuild_lateral_attr_needed(root);
+}
+
+/*
+ * split_selfjoin_quals
+ * Processes 'joinquals' by building two lists: one containing the quals
+ * where the columns/exprs are on either side of the join match and
+ * another one containing the remaining quals.
+ *
+ * 'joinquals' must only contain quals for a RTE_RELATION being joined to
+ * itself.
+ */
+static void
+split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
+ List **otherjoinquals, int from, int to)
+{
+ List *sjoinquals = NIL;
+ List *ojoinquals = NIL;
+
+ foreach_node(RestrictInfo, rinfo, joinquals)
+ {
+ OpExpr *expr;
+ Node *leftexpr;
+ Node *rightexpr;
+
+ /* In general, clause looks like F(arg1) = G(arg2) */
+ if (!rinfo->mergeopfamilies ||
+ bms_num_members(rinfo->clause_relids) != 2 ||
+ bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
+ bms_membership(rinfo->right_relids) != BMS_SINGLETON)
+ {
+ ojoinquals = lappend(ojoinquals, rinfo);
+ continue;
+ }
+
+ expr = (OpExpr *) rinfo->clause;
+
+ if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
+ {
+ ojoinquals = lappend(ojoinquals, rinfo);
+ continue;
+ }
+
+ leftexpr = get_leftop(rinfo->clause);
+ rightexpr = copyObject(get_rightop(rinfo->clause));
+
+ if (leftexpr && IsA(leftexpr, RelabelType))
+ leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
+ if (rightexpr && IsA(rightexpr, RelabelType))
+ rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
+
+ /*
+ * Quite an expensive operation, narrowing the use case. For example,
+ * when we have cast of the same var to different (but compatible)
+ * types.
+ */
+ ChangeVarNodes(rightexpr, bms_singleton_member(rinfo->right_relids),
+ bms_singleton_member(rinfo->left_relids), 0);
+
+ if (equal(leftexpr, rightexpr))
+ sjoinquals = lappend(sjoinquals, rinfo);
+ else
+ ojoinquals = lappend(ojoinquals, rinfo);
+ }
+
+ *selfjoinquals = sjoinquals;
+ *otherjoinquals = ojoinquals;
+}
+
+/*
+ * Check for a case when uniqueness is at least partly derived from a
+ * baserestrictinfo clause. In this case, we have a chance to return only
+ * one row (if such clauses on both sides of SJ are equal) or nothing (if they
+ * are different).
+ */
+static bool
+match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
+ Index relid)
+{
+ foreach_node(RestrictInfo, rinfo, uclauses)
+ {
+ Expr *clause;
+ Node *iclause;
+ Node *c1;
+ bool matched = false;
+
+ Assert(outer->relid > 0 && relid > 0);
+
+ /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
+ Assert(bms_is_empty(rinfo->left_relids) ^
+ bms_is_empty(rinfo->right_relids));
+
+ clause = (Expr *) copyObject(rinfo->clause);
+ ChangeVarNodes((Node *) clause, relid, outer->relid, 0);
+
+ iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
+ get_leftop(clause);
+ c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
+ get_rightop(clause);
+
+ /*
+ * Compare these left and right sides with the corresponding sides of
+ * the outer's filters. If no one is detected - return immediately.
+ */
+ foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
+ {
+ Node *oclause;
+ Node *c2;
+
+ if (orinfo->mergeopfamilies == NIL)
+ /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
+ continue;
+
+ Assert(is_opclause(orinfo->clause));
+
+ oclause = bms_is_empty(orinfo->left_relids) ?
+ get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
+ c2 = (bms_is_empty(orinfo->left_relids) ?
+ get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
+
+ if (equal(iclause, oclause) && equal(c1, c2))
+ {
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Find and remove unique self-joins in a group of base relations that have
+ * the same Oid.
+ *
+ * Returns a set of relids that were removed.
+ */
+static Relids
+remove_self_joins_one_group(PlannerInfo *root, Relids relids)
+{
+ Relids result = NULL;
+ int k; /* Index of kept relation */
+ int r = -1; /* Index of removed relation */
+
+ while ((r = bms_next_member(relids, r)) > 0)
+ {
+ RelOptInfo *inner = root->simple_rel_array[r];
+
+ k = r;
+
+ while ((k = bms_next_member(relids, k)) > 0)
+ {
+ Relids joinrelids = NULL;
+ RelOptInfo *outer = root->simple_rel_array[k];
+ List *restrictlist;
+ List *selfjoinquals;
+ List *otherjoinquals;
+ ListCell *lc;
+ bool jinfo_check = true;
+ PlanRowMark *omark = NULL;
+ PlanRowMark *imark = NULL;
+ List *uclauses = NIL;
+
+ /* A sanity check: the relations have the same Oid. */
+ Assert(root->simple_rte_array[k]->relid ==
+ root->simple_rte_array[r]->relid);
+
+ /*
+ * It is impossible to eliminate the join of two relations if they
+ * belong to different rules of order. Otherwise, the planner
+ * can't find any variants of the correct query plan.
+ */
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
+
+ if ((bms_is_member(k, info->syn_lefthand) ^
+ bms_is_member(r, info->syn_lefthand)) ||
+ (bms_is_member(k, info->syn_righthand) ^
+ bms_is_member(r, info->syn_righthand)))
+ {
+ jinfo_check = false;
+ break;
+ }
+ }
+ if (!jinfo_check)
+ continue;
+
+ /*
+ * Check Row Marks equivalence. We can't remove the join if the
+ * relations have row marks of different strength (e.g., one is
+ * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
+ * EvalPlanQual rechecking).
+ */
+ foreach(lc, root->rowMarks)
+ {
+ PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
+
+ if (rowMark->rti == k)
+ {
+ Assert(imark == NULL);
+ imark = rowMark;
+ }
+ else if (rowMark->rti == r)
+ {
+ Assert(omark == NULL);
+ omark = rowMark;
+ }
+
+ if (omark && imark)
+ break;
+ }
+ if (omark && imark && omark->markType != imark->markType)
+ continue;
+
+ /*
+ * We only deal with base rels here, so their relids bitset
+ * contains only one member -- their relid.
+ */
+ joinrelids = bms_add_member(joinrelids, r);
+ joinrelids = bms_add_member(joinrelids, k);
+
+ /*
+ * PHVs should not impose any constraints on removing self-joins.
+ */
+
+ /*
+ * At this stage, joininfo lists of inner and outer can contain
+ * only clauses required for a superior outer join that can't
+ * influence this optimization. So, we can avoid to call the
+ * build_joinrel_restrictlist() routine.
+ */
+ restrictlist = generate_join_implied_equalities(root, joinrelids,
+ inner->relids,
+ outer, NULL);
+ if (restrictlist == NIL)
+ continue;
+
+ /*
+ * Process restrictlist to separate the self-join quals from the
+ * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
+ * otherjoinquals.
+ */
+ split_selfjoin_quals(root, restrictlist, &selfjoinquals,
+ &otherjoinquals, inner->relid, outer->relid);
+
+ Assert(list_length(restrictlist) ==
+ (list_length(selfjoinquals) + list_length(otherjoinquals)));
+
+ /*
+ * To enable SJE for the only degenerate case without any self
+ * join clauses at all, add baserestrictinfo to this list. The
+ * degenerate case works only if both sides have the same clause.
+ * So doesn't matter which side to add.
+ */
+ selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
+
+ /*
+ * Determine if the inner table can duplicate outer rows. We must
+ * bypass the unique rel cache here since we're possibly using a
+ * subset of join quals. We can use 'force_cache' == true when all
+ * join quals are self-join quals. Otherwise, we could end up
+ * putting false negatives in the cache.
+ */
+ if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
+ outer, JOIN_INNER, selfjoinquals,
+ list_length(otherjoinquals) == 0,
+ &uclauses))
+ continue;
+
+ /*
+ * 'uclauses' is the copy of outer->baserestrictinfo that are
+ * associated with an index. We proved by matching selfjoinquals
+ * to a unique index that the outer relation has at most one
+ * matching row for each inner row. Sometimes that is not enough.
+ * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
+ * unique index is (a,b). Having non-empty uclauses, we must
+ * validate that the inner baserestrictinfo contains the same
+ * expressions, or we won't match the same row on each side of the
+ * join.
+ */
+ if (!match_unique_clauses(root, inner, uclauses, outer->relid))
+ continue;
+
+ /*
+ * We can remove either relation, so remove the inner one in order
+ * to simplify this loop.
+ */
+ remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
+
+ result = bms_add_member(result, r);
+
+ /* We have removed the outer relation, try the next one. */
+ break;
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Gather indexes of base relations from the joinlist and try to eliminate self
+ * joins.
+ */
+static Relids
+remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
+{
+ ListCell *jl;
+ Relids relids = NULL;
+ SelfJoinCandidate *candidates = NULL;
+ int i;
+ int j;
+ int numRels;
+
+ /* Collect indexes of base relations of the join tree */
+ foreach(jl, joinlist)
+ {
+ Node *jlnode = (Node *) lfirst(jl);
+
+ if (IsA(jlnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jlnode)->rtindex;
+ RangeTblEntry *rte = root->simple_rte_array[varno];
+
+ /*
+ * We only consider ordinary relations as candidates to be
+ * removed, and these relations should not have TABLESAMPLE
+ * clauses specified. Removing a relation with TABLESAMPLE clause
+ * could potentially change the syntax of the query. Because of
+ * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
+ * Query->mergeTargetRelation associated rel cannot be eliminated.
+ */
+ if (rte->rtekind == RTE_RELATION &&
+ rte->relkind == RELKIND_RELATION &&
+ rte->tablesample == NULL &&
+ varno != root->parse->resultRelation &&
+ varno != root->parse->mergeTargetRelation)
+ {
+ Assert(!bms_is_member(varno, relids));
+ relids = bms_add_member(relids, varno);
+ }
+ }
+ else if (IsA(jlnode, List))
+ {
+ /* Recursively go inside the sub-joinlist */
+ toRemove = remove_self_joins_recurse(root, (List *) jlnode,
+ toRemove);
+ }
+ else
+ elog(ERROR, "unrecognized joinlist node type: %d",
+ (int) nodeTag(jlnode));
+ }
+
+ numRels = bms_num_members(relids);
+
+ /* Need at least two relations for the join */
+ if (numRels < 2)
+ return toRemove;
+
+ /*
+ * In order to find relations with the same oid we first build an array of
+ * candidates and then sort it by oid.
+ */
+ candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
+ numRels);
+ i = -1;
+ j = 0;
+ while ((i = bms_next_member(relids, i)) >= 0)
+ {
+ candidates[j].relid = i;
+ candidates[j].reloid = root->simple_rte_array[i]->relid;
+ j++;
+ }
+
+ qsort(candidates, numRels, sizeof(SelfJoinCandidate),
+ self_join_candidates_cmp);
+
+ /*
+ * Iteratively form a group of relation indexes with the same oid and
+ * launch the routine that detects self-joins in this group and removes
+ * excessive range table entries.
+ *
+ * At the end of the iteration, exclude the group from the overall relids
+ * list. So each next iteration of the cycle will involve less and less
+ * value of relids.
+ */
+ i = 0;
+ for (j = 1; j < numRels + 1; j++)
+ {
+ if (j == numRels || candidates[j].reloid != candidates[i].reloid)
+ {
+ if (j - i >= 2)
+ {
+ /* Create a group of relation indexes with the same oid */
+ Relids group = NULL;
+ Relids removed;
+
+ while (i < j)
+ {
+ group = bms_add_member(group, candidates[i].relid);
+ i++;
+ }
+ relids = bms_del_members(relids, group);
+
+ /*
+ * Try to remove self-joins from a group of identical entries.
+ * Make the next attempt iteratively - if something is deleted
+ * from a group, changes in clauses and equivalence classes
+ * can give us a chance to find more candidates.
+ */
+ do
+ {
+ Assert(!bms_overlap(group, toRemove));
+ removed = remove_self_joins_one_group(root, group);
+ toRemove = bms_add_members(toRemove, removed);
+ group = bms_del_members(group, removed);
+ } while (!bms_is_empty(removed) &&
+ bms_membership(group) == BMS_MULTIPLE);
+ bms_free(removed);
+ bms_free(group);
+ }
+ else
+ {
+ /* Single relation, just remove it from the set */
+ relids = bms_del_member(relids, candidates[i].relid);
+ i = j;
+ }
+ }
+ }
+
+ Assert(bms_is_empty(relids));
+
+ return toRemove;
+}
+
+/*
+ * Compare self-join candidates by their oids.
+ */
+static int
+self_join_candidates_cmp(const void *a, const void *b)
+{
+ const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
+ const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
+
+ if (ca->reloid != cb->reloid)
+ return (ca->reloid < cb->reloid ? -1 : 1);
+ else
+ return 0;
+}
+
+/*
+ * Find and remove useless self joins.
+ *
+ * Search for joins where a relation is joined to itself. If the join clause
+ * for each tuple from one side of the join is proven to match the same
+ * physical row (or nothing) on the other side, that self-join can be
+ * eliminated from the query. Suitable join clauses are assumed to be in the
+ * form of X = X, and can be replaced with NOT NULL clauses.
+ *
+ * For the sake of simplicity, we don't apply this optimization to special
+ * joins. Here is a list of what we could do in some particular cases:
+ * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
+ * and then removed normally.
+ * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
+ * (IS NULL on join columns OR NOT inner quals)'.
+ * 'a a1 left join a a2': could simplify to a scan like inner but without
+ * NOT NULL conditions on join columns.
+ * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
+ * can both remove rows and introduce duplicates.
+ *
+ * To search for removable joins, we order all the relations on their Oid,
+ * go over each set with the same Oid, and consider each pair of relations
+ * in this set.
+ *
+ * To remove the join, we mark one of the participating relations as dead
+ * and rewrite all references to it to point to the remaining relation.
+ * This includes modifying RestrictInfos, EquivalenceClasses, and
+ * EquivalenceMembers. We also have to modify the row marks. The join clauses
+ * of the removed relation become either restriction or join clauses, based on
+ * whether they reference any relations not participating in the removed join.
+ *
+ * 'joinlist' is the top-level joinlist of the query. If it has any
+ * references to the removed relations, we update them to point to the
+ * remaining ones.
+ */
+List *
+remove_useless_self_joins(PlannerInfo *root, List *joinlist)
+{
+ Relids toRemove = NULL;
+ int relid = -1;
+
+ if (!enable_self_join_elimination || joinlist == NIL ||
+ (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
+ return joinlist;
+
+ /*
+ * Merge pairs of relations participated in self-join. Remove unnecessary
+ * range table entries.
+ */
+ toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
+
+ if (unlikely(toRemove != NULL))
+ {
+ /* At the end, remove orphaned relation links */
+ while ((relid = bms_next_member(toRemove, relid)) >= 0)
+ {
+ int nremoved = 0;
+
+ joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
+ if (nremoved != 1)
+ elog(ERROR, "failed to find relation %d in joinlist", relid);
+ }
+ }
+
+ return joinlist;
}
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index ade23fd9d56..5467e094ca7 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -234,6 +234,11 @@ query_planner(PlannerInfo *root,
reduce_unique_semijoins(root);
/*
+ * Remove self joins on a unique column.
+ */
+ joinlist = remove_useless_self_joins(root, joinlist);
+
+ /*
* Now distribute "placeholders" to base rels as needed. This has to be
* done after join removal because removal could change whether a
* placeholder is evaluable at a base rel.