aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/analyzejoins.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2017-05-01 14:53:42 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2017-05-01 14:53:42 -0400
commit92a43e4857d9682b93c9f755f453cc8fd7c66c81 (patch)
tree804a1b8aa610596b00e53453894fafd94b28039a /src/backend/optimizer/plan/analyzejoins.c
parent2057a58d1629ebffce694e3cef7f714571a88dd7 (diff)
downloadpostgresql-92a43e4857d9682b93c9f755f453cc8fd7c66c81.tar.gz
postgresql-92a43e4857d9682b93c9f755f453cc8fd7c66c81.zip
Reduce semijoins with unique inner relations to plain inner joins.
If the inner relation can be proven unique, that is it can have no more than one matching row for any row of the outer query, then we might as well implement the semijoin as a plain inner join, allowing substantially more freedom to the planner. This is a form of outer join strength reduction, but it can't be implemented in reduce_outer_joins() because we don't have enough info about the individual relations at that stage. Instead do it much like remove_useless_joins(): once we've built base relations, we can make another pass over the SpecialJoinInfo list and get rid of any entries representing reducible semijoins. This is essentially a followon to the inner-unique patch (commit 9c7f5229a) and makes use of the proof machinery that that patch created. We need only minor refactoring of innerrel_is_unique's API to support this usage. Per performance complaint from Teodor Sigaev. Discussion: https://postgr.es/m/f994fc98-389f-4a46-d1bc-c42e05cb43ed@sigaev.ru
Diffstat (limited to 'src/backend/optimizer/plan/analyzejoins.c')
-rw-r--r--src/backend/optimizer/plan/analyzejoins.c121
1 files changed, 108 insertions, 13 deletions
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 69b9be4d76b..34317fe7782 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
List *clause_list);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
- RelOptInfo *outerrel,
+ Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist);
@@ -496,6 +496,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
/*
+ * reduce_unique_semijoins
+ * Check for semijoins that can be simplified to plain inner joins
+ * because the inner relation is provably unique for the join clauses.
+ *
+ * Ideally this would happen during reduce_outer_joins, but we don't have
+ * enough information at that point.
+ *
+ * To perform the strength reduction when applicable, we need only delete
+ * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
+ * bother fixing the join type attributed to it in the query jointree,
+ * since that won't be consulted again.)
+ */
+void
+reduce_unique_semijoins(PlannerInfo *root)
+{
+ ListCell *lc;
+ ListCell *next;
+
+ /*
+ * Scan the join_info_list to find semijoins. We can't use foreach
+ * because we may delete the current cell.
+ */
+ for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+ int innerrelid;
+ RelOptInfo *innerrel;
+ Relids joinrelids;
+ List *restrictlist;
+
+ next = lnext(lc);
+
+ /*
+ * Must be a non-delaying semijoin to a single baserel, else we aren't
+ * going to be able to do anything with it. (It's probably not
+ * possible for delay_upper_joins to be set on a semijoin, but we
+ * might as well check.)
+ */
+ if (sjinfo->jointype != JOIN_SEMI ||
+ sjinfo->delay_upper_joins)
+ continue;
+
+ if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+ continue;
+
+ innerrel = find_base_rel(root, innerrelid);
+
+ /*
+ * Before we trouble to run generate_join_implied_equalities, make a
+ * quick check to eliminate cases in which we will surely be unable to
+ * prove uniqueness of the innerrel.
+ */
+ if (!rel_supports_distinctness(root, innerrel))
+ continue;
+
+ /* Compute the relid set for the join we are considering */
+ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+ /*
+ * Since we're only considering a single-rel RHS, any join clauses it
+ * has must be clauses linking it to the semijoin's min_lefthand. We
+ * can also consider EC-derived join clauses.
+ */
+ restrictlist =
+ list_concat(generate_join_implied_equalities(root,
+ joinrelids,
+ sjinfo->min_lefthand,
+ innerrel),
+ innerrel->joininfo);
+
+ /* Test whether the innerrel is unique for those clauses. */
+ if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
+ JOIN_SEMI, restrictlist, true))
+ continue;
+
+ /* OK, remove the SpecialJoinInfo from the list. */
+ root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
+ }
+}
+
+
+/*
* rel_supports_distinctness
* Could the relation possibly be proven distinct on some set of columns?
*
@@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
* Check if the innerrel provably contains at most one tuple matching any
* tuple from the outerrel, based on join clauses in the 'restrictlist'.
*
+ * We need an actual RelOptInfo for the innerrel, but it's sufficient to
+ * identify the outerrel by its Relids. This asymmetry supports use of this
+ * function before joinrels have been built.
+ *
* The proof must be made based only on clauses that will be "joinquals"
* rather than "otherquals" at execution. For an inner join there's no
* difference; but if the join is outer, we must ignore pushed-down quals,
@@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
*
* The actual proof is undertaken by is_innerrel_unique_for(); this function
* is a frontend that is mainly concerned with caching the answers.
+ * In particular, the force_cache argument allows overriding the internal
+ * heuristic about whether to cache negative answers; it should be "true"
+ * if making an inquiry that is not part of the normal bottom-up join search
+ * sequence.
*/
bool
innerrel_is_unique(PlannerInfo *root,
- RelOptInfo *outerrel,
+ Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist)
+ List *restrictlist,
+ bool force_cache)
{
MemoryContext old_context;
ListCell *lc;
@@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
{
Relids unique_for_rels = (Relids) lfirst(lc);
- if (bms_is_subset(unique_for_rels, outerrel->relids))
+ if (bms_is_subset(unique_for_rels, outerrelids))
return true; /* Success! */
}
@@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
{
Relids unique_for_rels = (Relids) lfirst(lc);
- if (bms_is_subset(outerrel->relids, unique_for_rels))
+ if (bms_is_subset(outerrelids, unique_for_rels))
return false;
}
/* No cached information, so try to make the proof. */
- if (is_innerrel_unique_for(root, outerrel, innerrel,
+ if (is_innerrel_unique_for(root, outerrelids, innerrel,
jointype, restrictlist))
{
/*
@@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
- bms_copy(outerrel->relids));
+ bms_copy(outerrelids));
MemoryContextSwitchTo(old_context);
return true; /* Success! */
@@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
* from smaller to larger. It is useful in GEQO mode, where the
* knowledge can be carried across successive planning attempts; and
* it's likely to be useful when using join-search plugins, too. Hence
- * cache only when join_search_private is non-NULL. (Yeah, that's a
- * hack, but it seems reasonable.)
+ * cache when join_search_private is non-NULL. (Yeah, that's a hack,
+ * but it seems reasonable.)
+ *
+ * Also, allow callers to override that heuristic and force caching;
+ * that's useful for reduce_unique_semijoins, which calls here before
+ * the normal join search starts.
*/
- if (root->join_search_private)
+ if (force_cache || root->join_search_private)
{
old_context = MemoryContextSwitchTo(root->planner_cxt);
innerrel->non_unique_for_rels =
lappend(innerrel->non_unique_for_rels,
- bms_copy(outerrel->relids));
+ bms_copy(outerrelids));
MemoryContextSwitchTo(old_context);
}
@@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
*/
static bool
is_innerrel_unique_for(PlannerInfo *root,
- RelOptInfo *outerrel,
+ Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist)
@@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
* Check if clause has the form "outer op inner" or "inner op outer",
* and if so mark which side is inner.
*/
- if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ if (!clause_sides_match_join(restrictinfo, outerrelids,
innerrel->relids))
continue; /* no good for these input relations */