aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2001-10-18 16:11:42 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2001-10-18 16:11:42 +0000
commit6254465d0621f724cdc9a1e99da24fa8a29f579d (patch)
tree84bd08321ce84de9daf6ab5264c889e5b5a92e4e /src
parent50450049581566ed47016cd89ba03b90be7ea1d0 (diff)
downloadpostgresql-6254465d0621f724cdc9a1e99da24fa8a29f579d.tar.gz
postgresql-6254465d0621f724cdc9a1e99da24fa8a29f579d.zip
Extend code that deduces implied equality clauses to detect whether a
clause being added to a particular restriction-clause list is redundant with those already in the list. This avoids useless work at runtime, and (perhaps more importantly) keeps the selectivity estimation routines from generating too-small estimates of numbers of output rows. Also some minor improvements in OPTIMIZER_DEBUG displays.
Diffstat (limited to 'src')
-rw-r--r--src/backend/nodes/print.c36
-rw-r--r--src/backend/optimizer/README17
-rw-r--r--src/backend/optimizer/path/allpaths.c154
-rw-r--r--src/backend/optimizer/path/joinrels.c6
-rw-r--r--src/backend/optimizer/path/pathkeys.c4
-rw-r--r--src/backend/optimizer/plan/initsplan.c151
-rw-r--r--src/backend/optimizer/plan/planner.c5
-rw-r--r--src/backend/optimizer/util/relnode.c102
-rw-r--r--src/include/optimizer/paths.h8
9 files changed, 373 insertions, 110 deletions
diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c
index 932f55ab885..98347695622 100644
--- a/src/backend/nodes/print.c
+++ b/src/backend/nodes/print.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.47 2001/03/22 03:59:32 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.48 2001/10/18 16:11:41 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -20,10 +20,12 @@
#include "postgres.h"
#include "access/printtup.h"
+#include "catalog/pg_type.h"
#include "nodes/print.h"
#include "optimizer/clauses.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
+#include "utils/syscache.h"
static char *plannode_type(Plan *p);
@@ -188,6 +190,36 @@ print_expr(Node *expr, List *rtable)
}
printf("%s.%s", relname, attname);
}
+ else if (IsA(expr, Const))
+ {
+ Const *c = (Const *) expr;
+ HeapTuple typeTup;
+ Oid typoutput;
+ Oid typelem;
+ char *outputstr;
+
+ if (c->constisnull)
+ {
+ printf("NULL");
+ return;
+ }
+
+ typeTup = SearchSysCache(TYPEOID,
+ ObjectIdGetDatum(c->consttype),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(typeTup))
+ elog(ERROR, "Cache lookup for type %u failed", c->consttype);
+ typoutput = ((Form_pg_type) GETSTRUCT(typeTup))->typoutput;
+ typelem = ((Form_pg_type) GETSTRUCT(typeTup))->typelem;
+ ReleaseSysCache(typeTup);
+
+ outputstr = DatumGetCString(OidFunctionCall3(typoutput,
+ c->constvalue,
+ ObjectIdGetDatum(typelem),
+ Int32GetDatum(-1)));
+ printf("%s", outputstr);
+ pfree(outputstr);
+ }
else if (IsA(expr, Expr))
{
Expr *e = (Expr *) expr;
@@ -232,7 +264,7 @@ print_pathkeys(List *pathkeys, List *rtable)
if (lnext(k))
printf(", ");
}
- printf(") ");
+ printf(")");
if (lnext(i))
printf(", ");
}
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 520ab1b2421..472efbcd9cc 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -461,6 +461,23 @@ code than it would otherwise have, and reduces the number of tuples
processed in join stages, so it's a win to make these deductions even
if we weren't forced to.
+When we generate implied equality constraints, we may find ourselves
+adding redundant clauses to specific relations. For example, consider
+ SELECT * FROM t1, t2, t3 WHERE t1.a = t2.b AND t2.b = t3.c;
+We will generate the implied clause t1.a = t3.c and add it to the tree.
+This is good since it allows us to consider joining t1 and t3 directly,
+which we otherwise wouldn't do. But when we reach the stage of joining
+all three relations, we will have redundant join clauses --- eg, if we
+join t1 and t2 first, then the path that joins (t1 t2) to t3 will have
+both t2.b = t3.c and t1.a = t3.c as restriction clauses. This is bad;
+not only is evaluation of the extra clause useless work at runtime,
+but the selectivity estimator routines will underestimate the number
+of tuples produced since they won't know that the two clauses are
+perfectly redundant. We fix this by detecting and removing redundant
+clauses as the restriction clause list is built for each join. (We
+can't do it sooner, since which clauses are redundant will vary depending
+on the join order.)
+
Yet another implication of all this is that mergejoinable operators
must form closed equivalence sets. For example, if "int2 = int4"
and "int4 = int8" are both marked mergejoinable, then there had better
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 8a6e9b9374a..ebc303d4654 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,13 +8,16 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.78 2001/07/31 17:56:30 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.79 2001/10/18 16:11:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#ifdef OPTIMIZER_DEBUG
+#include "nodes/print.h"
+#endif
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
@@ -42,11 +45,6 @@ static void set_subquery_pathlist(Query *root, RelOptInfo *rel,
static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
List *initial_rels);
-#ifdef OPTIMIZER_DEBUG
-static void debug_print_rel(Query *root, RelOptInfo *rel);
-
-#endif
-
/*
* make_one_rel
@@ -116,6 +114,10 @@ set_base_rel_pathlists(Query *root)
/* Plain relation */
set_plain_rel_pathlist(root, rel, rte);
}
+
+#ifdef OPTIMIZER_DEBUG
+ debug_print_rel(root, rel);
+#endif
}
}
@@ -520,10 +522,22 @@ make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
#ifdef OPTIMIZER_DEBUG
static void
-print_joinclauses(Query *root, List *clauses)
+print_relids(Relids relids)
+{
+ List *l;
+
+ foreach(l, relids)
+ {
+ printf("%d", lfirsti(l));
+ if (lnext(l))
+ printf(" ");
+ }
+}
+
+static void
+print_restrictclauses(Query *root, List *clauses)
{
List *l;
- extern void print_expr(Node *expr, List *rtable); /* in print.c */
foreach(l, clauses)
{
@@ -531,21 +545,17 @@ print_joinclauses(Query *root, List *clauses)
print_expr((Node *) c->clause, root->rtable);
if (lnext(l))
- printf(" ");
+ printf(", ");
}
}
static void
print_path(Query *root, Path *path, int indent)
{
- char *ptype = NULL;
- JoinPath *jp;
- bool join = false;
+ const char *ptype;
+ bool join;
int i;
- for (i = 0; i < indent; i++)
- printf("\t");
-
switch (nodeTag(path))
{
case T_Path:
@@ -556,6 +566,10 @@ print_path(Query *root, Path *path, int indent)
ptype = "IdxScan";
join = false;
break;
+ case T_TidPath:
+ ptype = "TidScan";
+ join = false;
+ break;
case T_NestPath:
ptype = "Nestloop";
join = true;
@@ -569,82 +583,82 @@ print_path(Query *root, Path *path, int indent)
join = true;
break;
default:
+ ptype = "???Path";
+ join = false;
break;
}
+
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf("%s(", ptype);
+ print_relids(path->parent->relids);
+ printf(") rows=%.0f cost=%.2f..%.2f\n",
+ path->parent->rows, path->startup_cost, path->total_cost);
+
+ if (path->pathkeys)
+ {
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" pathkeys: ");
+ print_pathkeys(path->pathkeys, root->rtable);
+ }
+
if (join)
{
- jp = (JoinPath *) path;
+ JoinPath *jp = (JoinPath *) path;
- printf("%s rows=%.0f cost=%.2f..%.2f\n",
- ptype, path->parent->rows,
- path->startup_cost, path->total_cost);
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" clauses: ");
+ print_restrictclauses(root, jp->joinrestrictinfo);
+ printf("\n");
- if (path->pathkeys)
+ if (nodeTag(path) == T_MergePath)
{
- for (i = 0; i < indent; i++)
- printf("\t");
- printf(" pathkeys=");
- print_pathkeys(path->pathkeys, root->rtable);
- }
+ MergePath *mp = (MergePath *) path;
- switch (nodeTag(path))
- {
- case T_MergePath:
- case T_HashPath:
+ if (mp->outersortkeys || mp->innersortkeys)
+ {
for (i = 0; i < indent; i++)
printf("\t");
- printf(" clauses=(");
- print_joinclauses(root, jp->joinrestrictinfo);
- printf(")\n");
-
- if (nodeTag(path) == T_MergePath)
- {
- MergePath *mp = (MergePath *) path;
-
- if (mp->outersortkeys || mp->innersortkeys)
- {
- for (i = 0; i < indent; i++)
- printf("\t");
- printf(" sortouter=%d sortinner=%d\n",
- ((mp->outersortkeys) ? 1 : 0),
- ((mp->innersortkeys) ? 1 : 0));
- }
- }
- break;
- default:
- break;
+ printf(" sortouter=%d sortinner=%d\n",
+ ((mp->outersortkeys) ? 1 : 0),
+ ((mp->innersortkeys) ? 1 : 0));
+ }
}
+
print_path(root, jp->outerjoinpath, indent + 1);
print_path(root, jp->innerjoinpath, indent + 1);
}
- else
- {
- int relid = lfirsti(path->parent->relids);
-
- printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
- ptype, relid, path->parent->rows,
- path->startup_cost, path->total_cost);
-
- if (path->pathkeys)
- {
- for (i = 0; i < indent; i++)
- printf("\t");
- printf(" pathkeys=");
- print_pathkeys(path->pathkeys, root->rtable);
- }
- }
}
-static void
+void
debug_print_rel(Query *root, RelOptInfo *rel)
{
List *l;
- printf("(");
- foreach(l, rel->relids)
- printf("%d ", lfirsti(l));
+ printf("RELOPTINFO (");
+ print_relids(rel->relids);
printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
+ if (rel->baserestrictinfo)
+ {
+ printf("\tbaserestrictinfo: ");
+ print_restrictclauses(root, rel->baserestrictinfo);
+ printf("\n");
+ }
+
+ foreach(l, rel->joininfo)
+ {
+ JoinInfo *j = (JoinInfo *) lfirst(l);
+
+ printf("\tjoininfo (");
+ print_relids(j->unjoined_relids);
+ printf("): ");
+ print_restrictclauses(root, j->jinfo_restrictinfo);
+ printf("\n");
+ }
+
printf("\tpath list:\n");
foreach(l, rel->pathlist)
print_path(root, lfirst(l), 1);
@@ -652,6 +666,8 @@ debug_print_rel(Query *root, RelOptInfo *rel)
print_path(root, rel->cheapest_startup_path, 1);
printf("\n\tcheapest total path:\n");
print_path(root, rel->cheapest_total_path, 1);
+ printf("\n");
+ fflush(stdout);
}
#endif /* OPTIMIZER_DEBUG */
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 3bde257a37e..8c805e0e3ed 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.53 2001/05/20 20:28:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.54 2001/10/18 16:11:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -374,6 +374,10 @@ make_jointree_rel(Query *root, Node *jtnode)
*/
set_cheapest(rel);
+#ifdef OPTIMIZER_DEBUG
+ debug_print_rel(root, rel);
+#endif
+
return rel;
}
else
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4f4b6720e1e..9e964290e6d 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.32 2001/03/22 06:16:14 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.33 2001/10/18 16:11:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -705,7 +705,7 @@ make_pathkeys_for_sortclauses(List *sortclauses,
* This is a worthwhile savings because these routines will be invoked
* many times when dealing with a many-relation query.
*/
-static void
+void
cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo)
{
Node *key;
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index cf2f798954b..ff2e13ec977 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.63 2001/06/05 05:26:04 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.64 2001/10/18 16:11:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,10 +40,13 @@ static void mark_baserels_for_outer_join(Query *root, Relids rels,
static void distribute_qual_to_rels(Query *root, Node *clause,
bool ispusheddown,
bool isouterjoin,
+ bool isdeduced,
Relids qualscope);
static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
Relids join_relids);
static void add_vars_to_targetlist(Query *root, List *vars);
+static bool qual_is_redundant(Query *root, RestrictInfo *restrictinfo,
+ List *restrictlist);
static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
@@ -219,7 +222,7 @@ distribute_quals_to_rels(Query *root, Node *jtnode)
*/
foreach(qual, (List *) f->quals)
distribute_qual_to_rels(root, (Node *) lfirst(qual),
- true, false, result);
+ true, false, false, result);
}
else if (IsA(jtnode, JoinExpr))
{
@@ -279,7 +282,7 @@ distribute_quals_to_rels(Query *root, Node *jtnode)
foreach(qual, (List *) j->quals)
distribute_qual_to_rels(root, (Node *) lfirst(qual),
- false, isouterjoin, result);
+ false, isouterjoin, false, result);
}
else
elog(ERROR, "distribute_quals_to_rels: unexpected node type %d",
@@ -339,6 +342,7 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels)
* 'ispusheddown': if TRUE, force the clause to be marked 'ispusheddown'
* (this indicates the clause came from a FromExpr, not a JoinExpr)
* 'isouterjoin': TRUE if the qual came from an OUTER JOIN's ON-clause
+ * 'isdeduced': TRUE if the qual came from implied-equality deduction
* 'qualscope': list of baserels the qual's syntactic scope covers
*
* 'qualscope' identifies what level of JOIN the qual came from. For a top
@@ -349,6 +353,7 @@ static void
distribute_qual_to_rels(Query *root, Node *clause,
bool ispusheddown,
bool isouterjoin,
+ bool isdeduced,
Relids qualscope)
{
RestrictInfo *restrictinfo = makeNode(RestrictInfo);
@@ -405,8 +410,17 @@ distribute_qual_to_rels(Query *root, Node *clause,
* clause's own reference list. At the time we are called, the
* outerjoinset list of each baserel will show exactly those outer
* joins that are below the qual in the join tree.
+ *
+ * If the qual came from implied-equality deduction, we can evaluate
+ * the qual at its natural semantic level.
+ *
*/
- if (isouterjoin)
+ if (isdeduced)
+ {
+ Assert(sameseti(relids, qualscope));
+ can_be_equijoin = true;
+ }
+ else if (isouterjoin)
{
relids = qualscope;
can_be_equijoin = false;
@@ -461,9 +475,6 @@ distribute_qual_to_rels(Query *root, Node *clause,
*/
RelOptInfo *rel = build_base_rel(root, lfirsti(relids));
- rel->baserestrictinfo = lappend(rel->baserestrictinfo,
- restrictinfo);
-
/*
* Check for a "mergejoinable" clause even though it's not a join
* clause. This is so that we can recognize that "a.x = a.y"
@@ -474,10 +485,26 @@ distribute_qual_to_rels(Query *root, Node *clause,
*/
if (can_be_equijoin)
check_mergejoinable(restrictinfo);
+
+ /*
+ * If the clause was deduced from implied equality, check to see
+ * whether it is redundant with restriction clauses we already have
+ * for this rel. Note we cannot apply this check to user-written
+ * clauses, since we haven't found the canonical pathkey sets yet
+ * while processing user clauses. (NB: no comparable check is done
+ * in the join-clause case; redundancy will be detected when the
+ * join clause is moved into a join rel's restriction list.)
+ */
+ if (!isdeduced ||
+ !qual_is_redundant(root, restrictinfo, rel->baserestrictinfo))
+ {
+ /* Add clause to rel's restriction list */
+ rel->baserestrictinfo = lappend(rel->baserestrictinfo,
+ restrictinfo);
+ }
}
else if (relids != NIL)
{
-
/*
* 'clause' is a join clause, since there is more than one rel in
* the relid list. Set additional RestrictInfo fields for
@@ -524,9 +551,11 @@ distribute_qual_to_rels(Query *root, Node *clause,
* the two sides represent equivalent PathKeyItems for path keys: any
* path that is sorted by one side will also be sorted by the other
* (as soon as the two rels are joined, that is). Record the key
- * equivalence for future use.
+ * equivalence for future use. (We can skip this for a deduced clause,
+ * since the keys are already known equivalent in that case.)
*/
- if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid)
+ if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid &&
+ !isdeduced)
add_equijoined_keys(root, restrictinfo);
}
@@ -684,18 +713,106 @@ process_implied_equality(Query *root, Node *item1, Node *item2,
/*
* Note: we mark the qual "pushed down" to ensure that it can never be
- * taken for an original JOIN/ON clause. We also claim it is an
- * outer- join clause, which it isn't, but that keeps
- * distribute_qual_to_rels from examining the outerjoinsets of the
- * relevant rels (which are no longer of interest, but could keep the
- * qual from being pushed down to where it should be). It'll also
- * save a useless call to add_equijoined keys...
+ * taken for an original JOIN/ON clause.
*/
distribute_qual_to_rels(root, (Node *) clause,
- true, true,
+ true, false, true,
pull_varnos((Node *) clause));
}
+/*
+ * qual_is_redundant
+ * Detect whether an implied-equality qual that turns out to be a
+ * restriction clause for a single base relation is redundant with
+ * already-known restriction clauses for that rel. This occurs with,
+ * for example,
+ * SELECT * FROM tab WHERE f1 = f2 AND f2 = f3;
+ * We need to suppress the redundant condition to avoid computing
+ * too-small selectivity, not to mention wasting time at execution.
+ */
+static bool
+qual_is_redundant(Query *root,
+ RestrictInfo *restrictinfo,
+ List *restrictlist)
+{
+ List *oldquals;
+ List *olditem;
+ Node *newleft;
+ Node *newright;
+ List *equalvars;
+ bool someadded;
+
+ /*
+ * Set cached pathkeys. NB: it is okay to do this now because this
+ * routine is only invoked while we are generating implied equalities.
+ * Therefore, the equi_key_list is already complete and so we can
+ * correctly determine canonical pathkeys.
+ */
+ cache_mergeclause_pathkeys(root, restrictinfo);
+ /* If different, say "not redundant" (should never happen) */
+ if (restrictinfo->left_pathkey != restrictinfo->right_pathkey)
+ return false;
+ /*
+ * Scan existing quals to find those referencing same pathkeys.
+ * Usually there will be few, if any, so build a list of just the
+ * interesting ones.
+ */
+ oldquals = NIL;
+ foreach(olditem, restrictlist)
+ {
+ RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+
+ if (oldrinfo->mergejoinoperator != InvalidOid)
+ {
+ cache_mergeclause_pathkeys(root, oldrinfo);
+ if (restrictinfo->left_pathkey == oldrinfo->left_pathkey &&
+ restrictinfo->right_pathkey == oldrinfo->right_pathkey)
+ oldquals = lcons(oldrinfo, oldquals);
+ }
+ }
+ if (oldquals == NIL)
+ return false;
+ /*
+ * Now, we want to develop a list of Vars that are known equal to the
+ * left side of the new qual. We traverse the old-quals list repeatedly
+ * to transitively expand the Vars list. If at any point we find we
+ * can reach the right-side Var of the new qual, we are done. We give
+ * up when we can't expand the equalvars list any more.
+ */
+ newleft = (Node *) get_leftop(restrictinfo->clause);
+ newright = (Node *) get_rightop(restrictinfo->clause);
+ equalvars = makeList1(newleft);
+ do {
+ someadded = false;
+ foreach(olditem, oldquals)
+ {
+ RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+ Node *oldleft = (Node *) get_leftop(oldrinfo->clause);
+ Node *oldright = (Node *) get_rightop(oldrinfo->clause);
+ Node *newguy = NULL;
+
+ if (member(oldleft, equalvars))
+ newguy = oldright;
+ else if (member(oldright, equalvars))
+ newguy = oldleft;
+ else
+ continue;
+ if (equal(newguy, newright))
+ return true; /* we proved new clause is redundant */
+ equalvars = lcons(newguy, equalvars);
+ someadded = true;
+ /*
+ * Remove this qual from list, since we don't need it anymore.
+ * Note this doesn't break the foreach() loop, since lremove
+ * doesn't touch the next-link of the removed cons cell.
+ */
+ oldquals = lremove(oldrinfo, oldquals);
+ }
+ } while (someadded);
+
+ return false; /* it's not redundant */
+}
+
/*****************************************************************************
*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a2fa8832058..48e41686e9b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.108 2001/06/05 05:26:04 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.109 2001/10/18 16:11:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -17,6 +17,9 @@
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
+#ifdef OPTIMIZER_DEBUG
+#include "nodes/print.h"
+#endif
#include "optimizer/clauses.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 86d923116a4..3b85f321dfb 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.33 2001/05/20 20:28:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.34 2001/10/18 16:11:42 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -17,6 +17,7 @@
#include "optimizer/cost.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
@@ -24,9 +25,10 @@
static RelOptInfo *make_base_rel(Query *root, int relid);
static List *new_join_tlist(List *tlist, int first_resdomno);
-static List *build_joinrel_restrictlist(RelOptInfo *joinrel,
- RelOptInfo *outer_rel,
- RelOptInfo *inner_rel);
+static List *build_joinrel_restrictlist(Query *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel);
static void build_joinrel_joinlist(RelOptInfo *joinrel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel);
@@ -274,7 +276,8 @@ build_join_rel(Query *root,
* particular pair of component relations.
*/
if (restrictlist_ptr)
- *restrictlist_ptr = build_joinrel_restrictlist(joinrel,
+ *restrictlist_ptr = build_joinrel_restrictlist(root,
+ joinrel,
outer_rel,
inner_rel);
return joinrel;
@@ -326,7 +329,10 @@ build_join_rel(Query *root,
* caller might or might not need the restrictlist, but I need it
* anyway for set_joinrel_size_estimates().)
*/
- restrictlist = build_joinrel_restrictlist(joinrel, outer_rel, inner_rel);
+ restrictlist = build_joinrel_restrictlist(root,
+ joinrel,
+ outer_rel,
+ inner_rel);
if (restrictlist_ptr)
*restrictlist_ptr = restrictlist;
build_joinrel_joinlist(joinrel, outer_rel, inner_rel);
@@ -407,6 +413,11 @@ new_join_tlist(List *tlist,
* join paths made from this pair of sub-relations. (It will not need to
* be considered further up the join tree.)
*
+ * When building a restriction list, we eliminate redundant clauses.
+ * We don't try to do that for join clause lists, since the join clauses
+ * aren't really doing anything, just waiting to become part of higher
+ * levels' restriction lists.
+ *
* 'joinrel' is a join relation node
* 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
* to form joinrel.
@@ -421,19 +432,80 @@ new_join_tlist(List *tlist,
* the original nodes in the lists made for the join relation.
*/
static List *
-build_joinrel_restrictlist(RelOptInfo *joinrel,
+build_joinrel_restrictlist(Query *root,
+ RelOptInfo *joinrel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel)
{
+ List *result = NIL;
+ List *rlist;
+ List *item;
+
+ /*
+ * Collect all the clauses that syntactically belong at this level.
+ */
+ rlist = nconc(subbuild_joinrel_restrictlist(joinrel,
+ outer_rel->joininfo),
+ subbuild_joinrel_restrictlist(joinrel,
+ inner_rel->joininfo));
/*
- * We must eliminate duplicates, since we will see the same clauses
- * arriving from both input relations...
+ * Eliminate duplicate and redundant clauses.
+ *
+ * We must eliminate duplicates, since we will see many of the same
+ * clauses arriving from both input relations. Also, if a clause is
+ * a mergejoinable clause, it's possible that it is redundant with
+ * previous clauses (see optimizer/README for discussion). We detect
+ * that case and omit the redundant clause from the result list.
+ *
+ * We can detect redundant mergejoinable clauses very cheaply by using
+ * their left and right pathkeys, which uniquely identify the sets of
+ * equijoined variables in question. All the members of a pathkey set
+ * that are in the left relation have already been forced to be equal;
+ * likewise for those in the right relation. So, we need to have only
+ * one clause that checks equality between any set member on the left and
+ * any member on the right; by transitivity, all the rest are then equal.
*/
- return set_union(subbuild_joinrel_restrictlist(joinrel,
- outer_rel->joininfo),
- subbuild_joinrel_restrictlist(joinrel,
- inner_rel->joininfo));
+ foreach(item, rlist)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
+
+ /* eliminate duplicates */
+ if (member(rinfo, result))
+ continue;
+
+ /* check for redundant merge clauses */
+ if (rinfo->mergejoinoperator != InvalidOid)
+ {
+ bool redundant = false;
+ List *olditem;
+
+ cache_mergeclause_pathkeys(root, rinfo);
+
+ foreach(olditem, result)
+ {
+ RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+
+ if (oldrinfo->mergejoinoperator != InvalidOid &&
+ rinfo->left_pathkey == oldrinfo->left_pathkey &&
+ rinfo->right_pathkey == oldrinfo->right_pathkey)
+ {
+ redundant = true;
+ break;
+ }
+ }
+
+ if (redundant)
+ continue;
+ }
+
+ /* otherwise, add it to result list */
+ result = lappend(result, rinfo);
+ }
+
+ freeList(rlist);
+
+ return result;
}
static void
@@ -458,7 +530,6 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
if (is_subseti(joininfo->unjoined_relids, joinrel->relids))
{
-
/*
* Clauses in this JoinInfo list become restriction clauses
* for the joinrel, since they refer to no outside rels.
@@ -471,7 +542,6 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
}
else
{
-
/*
* These clauses are still join clauses at this level, so we
* ignore them in this routine.
@@ -497,7 +567,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
joinrel->relids);
if (new_unjoined_relids == NIL)
{
-
/*
* Clauses in this JoinInfo list become restriction clauses
* for the joinrel, since they refer to no outside rels. So we
@@ -506,7 +575,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
}
else
{
-
/*
* These clauses are still join clauses at this level, so find
* or make the appropriate JoinInfo item for the joinrel, and
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 08fe2ccd776..1dcb2cc2730 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: paths.h,v 1.55 2001/08/21 16:36:06 tgl Exp $
+ * $Id: paths.h,v 1.56 2001/10/18 16:11:42 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -31,6 +31,10 @@ extern int geqo_rels;
extern RelOptInfo *make_one_rel(Query *root);
extern RelOptInfo *make_fromexpr_rel(Query *root, FromExpr *from);
+#ifdef OPTIMIZER_DEBUG
+extern void debug_print_rel(Query *root, RelOptInfo *rel);
+#endif
+
/*
* indxpath.c
* routines to generate index paths
@@ -111,6 +115,8 @@ extern List *build_join_pathkeys(Query *root,
List *outer_pathkeys);
extern List *make_pathkeys_for_sortclauses(List *sortclauses,
List *tlist);
+extern void cache_mergeclause_pathkeys(Query *root,
+ RestrictInfo *restrictinfo);
extern List *find_mergeclauses_for_pathkeys(Query *root,
List *pathkeys,
List *restrictinfos);