diff options
Diffstat (limited to 'src/backend/optimizer')
21 files changed, 1358 insertions, 477 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index a867cd885ed..38901ede1fd 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -35,10 +35,10 @@ RelOptInfo.pathlist. (Actually, we discard Paths that are obviously inferior alternatives before they ever get into the pathlist --- what ends up in the pathlist is the cheapest way of generating each potentially useful sort ordering of the relation.) Also create RelOptInfo.joininfo -nodes that list all the joins that involve this relation. For example, -the WHERE clause "tab1.col1 = tab2.col1" generates a JoinInfo for tab1 -listing tab2 as an unjoined relation, and also one for tab2 showing tab1 -as an unjoined relation. +nodes that list all the join clauses that involve this relation. For +example, the WHERE clause "tab1.col1 = tab2.col1" generates a JoinInfo +for tab1 listing tab2 as an unjoined relation, and also one for tab2 +showing tab1 as an unjoined relation. If we have only a single base relation in the query, we are done now. Otherwise we have to figure out how to join the base relations into a @@ -128,6 +128,19 @@ Once we have built the final join rel, we use either the cheapest path for it or the cheapest path with the desired ordering (if that's cheaper than applying a sort to the cheapest other path). +The above dynamic-programming search is only conducted for simple cross +joins (ie, SELECT FROM tab1, tab2, ...). When the FROM clause contains +explicit JOIN clauses, we join rels in exactly the order implied by the +join tree. Searching for the best possible join order is done only at +the top implicit-cross-join level. For example, in + SELECT FROM tab1, tab2, (tab3 NATURAL JOIN tab4) +we will always join tab3 to tab4 and then consider all ways to join that +result to tab1 and tab2. Note that the JOIN syntax only constrains the +order of joining --- we will still consider all available Paths and +join methods for each JOIN operator. We also consider both sides of +the JOIN operator as inner or outer (so that we can transform RIGHT JOIN +into LEFT JOIN). + Optimizer Functions ------------------- @@ -158,13 +171,12 @@ planner() get a target list that only contains column names, no expressions if none, then return ---subplanner() - make list of relations in target - make list of relations in where clause + make list of base relations used in query split up the qual into restrictions (a=1) and joins (b=c) - find relation clauses can do merge sort and hash joins + find relation clauses that can do merge sort and hash joins ----make_one_rel() set_base_rel_pathlist() - find scan and all index paths for each relation + find scan and all index paths for each base relation find selectivity of columns used in joins -----make_one_rel_by_joins() jump to geqo if needed diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 7b9542cb1b2..f32b0d64eeb 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_eval.c,v 1.53 2000/07/28 02:13:16 tgl Exp $ + * $Id: geqo_eval.c,v 1.54 2000/09/12 21:06:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -93,11 +93,11 @@ geqo_eval(Query *root, Gene *tour, int num_gene) * Returns a new join relation incorporating all joins in a left-sided tree. */ RelOptInfo * -gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old_rel) +gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, + RelOptInfo *old_rel) { RelOptInfo *inner_rel; /* current relation */ int base_rel_index; - RelOptInfo *new_rel; if (rel_count < num_gene) { /* tree not yet finished */ @@ -116,16 +116,22 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old else { /* tree main part */ List *acceptable_rels = lcons(inner_rel, NIL); - - new_rel = make_rels_by_clause_joins(root, old_rel, - acceptable_rels); - if (!new_rel) + List *new_rels; + RelOptInfo *new_rel; + + new_rels = make_rels_by_clause_joins(root, old_rel, + acceptable_rels); + /* Shouldn't get more than one result */ + Assert(length(new_rels) <= 1); + if (new_rels == NIL) { - new_rel = make_rels_by_clauseless_joins(root, old_rel, - acceptable_rels); - if (!new_rel) + new_rels = make_rels_by_clauseless_joins(root, old_rel, + acceptable_rels); + Assert(length(new_rels) <= 1); + if (new_rels == NIL) elog(ERROR, "gimme_tree: failed to construct join rel"); } + new_rel = (RelOptInfo *) lfirst(new_rels); rel_count++; Assert(length(new_rel->relids) == rel_count); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 999364d5637..605b60b5845 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.62 2000/05/31 00:28:22 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.63 2000/09/12 21:06:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,9 @@ int geqo_rels = DEFAULT_GEQO_RELS; static void set_base_rel_pathlist(Query *root); -static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed); +static List *build_jointree_rels(Query *root); +static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed, + List *initial_rels); #ifdef OPTIMIZER_DEBUG static void debug_print_rel(Query *root, RelOptInfo *rel); @@ -43,27 +45,38 @@ RelOptInfo * make_one_rel(Query *root) { int levels_needed; + List *initial_rels; /* - * Set the number of join (not nesting) levels yet to be processed. + * Count the number of top-level jointree nodes. This is the depth + * of the dynamic-programming algorithm we must employ to consider + * all ways of joining the top-level nodes. Currently, we build + * JoinExpr joins in exactly the order implied by the join expression, + * so no dynamic-programming search is needed within a JoinExpr. */ - levels_needed = length(root->base_rel_list); + levels_needed = length(root->jointree); if (levels_needed <= 0) - return NULL; + return NULL; /* nothing to do? */ /* * Generate access paths for the base rels. */ set_base_rel_pathlist(root); + /* + * Construct a list of rels corresponding to the toplevel jointree nodes. + * This may contain both base rels and rels constructed according to + * explicit JOIN directives. + */ + initial_rels = build_jointree_rels(root); + if (levels_needed == 1) { - /* - * Single relation, no more processing is required. + * Single jointree node, so we're done. */ - return (RelOptInfo *) lfirst(root->base_rel_list); + return (RelOptInfo *) lfirst(initial_rels); } else { @@ -71,7 +84,7 @@ make_one_rel(Query *root) /* * Generate join tree. */ - return make_one_rel_by_joins(root, levels_needed); + return make_one_rel_by_joins(root, levels_needed, initial_rels); } } @@ -126,19 +139,46 @@ set_base_rel_pathlist(Query *root) } /* + * build_jointree_rels + * Construct a RelOptInfo for each item in the query's jointree. + * + * At present, we handle explicit joins in the FROM clause exactly as + * specified, with no search for other join orders. Only the cross-product + * joins at the top level are involved in the dynamic-programming search. + */ +static List * +build_jointree_rels(Query *root) +{ + List *rels = NIL; + List *jt; + + foreach(jt, root->jointree) + { + Node *jtnode = (Node *) lfirst(jt); + + rels = lappend(rels, make_rel_from_jointree(root, jtnode)); + } + return rels; +} + +/* * make_one_rel_by_joins * Find all possible joinpaths for a query by successively finding ways * to join component relations into join relations. * * 'levels_needed' is the number of iterations needed, ie, the number of - * base relations present in the query + * independent jointree items in the query. This is > 1. + * + * 'initial_rels' is a list of RelOptInfo nodes for each independent + * jointree item. These are the components to be joined together. * * Returns the final level of join relations, i.e., the relation that is * the result of joining all the original relations together. */ static RelOptInfo * -make_one_rel_by_joins(Query *root, int levels_needed) +make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels) { + List **joinitems; int lev; RelOptInfo *rel; @@ -152,34 +192,35 @@ make_one_rel_by_joins(Query *root, int levels_needed) /* * We employ a simple "dynamic programming" algorithm: we first find - * all ways to build joins of two base relations, then all ways to - * build joins of three base relations (from two-base-rel joins and - * other base rels), then four-base-rel joins, and so on until we have - * considered all ways to join all N relations into one rel. + * all ways to build joins of two jointree items, then all ways to + * build joins of three items (from two-item joins and single items), + * then four-item joins, and so on until we have considered all ways + * to join all the items into one rel. + * + * joinitems[j] is a list of all the j-item rels. Initially we set + * joinitems[1] to represent all the single-jointree-item relations. */ + joinitems = (List **) palloc((levels_needed+1) * sizeof(List *)); + MemSet(joinitems, 0, (levels_needed+1) * sizeof(List *)); + + joinitems[1] = initial_rels; for (lev = 2; lev <= levels_needed; lev++) { - List *first_old_rel = root->join_rel_list; List *x; /* * Determine all possible pairs of relations to be joined at this * level, and build paths for making each one from every available - * pair of lower-level relations. Results are prepended to - * root->join_rel_list. + * pair of lower-level relations. */ - make_rels_by_joins(root, lev); + joinitems[lev] = make_rels_by_joins(root, lev, joinitems); /* - * The relations created at the current level will appear at the - * front of root->join_rel_list. + * Do cleanup work on each just-processed rel. */ - foreach(x, root->join_rel_list) + foreach(x, joinitems[lev]) { - if (x == first_old_rel) - break; /* no more rels added at this level */ - rel = (RelOptInfo *) lfirst(x); #ifdef NOT_USED @@ -202,14 +243,12 @@ make_one_rel_by_joins(Query *root, int levels_needed) } /* - * Now, the front of the join_rel_list should be the single rel + * We should have a single rel at the final level, * representing the join of all the base rels. */ - Assert(length(root->join_rel_list) > 0); - rel = (RelOptInfo *) lfirst(root->join_rel_list); - Assert(length(rel->relids) == levels_needed); - Assert(length(root->join_rel_list) == 1 || - length(((RelOptInfo *) lsecond(root->join_rel_list))->relids) < levels_needed); + Assert(length(joinitems[levels_needed]) == 1); + rel = (RelOptInfo *) lfirst(joinitems[levels_needed]); + Assert(length(rel->relids) == length(root->base_rel_list)); return rel; } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 05f32d25972..3156a951314 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.94 2000/08/24 03:29:04 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.95 2000/09/12 21:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1482,7 +1482,9 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, { List *clausegroup = lfirst(i); IndexPath *pathnode = makeNode(IndexPath); - List *indexquals; + List *indexquals = NIL; + bool alljoinquals = true; + List *temp; /* XXX this code ought to be merged with create_index_path? */ @@ -1496,7 +1498,16 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, */ pathnode->path.pathkeys = NIL; - indexquals = get_actual_clauses(clausegroup); + /* extract bare indexqual clauses, check whether all from JOIN/ON */ + foreach(temp, clausegroup) + { + RestrictInfo *clause = (RestrictInfo *) lfirst(temp); + + indexquals = lappend(indexquals, clause->clause); + if (! clause->isjoinqual) + alljoinquals = false; + } + /* expand special operators to indexquals the executor can handle */ indexquals = expand_indexqual_conditions(indexquals); @@ -1514,6 +1525,8 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, /* joinrelids saves the rels needed on the outer side of the join */ pathnode->joinrelids = lfirst(outerrelids_list); + pathnode->alljoinquals = alljoinquals; + /* * We must compute the estimated number of output rows for the * indexscan. This is less than rel->rows because of the diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index c2ca38490e3..367e1ac9767 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.55 2000/05/30 00:49:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.56 2000/09/12 21:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,27 +25,32 @@ #include "utils/lsyscache.h" static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list); + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list, + JoinType jointype); static void match_unsorted_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list); + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list, + JoinType jointype); #ifdef NOT_USED static void match_unsorted_inner(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list); + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list, + JoinType jointype); #endif static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist); -static Path *best_innerjoin(List *join_paths, List *outer_relid); + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype); +static Path *best_innerjoin(List *join_paths, List *outer_relid, + JoinType jointype); static Selectivity estimate_disbursion(Query *root, Var *var); static List *select_mergejoin_clauses(RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist); + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype); /* @@ -64,6 +69,7 @@ add_paths_to_joinrel(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, List *restrictlist) { List *mergeclause_list = NIL; @@ -75,14 +81,15 @@ add_paths_to_joinrel(Query *root, mergeclause_list = select_mergejoin_clauses(joinrel, outerrel, innerrel, - restrictlist); + restrictlist, + jointype); /* * 1. Consider mergejoin paths where both relations must be explicitly * sorted. */ sort_inner_and_outer(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list); + restrictlist, mergeclause_list, jointype); /* * 2. Consider paths where the outer relation need not be explicitly @@ -90,7 +97,7 @@ add_paths_to_joinrel(Query *root, * path is already ordered. */ match_unsorted_outer(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list); + restrictlist, mergeclause_list, jointype); #ifdef NOT_USED @@ -107,7 +114,7 @@ add_paths_to_joinrel(Query *root, * other order. */ match_unsorted_inner(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list); + restrictlist, mergeclause_list, jointype); #endif /* @@ -116,7 +123,7 @@ add_paths_to_joinrel(Query *root, */ if (enable_hashjoin) hash_inner_and_outer(root, joinrel, outerrel, innerrel, - restrictlist); + restrictlist, jointype); } /* @@ -131,6 +138,7 @@ add_paths_to_joinrel(Query *root, * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join + * 'jointype' is the type of join to do */ static void sort_inner_and_outer(Query *root, @@ -138,7 +146,8 @@ sort_inner_and_outer(Query *root, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - List *mergeclause_list) + List *mergeclause_list, + JoinType jointype) { List *i; @@ -187,10 +196,10 @@ sort_inner_and_outer(Query *root, */ outerkeys = make_pathkeys_for_mergeclauses(root, curclause_list, - outerrel->targetlist); + outerrel); innerkeys = make_pathkeys_for_mergeclauses(root, curclause_list, - innerrel->targetlist); + innerrel); /* Build pathkeys representing output sort order. */ merge_pathkeys = build_join_pathkeys(outerkeys, joinrel->targetlist, @@ -204,6 +213,7 @@ sort_inner_and_outer(Query *root, */ add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, outerrel->cheapest_total_path, innerrel->cheapest_total_path, restrictlist, @@ -243,6 +253,7 @@ sort_inner_and_outer(Query *root, * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join + * 'jointype' is the type of join to do */ static void match_unsorted_outer(Query *root, @@ -250,16 +261,33 @@ match_unsorted_outer(Query *root, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - List *mergeclause_list) + List *mergeclause_list, + JoinType jointype) { + bool nestjoinOK; Path *bestinnerjoin; List *i; /* + * Nestloop only supports inner and left joins. + */ + switch (jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + nestjoinOK = true; + break; + default: + nestjoinOK = false; + break; + } + + /* * Get the best innerjoin indexpath (if any) for this outer rel. It's * the same for all outer paths. */ - bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids); + bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids, + jointype); foreach(i, outerrel->pathlist) { @@ -282,31 +310,38 @@ match_unsorted_outer(Query *root, joinrel->targetlist, root->equi_key_list); - /* - * Always consider a nestloop join with this outer and cheapest- - * total-cost inner. Consider nestloops using the cheapest- - * startup-cost inner as well, and the best innerjoin indexpath. - */ - add_path(joinrel, (Path *) - create_nestloop_path(joinrel, - outerpath, - innerrel->cheapest_total_path, - restrictlist, - merge_pathkeys)); - if (innerrel->cheapest_startup_path != innerrel->cheapest_total_path) - add_path(joinrel, (Path *) - create_nestloop_path(joinrel, - outerpath, - innerrel->cheapest_startup_path, - restrictlist, - merge_pathkeys)); - if (bestinnerjoin != NULL) + if (nestjoinOK) + { + /* + * Always consider a nestloop join with this outer and cheapest- + * total-cost inner. Consider nestloops using the cheapest- + * startup-cost inner as well, and the best innerjoin indexpath. + */ add_path(joinrel, (Path *) create_nestloop_path(joinrel, + jointype, outerpath, - bestinnerjoin, + innerrel->cheapest_total_path, restrictlist, merge_pathkeys)); + if (innerrel->cheapest_startup_path != + innerrel->cheapest_total_path) + add_path(joinrel, (Path *) + create_nestloop_path(joinrel, + jointype, + outerpath, + innerrel->cheapest_startup_path, + restrictlist, + merge_pathkeys)); + if (bestinnerjoin != NULL) + add_path(joinrel, (Path *) + create_nestloop_path(joinrel, + jointype, + outerpath, + bestinnerjoin, + restrictlist, + merge_pathkeys)); + } /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys, @@ -319,7 +354,7 @@ match_unsorted_outer(Query *root, /* Compute the required ordering of the inner path */ innersortkeys = make_pathkeys_for_mergeclauses(root, mergeclauses, - innerrel->targetlist); + innerrel); /* * Generate a mergejoin on the basis of sorting the cheapest @@ -328,6 +363,7 @@ match_unsorted_outer(Query *root, */ add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, outerpath, innerrel->cheapest_total_path, restrictlist, @@ -373,6 +409,7 @@ match_unsorted_outer(Query *root, newclauses = mergeclauses; add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, outerpath, innerpath, restrictlist, @@ -409,6 +446,7 @@ match_unsorted_outer(Query *root, } add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, outerpath, innerpath, restrictlist, @@ -437,6 +475,7 @@ match_unsorted_outer(Query *root, * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join + * 'jointype' is the type of join to do */ static void match_unsorted_inner(Query *root, @@ -444,7 +483,8 @@ match_unsorted_inner(Query *root, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - List *mergeclause_list) + List *mergeclause_list, + JoinType jointype) { List *i; @@ -466,7 +506,7 @@ match_unsorted_inner(Query *root, /* Compute the required ordering of the outer path */ outersortkeys = make_pathkeys_for_mergeclauses(root, mergeclauses, - outerrel->targetlist); + outerrel); /* * Generate a mergejoin on the basis of sorting the cheapest @@ -478,6 +518,7 @@ match_unsorted_inner(Query *root, root->equi_key_list); add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, outerrel->cheapest_total_path, innerpath, restrictlist, @@ -506,6 +547,7 @@ match_unsorted_inner(Query *root, root->equi_key_list); add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, totalouterpath, innerpath, restrictlist, @@ -524,6 +566,7 @@ match_unsorted_inner(Query *root, root->equi_key_list); add_path(joinrel, (Path *) create_mergejoin_path(joinrel, + jointype, startupouterpath, innerpath, restrictlist, @@ -547,19 +590,37 @@ match_unsorted_inner(Query *root, * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join + * 'jointype' is the type of join to do */ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist) + List *restrictlist, + JoinType jointype) { Relids outerrelids = outerrel->relids; Relids innerrelids = innerrel->relids; + bool isouterjoin; List *i; /* + * Hashjoin only supports inner and left joins. + */ + switch (jointype) + { + case JOIN_INNER: + isouterjoin = false; + break; + case JOIN_LEFT: + isouterjoin = true; + break; + default: + return; + } + + /* * Scan the join's restrictinfo list to find hashjoinable clauses that * are usable with this pair of sub-relations. Since we currently * accept only var-op-var clauses as hashjoinable, we need only check @@ -581,6 +642,13 @@ hash_inner_and_outer(Query *root, if (restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ + /* + * If processing an outer join, only use explicit join clauses for + * hashing. For inner joins we need not be so picky. + */ + if (isouterjoin && !restrictinfo->isjoinqual) + continue; + clause = restrictinfo->clause; /* these must be OK, since check_hashjoinable accepted the clause */ left = get_leftop(clause); @@ -609,6 +677,7 @@ hash_inner_and_outer(Query *root, */ add_path(joinrel, (Path *) create_hashjoin_path(joinrel, + jointype, outerrel->cheapest_total_path, innerrel->cheapest_total_path, restrictlist, @@ -617,6 +686,7 @@ hash_inner_and_outer(Query *root, if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path) add_path(joinrel, (Path *) create_hashjoin_path(joinrel, + jointype, outerrel->cheapest_startup_path, innerrel->cheapest_total_path, restrictlist, @@ -641,26 +711,49 @@ hash_inner_and_outer(Query *root, * usable path. */ static Path * -best_innerjoin(List *join_paths, Relids outer_relids) +best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype) { Path *cheapest = (Path *) NULL; + bool isouterjoin; List *join_path; + /* + * Nestloop only supports inner and left joins. + */ + switch (jointype) + { + case JOIN_INNER: + isouterjoin = false; + break; + case JOIN_LEFT: + isouterjoin = true; + break; + default: + return NULL; + } + foreach(join_path, join_paths) { - Path *path = (Path *) lfirst(join_path); + IndexPath *path = (IndexPath *) lfirst(join_path); Assert(IsA(path, IndexPath)); /* + * If processing an outer join, only use explicit join clauses in the + * inner indexscan. For inner joins we need not be so picky. + */ + if (isouterjoin && !path->alljoinquals) + continue; + + /* * path->joinrelids is the set of base rels that must be part of * outer_relids in order to use this inner path, because those * rels are used in the index join quals of this inner path. */ - if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) && + if (is_subseti(path->joinrelids, outer_relids) && (cheapest == NULL || - compare_path_costs(path, cheapest, TOTAL_COST) < 0)) - cheapest = path; + compare_path_costs((Path *) path, cheapest, TOTAL_COST) < 0)) + cheapest = (Path *) path; } return cheapest; } @@ -684,6 +777,9 @@ estimate_disbursion(Query *root, Var *var) relid = getrelid(var->varno, root->rtable); + if (relid == InvalidOid) + return 0.1; + return (Selectivity) get_attdisbursion(relid, var->varattno, 0.1); } @@ -707,11 +803,13 @@ static List * select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist) + List *restrictlist, + JoinType jointype) { List *result_list = NIL; Relids outerrelids = outerrel->relids; Relids innerrelids = innerrel->relids; + bool isouterjoin = IS_OUTER_JOIN(jointype); List *i; foreach(i, restrictlist) @@ -721,6 +819,37 @@ select_mergejoin_clauses(RelOptInfo *joinrel, Var *left, *right; + /* + * If processing an outer join, only use explicit join clauses in the + * merge. For inner joins we need not be so picky. + * + * Furthermore, if it is a right/full join then *all* the explicit + * join clauses must be mergejoinable, else the executor will fail. + * If we are asked for a right join then just return NIL to indicate + * no mergejoin is possible (we can handle it as a left join instead). + * If we are asked for a full join then emit an error, because there + * is no fallback. + */ + if (isouterjoin) + { + if (!restrictinfo->isjoinqual) + continue; + switch (jointype) + { + case JOIN_RIGHT: + if (restrictinfo->mergejoinoperator == InvalidOid) + return NIL; /* not mergejoinable */ + break; + case JOIN_FULL: + if (restrictinfo->mergejoinoperator == InvalidOid) + elog(ERROR, "FULL JOIN is only supported with mergejoinable join conditions"); + break; + default: + /* otherwise, it's OK to have nonmergeable join quals */ + break; + } + } + if (restrictinfo->mergejoinoperator == InvalidOid) continue; /* not mergejoinable */ diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 741efe928c2..3cab2daba5c 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.46 2000/05/30 00:49:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.47 2000/09/12 21:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,62 +19,52 @@ static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1, - RelOptInfo *rel2); + RelOptInfo *rel2, JoinType jointype); /* * make_rels_by_joins * Consider ways to produce join relations containing exactly 'level' - * base relations. (This is one step of the dynamic-programming method + * jointree items. (This is one step of the dynamic-programming method * embodied in make_one_rel_by_joins.) Join rel nodes for each feasible - * combination of base rels are created and added to the front of the - * query's join_rel_list. Implementation paths are created for each - * such joinrel, too. + * combination of lower-level rels are created and returned in a list. + * Implementation paths are created for each such joinrel, too. * - * Returns nothing, but adds entries to root->join_rel_list. + * level: level of rels we want to make this time. + * joinrels[j], 1 <= j < level, is a list of rels containing j items. */ -void -make_rels_by_joins(Query *root, int level) +List * +make_rels_by_joins(Query *root, int level, List **joinrels) { - List *first_old_rel = root->join_rel_list; + List *result_rels = NIL; + List *new_rels; + List *nr; List *r; + int k; /* * First, consider left-sided and right-sided plans, in which rels of - * exactly level-1 member relations are joined against base relations. - * We prefer to join using join clauses, but if we find a rel of - * level-1 members that has no join clauses, we will generate - * Cartesian-product joins against all base rels not already contained - * in it. + * exactly level-1 member relations are joined against initial relations. + * We prefer to join using join clauses, but if we find a rel of level-1 + * members that has no join clauses, we will generate Cartesian-product + * joins against all initial rels not already contained in it. * - * In the first pass (level == 2), we try to join each base rel to each - * base rel that appears later in base_rel_list. (The mirror-image + * In the first pass (level == 2), we try to join each initial rel to each + * initial rel that appears later in joinrels[1]. (The mirror-image * joins are handled automatically by make_join_rel.) In later - * passes, we try to join rels of size level-1 from join_rel_list to - * each base rel in base_rel_list. - * - * We assume that the rels already present in join_rel_list appear in - * decreasing order of level (number of members). This should be true - * since we always add new higher-level rels to the front of the list. + * passes, we try to join rels of size level-1 from joinrels[level-1] + * to each initial rel in joinrels[1]. */ - if (level == 2) - r = root->base_rel_list;/* level-1 is base rels */ - else - r = root->join_rel_list; - for (; r != NIL; r = lnext(r)) + foreach(r, joinrels[level-1]) { RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - int old_level = length(old_rel->relids); List *other_rels; - if (old_level != level - 1) - break; - if (level == 2) - other_rels = lnext(r); /* only consider remaining base + other_rels = lnext(r); /* only consider remaining initial * rels */ else - other_rels = root->base_rel_list; /* consider all base rels */ + other_rels = joinrels[1]; /* consider all initial rels */ if (old_rel->joininfo != NIL) { @@ -87,9 +77,9 @@ make_rels_by_joins(Query *root, int level) * have those other rels collected into a join rel. See also * the last-ditch case below. */ - make_rels_by_clause_joins(root, - old_rel, - other_rels); + new_rels = make_rels_by_clause_joins(root, + old_rel, + other_rels); } else { @@ -98,64 +88,90 @@ make_rels_by_joins(Query *root, int level) * Oops, we have a relation that is not joined to any other * relation. Cartesian product time. */ - make_rels_by_clauseless_joins(root, - old_rel, - other_rels); + new_rels = make_rels_by_clauseless_joins(root, + old_rel, + other_rels); + } + + /* + * At levels above 2 we will generate the same joined relation + * in multiple ways --- for example (a join b) join c is the same + * RelOptInfo as (b join c) join a, though the second case will + * add a different set of Paths to it. To avoid making extra work + * for subsequent passes, do not enter the same RelOptInfo into our + * output list multiple times. + */ + foreach(nr, new_rels) + { + RelOptInfo *jrel = (RelOptInfo *) lfirst(nr); + + if (!ptrMember(jrel, result_rels)) + result_rels = lcons(jrel, result_rels); } } /* - * Now, consider "bushy plans" in which relations of k base rels are - * joined to relations of level-k base rels, for 2 <= k <= level-2. - * The previous loop left r pointing to the first rel of level - * level-2. + * Now, consider "bushy plans" in which relations of k initial rels are + * joined to relations of level-k initial rels, for 2 <= k <= level-2. * * We only consider bushy-plan joins for pairs of rels where there is a * suitable join clause, in order to avoid unreasonable growth of * planning time. */ - for (; r != NIL; r = lnext(r)) + for (k = 2; ; k++) { - RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - int old_level = length(old_rel->relids); - List *r2; + int other_level = level - k; /* - * We can quit once past the halfway point (make_join_rel took - * care of making the opposite-direction joins) + * Since make_join_rel(x, y) handles both x,y and y,x cases, + * we only need to go as far as the halfway point. */ - if (old_level * 2 < level) + if (k > other_level) break; - if (old_rel->joininfo == NIL) - continue; /* we ignore clauseless joins here */ - - foreach(r2, lnext(r)) + foreach(r, joinrels[k]) { - RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2); - int new_level = length(new_rel->relids); - - if (old_level + new_level > level) - continue; /* scan down to new_rels of right size */ - if (old_level + new_level < level) - break; /* no more new_rels of right size */ - if (nonoverlap_setsi(old_rel->relids, new_rel->relids)) + RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); + List *other_rels; + List *r2; + + if (old_rel->joininfo == NIL) + continue; /* we ignore clauseless joins here */ + + if (k == other_level) + other_rels = lnext(r); /* only consider remaining rels */ + else + other_rels = joinrels[other_level]; + + foreach(r2, other_rels) { - List *i; - - /* - * OK, we can build a rel of the right level from this - * pair of rels. Do so if there is at least one usable - * join clause. - */ - foreach(i, old_rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); + RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2); - if (is_subseti(joininfo->unjoined_relids, new_rel->relids)) + if (nonoverlap_setsi(old_rel->relids, new_rel->relids)) + { + List *i; + + /* + * OK, we can build a rel of the right level from this + * pair of rels. Do so if there is at least one usable + * join clause. + */ + foreach(i, old_rel->joininfo) { - make_join_rel(root, old_rel, new_rel); - break; + JoinInfo *joininfo = (JoinInfo *) lfirst(i); + + if (is_subseti(joininfo->unjoined_relids, + new_rel->relids)) + { + RelOptInfo *jrel; + + jrel = make_join_rel(root, old_rel, new_rel, + JOIN_INNER); + /* Avoid making duplicate entries ... */ + if (!ptrMember(jrel, result_rels)) + result_rels = lcons(jrel, result_rels); + break; /* need not consider more joininfos */ + } } } } @@ -174,39 +190,41 @@ make_rels_by_joins(Query *root, int level) * no choice but to make cartesian joins. We consider only left-sided * and right-sided cartesian joins in this case (no bushy). */ - if (root->join_rel_list == first_old_rel) + if (result_rels == NIL) { /* This loop is just like the first one, except we always call * make_rels_by_clauseless_joins(). */ - if (level == 2) - r = root->base_rel_list; /* level-1 is base rels */ - else - r = root->join_rel_list; - for (; r != NIL; r = lnext(r)) + foreach(r, joinrels[level-1]) { RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - int old_level = length(old_rel->relids); List *other_rels; - if (old_level != level - 1) - break; - if (level == 2) - other_rels = lnext(r); /* only consider remaining base + other_rels = lnext(r); /* only consider remaining initial * rels */ else - other_rels = root->base_rel_list; /* consider all base rels */ + other_rels = joinrels[1]; /* consider all initial rels */ + + new_rels = make_rels_by_clauseless_joins(root, + old_rel, + other_rels); + + foreach(nr, new_rels) + { + RelOptInfo *jrel = (RelOptInfo *) lfirst(nr); - make_rels_by_clauseless_joins(root, - old_rel, - other_rels); + if (!ptrMember(jrel, result_rels)) + result_rels = lcons(jrel, result_rels); + } } - if (root->join_rel_list == first_old_rel) + if (result_rels == NIL) elog(ERROR, "make_rels_by_joins: failed to build any %d-way joins", level); } + + return result_rels; } /* @@ -214,28 +232,23 @@ make_rels_by_joins(Query *root, int level) * Build joins between the given relation 'old_rel' and other relations * that are mentioned within old_rel's joininfo nodes (i.e., relations * that participate in join clauses that 'old_rel' also participates in). - * The join rel nodes are added to root->join_rel_list. + * The join rel nodes are returned in a list. * * 'old_rel' is the relation entry for the relation to be joined * 'other_rels': other rels to be considered for joining * - * Currently, this is only used with base rels in other_rels, but it would - * work for joining to joinrels too, if the caller ensures there is no + * Currently, this is only used with initial rels in other_rels, but it + * will work for joining to joinrels too, if the caller ensures there is no * membership overlap between old_rel and the rels in other_rels. (We need - * no extra test for overlap for base rels, since the is_subset test can + * no extra test for overlap for initial rels, since the is_subset test can * only succeed when other_rel is not already part of old_rel.) - * - * Returns NULL if no suitable joins were found, else the last suitable - * joinrel processed. (The only caller who checks the return value is - * geqo_eval.c, and it sets things up so there can be no more than one - * "suitable" joinrel; so we don't bother with returning a list.) */ -RelOptInfo * +List * make_rels_by_clause_joins(Query *root, RelOptInfo *old_rel, List *other_rels) { - RelOptInfo *result = NULL; + List *result = NIL; List *i, *j; @@ -249,7 +262,9 @@ make_rels_by_clause_joins(Query *root, RelOptInfo *other_rel = (RelOptInfo *) lfirst(j); if (is_subseti(unjoined_relids, other_rel->relids)) - result = make_join_rel(root, old_rel, other_rel); + result = lcons(make_join_rel(root, old_rel, other_rel, + JOIN_INNER), + result); } } @@ -261,24 +276,20 @@ make_rels_by_clause_joins(Query *root, * Given a relation 'old_rel' and a list of other relations * 'other_rels', create a join relation between 'old_rel' and each * member of 'other_rels' that isn't already included in 'old_rel'. + * The join rel nodes are returned in a list. * * 'old_rel' is the relation entry for the relation to be joined * 'other_rels': other rels to be considered for joining * - * Currently, this is only used with base rels in other_rels, but it would + * Currently, this is only used with initial rels in other_rels, but it would * work for joining to joinrels too. - * - * Returns NULL if no suitable joins were found, else the last suitable - * joinrel processed. (The only caller who checks the return value is - * geqo_eval.c, and it sets things up so there can be no more than one - * "suitable" joinrel; so we don't bother with returning a list.) */ -RelOptInfo * +List * make_rels_by_clauseless_joins(Query *root, RelOptInfo *old_rel, List *other_rels) { - RelOptInfo *result = NULL; + List *result = NIL; List *i; foreach(i, other_rels) @@ -286,7 +297,9 @@ make_rels_by_clauseless_joins(Query *root, RelOptInfo *other_rel = (RelOptInfo *) lfirst(i); if (nonoverlap_setsi(other_rel->relids, old_rel->relids)) - result = make_join_rel(root, old_rel, other_rel); + result = lcons(make_join_rel(root, old_rel, other_rel, + JOIN_INNER), + result); } return result; @@ -294,16 +307,62 @@ make_rels_by_clauseless_joins(Query *root, /* + * make_rel_from_jointree + * Find or build a RelOptInfojoin rel representing a specific + * jointree item. For JoinExprs, we only consider the construction + * path that corresponds exactly to what the user wrote. + */ +RelOptInfo * +make_rel_from_jointree(Query *root, Node *jtnode) +{ + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + return get_base_rel(root, varno); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + RelOptInfo *rel, + *lrel, + *rrel; + + /* Recurse */ + lrel = make_rel_from_jointree(root, j->larg); + rrel = make_rel_from_jointree(root, j->rarg); + + /* Make this join rel */ + rel = make_join_rel(root, lrel, rrel, j->jointype); + + /* + * Since we are only going to consider this one way to do it, + * we're done generating Paths for this joinrel and can now select + * the cheapest. In fact we *must* do so now, since next level up + * will need it! + */ + set_cheapest(rel); + + return rel; + } + else + elog(ERROR, "make_rel_from_jointree: unexpected node type %d", + nodeTag(jtnode)); + return NULL; /* keep compiler quiet */ +} + + +/* * make_join_rel * Find or create a join RelOptInfo that represents the join of * the two given rels, and add to it path information for paths * created with the two rels as outer and inner rel. * (The join rel may already contain paths generated from other * pairs of rels that add up to the same set of base rels.) - * The join rel is stored in the query's join_rel_list. */ static RelOptInfo * -make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2) +make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2, + JoinType jointype) { RelOptInfo *joinrel; List *restrictlist; @@ -315,10 +374,39 @@ make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2) joinrel = get_join_rel(root, rel1, rel2, &restrictlist); /* - * We consider paths using each rel as both outer and inner. + * Consider paths using each rel as both outer and inner. */ - add_paths_to_joinrel(root, joinrel, rel1, rel2, restrictlist); - add_paths_to_joinrel(root, joinrel, rel2, rel1, restrictlist); + switch (jointype) + { + case JOIN_INNER: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER, + restrictlist); + break; + case JOIN_LEFT: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT, + restrictlist); + break; + case JOIN_FULL: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL, + restrictlist); + break; + case JOIN_RIGHT: + add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT, + restrictlist); + break; + default: + elog(ERROR, "make_join_rel: unsupported join type %d", + (int) jointype); + break; + } return joinrel; } diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index 62a02836fec..2a76f63eb7c 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.40 2000/05/30 00:49:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.41 2000/09/12 21:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -101,6 +101,7 @@ create_or_index_paths(Query *root, /* This isn't a nestloop innerjoin, so: */ pathnode->joinrelids = NIL; /* no join clauses here */ + pathnode->alljoinquals = false; pathnode->rows = rel->rows; best_or_subclause_indices(root, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 6d7b67bee3d..c6eccddab19 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.24 2000/08/08 15:41:31 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.25 2000/09/12 21:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -694,8 +694,8 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) * * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses * that will be used in a merge join. - * 'tlist' is a relation target list for either the inner or outer - * side of the proposed join rel. (Not actually needed anymore) + * 'rel' is the relation the pathkeys will apply to (ie, either the inner + * or outer side of the proposed join rel). * * Returns a pathkeys list that can be applied to the indicated relation. * @@ -706,7 +706,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) List * make_pathkeys_for_mergeclauses(Query *root, List *mergeclauses, - List *tlist) + RelOptInfo *rel) { List *pathkeys = NIL; List *i; @@ -722,30 +722,37 @@ make_pathkeys_for_mergeclauses(Query *root, Assert(restrictinfo->mergejoinoperator != InvalidOid); /* - * Find the key and sortop needed for this mergeclause. - * - * Both sides of the mergeclause should appear in one of the query's - * pathkey equivalence classes, so it doesn't matter which one we - * use here. + * Which key and sortop is needed for this relation? */ key = (Node *) get_leftop(restrictinfo->clause); sortop = restrictinfo->left_sortop; + if (!IsA(key, Var) || + !intMember(((Var *) key)->varno, rel->relids)) + { + key = (Node *) get_rightop(restrictinfo->clause); + sortop = restrictinfo->right_sortop; + if (!IsA(key, Var) || + !intMember(((Var *) key)->varno, rel->relids)) + elog(ERROR, "make_pathkeys_for_mergeclauses: can't identify which side of mergeclause to use"); + } /* - * Find pathkey sublist for this sort item. We expect to find the - * canonical set including the mergeclause's left and right sides; - * if we get back just the one item, something is rotten. + * Find or create canonical pathkey sublist for this sort item. */ item = makePathKeyItem(key, sortop); pathkey = make_canonical_pathkey(root, item); - Assert(length(pathkey) > 1); /* - * Since the item we just made is not in the returned canonical - * set, we can free it --- this saves a useful amount of storage - * in a big join tree. + * Most of the time we will get back a canonical pathkey set + * including both the mergeclause's left and right sides (the only + * case where we don't is if the mergeclause appeared in an OUTER + * JOIN, which causes us not to generate an equijoin set from it). + * Therefore, most of the time the item we just made is not part + * of the returned structure, and we can free it. This check + * saves a useful amount of storage in a big join tree. */ - pfree(item); + if (item != (PathKeyItem *) lfirst(pathkey)) + pfree(item); pathkeys = lappend(pathkeys, pathkey); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index c049f5d86b6..96dc3327b7f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.95 2000/08/13 02:50:06 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.96 2000/09/12 21:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -42,36 +42,47 @@ static IndexScan *create_indexscan_node(Query *root, IndexPath *best_path, static TidScan *create_tidscan_node(TidPath *best_path, List *tlist, List *scan_clauses); static NestLoop *create_nestloop_node(NestPath *best_path, List *tlist, - List *clauses, Plan *outer_node, List *outer_tlist, - Plan *inner_node, List *inner_tlist); + List *joinclauses, List *otherclauses, + Plan *outer_node, List *outer_tlist, + Plan *inner_node, List *inner_tlist); static MergeJoin *create_mergejoin_node(MergePath *best_path, List *tlist, - List *clauses, Plan *outer_node, List *outer_tlist, - Plan *inner_node, List *inner_tlist); + List *joinclauses, List *otherclauses, + Plan *outer_node, List *outer_tlist, + Plan *inner_node, List *inner_tlist); static HashJoin *create_hashjoin_node(HashPath *best_path, List *tlist, - List *clauses, Plan *outer_node, List *outer_tlist, - Plan *inner_node, List *inner_tlist); + List *joinclauses, List *otherclauses, + Plan *outer_node, List *outer_tlist, + Plan *inner_node, List *inner_tlist); static List *fix_indxqual_references(List *indexquals, IndexPath *index_path); static List *fix_indxqual_sublist(List *indexqual, int baserelid, Oid relam, Form_pg_index index); static Node *fix_indxqual_operand(Node *node, int baserelid, Form_pg_index index, Oid *opclass); +static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, List *indxid, List *indxqual, List *indxqualorig, ScanDirection indexscandir); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, List *tideval); -static NestLoop *make_nestloop(List *qptlist, List *qpqual, Plan *lefttree, - Plan *righttree); -static HashJoin *make_hashjoin(List *tlist, List *qpqual, - List *hashclauses, Plan *lefttree, Plan *righttree); +static NestLoop *make_nestloop(List *tlist, + List *joinclauses, List *otherclauses, + Plan *lefttree, Plan *righttree, + JoinType jointype); +static HashJoin *make_hashjoin(List *tlist, + List *joinclauses, List *otherclauses, + List *hashclauses, + Plan *lefttree, Plan *righttree, + JoinType jointype); static Hash *make_hash(List *tlist, Node *hashkey, Plan *lefttree); -static MergeJoin *make_mergejoin(List *tlist, List *qpqual, - List *mergeclauses, Plan *righttree, Plan *lefttree); +static MergeJoin *make_mergejoin(List *tlist, + List *joinclauses, List *otherclauses, + List *mergeclauses, + Plan *lefttree, Plan *righttree, + JoinType jointype); static void copy_path_costsize(Plan *dest, Path *src); static void copy_plan_costsize(Plan *dest, Plan *src); -static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); /* * create_plan @@ -195,7 +206,8 @@ create_join_node(Query *root, JoinPath *best_path, List *tlist) List *outer_tlist; Plan *inner_node; List *inner_tlist; - List *clauses; + List *joinclauses; + List *otherclauses; Join *retval = NULL; outer_node = create_plan(root, best_path->outerjoinpath); @@ -204,14 +216,25 @@ create_join_node(Query *root, JoinPath *best_path, List *tlist) inner_node = create_plan(root, best_path->innerjoinpath); inner_tlist = inner_node->targetlist; - clauses = get_actual_clauses(best_path->joinrestrictinfo); + if (IS_OUTER_JOIN(best_path->jointype)) + { + get_actual_join_clauses(best_path->joinrestrictinfo, + &joinclauses, &otherclauses); + } + else + { + /* We can treat all clauses alike for an inner join */ + joinclauses = get_actual_clauses(best_path->joinrestrictinfo); + otherclauses = NIL; + } switch (best_path->path.pathtype) { case T_MergeJoin: retval = (Join *) create_mergejoin_node((MergePath *) best_path, tlist, - clauses, + joinclauses, + otherclauses, outer_node, outer_tlist, inner_node, @@ -220,7 +243,8 @@ create_join_node(Query *root, JoinPath *best_path, List *tlist) case T_HashJoin: retval = (Join *) create_hashjoin_node((HashPath *) best_path, tlist, - clauses, + joinclauses, + otherclauses, outer_node, outer_tlist, inner_node, @@ -229,7 +253,8 @@ create_join_node(Query *root, JoinPath *best_path, List *tlist) case T_NestLoop: retval = (Join *) create_nestloop_node((NestPath *) best_path, tlist, - clauses, + joinclauses, + otherclauses, outer_node, outer_tlist, inner_node, @@ -411,30 +436,6 @@ create_indexscan_node(Query *root, return scan_node; } -static TidScan * -make_tidscan(List *qptlist, - List *qpqual, - Index scanrelid, - List *tideval) -{ - TidScan *node = makeNode(TidScan); - Plan *plan = &node->scan.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; - plan->lefttree = NULL; - plan->righttree = NULL; - node->scan.scanrelid = scanrelid; - node->tideval = copyObject(tideval); /* XXX do we really need a - * copy? */ - node->needRescan = false; - node->scan.scanstate = (CommonScanState *) NULL; - - return node; -} - /* * create_tidscan_node * Returns a tidscan node for the base relation scanned by 'best_path' @@ -488,7 +489,8 @@ create_tidscan_node(TidPath *best_path, List *tlist, List *scan_clauses) static NestLoop * create_nestloop_node(NestPath *best_path, List *tlist, - List *clauses, + List *joinclauses, + List *otherclauses, Plan *outer_node, List *outer_tlist, Plan *inner_node, @@ -535,7 +537,8 @@ create_nestloop_node(NestPath *best_path, * attnos, and may have been commuted as well). */ if (length(indxqualorig) == 1) /* single indexscan? */ - clauses = set_difference(clauses, lfirst(indxqualorig)); + joinclauses = set_difference(joinclauses, + lfirst(indxqualorig)); /* only refs to outer vars get changed in the inner indexqual */ innerscan->indxqualorig = join_references(indxqualorig, @@ -577,15 +580,26 @@ create_nestloop_node(NestPath *best_path, inner_node); } + /* + * Set quals to contain INNER/OUTER var references. + */ + joinclauses = join_references(joinclauses, + outer_tlist, + inner_tlist, + (Index) 0); + otherclauses = join_references(otherclauses, + outer_tlist, + inner_tlist, + (Index) 0); + join_node = make_nestloop(tlist, - join_references(clauses, - outer_tlist, - inner_tlist, - (Index) 0), + joinclauses, + otherclauses, outer_node, - inner_node); + inner_node, + best_path->jointype); - copy_path_costsize(&join_node->join, &best_path->path); + copy_path_costsize(&join_node->join.plan, &best_path->path); return join_node; } @@ -593,14 +607,14 @@ create_nestloop_node(NestPath *best_path, static MergeJoin * create_mergejoin_node(MergePath *best_path, List *tlist, - List *clauses, + List *joinclauses, + List *otherclauses, Plan *outer_node, List *outer_tlist, Plan *inner_node, List *inner_tlist) { - List *qpqual, - *mergeclauses; + List *mergeclauses; MergeJoin *join_node; mergeclauses = get_actual_clauses(best_path->path_mergeclauses); @@ -610,10 +624,18 @@ create_mergejoin_node(MergePath *best_path, * the list of quals that must be checked as qpquals. Set those * clauses to contain INNER/OUTER var references. */ - qpqual = join_references(set_difference(clauses, mergeclauses), - outer_tlist, - inner_tlist, - (Index) 0); + joinclauses = join_references(set_difference(joinclauses, mergeclauses), + outer_tlist, + inner_tlist, + (Index) 0); + + /* + * Fix the additional qpquals too. + */ + otherclauses = join_references(otherclauses, + outer_tlist, + inner_tlist, + (Index) 0); /* * Now set the references in the mergeclauses and rearrange them so @@ -640,13 +662,54 @@ create_mergejoin_node(MergePath *best_path, inner_node, best_path->innersortkeys); + /* + * The executor requires the inner side of a mergejoin to support "mark" + * and "restore" operations. Not all plan types do, so we must be careful + * not to generate an invalid plan. If necessary, an invalid inner plan + * can be handled by inserting a Materialize node. + * + * Since the inner side must be ordered, and only Sorts and IndexScans can + * create order to begin with, you might think there's no problem --- but + * you'd be wrong. Nestloop and merge joins can *preserve* the order of + * their inputs, so they can be selected as the input of a mergejoin, + * and that won't work in the present executor. + * + * Doing this here is a bit of a kluge since the cost of the Materialize + * wasn't taken into account in our earlier decisions. But Materialize + * is hard to estimate a cost for, and the above consideration shows that + * this is a rare case anyway, so this seems an acceptable way to proceed. + * + * This check must agree with ExecMarkPos/ExecRestrPos in + * executor/execAmi.c! + */ + switch (nodeTag(inner_node)) + { + case T_SeqScan: + case T_IndexScan: + case T_Material: + case T_Sort: + /* OK, these inner plans support mark/restore */ + break; + + default: + /* Ooops, need to materialize the inner plan */ + inner_node = (Plan *) make_material(inner_tlist, + inner_node); + break; + } + + /* + * Now we can build the mergejoin node. + */ join_node = make_mergejoin(tlist, - qpqual, + joinclauses, + otherclauses, mergeclauses, + outer_node, inner_node, - outer_node); + best_path->jpath.jointype); - copy_path_costsize(&join_node->join, &best_path->jpath.path); + copy_path_costsize(&join_node->join.plan, &best_path->jpath.path); return join_node; } @@ -654,13 +717,13 @@ create_mergejoin_node(MergePath *best_path, static HashJoin * create_hashjoin_node(HashPath *best_path, List *tlist, - List *clauses, + List *joinclauses, + List *otherclauses, Plan *outer_node, List *outer_tlist, Plan *inner_node, List *inner_tlist) { - List *qpqual; List *hashclauses; HashJoin *join_node; Hash *hash_node; @@ -679,10 +742,18 @@ create_hashjoin_node(HashPath *best_path, * the list of quals that must be checked as qpquals. Set those * clauses to contain INNER/OUTER var references. */ - qpqual = join_references(set_difference(clauses, hashclauses), - outer_tlist, - inner_tlist, - (Index) 0); + joinclauses = join_references(set_difference(joinclauses, hashclauses), + outer_tlist, + inner_tlist, + (Index) 0); + + /* + * Fix the additional qpquals too. + */ + otherclauses = join_references(otherclauses, + outer_tlist, + inner_tlist, + (Index) 0); /* * Now set the references in the hashclauses and rearrange them so @@ -701,12 +772,14 @@ create_hashjoin_node(HashPath *best_path, */ hash_node = make_hash(inner_tlist, innerhashkey, inner_node); join_node = make_hashjoin(tlist, - qpqual, + joinclauses, + otherclauses, hashclauses, outer_node, - (Plan *) hash_node); + (Plan *) hash_node, + best_path->jpath.jointype); - copy_path_costsize(&join_node->join, &best_path->jpath.path); + copy_path_costsize(&join_node->join.plan, &best_path->jpath.path); return join_node; } @@ -1065,45 +1138,75 @@ make_indexscan(List *qptlist, return node; } +static TidScan * +make_tidscan(List *qptlist, + List *qpqual, + Index scanrelid, + List *tideval) +{ + TidScan *node = makeNode(TidScan); + Plan *plan = &node->scan.plan; + + /* cost should be inserted by caller */ + plan->state = (EState *) NULL; + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tideval = copyObject(tideval); /* XXX do we really need a + * copy? */ + node->needRescan = false; + node->scan.scanstate = (CommonScanState *) NULL; + + return node; +} + static NestLoop * -make_nestloop(List *qptlist, - List *qpqual, +make_nestloop(List *tlist, + List *joinclauses, + List *otherclauses, Plan *lefttree, - Plan *righttree) + Plan *righttree, + JoinType jointype) { NestLoop *node = makeNode(NestLoop); - Plan *plan = &node->join; + Plan *plan = &node->join.plan; /* cost should be inserted by caller */ plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; + plan->targetlist = tlist; + plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; - node->nlstate = (NestLoopState *) NULL; + node->join.jointype = jointype; + node->join.joinqual = joinclauses; return node; } static HashJoin * make_hashjoin(List *tlist, - List *qpqual, + List *joinclauses, + List *otherclauses, List *hashclauses, Plan *lefttree, - Plan *righttree) + Plan *righttree, + JoinType jointype) { HashJoin *node = makeNode(HashJoin); - Plan *plan = &node->join; + Plan *plan = &node->join.plan; /* cost should be inserted by caller */ plan->state = (EState *) NULL; plan->targetlist = tlist; - plan->qual = qpqual; + plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; node->hashclauses = hashclauses; - node->hashdone = false; + node->join.jointype = jointype; + node->join.joinqual = joinclauses; return node; } @@ -1133,21 +1236,25 @@ make_hash(List *tlist, Node *hashkey, Plan *lefttree) static MergeJoin * make_mergejoin(List *tlist, - List *qpqual, + List *joinclauses, + List *otherclauses, List *mergeclauses, + Plan *lefttree, Plan *righttree, - Plan *lefttree) + JoinType jointype) { MergeJoin *node = makeNode(MergeJoin); - Plan *plan = &node->join; + Plan *plan = &node->join.plan; /* cost should be inserted by caller */ plan->state = (EState *) NULL; plan->targetlist = tlist; - plan->qual = qpqual; + plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; node->mergeclauses = mergeclauses; + node->join.jointype = jointype; + node->join.joinqual = joinclauses; return node; } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 8ffd35c9bb0..bf728ca1bdc 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.49 2000/08/13 02:50:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.50 2000/09/12 21:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,13 +26,18 @@ #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/var.h" +#include "parser/parsetree.h" #include "parser/parse_expr.h" #include "parser/parse_oper.h" #include "parser/parse_type.h" #include "utils/lsyscache.h" -static void add_restrict_and_join_to_rel(Query *root, Node *clause); +static void mark_baserels_for_outer_join(Query *root, Relids rels, + Relids outerrels); +static void add_restrict_and_join_to_rel(Query *root, Node *clause, + bool isjoinqual, + Relids outerjoinrelids); static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, Relids join_relids); static void add_vars_to_targetlist(Query *root, List *vars); @@ -47,14 +52,14 @@ static void check_hashjoinable(RestrictInfo *restrictinfo); *****************************************************************************/ /* - * make_var_only_tlist + * build_base_rel_tlists * Creates rel nodes for every relation mentioned in the target list * 'tlist' (if a node hasn't already been created) and adds them to - * *query_relation_list*. Creates targetlist entries for each member of - * 'tlist' and adds them to the tlist field of the appropriate rel node. + * root->base_rel_list. Creates targetlist entries for each var seen + * in 'tlist' and adds them to the tlist of the appropriate rel node. */ void -make_var_only_tlist(Query *root, List *tlist) +build_base_rel_tlists(Query *root, List *tlist) { List *tlist_vars = pull_var_clause((Node *) tlist, false); @@ -82,48 +87,75 @@ add_vars_to_targetlist(Query *root, List *vars) } } -/* +/*---------- * add_missing_rels_to_query * - * If we have a range variable in the FROM clause that does not appear + * If we have a relation listed in the join tree that does not appear * in the target list nor qualifications, we must add it to the base - * relation list so that it will be joined. For instance, "select f.x - * from foo f, foo f2" is a join of f and f2. Note that if we have - * "select foo.x from foo f", it also gets turned into a join (between - * foo as foo and foo as f). + * relation list so that it can be processed. For instance, + * select f.x from foo f, foo f2 + * is a join of f and f2. Note that if we have + * select foo.x from foo f + * this also gets turned into a join (between foo as foo and foo as f). * * To avoid putting useless entries into the per-relation targetlists, * this should only be called after all the variables in the targetlist * and quals have been processed by the routines above. + * + * Returns a list of all the base relations (RelOptInfo nodes) that appear + * in the join tree. This list can be used for cross-checking in the + * reverse direction, ie, that we have a join tree entry for every + * relation used in the query. + *---------- */ -void -add_missing_rels_to_query(Query *root) +List * +add_missing_rels_to_query(Query *root, Node *jtnode) { - int varno = 1; - List *l; + List *result = NIL; - foreach(l, root->rtable) + if (jtnode == NULL) + return NIL; + if (IsA(jtnode, List)) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + List *l; - if (rte->inJoinSet) + foreach(l, (List *) jtnode) { - RelOptInfo *rel = get_base_rel(root, varno); + result = nconc(result, + add_missing_rels_to_query(root, lfirst(l))); + } + } + else if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + RelOptInfo *rel = get_base_rel(root, varno); - /* - * If the rel isn't otherwise referenced, give it a dummy - * targetlist consisting of its own OID. - */ - if (rel->targetlist == NIL) - { - Var *var = makeVar(varno, ObjectIdAttributeNumber, - OIDOID, -1, 0); + /* + * If the rel isn't otherwise referenced, give it a dummy + * targetlist consisting of its own OID. + */ + if (rel->targetlist == NIL) + { + Var *var = makeVar(varno, ObjectIdAttributeNumber, + OIDOID, -1, 0); - add_var_to_tlist(rel, var); - } + add_var_to_tlist(rel, var); } - varno++; + + result = lcons(rel, NIL); } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + result = add_missing_rels_to_query(root, j->larg); + result = nconc(result, + add_missing_rels_to_query(root, j->rarg)); + } + else + elog(ERROR, "add_missing_rels_to_query: unexpected node type %d", + nodeTag(jtnode)); + return result; } @@ -135,10 +167,144 @@ add_missing_rels_to_query(Query *root) /* + * add_join_quals_to_rels + * Recursively scan the join tree for JOIN/ON (and JOIN/USING) qual + * clauses, and add these to the appropriate JoinInfo lists. Also, + * mark base RelOptInfos with outerjoinset information, which will + * be needed for proper placement of WHERE clauses during + * add_restrict_and_join_to_rels(). + * + * NOTE: when dealing with inner joins, it is appropriate to let a qual clause + * be evaluated at the lowest level where all the variables it mentions are + * available. However, we cannot do this within an outer join since the qual + * might eliminate matching rows and cause a NULL row to be added improperly. + * Therefore, rels appearing within (the nullable side of) an outer join + * are marked with outerjoinset = list of Relids used at the outer join node. + * This list will be added to the list of rels referenced by quals using + * such a rel, thereby forcing them up the join tree to the right level. + * + * To ease the calculation of these values, add_join_quals_to_rels() returns + * the list of Relids involved in its own level of join. This is just an + * internal convenience; no outside callers pay attention to the result. + */ +Relids +add_join_quals_to_rels(Query *root, Node *jtnode) +{ + Relids result = NIL; + + if (jtnode == NULL) + return result; + if (IsA(jtnode, List)) + { + List *l; + + /* + * Note: we assume it's impossible to see same RT index from more + * than one subtree, so nconc() is OK rather than LispUnioni(). + */ + foreach(l, (List *) jtnode) + result = nconc(result, + add_join_quals_to_rels(root, lfirst(l))); + } + else if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + /* No quals to deal with, just return correct result */ + result = lconsi(varno, NIL); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + Relids leftids, + rightids, + outerjoinids; + List *qual; + + /* + * Order of operations here is subtle and critical. First we recurse + * to handle sub-JOINs. Their join quals will be placed without + * regard for whether this level is an outer join, which is correct. + * Then, if we are an outer join, we mark baserels contained within + * the nullable side(s) with our own rel list; this will restrict + * placement of subsequent quals using those rels, including our own + * quals, quals above us in the join tree, and WHERE quals. + * Finally we place our own join quals. + */ + leftids = add_join_quals_to_rels(root, j->larg); + rightids = add_join_quals_to_rels(root, j->rarg); + + result = nconc(listCopy(leftids), rightids); + + outerjoinids = NIL; + switch (j->jointype) + { + case JOIN_INNER: + /* Inner join adds no restrictions for quals */ + break; + case JOIN_LEFT: + mark_baserels_for_outer_join(root, rightids, result); + outerjoinids = result; + break; + case JOIN_FULL: + mark_baserels_for_outer_join(root, result, result); + outerjoinids = result; + break; + case JOIN_RIGHT: + mark_baserels_for_outer_join(root, leftids, result); + outerjoinids = result; + break; + case JOIN_UNION: + /* + * This is where we fail if upper levels of planner haven't + * rewritten UNION JOIN as an Append ... + */ + elog(ERROR, "UNION JOIN is not implemented yet"); + break; + default: + elog(ERROR, "add_join_quals_to_rels: unsupported join type %d", + (int) j->jointype); + break; + } + + foreach(qual, (List *) j->quals) + add_restrict_and_join_to_rel(root, (Node *) lfirst(qual), + true, outerjoinids); + } + else + elog(ERROR, "add_join_quals_to_rels: unexpected node type %d", + nodeTag(jtnode)); + return result; +} + +/* + * mark_baserels_for_outer_join + * Mark all base rels listed in 'rels' as having the given outerjoinset. + */ +static void +mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels) +{ + List *relid; + + foreach(relid, rels) + { + RelOptInfo *rel = get_base_rel(root, lfirsti(relid)); + + /* + * Since we do this bottom-up, any outer-rels previously marked + * should be within the new outer join set. + */ + Assert(is_subseti(rel->outerjoinset, outerrels)); + + rel->outerjoinset = outerrels; + } +} + +/* * add_restrict_and_join_to_rels * Fill RestrictInfo and JoinInfo lists of relation entries for all * relations appearing within clauses. Creates new relation entries if - * necessary, adding them to *query_relation_list*. + * necessary, adding them to root->base_rel_list. * * 'clauses': the list of clauses in the cnfify'd query qualification. */ @@ -148,7 +314,8 @@ add_restrict_and_join_to_rels(Query *root, List *clauses) List *clause; foreach(clause, clauses) - add_restrict_and_join_to_rel(root, (Node *) lfirst(clause)); + add_restrict_and_join_to_rel(root, (Node *) lfirst(clause), + false, NIL); } /* @@ -157,17 +324,31 @@ add_restrict_and_join_to_rels(Query *root, List *clauses) * (depending on whether the clause is a join) of each base relation * mentioned in the clause. A RestrictInfo node is created and added to * the appropriate list for each rel. Also, if the clause uses a - * mergejoinable operator, enter the left- and right-side expressions - * into the query's lists of equijoined vars. + * mergejoinable operator and is not an outer-join qual, enter the left- + * and right-side expressions into the query's lists of equijoined vars. + * + * isjoinqual is true if the clause came from JOIN/ON or JOIN/USING; + * we have to mark the created RestrictInfo accordingly. If the JOIN + * is an OUTER join, the caller must set outerjoinrelids = all relids of join, + * which will override the joinrel identifiers extracted from the clause + * itself. For inner join quals and WHERE clauses, set outerjoinrelids = NIL. + * (Passing the whole list, and not just an "isouterjoin" boolean, is simply + * a speed optimization: we could extract the same list from the base rels' + * outerjoinsets, but since add_join_quals_to_rels() already knows what we + * should use, might as well pass it in instead of recalculating it.) */ static void -add_restrict_and_join_to_rel(Query *root, Node *clause) +add_restrict_and_join_to_rel(Query *root, Node *clause, + bool isjoinqual, + Relids outerjoinrelids) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); Relids relids; List *vars; + bool can_be_equijoin; restrictinfo->clause = (Expr *) clause; + restrictinfo->isjoinqual = isjoinqual; restrictinfo->subclauseindices = NIL; restrictinfo->mergejoinoperator = InvalidOid; restrictinfo->left_sortop = InvalidOid; @@ -179,6 +360,44 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) */ clause_get_relids_vars(clause, &relids, &vars); + /* + * If caller has given us a join relid list, use it; otherwise, we must + * scan the referenced base rels and add in any outer-join rel lists. + * This prevents the clause from being applied at a lower level of joining + * than any OUTER JOIN that should be evaluated before it. + */ + if (outerjoinrelids) + { + /* Safety check: parser should have enforced this to start with */ + if (! is_subseti(relids, outerjoinrelids)) + elog(ERROR, "JOIN qualification may not refer to other relations"); + relids = outerjoinrelids; + can_be_equijoin = false; + } + else + { + Relids newrelids = relids; + List *relid; + + /* We rely on LispUnioni to be nondestructive of its input lists... */ + can_be_equijoin = true; + foreach(relid, relids) + { + RelOptInfo *rel = get_base_rel(root, lfirsti(relid)); + + if (rel->outerjoinset) + { + newrelids = LispUnioni(newrelids, rel->outerjoinset); + /* + * Because application of the qual will be delayed by outer + * join, we mustn't assume its vars are equal everywhere. + */ + can_be_equijoin = false; + } + } + relids = newrelids; + } + if (length(relids) == 1) { @@ -199,7 +418,8 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) * that "a.x = a.y AND a.x = b.z AND a.y = c.q" allows us to * consider z and q equal after their rels are joined. */ - check_mergejoinable(restrictinfo); + if (can_be_equijoin) + check_mergejoinable(restrictinfo); } else if (relids != NIL) { @@ -209,11 +429,11 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) * the relid list. Set additional RestrictInfo fields for * joining. * - * We need the merge info whether or not mergejoin is enabled (for - * constructing equijoined-var lists), but we don't bother setting - * hash info if hashjoin is disabled. + * We don't bother setting the merge/hashjoin info if we're not + * going to need it. */ - check_mergejoinable(restrictinfo); + if (enable_mergejoin || can_be_equijoin) + check_mergejoinable(restrictinfo); if (enable_hashjoin) check_hashjoinable(restrictinfo); @@ -223,7 +443,7 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) add_join_info_to_rels(root, restrictinfo, relids); /* - * Add vars used in the join clause to targetlists of member + * Add vars used in the join clause to targetlists of their * relations, so that they will be emitted by the plan nodes that * scan those relations (else they won't be available at the join * node!). @@ -241,12 +461,14 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) } /* - * If the clause has a mergejoinable operator, then the two sides + * If the clause has a mergejoinable operator, and is not an outer-join + * qualification nor bubbled up due to an outer join, then the two sides * represent equivalent PathKeyItems for path keys: any path that is - * sorted by one side will also be sorted by the other (after joining, - * that is). Record the key equivalence for future use. + * sorted by one side will also be sorted by the other (as soon as the + * two rels are joined, that is). Record the key equivalence for future + * use. */ - if (restrictinfo->mergejoinoperator != InvalidOid) + if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid) add_equijoined_keys(root, restrictinfo); } @@ -392,7 +614,8 @@ process_implied_equality(Query *root, Node *item1, Node *item2, BOOLOID); /* operator result type */ clause->args = lcons(item1, lcons(item2, NIL)); - add_restrict_and_join_to_rel(root, (Node *) clause); + add_restrict_and_join_to_rel(root, (Node *) clause, + false, NIL); } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index abb468aa8d1..1fcbe64e888 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.58 2000/08/13 02:50:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.59 2000/09/12 21:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,6 +28,7 @@ #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" +#include "parser/parsetree.h" #include "utils/memutils.h" @@ -41,11 +42,8 @@ static Plan *subplanner(Query *root, List *flat_tlist, List *qual, * not any fancier features. * * tlist is the target list the query should produce (NOT root->targetList!) - * qual is the qualification of the query (likewise!) * tuple_fraction is the fraction of tuples we expect will be retrieved * - * qual must already have been converted to implicit-AND form. - * * Note: the Query node now also includes a query_pathkeys field, which * is both an input and an output of query_planner(). The input value * signals query_planner that the indicated sort order is wanted in the @@ -75,9 +73,9 @@ static Plan *subplanner(Query *root, List *flat_tlist, List *qual, Plan * query_planner(Query *root, List *tlist, - List *qual, double tuple_fraction) { + List *normal_qual; List *noncachable_qual; List *constant_qual; List *var_only_tlist; @@ -96,7 +94,7 @@ query_planner(Query *root, root->query_pathkeys = NIL; /* signal unordered result */ /* Make childless Result node to evaluate given tlist. */ - return (Plan *) make_result(tlist, (Node *) qual, (Plan *) NULL); + return (Plan *) make_result(tlist, root->qual, (Plan *) NULL); } /* @@ -111,10 +109,12 @@ query_planner(Query *root, * noncachable functions but no vars, such as "WHERE random() < 0.5". * These cannot be treated as normal restriction or join quals, but * they're not constants either. Instead, attach them to the qpqual - * of the top-level plan, so that they get evaluated once per potential + * of the top plan, so that they get evaluated once per potential * output tuple. */ - qual = pull_constant_clauses(qual, &noncachable_qual, &constant_qual); + normal_qual = pull_constant_clauses((List *) root->qual, + &noncachable_qual, + &constant_qual); /* * Create a target list that consists solely of (resdom var) target @@ -132,7 +132,7 @@ query_planner(Query *root, /* * Choose the best access path and build a plan for it. */ - subplan = subplanner(root, var_only_tlist, qual, tuple_fraction); + subplan = subplanner(root, var_only_tlist, normal_qual, tuple_fraction); /* * Handle the noncachable quals. @@ -188,6 +188,8 @@ subplanner(Query *root, List *qual, double tuple_fraction) { + List *joined_rels; + List *brel; RelOptInfo *final_rel; Plan *resultplan; MemoryContext mycontext; @@ -196,7 +198,7 @@ subplanner(Query *root, Path *presortedpath; /* - * Initialize the targetlist and qualification, adding entries to + * Examine the targetlist and qualifications, adding entries to * base_rel_list as relation references are found (e.g., in the * qualification, the targetlist, etc.). Restrict and join clauses * are added to appropriate lists belonging to the mentioned @@ -207,13 +209,29 @@ subplanner(Query *root, root->join_rel_list = NIL; root->equi_key_list = NIL; - make_var_only_tlist(root, flat_tlist); + build_base_rel_tlists(root, flat_tlist); + (void) add_join_quals_to_rels(root, (Node *) root->jointree); + /* this must happen after add_join_quals_to_rels: */ add_restrict_and_join_to_rels(root, qual); /* - * Make sure we have RelOptInfo nodes for all relations used. + * Make sure we have RelOptInfo nodes for all relations to be joined. + */ + joined_rels = add_missing_rels_to_query(root, (Node *) root->jointree); + + /* + * Check that the join tree includes all the base relations used in + * the query --- otherwise, the parser or rewriter messed up. */ - add_missing_rels_to_query(root); + foreach(brel, root->base_rel_list) + { + RelOptInfo *baserel = (RelOptInfo *) lfirst(brel); + int relid = lfirsti(baserel->relids); + + if (! ptrMember(baserel, joined_rels)) + elog(ERROR, "Internal error: no jointree entry for rel %s (%d)", + rt_fetch(relid, root->rtable)->eref->relname, relid); + } /* * Use the completed lists of equijoined keys to deduce any implied @@ -258,12 +276,11 @@ subplanner(Query *root, * We expect to end up here for a trivial INSERT ... VALUES query * (which will have a target relation, so it gets past * query_planner's check for empty range table; but the target rel - * is unreferenced and not marked inJoinSet, so we find there is - * nothing to join). + * is not in the join tree, so we find there is nothing to join). * * It's also possible to get here if the query was rewritten by the - * rule processor (creating rangetable entries not marked - * inJoinSet) but the rules either did nothing or were simplified + * rule processor (creating dummy rangetable entries that are not in + * the join tree) but the rules either did nothing or were simplified * to nothing by constant-expression folding. So, don't complain. */ root->query_pathkeys = NIL; /* signal unordered result */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 4be9b05bb90..7ffbb4666d9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.88 2000/08/21 20:55:29 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.89 2000/09/12 21:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,7 @@ #include "utils/lsyscache.h" +static void preprocess_join_conditions(Query *parse, Node *jtnode); static List *make_subplanTargetList(Query *parse, List *tlist, AttrNumber **groupColIdx); static Plan *make_groupplan(List *group_tlist, bool tuplePerGroup, @@ -163,6 +164,7 @@ subquery_planner(Query *parse, double tuple_fraction) * canonicalize_qual? */ parse->qual = (Node *) canonicalize_qual((Expr *) parse->qual, true); + #ifdef OPTIMIZER_DEBUG printf("After canonicalize_qual()\n"); pprint(parse->qual); @@ -211,6 +213,9 @@ subquery_planner(Query *parse, double tuple_fraction) parse->havingQual = SS_replace_correlation_vars(parse->havingQual); } + /* Do all the above for each qual condition (ON clause) in the join tree */ + preprocess_join_conditions(parse, (Node *) parse->jointree); + /* Do the main planning (potentially recursive) */ return union_planner(parse, tuple_fraction); @@ -224,6 +229,58 @@ subquery_planner(Query *parse, double tuple_fraction) */ } +/* + * preprocess_join_conditions + * Recursively scan the query's jointree and do subquery_planner's + * qual preprocessing work on each ON condition found therein. + */ +static void +preprocess_join_conditions(Query *parse, Node *jtnode) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, List)) + { + List *l; + + foreach(l, (List *) jtnode) + preprocess_join_conditions(parse, lfirst(l)); + } + else if (IsA(jtnode, RangeTblRef)) + { + /* nothing to do here */ + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + preprocess_join_conditions(parse, j->larg); + preprocess_join_conditions(parse, j->rarg); + + /* Simplify constant expressions */ + j->quals = eval_const_expressions(j->quals); + + /* Canonicalize the qual, and convert it to implicit-AND format */ + j->quals = (Node *) canonicalize_qual((Expr *) j->quals, true); + + /* Expand SubLinks to SubPlans */ + if (parse->hasSubLinks) + { + j->quals = SS_process_sublinks(j->quals); + /* + * ON conditions, like WHERE clauses, are evaluated pre-GROUP; + * so we allow ungrouped vars in them. + */ + } + + /* Replace uplevel vars with Param nodes */ + if (PlannerQueryLevel > 1) + j->quals = SS_replace_correlation_vars(j->quals); + } + else + elog(ERROR, "preprocess_join_conditions: unexpected node type %d", + nodeTag(jtnode)); +} /*-------------------- * union_planner @@ -542,7 +599,6 @@ union_planner(Query *parse, /* Generate the (sub) plan */ result_plan = query_planner(parse, sub_tlist, - (List *) parse->qual, tuple_fraction); /* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index d8a09c017dd..d30636c185e 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.64 2000/06/04 20:50:50 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.65 2000/09/12 21:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -106,11 +106,13 @@ set_plan_references(Plan *plan) set_join_references((Join *) plan); fix_expr_references(plan, (Node *) plan->targetlist); fix_expr_references(plan, (Node *) plan->qual); + fix_expr_references(plan, (Node *) ((Join *) plan)->joinqual); break; case T_MergeJoin: set_join_references((Join *) plan); fix_expr_references(plan, (Node *) plan->targetlist); fix_expr_references(plan, (Node *) plan->qual); + fix_expr_references(plan, (Node *) ((Join *) plan)->joinqual); fix_expr_references(plan, (Node *) ((MergeJoin *) plan)->mergeclauses); break; @@ -118,6 +120,7 @@ set_plan_references(Plan *plan) set_join_references((Join *) plan); fix_expr_references(plan, (Node *) plan->targetlist); fix_expr_references(plan, (Node *) plan->qual); + fix_expr_references(plan, (Node *) ((Join *) plan)->joinqual); fix_expr_references(plan, (Node *) ((HashJoin *) plan)->hashclauses); break; @@ -236,15 +239,15 @@ fix_expr_references(Plan *plan, Node *node) static void set_join_references(Join *join) { - Plan *outer = join->lefttree; - Plan *inner = join->righttree; + Plan *outer = join->plan.lefttree; + Plan *inner = join->plan.righttree; List *outer_tlist = ((outer == NULL) ? NIL : outer->targetlist); List *inner_tlist = ((inner == NULL) ? NIL : inner->targetlist); - join->targetlist = join_references(join->targetlist, - outer_tlist, - inner_tlist, - (Index) 0); + join->plan.targetlist = join_references(join->plan.targetlist, + outer_tlist, + inner_tlist, + (Index) 0); } /* diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index b0772b83f1c..24a0aae55cd 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.40 2000/08/06 04:13:22 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.41 2000/09/12 21:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -649,12 +649,21 @@ SS_finalize_plan(Plan *plan) */ break; + case T_NestLoop: + finalize_primnode((Node *) ((Join *) plan)->joinqual, + &results); + break; + case T_MergeJoin: + finalize_primnode((Node *) ((Join *) plan)->joinqual, + &results); finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses, &results); break; case T_HashJoin: + finalize_primnode((Node *) ((Join *) plan)->joinqual, + &results); finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses, &results); break; @@ -671,7 +680,6 @@ SS_finalize_plan(Plan *plan) case T_Agg: case T_SeqScan: - case T_NestLoop: case T_Material: case T_Sort: case T_Unique: diff --git a/src/backend/optimizer/prep/prepkeyset.c b/src/backend/optimizer/prep/prepkeyset.c index fc192e6f28b..a28e329e537 100644 --- a/src/backend/optimizer/prep/prepkeyset.c +++ b/src/backend/optimizer/prep/prepkeyset.c @@ -107,6 +107,7 @@ transformKeySetQuery(Query *origNode) Node_Copy(origNode, unionNode, distinctClause); Node_Copy(origNode, unionNode, sortClause); Node_Copy(origNode, unionNode, rtable); + Node_Copy(origNode, unionNode, jointree); Node_Copy(origNode, unionNode, targetList); origNode->unionClause = lappend(origNode->unionClause, unionNode); diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index f069cafdf66..d284dd51e02 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.51 2000/06/20 04:22:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.52 2000/09/12 21:06:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -528,12 +528,9 @@ fix_parsetree_attnums(Index rt_index, context.new_relid = new_relid; context.sublevels_up = 0; - /* - * We must scan both the targetlist and qual, but we know the - * havingQual is empty, so we can ignore it. - */ - fix_parsetree_attnums_walker((Node *) parsetree->targetList, &context); - fix_parsetree_attnums_walker((Node *) parsetree->qual, &context); + query_tree_walker(parsetree, + fix_parsetree_attnums_walker, + (void *) &context); } /* @@ -565,38 +562,17 @@ fix_parsetree_attnums_walker(Node *node, } return false; } - if (IsA(node, SubLink)) + if (IsA(node, Query)) { + /* Recurse into subselects */ + bool result; - /* - * Standard expression_tree_walker will not recurse into - * subselect, but here we must do so. - */ - SubLink *sub = (SubLink *) node; - - if (fix_parsetree_attnums_walker((Node *) (sub->lefthand), context)) - return true; context->sublevels_up++; - if (fix_parsetree_attnums_walker((Node *) (sub->subselect), context)) - { - context->sublevels_up--; - return true; - } + result = query_tree_walker((Query *) node, + fix_parsetree_attnums_walker, + (void *) context); context->sublevels_up--; - return false; - } - if (IsA(node, Query)) - { - /* Reach here after recursing down into subselect above... */ - Query *qry = (Query *) node; - - if (fix_parsetree_attnums_walker((Node *) (qry->targetList), context)) - return true; - if (fix_parsetree_attnums_walker((Node *) (qry->qual), context)) - return true; - if (fix_parsetree_attnums_walker((Node *) (qry->havingQual), context)) - return true; - return false; + return result; } return expression_tree_walker(node, fix_parsetree_attnums_walker, (void *) context); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index cf0b6dd703c..36c7abd85b5 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.73 2000/08/24 03:29:05 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.74 2000/09/12 21:06:58 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -591,7 +591,7 @@ check_subplans_for_ungrouped_vars_walker(Node *node, elog(ERROR, "cache lookup of attribute %d in relation %u failed", var->varattno, rte->relid); elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query", - rte->ref->relname, attname); + rte->eref->relname, attname); } } } @@ -1639,25 +1639,44 @@ simplify_op_or_func(Expr *expr, List *args) * will have List structure at the top level, and it handles TargetEntry nodes * so that a scan of a target list can be handled without additional code. * (But only the "expr" part of a TargetEntry is examined, unless the walker - * chooses to process TargetEntry nodes specially.) + * chooses to process TargetEntry nodes specially.) Also, RangeTblRef and + * JoinExpr nodes are handled, so that qual expressions in a jointree can be + * processed without additional code. + * + * expression_tree_walker will handle SubLink and SubPlan nodes by recursing + * normally into the "lefthand" arguments (which belong to the outer plan). + * It will also call the walker on the sub-Query node; however, when + * expression_tree_walker itself is called on a Query node, it does nothing + * and returns "false". The net effect is that unless the walker does + * something special at a Query node, sub-selects will not be visited + * during an expression tree walk. This is exactly the behavior wanted + * in many cases --- and for those walkers that do want to recurse into + * sub-selects, special behavior is typically needed anyway at the entry + * to a sub-select (such as incrementing a depth counter). A walker that + * wants to examine sub-selects should include code along the lines of: + * + * if (IsA(node, Query)) + * { + * adjust context for subquery; + * result = query_tree_walker((Query *) node, my_walker, context); + * restore context if needed; + * return result; + * } * - * expression_tree_walker will handle a SUBPLAN_EXPR node by recursing into - * the args and slink->oper lists (which belong to the outer plan), but it - * will *not* visit the inner plan, since that's typically what expression - * tree walkers want. A walker that wants to visit the subplan can force - * appropriate behavior by recognizing subplan expression nodes and doing - * the right thing. + * query_tree_walker is a convenience routine (see below) that calls the + * walker on all the expression subtrees of the given Query node. * - * Bare SubLink nodes (without a SUBPLAN_EXPR) are handled by recursing into - * the "lefthand" argument list only. (A bare SubLink should be seen only if - * the tree has not yet been processed by subselect.c.) Again, this can be - * overridden by the walker, but it seems to be the most useful default - * behavior. + * NOTE: currently, because make_subplan() clears the subselect link in + * a SubLink node, it is not actually possible to recurse into subselects + * of an already-planned expression tree. This is OK for current uses, + * but ought to be cleaned up when we redesign querytree processing. *-------------------- */ bool - expression_tree_walker(Node *node, bool (*walker) (), void *context) +expression_tree_walker(Node *node, + bool (*walker) (), + void *context) { List *temp; @@ -1677,6 +1696,7 @@ bool case T_Const: case T_Var: case T_Param: + case T_RangeTblRef: /* primitive node types with no subnodes */ break; case T_Expr: @@ -1750,17 +1770,31 @@ bool /* * If the SubLink has already been processed by - * subselect.c, it will have lefthand=NIL, and we only - * need to look at the oper list. Otherwise we only need - * to look at lefthand (the Oper nodes in the oper list - * are deemed uninteresting). + * subselect.c, it will have lefthand=NIL, and we need to + * scan the oper list. Otherwise we only need to look at + * the lefthand list (the incomplete Oper nodes in the oper + * list are deemed uninteresting, perhaps even confusing). */ if (sublink->lefthand) - return walker((Node *) sublink->lefthand, context); + { + if (walker((Node *) sublink->lefthand, context)) + return true; + } else - return walker((Node *) sublink->oper, context); + { + if (walker((Node *) sublink->oper, context)) + return true; + } + /* + * Also invoke the walker on the sublink's Query node, + * so it can recurse into the sub-query if it wants to. + */ + return walker(sublink->subselect, context); } break; + case T_Query: + /* Do nothing with a sub-Query, per discussion above */ + break; case T_List: foreach(temp, (List *) node) { @@ -1770,6 +1804,23 @@ bool break; case T_TargetEntry: return walker(((TargetEntry *) node)->expr, context); + case T_JoinExpr: + { + JoinExpr *join = (JoinExpr *) node; + + if (walker(join->larg, context)) + return true; + if (walker(join->rarg, context)) + return true; + if (walker(join->quals, context)) + return true; + if (walker((Node *) join->colvars, context)) + return true; + /* alias clause, using list, colnames list are deemed + * uninteresting. + */ + } + break; default: elog(ERROR, "expression_tree_walker: Unexpected node type %d", nodeTag(node)); @@ -1778,6 +1829,37 @@ bool return false; } +/* + * query_tree_walker --- initiate a walk of a Query's expressions + * + * This routine exists just to reduce the number of places that need to know + * where all the expression subtrees of a Query are. Note it can be used + * for starting a walk at top level of a Query regardless of whether the + * walker intends to descend into subqueries. It is also useful for + * descending into subqueries within a walker. + */ +bool +query_tree_walker(Query *query, + bool (*walker) (), + void *context) +{ + Assert(query != NULL && IsA(query, Query)); + + if (walker((Node *) query->targetList, context)) + return true; + if (walker(query->qual, context)) + return true; + if (walker(query->havingQual, context)) + return true; + if (walker((Node *) query->jointree, context)) + return true; + /* + * XXX for subselect-in-FROM, may need to examine rtable as well + */ + return false; +} + + /*-------------------- * expression_tree_mutator() is designed to support routines that make a * modified copy of an expression tree, with some nodes being added, @@ -1838,7 +1920,9 @@ bool */ Node * - expression_tree_mutator(Node *node, Node *(*mutator) (), void *context) +expression_tree_mutator(Node *node, + Node *(*mutator) (), + void *context) { /* @@ -1866,6 +1950,7 @@ Node * case T_Const: case T_Var: case T_Param: + case T_RangeTblRef: /* primitive node types with no subnodes */ return (Node *) copyObject(node); case T_Expr: @@ -2044,6 +2129,20 @@ Node * return (Node *) newnode; } break; + case T_JoinExpr: + { + JoinExpr *join = (JoinExpr *) node; + JoinExpr *newnode; + + FLATCOPY(newnode, join, JoinExpr); + MUTATE(newnode->larg, join->larg, Node *); + MUTATE(newnode->rarg, join->rarg, Node *); + MUTATE(newnode->quals, join->quals, Node *); + MUTATE(newnode->colvars, join->colvars, List *); + /* We do not mutate alias, using, or colnames by default */ + return (Node *) newnode; + } + break; default: elog(ERROR, "expression_tree_mutator: Unexpected node type %d", nodeTag(node)); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5588e91e5b7..fc73bb2b664 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.64 2000/05/30 00:49:49 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.65 2000/09/12 21:06:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -119,7 +119,9 @@ set_cheapest(RelOptInfo *parent_rel) Path *cheapest_total_path; Assert(IsA(parent_rel, RelOptInfo)); - Assert(pathlist != NIL); + + if (pathlist == NIL) + elog(ERROR, "Unable to devise a query plan for the given query"); cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist); @@ -352,6 +354,7 @@ create_index_path(Query *root, * number of rows is the same as the parent rel's estimate. */ pathnode->joinrelids = NIL; /* no join clauses here */ + pathnode->alljoinquals = false; pathnode->rows = rel->rows; cost_index(&pathnode->path, root, rel, index, indexquals, false); @@ -393,6 +396,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval) * relations. * * 'joinrel' is the join relation. + * 'jointype' is the type of join required * 'outer_path' is the outer path * 'inner_path' is the inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join @@ -403,6 +407,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval) */ NestPath * create_nestloop_path(RelOptInfo *joinrel, + JoinType jointype, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -412,6 +417,7 @@ create_nestloop_path(RelOptInfo *joinrel, pathnode->path.pathtype = T_NestLoop; pathnode->path.parent = joinrel; + pathnode->jointype = jointype; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; pathnode->joinrestrictinfo = restrict_clauses; @@ -428,6 +434,7 @@ create_nestloop_path(RelOptInfo *joinrel, * two relations * * 'joinrel' is the join relation + * 'jointype' is the type of join required * 'outer_path' is the outer path * 'inner_path' is the inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join @@ -440,6 +447,7 @@ create_nestloop_path(RelOptInfo *joinrel, */ MergePath * create_mergejoin_path(RelOptInfo *joinrel, + JoinType jointype, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -463,6 +471,7 @@ create_mergejoin_path(RelOptInfo *joinrel, pathnode->jpath.path.pathtype = T_MergeJoin; pathnode->jpath.path.parent = joinrel; + pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; @@ -486,6 +495,7 @@ create_mergejoin_path(RelOptInfo *joinrel, * Creates a pathnode corresponding to a hash join between two relations. * * 'joinrel' is the join relation + * 'jointype' is the type of join required * 'outer_path' is the cheapest outer path * 'inner_path' is the cheapest inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join @@ -496,6 +506,7 @@ create_mergejoin_path(RelOptInfo *joinrel, */ HashPath * create_hashjoin_path(RelOptInfo *joinrel, + JoinType jointype, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -506,6 +517,7 @@ create_hashjoin_path(RelOptInfo *joinrel, pathnode->jpath.path.pathtype = T_HashJoin; pathnode->jpath.path.parent = joinrel; + pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 070fabf7669..87e87597d11 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.27 2000/06/18 22:44:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.28 2000/09/12 21:06:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,6 +72,7 @@ get_base_rel(Query *root, int relid) rel->tuples = 0; rel->baserestrictinfo = NIL; rel->baserestrictcost = 0; + rel->outerjoinset = NIL; rel->joininfo = NIL; rel->innerjoin = NIL; @@ -178,6 +179,7 @@ get_join_rel(Query *root, joinrel->tuples = 0; joinrel->baserestrictinfo = NIL; joinrel->baserestrictcost = 0; + joinrel->outerjoinset = NIL; joinrel->joininfo = NIL; joinrel->innerjoin = NIL; @@ -216,8 +218,7 @@ get_join_rel(Query *root, restrictlist); /* - * Add the joinrel to the front of the query's joinrel list. - * (allpaths.c depends on this ordering!) + * Add the joinrel to the query's joinrel list. */ root->join_rel_list = lcons(joinrel, root->join_rel_list); diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 3ce924de1bb..adbfd884c36 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.10 2000/05/30 00:49:49 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.11 2000/09/12 21:06:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,3 +54,29 @@ get_actual_clauses(List *restrictinfo_list) } return result; } + +/* + * get_actual_join_clauses + * + * Extract clauses from 'restrictinfo_list', separating those that + * came from JOIN/ON conditions from those that didn't. + */ +void +get_actual_join_clauses(List *restrictinfo_list, + List **joinquals, List **otherquals) +{ + List *temp; + + *joinquals = NIL; + *otherquals = NIL; + + foreach(temp, restrictinfo_list) + { + RestrictInfo *clause = (RestrictInfo *) lfirst(temp); + + if (clause->isjoinqual) + *joinquals = lappend(*joinquals, clause->clause); + else + *otherquals = lappend(*otherquals, clause->clause); + } +} diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index bed7be7f08a..ec9f9dafd0b 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.26 2000/04/12 17:15:24 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.27 2000/09/12 21:06:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "postgres.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/var.h" @@ -23,10 +24,17 @@ typedef struct { List *varlist; + int sublevels_up; +} pull_varnos_context; + +typedef struct +{ + List *varlist; bool includeUpperVars; } pull_var_clause_context; -static bool pull_varnos_walker(Node *node, List **listptr); +static bool pull_varnos_walker(Node *node, + pull_varnos_context *context); static bool contain_var_clause_walker(Node *node, void *context); static bool pull_var_clause_walker(Node *node, pull_var_clause_context *context); @@ -35,21 +43,39 @@ static bool pull_var_clause_walker(Node *node, /* * pull_varnos * - * Create a list of all the distinct varnos present in a parsetree - * (tlist or qual). Note that only varnos attached to level-zero - * Vars are considered --- upper Vars refer to some other rtable! + * Create a list of all the distinct varnos present in a parsetree. + * Only varnos that reference level-zero rtable entries are considered. + * + * NOTE: unlike other routines in this file, pull_varnos() is used on + * not-yet-planned expressions. It may therefore find bare SubLinks, + * and if so it needs to recurse into them to look for uplevel references + * to the desired rtable level! But when we find a completed SubPlan, + * we only need to look at the parameters passed to the subplan. */ List * pull_varnos(Node *node) { - List *result = NIL; + pull_varnos_context context; + + context.varlist = NIL; + context.sublevels_up = 0; + + /* + * Must be prepared to start with a Query or a bare expression tree; + * if it's a Query, go straight to query_tree_walker to make sure that + * sublevels_up doesn't get incremented prematurely. + */ + if (node && IsA(node, Query)) + query_tree_walker((Query *) node, pull_varnos_walker, + (void *) &context); + else + pull_varnos_walker(node, &context); - pull_varnos_walker(node, &result); - return result; + return context.varlist; } static bool -pull_varnos_walker(Node *node, List **listptr) +pull_varnos_walker(Node *node, pull_varnos_context *context) { if (node == NULL) return false; @@ -57,11 +83,42 @@ pull_varnos_walker(Node *node, List **listptr) { Var *var = (Var *) node; - if (var->varlevelsup == 0 && !intMember(var->varno, *listptr)) - *listptr = lconsi(var->varno, *listptr); + if (var->varlevelsup == context->sublevels_up && + !intMember(var->varno, context->varlist)) + context->varlist = lconsi(var->varno, context->varlist); + return false; + } + if (is_subplan(node)) + { + /* + * Already-planned subquery. Examine the args list (parameters + * to be passed to subquery), as well as the "oper" list which + * is executed by the outer query. But short-circuit recursion into + * the subquery itself, which would be a waste of effort. + */ + Expr *expr = (Expr *) node; + + if (pull_varnos_walker((Node*) ((SubPlan*) expr->oper)->sublink->oper, + context)) + return true; + if (pull_varnos_walker((Node *) expr->args, + context)) + return true; return false; } - return expression_tree_walker(node, pull_varnos_walker, (void *) listptr); + if (IsA(node, Query)) + { + /* Recurse into not-yet-planned subquery */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, pull_varnos_walker, + (void *) context); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, pull_varnos_walker, + (void *) context); } /* |