aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/joinpath.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path/joinpath.c')
-rw-r--r--src/backend/optimizer/path/joinpath.c623
1 files changed, 623 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
new file mode 100644
index 00000000000..e727388715c
--- /dev/null
+++ b/src/backend/optimizer/path/joinpath.c
@@ -0,0 +1,623 @@
+/*-------------------------------------------------------------------------
+ *
+ * joinpath.c--
+ * Routines to find all possible paths for processing a set of joins
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <math.h>
+
+#include "storage/buf_internals.h"
+
+#include "nodes/pg_list.h"
+#include "nodes/relation.h"
+#include "nodes/plannodes.h"
+
+#include "optimizer/internal.h"
+#include "optimizer/paths.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/keys.h"
+#include "optimizer/cost.h" /* for _enable_{hashjoin, _enable_mergesort} */
+
+static Path *best_innerjoin(List *join_paths, List *outer_relid);
+static List *sort_inner_and_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel,
+ List *mergeinfo_list);
+static List *match_unsorted_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel,
+ List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin,
+ List *mergeinfo_list);
+static List *match_unsorted_inner(Rel *joinrel, Rel *outerrel, Rel *innerrel,
+ List *innerpath_list, List *mergeinfo_list);
+static bool EnoughMemoryForHashjoin(Rel *hashrel);
+static List *hash_inner_and_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel,
+ List *hashinfo_list);
+
+/*
+ * find-all-join-paths--
+ * Creates all possible ways to process joins for each of the join
+ * relations in the list 'joinrels.' Each unique path will be included
+ * in the join relation's 'pathlist' field.
+ *
+ * In postgres, n-way joins are handled left-only(permuting clauseless
+ * joins doesn't usually win much).
+ *
+ * if BushyPlanFlag is true, bushy tree plans will be generated
+ *
+ * 'joinrels' is the list of relation entries to be joined
+ *
+ * Modifies the pathlist field of the appropriate rel node to contain
+ * the unique join paths.
+ * If bushy trees are considered, may modify the relid field of the
+ * join rel nodes to flatten the lists.
+ *
+ * Returns nothing of interest. (?)
+ * It does a destructive modification.
+ */
+void
+find_all_join_paths(Query *root, List *joinrels)
+{
+ List *mergeinfo_list = NIL;
+ List *hashinfo_list = NIL;
+ List *temp_list = NIL;
+ List *path = NIL;
+
+ while (joinrels != NIL) {
+ Rel *joinrel = (Rel *)lfirst(joinrels);
+ List *innerrelids;
+ List *outerrelids;
+ Rel *innerrel;
+ Rel *outerrel;
+ Path *bestinnerjoin;
+ List *pathlist = NIL;
+
+ innerrelids = lsecond(joinrel->relids);
+ outerrelids = lfirst(joinrel->relids);
+
+ /*
+ * base relation id is an integer and join relation relid is a
+ * list of integers.
+ */
+ innerrel = (length(innerrelids)==1)?
+ get_base_rel(root, lfirsti(innerrelids)) : get_join_rel(root,innerrelids);
+ outerrel = (length(outerrelids)==1)?
+ get_base_rel(root, lfirsti(outerrelids)) : get_join_rel(root, outerrelids);
+
+ bestinnerjoin = best_innerjoin(innerrel->innerjoin,
+ outerrel->relids);
+ if( _enable_mergesort_ ) {
+ mergeinfo_list =
+ group_clauses_by_order(joinrel->clauseinfo,
+ lfirsti(innerrel->relids));
+ }
+
+ if( _enable_hashjoin_ ) {
+ hashinfo_list =
+ group_clauses_by_hashop(joinrel->clauseinfo,
+ lfirsti(innerrel->relids));
+ }
+
+ /* need to flatten the relids list */
+ joinrel->relids = intAppend(outerrelids, innerrelids);
+
+ /*
+ * 1. Consider mergesort paths where both relations must be
+ * explicitly sorted.
+ */
+ pathlist = sort_inner_and_outer(joinrel,outerrel,
+ innerrel,mergeinfo_list);
+
+ /*
+ * 2. Consider paths where the outer relation need not be explicitly
+ * sorted. This may include either nestloops and mergesorts where
+ * the outer path is already ordered.
+ */
+ pathlist =
+ add_pathlist(joinrel, pathlist,
+ match_unsorted_outer(joinrel,
+ outerrel,
+ innerrel,
+ outerrel->pathlist,
+ (Path*)innerrel->cheapestpath,
+ bestinnerjoin,
+ mergeinfo_list));
+
+ /*
+ * 3. Consider paths where the inner relation need not be explicitly
+ * sorted. This may include nestloops and mergesorts the actual
+ * nestloop nodes were constructed in (match-unsorted-outer).
+ */
+ pathlist =
+ add_pathlist(joinrel,pathlist,
+ match_unsorted_inner(joinrel,outerrel,
+ innerrel,
+ innerrel->pathlist,
+ mergeinfo_list));
+
+ /*
+ * 4. Consider paths where both outer and inner relations must be
+ * hashed before being joined.
+ */
+
+ pathlist =
+ add_pathlist(joinrel, pathlist,
+ hash_inner_and_outer(joinrel,outerrel,
+ innerrel,hashinfo_list));
+
+ joinrel->pathlist = pathlist;
+
+ /*
+ * 'OuterJoinCost is only valid when calling (match-unsorted-inner)
+ * with the same arguments as the previous invokation of
+ * (match-unsorted-outer), so clear the field before going on.
+ */
+ temp_list = innerrel->pathlist;
+ foreach(path, temp_list) {
+
+ /*
+ * XXX
+ *
+ * This gross hack is to get around an apparent optimizer bug on
+ * Sparc (or maybe it is a bug of ours?) that causes really wierd
+ * behavior.
+ */
+ if (IsA_JoinPath(path)) {
+ ((Path*)lfirst(path))->outerjoincost = (Cost) 0;
+ }
+
+ /* do it iff it is a join path, which is not always
+ true, esp since the base level */
+ }
+
+ joinrels = lnext(joinrels);
+ }
+}
+
+/*
+ * best-innerjoin--
+ * Find the cheapest index path that has already been identified by
+ * (indexable_joinclauses) as being a possible inner path for the given
+ * outer relation in a nestloop join.
+ *
+ * 'join-paths' is a list of join nodes
+ * 'outer-relid' is the relid of the outer join relation
+ *
+ * Returns the pathnode of the selected path.
+ */
+static Path *
+best_innerjoin(List *join_paths, List *outer_relids)
+{
+ Path *cheapest = (Path*)NULL;
+ List *join_path;
+
+ foreach(join_path, join_paths) {
+ Path *path = (Path *)lfirst(join_path);
+
+ if (intMember(lfirsti(path->joinid), outer_relids)
+ && ((cheapest==NULL ||
+ path_is_cheaper((Path*)lfirst(join_path),cheapest)))) {
+
+ cheapest = (Path*)lfirst(join_path);
+ }
+ }
+ return(cheapest);
+}
+
+/*
+ * sort-inner-and-outer--
+ * Create mergesort join paths by explicitly sorting both the outer and
+ * inner join relations on each available merge ordering.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'mergeinfo-list' is a list of nodes containing info on(mergesortable)
+ * clauses for joining the relations
+ *
+ * Returns a list of mergesort paths.
+ */
+static List *
+sort_inner_and_outer(Rel *joinrel,
+ Rel *outerrel,
+ Rel *innerrel,
+ List *mergeinfo_list)
+{
+ List *ms_list = NIL;
+ MInfo *xmergeinfo = (MInfo*)NULL;
+ MergePath *temp_node = (MergePath*)NULL;
+ List *i;
+ List *outerkeys = NIL;
+ List *innerkeys = NIL;
+ List *merge_pathkeys = NIL;
+
+ foreach(i, mergeinfo_list) {
+ xmergeinfo = (MInfo *)lfirst(i);
+
+ outerkeys =
+ extract_path_keys(xmergeinfo->jmethod.jmkeys,
+ outerrel->targetlist,
+ OUTER);
+
+ innerkeys =
+ extract_path_keys(xmergeinfo->jmethod.jmkeys,
+ innerrel->targetlist,
+ INNER);
+
+ merge_pathkeys =
+ new_join_pathkeys(outerkeys, joinrel->targetlist,
+ xmergeinfo->jmethod.clauses);
+
+ temp_node =
+ create_mergesort_path(joinrel,
+ outerrel->size,
+ innerrel->size,
+ outerrel->width,
+ innerrel->width,
+ (Path*)outerrel->cheapestpath,
+ (Path*)innerrel->cheapestpath,
+ merge_pathkeys,
+ xmergeinfo->m_ordering,
+ xmergeinfo->jmethod.clauses,
+ outerkeys,
+ innerkeys);
+
+ ms_list = lappend(ms_list, temp_node);
+ }
+ return(ms_list);
+}
+
+/*
+ * match-unsorted-outer--
+ * Creates possible join paths for processing a single join relation
+ * 'joinrel' by employing either iterative substitution or
+ * mergesorting on each of its possible outer paths(assuming that the
+ * outer relation need not be explicitly sorted).
+ *
+ * 1. The inner path is the cheapest available inner path.
+ * 2. Mergesort wherever possible. Mergesorts are considered if there
+ * are mergesortable join clauses between the outer and inner join
+ * relations such that the outer path is keyed on the variables
+ * appearing in the clauses. The corresponding inner merge path is
+ * either a path whose keys match those of the outer path(if such a
+ * path is available) or an explicit sort on the appropriate inner
+ * join keys, whichever is cheaper.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'outerpath-list' is the list of possible outer paths
+ * 'cheapest-inner' is the cheapest inner path
+ * 'best-innerjoin' is the best inner index path(if any)
+ * 'mergeinfo-list' is a list of nodes containing info on mergesortable
+ * clauses
+ *
+ * Returns a list of possible join path nodes.
+ */
+static List *
+match_unsorted_outer(Rel *joinrel,
+ Rel *outerrel,
+ Rel *innerrel,
+ List *outerpath_list,
+ Path *cheapest_inner,
+ Path *best_innerjoin,
+ List *mergeinfo_list)
+{
+ Path *outerpath = (Path*)NULL;
+ List *jp_list = NIL;
+ List *temp_node = NIL;
+ List *merge_pathkeys = NIL;
+ Path *nestinnerpath =(Path*)NULL;
+ List *paths = NIL;
+ List *i = NIL;
+ PathOrder *outerpath_ordering = NULL;
+
+ foreach(i,outerpath_list) {
+ List *clauses = NIL;
+ List *matchedJoinKeys = NIL;
+ List *matchedJoinClauses = NIL;
+ MInfo *xmergeinfo = (MInfo*)NULL;
+
+ outerpath = (Path*)lfirst(i);
+
+ outerpath_ordering = &outerpath->p_ordering;
+
+ if (outerpath_ordering) {
+ xmergeinfo =
+ match_order_mergeinfo(outerpath_ordering,
+ mergeinfo_list);
+ }
+
+ if (xmergeinfo) {
+ clauses = xmergeinfo->jmethod.clauses;
+ }
+
+ if (clauses) {
+ List *keys = xmergeinfo->jmethod.jmkeys;
+ List *clauses = xmergeinfo->jmethod.clauses;
+
+ matchedJoinKeys =
+ match_pathkeys_joinkeys(outerpath->keys,
+ keys,
+ clauses,
+ OUTER,
+ &matchedJoinClauses);
+ merge_pathkeys =
+ new_join_pathkeys(outerpath->keys,
+ joinrel->targetlist, clauses);
+ } else {
+ merge_pathkeys = outerpath->keys;
+ }
+
+ if(best_innerjoin &&
+ path_is_cheaper(best_innerjoin, cheapest_inner)) {
+ nestinnerpath = best_innerjoin;
+ } else {
+ nestinnerpath = cheapest_inner;
+ }
+
+ paths = lcons(create_nestloop_path(joinrel,
+ outerrel,
+ outerpath,
+ nestinnerpath,
+ merge_pathkeys),
+ NIL);
+
+ if (clauses && matchedJoinKeys) {
+ bool path_is_cheaper_than_sort;
+ List *varkeys = NIL;
+ Path *mergeinnerpath =
+ match_paths_joinkeys(matchedJoinKeys,
+ outerpath_ordering,
+ innerrel->pathlist,
+ INNER);
+
+ path_is_cheaper_than_sort =
+ (bool) (mergeinnerpath &&
+ (mergeinnerpath->path_cost <
+ (cheapest_inner->path_cost +
+ cost_sort(matchedJoinKeys,
+ innerrel->size,
+ innerrel->width,
+ false))));
+ if(!path_is_cheaper_than_sort) {
+ varkeys =
+ extract_path_keys(matchedJoinKeys,
+ innerrel->targetlist,
+ INNER);
+ }
+
+
+ /*
+ * Keep track of the cost of the outer path used with
+ * this ordered inner path for later processing in
+ * (match-unsorted-inner), since it isn't a sort and
+ * thus wouldn't otherwise be considered.
+ */
+ if (path_is_cheaper_than_sort) {
+ mergeinnerpath->outerjoincost = outerpath->path_cost;
+ } else {
+ mergeinnerpath = cheapest_inner;
+ }
+
+ temp_node =
+ lcons(create_mergesort_path(joinrel,
+ outerrel->size,
+ innerrel->size,
+ outerrel->width,
+ innerrel->width,
+ outerpath,
+ mergeinnerpath,
+ merge_pathkeys,
+ xmergeinfo->m_ordering,
+ matchedJoinClauses,
+ NIL,
+ varkeys),
+ paths);
+ } else {
+ temp_node = paths;
+ }
+ jp_list = nconc(jp_list, temp_node);
+ }
+ return(jp_list);
+}
+
+/*
+ * match-unsorted-inner --
+ * Find the cheapest ordered join path for a given(ordered, unsorted)
+ * inner join path.
+ *
+ * Scans through each path available on an inner join relation and tries
+ * matching its ordering keys against those of mergejoin clauses.
+ * If 1. an appropriately-ordered inner path and matching mergeclause are
+ * found, and
+ * 2. sorting the cheapest outer path is cheaper than using an ordered
+ * but unsorted outer path(as was considered in
+ * (match-unsorted-outer)),
+ * then this merge path is considered.
+ *
+ * 'joinrel' is the join result relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'innerpath-list' is the list of possible inner join paths
+ * 'mergeinfo-list' is a list of nodes containing info on mergesortable
+ * clauses
+ *
+ * Returns a list of possible merge paths.
+ */
+static List *
+match_unsorted_inner(Rel *joinrel,
+ Rel *outerrel,
+ Rel *innerrel,
+ List *innerpath_list,
+ List *mergeinfo_list)
+{
+ Path *innerpath = (Path*)NULL;
+ List *mp_list = NIL;
+ List *temp_node = NIL;
+ PathOrder *innerpath_ordering = NULL;
+ Cost temp1 = 0.0;
+ bool temp2 = false;
+ List *i = NIL;
+
+ foreach (i, innerpath_list) {
+ MInfo *xmergeinfo = (MInfo*)NULL;
+ List *clauses = NIL;
+ List *matchedJoinKeys = NIL;
+ List *matchedJoinClauses = NIL;
+
+ innerpath = (Path*)lfirst(i);
+
+ innerpath_ordering = &innerpath->p_ordering;
+
+ if (innerpath_ordering) {
+ xmergeinfo =
+ match_order_mergeinfo(innerpath_ordering,
+ mergeinfo_list);
+ }
+
+ if (xmergeinfo) {
+ clauses = ((JoinMethod*)xmergeinfo)->clauses;
+ }
+
+ if (clauses) {
+ List *keys = xmergeinfo->jmethod.jmkeys;
+ List *cls = xmergeinfo->jmethod.clauses;
+
+ matchedJoinKeys =
+ match_pathkeys_joinkeys(innerpath->keys,
+ keys,
+ cls,
+ INNER,
+ &matchedJoinClauses);
+ }
+
+ /*
+ * (match-unsorted-outer) if it is applicable.
+ * 'OuterJoinCost was set above in
+ */
+ if (clauses && matchedJoinKeys) {
+ temp1 = outerrel->cheapestpath->path_cost +
+ cost_sort(matchedJoinKeys, outerrel->size, outerrel->width,
+ false);
+
+ temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost)
+ || (innerpath->outerjoincost > temp1));
+
+ if(temp2) {
+ List *outerkeys =
+ extract_path_keys(matchedJoinKeys,
+ outerrel->targetlist,
+ OUTER);
+ List *merge_pathkeys =
+ new_join_pathkeys(outerkeys,
+ joinrel->targetlist,
+ clauses);
+
+ temp_node =
+ lcons(create_mergesort_path(joinrel,
+ outerrel->size,
+ innerrel->size,
+ outerrel->width,
+ innerrel->width,
+ (Path*)outerrel->cheapestpath,
+ innerpath,
+ merge_pathkeys,
+ xmergeinfo->m_ordering,
+ matchedJoinClauses,
+ outerkeys,
+ NIL),
+ NIL);
+
+ mp_list = nconc(mp_list,temp_node);
+ }
+ }
+ }
+ return(mp_list);
+
+}
+
+static bool
+EnoughMemoryForHashjoin(Rel *hashrel)
+{
+ int ntuples;
+ int tupsize;
+ int pages;
+
+ ntuples = hashrel->size;
+ if (ntuples == 0) ntuples = 1000;
+ tupsize = hashrel->width + sizeof(HeapTupleData);
+ pages = page_size(ntuples, tupsize);
+ /*
+ * if amount of buffer space below hashjoin threshold,
+ * return false
+ */
+ if (ceil(sqrt((double)pages)) > NBuffers)
+ return false;
+ return true;
+}
+
+/*
+ * hash-inner-and-outer-- XXX HASH
+ * Create hashjoin join paths by explicitly hashing both the outer and
+ * inner join relations on each available hash op.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'hashinfo-list' is a list of nodes containing info on(hashjoinable)
+ * clauses for joining the relations
+ *
+ * Returns a list of hashjoin paths.
+ */
+static List *
+hash_inner_and_outer(Rel *joinrel,
+ Rel *outerrel,
+ Rel *innerrel,
+ List *hashinfo_list)
+{
+ HInfo *xhashinfo = (HInfo*)NULL;
+ List *hjoin_list = NIL;
+ HashPath *temp_node = (HashPath*)NULL;
+ List *i = NIL;
+ List *outerkeys = NIL;
+ List *innerkeys = NIL;
+ List *hash_pathkeys = NIL;
+
+ foreach (i, hashinfo_list) {
+ xhashinfo = (HInfo*)lfirst(i);
+ outerkeys =
+ extract_path_keys(((JoinMethod*)xhashinfo)->jmkeys,
+ outerrel->targetlist,
+ OUTER);
+ innerkeys =
+ extract_path_keys(((JoinMethod*)xhashinfo)->jmkeys,
+ innerrel->targetlist,
+ INNER);
+ hash_pathkeys =
+ new_join_pathkeys(outerkeys,
+ joinrel->targetlist,
+ ((JoinMethod*)xhashinfo)->clauses);
+
+ if (EnoughMemoryForHashjoin(innerrel)) {
+ temp_node = create_hashjoin_path(joinrel,
+ outerrel->size,
+ innerrel->size,
+ outerrel->width,
+ innerrel->width,
+ (Path*)outerrel->cheapestpath,
+ (Path*)innerrel->cheapestpath,
+ hash_pathkeys,
+ xhashinfo->hashop,
+ ((JoinMethod*)xhashinfo)->clauses,
+ outerkeys,
+ innerkeys);
+ hjoin_list = lappend(hjoin_list, temp_node);
+ }
+ }
+ return(hjoin_list);
+}
+