diff options
Diffstat (limited to 'src/backend/optimizer/path/pathkeys.c')
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 691 |
1 files changed, 398 insertions, 293 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 5aeda1e154e..b578e33f5c8 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.18 2000/01/26 05:56:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.19 2000/02/15 20:49:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,7 @@ #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/tlist.h" #include "optimizer/var.h" @@ -25,9 +26,9 @@ #include "utils/lsyscache.h" static PathKeyItem *makePathKeyItem(Node *key, Oid sortop); -static Var *find_indexkey_var(int indexkey, List *tlist); -static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist, - List *joinclauses); +static List *make_canonical_pathkey(Query *root, PathKeyItem *item); +static Var *find_indexkey_var(Query *root, RelOptInfo *rel, + AttrNumber varattno); /*-------------------- @@ -50,50 +51,122 @@ static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist, * Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since * we can say nothing about the overall order of its result. Also, an * indexscan on an unordered type of index generates NIL pathkeys. However, - * we can always create a pathkey by doing an explicit sort. - * - * Multi-relation RelOptInfo Path's are more complicated. Mergejoins are - * only performed with equijoins ("="). Because of this, the resulting - * multi-relation path actually has more than one primary key. For example, - * a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys - * of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major - * sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1. - * They are equal, so they are both primary sort keys. This allows future - * joins to use either var as a pre-sorted key to prevent upper Mergejoins - * from having to re-sort the Path. This is why pathkeys is a List of Lists. - * - * Note that while the order of the top list is meaningful (primary vs. - * secondary sort key), the order of each sublist is arbitrary. No code - * working with pathkeys should generate a result that depends on the order - * of a pathkey sublist. + * we can always create a pathkey by doing an explicit sort. The pathkeys + * for a sort plan's output just represent the sort key fields and the + * ordering operators used. + * + * Things get more interesting when we consider joins. Suppose we do a + * mergejoin between A and B using the mergeclause A.X = B.Y. The output + * of the mergejoin is sorted by X --- but it is also sorted by Y. We + * represent this fact by listing both keys in a single pathkey sublist: + * ( (A.X/xsortop B.Y/ysortop) ). This pathkey asserts that the major + * sort order of the Path can be taken to be *either* A.X or B.Y. + * They are equal, so they are both primary sort keys. By doing this, + * we allow future joins to use either var as a pre-sorted key, so upper + * Mergejoins may be able to avoid having to re-sort the Path. This is + * why pathkeys is a List of Lists. * * We keep a sortop associated with each PathKeyItem because cross-data-type - * mergejoins are possible; for example int4=int8 is mergejoinable. In this - * case we need to remember that the left var is ordered by int4lt while - * the right var is ordered by int8lt. So the different members of each - * sublist could have different sortops. - * - * When producing the pathkeys for a merge or nestloop join, we can keep - * all of the keys of the outer path, since the ordering of the outer path - * will be preserved in the result. We add to each pathkey sublist any inner - * vars that are equijoined to any of the outer vars in the sublist. In the - * nestloop case we have to be careful to consider only equijoin operators; - * the nestloop's join clauses might include non-equijoin operators. - * (Currently, we do this by considering only mergejoinable operators while - * making the pathkeys, since we have no separate marking for operators that - * are equijoins but aren't mergejoinable.) + * mergejoins are possible; for example int4 = int8 is mergejoinable. + * In this case we need to remember that the left var is ordered by int4lt + * while the right var is ordered by int8lt. So the different members of + * each sublist could have different sortops. + * + * Note that while the order of the top list is meaningful (primary vs. + * secondary sort key), the order of each sublist is arbitrary. Each sublist + * should be regarded as a set of equivalent keys, with no significance + * to the list order. + * + * With a little further thought, it becomes apparent that pathkeys for + * joins need not only come from mergejoins. For example, if we do a + * nestloop join between outer relation A and inner relation B, then any + * pathkeys relevant to A are still valid for the join result: we have + * not altered the order of the tuples from A. Even more interesting, + * if there was a mergeclause (more formally, an "equijoin clause") A.X=B.Y, + * and A.X was a pathkey for the outer relation A, then we can assert that + * B.Y is a pathkey for the join result; X was ordered before and still is, + * and the joined values of Y are equal to the joined values of X, so Y + * must now be ordered too. This is true even though we used no mergejoin. + * + * More generally, whenever we have an equijoin clause A.X = B.Y and a + * pathkey A.X, we can add B.Y to that pathkey if B is part of the joined + * relation the pathkey is for, *no matter how we formed the join*. + * + * In short, then: when producing the pathkeys for a merge or nestloop join, + * we can keep all of the keys of the outer path, since the ordering of the + * outer path will be preserved in the result. Furthermore, we can add to + * each pathkey sublist any inner vars that are equijoined to any of the + * outer vars in the sublist; this works regardless of whether we are + * implementing the join using that equijoin clause as a mergeclause, + * or merely enforcing the clause after-the-fact as a qpqual filter. * * Although Hashjoins also work only with equijoin operators, it is *not* * safe to consider the output of a Hashjoin to be sorted in any particular * order --- not even the outer path's order. This is true because the * executor might have to split the join into multiple batches. Therefore - * a Hashjoin is always given NIL pathkeys. + * a Hashjoin is always given NIL pathkeys. (Also, we need to use only + * mergejoinable operators when deducing which inner vars are now sorted, + * because a mergejoin operator tells us which left- and right-datatype + * sortops can be considered equivalent, whereas a hashjoin operator + * doesn't imply anything about sort order.) * * Pathkeys are also useful to represent an ordering that we wish to achieve, * since they are easily compared to the pathkeys of a potential candidate * path. So, SortClause lists are turned into pathkeys lists for use inside * the optimizer. * + * OK, now for how it *really* works: + * + * We did implement pathkeys just as described above, and found that the + * planner spent a huge amount of time comparing pathkeys, because the + * representation of pathkeys as unordered lists made it expensive to decide + * whether two were equal or not. So, we've modified the representation + * as described next. + * + * If we scan the WHERE clause for equijoin clauses (mergejoinable clauses) + * during planner startup, we can construct lists of equivalent pathkey items + * for the query. There could be more than two items per equivalence set; + * for example, WHERE A.X = B.Y AND B.Y = C.Z AND D.R = E.S creates the + * equivalence sets { A.X B.Y C.Z } and { D.R E.S } (plus associated sortops). + * Any pathkey item that belongs to an equivalence set implies that all the + * other items in its set apply to the relation too, or at least all the ones + * that are for fields present in the relation. (Some of the items in the + * set might be for as-yet-unjoined relations.) Furthermore, any multi-item + * pathkey sublist that appears at any stage of planning the query *must* be + * a subset of one or another of these equivalence sets; there's no way we'd + * have put two items in the same pathkey sublist unless they were equijoined + * in WHERE. + * + * Now suppose that we allow a pathkey sublist to contain pathkey items for + * vars that are not yet part of the pathkey's relation. This introduces + * no logical difficulty, because such items can easily be seen to be + * irrelevant; we just mandate that they be ignored. But having allowed + * this, we can declare (by fiat) that any multiple-item pathkey sublist + * must be equal() to the appropriate equivalence set. In effect, whenever + * we make a pathkey sublist that mentions any var appearing in an + * equivalence set, we instantly add all the other vars equivalenced to it, + * whether they appear yet in the pathkey's relation or not. And we also + * mandate that the pathkey sublist appear in the same order as the + * equivalence set it comes from. (In practice, we simply return a pointer + * to the relevant equivalence set without building any new sublist at all.) + * This makes comparing pathkeys very simple and fast, and saves a lot of + * work and memory space for pathkey construction as well. + * + * Note that pathkey sublists having just one item still exist, and are + * not expected to be equal() to any equivalence set. This occurs when + * we describe a sort order that involves a var that's not mentioned in + * any equijoin clause of the WHERE. We could add singleton sets containing + * such vars to the query's list of equivalence sets, but there's little + * point in doing so. + * + * By the way, it's OK and even useful for us to build equivalence sets + * that mention multiple vars from the same relation. For example, if + * we have WHERE A.X = A.Y and we are scanning A using an index on X, + * we can legitimately conclude that the path is sorted by Y as well; + * and this could be handy if Y is the variable used in other join clauses + * or ORDER BY. So, any WHERE clause with a mergejoinable operator can + * contribute to an equivalence set, even if it's not a join clause. + * * -- bjm & tgl *-------------------- */ @@ -113,6 +186,129 @@ makePathKeyItem(Node *key, Oid sortop) return item; } +/* + * add_equijoined_keys + * The given clause has a mergejoinable operator, so its two sides + * can be considered equal after restriction clause application; in + * particular, any pathkey mentioning one side (with the correct sortop) + * can be expanded to include the other as well. Record the vars and + * associated sortops in the query's equi_key_list for future use. + * + * The query's equi_key_list field points to a list of sublists of PathKeyItem + * nodes, where each sublist is a set of two or more vars+sortops that have + * been identified as logically equivalent (and, therefore, we may consider + * any two in a set to be equal). As described above, we will subsequently + * use direct pointers to one of these sublists to represent any pathkey + * that involves an equijoined variable. + * + * This code would actually work fine with expressions more complex than + * a single Var, but currently it won't see any because check_mergejoinable + * won't accept such clauses as mergejoinable. + */ +void +add_equijoined_keys(Query *root, RestrictInfo *restrictinfo) +{ + Expr *clause = restrictinfo->clause; + PathKeyItem *item1 = makePathKeyItem((Node *) get_leftop(clause), + restrictinfo->left_sortop); + PathKeyItem *item2 = makePathKeyItem((Node *) get_rightop(clause), + restrictinfo->right_sortop); + List *newset, + *cursetlink; + + /* We might see a clause X=X; don't make a single-element list from it */ + if (equal(item1, item2)) + return; + /* + * Our plan is to make a two-element set, then sweep through the existing + * equijoin sets looking for matches to item1 or item2. When we find one, + * we remove that set from equi_key_list and union it into our new set. + * When done, we add the new set to the front of equi_key_list. + * + * This is a standard UNION-FIND problem, for which there exist better + * data structures than simple lists. If this code ever proves to be + * a bottleneck then it could be sped up --- but for now, simple is + * beautiful. + */ + newset = lcons(item1, lcons(item2, NIL)); + + foreach(cursetlink, root->equi_key_list) + { + List *curset = lfirst(cursetlink); + + if (member(item1, curset) || member(item2, curset)) + { + /* Found a set to merge into our new set */ + newset = LispUnion(newset, curset); + /* Remove old set from equi_key_list. NOTE this does not change + * lnext(cursetlink), so the outer foreach doesn't break. + */ + root->equi_key_list = lremove(curset, root->equi_key_list); + freeList(curset); /* might as well recycle old cons cells */ + } + } + + root->equi_key_list = lcons(newset, root->equi_key_list); +} + +/* + * make_canonical_pathkey + * Given a PathKeyItem, find the equi_key_list subset it is a member of, + * if any. If so, return a pointer to that sublist, which is the + * canonical representation (for this query) of that PathKeyItem's + * equivalence set. If it is not found, return a single-element list + * containing the PathKeyItem (when the item has no equivalence peers, + * we just allow it to be a standalone list). + * + * Note that this function must not be used until after we have completed + * scanning the WHERE clause for equijoin operators. + */ +static List * +make_canonical_pathkey(Query *root, PathKeyItem *item) +{ + List *cursetlink; + + foreach(cursetlink, root->equi_key_list) + { + List *curset = lfirst(cursetlink); + + if (member(item, curset)) + return curset; + } + return lcons(item, NIL); +} + +/* + * canonicalize_pathkeys + * Convert a not-necessarily-canonical pathkeys list to canonical form. + * + * Note that this function must not be used until after we have completed + * scanning the WHERE clause for equijoin operators. + */ +List * +canonicalize_pathkeys(Query *root, List *pathkeys) +{ + List *new_pathkeys = NIL; + List *i; + + foreach(i, pathkeys) + { + List *pathkey = (List *) lfirst(i); + PathKeyItem *item; + + /* + * It's sufficient to look at the first entry in the sublist; + * if there are more entries, they're already part of an + * equivalence set by definition. + */ + Assert(pathkey != NIL); + item = (PathKeyItem *) lfirst(pathkey); + new_pathkeys = lappend(new_pathkeys, + make_canonical_pathkey(root, item)); + } + return new_pathkeys; +} + /**************************************************************************** * PATHKEY COMPARISONS ****************************************************************************/ @@ -126,15 +322,21 @@ makePathKeyItem(Node *key, Oid sortop) * it contains all the keys of the other plus more. For example, either * ((A) (B)) or ((A B)) is better than ((A)). * - * This gets called a lot, so it is optimized. + * Because we actually only expect to see canonicalized pathkey sublists, + * we don't have to do the full two-way-subset-inclusion test on each + * pair of sublists that is implied by the above statement. Instead we + * just do an equal(). In the normal case where multi-element sublists + * are pointers into the root's equi_key_list, equal() will be very fast: + * it will recognize pointer equality when the sublists are the same, + * and will fail at the first sublist element when they are not. + * + * Yes, this gets called enough to be worth coding it this tensely. */ PathKeysComparison compare_pathkeys(List *keys1, List *keys2) { List *key1, *key2; - bool key1_subsetof_key2 = true, - key2_subsetof_key1 = true; for (key1 = keys1, key2 = keys2; key1 != NIL && key2 != NIL; @@ -142,36 +344,12 @@ compare_pathkeys(List *keys1, List *keys2) { List *subkey1 = lfirst(key1); List *subkey2 = lfirst(key2); - List *i; - /* We have to do this the hard way since the ordering of the subkey - * lists is arbitrary. + /* We will never have two subkeys where one is a subset of the other, + * because of the canonicalization explained above. Either they are + * equal or they ain't. */ - if (key1_subsetof_key2) - { - foreach(i, subkey1) - { - if (! member(lfirst(i), subkey2)) - { - key1_subsetof_key2 = false; - break; - } - } - } - - if (key2_subsetof_key1) - { - foreach(i, subkey2) - { - if (! member(lfirst(i), subkey1)) - { - key2_subsetof_key1 = false; - break; - } - } - } - - if (!key1_subsetof_key2 && !key2_subsetof_key1) + if (! equal(subkey1, subkey2)) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } @@ -180,18 +358,11 @@ compare_pathkeys(List *keys1, List *keys2) * of the other list are not NIL --- no pathkey list should ever have * a NIL sublist.) */ - if (key1 != NIL) - key1_subsetof_key2 = false; - if (key2 != NIL) - key2_subsetof_key1 = false; - - if (key1_subsetof_key2 && key2_subsetof_key1) + if (key1 == NIL && key2 == NIL) return PATHKEYS_EQUAL; - if (key1_subsetof_key2) - return PATHKEYS_BETTER2; - if (key2_subsetof_key1) - return PATHKEYS_BETTER1; - return PATHKEYS_DIFFERENT; + if (key1 != NIL) + return PATHKEYS_BETTER1; /* key1 is longer */ + return PATHKEYS_BETTER2; /* key2 is longer */ } /* @@ -215,16 +386,16 @@ pathkeys_contained_in(List *keys1, List *keys2) /* * get_cheapest_path_for_pathkeys - * Find the cheapest path in 'paths' that satisfies the given pathkeys. - * Return NULL if no such path. + * Find the cheapest path (according to the specified criterion) that + * satisfies the given pathkeys. Return NULL if no such path. * - * 'paths' is a list of possible paths (either inner or outer) - * 'pathkeys' represents a required ordering - * if 'indexpaths_only' is true, only IndexPaths will be considered. + * 'paths' is a list of possible paths that all generate the same relation + * 'pathkeys' represents a required ordering (already canonicalized!) + * 'cost_criterion' is STARTUP_COST or TOTAL_COST */ Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, - bool indexpaths_only) + CostSelector cost_criterion) { Path *matched_path = NULL; List *i; @@ -233,15 +404,55 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, { Path *path = (Path *) lfirst(i); - if (indexpaths_only && ! IsA(path, IndexPath)) + /* + * Since cost comparison is a lot cheaper than pathkey comparison, + * do that first. (XXX is that still true?) + */ + if (matched_path != NULL && + compare_path_costs(matched_path, path, cost_criterion) <= 0) continue; if (pathkeys_contained_in(pathkeys, path->pathkeys)) - { - if (matched_path == NULL || - path->path_cost < matched_path->path_cost) - matched_path = path; - } + matched_path = path; + } + return matched_path; +} + +/* + * get_cheapest_fractional_path_for_pathkeys + * Find the cheapest path (for retrieving a specified fraction of all + * the tuples) that satisfies the given pathkeys. + * Return NULL if no such path. + * + * See compare_fractional_path_costs() for the interpretation of the fraction + * parameter. + * + * 'paths' is a list of possible paths that all generate the same relation + * 'pathkeys' represents a required ordering (already canonicalized!) + * 'fraction' is the fraction of the total tuples expected to be retrieved + */ +Path * +get_cheapest_fractional_path_for_pathkeys(List *paths, + List *pathkeys, + double fraction) +{ + Path *matched_path = NULL; + List *i; + + foreach(i, paths) + { + Path *path = (Path *) lfirst(i); + + /* + * Since cost comparison is a lot cheaper than pathkey comparison, + * do that first. + */ + if (matched_path != NULL && + compare_fractional_path_costs(matched_path, path, fraction) <= 0) + continue; + + if (pathkeys_contained_in(pathkeys, path->pathkeys)) + matched_path = path; } return matched_path; } @@ -255,18 +466,22 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, * Build a pathkeys list that describes the ordering induced by an index * scan using the given index. (Note that an unordered index doesn't * induce any ordering; such an index will have no sortop OIDS in - * its "ordering" field.) + * its "ordering" field, and we will return NIL.) * - * Vars in the resulting pathkeys list are taken from the rel's targetlist. - * If we can't find the indexkey in the targetlist, we assume that the - * ordering of that key is not interesting. + * If 'scandir' is BackwardScanDirection, attempt to build pathkeys + * representing a backwards scan of the index. Return NIL if can't do it. */ List * -build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) +build_index_pathkeys(Query *root, + RelOptInfo *rel, + IndexOptInfo *index, + ScanDirection scandir) { List *retval = NIL; int *indexkeys = index->indexkeys; Oid *ordering = index->ordering; + PathKeyItem *item; + Oid sortop; if (!indexkeys || indexkeys[0] == 0 || !ordering || ordering[0] == InvalidOid) @@ -275,8 +490,6 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) if (index->indproc) { /* Functional index: build a representation of the function call */ - int relid = lfirsti(rel->relids); - Oid reloid = getrelid(relid, root->rtable); Func *funcnode = makeNode(Func); List *funcargs = NIL; @@ -291,43 +504,42 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) while (*indexkeys != 0) { - int varattno = *indexkeys; - Oid vartypeid = get_atttype(reloid, varattno); - int32 type_mod = get_atttypmod(reloid, varattno); - funcargs = lappend(funcargs, - makeVar(relid, varattno, vartypeid, - type_mod, 0)); + find_indexkey_var(root, rel, *indexkeys)); indexkeys++; } + sortop = *ordering; + if (ScanDirectionIsBackward(scandir)) + { + sortop = get_commutator(sortop); + if (sortop == InvalidOid) + return NIL; /* oops, no reverse sort operator? */ + } + /* Make a one-sublist pathkeys list for the function expression */ - retval = lcons(lcons( - makePathKeyItem((Node *) make_funcclause(funcnode, funcargs), - *ordering), - NIL), NIL); + item = makePathKeyItem((Node *) make_funcclause(funcnode, funcargs), + sortop); + retval = lcons(make_canonical_pathkey(root, item), NIL); } else { /* Normal non-functional index */ - List *rel_tlist = rel->targetlist; - while (*indexkeys != 0 && *ordering != InvalidOid) { - Var *relvar = find_indexkey_var(*indexkeys, rel_tlist); + Var *relvar = find_indexkey_var(root, rel, *indexkeys); - /* If we can find no tlist entry for the n'th sort key, - * then we're done generating pathkeys; any subsequent sort keys - * no longer apply, since we can't represent the ordering properly - * even if there are tlist entries for them. - */ - if (!relvar) - break; - /* OK, make a one-element sublist for this sort key */ - retval = lappend(retval, - lcons(makePathKeyItem((Node *) relvar, - *ordering), - NIL)); + sortop = *ordering; + if (ScanDirectionIsBackward(scandir)) + { + sortop = get_commutator(sortop); + if (sortop == InvalidOid) + break; /* oops, no reverse sort operator? */ + } + + /* OK, make a sublist for this sort key */ + item = makePathKeyItem((Node *) relvar, sortop); + retval = lappend(retval, make_canonical_pathkey(root, item)); indexkeys++; ordering++; @@ -338,21 +550,37 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index) } /* - * Find a var in a relation's targetlist that matches an indexkey attrnum. + * Find or make a Var node for the specified attribute of the rel. + * + * We first look for the var in the rel's target list, because that's + * easy and fast. But the var might not be there (this should normally + * only happen for vars that are used in WHERE restriction clauses, + * but not in join clauses or in the SELECT target list). In that case, + * gin up a Var node the hard way. */ static Var * -find_indexkey_var(int indexkey, List *tlist) +find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno) { List *temp; + int relid; + Oid reloid, + vartypeid; + int32 type_mod; - foreach(temp, tlist) + foreach(temp, rel->targetlist) { Var *tle_var = get_expr(lfirst(temp)); - if (IsA(tle_var, Var) && tle_var->varattno == indexkey) + if (IsA(tle_var, Var) && tle_var->varattno == varattno) return tle_var; } - return NULL; + + relid = lfirsti(rel->relids); + reloid = getrelid(relid, root->rtable); + vartypeid = get_atttype(reloid, varattno); + type_mod = get_atttypmod(reloid, varattno); + + return makeVar(relid, varattno, vartypeid, type_mod, 0); } /* @@ -360,164 +588,33 @@ find_indexkey_var(int indexkey, List *tlist) * Build the path keys for a join relation constructed by mergejoin or * nestloop join. These keys should include all the path key vars of the * outer path (since the join will retain the ordering of the outer path) - * plus any vars of the inner path that are mergejoined to the outer vars. + * plus any vars of the inner path that are equijoined to the outer vars. * - * Per the discussion at the top of this file, mergejoined inner vars + * Per the discussion at the top of this file, equijoined inner vars * can be considered path keys of the result, just the same as the outer - * vars they were joined with. - * - * We can also use inner path vars as pathkeys of a nestloop join, but we - * must be careful that we only consider equijoin clauses and not general - * join clauses. For example, "t1.a < t2.b" might be a join clause of a - * nestloop, but it doesn't result in b acquiring the ordering of a! - * joinpath.c handles that problem by only passing this routine clauses - * that are marked mergejoinable, even if a nestloop join is being built. - * Therefore we only have 't1.a = t2.b' style clauses, and can expect that - * the inner var will acquire the outer's ordering no matter which join - * method is actually used. - * - * We drop pathkeys that are not vars of the join relation's tlist, - * on the assumption that they are not interesting to higher levels. - * (Is this correct?? To support expression pathkeys we might want to - * check that all vars mentioned in the key are in the tlist, instead.) - * - * All vars in the result are taken from the join relation's tlist, - * not from the given pathkeys or joinclauses. + * vars they were joined with; furthermore, it doesn't matter what kind + * of join algorithm is actually used. * * 'outer_pathkeys' is the list of the outer path's path keys * 'join_rel_tlist' is the target list of the join relation - * 'joinclauses' is the list of mergejoinable clauses to consider (note this - * is a list of RestrictInfos, not just bare qual clauses); can be NIL + * 'equi_key_list' is the query's list of pathkeyitem equivalence sets * * Returns the list of new path keys. - * */ List * build_join_pathkeys(List *outer_pathkeys, List *join_rel_tlist, - List *joinclauses) + List *equi_key_list) { - List *final_pathkeys = NIL; - List *i; - - foreach(i, outer_pathkeys) - { - List *outer_pathkey = lfirst(i); - List *new_pathkey; - - new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist, - joinclauses); - /* if we can find no sortable vars for the n'th sort key, - * then we're done generating pathkeys; any subsequent sort keys - * no longer apply, since we can't represent the ordering properly. - */ - if (new_pathkey == NIL) - break; - final_pathkeys = lappend(final_pathkeys, new_pathkey); - } - return final_pathkeys; -} - -/* - * build_join_pathkey - * Generate an individual pathkey sublist, consisting of the outer vars - * already mentioned in 'pathkey' plus any inner vars that are joined to - * them (and thus can now also be considered path keys, per discussion - * at the top of this file). - * - * Note that each returned pathkey uses the var node found in - * 'join_rel_tlist' rather than the input pathkey or joinclause var node. - * (Is this important?) - * - * Returns a new pathkey (list of PathKeyItems). - */ -static List * -build_join_pathkey(List *pathkey, - List *join_rel_tlist, - List *joinclauses) -{ - List *new_pathkey = NIL; - List *i, - *j; - - foreach(i, pathkey) - { - PathKeyItem *key = (PathKeyItem *) lfirst(i); - Node *tlist_key; - - Assert(key && IsA(key, PathKeyItem)); - - tlist_key = matching_tlist_expr(key->key, join_rel_tlist); - if (tlist_key) - new_pathkey = lcons(makePathKeyItem(tlist_key, - key->sortop), - new_pathkey); - - foreach(j, joinclauses) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); - Expr *joinclause = restrictinfo->clause; - /* We assume the clause is a binary opclause... */ - Node *l = (Node *) get_leftop(joinclause); - Node *r = (Node *) get_rightop(joinclause); - Node *other_var = NULL; - Oid other_sortop = InvalidOid; - - if (equal(key->key, l)) - { - other_var = r; - other_sortop = restrictinfo->right_sortop; - } - else if (equal(key->key, r)) - { - other_var = l; - other_sortop = restrictinfo->left_sortop; - } - - if (other_var && other_sortop) - { - tlist_key = matching_tlist_expr(other_var, join_rel_tlist); - if (tlist_key) - new_pathkey = lcons(makePathKeyItem(tlist_key, - other_sortop), - new_pathkey); - } - } - } - - return new_pathkey; -} - -/* - * commute_pathkeys - * Attempt to commute the operators in a set of pathkeys, producing - * pathkeys that describe the reverse sort order (DESC instead of ASC). - * Returns TRUE if successful (all the operators have commutators). - * - * CAUTION: given pathkeys are modified in place, even if not successful!! - * Usually, caller should have just built or copied the pathkeys list to - * ensure there are no unwanted side-effects. - */ -bool -commute_pathkeys(List *pathkeys) -{ - List *i; - - foreach(i, pathkeys) - { - List *pathkey = lfirst(i); - List *j; - - foreach(j, pathkey) - { - PathKeyItem *key = lfirst(j); - - key->sortop = get_commutator(key->sortop); - if (key->sortop == InvalidOid) - return false; - } - } - return true; /* successful */ + /* + * This used to be quite a complex bit of code, but now that all + * pathkey sublists start out life canonicalized, we don't have to + * do a darn thing here! The inner-rel vars we used to need to add + * are *already* part of the outer pathkey! + * + * I'd remove the routine entirely, but maybe someday we'll need it... + */ + return outer_pathkeys; } /**************************************************************************** @@ -529,11 +626,18 @@ commute_pathkeys(List *pathkeys) * Generate a pathkeys list that represents the sort order specified * by a list of SortClauses (GroupClauses will work too!) * + * NB: the result is NOT in canonical form, but must be passed through + * canonicalize_pathkeys() before it can be used for comparisons or + * labeling relation sort orders. (We do things this way because + * union_planner needs to be able to construct requested pathkeys before + * the pathkey equivalence sets have been created for the query.) + * * 'sortclauses' is a list of SortClause or GroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * -make_pathkeys_for_sortclauses(List *sortclauses, List *tlist) +make_pathkeys_for_sortclauses(List *sortclauses, + List *tlist) { List *pathkeys = NIL; List *i; @@ -546,7 +650,11 @@ make_pathkeys_for_sortclauses(List *sortclauses, List *tlist) sortkey = get_sortgroupclause_expr(sortcl, tlist); pathkey = makePathKeyItem(sortkey, sortcl->sortop); - /* pathkey becomes a one-element sublist */ + /* + * The pathkey becomes a one-element sublist, for now; + * canonicalize_pathkeys() might replace it with a longer + * sublist later. + */ pathkeys = lappend(pathkeys, lcons(pathkey, NIL)); } return pathkeys; @@ -599,6 +707,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) { PathKeyItem *keyitem = lfirst(j); Node *key = keyitem->key; + Oid keyop = keyitem->sortop; List *k; foreach(k, restrictinfos) @@ -607,8 +716,10 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) Assert(restrictinfo->mergejoinoperator != InvalidOid); - if ((equal(key, get_leftop(restrictinfo->clause)) || - equal(key, get_rightop(restrictinfo->clause))) && + if (((keyop == restrictinfo->left_sortop && + equal(key, get_leftop(restrictinfo->clause))) || + (keyop == restrictinfo->right_sortop && + equal(key, get_rightop(restrictinfo->clause)))) && ! member(restrictinfo, mergeclauses)) { matched_restrictinfo = restrictinfo; @@ -645,7 +756,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses * that will be used in a merge join. * 'tlist' is a relation target list for either the inner or outer - * side of the proposed join rel. + * side of the proposed join rel. (Not actually needed anymore) * * Returns a pathkeys list that can be applied to the indicated relation. * @@ -654,7 +765,9 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) * just make the keys, eh? */ List * -make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist) +make_pathkeys_for_mergeclauses(Query *root, + List *mergeclauses, + List *tlist) { List *pathkeys = NIL; List *i; @@ -664,32 +777,24 @@ make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist) RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); Node *key; Oid sortop; + PathKeyItem *item; Assert(restrictinfo->mergejoinoperator != InvalidOid); /* * Find the key and sortop needed for this mergeclause. * - * We can use either side of the mergeclause, since we haven't yet - * committed to which side will be inner. + * Both sides of the mergeclause should appear in one of the + * query's pathkey equivalence classes, so it doesn't matter + * which one we use here. */ - key = matching_tlist_expr((Node *) get_leftop(restrictinfo->clause), - tlist); + key = (Node *) get_leftop(restrictinfo->clause); sortop = restrictinfo->left_sortop; - if (! key) - { - key = matching_tlist_expr((Node *) get_rightop(restrictinfo->clause), - tlist); - sortop = restrictinfo->right_sortop; - } - if (! key) - elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key"); /* * Add a pathkey sublist for this sort item */ - pathkeys = lappend(pathkeys, - lcons(makePathKeyItem(key, sortop), - NIL)); + item = makePathKeyItem(key, sortop); + pathkeys = lappend(pathkeys, make_canonical_pathkey(root, item)); } return pathkeys; |