diff options
Diffstat (limited to 'src/backend/optimizer/util/pathnode.c')
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 129 |
1 files changed, 89 insertions, 40 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e1390aa1112..8ee6346e5b1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.135 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,12 +28,14 @@ #include "parser/parsetree.h" #include "utils/memutils.h" #include "utils/selfuncs.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" static List *translate_sub_tlist(List *tlist, int relid); -static bool query_is_distinct_for(Query *query, List *colnos); -static bool hash_safe_tlist(List *tlist); +static bool query_is_distinct_for(Query *query, List *colnos, List *opids); +static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool hash_safe_operators(List *opids); /***************************************************************************** @@ -733,6 +735,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) Path agg_path; /* dummy for result of cost_agg */ MemoryContext oldcontext; List *sub_targetlist; + List *in_operators; ListCell *l; int numCols; @@ -769,9 +772,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * Try to identify the targetlist that will actually be unique-ified. In * current usage, this routine is only used for sub-selects of IN clauses, - * so we should be able to find the tlist in in_info_list. + * so we should be able to find the tlist in in_info_list. Get the IN + * clause's operators, too, because they determine what "unique" means. */ sub_targetlist = NIL; + in_operators = NIL; foreach(l, root->in_info_list) { InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); @@ -779,6 +784,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) if (bms_equal(ininfo->righthand, rel->relids)) { sub_targetlist = ininfo->sub_targetlist; + in_operators = ininfo->in_operators; break; } } @@ -801,7 +807,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid); if (sub_tlist_colnos && - query_is_distinct_for(rte->subquery, sub_tlist_colnos)) + query_is_distinct_for(rte->subquery, + sub_tlist_colnos, in_operators)) { pathnode->umethod = UNIQUE_PATH_NOOP; pathnode->rows = rel->rows; @@ -847,11 +854,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * Is it safe to use a hashed implementation? If so, estimate and compare - * costs. We only try this if we know the targetlist for sure (else we - * can't be sure about the datatypes involved). + * costs. We only try this if we know the IN operators, else we can't + * check their hashability. */ pathnode->umethod = UNIQUE_PATH_SORT; - if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist)) + if (enable_hashagg && in_operators && hash_safe_operators(in_operators)) { /* * Estimate the overhead per hashtable entry at 64 bytes (same as in @@ -924,16 +931,25 @@ translate_sub_tlist(List *tlist, int relid) * * colnos is an integer list of output column numbers (resno's). We are * interested in whether rows consisting of just these columns are certain - * to be distinct. + * to be distinct. "Distinctness" is defined according to whether the + * corresponding upper-level equality operators listed in opids would think + * the values are distinct. (Note: the opids entries could be cross-type + * operators, and thus not exactly the equality operators that the subquery + * would use itself. We assume that the subquery is compatible if these + * operators appear in the same btree opfamily as the ones the subquery uses.) */ static bool -query_is_distinct_for(Query *query, List *colnos) +query_is_distinct_for(Query *query, List *colnos, List *opids) { ListCell *l; + Oid opid; + + Assert(list_length(colnos) == list_length(opids)); /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the - * columns in the DISTINCT clause appear in colnos. + * columns in the DISTINCT clause appear in colnos and operator + * semantics match. */ if (query->distinctClause) { @@ -943,7 +959,9 @@ query_is_distinct_for(Query *query, List *colnos) TargetEntry *tle = get_sortgroupclause_tle(scl, query->targetList); - if (!list_member_int(colnos, tle->resno)) + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, scl->sortop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -952,7 +970,7 @@ query_is_distinct_for(Query *query, List *colnos) /* * Similarly, GROUP BY guarantees uniqueness if all the grouped columns - * appear in colnos. + * appear in colnos and operator semantics match. */ if (query->groupClause) { @@ -962,7 +980,9 @@ query_is_distinct_for(Query *query, List *colnos) TargetEntry *tle = get_sortgroupclause_tle(grpcl, query->targetList); - if (!list_member_int(colnos, tle->resno)) + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, grpcl->sortop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -972,7 +992,7 @@ query_is_distinct_for(Query *query, List *colnos) { /* * If we have no GROUP BY, but do have aggregates or HAVING, then the - * result is at most one row so it's surely unique. + * result is at most one row so it's surely unique, for any operators. */ if (query->hasAggs || query->havingQual) return true; @@ -980,7 +1000,13 @@ query_is_distinct_for(Query *query, List *colnos) /* * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, - * except with ALL + * except with ALL. + * + * XXX this code knows that prepunion.c will adopt the default ordering + * operator for each column datatype as the sortop. It'd probably be + * better if these operators were chosen at parse time and stored into + * the parsetree, instead of leaving bits of the planner to decide + * semantics. */ if (query->setOperations) { @@ -996,8 +1022,13 @@ query_is_distinct_for(Query *query, List *colnos) { TargetEntry *tle = (TargetEntry *) lfirst(l); - if (!tle->resjunk && - !list_member_int(colnos, tle->resno)) + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, + ordering_oper_opid(exprType((Node *) tle->expr)))) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -1014,31 +1045,46 @@ query_is_distinct_for(Query *query, List *colnos) } /* - * hash_safe_tlist - can datatypes of given tlist be hashed? + * distinct_col_search - subroutine for query_is_distinct_for * - * We assume hashed aggregation will work if the datatype's equality operator - * is marked hashjoinable. + * If colno is in colnos, return the corresponding element of opids, + * else return InvalidOid. (We expect colnos does not contain duplicates, + * so the result is well-defined.) + */ +static Oid +distinct_col_search(int colno, List *colnos, List *opids) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, colnos, lc2, opids) + { + if (colno == lfirst_int(lc1)) + return lfirst_oid(lc2); + } + return InvalidOid; +} + +/* + * hash_safe_operators - can all the specified IN operators be hashed? * - * XXX this probably should be somewhere else. See also hash_safe_grouping - * in plan/planner.c. + * We assume hashed aggregation will work if each IN operator is marked + * hashjoinable. If the IN operators are cross-type, this could conceivably + * fail: the aggregation will need a hashable equality operator for the RHS + * datatype --- but it's pretty hard to conceive of a hash opclass that has + * cross-type hashing without support for hashing the individual types, so + * we don't expend cycles here to support the case. We could check + * get_compatible_hash_operator() instead of just op_hashjoinable(), but the + * former is a significantly more expensive test. */ static bool -hash_safe_tlist(List *tlist) +hash_safe_operators(List *opids) { - ListCell *tl; + ListCell *lc; - foreach(tl, tlist) + foreach(lc, opids) { - Node *expr = (Node *) lfirst(tl); - Operator optup; - bool oprcanhash; - - optup = equality_oper(exprType(expr), true); - if (!optup) - return false; - oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; - ReleaseSysCache(optup); - if (!oprcanhash) + if (!op_hashjoinable(lfirst_oid(lc))) return false; } return true; @@ -1156,6 +1202,7 @@ create_nestloop_path(PlannerInfo *root, * ordering for each merge clause * 'mergestrategies' are the btree operator strategies identifying the merge * ordering for each merge clause + * 'mergenullsfirst' are the nulls first/last flags for each merge clause * 'outersortkeys' are the sort varkeys for the outer relation * 'innersortkeys' are the sort varkeys for the inner relation */ @@ -1168,8 +1215,9 @@ create_mergejoin_path(PlannerInfo *root, List *restrict_clauses, List *pathkeys, List *mergeclauses, - List *mergefamilies, - List *mergestrategies, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, List *outersortkeys, List *innersortkeys) { @@ -1210,8 +1258,9 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.path.pathkeys = pathkeys; pathnode->path_mergeclauses = mergeclauses; - pathnode->path_mergefamilies = mergefamilies; - pathnode->path_mergestrategies = mergestrategies; + pathnode->path_mergeFamilies = mergefamilies; + pathnode->path_mergeStrategies = mergestrategies; + pathnode->path_mergeNullsFirst = mergenullsfirst; pathnode->outersortkeys = outersortkeys; pathnode->innersortkeys = innersortkeys; |