diff options
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 10 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 71 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 114 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 92 | ||||
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 41 | ||||
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 3 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 129 |
7 files changed, 321 insertions, 139 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 69d1a890e4b..e1c003b5047 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.173 2007/01/08 16:09:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1258,8 +1258,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *mergeclauses = path->path_mergeclauses; - List *mergefamilies = path->path_mergefamilies; - List *mergestrategies = path->path_mergestrategies; + Oid *mergeFamilies = path->path_mergeFamilies; + int *mergeStrategies = path->path_mergeStrategies; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; Cost startup_cost = 0; @@ -1357,8 +1357,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) firstclause = (RestrictInfo *) linitial(mergeclauses); if (firstclause->left_mergescansel < 0) /* not computed yet? */ mergejoinscansel(root, (Node *) firstclause->clause, - linitial_oid(mergefamilies), - linitial_int(mergestrategies), + mergeFamilies[0], + mergeStrategies[0], &firstclause->left_mergescansel, &firstclause->right_mergescansel); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index bde38af0ddd..2885b021546 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.109 2007/01/05 22:19:31 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,8 +40,10 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); -static void build_mergejoin_strat_lists(List *mergeclauses, - List **mergefamilies, List **mergestrategies); +static void build_mergejoin_strat_arrays(List *mergeclauses, + Oid **mergefamilies, + int **mergestrategies, + bool **mergenullsfirst); /* @@ -228,8 +230,9 @@ sort_inner_and_outer(PlannerInfo *root, List *front_pathkey = (List *) lfirst(l); List *cur_pathkeys; List *cur_mergeclauses; - List *mergefamilies; - List *mergestrategies; + Oid *mergefamilies; + int *mergestrategies; + bool *mergenullsfirst; List *outerkeys; List *innerkeys; List *merge_pathkeys; @@ -275,8 +278,10 @@ sort_inner_and_outer(PlannerInfo *root, outerkeys); /* Build opfamily info for execution */ - build_mergejoin_strat_lists(cur_mergeclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(cur_mergeclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); /* * And now we can make the path. @@ -292,6 +297,7 @@ sort_inner_and_outer(PlannerInfo *root, cur_mergeclauses, mergefamilies, mergestrategies, + mergenullsfirst, outerkeys, innerkeys)); } @@ -421,8 +427,9 @@ match_unsorted_outer(PlannerInfo *root, Path *outerpath = (Path *) lfirst(l); List *merge_pathkeys; List *mergeclauses; - List *mergefamilies; - List *mergestrategies; + Oid *mergefamilies; + int *mergestrategies; + bool *mergenullsfirst; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; @@ -530,8 +537,10 @@ match_unsorted_outer(PlannerInfo *root, innerrel); /* Build opfamily info for execution */ - build_mergejoin_strat_lists(mergeclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(mergeclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); /* * Generate a mergejoin on the basis of sorting the cheapest inner. @@ -550,6 +559,7 @@ match_unsorted_outer(PlannerInfo *root, mergeclauses, mergefamilies, mergestrategies, + mergenullsfirst, NIL, innersortkeys)); @@ -610,8 +620,10 @@ match_unsorted_outer(PlannerInfo *root, newclauses = mergeclauses; /* Build opfamily info for execution */ - build_mergejoin_strat_lists(newclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(newclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); add_path(joinrel, (Path *) create_mergejoin_path(root, @@ -624,6 +636,7 @@ match_unsorted_outer(PlannerInfo *root, newclauses, mergefamilies, mergestrategies, + mergenullsfirst, NIL, NIL)); cheapest_total_inner = innerpath; @@ -661,8 +674,10 @@ match_unsorted_outer(PlannerInfo *root, } /* Build opfamily info for execution */ - build_mergejoin_strat_lists(newclauses, - &mergefamilies, &mergestrategies); + build_mergejoin_strat_arrays(newclauses, + &mergefamilies, + &mergestrategies, + &mergenullsfirst); add_path(joinrel, (Path *) create_mergejoin_path(root, @@ -675,6 +690,7 @@ match_unsorted_outer(PlannerInfo *root, newclauses, mergefamilies, mergestrategies, + mergenullsfirst, NIL, NIL)); } @@ -981,20 +997,26 @@ select_mergejoin_clauses(RelOptInfo *joinrel, } /* - * Temporary hack to build opfamily and strategy lists needed for mergejoin + * Temporary hack to build opfamily and strategy info needed for mergejoin * by the executor. We need to rethink the planner's handling of merge * planning so that it can deal with multiple possible merge orders, but * that's not done yet. */ static void -build_mergejoin_strat_lists(List *mergeclauses, - List **mergefamilies, List **mergestrategies) +build_mergejoin_strat_arrays(List *mergeclauses, + Oid **mergefamilies, + int **mergestrategies, + bool **mergenullsfirst) { + int nClauses = list_length(mergeclauses); + int i; ListCell *l; - *mergefamilies = NIL; - *mergestrategies = NIL; + *mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); + *mergestrategies = (int *) palloc(nClauses * sizeof(int)); + *mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); + i = 0; foreach(l, mergeclauses) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); @@ -1003,11 +1025,16 @@ build_mergejoin_strat_lists(List *mergeclauses, * We do not need to worry about whether the mergeclause will be * commuted at runtime --- it's the same opfamily either way. */ - *mergefamilies = lappend_oid(*mergefamilies, restrictinfo->mergeopfamily); + (*mergefamilies)[i] = restrictinfo->mergeopfamily; /* * For the moment, strategy must always be LessThan --- see * hack version of get_op_mergejoin_info */ - *mergestrategies = lappend_int(*mergestrategies, BTLessStrategyNumber); + (*mergestrategies)[i] = BTLessStrategyNumber; + + /* And we only allow NULLS LAST, too */ + (*mergenullsfirst)[i] = false; + + i++; } } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8f1d8e81cc4..9a8204392a1 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.220 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.221 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -113,7 +113,10 @@ static HashJoin *make_hashjoin(List *tlist, static Hash *make_hash(Plan *lefttree); static MergeJoin *make_mergejoin(List *tlist, List *joinclauses, List *otherclauses, - List *mergeclauses, List *mergefamilies, List *mergestrategies, + List *mergeclauses, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, Plan *lefttree, Plan *righttree, JoinType jointype); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, @@ -595,6 +598,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) Plan *plan; Plan *subplan; List *uniq_exprs; + List *in_operators; List *newtlist; int nextresno; bool newitems; @@ -626,10 +630,12 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) * To find the correct list of values to unique-ify, we look in the * information saved for IN expressions. If this code is ever used in * other scenarios, some other way of finding what to unique-ify will - * be needed. + * be needed. The IN clause's operators are needed too, since they + * determine what the meaning of "unique" is in this context. *---------- */ uniq_exprs = NIL; /* just to keep compiler quiet */ + in_operators = NIL; foreach(l, root->in_info_list) { InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); @@ -637,6 +643,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) if (bms_equal(ininfo->righthand, best_path->path.parent->relids)) { uniq_exprs = ininfo->sub_targetlist; + in_operators = ininfo->in_operators; break; } } @@ -687,8 +694,8 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) newtlist = subplan->targetlist; numGroupCols = list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); - groupColPos = 0; + groupColPos = 0; foreach(l, uniq_exprs) { Node *uniqexpr = lfirst(l); @@ -703,10 +710,31 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) if (best_path->umethod == UNIQUE_PATH_HASH) { long numGroups; + Oid *groupOperators; numGroups = (long) Min(best_path->rows, (double) LONG_MAX); /* + * Get the (presumed hashable) equality operators for the Agg node + * to use. Normally these are the same as the IN clause operators, + * but if those are cross-type operators then the equality operators + * are the ones for the IN clause operators' RHS datatype. + */ + groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid)); + groupColPos = 0; + foreach(l, in_operators) + { + Oid in_oper = lfirst_oid(l); + Oid eq_oper; + + eq_oper = get_compatible_hash_operator(in_oper, false); + if (!OidIsValid(eq_oper)) /* shouldn't happen */ + elog(ERROR, "could not find compatible hash operator for operator %u", + in_oper); + groupOperators[groupColPos++] = eq_oper; + } + + /* * Since the Agg node is going to project anyway, we can give it the * minimum output tlist, without any stuff we might have added to the * subplan tlist. @@ -717,6 +745,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) AGG_HASHED, numGroupCols, groupColIdx, + groupOperators, numGroups, 0, subplan); @@ -725,18 +754,29 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) { List *sortList = NIL; - for (groupColPos = 0; groupColPos < numGroupCols; groupColPos++) + /* Create an ORDER BY list to sort the input compatibly */ + groupColPos = 0; + foreach(l, in_operators) { + Oid in_oper = lfirst_oid(l); + Oid sortop; TargetEntry *tle; + SortClause *sortcl; + sortop = get_ordering_op_for_equality_op(in_oper, false); + if (!OidIsValid(sortop)) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); tle = get_tle_by_resno(subplan->targetlist, groupColIdx[groupColPos]); Assert(tle != NULL); - sortList = addTargetToSortList(NULL, tle, - sortList, subplan->targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, false); + sortcl = makeNode(SortClause); + sortcl->tleSortGroupRef = assignSortGroupRef(tle, + subplan->targetlist); + sortcl->sortop = sortop; + sortcl->nulls_first = false; + sortList = lappend(sortList, sortcl); + groupColPos++; } plan = (Plan *) make_sort_from_sortclauses(root, sortList, subplan); plan = (Plan *) make_unique(plan, sortList); @@ -1542,8 +1582,9 @@ create_mergejoin_plan(PlannerInfo *root, joinclauses, otherclauses, mergeclauses, - best_path->path_mergefamilies, - best_path->path_mergestrategies, + best_path->path_mergeFamilies, + best_path->path_mergeStrategies, + best_path->path_mergeNullsFirst, outer_plan, inner_plan, best_path->jpath.jointype); @@ -2335,8 +2376,9 @@ make_mergejoin(List *tlist, List *joinclauses, List *otherclauses, List *mergeclauses, - List *mergefamilies, - List *mergestrategies, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, Plan *lefttree, Plan *righttree, JoinType jointype) @@ -2350,8 +2392,9 @@ make_mergejoin(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->mergeclauses = mergeclauses; - node->mergefamilies = mergefamilies; - node->mergestrategies = mergestrategies; + node->mergeFamilies = mergefamilies; + node->mergeStrategies = mergestrategies; + node->mergeNullsFirst = mergenullsfirst; node->join.jointype = jointype; node->join.joinqual = joinclauses; @@ -2613,7 +2656,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) * This might look like it could be merged with make_sort_from_sortclauses, * but presently we *must* use the grpColIdx[] array to locate sort columns, * because the child plan's tlist is not marked with ressortgroupref info - * appropriate to the grouping node. So, only the sort direction info + * appropriate to the grouping node. So, only the sort ordering info * is used from the GroupClause entries. */ Sort * @@ -2716,7 +2759,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, - int numGroupCols, AttrNumber *grpColIdx, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, long numGroups, int numAggs, Plan *lefttree) { @@ -2728,6 +2771,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, node->aggstrategy = aggstrategy; node->numCols = numGroupCols; node->grpColIdx = grpColIdx; + node->grpOperators = grpOperators; node->numGroups = numGroups; copy_plan_costsize(plan, lefttree); /* only care about copying size */ @@ -2784,6 +2828,7 @@ make_group(PlannerInfo *root, List *qual, int numGroupCols, AttrNumber *grpColIdx, + Oid *grpOperators, double numGroups, Plan *lefttree) { @@ -2794,6 +2839,7 @@ make_group(PlannerInfo *root, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; + node->grpOperators = grpOperators; copy_plan_costsize(plan, lefttree); /* only care about copying size */ cost_group(&group_path, root, @@ -2841,7 +2887,8 @@ make_group(PlannerInfo *root, /* * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the Unique filter. + * that should be considered by the Unique filter. The input path must + * already be sorted accordingly. */ Unique * make_unique(Plan *lefttree, List *distinctList) @@ -2851,6 +2898,7 @@ make_unique(Plan *lefttree, List *distinctList) int numCols = list_length(distinctList); int keyno = 0; AttrNumber *uniqColIdx; + Oid *uniqOperators; ListCell *slitem; copy_plan_costsize(plan, lefttree); @@ -2874,28 +2922,37 @@ make_unique(Plan *lefttree, List *distinctList) plan->righttree = NULL; /* - * convert SortClause list into array of attr indexes, as wanted by exec + * convert SortClause list into arrays of attr indexes and equality + * operators, as wanted by executor */ Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { SortClause *sortcl = (SortClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); - uniqColIdx[keyno++] = tle->resno; + uniqColIdx[keyno] = tle->resno; + uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); + if (!OidIsValid(uniqOperators[keyno])) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + sortcl->sortop); + keyno++; } node->numCols = numCols; node->uniqColIdx = uniqColIdx; + node->uniqOperators = uniqOperators; return node; } /* * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the SetOp filter. + * that should be considered by the SetOp filter. The input path must + * already be sorted accordingly. */ SetOp * make_setop(SetOpCmd cmd, Plan *lefttree, @@ -2906,6 +2963,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree, int numCols = list_length(distinctList); int keyno = 0; AttrNumber *dupColIdx; + Oid *dupOperators; ListCell *slitem; copy_plan_costsize(plan, lefttree); @@ -2930,22 +2988,30 @@ make_setop(SetOpCmd cmd, Plan *lefttree, plan->righttree = NULL; /* - * convert SortClause list into array of attr indexes, as wanted by exec + * convert SortClause list into arrays of attr indexes and equality + * operators, as wanted by executor */ Assert(numCols > 0); dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { SortClause *sortcl = (SortClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); - dupColIdx[keyno++] = tle->resno; + dupColIdx[keyno] = tle->resno; + dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); + if (!OidIsValid(dupOperators[keyno])) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + sortcl->sortop); + keyno++; } node->cmd = cmd; node->numCols = numCols; node->dupColIdx = dupColIdx; + node->dupOperators = dupOperators; node->flagColIdx = flagColIdx; return node; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a4de2dd36a5..5fcfc58bf9b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.210 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.211 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,7 @@ #include "parser/parse_expr.h" #include "parser/parse_oper.h" #include "parser/parsetree.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" @@ -62,10 +63,11 @@ static bool is_dummy_plan(Plan *plan); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); +static Oid *extract_grouping_ops(List *groupClause); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCounts *agg_counts); -static bool hash_safe_grouping(PlannerInfo *root); + Oid *groupOperators, double dNumGroups, + AggClauseCounts *agg_counts); static List *make_subplanTargetList(PlannerInfo *root, List *tlist, AttrNumber **groupColIdx, bool *need_tlist_eval); static void locate_grouping_columns(PlannerInfo *root, @@ -750,6 +752,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) List *sub_tlist; List *group_pathkeys; AttrNumber *groupColIdx = NULL; + Oid *groupOperators = NULL; bool need_tlist_eval = true; QualCost tlist_cost; Path *cheapest_path; @@ -829,14 +832,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) sort_pathkeys = root->sort_pathkeys; /* - * If grouping, decide whether we want to use hashed grouping. + * If grouping, extract the grouping operators and decide whether we + * want to use hashed grouping. */ if (parse->groupClause) { + groupOperators = extract_grouping_ops(parse->groupClause); use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, cheapest_path, sorted_path, - dNumGroups, &agg_counts); + groupOperators, dNumGroups, + &agg_counts); /* Also convert # groups to long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); @@ -956,6 +962,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) AGG_HASHED, numGroupCols, groupColIdx, + groupOperators, numGroups, agg_counts.numAggs, result_plan); @@ -999,6 +1006,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) aggstrategy, numGroupCols, groupColIdx, + groupOperators, numGroups, agg_counts.numAggs, result_plan); @@ -1027,6 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) (List *) parse->havingQual, numGroupCols, groupColIdx, + groupOperators, dNumGroups, result_plan); /* The Group node won't change sort ordering */ @@ -1338,12 +1347,41 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction, } /* + * extract_grouping_ops - make an array of the equality operator OIDs + * for the GROUP BY clause + */ +static Oid * +extract_grouping_ops(List *groupClause) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *groupOperators; + ListCell *glitem; + + groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + GroupClause *groupcl = (GroupClause *) lfirst(glitem); + + groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop); + if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + groupcl->sortop); + colno++; + } + + return groupOperators; +} + +/* * choose_hashed_grouping - should we use hashed grouping? */ static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCounts *agg_counts) + Oid *groupOperators, double dNumGroups, + AggClauseCounts *agg_counts) { int numGroupCols = list_length(root->parse->groupClause); double cheapest_path_rows; @@ -1352,10 +1390,13 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, List *current_pathkeys; Path hashed_p; Path sorted_p; + int i; /* * Check can't-do-it conditions, including whether the grouping operators - * are hashjoinable. + * are hashjoinable. (We assume hashing is OK if they are marked + * oprcanhash. If there isn't actually a supporting hash function, + * the executor will complain at runtime.) * * Executor doesn't support hashed aggregation with DISTINCT aggregates. * (Doing so would imply storing *all* the input values in the hash table, @@ -1365,8 +1406,11 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, return false; if (agg_counts->numDistinctAggs != 0) return false; - if (!hash_safe_grouping(root)) - return false; + for (i = 0; i < numGroupCols; i++) + { + if (!op_hashjoinable(groupOperators[i])) + return false; + } /* * Don't do it if it doesn't look like the hashtable will fit into @@ -1471,36 +1515,6 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, return false; } -/* - * hash_safe_grouping - are grouping operators hashable? - * - * We assume hashed aggregation will work if the datatype's equality operator - * is marked hashjoinable. - */ -static bool -hash_safe_grouping(PlannerInfo *root) -{ - ListCell *gl; - - foreach(gl, root->parse->groupClause) - { - GroupClause *grpcl = (GroupClause *) lfirst(gl); - TargetEntry *tle = get_sortgroupclause_tle(grpcl, - root->parse->targetList); - Operator optup; - bool oprcanhash; - - optup = equality_oper(exprType((Node *) tle->expr), true); - if (!optup) - return false; - oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; - ReleaseSysCache(optup); - if (!oprcanhash) - return false; - } - return true; -} - /*--------------- * make_subplanTargetList * Generate appropriate target list when grouping is required. diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ed76612dc0a..7339445e046 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.116 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.117 2007/01/10 18:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -671,11 +671,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) { Query *parse = root->parse; Query *subselect = (Query *) sublink->subselect; + List *in_operators; Relids left_varnos; int rtindex; RangeTblEntry *rte; RangeTblRef *rtr; InClauseInfo *ininfo; + Node *result; /* * The sublink type must be "= ANY" --- that is, an IN operator. We @@ -689,15 +691,31 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) return NULL; if (sublink->testexpr && IsA(sublink->testexpr, OpExpr)) { + Oid opno = ((OpExpr *) sublink->testexpr)->opno; List *opfamilies; List *opstrats; - get_op_btree_interpretation(((OpExpr *) sublink->testexpr)->opno, - &opfamilies, &opstrats); + get_op_btree_interpretation(opno, &opfamilies, &opstrats); if (!list_member_int(opstrats, ROWCOMPARE_EQ)) return NULL; + in_operators = list_make1_oid(opno); + } + else if (and_clause(sublink->testexpr)) + { + ListCell *lc; + + /* OK, but we need to extract the per-column operator OIDs */ + in_operators = NIL; + foreach(lc, ((BoolExpr *) sublink->testexpr)->args) + { + OpExpr *op = (OpExpr *) lfirst(lc); + + if (!IsA(op, OpExpr)) /* probably shouldn't happen */ + return NULL; + in_operators = lappend_oid(in_operators, op->opno); + } } - else if (!and_clause(sublink->testexpr)) + else return NULL; /* @@ -745,16 +763,23 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ininfo = makeNode(InClauseInfo); ininfo->lefthand = left_varnos; ininfo->righthand = bms_make_singleton(rtindex); - root->in_info_list = lappend(root->in_info_list, ininfo); + ininfo->in_operators = in_operators; /* * Build the result qual expression. As a side effect, * ininfo->sub_targetlist is filled with a list of Vars representing the * subselect outputs. */ - return convert_testexpr(sublink->testexpr, - rtindex, - &ininfo->sub_targetlist); + result = convert_testexpr(sublink->testexpr, + rtindex, + &ininfo->sub_targetlist); + + Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist)); + + /* Add the completed node to the query's list */ + root->in_info_list = lappend(root->in_info_list, ininfo); + + return result; } /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 3250beafb67..4858f985cff 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.228 2007/01/09 02:14:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.229 2007/01/10 18:06:04 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -3995,6 +3995,7 @@ expression_tree_mutator(Node *node, FLATCOPY(newnode, ininfo, InClauseInfo); MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *); + /* Assume we need not make a copy of in_operators list */ return (Node *) newnode; } break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e1390aa1112..8ee6346e5b1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.135 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,12 +28,14 @@ #include "parser/parsetree.h" #include "utils/memutils.h" #include "utils/selfuncs.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" static List *translate_sub_tlist(List *tlist, int relid); -static bool query_is_distinct_for(Query *query, List *colnos); -static bool hash_safe_tlist(List *tlist); +static bool query_is_distinct_for(Query *query, List *colnos, List *opids); +static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool hash_safe_operators(List *opids); /***************************************************************************** @@ -733,6 +735,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) Path agg_path; /* dummy for result of cost_agg */ MemoryContext oldcontext; List *sub_targetlist; + List *in_operators; ListCell *l; int numCols; @@ -769,9 +772,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * Try to identify the targetlist that will actually be unique-ified. In * current usage, this routine is only used for sub-selects of IN clauses, - * so we should be able to find the tlist in in_info_list. + * so we should be able to find the tlist in in_info_list. Get the IN + * clause's operators, too, because they determine what "unique" means. */ sub_targetlist = NIL; + in_operators = NIL; foreach(l, root->in_info_list) { InClauseInfo *ininfo = (InClauseInfo *) lfirst(l); @@ -779,6 +784,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) if (bms_equal(ininfo->righthand, rel->relids)) { sub_targetlist = ininfo->sub_targetlist; + in_operators = ininfo->in_operators; break; } } @@ -801,7 +807,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid); if (sub_tlist_colnos && - query_is_distinct_for(rte->subquery, sub_tlist_colnos)) + query_is_distinct_for(rte->subquery, + sub_tlist_colnos, in_operators)) { pathnode->umethod = UNIQUE_PATH_NOOP; pathnode->rows = rel->rows; @@ -847,11 +854,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) /* * Is it safe to use a hashed implementation? If so, estimate and compare - * costs. We only try this if we know the targetlist for sure (else we - * can't be sure about the datatypes involved). + * costs. We only try this if we know the IN operators, else we can't + * check their hashability. */ pathnode->umethod = UNIQUE_PATH_SORT; - if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist)) + if (enable_hashagg && in_operators && hash_safe_operators(in_operators)) { /* * Estimate the overhead per hashtable entry at 64 bytes (same as in @@ -924,16 +931,25 @@ translate_sub_tlist(List *tlist, int relid) * * colnos is an integer list of output column numbers (resno's). We are * interested in whether rows consisting of just these columns are certain - * to be distinct. + * to be distinct. "Distinctness" is defined according to whether the + * corresponding upper-level equality operators listed in opids would think + * the values are distinct. (Note: the opids entries could be cross-type + * operators, and thus not exactly the equality operators that the subquery + * would use itself. We assume that the subquery is compatible if these + * operators appear in the same btree opfamily as the ones the subquery uses.) */ static bool -query_is_distinct_for(Query *query, List *colnos) +query_is_distinct_for(Query *query, List *colnos, List *opids) { ListCell *l; + Oid opid; + + Assert(list_length(colnos) == list_length(opids)); /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the - * columns in the DISTINCT clause appear in colnos. + * columns in the DISTINCT clause appear in colnos and operator + * semantics match. */ if (query->distinctClause) { @@ -943,7 +959,9 @@ query_is_distinct_for(Query *query, List *colnos) TargetEntry *tle = get_sortgroupclause_tle(scl, query->targetList); - if (!list_member_int(colnos, tle->resno)) + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, scl->sortop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -952,7 +970,7 @@ query_is_distinct_for(Query *query, List *colnos) /* * Similarly, GROUP BY guarantees uniqueness if all the grouped columns - * appear in colnos. + * appear in colnos and operator semantics match. */ if (query->groupClause) { @@ -962,7 +980,9 @@ query_is_distinct_for(Query *query, List *colnos) TargetEntry *tle = get_sortgroupclause_tle(grpcl, query->targetList); - if (!list_member_int(colnos, tle->resno)) + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, grpcl->sortop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -972,7 +992,7 @@ query_is_distinct_for(Query *query, List *colnos) { /* * If we have no GROUP BY, but do have aggregates or HAVING, then the - * result is at most one row so it's surely unique. + * result is at most one row so it's surely unique, for any operators. */ if (query->hasAggs || query->havingQual) return true; @@ -980,7 +1000,13 @@ query_is_distinct_for(Query *query, List *colnos) /* * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, - * except with ALL + * except with ALL. + * + * XXX this code knows that prepunion.c will adopt the default ordering + * operator for each column datatype as the sortop. It'd probably be + * better if these operators were chosen at parse time and stored into + * the parsetree, instead of leaving bits of the planner to decide + * semantics. */ if (query->setOperations) { @@ -996,8 +1022,13 @@ query_is_distinct_for(Query *query, List *colnos) { TargetEntry *tle = (TargetEntry *) lfirst(l); - if (!tle->resjunk && - !list_member_int(colnos, tle->resno)) + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !ops_in_same_btree_opfamily(opid, + ordering_oper_opid(exprType((Node *) tle->expr)))) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -1014,31 +1045,46 @@ query_is_distinct_for(Query *query, List *colnos) } /* - * hash_safe_tlist - can datatypes of given tlist be hashed? + * distinct_col_search - subroutine for query_is_distinct_for * - * We assume hashed aggregation will work if the datatype's equality operator - * is marked hashjoinable. + * If colno is in colnos, return the corresponding element of opids, + * else return InvalidOid. (We expect colnos does not contain duplicates, + * so the result is well-defined.) + */ +static Oid +distinct_col_search(int colno, List *colnos, List *opids) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, colnos, lc2, opids) + { + if (colno == lfirst_int(lc1)) + return lfirst_oid(lc2); + } + return InvalidOid; +} + +/* + * hash_safe_operators - can all the specified IN operators be hashed? * - * XXX this probably should be somewhere else. See also hash_safe_grouping - * in plan/planner.c. + * We assume hashed aggregation will work if each IN operator is marked + * hashjoinable. If the IN operators are cross-type, this could conceivably + * fail: the aggregation will need a hashable equality operator for the RHS + * datatype --- but it's pretty hard to conceive of a hash opclass that has + * cross-type hashing without support for hashing the individual types, so + * we don't expend cycles here to support the case. We could check + * get_compatible_hash_operator() instead of just op_hashjoinable(), but the + * former is a significantly more expensive test. */ static bool -hash_safe_tlist(List *tlist) +hash_safe_operators(List *opids) { - ListCell *tl; + ListCell *lc; - foreach(tl, tlist) + foreach(lc, opids) { - Node *expr = (Node *) lfirst(tl); - Operator optup; - bool oprcanhash; - - optup = equality_oper(exprType(expr), true); - if (!optup) - return false; - oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; - ReleaseSysCache(optup); - if (!oprcanhash) + if (!op_hashjoinable(lfirst_oid(lc))) return false; } return true; @@ -1156,6 +1202,7 @@ create_nestloop_path(PlannerInfo *root, * ordering for each merge clause * 'mergestrategies' are the btree operator strategies identifying the merge * ordering for each merge clause + * 'mergenullsfirst' are the nulls first/last flags for each merge clause * 'outersortkeys' are the sort varkeys for the outer relation * 'innersortkeys' are the sort varkeys for the inner relation */ @@ -1168,8 +1215,9 @@ create_mergejoin_path(PlannerInfo *root, List *restrict_clauses, List *pathkeys, List *mergeclauses, - List *mergefamilies, - List *mergestrategies, + Oid *mergefamilies, + int *mergestrategies, + bool *mergenullsfirst, List *outersortkeys, List *innersortkeys) { @@ -1210,8 +1258,9 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.path.pathkeys = pathkeys; pathnode->path_mergeclauses = mergeclauses; - pathnode->path_mergefamilies = mergefamilies; - pathnode->path_mergestrategies = mergestrategies; + pathnode->path_mergeFamilies = mergefamilies; + pathnode->path_mergeStrategies = mergestrategies; + pathnode->path_mergeNullsFirst = mergenullsfirst; pathnode->outersortkeys = outersortkeys; pathnode->innersortkeys = innersortkeys; |