diff options
Diffstat (limited to 'src/backend/optimizer/prep/prepunion.c')
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 282 |
1 files changed, 235 insertions, 47 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 31ba005edb7..750d59a9515 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,8 +32,12 @@ #include "access/heapam.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/planmain.h" #include "optimizer/planner.h" @@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root, double tuple_fraction, SetOperationStmt *top_union, List *refnames_tlist); +static Plan *make_union_unique(SetOperationStmt *op, Plan *plan, + PlannerInfo *root, double tuple_fraction, + List **sortClauses); +static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, + Plan *input_plan, + double tuple_fraction, double dNumDistinctRows, + const char *construct); static List *generate_setop_tlist(List *colTypes, int flag, Index varno, bool hack_constants, @@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag, static List *generate_append_tlist(List *colTypes, bool flag, List *input_plans, List *refnames_tlist); -static List *generate_setop_sortlist(List *targetlist); +static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist); static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti); static void make_inh_translation_lists(Relation oldrelation, @@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist, * top level has already been factored into tuple_fraction. * * *sortClauses is an output argument: it is set to a list of SortGroupClauses - * representing the result ordering of the topmost set operation. + * representing the result ordering of the topmost set operation. (This will + * be NIL if the output isn't ordered.) */ Plan * plan_set_operations(PlannerInfo *root, double tuple_fraction, @@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root, /* * If any of my children are identical UNION nodes (same op, all-flag, and * colTypes) then they can be merged into this node so that we generate - * only one Append and Sort for the lot. Recurse to find such nodes and - * compute their children's plans. + * only one Append and unique-ification for the lot. Recurse to find such + * nodes and compute their children's plans. */ planlist = list_concat(recurse_union_children(op->larg, root, tuple_fraction, @@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root, /* * For UNION ALL, we just need the Append plan. For UNION, need to add - * Sort and Unique nodes to produce unique output. + * node(s) to remove duplicates. */ - if (!op->all) - { - List *sortList; - - sortList = generate_setop_sortlist(tlist); - if (sortList) - { - plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan); - plan = (Plan *) make_unique(plan, sortList); - } - *sortClauses = sortList; - } + if (op->all) + *sortClauses = NIL; /* result of UNION ALL is always unsorted */ else - *sortClauses = NIL; + plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses); return plan; } @@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, *rplan, *plan; List *tlist, - *sortList, + *groupList, *planlist, *child_sortclauses; SetOpCmd cmd; @@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, */ plan = (Plan *) make_append(planlist, false, tlist); - /* - * Sort the child results, then add a SetOp plan node to generate the - * correct output. - */ - sortList = generate_setop_sortlist(tlist); + /* Identify the grouping semantics */ + groupList = generate_setop_grouplist(op, tlist); - if (sortList == NIL) /* nothing to sort on? */ + /* punt if nothing to group on (can this happen?) */ + if (groupList == NIL) { *sortClauses = NIL; return plan; } - plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan); + /* + * Decide whether to hash or sort, and add a sort node if needed. + */ + plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan); + + /* + * Finally, add a SetOp plan node to generate the correct output. + */ switch (op->op) { case SETOP_INTERSECT: @@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT; break; default: - elog(ERROR, "unrecognized set op: %d", - (int) op->op); + elog(ERROR, "unrecognized set op: %d", (int) op->op); cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */ break; } - plan = (Plan *) make_setop(cmd, plan, sortList, list_length(op->colTypes) + 1); + plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1); - *sortClauses = sortList; + *sortClauses = groupList; return plan; } @@ -466,6 +472,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root, } /* + * Add nodes to the given plan tree to unique-ify the result of a UNION. + */ +static Plan * +make_union_unique(SetOperationStmt *op, Plan *plan, + PlannerInfo *root, double tuple_fraction, + List **sortClauses) +{ + List *groupList; + double dNumDistinctRows; + long numDistinctRows; + + /* Identify the grouping semantics */ + groupList = generate_setop_grouplist(op, plan->targetlist); + + /* punt if nothing to group on (can this happen?) */ + if (groupList == NIL) + { + *sortClauses = NIL; + return plan; + } + + /* + * XXX for the moment, take the number of distinct groups as being the + * total input size, ie, the worst case. This is too conservative, but + * we don't want to risk having the hashtable overrun memory; also, + * it's not clear how to get a decent estimate of the true size. One + * should note as well the propensity of novices to write UNION rather + * than UNION ALL even when they don't expect any duplicates... + */ + dNumDistinctRows = plan->plan_rows; + + /* Also convert to long int --- but 'ware overflow! */ + numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); + + /* Decide whether to hash or sort */ + if (choose_hashed_setop(root, groupList, plan, + tuple_fraction, dNumDistinctRows, + "UNION")) + { + /* Hashed aggregate plan --- no sort needed */ + plan = (Plan *) make_agg(root, + plan->targetlist, + NIL, + AGG_HASHED, + list_length(groupList), + extract_grouping_cols(groupList, + plan->targetlist), + extract_grouping_ops(groupList), + numDistinctRows, + 0, + plan); + /* Hashed aggregation produces randomly-ordered results */ + *sortClauses = NIL; + } + else + { + /* Sort and Unique */ + plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan); + plan = (Plan *) make_unique(plan, groupList); + plan->plan_rows = dNumDistinctRows; + /* We know the sort order of the result */ + *sortClauses = groupList; + } + + return plan; +} + +/* + * choose_hashed_setop - should we use hashing for a set operation? + */ +static bool +choose_hashed_setop(PlannerInfo *root, List *groupClauses, + Plan *input_plan, + double tuple_fraction, double dNumDistinctRows, + const char *construct) +{ + int numDistinctCols = list_length(groupClauses); + bool can_sort; + bool can_hash; + Size hashentrysize; + List *needed_pathkeys; + Path hashed_p; + Path sorted_p; + + /* Check whether the operators support sorting or hashing */ + can_sort = grouping_is_sortable(groupClauses); + can_hash = grouping_is_hashable(groupClauses); + if (can_hash && can_sort) + { + /* we have a meaningful choice to make, continue ... */ + } + else if (can_hash) + return true; + else if (can_sort) + return false; + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /* translator: %s is UNION, INTERSECT, or EXCEPT */ + errmsg("could not implement %s", construct), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* Prefer sorting when enable_hashagg is off */ + if (!enable_hashagg) + return false; + + /* + * Don't do it if it doesn't look like the hashtable will fit into + * work_mem. + */ + hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData)); + + if (hashentrysize * dNumDistinctRows > work_mem * 1024L) + return false; + + /* + * See if the estimated cost is no more than doing it the other way. + * + * We need to consider input_plan + hashagg versus input_plan + sort + + * group. Note that the actual result plan might involve a SetOp or + * Unique node, not Agg or Group, but the cost estimates for Agg and Group + * should be close enough for our purposes here. + * + * These path variables are dummies that just hold cost fields; we don't + * make actual Paths for these steps. + */ + cost_agg(&hashed_p, root, AGG_HASHED, 0, + numDistinctCols, dNumDistinctRows, + input_plan->startup_cost, input_plan->total_cost, + input_plan->plan_rows); + + /* + * Now for the sorted case. Note that the input is *always* unsorted, + * since it was made by appending unrelated sub-relations together. + */ + sorted_p.startup_cost = input_plan->startup_cost; + sorted_p.total_cost = input_plan->total_cost; + /* XXX this is more expensive than cost_sort really needs: */ + needed_pathkeys = make_pathkeys_for_sortclauses(root, + groupClauses, + input_plan->targetlist, + true); + cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost, + input_plan->plan_rows, input_plan->plan_width, -1.0); + cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, + sorted_p.startup_cost, sorted_p.total_cost, + input_plan->plan_rows); + + /* + * Now make the decision using the top-level tuple fraction. First we + * have to convert an absolute count (LIMIT) into fractional form. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumDistinctRows; + + if (compare_fractional_path_costs(&hashed_p, &sorted_p, + tuple_fraction) < 0) + { + /* Hashed is cheaper, so use it */ + return true; + } + return false; +} + +/* * Generate targetlist for a set-operation plan node * * colTypes: column datatypes for non-junk columns @@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag, } /* - * generate_setop_sortlist - * Build a SortGroupClause list enumerating all the non-resjunk - * tlist entries, using default ordering properties. + * generate_setop_grouplist + * Build a SortGroupClause list defining the sort/grouping properties + * of the setop's output columns. * - * For now, we require all the items to be sortable. Eventually we - * should implement hashing setops and allow hash-only datatypes. + * Parse analysis already determined the properties and built a suitable + * list, except that the entries do not have sortgrouprefs set because + * the parser output representation doesn't include a tlist for each + * setop. So what we need to do here is copy that list and install + * proper sortgrouprefs into it and into the targetlist. */ static List * -generate_setop_sortlist(List *targetlist) +generate_setop_grouplist(SetOperationStmt *op, List *targetlist) { - List *sortlist = NIL; - ListCell *l; + List *grouplist = (List *) copyObject(op->groupClauses); + ListCell *lg; + ListCell *lt; + Index refno = 1; - foreach(l, targetlist) + lg = list_head(grouplist); + foreach(lt, targetlist) { - TargetEntry *tle = (TargetEntry *) lfirst(l); + TargetEntry *tle = (TargetEntry *) lfirst(lt); + SortGroupClause *sgc; - if (!tle->resjunk) - sortlist = addTargetToGroupList(NULL, tle, - sortlist, targetlist, - true, /* XXX fixme someday */ - false); + /* tlist shouldn't have any sortgrouprefs yet */ + Assert(tle->ressortgroupref == 0); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* non-resjunk columns should have grouping clauses */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(lg); + Assert(sgc->tleSortGroupRef == 0); + + /* we could use assignSortGroupRef here, but seems a bit silly */ + sgc->tleSortGroupRef = tle->ressortgroupref = refno++; } - return sortlist; + Assert(lg == NULL); + return grouplist; } |