diff options
Diffstat (limited to 'src/backend/optimizer/prep/prepunion.c')
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 156 |
1 files changed, 89 insertions, 67 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 9c3822f19ad..1c43370005e 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root, List **istrivial_tlist); static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel); static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, - Path *input_path, + Path *lpath, Path *rpath, double dNumGroups, double dNumOutputRows, const char *construct); static List *generate_setop_tlist(List *colTypes, List *colCollations, @@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * to the corresponding tlist entries of the subplan. However, since * the subplan was generated by generate_union_paths() or * generate_nonunion_paths(), and hence its tlist was generated by - * generate_append_tlist(), this will work. We just tell - * generate_setop_tlist() to use varno 0. + * generate_append_tlist() or generate_setop_tlist(), this will work. + * We just tell generate_setop_tlist() to use varno 0. */ if (flag >= 0 || !tlist_same_datatypes(*pTargetList, colTypes, junkOK) || @@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, *path; List *lpath_tlist, *rpath_tlist, - *tlist_list, *tlist, - *groupList, - *pathlist; + *groupList; bool lpath_trivial_tlist, - rpath_trivial_tlist; + rpath_trivial_tlist, + result_trivial_tlist; double dLeftGroups, dRightGroups, dNumGroups, dNumOutputRows; bool use_hash; SetOpCmd cmd; - int firstFlag; /* * Tell children to fetch all tuples. */ root->tuple_fraction = 0.0; - /* Recurse on children, ensuring their outputs are marked */ + /* Recurse on children */ lrel = recurse_set_operations(op->larg, root, op->colTypes, op->colCollations, - false, 0, + false, -1, refnames_tlist, &lpath_tlist, &lpath_trivial_tlist); @@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, else dLeftGroups = lrel->rows; - lpath = lrel->cheapest_total_path; rrel = recurse_set_operations(op->rarg, root, op->colTypes, op->colCollations, - false, 1, + false, -1, refnames_tlist, &rpath_tlist, &rpath_trivial_tlist); @@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, else dRightGroups = rrel->rows; - rpath = rrel->cheapest_total_path; - /* Undo effects of forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; /* * For EXCEPT, we must put the left input first. For INTERSECT, either * order should give the same results, and we prefer to put the smaller - * input first in order to minimize the size of the hash table in the - * hashing case. "Smaller" means the one with the fewer groups. + * input first in order to (a) minimize the size of the hash table in the + * hashing case, and (b) improve our chances of exploiting the executor's + * fast path for empty left-hand input. "Smaller" means the one with the + * fewer groups. */ - if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups) - { - pathlist = list_make2(lpath, rpath); - tlist_list = list_make2(lpath_tlist, rpath_tlist); - firstFlag = 0; - } - else + if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups) { - pathlist = list_make2(rpath, lpath); - tlist_list = list_make2(rpath_tlist, lpath_tlist); - firstFlag = 1; + /* need to swap the two inputs */ + RelOptInfo *tmprel; + List *tmplist; + double tmpd; + + tmprel = lrel; + lrel = rrel; + rrel = tmprel; + tmplist = lpath_tlist; + lpath_tlist = rpath_tlist; + rpath_tlist = tmplist; + tmpd = dLeftGroups; + dLeftGroups = dRightGroups; + dRightGroups = tmpd; } + lpath = lrel->cheapest_total_path; + rpath = rrel->cheapest_total_path; + /* - * Generate tlist for Append plan node. + * Generate tlist for SetOp plan node. * - * The tlist for an Append plan isn't important as far as the Append is + * The tlist for a SetOp plan isn't important so far as the SetOp is * concerned, but we must make it look real anyway for the benefit of the - * next plan level up. In fact, it has to be real enough that the flag - * column is shown as a variable not a constant, else setrefs.c will get - * confused. + * next plan level up. */ - tlist = generate_append_tlist(op->colTypes, op->colCollations, true, - tlist_list, refnames_tlist); + tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1, + 0, false, lpath_tlist, refnames_tlist, + &result_trivial_tlist); + + /* We should not have needed any type coercions in the tlist */ + Assert(result_trivial_tlist); *pTargetList = tlist; @@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, bms_union(lrel->relids, rrel->relids)); result_rel->reltarget = create_pathtarget(root, tlist); - /* - * Append the child results together. - */ - path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NIL, NULL, 0, false, -1); - /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, tlist); @@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, } else { - dNumGroups = Min(dLeftGroups, dRightGroups); + dNumGroups = dLeftGroups; dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups; } /* - * Decide whether to hash or sort, and add a sort node if needed. + * Decide whether to hash or sort, and add sort nodes if needed. */ - use_hash = choose_hashed_setop(root, groupList, path, + use_hash = choose_hashed_setop(root, groupList, lpath, rpath, dNumGroups, dNumOutputRows, (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"); if (groupList && !use_hash) - path = (Path *) create_sort_path(root, - result_rel, - path, - make_pathkeys_for_sortclauses(root, - groupList, - tlist), - -1.0); + { + List *pathkeys; + + pathkeys = make_pathkeys_for_sortclauses(root, + groupList, + lpath_tlist); + if (!pathkeys_contained_in(pathkeys, lpath->pathkeys)) + lpath = (Path *) create_sort_path(root, + lpath->parent, + lpath, + pathkeys, + -1.0); + pathkeys = make_pathkeys_for_sortclauses(root, + groupList, + rpath_tlist); + if (!pathkeys_contained_in(pathkeys, rpath->pathkeys)) + rpath = (Path *) create_sort_path(root, + rpath->parent, + rpath, + pathkeys, + -1.0); + } /* * Finally, add a SetOp path node to generate the correct output. @@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, } path = (Path *) create_setop_path(root, result_rel, - path, + lpath, + rpath, cmd, use_hash ? SETOP_HASHED : SETOP_SORTED, groupList, - list_length(op->colTypes) + 1, - use_hash ? firstFlag : -1, dNumGroups, dNumOutputRows); @@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel) /* * choose_hashed_setop - should we use hashing for a set operation? + * + * XXX probably this should go away: just make both paths and let + * add_path sort it out. */ static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, - Path *input_path, + Path *lpath, Path *rpath, double dNumGroups, double dNumOutputRows, const char *construct) { @@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * Don't do it if it doesn't look like the hashtable will fit into * hash_mem. */ - hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader); + hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader); if (hashentrysize * dNumGroups > hash_mem_limit) return false; @@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * See if the estimated cost is no more than doing it the other way. * * We need to consider input_plan + hashagg versus input_plan + sort + - * group. Note that the actual result plan might involve a SetOp or - * Unique node, not Agg or Group, but the cost estimates for Agg and Group - * should be close enough for our purposes here. + * group. XXX NOT TRUE: Note that the actual result plan might involve a + * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg + * and Group should be close enough for our purposes here. * * These path variables are dummies that just hold cost fields; we don't * make actual Paths for these steps. @@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, cost_agg(&hashed_p, root, AGG_HASHED, NULL, numGroupCols, dNumGroups, NIL, - input_path->disabled_nodes, - input_path->startup_cost, input_path->total_cost, - input_path->rows, input_path->pathtarget->width); + lpath->disabled_nodes + rpath->disabled_nodes, + lpath->startup_cost + rpath->startup_cost, + lpath->total_cost + rpath->total_cost, + lpath->rows + rpath->rows, + lpath->pathtarget->width); /* - * Now for the sorted case. Note that the input is *always* unsorted, - * since it was made by appending unrelated sub-relations together. + * Now for the sorted case. XXX NOT TRUE: Note that the input is *always* + * unsorted, since it was made by appending unrelated sub-relations + * together. */ - sorted_p.disabled_nodes = input_path->disabled_nodes; - sorted_p.startup_cost = input_path->startup_cost; - sorted_p.total_cost = input_path->total_cost; + sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes; + sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost; + sorted_p.total_cost = lpath->total_cost + rpath->total_cost; /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */ cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes, sorted_p.total_cost, - input_path->rows, input_path->pathtarget->width, + lpath->rows + rpath->rows, + lpath->pathtarget->width, 0.0, work_mem, -1.0); cost_group(&sorted_p, root, numGroupCols, dNumGroups, NIL, sorted_p.disabled_nodes, sorted_p.startup_cost, sorted_p.total_cost, - input_path->rows); + lpath->rows + rpath->rows); /* * Now make the decision using the top-level tuple fraction. First we |