Diffstat (limited to 'src/backend/optimizer/prep/prepunion.c')
 src/backend/optimizer/prep/prepunion.c | 156 ++++++++++++----------
 1 file changed, 89 insertions(+), 67 deletions(-)
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 9c3822f19ad..1c43370005e 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root,
List **istrivial_tlist);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
- Path *input_path,
+ Path *lpath, Path *rpath,
double dNumGroups, double dNumOutputRows,
const char *construct);
static List *generate_setop_tlist(List *colTypes, List *colCollations,
@@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
* to the corresponding tlist entries of the subplan. However, since
* the subplan was generated by generate_union_paths() or
* generate_nonunion_paths(), and hence its tlist was generated by
- * generate_append_tlist(), this will work. We just tell
- * generate_setop_tlist() to use varno 0.
+ * generate_append_tlist() or generate_setop_tlist(), this will work.
+ * We just tell generate_setop_tlist() to use varno 0.
*/
if (flag >= 0 ||
!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
@@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
*path;
List *lpath_tlist,
*rpath_tlist,
- *tlist_list,
*tlist,
- *groupList,
- *pathlist;
+ *groupList;
bool lpath_trivial_tlist,
- rpath_trivial_tlist;
+ rpath_trivial_tlist,
+ result_trivial_tlist;
double dLeftGroups,
dRightGroups,
dNumGroups,
dNumOutputRows;
bool use_hash;
SetOpCmd cmd;
- int firstFlag;
/*
* Tell children to fetch all tuples.
*/
root->tuple_fraction = 0.0;
- /* Recurse on children, ensuring their outputs are marked */
+ /* Recurse on children */
lrel = recurse_set_operations(op->larg, root,
op->colTypes, op->colCollations,
- false, 0,
+ false, -1,
refnames_tlist,
&lpath_tlist,
&lpath_trivial_tlist);
@@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
else
dLeftGroups = lrel->rows;
- lpath = lrel->cheapest_total_path;
rrel = recurse_set_operations(op->rarg, root,
op->colTypes, op->colCollations,
- false, 1,
+ false, -1,
refnames_tlist,
&rpath_tlist,
&rpath_trivial_tlist);
@@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
else
dRightGroups = rrel->rows;
- rpath = rrel->cheapest_total_path;
-
/* Undo effects of forcing tuple_fraction to 0 */
root->tuple_fraction = save_fraction;
/*
* For EXCEPT, we must put the left input first. For INTERSECT, either
* order should give the same results, and we prefer to put the smaller
- * input first in order to minimize the size of the hash table in the
- * hashing case. "Smaller" means the one with the fewer groups.
+ * input first in order to (a) minimize the size of the hash table in the
+ * hashing case, and (b) improve our chances of exploiting the executor's
+ * fast path for empty left-hand input. "Smaller" means the one with the
+ * fewer groups.
*/
- if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
- {
- pathlist = list_make2(lpath, rpath);
- tlist_list = list_make2(lpath_tlist, rpath_tlist);
- firstFlag = 0;
- }
- else
+ if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
{
- pathlist = list_make2(rpath, lpath);
- tlist_list = list_make2(rpath_tlist, lpath_tlist);
- firstFlag = 1;
+ /* need to swap the two inputs */
+ RelOptInfo *tmprel;
+ List *tmplist;
+ double tmpd;
+
+ tmprel = lrel;
+ lrel = rrel;
+ rrel = tmprel;
+ tmplist = lpath_tlist;
+ lpath_tlist = rpath_tlist;
+ rpath_tlist = tmplist;
+ tmpd = dLeftGroups;
+ dLeftGroups = dRightGroups;
+ dRightGroups = tmpd;
}
+ lpath = lrel->cheapest_total_path;
+ rpath = rrel->cheapest_total_path;
+
/*
- * Generate tlist for Append plan node.
+ * Generate tlist for SetOp plan node.
*
- * The tlist for an Append plan isn't important as far as the Append is
+ * The tlist for a SetOp plan isn't important so far as the SetOp is
* concerned, but we must make it look real anyway for the benefit of the
- * next plan level up. In fact, it has to be real enough that the flag
- * column is shown as a variable not a constant, else setrefs.c will get
- * confused.
+ * next plan level up.
*/
- tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
- tlist_list, refnames_tlist);
+ tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
+ 0, false, lpath_tlist, refnames_tlist,
+ &result_trivial_tlist);
+
+ /* We should not have needed any type coercions in the tlist */
+ Assert(result_trivial_tlist);
*pTargetList = tlist;
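
The swap block above is what implements the comment's (a)/(b) rationale: for INTERSECT (never EXCEPT, whose semantics pin the left input) the side with fewer estimated groups becomes the left, hashed input. A minimal standalone sketch of just that decision, using hypothetical stand-in types rather than the planner's RelOptInfo machinery:

/*
 * Illustrative sketch only -- not PostgreSQL code.  Shows the input-swap
 * heuristic from the hunk above: for INTERSECT, put the side with fewer
 * estimated groups on the left; EXCEPT must keep its original order.
 */
#include <stdio.h>
#include <stdbool.h>

typedef enum { SETOP_INTERSECT, SETOP_EXCEPT } SetOpKind;

typedef struct
{
    const char *name;
    double      ngroups;        /* estimated number of distinct groups */
} SetOpInput;

static bool
should_swap_inputs(SetOpKind op, const SetOpInput *l, const SetOpInput *r)
{
    /* EXCEPT is order-sensitive; INTERSECT lets us pick the smaller side */
    return op != SETOP_EXCEPT && l->ngroups > r->ngroups;
}

int
main(void)
{
    SetOpInput  left = {"big_side", 1000000.0};
    SetOpInput  right = {"small_side", 42.0};

    if (should_swap_inputs(SETOP_INTERSECT, &left, &right))
    {
        SetOpInput  tmp = left;

        left = right;
        right = tmp;
    }
    printf("left (hashed) input: %s\n", left.name);
    return 0;
}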
@@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
bms_union(lrel->relids, rrel->relids));
result_rel->reltarget = create_pathtarget(root, tlist);
- /*
- * Append the child results together.
- */
- path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
- NIL, NULL, 0, false, -1);
-
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, tlist);
@@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
}
else
{
- dNumGroups = Min(dLeftGroups, dRightGroups);
+ dNumGroups = dLeftGroups;
dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
}
/*
- * Decide whether to hash or sort, and add a sort node if needed.
+ * Decide whether to hash or sort, and add sort nodes if needed.
*/
- use_hash = choose_hashed_setop(root, groupList, path,
+ use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
dNumGroups, dNumOutputRows,
(op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
if (groupList && !use_hash)
- path = (Path *) create_sort_path(root,
- result_rel,
- path,
- make_pathkeys_for_sortclauses(root,
- groupList,
- tlist),
- -1.0);
+ {
+ List *pathkeys;
+
+ pathkeys = make_pathkeys_for_sortclauses(root,
+ groupList,
+ lpath_tlist);
+ if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
+ lpath = (Path *) create_sort_path(root,
+ lpath->parent,
+ lpath,
+ pathkeys,
+ -1.0);
+ pathkeys = make_pathkeys_for_sortclauses(root,
+ groupList,
+ rpath_tlist);
+ if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
+ rpath = (Path *) create_sort_path(root,
+ rpath->parent,
+ rpath,
+ pathkeys,
+ -1.0);
+ }
/*
* Finally, add a SetOp path node to generate the correct output.
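
The sorted branch above no longer sorts unconditionally: each input gets a Sort path only when its existing pathkeys do not already satisfy the grouping order. A rough standalone sketch of that "required keys are a prefix of the present keys" test, with plain integers standing in for canonical pathkeys:

/*
 * Illustrative sketch only -- not PostgreSQL code.  Approximates the
 * pathkeys_contained_in() test used above: an input needs an explicit
 * Sort only if the required grouping keys are not already a prefix of
 * the ordering the input provides.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* true if "required" (length nreq) is a prefix of "present" (length npres) */
static bool
keys_satisfied(const int *required, size_t nreq,
               const int *present, size_t npres)
{
    if (nreq > npres)
        return false;
    for (size_t i = 0; i < nreq; i++)
    {
        if (required[i] != present[i])
            return false;
    }
    return true;
}

int
main(void)
{
    int         required[] = {1, 2};
    int         already_sorted_by[] = {1, 2, 3};
    int         unsorted[] = {0};

    printf("left input needs sort: %s\n",
           keys_satisfied(required, 2, already_sorted_by, 3) ? "no" : "yes");
    printf("right input needs sort: %s\n",
           keys_satisfied(required, 2, unsorted, 0) ? "no" : "yes");
    return 0;
}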
@@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
}
path = (Path *) create_setop_path(root,
result_rel,
- path,
+ lpath,
+ rpath,
cmd,
use_hash ? SETOP_HASHED : SETOP_SORTED,
groupList,
- list_length(op->colTypes) + 1,
- use_hash ? firstFlag : -1,
dNumGroups,
dNumOutputRows);
@@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
/*
* choose_hashed_setop - should we use hashing for a set operation?
+ *
+ * XXX probably this should go away: just make both paths and let
+ * add_path sort it out.
*/
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
- Path *input_path,
+ Path *lpath, Path *rpath,
double dNumGroups, double dNumOutputRows,
const char *construct)
{
@@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* Don't do it if it doesn't look like the hashtable will fit into
* hash_mem.
*/
- hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+ hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
if (hashentrysize * dNumGroups > hash_mem_limit)
return false;
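
The hash_mem test above sizes one hashtable entry as the aligned row width plus an aligned minimal tuple header and multiplies by the group estimate. A self-contained sketch of the same arithmetic, with a simplified alignment macro and made-up sizes standing in for the real constants:

/*
 * Illustrative sketch only -- not PostgreSQL code.  Estimate one hash
 * entry as the MAXALIGN'ed row width plus a MAXALIGN'ed tuple header,
 * and refuse to hash if all groups would not fit in the memory limit.
 * The alignment macro, header size, and limit are simplified stand-ins.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

#define ALIGNVAL 8
#define MAXALIGN(LEN) (((size_t) (LEN) + (ALIGNVAL - 1)) & ~((size_t) (ALIGNVAL - 1)))

static bool
hash_would_fit(int tuple_width, int tuple_header_size,
               double num_groups, double mem_limit_bytes)
{
    double      entrysize = MAXALIGN(tuple_width) + MAXALIGN(tuple_header_size);

    return entrysize * num_groups <= mem_limit_bytes;
}

int
main(void)
{
    /* e.g. 64-byte rows, 24-byte header, 1M groups, 64MB of hash memory */
    bool        fits = hash_would_fit(64, 24, 1000000.0, 64.0 * 1024 * 1024);

    printf("hashed setop considered: %s\n", fits ? "yes" : "no");
    return 0;
}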
@@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* See if the estimated cost is no more than doing it the other way.
*
* We need to consider input_plan + hashagg versus input_plan + sort +
- * group. Note that the actual result plan might involve a SetOp or
- * Unique node, not Agg or Group, but the cost estimates for Agg and Group
- * should be close enough for our purposes here.
+ * group. XXX NOT TRUE: Note that the actual result plan might involve a
+ * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
+ * and Group should be close enough for our purposes here.
*
* These path variables are dummies that just hold cost fields; we don't
* make actual Paths for these steps.
@@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
cost_agg(&hashed_p, root, AGG_HASHED, NULL,
numGroupCols, dNumGroups,
NIL,
- input_path->disabled_nodes,
- input_path->startup_cost, input_path->total_cost,
- input_path->rows, input_path->pathtarget->width);
+ lpath->disabled_nodes + rpath->disabled_nodes,
+ lpath->startup_cost + rpath->startup_cost,
+ lpath->total_cost + rpath->total_cost,
+ lpath->rows + rpath->rows,
+ lpath->pathtarget->width);
/*
- * Now for the sorted case. Note that the input is *always* unsorted,
- * since it was made by appending unrelated sub-relations together.
+ * Now for the sorted case. XXX NOT TRUE: Note that the input is *always*
+ * unsorted, since it was made by appending unrelated sub-relations
+ * together.
*/
- sorted_p.disabled_nodes = input_path->disabled_nodes;
- sorted_p.startup_cost = input_path->startup_cost;
- sorted_p.total_cost = input_path->total_cost;
+ sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
+ sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
+ sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
sorted_p.total_cost,
- input_path->rows, input_path->pathtarget->width,
+ lpath->rows + rpath->rows,
+ lpath->pathtarget->width,
0.0, work_mem, -1.0);
cost_group(&sorted_p, root, numGroupCols, dNumGroups,
NIL,
sorted_p.disabled_nodes,
sorted_p.startup_cost, sorted_p.total_cost,
- input_path->rows);
+ lpath->rows + rpath->rows);
/*
* Now make the decision using the top-level tuple fraction. First we
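
With two input paths, the hashed-versus-sorted comparison above now charges the sum of both children's costs and row counts rather than a single appended input. A toy standalone sketch of that shape; the cost formulas here are placeholders, not the real cost_agg/cost_sort/cost_group models:

/*
 * Illustrative sketch only -- not PostgreSQL code.  The point is just
 * that both children's total costs and row counts are summed before
 * comparing a hashed estimate against a sort-then-group estimate; the
 * per-row constants and formulas below are toy placeholders.
 */
#include <stdio.h>
#include <stdbool.h>
#include <math.h>

typedef struct
{
    double      total_cost;
    double      rows;
} ToyPath;

static bool
prefer_hashed(const ToyPath *lpath, const ToyPath *rpath, double num_groups)
{
    double      cpu = 0.01;     /* toy per-row cost */
    double      input_cost = lpath->total_cost + rpath->total_cost;
    double      input_rows = lpath->rows + rpath->rows;

    /* toy models: hashing pays per row and per group, sorting pays n log n */
    double      hashed = input_cost + cpu * (input_rows + num_groups);
    double      sorted = input_cost + cpu * input_rows * log2(input_rows + 1.0)
                         + cpu * input_rows;

    return hashed <= sorted;
}

int
main(void)
{
    ToyPath     lpath = {1000.0, 50000.0};
    ToyPath     rpath = {800.0, 40000.0};

    printf("use hashing: %s\n",
           prefer_hashed(&lpath, &rpath, 1000.0) ? "yes" : "no");
    return 0;
}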