aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--src/backend/optimizer/README2
-rw-r--r--src/backend/optimizer/plan/createplan.c81
-rw-r--r--src/backend/optimizer/prep/prepunion.c156
-rw-r--r--src/backend/optimizer/util/pathnode.c50
4 files changed, 162 insertions, 127 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 2ab4f3dbf37..f341d9f303c 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -649,7 +649,7 @@ RelOptInfo - a relation or joined relations
GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
WindowAggPath - a WindowAgg plan node applied to some sub-path
- SetOpPath - a SetOp plan node applied to some sub-path
+ SetOpPath - a SetOp plan node applied to two sub-paths
RecursiveUnionPath - a RecursiveUnion plan node applied to two sub-paths
LockRowsPath - a LockRows plan node applied to some sub-path
ModifyTablePath - a ModifyTable plan node applied to some sub-path(s)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 178c572b021..b3e2294e84f 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -301,9 +301,9 @@ static Unique *make_unique_from_pathkeys(Plan *lefttree,
List *pathkeys, int numCols);
static Gather *make_gather(List *qptlist, List *qpqual,
int nworkers, int rescan_param, bool single_copy, Plan *subplan);
-static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx, int firstFlag,
- long numGroups);
+static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+ List *tlist, Plan *lefttree, Plan *righttree,
+ List *groupList, long numGroups);
static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
static ProjectSet *make_project_set(List *tlist, Plan *subplan);
@@ -2719,25 +2719,29 @@ static SetOp *
create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
{
SetOp *plan;
- Plan *subplan;
+ List *tlist = build_path_tlist(root, &best_path->path);
+ Plan *leftplan;
+ Plan *rightplan;
long numGroups;
/*
* SetOp doesn't project, so tlist requirements pass through; moreover we
* need grouping columns to be labeled.
*/
- subplan = create_plan_recurse(root, best_path->subpath,
- flags | CP_LABEL_TLIST);
+ leftplan = create_plan_recurse(root, best_path->leftpath,
+ flags | CP_LABEL_TLIST);
+ rightplan = create_plan_recurse(root, best_path->rightpath,
+ flags | CP_LABEL_TLIST);
/* Convert numGroups to long int --- but 'ware overflow! */
numGroups = clamp_cardinality_to_long(best_path->numGroups);
plan = make_setop(best_path->cmd,
best_path->strategy,
- subplan,
- best_path->distinctList,
- best_path->flagColIdx,
- best_path->firstFlag,
+ tlist,
+ leftplan,
+ rightplan,
+ best_path->groupList,
numGroups);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6950,57 +6954,62 @@ make_gather(List *qptlist,
}
/*
- * distinctList is a list of SortGroupClauses, identifying the targetlist
- * items that should be considered by the SetOp filter. The input path must
- * already be sorted accordingly.
+ * groupList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter. The input plans must
+ * already be sorted accordingly, if we're doing SETOP_SORTED mode.
*/
static SetOp *
-make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx, int firstFlag,
- long numGroups)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+ List *tlist, Plan *lefttree, Plan *righttree,
+ List *groupList, long numGroups)
{
SetOp *node = makeNode(SetOp);
Plan *plan = &node->plan;
- int numCols = list_length(distinctList);
+ int numCols = list_length(groupList);
int keyno = 0;
- AttrNumber *dupColIdx;
- Oid *dupOperators;
- Oid *dupCollations;
+ AttrNumber *cmpColIdx;
+ Oid *cmpOperators;
+ Oid *cmpCollations;
+ bool *cmpNullsFirst;
ListCell *slitem;
- plan->targetlist = lefttree->targetlist;
+ plan->targetlist = tlist;
plan->qual = NIL;
plan->lefttree = lefttree;
- plan->righttree = NULL;
+ plan->righttree = righttree;
/*
- * convert SortGroupClause list into arrays of attr indexes and equality
+ * convert SortGroupClause list into arrays of attr indexes and comparison
* operators, as wanted by executor
*/
- dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
- dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
- dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+ cmpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+ cmpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ cmpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+ cmpNullsFirst = (bool *) palloc(sizeof(bool) * numCols);
- foreach(slitem, distinctList)
+ foreach(slitem, groupList)
{
SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
- dupColIdx[keyno] = tle->resno;
- dupOperators[keyno] = sortcl->eqop;
- dupCollations[keyno] = exprCollation((Node *) tle->expr);
- Assert(OidIsValid(dupOperators[keyno]));
+ cmpColIdx[keyno] = tle->resno;
+ if (strategy == SETOP_HASHED)
+ cmpOperators[keyno] = sortcl->eqop;
+ else
+ cmpOperators[keyno] = sortcl->sortop;
+ Assert(OidIsValid(cmpOperators[keyno]));
+ cmpCollations[keyno] = exprCollation((Node *) tle->expr);
+ cmpNullsFirst[keyno] = sortcl->nulls_first;
keyno++;
}
node->cmd = cmd;
node->strategy = strategy;
node->numCols = numCols;
- node->dupColIdx = dupColIdx;
- node->dupOperators = dupOperators;
- node->dupCollations = dupCollations;
- node->flagColIdx = flagColIdx;
- node->firstFlag = firstFlag;
+ node->cmpColIdx = cmpColIdx;
+ node->cmpOperators = cmpOperators;
+ node->cmpCollations = cmpCollations;
+ node->cmpNullsFirst = cmpNullsFirst;
node->numGroups = numGroups;
return node;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 9c3822f19ad..1c43370005e 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root,
List **istrivial_tlist);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
- Path *input_path,
+ Path *lpath, Path *rpath,
double dNumGroups, double dNumOutputRows,
const char *construct);
static List *generate_setop_tlist(List *colTypes, List *colCollations,
@@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
* to the corresponding tlist entries of the subplan. However, since
* the subplan was generated by generate_union_paths() or
* generate_nonunion_paths(), and hence its tlist was generated by
- * generate_append_tlist(), this will work. We just tell
- * generate_setop_tlist() to use varno 0.
+ * generate_append_tlist() or generate_setop_tlist(), this will work.
+ * We just tell generate_setop_tlist() to use varno 0.
*/
if (flag >= 0 ||
!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
@@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
*path;
List *lpath_tlist,
*rpath_tlist,
- *tlist_list,
*tlist,
- *groupList,
- *pathlist;
+ *groupList;
bool lpath_trivial_tlist,
- rpath_trivial_tlist;
+ rpath_trivial_tlist,
+ result_trivial_tlist;
double dLeftGroups,
dRightGroups,
dNumGroups,
dNumOutputRows;
bool use_hash;
SetOpCmd cmd;
- int firstFlag;
/*
* Tell children to fetch all tuples.
*/
root->tuple_fraction = 0.0;
- /* Recurse on children, ensuring their outputs are marked */
+ /* Recurse on children */
lrel = recurse_set_operations(op->larg, root,
op->colTypes, op->colCollations,
- false, 0,
+ false, -1,
refnames_tlist,
&lpath_tlist,
&lpath_trivial_tlist);
@@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
else
dLeftGroups = lrel->rows;
- lpath = lrel->cheapest_total_path;
rrel = recurse_set_operations(op->rarg, root,
op->colTypes, op->colCollations,
- false, 1,
+ false, -1,
refnames_tlist,
&rpath_tlist,
&rpath_trivial_tlist);
@@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
else
dRightGroups = rrel->rows;
- rpath = rrel->cheapest_total_path;
-
/* Undo effects of forcing tuple_fraction to 0 */
root->tuple_fraction = save_fraction;
/*
* For EXCEPT, we must put the left input first. For INTERSECT, either
* order should give the same results, and we prefer to put the smaller
- * input first in order to minimize the size of the hash table in the
- * hashing case. "Smaller" means the one with the fewer groups.
+ * input first in order to (a) minimize the size of the hash table in the
+ * hashing case, and (b) improve our chances of exploiting the executor's
+ * fast path for empty left-hand input. "Smaller" means the one with the
+ * fewer groups.
*/
- if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
- {
- pathlist = list_make2(lpath, rpath);
- tlist_list = list_make2(lpath_tlist, rpath_tlist);
- firstFlag = 0;
- }
- else
+ if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
{
- pathlist = list_make2(rpath, lpath);
- tlist_list = list_make2(rpath_tlist, lpath_tlist);
- firstFlag = 1;
+ /* need to swap the two inputs */
+ RelOptInfo *tmprel;
+ List *tmplist;
+ double tmpd;
+
+ tmprel = lrel;
+ lrel = rrel;
+ rrel = tmprel;
+ tmplist = lpath_tlist;
+ lpath_tlist = rpath_tlist;
+ rpath_tlist = tmplist;
+ tmpd = dLeftGroups;
+ dLeftGroups = dRightGroups;
+ dRightGroups = tmpd;
}
+ lpath = lrel->cheapest_total_path;
+ rpath = rrel->cheapest_total_path;
+
/*
- * Generate tlist for Append plan node.
+ * Generate tlist for SetOp plan node.
*
- * The tlist for an Append plan isn't important as far as the Append is
+ * The tlist for a SetOp plan isn't important so far as the SetOp is
* concerned, but we must make it look real anyway for the benefit of the
- * next plan level up. In fact, it has to be real enough that the flag
- * column is shown as a variable not a constant, else setrefs.c will get
- * confused.
+ * next plan level up.
*/
- tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
- tlist_list, refnames_tlist);
+ tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
+ 0, false, lpath_tlist, refnames_tlist,
+ &result_trivial_tlist);
+
+ /* We should not have needed any type coercions in the tlist */
+ Assert(result_trivial_tlist);
*pTargetList = tlist;
@@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
bms_union(lrel->relids, rrel->relids));
result_rel->reltarget = create_pathtarget(root, tlist);
- /*
- * Append the child results together.
- */
- path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
- NIL, NULL, 0, false, -1);
-
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, tlist);
@@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
}
else
{
- dNumGroups = Min(dLeftGroups, dRightGroups);
+ dNumGroups = dLeftGroups;
dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
}
/*
- * Decide whether to hash or sort, and add a sort node if needed.
+ * Decide whether to hash or sort, and add sort nodes if needed.
*/
- use_hash = choose_hashed_setop(root, groupList, path,
+ use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
dNumGroups, dNumOutputRows,
(op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
if (groupList && !use_hash)
- path = (Path *) create_sort_path(root,
- result_rel,
- path,
- make_pathkeys_for_sortclauses(root,
- groupList,
- tlist),
- -1.0);
+ {
+ List *pathkeys;
+
+ pathkeys = make_pathkeys_for_sortclauses(root,
+ groupList,
+ lpath_tlist);
+ if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
+ lpath = (Path *) create_sort_path(root,
+ lpath->parent,
+ lpath,
+ pathkeys,
+ -1.0);
+ pathkeys = make_pathkeys_for_sortclauses(root,
+ groupList,
+ rpath_tlist);
+ if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
+ rpath = (Path *) create_sort_path(root,
+ rpath->parent,
+ rpath,
+ pathkeys,
+ -1.0);
+ }
/*
* Finally, add a SetOp path node to generate the correct output.
@@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
}
path = (Path *) create_setop_path(root,
result_rel,
- path,
+ lpath,
+ rpath,
cmd,
use_hash ? SETOP_HASHED : SETOP_SORTED,
groupList,
- list_length(op->colTypes) + 1,
- use_hash ? firstFlag : -1,
dNumGroups,
dNumOutputRows);
@@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
/*
* choose_hashed_setop - should we use hashing for a set operation?
+ *
+ * XXX probably this should go away: just make both paths and let
+ * add_path sort it out.
*/
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
- Path *input_path,
+ Path *lpath, Path *rpath,
double dNumGroups, double dNumOutputRows,
const char *construct)
{
@@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* Don't do it if it doesn't look like the hashtable will fit into
* hash_mem.
*/
- hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+ hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
if (hashentrysize * dNumGroups > hash_mem_limit)
return false;
@@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* See if the estimated cost is no more than doing it the other way.
*
* We need to consider input_plan + hashagg versus input_plan + sort +
- * group. Note that the actual result plan might involve a SetOp or
- * Unique node, not Agg or Group, but the cost estimates for Agg and Group
- * should be close enough for our purposes here.
+ * group. XXX NOT TRUE: Note that the actual result plan might involve a
+ * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
+ * and Group should be close enough for our purposes here.
*
* These path variables are dummies that just hold cost fields; we don't
* make actual Paths for these steps.
@@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
cost_agg(&hashed_p, root, AGG_HASHED, NULL,
numGroupCols, dNumGroups,
NIL,
- input_path->disabled_nodes,
- input_path->startup_cost, input_path->total_cost,
- input_path->rows, input_path->pathtarget->width);
+ lpath->disabled_nodes + rpath->disabled_nodes,
+ lpath->startup_cost + rpath->startup_cost,
+ lpath->total_cost + rpath->total_cost,
+ lpath->rows + rpath->rows,
+ lpath->pathtarget->width);
/*
- * Now for the sorted case. Note that the input is *always* unsorted,
- * since it was made by appending unrelated sub-relations together.
+ * Now for the sorted case. XXX NOT TRUE: Note that the input is *always*
+ * unsorted, since it was made by appending unrelated sub-relations
+ * together.
*/
- sorted_p.disabled_nodes = input_path->disabled_nodes;
- sorted_p.startup_cost = input_path->startup_cost;
- sorted_p.total_cost = input_path->total_cost;
+ sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
+ sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
+ sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
sorted_p.total_cost,
- input_path->rows, input_path->pathtarget->width,
+ lpath->rows + rpath->rows,
+ lpath->pathtarget->width,
0.0, work_mem, -1.0);
cost_group(&sorted_p, root, numGroupCols, dNumGroups,
NIL,
sorted_p.disabled_nodes,
sorted_p.startup_cost, sorted_p.total_cost,
- input_path->rows);
+ lpath->rows + rpath->rows);
/*
* Now make the decision using the top-level tuple fraction. First we
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fc97bf6ee26..e52e4b1d677 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3634,25 +3634,26 @@ create_windowagg_path(PlannerInfo *root,
* Creates a pathnode that represents computation of INTERSECT or EXCEPT
*
* 'rel' is the parent relation associated with the result
- * 'subpath' is the path representing the source of data
+ * 'leftpath' is the path representing the left-hand source of data
+ * 'rightpath' is the path representing the right-hand source of data
* 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
* 'strategy' is the implementation strategy (sorted or hashed)
- * 'distinctList' is a list of SortGroupClause's representing the grouping
- * 'flagColIdx' is the column number where the flag column will be, if any
- * 'firstFlag' is the flag value for the first input relation when hashing;
- * or -1 when sorting
- * 'numGroups' is the estimated number of distinct groups
+ * 'groupList' is a list of SortGroupClause's representing the grouping
+ * 'numGroups' is the estimated number of distinct groups in left-hand input
* 'outputRows' is the estimated number of output rows
+ *
+ * leftpath and rightpath must produce the same columns. Moreover, if
+ * strategy is SETOP_SORTED, leftpath and rightpath must both be sorted
+ * by all the grouping columns.
*/
SetOpPath *
create_setop_path(PlannerInfo *root,
RelOptInfo *rel,
- Path *subpath,
+ Path *leftpath,
+ Path *rightpath,
SetOpCmd cmd,
SetOpStrategy strategy,
- List *distinctList,
- AttrNumber flagColIdx,
- int firstFlag,
+ List *groupList,
double numGroups,
double outputRows)
{
@@ -3660,34 +3661,37 @@ create_setop_path(PlannerInfo *root,
pathnode->path.pathtype = T_SetOp;
pathnode->path.parent = rel;
- /* SetOp doesn't project, so use source path's pathtarget */
- pathnode->path.pathtarget = subpath->pathtarget;
+ pathnode->path.pathtarget = rel->reltarget;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
- subpath->parallel_safe;
- pathnode->path.parallel_workers = subpath->parallel_workers;
+ leftpath->parallel_safe && rightpath->parallel_safe;
+ pathnode->path.parallel_workers =
+ leftpath->parallel_workers + rightpath->parallel_workers;
/* SetOp preserves the input sort order if in sort mode */
pathnode->path.pathkeys =
- (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
+ (strategy == SETOP_SORTED) ? leftpath->pathkeys : NIL;
- pathnode->subpath = subpath;
+ pathnode->leftpath = leftpath;
+ pathnode->rightpath = rightpath;
pathnode->cmd = cmd;
pathnode->strategy = strategy;
- pathnode->distinctList = distinctList;
- pathnode->flagColIdx = flagColIdx;
- pathnode->firstFlag = firstFlag;
+ pathnode->groupList = groupList;
pathnode->numGroups = numGroups;
/*
* Charge one cpu_operator_cost per comparison per input tuple. We assume
* all columns get compared at most of the tuples.
+ *
+ * XXX all wrong for hashing
*/
- pathnode->path.disabled_nodes = subpath->disabled_nodes;
- pathnode->path.startup_cost = subpath->startup_cost;
- pathnode->path.total_cost = subpath->total_cost +
- cpu_operator_cost * subpath->rows * list_length(distinctList);
+ pathnode->path.disabled_nodes =
+ leftpath->disabled_nodes + rightpath->disabled_nodes;
+ pathnode->path.startup_cost =
+ leftpath->startup_cost + rightpath->startup_cost;
+ pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost +
+ cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
pathnode->path.rows = outputRows;
return pathnode;