diff options
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/README | 2 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 81 | ||||
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 156 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 50 |
4 files changed, 162 insertions, 127 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 2ab4f3dbf37..f341d9f303c 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -649,7 +649,7 @@ RelOptInfo - a relation or joined relations GroupingSetsPath - an Agg plan node used to implement GROUPING SETS MinMaxAggPath - a Result plan node with subplans performing MIN/MAX WindowAggPath - a WindowAgg plan node applied to some sub-path - SetOpPath - a SetOp plan node applied to some sub-path + SetOpPath - a SetOp plan node applied to two sub-paths RecursiveUnionPath - a RecursiveUnion plan node applied to two sub-paths LockRowsPath - a LockRows plan node applied to some sub-path ModifyTablePath - a ModifyTable plan node applied to some sub-path(s) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 178c572b021..b3e2294e84f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -301,9 +301,9 @@ static Unique *make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols); static Gather *make_gather(List *qptlist, List *qpqual, int nworkers, int rescan_param, bool single_copy, Plan *subplan); -static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, - List *distinctList, AttrNumber flagColIdx, int firstFlag, - long numGroups); +static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, + List *tlist, Plan *lefttree, Plan *righttree, + List *groupList, long numGroups); static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam); static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); static ProjectSet *make_project_set(List *tlist, Plan *subplan); @@ -2719,25 +2719,29 @@ static SetOp * create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) { SetOp *plan; - Plan *subplan; + List *tlist = build_path_tlist(root, &best_path->path); + Plan *leftplan; + Plan *rightplan; long numGroups; /* * SetOp doesn't project, so tlist requirements pass through; moreover we * need grouping columns to be labeled. */ - subplan = create_plan_recurse(root, best_path->subpath, - flags | CP_LABEL_TLIST); + leftplan = create_plan_recurse(root, best_path->leftpath, + flags | CP_LABEL_TLIST); + rightplan = create_plan_recurse(root, best_path->rightpath, + flags | CP_LABEL_TLIST); /* Convert numGroups to long int --- but 'ware overflow! */ numGroups = clamp_cardinality_to_long(best_path->numGroups); plan = make_setop(best_path->cmd, best_path->strategy, - subplan, - best_path->distinctList, - best_path->flagColIdx, - best_path->firstFlag, + tlist, + leftplan, + rightplan, + best_path->groupList, numGroups); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6950,57 +6954,62 @@ make_gather(List *qptlist, } /* - * distinctList is a list of SortGroupClauses, identifying the targetlist - * items that should be considered by the SetOp filter. The input path must - * already be sorted accordingly. + * groupList is a list of SortGroupClauses, identifying the targetlist + * items that should be considered by the SetOp filter. The input plans must + * already be sorted accordingly, if we're doing SETOP_SORTED mode. */ static SetOp * -make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, - List *distinctList, AttrNumber flagColIdx, int firstFlag, - long numGroups) +make_setop(SetOpCmd cmd, SetOpStrategy strategy, + List *tlist, Plan *lefttree, Plan *righttree, + List *groupList, long numGroups) { SetOp *node = makeNode(SetOp); Plan *plan = &node->plan; - int numCols = list_length(distinctList); + int numCols = list_length(groupList); int keyno = 0; - AttrNumber *dupColIdx; - Oid *dupOperators; - Oid *dupCollations; + AttrNumber *cmpColIdx; + Oid *cmpOperators; + Oid *cmpCollations; + bool *cmpNullsFirst; ListCell *slitem; - plan->targetlist = lefttree->targetlist; + plan->targetlist = tlist; plan->qual = NIL; plan->lefttree = lefttree; - plan->righttree = NULL; + plan->righttree = righttree; /* - * convert SortGroupClause list into arrays of attr indexes and equality + * convert SortGroupClause list into arrays of attr indexes and comparison * operators, as wanted by executor */ - dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); - dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); - dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); + cmpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + cmpOperators = (Oid *) palloc(sizeof(Oid) * numCols); + cmpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + cmpNullsFirst = (bool *) palloc(sizeof(bool) * numCols); - foreach(slitem, distinctList) + foreach(slitem, groupList) { SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); - dupColIdx[keyno] = tle->resno; - dupOperators[keyno] = sortcl->eqop; - dupCollations[keyno] = exprCollation((Node *) tle->expr); - Assert(OidIsValid(dupOperators[keyno])); + cmpColIdx[keyno] = tle->resno; + if (strategy == SETOP_HASHED) + cmpOperators[keyno] = sortcl->eqop; + else + cmpOperators[keyno] = sortcl->sortop; + Assert(OidIsValid(cmpOperators[keyno])); + cmpCollations[keyno] = exprCollation((Node *) tle->expr); + cmpNullsFirst[keyno] = sortcl->nulls_first; keyno++; } node->cmd = cmd; node->strategy = strategy; node->numCols = numCols; - node->dupColIdx = dupColIdx; - node->dupOperators = dupOperators; - node->dupCollations = dupCollations; - node->flagColIdx = flagColIdx; - node->firstFlag = firstFlag; + node->cmpColIdx = cmpColIdx; + node->cmpOperators = cmpOperators; + node->cmpCollations = cmpCollations; + node->cmpNullsFirst = cmpNullsFirst; node->numGroups = numGroups; return node; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 9c3822f19ad..1c43370005e 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root, List **istrivial_tlist); static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel); static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, - Path *input_path, + Path *lpath, Path *rpath, double dNumGroups, double dNumOutputRows, const char *construct); static List *generate_setop_tlist(List *colTypes, List *colCollations, @@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * to the corresponding tlist entries of the subplan. However, since * the subplan was generated by generate_union_paths() or * generate_nonunion_paths(), and hence its tlist was generated by - * generate_append_tlist(), this will work. We just tell - * generate_setop_tlist() to use varno 0. + * generate_append_tlist() or generate_setop_tlist(), this will work. + * We just tell generate_setop_tlist() to use varno 0. */ if (flag >= 0 || !tlist_same_datatypes(*pTargetList, colTypes, junkOK) || @@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, *path; List *lpath_tlist, *rpath_tlist, - *tlist_list, *tlist, - *groupList, - *pathlist; + *groupList; bool lpath_trivial_tlist, - rpath_trivial_tlist; + rpath_trivial_tlist, + result_trivial_tlist; double dLeftGroups, dRightGroups, dNumGroups, dNumOutputRows; bool use_hash; SetOpCmd cmd; - int firstFlag; /* * Tell children to fetch all tuples. */ root->tuple_fraction = 0.0; - /* Recurse on children, ensuring their outputs are marked */ + /* Recurse on children */ lrel = recurse_set_operations(op->larg, root, op->colTypes, op->colCollations, - false, 0, + false, -1, refnames_tlist, &lpath_tlist, &lpath_trivial_tlist); @@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, else dLeftGroups = lrel->rows; - lpath = lrel->cheapest_total_path; rrel = recurse_set_operations(op->rarg, root, op->colTypes, op->colCollations, - false, 1, + false, -1, refnames_tlist, &rpath_tlist, &rpath_trivial_tlist); @@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, else dRightGroups = rrel->rows; - rpath = rrel->cheapest_total_path; - /* Undo effects of forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; /* * For EXCEPT, we must put the left input first. For INTERSECT, either * order should give the same results, and we prefer to put the smaller - * input first in order to minimize the size of the hash table in the - * hashing case. "Smaller" means the one with the fewer groups. + * input first in order to (a) minimize the size of the hash table in the + * hashing case, and (b) improve our chances of exploiting the executor's + * fast path for empty left-hand input. "Smaller" means the one with the + * fewer groups. */ - if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups) - { - pathlist = list_make2(lpath, rpath); - tlist_list = list_make2(lpath_tlist, rpath_tlist); - firstFlag = 0; - } - else + if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups) { - pathlist = list_make2(rpath, lpath); - tlist_list = list_make2(rpath_tlist, lpath_tlist); - firstFlag = 1; + /* need to swap the two inputs */ + RelOptInfo *tmprel; + List *tmplist; + double tmpd; + + tmprel = lrel; + lrel = rrel; + rrel = tmprel; + tmplist = lpath_tlist; + lpath_tlist = rpath_tlist; + rpath_tlist = tmplist; + tmpd = dLeftGroups; + dLeftGroups = dRightGroups; + dRightGroups = tmpd; } + lpath = lrel->cheapest_total_path; + rpath = rrel->cheapest_total_path; + /* - * Generate tlist for Append plan node. + * Generate tlist for SetOp plan node. * - * The tlist for an Append plan isn't important as far as the Append is + * The tlist for a SetOp plan isn't important so far as the SetOp is * concerned, but we must make it look real anyway for the benefit of the - * next plan level up. In fact, it has to be real enough that the flag - * column is shown as a variable not a constant, else setrefs.c will get - * confused. + * next plan level up. */ - tlist = generate_append_tlist(op->colTypes, op->colCollations, true, - tlist_list, refnames_tlist); + tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1, + 0, false, lpath_tlist, refnames_tlist, + &result_trivial_tlist); + + /* We should not have needed any type coercions in the tlist */ + Assert(result_trivial_tlist); *pTargetList = tlist; @@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, bms_union(lrel->relids, rrel->relids)); result_rel->reltarget = create_pathtarget(root, tlist); - /* - * Append the child results together. - */ - path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NIL, NULL, 0, false, -1); - /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, tlist); @@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, } else { - dNumGroups = Min(dLeftGroups, dRightGroups); + dNumGroups = dLeftGroups; dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups; } /* - * Decide whether to hash or sort, and add a sort node if needed. + * Decide whether to hash or sort, and add sort nodes if needed. */ - use_hash = choose_hashed_setop(root, groupList, path, + use_hash = choose_hashed_setop(root, groupList, lpath, rpath, dNumGroups, dNumOutputRows, (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"); if (groupList && !use_hash) - path = (Path *) create_sort_path(root, - result_rel, - path, - make_pathkeys_for_sortclauses(root, - groupList, - tlist), - -1.0); + { + List *pathkeys; + + pathkeys = make_pathkeys_for_sortclauses(root, + groupList, + lpath_tlist); + if (!pathkeys_contained_in(pathkeys, lpath->pathkeys)) + lpath = (Path *) create_sort_path(root, + lpath->parent, + lpath, + pathkeys, + -1.0); + pathkeys = make_pathkeys_for_sortclauses(root, + groupList, + rpath_tlist); + if (!pathkeys_contained_in(pathkeys, rpath->pathkeys)) + rpath = (Path *) create_sort_path(root, + rpath->parent, + rpath, + pathkeys, + -1.0); + } /* * Finally, add a SetOp path node to generate the correct output. @@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, } path = (Path *) create_setop_path(root, result_rel, - path, + lpath, + rpath, cmd, use_hash ? SETOP_HASHED : SETOP_SORTED, groupList, - list_length(op->colTypes) + 1, - use_hash ? firstFlag : -1, dNumGroups, dNumOutputRows); @@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel) /* * choose_hashed_setop - should we use hashing for a set operation? + * + * XXX probably this should go away: just make both paths and let + * add_path sort it out. */ static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, - Path *input_path, + Path *lpath, Path *rpath, double dNumGroups, double dNumOutputRows, const char *construct) { @@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * Don't do it if it doesn't look like the hashtable will fit into * hash_mem. */ - hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader); + hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader); if (hashentrysize * dNumGroups > hash_mem_limit) return false; @@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * See if the estimated cost is no more than doing it the other way. * * We need to consider input_plan + hashagg versus input_plan + sort + - * group. Note that the actual result plan might involve a SetOp or - * Unique node, not Agg or Group, but the cost estimates for Agg and Group - * should be close enough for our purposes here. + * group. XXX NOT TRUE: Note that the actual result plan might involve a + * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg + * and Group should be close enough for our purposes here. * * These path variables are dummies that just hold cost fields; we don't * make actual Paths for these steps. @@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, cost_agg(&hashed_p, root, AGG_HASHED, NULL, numGroupCols, dNumGroups, NIL, - input_path->disabled_nodes, - input_path->startup_cost, input_path->total_cost, - input_path->rows, input_path->pathtarget->width); + lpath->disabled_nodes + rpath->disabled_nodes, + lpath->startup_cost + rpath->startup_cost, + lpath->total_cost + rpath->total_cost, + lpath->rows + rpath->rows, + lpath->pathtarget->width); /* - * Now for the sorted case. Note that the input is *always* unsorted, - * since it was made by appending unrelated sub-relations together. + * Now for the sorted case. XXX NOT TRUE: Note that the input is *always* + * unsorted, since it was made by appending unrelated sub-relations + * together. */ - sorted_p.disabled_nodes = input_path->disabled_nodes; - sorted_p.startup_cost = input_path->startup_cost; - sorted_p.total_cost = input_path->total_cost; + sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes; + sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost; + sorted_p.total_cost = lpath->total_cost + rpath->total_cost; /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */ cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes, sorted_p.total_cost, - input_path->rows, input_path->pathtarget->width, + lpath->rows + rpath->rows, + lpath->pathtarget->width, 0.0, work_mem, -1.0); cost_group(&sorted_p, root, numGroupCols, dNumGroups, NIL, sorted_p.disabled_nodes, sorted_p.startup_cost, sorted_p.total_cost, - input_path->rows); + lpath->rows + rpath->rows); /* * Now make the decision using the top-level tuple fraction. First we diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index fc97bf6ee26..e52e4b1d677 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3634,25 +3634,26 @@ create_windowagg_path(PlannerInfo *root, * Creates a pathnode that represents computation of INTERSECT or EXCEPT * * 'rel' is the parent relation associated with the result - * 'subpath' is the path representing the source of data + * 'leftpath' is the path representing the left-hand source of data + * 'rightpath' is the path representing the right-hand source of data * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL) * 'strategy' is the implementation strategy (sorted or hashed) - * 'distinctList' is a list of SortGroupClause's representing the grouping - * 'flagColIdx' is the column number where the flag column will be, if any - * 'firstFlag' is the flag value for the first input relation when hashing; - * or -1 when sorting - * 'numGroups' is the estimated number of distinct groups + * 'groupList' is a list of SortGroupClause's representing the grouping + * 'numGroups' is the estimated number of distinct groups in left-hand input * 'outputRows' is the estimated number of output rows + * + * leftpath and rightpath must produce the same columns. Moreover, if + * strategy is SETOP_SORTED, leftpath and rightpath must both be sorted + * by all the grouping columns. */ SetOpPath * create_setop_path(PlannerInfo *root, RelOptInfo *rel, - Path *subpath, + Path *leftpath, + Path *rightpath, SetOpCmd cmd, SetOpStrategy strategy, - List *distinctList, - AttrNumber flagColIdx, - int firstFlag, + List *groupList, double numGroups, double outputRows) { @@ -3660,34 +3661,37 @@ create_setop_path(PlannerInfo *root, pathnode->path.pathtype = T_SetOp; pathnode->path.parent = rel; - /* SetOp doesn't project, so use source path's pathtarget */ - pathnode->path.pathtarget = subpath->pathtarget; + pathnode->path.pathtarget = rel->reltarget; /* For now, assume we are above any joins, so no parameterization */ pathnode->path.param_info = NULL; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && - subpath->parallel_safe; - pathnode->path.parallel_workers = subpath->parallel_workers; + leftpath->parallel_safe && rightpath->parallel_safe; + pathnode->path.parallel_workers = + leftpath->parallel_workers + rightpath->parallel_workers; /* SetOp preserves the input sort order if in sort mode */ pathnode->path.pathkeys = - (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL; + (strategy == SETOP_SORTED) ? leftpath->pathkeys : NIL; - pathnode->subpath = subpath; + pathnode->leftpath = leftpath; + pathnode->rightpath = rightpath; pathnode->cmd = cmd; pathnode->strategy = strategy; - pathnode->distinctList = distinctList; - pathnode->flagColIdx = flagColIdx; - pathnode->firstFlag = firstFlag; + pathnode->groupList = groupList; pathnode->numGroups = numGroups; /* * Charge one cpu_operator_cost per comparison per input tuple. We assume * all columns get compared at most of the tuples. + * + * XXX all wrong for hashing */ - pathnode->path.disabled_nodes = subpath->disabled_nodes; - pathnode->path.startup_cost = subpath->startup_cost; - pathnode->path.total_cost = subpath->total_cost + - cpu_operator_cost * subpath->rows * list_length(distinctList); + pathnode->path.disabled_nodes = + leftpath->disabled_nodes + rightpath->disabled_nodes; + pathnode->path.startup_cost = + leftpath->startup_cost + rightpath->startup_cost; + pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost + + cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList); pathnode->path.rows = outputRows; return pathnode; |