diff options
Diffstat (limited to 'src/backend/optimizer/path/allpaths.c')
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 217 |
1 files changed, 215 insertions, 2 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index ccf46dd0aab..255f56b8276 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -556,7 +556,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, */ if (rel->reloptkind == RELOPT_BASEREL && bms_membership(root->all_baserels) != BMS_SINGLETON) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); @@ -2728,6 +2728,219 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) } /* + * get_useful_pathkeys_for_relation + * Determine which orderings of a relation might be useful. + * + * Getting data in sorted order can be useful either because the requested + * order matches the final output ordering for the overall query we're + * planning, or because it enables an efficient merge join. Here, we try + * to figure out which pathkeys to consider. + * + * This allows us to do incremental sort on top of an index scan under a gather + * merge node, i.e. parallelized. + * + * XXX At the moment this can only ever return a list with a single element, + * because it looks at query_pathkeys only. So we might return the pathkeys + * directly, but it seems plausible we'll want to consider other orderings + * in the future. For example, we might want to consider pathkeys useful for + * merge joins. + */ +static List * +get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) +{ + List *useful_pathkeys_list = NIL; + + /* + * Considering query_pathkeys is always worth it, because it might allow us + * to avoid a total sort when we have a partially presorted path available. + */ + if (root->query_pathkeys) + { + ListCell *lc; + int npathkeys = 0; /* useful pathkeys */ + + foreach(lc, root->query_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + + /* + * We can only build an Incremental Sort for pathkeys which contain + * an EC member in the current relation, so ignore any suffix of the + * list as soon as we find a pathkey without an EC member the + * relation. + * + * By still returning the prefix of the pathkeys list that does meet + * criteria of EC membership in the current relation, we enable not + * just an incremental sort on the entirety of query_pathkeys but + * also incremental sort below a JOIN. + */ + if (!find_em_expr_for_rel(pathkey_ec, rel)) + break; + + npathkeys++; + } + + /* + * The whole query_pathkeys list matches, so append it directly, to allow + * comparing pathkeys easily by comparing list pointer. If we have to truncate + * the pathkeys, we gotta do a copy though. + */ + if (npathkeys == list_length(root->query_pathkeys)) + useful_pathkeys_list = lappend(useful_pathkeys_list, + root->query_pathkeys); + else if (npathkeys > 0) + useful_pathkeys_list = lappend(useful_pathkeys_list, + list_truncate(list_copy(root->query_pathkeys), + npathkeys)); + } + + return useful_pathkeys_list; +} + +/* + * generate_useful_gather_paths + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. + * + * Unlike plain generate_gather_paths, this looks both at pathkeys of input + * paths (aiming to preserve the ordering), but also considers ordering that + * might be useful for nodes above the gather merge node, and tries to add + * a sort (regular or incremental) to provide that. + */ +void +generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) +{ + ListCell *lc; + double rows; + double *rowsp = NULL; + List *useful_pathkeys_list = NIL; + Path *cheapest_partial_path = NULL; + + /* If there are no partial paths, there's nothing to do here. */ + if (rel->partial_pathlist == NIL) + return; + + /* Should we override the rel's rowcount estimate? */ + if (override_rows) + rowsp = &rows; + + /* generate the regular gather (merge) paths */ + generate_gather_paths(root, rel, override_rows); + + /* consider incremental sort for interesting orderings */ + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); + + /* used for explicit (full) sort paths */ + cheapest_partial_path = linitial(rel->partial_pathlist); + + /* + * Consider incremental sort paths for each interesting ordering. + */ + foreach(lc, useful_pathkeys_list) + { + List *useful_pathkeys = lfirst(lc); + ListCell *lc2; + bool is_sorted; + int presorted_keys; + + foreach(lc2, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc2); + GatherMergePath *path; + + /* + * If the path has no ordering at all, then we can't use either + * incremental sort or rely on implict sorting with a gather merge. + */ + if (subpath->pathkeys == NIL) + continue; + + is_sorted = pathkeys_count_contained_in(useful_pathkeys, + subpath->pathkeys, + &presorted_keys); + + /* + * We don't need to consider the case where a subpath is already + * fully sorted because generate_gather_paths already creates a + * gather merge path for every subpath that has pathkeys present. + * + * But since the subpath is already sorted, we know we don't need + * to consider adding a sort (other either kind) on top of it, so + * we can continue here. + */ + if (is_sorted) + continue; + + /* + * Consider regular sort for the cheapest partial path (for each + * useful pathkeys). We know the path is not sorted, because we'd + * not get here otherwise. + * + * This is not redundant with the gather paths created in + * generate_gather_paths, because that doesn't generate ordered + * output. Here we add an explicit sort to match the useful + * ordering. + */ + if (cheapest_partial_path == subpath) + { + Path *tmp; + + tmp = (Path *) create_sort_path(root, + rel, + subpath, + useful_pathkeys, + -1.0); + + rows = tmp->rows * tmp->parallel_workers; + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + + /* Fall through */ + } + + /* + * Consider incremental sort, but only when the subpath is already + * partially sorted on a pathkey prefix. + */ + if (enable_incrementalsort && presorted_keys > 0) + { + Path *tmp; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(useful_pathkeys) != 1); + + tmp = (Path *) create_incremental_sort_path(root, + rel, + subpath, + useful_pathkeys, + presorted_keys, + -1); + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + } + } + } +} + +/* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. * @@ -2899,7 +3112,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * once we know the final targetlist (see grouping_planner). */ if (lev < levels_needed) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* Find and save the cheapest paths for this rel */ set_cheapest(rel); |