diff options
author | Alexander Korotkov <akorotkov@postgresql.org> | 2024-11-24 01:41:45 +0200 |
---|---|---|
committer | Alexander Korotkov <akorotkov@postgresql.org> | 2024-11-24 01:41:45 +0200 |
commit | ae4569161a27823793ca24825bbabce2a91a0bc9 (patch) | |
tree | 59d2131807aed4b471ad48e0d7a0e55bfa213ff9 /src/backend/optimizer/path/indxpath.c | |
parent | d4378c0005e61b1bb78e88097ea6efcdddbe2d6e (diff) | |
download | postgresql-ae4569161a27823793ca24825bbabce2a91a0bc9.tar.gz postgresql-ae4569161a27823793ca24825bbabce2a91a0bc9.zip |
Teach bitmap path generation about transforming OR-clauses to SAOP's
When optimizer generates bitmap paths, it considers breaking OR-clause
arguments one-by-one. But now, a group of similar OR-clauses can be
transformed into SAOP during index matching. So, bitmap paths should
keep up.
This commit teaches bitmap paths generation machinery to group similar
OR-clauses into dedicated RestrictInfos. Those RestrictInfos are considered
both to match index as a whole (as SAOP), or to match as a set of individual
OR-clause argument one-by-one (the old way).
Therefore, bitmap path generation will takes advantage of OR-clauses to SAOP's
transformation. The old way of handling them is also considered. So, there
shouldn't be planning regression.
Discussion: https://postgr.es/m/CAPpHfdu5iQOjF93vGbjidsQkhHvY2NSm29duENYH_cbhC6x%2BMg%40mail.gmail.com
Author: Alexander Korotkov, Andrey Lepikhov
Reviewed-by: Alena Rybakina, Andrei Lepikhov, Jian he, Robert Haas
Reviewed-by: Peter Geoghegan
Diffstat (limited to 'src/backend/optimizer/path/indxpath.c')
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 430 |
1 files changed, 428 insertions, 2 deletions
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 61b18482410..5d8d0c389c9 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1174,6 +1174,383 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, } /* + * Utility structure used to group similar OR-clause arguments in + * group_similar_or_args(). It represents information about the OR-clause + * argument and its matching index key. + */ +typedef struct +{ + int indexnum; /* index of the matching index, or -1 if no + * matching index */ + int colnum; /* index of the matching column, or -1 if no + * matching index */ + Oid opno; /* OID of the OpClause operator, or InvalidOid + * if not an OpExpr */ + Oid inputcollid; /* OID of the OpClause input collation */ + int argindex; /* index of the clause in the list of + * arguments */ +} OrArgIndexMatch; + +/* + * Comparison function for OrArgIndexMatch which provides sort order placing + * similar OR-clause arguments together. + */ +static int +or_arg_index_match_cmp(const void *a, const void *b) +{ + const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a; + const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b; + + if (match_a->indexnum < match_b->indexnum) + return -1; + else if (match_a->indexnum > match_b->indexnum) + return 1; + + if (match_a->colnum < match_b->colnum) + return -1; + else if (match_a->colnum > match_b->colnum) + return 1; + + if (match_a->opno < match_b->opno) + return -1; + else if (match_a->opno > match_b->opno) + return 1; + + if (match_a->inputcollid < match_b->inputcollid) + return -1; + else if (match_a->inputcollid > match_b->inputcollid) + return 1; + + if (match_a->argindex < match_b->argindex) + return -1; + else if (match_a->argindex > match_b->argindex) + return 1; + + return 0; +} + +/* + * group_similar_or_args + * Transform incoming OR-restrictinfo into a list of sub-restrictinfos, + * each of them containing a subset of similar OR-clause arguments from + * the source rinfo. + * + * Similar OR-clause arguments are of the form "indexkey op constant" having + * the same indexkey, operator, and collation. Constant may comprise either + * Const or Param. It may be employed later, during the + * match_clause_to_indexcol() to transform the whole OR-sub-rinfo to an SAOP + * clause. + * + * Returns the processed list of OR-clause arguments. + */ +static List * +group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo) +{ + int n; + int i; + int group_start; + OrArgIndexMatch *matches; + bool matched = false; + ListCell *lc; + ListCell *lc2; + List *orargs; + List *result = NIL; + + Assert(IsA(rinfo->orclause, BoolExpr)); + orargs = ((BoolExpr *) rinfo->orclause)->args; + n = list_length(orargs); + + /* + * To avoid N^2 behavior, take utility pass along the list of OR-clause + * arguments. For each argument, fill the OrArgIndexMatch structure, + * which will be used to sort these arguments at the next step. + */ + i = -1; + matches = (OrArgIndexMatch *) palloc(sizeof(OrArgIndexMatch) * n); + foreach(lc, orargs) + { + Node *arg = lfirst(lc); + RestrictInfo *argrinfo; + OpExpr *clause; + Oid opno; + Node *leftop, + *rightop; + Node *nonConstExpr; + int indexnum; + int colnum; + + i++; + matches[i].argindex = i; + matches[i].indexnum = -1; + matches[i].colnum = -1; + matches[i].opno = InvalidOid; + matches[i].inputcollid = InvalidOid; + + if (!IsA(arg, RestrictInfo)) + continue; + + argrinfo = castNode(RestrictInfo, arg); + + /* Only operator clauses can match */ + if (!IsA(argrinfo->clause, OpExpr)) + continue; + + clause = (OpExpr *) argrinfo->clause; + opno = clause->opno; + + /* Only binary operators can match */ + if (list_length(clause->args) != 2) + continue; + + /* + * Ignore any RelabelType node above the operands. This is needed to + * be able to apply indexscanning in binary-compatible-operator cases. + * Note: we can assume there is at most one RelabelType node; + * eval_const_expressions() will have simplified if more than one. + */ + leftop = get_leftop(clause); + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + + rightop = get_rightop(clause); + if (IsA(rightop, RelabelType)) + rightop = (Node *) ((RelabelType *) rightop)->arg; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). But we don't know a particular index + * yet. First check for a constant, which must be Const or Param. + * That's cheaper than search for an index key among all indexes. + */ + if (IsA(leftop, Const) || IsA(leftop, Param)) + { + opno = get_commutator(opno); + + if (!OidIsValid(opno)) + { + /* commutator doesn't exist, we can't reverse the order */ + continue; + } + nonConstExpr = rightop; + } + else if (IsA(rightop, Const) || IsA(rightop, Param)) + { + nonConstExpr = leftop; + } + else + { + continue; + } + + /* + * Match non-constant part to the index key. It's possible that a + * single non-constant part matches multiple index keys. It's OK, we + * just stop with first matching index key. Given that this choice is + * determined the same for every clause, we will group similar clauses + * together anyway. + */ + indexnum = 0; + foreach(lc2, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2); + + /* Ignore index if it doesn't support bitmap scans */ + if (!index->amhasgetbitmap) + continue; + + for (colnum = 0; colnum < index->nkeycolumns; colnum++) + { + if (match_index_to_operand(nonConstExpr, colnum, index)) + { + matches[i].indexnum = indexnum; + matches[i].colnum = colnum; + matches[i].opno = opno; + matches[i].inputcollid = clause->inputcollid; + matched = true; + break; + } + } + + /* + * Stop looping through the indexes, if we managed to match + * nonConstExpr to any index column. + */ + if (matches[i].indexnum >= 0) + break; + indexnum++; + } + } + + /* + * Fast-path check: if no clause is matching to the index column, we can + * just give up at this stage and return the clause list as-is. + */ + if (!matched) + { + pfree(matches); + return orargs; + } + + /* Sort clauses to make similar clauses go together */ + qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp); + + /* + * Group similar clauses into single sub-restrictinfo. Side effect: the + * resulting list of restrictions will be sorted by indexnum and colnum. + */ + group_start = 0; + for (i = 1; i <= n; i++) + { + /* Check if it's a group boundary */ + if (group_start >= 0 && + (i == n || + matches[i].indexnum != matches[group_start].indexnum || + matches[i].colnum != matches[group_start].colnum || + matches[i].opno != matches[group_start].opno || + matches[i].inputcollid != matches[group_start].inputcollid || + matches[i].indexnum == -1)) + { + /* + * One clause in group: add it "as is" to the upper-level OR. + */ + if (i - group_start == 1) + { + result = lappend(result, + list_nth(orargs, + matches[group_start].argindex)); + } + else + { + /* + * Two or more clauses in a group: create a nested OR. + */ + List *args = NIL; + List *rargs = NIL; + RestrictInfo *subrinfo; + int j; + + Assert(i - group_start >= 2); + + /* Construct the list of nested OR arguments */ + for (j = group_start; j < i; j++) + { + Node *arg = list_nth(orargs, matches[j].argindex); + + rargs = lappend(rargs, arg); + if (IsA(arg, RestrictInfo)) + args = lappend(args, ((RestrictInfo *) arg)->clause); + else + args = lappend(args, arg); + } + + /* Construct the nested OR and wrap it with RestrictInfo */ + subrinfo = make_plain_restrictinfo(root, + make_orclause(args), + make_orclause(rargs), + rinfo->is_pushed_down, + rinfo->has_clone, + rinfo->is_clone, + rinfo->pseudoconstant, + rinfo->security_level, + rinfo->required_relids, + rinfo->incompatible_relids, + rinfo->outer_relids); + result = lappend(result, subrinfo); + } + + group_start = i; + } + } + pfree(matches); + return result; +} + +/* + * make_bitmap_paths_for_or_group + * Generate bitmap paths for a group of similar OR-clause arguments + * produced by group_similar_or_args(). + * + * This function considers two cases: (1) matching a group of clauses to + * the index as a whole, and (2) matching the individual clauses one-by-one. + * (1) typically comprises an optimal solution. If not, (2) typically + * comprises fair alternative. + * + * Ideally, we could consider all arbitrary splits of arguments into + * subgroups, but that could lead to unacceptable computational complexity. + * This is why we only consider two cases of above. + */ +static List * +make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel, + RestrictInfo *ri, List *other_clauses) +{ + List *jointlist = NIL; + List *splitlist = NIL; + ListCell *lc; + List *orargs; + List *args = ((BoolExpr *) ri->orclause)->args; + Cost jointcost = 0.0, + splitcost = 0.0; + Path *bitmapqual; + List *indlist; + + /* + * First, try to match the whole group to the one index. + */ + orargs = list_make1(ri); + indlist = build_paths_for_OR(root, rel, + orargs, + other_clauses); + if (indlist != NIL) + { + bitmapqual = choose_bitmap_and(root, rel, indlist); + jointcost = bitmapqual->total_cost; + jointlist = list_make1(bitmapqual); + } + + /* + * If we manage to find a bitmap scan, which uses the group of OR-clause + * arguments as a whole, we can skip matching OR-clause arguments + * one-by-one as long as there are no other clauses, which can bring more + * efficiency to one-by-one case. + */ + if (jointlist != NIL && other_clauses == NIL) + return jointlist; + + /* + * Also try to match all containing clauses one-by-one. + */ + foreach(lc, args) + { + orargs = list_make1(lfirst(lc)); + + indlist = build_paths_for_OR(root, rel, + orargs, + other_clauses); + + if (indlist == NIL) + { + splitlist = NIL; + break; + } + + bitmapqual = choose_bitmap_and(root, rel, indlist); + splitcost += bitmapqual->total_cost; + splitlist = lappend(splitlist, bitmapqual); + } + + /* + * Pick the best option. + */ + if (splitlist == NIL) + return jointlist; + else if (jointlist == NIL) + return splitlist; + else + return (jointcost < splitcost) ? jointlist : splitlist; +} + + +/* * generate_bitmap_or_paths * Look through the list of clauses to find OR clauses, and generate * a BitmapOrPath for each one we can handle that way. Return a list @@ -1203,6 +1580,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, List *pathlist; Path *bitmapqual; ListCell *j; + List *groupedArgs; + List *inner_other_clauses = NIL; /* Ignore RestrictInfos that aren't ORs */ if (!restriction_is_or_clause(rinfo)) @@ -1213,7 +1592,29 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, * the OR, else we can't use it. */ pathlist = NIL; - foreach(j, ((BoolExpr *) rinfo->orclause)->args) + + /* + * Group the similar OR-clause arguments into dedicated RestrictInfos, + * because each of those RestrictInfos has a chance to match the index + * as a whole. + */ + groupedArgs = group_similar_or_args(root, rel, rinfo); + + if (groupedArgs != ((BoolExpr *) rinfo->orclause)->args) + { + /* + * Some parts of the rinfo were probably grouped. In this case, + * we have a set of sub-rinfos that together are an exact + * duplicate of rinfo. Thus, we need to remove the rinfo from + * other clauses. match_clauses_to_index detects duplicated + * iclauses by comparing pointers to original rinfos that would be + * different. So, we must delete rinfo to avoid de-facto + * duplicated clauses in the index clauses list. + */ + inner_other_clauses = list_delete(list_copy(all_clauses), rinfo); + } + + foreach(j, groupedArgs) { Node *orarg = (Node *) lfirst(j); List *indlist; @@ -1233,12 +1634,34 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, andargs, all_clauses)); } + else if (restriction_is_or_clause(castNode(RestrictInfo, orarg))) + { + RestrictInfo *ri = castNode(RestrictInfo, orarg); + + /* + * Generate bitmap paths for the group of similar OR-clause + * arguments. + */ + indlist = make_bitmap_paths_for_or_group(root, + rel, ri, + inner_other_clauses); + + if (indlist == NIL) + { + pathlist = NIL; + break; + } + else + { + pathlist = list_concat(pathlist, indlist); + continue; + } + } else { RestrictInfo *ri = castNode(RestrictInfo, orarg); List *orargs; - Assert(!restriction_is_or_clause(ri)); orargs = list_make1(ri); indlist = build_paths_for_OR(root, rel, @@ -1264,6 +1687,9 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, pathlist = lappend(pathlist, bitmapqual); } + if (inner_other_clauses != NIL) + list_free(inner_other_clauses); + /* * If we have a match for every arm, then turn them into a * BitmapOrPath, and add to result list. |