aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/indxpath.c
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2024-11-24 01:41:45 +0200
committerAlexander Korotkov <akorotkov@postgresql.org>2024-11-24 01:41:45 +0200
commitae4569161a27823793ca24825bbabce2a91a0bc9 (patch)
tree59d2131807aed4b471ad48e0d7a0e55bfa213ff9 /src/backend/optimizer/path/indxpath.c
parentd4378c0005e61b1bb78e88097ea6efcdddbe2d6e (diff)
downloadpostgresql-ae4569161a27823793ca24825bbabce2a91a0bc9.tar.gz
postgresql-ae4569161a27823793ca24825bbabce2a91a0bc9.zip
Teach bitmap path generation about transforming OR-clauses to SAOP's
When optimizer generates bitmap paths, it considers breaking OR-clause arguments one-by-one. But now, a group of similar OR-clauses can be transformed into SAOP during index matching. So, bitmap paths should keep up. This commit teaches bitmap paths generation machinery to group similar OR-clauses into dedicated RestrictInfos. Those RestrictInfos are considered both to match index as a whole (as SAOP), or to match as a set of individual OR-clause argument one-by-one (the old way). Therefore, bitmap path generation will takes advantage of OR-clauses to SAOP's transformation. The old way of handling them is also considered. So, there shouldn't be planning regression. Discussion: https://postgr.es/m/CAPpHfdu5iQOjF93vGbjidsQkhHvY2NSm29duENYH_cbhC6x%2BMg%40mail.gmail.com Author: Alexander Korotkov, Andrey Lepikhov Reviewed-by: Alena Rybakina, Andrei Lepikhov, Jian he, Robert Haas Reviewed-by: Peter Geoghegan
Diffstat (limited to 'src/backend/optimizer/path/indxpath.c')
-rw-r--r--src/backend/optimizer/path/indxpath.c430
1 files changed, 428 insertions, 2 deletions
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 61b18482410..5d8d0c389c9 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1174,6 +1174,383 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
}
/*
+ * Utility structure used to group similar OR-clause arguments in
+ * group_similar_or_args(). It represents information about the OR-clause
+ * argument and its matching index key.
+ */
+typedef struct
+{
+ int indexnum; /* index of the matching index, or -1 if no
+ * matching index */
+ int colnum; /* index of the matching column, or -1 if no
+ * matching index */
+ Oid opno; /* OID of the OpClause operator, or InvalidOid
+ * if not an OpExpr */
+ Oid inputcollid; /* OID of the OpClause input collation */
+ int argindex; /* index of the clause in the list of
+ * arguments */
+} OrArgIndexMatch;
+
+/*
+ * Comparison function for OrArgIndexMatch which provides sort order placing
+ * similar OR-clause arguments together.
+ */
+static int
+or_arg_index_match_cmp(const void *a, const void *b)
+{
+ const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a;
+ const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b;
+
+ if (match_a->indexnum < match_b->indexnum)
+ return -1;
+ else if (match_a->indexnum > match_b->indexnum)
+ return 1;
+
+ if (match_a->colnum < match_b->colnum)
+ return -1;
+ else if (match_a->colnum > match_b->colnum)
+ return 1;
+
+ if (match_a->opno < match_b->opno)
+ return -1;
+ else if (match_a->opno > match_b->opno)
+ return 1;
+
+ if (match_a->inputcollid < match_b->inputcollid)
+ return -1;
+ else if (match_a->inputcollid > match_b->inputcollid)
+ return 1;
+
+ if (match_a->argindex < match_b->argindex)
+ return -1;
+ else if (match_a->argindex > match_b->argindex)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * group_similar_or_args
+ * Transform incoming OR-restrictinfo into a list of sub-restrictinfos,
+ * each of them containing a subset of similar OR-clause arguments from
+ * the source rinfo.
+ *
+ * Similar OR-clause arguments are of the form "indexkey op constant" having
+ * the same indexkey, operator, and collation. Constant may comprise either
+ * Const or Param. It may be employed later, during the
+ * match_clause_to_indexcol() to transform the whole OR-sub-rinfo to an SAOP
+ * clause.
+ *
+ * Returns the processed list of OR-clause arguments.
+ */
+static List *
+group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
+{
+ int n;
+ int i;
+ int group_start;
+ OrArgIndexMatch *matches;
+ bool matched = false;
+ ListCell *lc;
+ ListCell *lc2;
+ List *orargs;
+ List *result = NIL;
+
+ Assert(IsA(rinfo->orclause, BoolExpr));
+ orargs = ((BoolExpr *) rinfo->orclause)->args;
+ n = list_length(orargs);
+
+ /*
+ * To avoid N^2 behavior, take utility pass along the list of OR-clause
+ * arguments. For each argument, fill the OrArgIndexMatch structure,
+ * which will be used to sort these arguments at the next step.
+ */
+ i = -1;
+ matches = (OrArgIndexMatch *) palloc(sizeof(OrArgIndexMatch) * n);
+ foreach(lc, orargs)
+ {
+ Node *arg = lfirst(lc);
+ RestrictInfo *argrinfo;
+ OpExpr *clause;
+ Oid opno;
+ Node *leftop,
+ *rightop;
+ Node *nonConstExpr;
+ int indexnum;
+ int colnum;
+
+ i++;
+ matches[i].argindex = i;
+ matches[i].indexnum = -1;
+ matches[i].colnum = -1;
+ matches[i].opno = InvalidOid;
+ matches[i].inputcollid = InvalidOid;
+
+ if (!IsA(arg, RestrictInfo))
+ continue;
+
+ argrinfo = castNode(RestrictInfo, arg);
+
+ /* Only operator clauses can match */
+ if (!IsA(argrinfo->clause, OpExpr))
+ continue;
+
+ clause = (OpExpr *) argrinfo->clause;
+ opno = clause->opno;
+
+ /* Only binary operators can match */
+ if (list_length(clause->args) != 2)
+ continue;
+
+ /*
+ * Ignore any RelabelType node above the operands. This is needed to
+ * be able to apply indexscanning in binary-compatible-operator cases.
+ * Note: we can assume there is at most one RelabelType node;
+ * eval_const_expressions() will have simplified if more than one.
+ */
+ leftop = get_leftop(clause);
+ if (IsA(leftop, RelabelType))
+ leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+ rightop = get_rightop(clause);
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ /*
+ * Check for clauses of the form: (indexkey operator constant) or
+ * (constant operator indexkey). But we don't know a particular index
+ * yet. First check for a constant, which must be Const or Param.
+ * That's cheaper than search for an index key among all indexes.
+ */
+ if (IsA(leftop, Const) || IsA(leftop, Param))
+ {
+ opno = get_commutator(opno);
+
+ if (!OidIsValid(opno))
+ {
+ /* commutator doesn't exist, we can't reverse the order */
+ continue;
+ }
+ nonConstExpr = rightop;
+ }
+ else if (IsA(rightop, Const) || IsA(rightop, Param))
+ {
+ nonConstExpr = leftop;
+ }
+ else
+ {
+ continue;
+ }
+
+ /*
+ * Match non-constant part to the index key. It's possible that a
+ * single non-constant part matches multiple index keys. It's OK, we
+ * just stop with first matching index key. Given that this choice is
+ * determined the same for every clause, we will group similar clauses
+ * together anyway.
+ */
+ indexnum = 0;
+ foreach(lc2, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2);
+
+ /* Ignore index if it doesn't support bitmap scans */
+ if (!index->amhasgetbitmap)
+ continue;
+
+ for (colnum = 0; colnum < index->nkeycolumns; colnum++)
+ {
+ if (match_index_to_operand(nonConstExpr, colnum, index))
+ {
+ matches[i].indexnum = indexnum;
+ matches[i].colnum = colnum;
+ matches[i].opno = opno;
+ matches[i].inputcollid = clause->inputcollid;
+ matched = true;
+ break;
+ }
+ }
+
+ /*
+ * Stop looping through the indexes, if we managed to match
+ * nonConstExpr to any index column.
+ */
+ if (matches[i].indexnum >= 0)
+ break;
+ indexnum++;
+ }
+ }
+
+ /*
+ * Fast-path check: if no clause is matching to the index column, we can
+ * just give up at this stage and return the clause list as-is.
+ */
+ if (!matched)
+ {
+ pfree(matches);
+ return orargs;
+ }
+
+ /* Sort clauses to make similar clauses go together */
+ qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
+
+ /*
+ * Group similar clauses into single sub-restrictinfo. Side effect: the
+ * resulting list of restrictions will be sorted by indexnum and colnum.
+ */
+ group_start = 0;
+ for (i = 1; i <= n; i++)
+ {
+ /* Check if it's a group boundary */
+ if (group_start >= 0 &&
+ (i == n ||
+ matches[i].indexnum != matches[group_start].indexnum ||
+ matches[i].colnum != matches[group_start].colnum ||
+ matches[i].opno != matches[group_start].opno ||
+ matches[i].inputcollid != matches[group_start].inputcollid ||
+ matches[i].indexnum == -1))
+ {
+ /*
+ * One clause in group: add it "as is" to the upper-level OR.
+ */
+ if (i - group_start == 1)
+ {
+ result = lappend(result,
+ list_nth(orargs,
+ matches[group_start].argindex));
+ }
+ else
+ {
+ /*
+ * Two or more clauses in a group: create a nested OR.
+ */
+ List *args = NIL;
+ List *rargs = NIL;
+ RestrictInfo *subrinfo;
+ int j;
+
+ Assert(i - group_start >= 2);
+
+ /* Construct the list of nested OR arguments */
+ for (j = group_start; j < i; j++)
+ {
+ Node *arg = list_nth(orargs, matches[j].argindex);
+
+ rargs = lappend(rargs, arg);
+ if (IsA(arg, RestrictInfo))
+ args = lappend(args, ((RestrictInfo *) arg)->clause);
+ else
+ args = lappend(args, arg);
+ }
+
+ /* Construct the nested OR and wrap it with RestrictInfo */
+ subrinfo = make_plain_restrictinfo(root,
+ make_orclause(args),
+ make_orclause(rargs),
+ rinfo->is_pushed_down,
+ rinfo->has_clone,
+ rinfo->is_clone,
+ rinfo->pseudoconstant,
+ rinfo->security_level,
+ rinfo->required_relids,
+ rinfo->incompatible_relids,
+ rinfo->outer_relids);
+ result = lappend(result, subrinfo);
+ }
+
+ group_start = i;
+ }
+ }
+ pfree(matches);
+ return result;
+}
+
+/*
+ * make_bitmap_paths_for_or_group
+ * Generate bitmap paths for a group of similar OR-clause arguments
+ * produced by group_similar_or_args().
+ *
+ * This function considers two cases: (1) matching a group of clauses to
+ * the index as a whole, and (2) matching the individual clauses one-by-one.
+ * (1) typically comprises an optimal solution. If not, (2) typically
+ * comprises fair alternative.
+ *
+ * Ideally, we could consider all arbitrary splits of arguments into
+ * subgroups, but that could lead to unacceptable computational complexity.
+ * This is why we only consider two cases of above.
+ */
+static List *
+make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel,
+ RestrictInfo *ri, List *other_clauses)
+{
+ List *jointlist = NIL;
+ List *splitlist = NIL;
+ ListCell *lc;
+ List *orargs;
+ List *args = ((BoolExpr *) ri->orclause)->args;
+ Cost jointcost = 0.0,
+ splitcost = 0.0;
+ Path *bitmapqual;
+ List *indlist;
+
+ /*
+ * First, try to match the whole group to the one index.
+ */
+ orargs = list_make1(ri);
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ other_clauses);
+ if (indlist != NIL)
+ {
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
+ jointcost = bitmapqual->total_cost;
+ jointlist = list_make1(bitmapqual);
+ }
+
+ /*
+ * If we manage to find a bitmap scan, which uses the group of OR-clause
+ * arguments as a whole, we can skip matching OR-clause arguments
+ * one-by-one as long as there are no other clauses, which can bring more
+ * efficiency to one-by-one case.
+ */
+ if (jointlist != NIL && other_clauses == NIL)
+ return jointlist;
+
+ /*
+ * Also try to match all containing clauses one-by-one.
+ */
+ foreach(lc, args)
+ {
+ orargs = list_make1(lfirst(lc));
+
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ other_clauses);
+
+ if (indlist == NIL)
+ {
+ splitlist = NIL;
+ break;
+ }
+
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
+ splitcost += bitmapqual->total_cost;
+ splitlist = lappend(splitlist, bitmapqual);
+ }
+
+ /*
+ * Pick the best option.
+ */
+ if (splitlist == NIL)
+ return jointlist;
+ else if (jointlist == NIL)
+ return splitlist;
+ else
+ return (jointcost < splitcost) ? jointlist : splitlist;
+}
+
+
+/*
* generate_bitmap_or_paths
* Look through the list of clauses to find OR clauses, and generate
* a BitmapOrPath for each one we can handle that way. Return a list
@@ -1203,6 +1580,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *pathlist;
Path *bitmapqual;
ListCell *j;
+ List *groupedArgs;
+ List *inner_other_clauses = NIL;
/* Ignore RestrictInfos that aren't ORs */
if (!restriction_is_or_clause(rinfo))
@@ -1213,7 +1592,29 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
* the OR, else we can't use it.
*/
pathlist = NIL;
- foreach(j, ((BoolExpr *) rinfo->orclause)->args)
+
+ /*
+ * Group the similar OR-clause arguments into dedicated RestrictInfos,
+ * because each of those RestrictInfos has a chance to match the index
+ * as a whole.
+ */
+ groupedArgs = group_similar_or_args(root, rel, rinfo);
+
+ if (groupedArgs != ((BoolExpr *) rinfo->orclause)->args)
+ {
+ /*
+ * Some parts of the rinfo were probably grouped. In this case,
+ * we have a set of sub-rinfos that together are an exact
+ * duplicate of rinfo. Thus, we need to remove the rinfo from
+ * other clauses. match_clauses_to_index detects duplicated
+ * iclauses by comparing pointers to original rinfos that would be
+ * different. So, we must delete rinfo to avoid de-facto
+ * duplicated clauses in the index clauses list.
+ */
+ inner_other_clauses = list_delete(list_copy(all_clauses), rinfo);
+ }
+
+ foreach(j, groupedArgs)
{
Node *orarg = (Node *) lfirst(j);
List *indlist;
@@ -1233,12 +1634,34 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
andargs,
all_clauses));
}
+ else if (restriction_is_or_clause(castNode(RestrictInfo, orarg)))
+ {
+ RestrictInfo *ri = castNode(RestrictInfo, orarg);
+
+ /*
+ * Generate bitmap paths for the group of similar OR-clause
+ * arguments.
+ */
+ indlist = make_bitmap_paths_for_or_group(root,
+ rel, ri,
+ inner_other_clauses);
+
+ if (indlist == NIL)
+ {
+ pathlist = NIL;
+ break;
+ }
+ else
+ {
+ pathlist = list_concat(pathlist, indlist);
+ continue;
+ }
+ }
else
{
RestrictInfo *ri = castNode(RestrictInfo, orarg);
List *orargs;
- Assert(!restriction_is_or_clause(ri));
orargs = list_make1(ri);
indlist = build_paths_for_OR(root, rel,
@@ -1264,6 +1687,9 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
pathlist = lappend(pathlist, bitmapqual);
}
+ if (inner_other_clauses != NIL)
+ list_free(inner_other_clauses);
+
/*
* If we have a match for every arm, then turn them into a
* BitmapOrPath, and add to result list.