path: root/src/backend/optimizer
author     David Rowley <drowley@postgresql.org>  2021-04-08 23:51:22 +1200
committer  David Rowley <drowley@postgresql.org>  2021-04-08 23:51:22 +1200
commit     50e17ad281b8d1c1b410c9833955bc80fbad4078 (patch)
tree       faf07e47e95ceade572aaf2afdca08bc35ed69e7 /src/backend/optimizer
parent     1d257577e08d3e598011d6850fd1025858de8c8c (diff)
download   postgresql-50e17ad281b8d1c1b410c9833955bc80fbad4078.tar.gz
           postgresql-50e17ad281b8d1c1b410c9833955bc80fbad4078.zip
Speedup ScalarArrayOpExpr evaluation
ScalarArrayOpExprs with "useOr=true" and a set of Consts on the righthand side have traditionally been evaluated by using a linear search over the array. When these arrays contain large numbers of elements, this linear search could become a significant part of execution time.

Here we add a new method of evaluating ScalarArrayOpExpr expressions that allows them to be evaluated by first building a hash table containing each element, then on subsequent evaluations simply probing that hash table to determine if there is a match.

The planner is in charge of determining when this optimization is possible and it enables it by setting hashfuncid in the ScalarArrayOpExpr. The executor will only perform the hash table evaluation when the hashfuncid is set.

This means that not all cases are optimized. For example, CHECK constraints containing an IN clause won't go through the planner, so won't get the hashfuncid set. We could maybe do something about that at some later date. The reason we're not doing it now is from fear that we may slow down cases where the expression is evaluated only once. Those cases can be common, for example, a single-row INSERT to a table with a CHECK constraint containing an IN clause.

In the planner, we enable this when there are suitable hash functions for the ScalarArrayOpExpr's operator and only when there are at least MIN_ARRAY_SIZE_FOR_HASHED_SAOP elements in the array. The threshold is currently set to 9.

Author: James Coleman, David Rowley
Reviewed-by: David Rowley, Tomas Vondra, Heikki Linnakangas
Discussion: https://postgr.es/m/CAAaqYe8x62+=wn0zvNKCj55tPpg-JBHzhZFFc6ANovdqFw7-dA@mail.gmail.com
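To give a feel for the change, here is a rough standalone C sketch of the idea (toy code with invented names; it is not the executor support the commit adds outside this optimizer-only diff): hash the constant array into a set once, then answer each row's membership test with one hash and roughly one comparison instead of scanning up to the whole IN list.

/*
 * Toy illustration only; not PostgreSQL code.  Compares probing a prebuilt
 * hash set (about one hash + one compare per row) against the traditional
 * linear scan (about half the array compared per row on average).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NBUCKETS 64             /* power of two, larger than the IN list */

typedef struct HashSet
{
    bool        used[NBUCKETS];
    int32_t     vals[NBUCKETS];
} HashSet;

static uint32_t
hash_int32(int32_t v)
{
    /* cheap integer hash; stand-in for the datatype's real hash function */
    uint32_t    h = (uint32_t) v;

    h ^= h >> 16;
    h *= 0x45d9f3b;
    h ^= h >> 16;
    return h;
}

/* Build the set once, analogous to the first evaluation of the expression */
static void
hashset_build(HashSet *set, const int32_t *arr, int n)
{
    memset(set, 0, sizeof(*set));
    for (int i = 0; i < n; i++)
    {
        uint32_t    b = hash_int32(arr[i]) & (NBUCKETS - 1);

        while (set->used[b] && set->vals[b] != arr[i])
            b = (b + 1) & (NBUCKETS - 1);   /* linear probing */
        set->used[b] = true;
        set->vals[b] = arr[i];
    }
}

/* Probe: one hash plus, usually, a single comparison per input row */
static bool
hashset_contains(const HashSet *set, int32_t v)
{
    uint32_t    b = hash_int32(v) & (NBUCKETS - 1);

    while (set->used[b])
    {
        if (set->vals[b] == v)
            return true;
        b = (b + 1) & (NBUCKETS - 1);
    }
    return false;
}

/* The traditional "= ANY(array)" strategy: scan until a match is found */
static bool
linear_contains(const int32_t *arr, int n, int32_t v)
{
    for (int i = 0; i < n; i++)
        if (arr[i] == v)
            return true;
    return false;
}

int
main(void)
{
    int32_t     in_list[] = {3, 7, 11, 19, 23, 31, 42, 57, 64, 99};
    int         n = sizeof(in_list) / sizeof(in_list[0]);
    HashSet     set;

    hashset_build(&set, in_list, n);
    for (int32_t v = 40; v <= 43; v++)
        printf("%d: hashed=%d linear=%d\n", (int) v,
               hashset_contains(&set, v), linear_contains(in_list, n, v));
    return 0;
}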
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--  src/backend/optimizer/path/costsize.c  43
-rw-r--r--  src/backend/optimizer/plan/planner.c   10
-rw-r--r--  src/backend/optimizer/plan/setrefs.c   10
-rw-r--r--  src/backend/optimizer/prep/prepqual.c   1
-rw-r--r--  src/backend/optimizer/util/clauses.c   64
5 files changed, 118 insertions, 10 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 05686d01942..8577c7b1389 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -4436,21 +4436,50 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
}
else if (IsA(node, ScalarArrayOpExpr))
{
- /*
- * Estimate that the operator will be applied to about half of the
- * array elements before the answer is determined.
- */
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
Node *arraynode = (Node *) lsecond(saop->args);
QualCost sacosts;
+ QualCost hcosts;
+ int estarraylen = estimate_array_length(arraynode);
set_sa_opfuncid(saop);
sacosts.startup = sacosts.per_tuple = 0;
add_function_cost(context->root, saop->opfuncid, NULL,
&sacosts);
- context->total.startup += sacosts.startup;
- context->total.per_tuple += sacosts.per_tuple *
- estimate_array_length(arraynode) * 0.5;
+
+ if (OidIsValid(saop->hashfuncid))
+ {
+ /* Handle costs for hashed ScalarArrayOpExpr */
+ hcosts.startup = hcosts.per_tuple = 0;
+
+ add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts);
+ context->total.startup += sacosts.startup + hcosts.startup;
+
+ /* Estimate the cost of building the hashtable. */
+ context->total.startup += estarraylen * hcosts.per_tuple;
+
+ /*
+ * XXX should we charge a little bit for sacosts.per_tuple when
+ * building the table, or is it ok to assume there will be zero
+ * hash collisions?
+ */
+
+ /*
+ * Charge for hashtable lookups. Charge a single hash and a
+ * single comparison.
+ */
+ context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple;
+ }
+ else
+ {
+ /*
+ * Estimate that the operator will be applied to about half of the
+ * array elements before the answer is determined.
+ */
+ context->total.startup += sacosts.startup;
+ context->total.per_tuple += sacosts.per_tuple *
+ estimate_array_length(arraynode) * 0.5;
+ }
}
else if (IsA(node, Aggref) ||
IsA(node, WindowFunc))
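For a rough sense of the new costing above (toy numbers: assume add_function_cost() charges 0.0025, the default cpu_operator_cost, for both the comparison operator and the hash function), a 100-element array costs about 100 * 0.5 * 0.0025 = 0.125 per tuple with the linear search, versus a one-off startup charge of 100 * 0.0025 = 0.25 plus 2 * 0.0025 = 0.005 per tuple when hashed, so the hashed estimate wins after only a couple of tuples. The standalone sketch below merely restates the two branches with made-up inputs; it is not the planner code itself.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for QualCost */
typedef struct ToyQualCost
{
    double      startup;
    double      per_tuple;
} ToyQualCost;

/*
 * Mirror of the two costing branches above.  "cmp_cost" and "hash_cost"
 * stand in for what add_function_cost() would add per call.
 */
static void
cost_saop_toy(ToyQualCost *total, int nelems, bool hashed,
              double cmp_cost, double hash_cost)
{
    if (hashed)
    {
        /* build the hash table once ... */
        total->startup += nelems * hash_cost;
        /* ... then charge one hash and one comparison per tuple */
        total->per_tuple += hash_cost + cmp_cost;
    }
    else
    {
        /* linear search applies the operator to ~half the array per tuple */
        total->per_tuple += cmp_cost * nelems * 0.5;
    }
}

int
main(void)
{
    ToyQualCost linear = {0.0, 0.0};
    ToyQualCost hashed = {0.0, 0.0};

    /* made-up per-call costs, both set to the default cpu_operator_cost */
    cost_saop_toy(&linear, 100, false, 0.0025, 0.0025);
    cost_saop_toy(&hashed, 100, true, 0.0025, 0.0025);

    printf("linear: startup=%g per_tuple=%g\n", linear.startup, linear.per_tuple);
    printf("hashed: startup=%g per_tuple=%g\n", hashed.startup, hashed.per_tuple);
    return 0;
}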
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 898d7fcb0bd..1868c4eff47 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1110,6 +1110,16 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
#endif
}
+ /*
+ * Check for ANY ScalarArrayOpExpr with Const arrays and set the
+ * hashfuncid of any that might execute more quickly by using hash lookups
+ * instead of a linear search.
+ */
+ if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
+ {
+ convert_saop_to_hashed_saop(expr);
+ }
+
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 70c0fa07e6e..9f40ed77e64 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1674,9 +1674,13 @@ fix_expr_common(PlannerInfo *root, Node *node)
}
else if (IsA(node, ScalarArrayOpExpr))
{
- set_sa_opfuncid((ScalarArrayOpExpr *) node);
- record_plan_function_dependency(root,
- ((ScalarArrayOpExpr *) node)->opfuncid);
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+
+ set_sa_opfuncid(saop);
+ record_plan_function_dependency(root, saop->opfuncid);
+
+ if (OidIsValid(saop->hashfuncid))
+ record_plan_function_dependency(root, saop->hashfuncid);
}
else if (IsA(node, Const))
{
diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c
index 8d4dc9cd105..42c3e4dc046 100644
--- a/src/backend/optimizer/prep/prepqual.c
+++ b/src/backend/optimizer/prep/prepqual.c
@@ -127,6 +127,7 @@ negate_clause(Node *node)
newopexpr->opno = negator;
newopexpr->opfuncid = InvalidOid;
+ newopexpr->hashfuncid = InvalidOid;
newopexpr->useOr = !saopexpr->useOr;
newopexpr->inputcollid = saopexpr->inputcollid;
newopexpr->args = saopexpr->args;
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 9a6e3dab834..526997327c6 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -106,6 +106,7 @@ static bool contain_leaked_vars_walker(Node *node, void *context);
static Relids find_nonnullable_rels_walker(Node *node, bool top_level);
static List *find_nonnullable_vars_walker(Node *node, bool top_level);
static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK);
+static bool convert_saop_to_hashed_saop_walker(Node *node, void *context);
static Node *eval_const_expressions_mutator(Node *node,
eval_const_expressions_context *context);
static bool contain_non_const_walker(Node *node, void *context);
@@ -2101,6 +2102,69 @@ eval_const_expressions(PlannerInfo *root, Node *node)
return eval_const_expressions_mutator(node, &context);
}
+#define MIN_ARRAY_SIZE_FOR_HASHED_SAOP 9
+/*--------------------
+ * convert_saop_to_hashed_saop
+ *
+ * Recursively search 'node' for ScalarArrayOpExprs and fill in the hash
+ * function for any ScalarArrayOpExpr that looks like it would be useful to
+ * evaluate using a hash table rather than a linear search.
+ *
+ * We'll use a hash table if all of the following conditions are met:
+ * 1. The 2nd argument of the ScalarArrayOpExpr contains only Consts.
+ * 2. useOr is true.
+ * 3. There's a valid hash function for both the left and righthand operands,
+ *    and these hash functions are the same.
+ * 4. The array contains enough elements for us to consider it worthwhile
+ *    using a hash table rather than a linear search.
+ */
+void
+convert_saop_to_hashed_saop(Node *node)
+{
+ (void) convert_saop_to_hashed_saop_walker(node, NULL);
+}
+
+static bool
+convert_saop_to_hashed_saop_walker(Node *node, void *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+ Expr *arrayarg = (Expr *) lsecond(saop->args);
+ Oid lefthashfunc;
+ Oid righthashfunc;
+
+ if (saop->useOr && arrayarg && IsA(arrayarg, Const) &&
+ !((Const *) arrayarg)->constisnull &&
+ get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
+ lefthashfunc == righthashfunc)
+ {
+ Datum arrdatum = ((Const *) arrayarg)->constvalue;
+ ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
+ int nitems;
+
+ /*
+ * Only fill in the hash functions if the array looks large enough
+ * for it to be worth hashing instead of doing a linear search.
+ */
+ nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+
+ if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
+ {
+ /* Looks good. Fill in the hash functions */
+ saop->hashfuncid = lefthashfunc;
+ }
+ return true;
+ }
+ }
+
+ return expression_tree_walker(node, convert_saop_to_hashed_saop_walker, NULL);
+}
+
+
/*--------------------
* estimate_expression_value
*