diff options
author | Alexander Korotkov <akorotkov@postgresql.org> | 2025-04-04 16:01:50 +0300 |
---|---|---|
committer | Alexander Korotkov <akorotkov@postgresql.org> | 2025-04-04 16:01:50 +0300 |
commit | c0962a113d1f2f94cb7222a7ca025a67e9ce3860 (patch) | |
tree | 6da1bad29d2394bca13b4c29c4df5ae9a4ceee67 /src/backend/optimizer/plan/subselect.c | |
parent | d48d2e2dc8be50d3ca13305b5699384329b15433 (diff) | |
download | postgresql-c0962a113d1f2f94cb7222a7ca025a67e9ce3860.tar.gz postgresql-c0962a113d1f2f94cb7222a7ca025a67e9ce3860.zip |
Convert 'x IN (VALUES ...)' to 'x = ANY ...' then appropriate
This commit implements the automatic conversion of 'x IN (VALUES ...)' into
ScalarArrayOpExpr. That simplifies the query tree, eliminating the appearance
of an unnecessary join.
Since VALUES describes a relational table, and the value of such a list is
a table row, the optimizer will likely face an underestimation problem due to
the inability to estimate cardinality through MCV statistics. The cardinality
evaluation mechanism can work with the array inclusion check operation.
If the array is small enough (< 100 elements), it will perform a statistical
evaluation element by element.
We perform the transformation in the convert_ANY_sublink_to_join() if VALUES
RTE is proper and the transformation is convertible. The conversion is only
possible for operations on scalar values, not rows. Also, we currently
support the transformation only when it ends up with a constant array.
Otherwise, the evaluation of non-hashed SAOP might be slower than the
corresponding Hash Join with VALUES.
Discussion: https://postgr.es/m/0184212d-1248-4f1f-a42d-f5cb1c1976d2%40tantorlabs.com
Author: Alena Rybakina <a.rybakina@postgrespro.ru>
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Ivan Kush <ivan.kush@tantorlabs.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Diffstat (limited to 'src/backend/optimizer/plan/subselect.c')
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 8230cbea3c3..e7cb3fede66 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1214,6 +1214,86 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context) return expression_tree_walker(node, inline_cte_walker, context); } +/* + * Attempt to transform 'testexpr' over the VALUES subquery into + * a ScalarArrayOpExpr. We currently support the transformation only when + * it ends up with a constant array. Otherwise, the evaluation of non-hashed + * SAOP might be slower than the corresponding Hash Join with VALUES. + * + * Return transformed ScalarArrayOpExpr or NULL if transformation isn't + * allowed. + */ +ScalarArrayOpExpr * +convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values) +{ + RangeTblEntry *rte; + Node *leftop; + Node *rightop; + Oid opno; + ListCell *lc; + Oid inputcollid; + List *exprs = NIL; + + /* + * Check we have a binary operator over a single-column subquery with no + * joins and no LIMIT/OFFSET/ORDER BY clauses. + */ + if (!IsA(testexpr, OpExpr) || + list_length(((OpExpr *) testexpr)->args) != 2 || + list_length(values->targetList) > 1 || + values->limitCount != NULL || + values->limitOffset != NULL || + values->sortClause != NIL || + list_length(values->rtable) != 1) + return NULL; + + rte = linitial_node(RangeTblEntry, values->rtable); + leftop = linitial(((OpExpr *) testexpr)->args); + rightop = lsecond(((OpExpr *) testexpr)->args); + opno = ((OpExpr *) testexpr)->opno; + inputcollid = ((OpExpr *) testexpr)->inputcollid; + + /* + * Also, check that only RTE corresponds to VALUES; the list of values has + * at least two items and no volatile functions. + */ + if (rte->rtekind != RTE_VALUES || + list_length(rte->values_lists) < 2 || + contain_volatile_functions((Node *) rte->values_lists)) + return NULL; + + foreach(lc, rte->values_lists) + { + List *elem = lfirst(lc); + Node *value = linitial(elem); + + /* + * Prepare an evaluation of the right side of the operator with + * substitution of the given value. + */ + value = convert_testexpr(root, rightop, list_make1(value)); + + /* + * Try to evaluate constant expressions. We could get Const as a + * result. + */ + value = eval_const_expressions(root, value); + + /* + * As we only support constant output arrays, all the items must also + * be constant. + */ + if (!IsA(value, Const)) + return NULL; + + exprs = lappend(exprs, value); + } + + /* Finally, build ScalarArrayOpExpr at the top of the 'exprs' list. */ + return make_SAOP_expr(opno, leftop, exprType(rightop), + linitial_oid(rte->colcollations), inputcollid, + exprs, false); +} /* * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join |