aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/subselect.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2020-08-14 22:14:03 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2020-08-14 22:14:03 -0400
commit1e7629d2c95ffd290ab0e18d7618ca9d9da94265 (patch)
treea53cea140fbb2fbbd7b2a6182cf4ff1a2eae559b /src/backend/optimizer/plan/subselect.c
parent73487a60fc1063ba4b5178b69aee4ee210c182c4 (diff)
downloadpostgresql-1e7629d2c95ffd290ab0e18d7618ca9d9da94265.tar.gz
postgresql-1e7629d2c95ffd290ab0e18d7618ca9d9da94265.zip
Be more careful about the shape of hashable subplan clauses.
nodeSubplan.c expects that the testexpr for a hashable ANY SubPlan has the form of one or more OpExprs whose LHS is an expression of the outer query's, while the RHS is an expression over Params representing output columns of the subquery. However, the planner only went as far as verifying that the clauses were all binary OpExprs. This works 99.99% of the time, because the clauses have the right shape when emitted by the parser --- but it's possible for function inlining to break that, as reported by PegoraroF10. To fix, teach the planner to check that the LHS and RHS contain the right things, or more accurately don't contain the wrong things. Given that this has been broken for years without anyone noticing, it seems sufficient to just give up hashing when it happens, rather than go to the trouble of commuting the clauses back again (which wouldn't necessarily work anyway). While poking at that, I also noticed that nodeSubplan.c had a baked-in assumption that the number of hash clauses is identical to the number of subquery output columns. Again, that's fine as far as parser output goes, but it's not hard to break it via function inlining. There seems little reason for that assumption though --- AFAICS, the only thing it's buying us is not having to store the number of hash clauses explicitly. Adding code to the planner to reject such cases would take more code than getting nodeSubplan.c to cope, so I fixed it that way. This has been broken for as long as we've had hashable SubPlans, so back-patch to all supported branches. Discussion: https://postgr.es/m/1549209182255-0.post@n3.nabble.com
Diffstat (limited to 'src/backend/optimizer/plan/subselect.c')
-rw-r--r--src/backend/optimizer/plan/subselect.c77
1 files changed, 55 insertions, 22 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 9a8f738c9d0..6eb794669fe 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -69,7 +69,7 @@ typedef struct inline_cte_walker_context
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool adjust_testexpr,
+ Node *testexpr, List *testexpr_paramids,
bool unknownEqFalse);
static List *generate_subquery_params(PlannerInfo *root, List *tlist,
List **paramIds);
@@ -81,7 +81,8 @@ static Node *convert_testexpr(PlannerInfo *root,
static Node *convert_testexpr_mutator(Node *node,
convert_testexpr_context *context);
static bool subplan_is_hashable(Plan *plan);
-static bool testexpr_is_hashable(Node *testexpr);
+static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
+static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
static bool hash_ok_operator(OpExpr *expr);
static bool contain_dml(Node *node);
static bool contain_dml_walker(Node *node, void *context);
@@ -237,7 +238,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* And convert to SubPlan or InitPlan format. */
result = build_subplan(root, plan, subroot, plan_params,
subLinkType, subLinkId,
- testexpr, true, isTopQual);
+ testexpr, NIL, isTopQual);
/*
* If it's a correlated EXISTS with an unimportant targetlist, we might be
@@ -291,12 +292,11 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
plan_params,
ANY_SUBLINK, 0,
newtestexpr,
- false, true));
+ paramIds,
+ true));
/* Check we got what we expected */
Assert(hashplan->parParam == NIL);
Assert(hashplan->useHashTable);
- /* build_subplan won't have filled in paramIds */
- hashplan->paramIds = paramIds;
/* Leave it to the executor to decide which plan to use */
asplan = makeNode(AlternativeSubPlan);
@@ -319,7 +319,7 @@ static Node *
build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool adjust_testexpr,
+ Node *testexpr, List *testexpr_paramids,
bool unknownEqFalse)
{
Node *result;
@@ -484,10 +484,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
else
{
/*
- * Adjust the Params in the testexpr, unless caller said it's not
- * needed.
+ * Adjust the Params in the testexpr, unless caller already took care
+ * of it (as indicated by passing a list of Param IDs).
*/
- if (testexpr && adjust_testexpr)
+ if (testexpr && testexpr_paramids == NIL)
{
List *params;
@@ -499,7 +499,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
params);
}
else
+ {
splan->testexpr = testexpr;
+ splan->paramIds = testexpr_paramids;
+ }
/*
* We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
@@ -511,7 +514,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
if (subLinkType == ANY_SUBLINK &&
splan->parParam == NIL &&
subplan_is_hashable(plan) &&
- testexpr_is_hashable(splan->testexpr))
+ testexpr_is_hashable(splan->testexpr, splan->paramIds))
splan->useHashTable = true;
/*
@@ -734,24 +737,20 @@ subplan_is_hashable(Plan *plan)
/*
* testexpr_is_hashable: is an ANY SubLink's test expression hashable?
+ *
+ * To identify LHS vs RHS of the hash expression, we must be given the
+ * list of output Param IDs of the SubLink's subquery.
*/
static bool
-testexpr_is_hashable(Node *testexpr)
+testexpr_is_hashable(Node *testexpr, List *param_ids)
{
/*
* The testexpr must be a single OpExpr, or an AND-clause containing only
- * OpExprs.
- *
- * The combining operators must be hashable and strict. The need for
- * hashability is obvious, since we want to use hashing. Without
- * strictness, behavior in the presence of nulls is too unpredictable. We
- * actually must assume even more than plain strictness: they can't yield
- * NULL for non-null inputs, either (see nodeSubplan.c). However, hash
- * indexes and hash joins assume that too.
+ * OpExprs, each of which satisfy test_opexpr_is_hashable().
*/
if (testexpr && IsA(testexpr, OpExpr))
{
- if (hash_ok_operator((OpExpr *) testexpr))
+ if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids))
return true;
}
else if (is_andclause(testexpr))
@@ -764,7 +763,7 @@ testexpr_is_hashable(Node *testexpr)
if (!IsA(andarg, OpExpr))
return false;
- if (!hash_ok_operator((OpExpr *) andarg))
+ if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids))
return false;
}
return true;
@@ -773,6 +772,40 @@ testexpr_is_hashable(Node *testexpr)
return false;
}
+static bool
+test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids)
+{
+ /*
+ * The combining operator must be hashable and strict. The need for
+ * hashability is obvious, since we want to use hashing. Without
+ * strictness, behavior in the presence of nulls is too unpredictable. We
+ * actually must assume even more than plain strictness: it can't yield
+ * NULL for non-null inputs, either (see nodeSubplan.c). However, hash
+ * indexes and hash joins assume that too.
+ */
+ if (!hash_ok_operator(testexpr))
+ return false;
+
+ /*
+ * The left and right inputs must belong to the outer and inner queries
+ * respectively; hence Params that will be supplied by the subquery must
+ * not appear in the LHS, and Vars of the outer query must not appear in
+ * the RHS. (Ordinarily, this must be true because of the way that the
+ * parser builds an ANY SubLink's testexpr ... but inlining of functions
+ * could have changed the expression's structure, so we have to check.
+ * Such cases do not occur often enough to be worth trying to optimize, so
+ * we don't worry about trying to commute the clause or anything like
+ * that; we just need to be sure not to build an invalid plan.)
+ */
+ if (list_length(testexpr->args) != 2)
+ return false;
+ if (contain_exec_param((Node *) linitial(testexpr->args), param_ids))
+ return false;
+ if (contain_var_clause((Node *) lsecond(testexpr->args)))
+ return false;
+ return true;
+}
+
/*
* Check expression is hashable + strict
*