aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util/var.c
diff options
context:
space:
mode:
authorRichard Guo <rguo@postgresql.org>2024-09-10 12:35:34 +0900
committerRichard Guo <rguo@postgresql.org>2024-09-10 12:35:34 +0900
commit247dea89f7616fdf06b7272b74abafc29e8e5860 (patch)
tree8f4f7c0acb5b9842df4517019805d7531aec1b46 /src/backend/optimizer/util/var.c
parentfba49d5293b4455b25485450baf02af42bf543d7 (diff)
downloadpostgresql-247dea89f7616fdf06b7272b74abafc29e8e5860.tar.gz
postgresql-247dea89f7616fdf06b7272b74abafc29e8e5860.zip
Introduce an RTE for the grouping step
If there are subqueries in the grouping expressions, each of these subqueries in the targetlist and HAVING clause is expanded into distinct SubPlan nodes. As a result, only one of these SubPlan nodes would be converted to reference to the grouping key column output by the Agg node; others would have to get evaluated afresh. This is not efficient, and with grouping sets this can cause wrong results issues in cases where they should go to NULL because they are from the wrong grouping set. Furthermore, during re-evaluation, these SubPlan nodes might use nulled column values from grouping sets, which is not correct. This issue is not limited to subqueries. For other types of expressions that are part of grouping items, if they are transformed into another form during preprocessing, they may fail to match lower target items. This can also lead to wrong results with grouping sets. To fix this issue, we introduce a new kind of RTE representing the output of the grouping step, with columns that are the Vars or expressions being grouped on. In the parser, we replace the grouping expressions in the targetlist and HAVING clause with Vars referencing this new RTE, so that the output of the parser directly expresses the semantic requirement that the grouping expressions be gotten from the grouping output rather than computed some other way. In the planner, we first preprocess all the columns of this new RTE and then replace any Vars in the targetlist and HAVING clause that reference this new RTE with the underlying grouping expressions, so that we will have only one instance of a SubPlan node for each subquery contained in the grouping expressions. Bump catversion because this changes the querytree produced by the parser. Thanks to Tom Lane for the idea to invent a new kind of RTE. Per reports from Geoff Winkless, Tobias Wendorff, Richard Guo from various threads. Author: Richard Guo Reviewed-by: Ashutosh Bapat, Sutou Kouhei Discussion: https://postgr.es/m/CAMbWs4_dp7e7oTwaiZeBX8+P1rXw4ThkZxh1QG81rhu9Z47VsQ@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/util/var.c')
-rw-r--r--src/backend/optimizer/util/var.c138
1 files changed, 138 insertions, 0 deletions
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 844fc30978b..b189185fca2 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -81,6 +81,8 @@ static bool pull_var_clause_walker(Node *node,
pull_var_clause_context *context);
static Node *flatten_join_alias_vars_mutator(Node *node,
flatten_join_alias_vars_context *context);
+static Node *flatten_group_exprs_mutator(Node *node,
+ flatten_join_alias_vars_context *context);
static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode,
Var *oldvar);
static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar);
@@ -893,6 +895,7 @@ flatten_join_alias_vars_mutator(Node *node,
}
/* Already-planned tree not supported */
Assert(!IsA(node, SubPlan));
+ Assert(!IsA(node, AlternativeSubPlan));
/* Shouldn't need to handle these planner auxiliary nodes here */
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, PlaceHolderInfo));
@@ -903,6 +906,141 @@ flatten_join_alias_vars_mutator(Node *node,
}
/*
+ * flatten_group_exprs
+ * Replace Vars that reference GROUP outputs with the underlying grouping
+ * expressions.
+ */
+Node *
+flatten_group_exprs(PlannerInfo *root, Query *query, Node *node)
+{
+ flatten_join_alias_vars_context context;
+
+ /*
+ * We do not expect this to be applied to the whole Query, only to
+ * expressions or LATERAL subqueries. Hence, if the top node is a Query,
+ * it's okay to immediately increment sublevels_up.
+ */
+ Assert(node != (Node *) query);
+
+ context.root = root;
+ context.query = query;
+ context.sublevels_up = 0;
+ /* flag whether grouping expressions could possibly contain SubLinks */
+ context.possible_sublink = query->hasSubLinks;
+ /* if hasSubLinks is already true, no need to work hard */
+ context.inserted_sublink = query->hasSubLinks;
+
+ return flatten_group_exprs_mutator(node, &context);
+}
+
+static Node *
+flatten_group_exprs_mutator(Node *node,
+ flatten_join_alias_vars_context *context)
+{
+ if (node == NULL)
+ return NULL;
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+ RangeTblEntry *rte;
+ Node *newvar;
+
+ /* No change unless Var belongs to the GROUP of the target level */
+ if (var->varlevelsup != context->sublevels_up)
+ return node; /* no need to copy, really */
+ rte = rt_fetch(var->varno, context->query->rtable);
+ if (rte->rtekind != RTE_GROUP)
+ return node;
+
+ /* Expand group exprs reference */
+ Assert(var->varattno > 0);
+ newvar = (Node *) list_nth(rte->groupexprs, var->varattno - 1);
+ Assert(newvar != NULL);
+ newvar = copyObject(newvar);
+
+ /*
+ * If we are expanding an expr carried down from an upper query, must
+ * adjust its varlevelsup fields.
+ */
+ if (context->sublevels_up != 0)
+ IncrementVarSublevelsUp(newvar, context->sublevels_up, 0);
+
+ /* Preserve original Var's location, if possible */
+ if (IsA(newvar, Var))
+ ((Var *) newvar)->location = var->location;
+
+ /* Detect if we are adding a sublink to query */
+ if (context->possible_sublink && !context->inserted_sublink)
+ context->inserted_sublink = checkExprHasSubLink(newvar);
+
+ return newvar;
+ }
+
+ if (IsA(node, Aggref))
+ {
+ Aggref *agg = (Aggref *) node;
+
+ if ((int) agg->agglevelsup == context->sublevels_up)
+ {
+ /*
+ * If we find an aggregate call of the original level, do not
+ * recurse into its normal arguments, ORDER BY arguments, or
+ * filter; there are no grouped vars there. But we should check
+ * direct arguments as though they weren't in an aggregate.
+ */
+ agg = copyObject(agg);
+ agg->aggdirectargs = (List *)
+ flatten_group_exprs_mutator((Node *) agg->aggdirectargs, context);
+
+ return (Node *) agg;
+ }
+
+ /*
+ * We can skip recursing into aggregates of higher levels altogether,
+ * since they could not possibly contain Vars of concern to us (see
+ * transformAggregateCall). We do need to look at aggregates of lower
+ * levels, however.
+ */
+ if ((int) agg->agglevelsup > context->sublevels_up)
+ return node;
+ }
+
+ if (IsA(node, GroupingFunc))
+ {
+ GroupingFunc *grp = (GroupingFunc *) node;
+
+ /*
+ * If we find a GroupingFunc node of the original or higher level, do
+ * not recurse into its arguments; there are no grouped vars there.
+ */
+ if ((int) grp->agglevelsup >= context->sublevels_up)
+ return node;
+ }
+
+ if (IsA(node, Query))
+ {
+ /* Recurse into RTE subquery or not-yet-planned sublink subquery */
+ Query *newnode;
+ bool save_inserted_sublink;
+
+ context->sublevels_up++;
+ save_inserted_sublink = context->inserted_sublink;
+ context->inserted_sublink = ((Query *) node)->hasSubLinks;
+ newnode = query_tree_mutator((Query *) node,
+ flatten_group_exprs_mutator,
+ (void *) context,
+ QTW_IGNORE_GROUPEXPRS);
+ newnode->hasSubLinks |= context->inserted_sublink;
+ context->inserted_sublink = save_inserted_sublink;
+ context->sublevels_up--;
+ return (Node *) newnode;
+ }
+
+ return expression_tree_mutator(node, flatten_group_exprs_mutator,
+ (void *) context);
+}
+
+/*
* Add oldvar's varnullingrels, if any, to a flattened join alias expression.
* The newnode has been copied, so we can modify it freely.
*/