diff options
Diffstat (limited to 'src/backend/parser/parse_clause.c')
-rw-r--r-- | src/backend/parser/parse_clause.c | 504 |
1 files changed, 460 insertions, 44 deletions
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 6b1bbe57d0e..a90bcf40c9d 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -15,6 +15,8 @@ #include "postgres.h" +#include "miscadmin.h" + #include "access/heapam.h" #include "catalog/catalog.h" #include "access/htup_details.h" @@ -43,7 +45,6 @@ #include "utils/rel.h" #include "utils/syscache.h" - /* Convenience macro for the most common makeNamespaceItem() case */ #define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true) @@ -1725,40 +1726,181 @@ findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, return target_result; } +/*------------------------------------------------------------------------- + * Flatten out parenthesized sublists in grouping lists, and some cases + * of nested grouping sets. + * + * Inside a grouping set (ROLLUP, CUBE, or GROUPING SETS), we expect the + * content to be nested no more than 2 deep: i.e. ROLLUP((a,b),(c,d)) is + * ok, but ROLLUP((a,(b,c)),d) is flattened to ((a,b,c),d), which we then + * normalize to ((a,b,c),(d)). + * + * CUBE or ROLLUP can be nested inside GROUPING SETS (but not the reverse), + * and we leave that alone if we find it. But if we see GROUPING SETS inside + * GROUPING SETS, we can flatten and normalize as follows: + * GROUPING SETS (a, (b,c), GROUPING SETS ((c,d),(e)), (f,g)) + * becomes + * GROUPING SETS ((a), (b,c), (c,d), (e), (f,g)) + * + * This is per the spec's syntax transformations, but these are the only such + * transformations we do in parse analysis, so that queries retain the + * originally specified grouping set syntax for CUBE and ROLLUP as much as + * possible when deparsed. (Full expansion of the result into a list of + * grouping sets is left to the planner.) + * + * When we're done, the resulting list should contain only these possible + * elements: + * - an expression + * - a CUBE or ROLLUP with a list of expressions nested 2 deep + * - a GROUPING SET containing any of: + * - expression lists + * - empty grouping sets + * - CUBE or ROLLUP nodes with lists nested 2 deep + * The return is a new list, but doesn't deep-copy the old nodes except for + * GroupingSet nodes. + * + * As a side effect, flag whether the list has any GroupingSet nodes. + *------------------------------------------------------------------------- + */ +static Node * +flatten_grouping_sets(Node *expr, bool toplevel, bool *hasGroupingSets) +{ + /* just in case of pathological input */ + check_stack_depth(); + + if (expr == (Node *) NIL) + return (Node *) NIL; + + switch (expr->type) + { + case T_RowExpr: + { + RowExpr *r = (RowExpr *) expr; + if (r->row_format == COERCE_IMPLICIT_CAST) + return flatten_grouping_sets((Node *) r->args, + false, NULL); + } + break; + case T_GroupingSet: + { + GroupingSet *gset = (GroupingSet *) expr; + ListCell *l2; + List *result_set = NIL; + + if (hasGroupingSets) + *hasGroupingSets = true; + + /* + * at the top level, we skip over all empty grouping sets; the + * caller can supply the canonical GROUP BY () if nothing is left. + */ + + if (toplevel && gset->kind == GROUPING_SET_EMPTY) + return (Node *) NIL; + + foreach(l2, gset->content) + { + Node *n2 = flatten_grouping_sets(lfirst(l2), false, NULL); + + result_set = lappend(result_set, n2); + } + + /* + * At top level, keep the grouping set node; but if we're in a nested + * grouping set, then we need to concat the flattened result into the + * outer list if it's simply nested. + */ + + if (toplevel || (gset->kind != GROUPING_SET_SETS)) + { + return (Node *) makeGroupingSet(gset->kind, result_set, gset->location); + } + else + return (Node *) result_set; + } + case T_List: + { + List *result = NIL; + ListCell *l; + + foreach(l, (List *)expr) + { + Node *n = flatten_grouping_sets(lfirst(l), toplevel, hasGroupingSets); + if (n != (Node *) NIL) + { + if (IsA(n,List)) + result = list_concat(result, (List *) n); + else + result = lappend(result, n); + } + } + + return (Node *) result; + } + default: + break; + } + + return expr; +} + /* - * transformGroupClause - - * transform a GROUP BY clause + * Transform a single expression within a GROUP BY clause or grouping set. * - * GROUP BY items will be added to the targetlist (as resjunk columns) - * if not already present, so the targetlist must be passed by reference. + * The expression is added to the targetlist if not already present, and to the + * flatresult list (which will become the groupClause) if not already present + * there. The sortClause is consulted for operator and sort order hints. * - * This is also used for window PARTITION BY clauses (which act almost the - * same, but are always interpreted per SQL99 rules). + * Returns the ressortgroupref of the expression. + * + * flatresult reference to flat list of SortGroupClause nodes + * seen_local bitmapset of sortgrouprefs already seen at the local level + * pstate ParseState + * gexpr node to transform + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + * toplevel false if within any grouping set */ -List * -transformGroupClause(ParseState *pstate, List *grouplist, - List **targetlist, List *sortClause, - ParseExprKind exprKind, bool useSQL99) +static Index +transformGroupClauseExpr(List **flatresult, Bitmapset *seen_local, + ParseState *pstate, Node *gexpr, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99, bool toplevel) { - List *result = NIL; - ListCell *gl; + TargetEntry *tle; + bool found = false; + + if (useSQL99) + tle = findTargetlistEntrySQL99(pstate, gexpr, + targetlist, exprKind); + else + tle = findTargetlistEntrySQL92(pstate, gexpr, + targetlist, exprKind); - foreach(gl, grouplist) + if (tle->ressortgroupref > 0) { - Node *gexpr = (Node *) lfirst(gl); - TargetEntry *tle; - bool found = false; + ListCell *sl; - if (useSQL99) - tle = findTargetlistEntrySQL99(pstate, gexpr, - targetlist, exprKind); - else - tle = findTargetlistEntrySQL92(pstate, gexpr, - targetlist, exprKind); + /* + * Eliminate duplicates (GROUP BY x, x) but only at local level. + * (Duplicates in grouping sets can affect the number of returned + * rows, so can't be dropped indiscriminately.) + * + * Since we don't care about anything except the sortgroupref, + * we can use a bitmapset rather than scanning lists. + */ + if (bms_is_member(tle->ressortgroupref,seen_local)) + return 0; - /* Eliminate duplicates (GROUP BY x, x) */ - if (targetIsInSortList(tle, InvalidOid, result)) - continue; + /* + * If we're already in the flat clause list, we don't need + * to consider adding ourselves again. + */ + found = targetIsInSortList(tle, InvalidOid, *flatresult); + if (found) + return tle->ressortgroupref; /* * If the GROUP BY tlist entry also appears in ORDER BY, copy operator @@ -1770,35 +1912,308 @@ transformGroupClause(ParseState *pstate, List *grouplist, * sort step, and it allows the user to choose the equality semantics * used by GROUP BY, should she be working with a datatype that has * more than one equality operator. + * + * If we're in a grouping set, though, we force our requested ordering + * to be NULLS LAST, because if we have any hope of using a sorted agg + * for the job, we're going to be tacking on generated NULL values + * after the corresponding groups. If the user demands nulls first, + * another sort step is going to be inevitable, but that's the + * planner's problem. */ - if (tle->ressortgroupref > 0) + + foreach(sl, sortClause) { - ListCell *sl; + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); - foreach(sl, sortClause) + if (sc->tleSortGroupRef == tle->ressortgroupref) { - SortGroupClause *sc = (SortGroupClause *) lfirst(sl); + SortGroupClause *grpc = copyObject(sc); + if (!toplevel) + grpc->nulls_first = false; + *flatresult = lappend(*flatresult, grpc); + found = true; + break; + } + } + } - if (sc->tleSortGroupRef == tle->ressortgroupref) - { - result = lappend(result, copyObject(sc)); - found = true; + /* + * If no match in ORDER BY, just add it to the result using default + * sort/group semantics. + */ + if (!found) + *flatresult = addTargetToGroupList(pstate, tle, + *flatresult, *targetlist, + exprLocation(gexpr), + true); + + /* + * _something_ must have assigned us a sortgroupref by now... + */ + + return tle->ressortgroupref; +} + +/* + * Transform a list of expressions within a GROUP BY clause or grouping set. + * + * The list of expressions belongs to a single clause within which duplicates + * can be safely eliminated. + * + * Returns an integer list of ressortgroupref values. + * + * flatresult reference to flat list of SortGroupClause nodes + * pstate ParseState + * list nodes to transform + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + * toplevel false if within any grouping set + */ +static List * +transformGroupClauseList(List **flatresult, + ParseState *pstate, List *list, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99, bool toplevel) +{ + Bitmapset *seen_local = NULL; + List *result = NIL; + ListCell *gl; + + foreach(gl, list) + { + Node *gexpr = (Node *) lfirst(gl); + + Index ref = transformGroupClauseExpr(flatresult, + seen_local, + pstate, + gexpr, + targetlist, + sortClause, + exprKind, + useSQL99, + toplevel); + if (ref > 0) + { + seen_local = bms_add_member(seen_local, ref); + result = lappend_int(result, ref); + } + } + + return result; +} + +/* + * Transform a grouping set and (recursively) its content. + * + * The grouping set might be a GROUPING SETS node with other grouping sets + * inside it, but SETS within SETS have already been flattened out before + * reaching here. + * + * Returns the transformed node, which now contains SIMPLE nodes with lists + * of ressortgrouprefs rather than expressions. + * + * flatresult reference to flat list of SortGroupClause nodes + * pstate ParseState + * gset grouping set to transform + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + * toplevel false if within any grouping set + */ +static Node * +transformGroupingSet(List **flatresult, + ParseState *pstate, GroupingSet *gset, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99, bool toplevel) +{ + ListCell *gl; + List *content = NIL; + + Assert(toplevel || gset->kind != GROUPING_SET_SETS); + + foreach(gl, gset->content) + { + Node *n = lfirst(gl); + + if (IsA(n, List)) + { + List *l = transformGroupClauseList(flatresult, + pstate, (List *) n, + targetlist, sortClause, + exprKind, useSQL99, false); + + content = lappend(content, makeGroupingSet(GROUPING_SET_SIMPLE, + l, + exprLocation(n))); + } + else if (IsA(n, GroupingSet)) + { + GroupingSet *gset2 = (GroupingSet *) lfirst(gl); + + content = lappend(content, transformGroupingSet(flatresult, + pstate, gset2, + targetlist, sortClause, + exprKind, useSQL99, false)); + } + else + { + Index ref = transformGroupClauseExpr(flatresult, + NULL, + pstate, + n, + targetlist, + sortClause, + exprKind, + useSQL99, + false); + + content = lappend(content, makeGroupingSet(GROUPING_SET_SIMPLE, + list_make1_int(ref), + exprLocation(n))); + } + } + + /* Arbitrarily cap the size of CUBE, which has exponential growth */ + if (gset->kind == GROUPING_SET_CUBE) + { + if (list_length(content) > 12) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("CUBE is limited to 12 elements"), + parser_errposition(pstate, gset->location))); + } + + return (Node *) makeGroupingSet(gset->kind, content, gset->location); +} + + +/* + * transformGroupClause - + * transform a GROUP BY clause + * + * GROUP BY items will be added to the targetlist (as resjunk columns) + * if not already present, so the targetlist must be passed by reference. + * + * This is also used for window PARTITION BY clauses (which act almost the + * same, but are always interpreted per SQL99 rules). + * + * Grouping sets make this a lot more complex than it was. Our goal here is + * twofold: we make a flat list of SortGroupClause nodes referencing each + * distinct expression used for grouping, with those expressions added to the + * targetlist if needed. At the same time, we build the groupingSets tree, + * which stores only ressortgrouprefs as integer lists inside GroupingSet nodes + * (possibly nested, but limited in depth: a GROUPING_SET_SETS node can contain + * nested SIMPLE, CUBE or ROLLUP nodes, but not more sets - we flatten that + * out; while CUBE and ROLLUP can contain only SIMPLE nodes). + * + * We skip much of the hard work if there are no grouping sets. + * + * One subtlety is that the groupClause list can end up empty while the + * groupingSets list is not; this happens if there are only empty grouping + * sets, or an explicit GROUP BY (). This has the same effect as specifying + * aggregates or a HAVING clause with no GROUP BY; the output is one row per + * grouping set even if the input is empty. + * + * Returns the transformed (flat) groupClause. + * + * pstate ParseState + * grouplist clause to transform + * groupingSets reference to list to contain the grouping set tree + * targetlist reference to TargetEntry list + * sortClause ORDER BY clause (SortGroupClause nodes) + * exprKind expression kind + * useSQL99 SQL99 rather than SQL92 syntax + */ +List * +transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets, + List **targetlist, List *sortClause, + ParseExprKind exprKind, bool useSQL99) +{ + List *result = NIL; + List *flat_grouplist; + List *gsets = NIL; + ListCell *gl; + bool hasGroupingSets = false; + Bitmapset *seen_local = NULL; + + /* + * Recursively flatten implicit RowExprs. (Technically this is only + * needed for GROUP BY, per the syntax rules for grouping sets, but + * we do it anyway.) + */ + flat_grouplist = (List *) flatten_grouping_sets((Node *) grouplist, + true, + &hasGroupingSets); + + /* + * If the list is now empty, but hasGroupingSets is true, it's because + * we elided redundant empty grouping sets. Restore a single empty + * grouping set to leave a canonical form: GROUP BY () + */ + + if (flat_grouplist == NIL && hasGroupingSets) + { + flat_grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY, + NIL, + exprLocation((Node *) grouplist))); + } + + foreach(gl, flat_grouplist) + { + Node *gexpr = (Node *) lfirst(gl); + + if (IsA(gexpr, GroupingSet)) + { + GroupingSet *gset = (GroupingSet *) gexpr; + + switch (gset->kind) + { + case GROUPING_SET_EMPTY: + gsets = lappend(gsets, gset); + break; + case GROUPING_SET_SIMPLE: + /* can't happen */ + Assert(false); + break; + case GROUPING_SET_SETS: + case GROUPING_SET_CUBE: + case GROUPING_SET_ROLLUP: + gsets = lappend(gsets, + transformGroupingSet(&result, + pstate, gset, + targetlist, sortClause, + exprKind, useSQL99, true)); break; - } } } + else + { + Index ref = transformGroupClauseExpr(&result, seen_local, + pstate, gexpr, + targetlist, sortClause, + exprKind, useSQL99, true); - /* - * If no match in ORDER BY, just add it to the result using default - * sort/group semantics. - */ - if (!found) - result = addTargetToGroupList(pstate, tle, - result, *targetlist, - exprLocation(gexpr), - true); + if (ref > 0) + { + seen_local = bms_add_member(seen_local, ref); + if (hasGroupingSets) + gsets = lappend(gsets, + makeGroupingSet(GROUPING_SET_SIMPLE, + list_make1_int(ref), + exprLocation(gexpr))); + } + } } + /* parser should prevent this */ + Assert(gsets == NIL || groupingSets != NULL); + + if (groupingSets) + *groupingSets = gsets; + return result; } @@ -1903,6 +2318,7 @@ transformWindowDefinitions(ParseState *pstate, true /* force SQL99 rules */ ); partitionClause = transformGroupClause(pstate, windef->partitionClause, + NULL, targetlist, orderClause, EXPR_KIND_WINDOW_PARTITION, |