aboutsummaryrefslogtreecommitdiff
path: root/src/backend/parser
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2021-03-18 17:45:38 +0100
committerTomas Vondra <tomas.vondra@postgresql.org>2021-03-18 18:22:18 +0100
commitbe45be9c33a85e72cdaeb9967e9f6d2d00199e09 (patch)
treec728067c32404e7475ebf4c66561d7edf2dd35b3 /src/backend/parser
parentcd91de0d17952b5763466cfa663e98318f26d357 (diff)
downloadpostgresql-be45be9c33a85e72cdaeb9967e9f6d2d00199e09.tar.gz
postgresql-be45be9c33a85e72cdaeb9967e9f6d2d00199e09.zip
Implement GROUP BY DISTINCT
With grouping sets, it's possible that some of the grouping sets are duplicate. This is especially common with CUBE and ROLLUP clauses. For example GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to GROUP BY GROUPING SETS ( (a, b, c), (a, b, c), (a, b, c), (a, b), (a, b), (a, b), (a), (a), (a), (c, a), (c, a), (c, a), (c), (b, c), (b), () ) Some of the grouping sets are calculated multiple times, which is mostly unnecessary. This commit implements a new GROUP BY DISTINCT feature, as defined in the SQL standard, which eliminates the duplicate sets. Author: Vik Fearing Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org
Diffstat (limited to 'src/backend/parser')
-rw-r--r--src/backend/parser/analyze.c1
-rw-r--r--src/backend/parser/gram.y59
-rw-r--r--src/backend/parser/parse_agg.c58
3 files changed, 97 insertions, 21 deletions
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 0f3a70c49a8..7149724953d 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1264,6 +1264,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
+ qry->groupDistinct = stmt->groupDistinct;
if (stmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 652be0b96da..fd07e7107d8 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -134,6 +134,13 @@ typedef struct SelectLimit
LimitOption limitOption;
} SelectLimit;
+/* Private struct for the result of group_clause production */
+typedef struct GroupClause
+{
+ bool distinct;
+ List *list;
+} GroupClause;
+
/* ConstraintAttributeSpec yields an integer bitmask of these flags: */
#define CAS_NOT_DEFERRABLE 0x01
#define CAS_DEFERRABLE 0x02
@@ -250,6 +257,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
PartitionBoundSpec *partboundspec;
RoleSpec *rolespec;
struct SelectLimit *selectlimit;
+ SetQuantifier setquantifier;
+ struct GroupClause *groupclause;
}
%type <node> stmt schema_stmt
@@ -405,7 +414,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
target_list opt_target_list insert_column_list set_target_list
set_clause_list set_clause
def_list operator_def_list indirection opt_indirection
- reloption_list group_clause TriggerFuncArgs opclass_item_list opclass_drop_list
+ reloption_list TriggerFuncArgs opclass_item_list opclass_drop_list
opclass_purpose opt_opfamily transaction_mode_list_or_empty
OptTableFuncElementList TableFuncElementList opt_type_modifiers
prep_type_clause
@@ -418,6 +427,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
vacuum_relation_list opt_vacuum_relation_list
drop_option_list
+%type <groupclause> group_clause
%type <list> group_by_list
%type <node> group_by_item empty_grouping_set rollup_clause cube_clause
%type <node> grouping_sets_clause
@@ -443,7 +453,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <node> for_locking_item
%type <list> for_locking_clause opt_for_locking_clause for_locking_items
%type <list> locked_rels_list
-%type <boolean> all_or_distinct
+%type <setquantifier> set_quantifier
%type <node> join_qual
%type <jtype> join_type
@@ -11294,7 +11304,8 @@ simple_select:
n->intoClause = $4;
n->fromClause = $5;
n->whereClause = $6;
- n->groupClause = $7;
+ n->groupClause = ($7)->list;
+ n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *)n;
@@ -11309,7 +11320,8 @@ simple_select:
n->intoClause = $4;
n->fromClause = $5;
n->whereClause = $6;
- n->groupClause = $7;
+ n->groupClause = ($7)->list;
+ n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *)n;
@@ -11334,17 +11346,17 @@ simple_select:
n->fromClause = list_make1($2);
$$ = (Node *)n;
}
- | select_clause UNION all_or_distinct select_clause
+ | select_clause UNION set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_UNION, $3, $1, $4);
+ $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
- | select_clause INTERSECT all_or_distinct select_clause
+ | select_clause INTERSECT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4);
+ $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
- | select_clause EXCEPT all_or_distinct select_clause
+ | select_clause EXCEPT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4);
+ $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
;
@@ -11542,10 +11554,10 @@ opt_table: TABLE
| /*EMPTY*/
;
-all_or_distinct:
- ALL { $$ = true; }
- | DISTINCT { $$ = false; }
- | /*EMPTY*/ { $$ = false; }
+set_quantifier:
+ ALL { $$ = SET_QUANTIFIER_ALL; }
+ | DISTINCT { $$ = SET_QUANTIFIER_DISTINCT; }
+ | /*EMPTY*/ { $$ = SET_QUANTIFIER_DEFAULT; }
;
/* We use (NIL) as a placeholder to indicate that all target expressions
@@ -11771,8 +11783,20 @@ first_or_next: FIRST_P { $$ = 0; }
* GroupingSet node of some type.
*/
group_clause:
- GROUP_P BY group_by_list { $$ = $3; }
- | /*EMPTY*/ { $$ = NIL; }
+ GROUP_P BY set_quantifier group_by_list
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->list = $4;
+ $$ = n;
+ }
+ | /*EMPTY*/
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->list = NIL;
+ $$ = n;
+ }
;
group_by_list:
@@ -15145,7 +15169,8 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->targetList = $2;
n->fromClause = $3;
n->whereClause = $4;
- n->groupClause = $5;
+ n->groupClause = ($5)->list;
+ n->groupDistinct = ($5)->distinct;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index fd08b9eeff0..899327aaf4e 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1071,7 +1071,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
* The limit of 4096 is arbitrary and exists simply to avoid resource
* issues from pathological constructs.
*/
- List *gsets = expand_grouping_sets(qry->groupingSets, 4096);
+ List *gsets = expand_grouping_sets(qry->groupingSets, qry->groupDistinct, 4096);
if (!gsets)
ereport(ERROR,
@@ -1735,6 +1735,33 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
return (la > lb) ? 1 : (la == lb) ? 0 : -1;
}
+/* list_sort comparator to sort sub-lists by length and contents */
+static int
+cmp_list_len_contents_asc(const ListCell *a, const ListCell *b)
+{
+ int res = cmp_list_len_asc(a, b);
+
+ if (res == 0)
+ {
+ List *la = (List *) lfirst(a);
+ List *lb = (List *) lfirst(b);
+ ListCell *lca;
+ ListCell *lcb;
+
+ forboth(lca, la, lcb, lb)
+ {
+ int va = intVal(lca);
+ int vb = intVal(lcb);
+ if (va > vb)
+ return 1;
+ if (va < vb)
+ return -1;
+ }
+ }
+
+ return res;
+}
+
/*
* Expand a groupingSets clause to a flat list of grouping sets.
* The returned list is sorted by length, shortest sets first.
@@ -1743,7 +1770,7 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
* some consistency checks.
*/
List *
-expand_grouping_sets(List *groupingSets, int limit)
+expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit)
{
List *expanded_groups = NIL;
List *result = NIL;
@@ -1801,8 +1828,31 @@ expand_grouping_sets(List *groupingSets, int limit)
result = new_result;
}
- /* Now sort the lists by length */
- list_sort(result, cmp_list_len_asc);
+ /* Now sort the lists by length and deduplicate if necessary */
+ if (!groupDistinct || list_length(result) < 2)
+ list_sort(result, cmp_list_len_asc);
+ else
+ {
+ ListCell *cell;
+ List *prev;
+
+ /* Sort each groupset individually */
+ foreach(cell, result)
+ list_sort(lfirst(cell), list_int_cmp);
+
+ /* Now sort the list of groupsets by length and contents */
+ list_sort(result, cmp_list_len_contents_asc);
+
+ /* Finally, remove duplicates */
+ prev = list_nth_node(List, result, 0);
+ for_each_from(cell, result, 1)
+ {
+ if (equal(lfirst(cell), prev))
+ foreach_delete_current(result, cell);
+ else
+ prev = lfirst(cell);
+ }
+ }
return result;
}