author | Tomas Vondra <tomas.vondra@postgresql.org> | 2021-03-18 17:45:38 +0100
committer | Tomas Vondra <tomas.vondra@postgresql.org> | 2021-03-18 18:22:18 +0100
commit | be45be9c33a85e72cdaeb9967e9f6d2d00199e09 (patch)
tree | c728067c32404e7475ebf4c66561d7edf2dd35b3 /src/backend/parser/parse_agg.c
parent | cd91de0d17952b5763466cfa663e98318f26d357 (diff)
Implement GROUP BY DISTINCT
With grouping sets, it's possible that some of the grouping sets are
duplicates. This is especially common with CUBE and ROLLUP clauses. For
example, GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to
GROUP BY GROUPING SETS (
(a, b, c),
(a, b, c),
(a, b, c),
(a, b),
(a, b),
(a, b),
(a),
(a),
(a),
(c, a),
(c, a),
(c, a),
(c),
(b, c),
(b),
()
)
Some of the grouping sets are calculated multiple times, which is mostly
unnecessary. This commit implements a new GROUP BY DISTINCT feature, as
defined in the SQL standard, which eliminates the duplicate sets.
Author: Vik Fearing
Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra
Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org
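For comparison, the new syntax GROUP BY DISTINCT CUBE (a,b), CUBE (b,c) expands to only the distinct sets of the list above:
GROUP BY GROUPING SETS (
(a, b, c),
(a, b),
(a),
(c, a),
(c),
(b, c),
(b),
()
)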
Diffstat (limited to 'src/backend/parser/parse_agg.c')
-rw-r--r-- | src/backend/parser/parse_agg.c | 58
1 file changed, 54 insertions(+), 4 deletions(-)
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index fd08b9eeff0..899327aaf4e 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1071,7 +1071,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 		 * The limit of 4096 is arbitrary and exists simply to avoid resource
 		 * issues from pathological constructs.
 		 */
-		List	   *gsets = expand_grouping_sets(qry->groupingSets, 4096);
+		List	   *gsets = expand_grouping_sets(qry->groupingSets, qry->groupDistinct, 4096);
 
 		if (!gsets)
 			ereport(ERROR,
@@ -1735,6 +1735,33 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
 	return (la > lb) ? 1 : (la == lb) ? 0 : -1;
 }
 
+/* list_sort comparator to sort sub-lists by length and contents */
+static int
+cmp_list_len_contents_asc(const ListCell *a, const ListCell *b)
+{
+	int			res = cmp_list_len_asc(a, b);
+
+	if (res == 0)
+	{
+		List	   *la = (List *) lfirst(a);
+		List	   *lb = (List *) lfirst(b);
+		ListCell   *lca;
+		ListCell   *lcb;
+
+		forboth(lca, la, lcb, lb)
+		{
+			int			va = lfirst_int(lca);
+			int			vb = lfirst_int(lcb);
+			if (va > vb)
+				return 1;
+			if (va < vb)
+				return -1;
+		}
+	}
+
+	return res;
+}
+
 /*
  * Expand a groupingSets clause to a flat list of grouping sets.
  * The returned list is sorted by length, shortest sets first.
@@ -1743,7 +1770,7 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
  * some consistency checks.
  */
 List *
-expand_grouping_sets(List *groupingSets, int limit)
+expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit)
 {
 	List	   *expanded_groups = NIL;
 	List	   *result = NIL;
@@ -1801,8 +1828,31 @@ expand_grouping_sets(List *groupingSets, int limit)
 		result = new_result;
 	}
 
-	/* Now sort the lists by length */
-	list_sort(result, cmp_list_len_asc);
+	/* Now sort the lists by length and deduplicate if necessary */
+	if (!groupDistinct || list_length(result) < 2)
+		list_sort(result, cmp_list_len_asc);
+	else
+	{
+		ListCell   *cell;
+		List	   *prev;
+
+		/* Sort each groupset individually */
+		foreach(cell, result)
+			list_sort(lfirst(cell), list_int_cmp);
+
+		/* Now sort the list of groupsets by length and contents */
+		list_sort(result, cmp_list_len_contents_asc);
+
+		/* Finally, remove duplicates */
+		prev = list_nth_node(List, result, 0);
+		for_each_from(cell, result, 1)
+		{
+			if (equal(lfirst(cell), prev))
+				foreach_delete_current(result, cell);
+			else
+				prev = lfirst(cell);
+		}
+	}
 
 	return result;
 }
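The patch deduplicates by canonicalizing each grouping set (sorting its members), ordering the sets so that equal ones become adjacent, and then dropping adjacent duplicates. Below is a minimal standalone sketch of that same approach written against plain C arrays rather than PostgreSQL's List API; the GroupSet struct and every other name in it are invented for illustration and are not part of the patch.

/*
 * Sketch only: deduplicate "grouping sets" represented as small int arrays
 * by (1) sorting each set, (2) sorting the sets by length then contents,
 * (3) keeping the first of each run of equal neighbours.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_COLS 8

typedef struct GroupSet
{
	int		ncols;
	int		cols[MAX_COLS];		/* column ids, akin to sortgroupref values */
} GroupSet;

/* qsort comparator for the column ids inside one set */
static int
cmp_int_asc(const void *a, const void *b)
{
	int		va = *(const int *) a;
	int		vb = *(const int *) b;

	return (va > vb) - (va < vb);
}

/* qsort comparator: shorter sets first, ties broken by contents */
static int
cmp_set_len_contents_asc(const void *a, const void *b)
{
	const GroupSet *sa = a;
	const GroupSet *sb = b;

	if (sa->ncols != sb->ncols)
		return (sa->ncols > sb->ncols) ? 1 : -1;
	for (int i = 0; i < sa->ncols; i++)
	{
		if (sa->cols[i] != sb->cols[i])
			return (sa->cols[i] > sb->cols[i]) ? 1 : -1;
	}
	return 0;
}

int
main(void)
{
	/* (b,a), (a,b), (a), (a), () with a=1, b=2 -- contains duplicate sets */
	GroupSet	sets[] = {
		{2, {2, 1}}, {2, {1, 2}}, {1, {1}}, {1, {1}}, {0, {0}}
	};
	int			nsets = sizeof(sets) / sizeof(sets[0]);
	int			nkept = 1;

	/* Step 1: canonicalize each set so equal sets become identical */
	for (int i = 0; i < nsets; i++)
		qsort(sets[i].cols, sets[i].ncols, sizeof(int), cmp_int_asc);

	/* Step 2: sort the sets so duplicates end up adjacent */
	qsort(sets, nsets, sizeof(GroupSet), cmp_set_len_contents_asc);

	/* Step 3: drop adjacent duplicates, keeping the first occurrence */
	for (int i = 1; i < nsets; i++)
	{
		if (cmp_set_len_contents_asc(&sets[i], &sets[nkept - 1]) != 0)
			sets[nkept++] = sets[i];
	}

	/* Prints the distinct sets, one per line: (), (1), (1,2) */
	for (int i = 0; i < nkept; i++)
	{
		printf("(");
		for (int j = 0; j < sets[i].ncols; j++)
			printf("%s%d", j > 0 ? "," : "", sets[i].cols[j]);
		printf(")\n");
	}
	return 0;
}

Sorting the members of each set first is what lets two sets that list the same columns in different order compare equal in the final pass; in the patch that role is played by list_sort(..., list_int_cmp) on each groupset and the equal() check on adjacent elements.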