-- -- SELECT_DISTINCT_ON -- SELECT DISTINCT ON (string4) string4, two, ten FROM onek ORDER BY string4 using <, two using >, ten using <; string4 | two | ten ---------+-----+----- AAAAxx | 1 | 1 HHHHxx | 1 | 1 OOOOxx | 1 | 1 VVVVxx | 1 | 1 (4 rows) -- this will fail due to conflict of ordering requirements SELECT DISTINCT ON (string4, ten) string4, two, ten FROM onek ORDER BY string4 using <, two using <, ten using <; ERROR: SELECT DISTINCT ON expressions must match initial ORDER BY expressions LINE 1: SELECT DISTINCT ON (string4, ten) string4, two, ten ^ SELECT DISTINCT ON (string4, ten) string4, ten, two FROM onek ORDER BY string4 using <, ten using >, two using <; string4 | ten | two ---------+-----+----- AAAAxx | 9 | 1 AAAAxx | 8 | 0 AAAAxx | 7 | 1 AAAAxx | 6 | 0 AAAAxx | 5 | 1 AAAAxx | 4 | 0 AAAAxx | 3 | 1 AAAAxx | 2 | 0 AAAAxx | 1 | 1 AAAAxx | 0 | 0 HHHHxx | 9 | 1 HHHHxx | 8 | 0 HHHHxx | 7 | 1 HHHHxx | 6 | 0 HHHHxx | 5 | 1 HHHHxx | 4 | 0 HHHHxx | 3 | 1 HHHHxx | 2 | 0 HHHHxx | 1 | 1 HHHHxx | 0 | 0 OOOOxx | 9 | 1 OOOOxx | 8 | 0 OOOOxx | 7 | 1 OOOOxx | 6 | 0 OOOOxx | 5 | 1 OOOOxx | 4 | 0 OOOOxx | 3 | 1 OOOOxx | 2 | 0 OOOOxx | 1 | 1 OOOOxx | 0 | 0 VVVVxx | 9 | 1 VVVVxx | 8 | 0 VVVVxx | 7 | 1 VVVVxx | 6 | 0 VVVVxx | 5 | 1 VVVVxx | 4 | 0 VVVVxx | 3 | 1 VVVVxx | 2 | 0 VVVVxx | 1 | 1 VVVVxx | 0 | 0 (40 rows) -- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; r | f1 ---+------------- 0 | -2147483647 (1 row) -- -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when -- all of the distinct_pathkeys have been marked as redundant -- -- Ensure we also get a LIMIT plan with DISTINCT ON EXPLAIN (COSTS OFF) SELECT DISTINCT ON (four) four,two FROM tenk1 WHERE four = 0 ORDER BY 1; QUERY PLAN ---------------------------- Limit -> Seq Scan on tenk1 Filter: (four = 0) (3 rows) -- and check the result of the above query is correct SELECT DISTINCT ON (four) four,two FROM tenk1 WHERE four = 0 ORDER BY 1; four | two ------+----- 0 | 0 (1 row) -- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols EXPLAIN (COSTS OFF) SELECT DISTINCT ON (four) four,two FROM tenk1 WHERE four = 0 ORDER BY 1,2; QUERY PLAN ---------------------------------- Limit -> Sort Sort Key: two -> Seq Scan on tenk1 Filter: (four = 0) (5 rows) -- Same again but use a column that is indexed so that we get an index scan -- then a limit EXPLAIN (COSTS OFF) SELECT DISTINCT ON (four) four,hundred FROM tenk1 WHERE four = 0 ORDER BY 1,2; QUERY PLAN ----------------------------------------------- Limit -> Index Scan using tenk1_hundred on tenk1 Filter: (four = 0) (3 rows) -- -- Test the planner's ability to reorder the distinctClause Pathkeys to match -- the input path's ordering -- CREATE TABLE distinct_on_tbl (x int, y int, z int); INSERT INTO distinct_on_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i; CREATE INDEX distinct_on_tbl_x_y_idx ON distinct_on_tbl (x, y); ANALYZE distinct_on_tbl; -- Produce results with sorting. SET enable_hashagg TO OFF; -- Ensure we avoid the need to re-sort by reordering the distinctClause -- Pathkeys to match the ordering of the input path EXPLAIN (COSTS OFF) SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl; QUERY PLAN ------------------------------------------------------------------------ Unique -> Index Only Scan using distinct_on_tbl_x_y_idx on distinct_on_tbl (2 rows) SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl; x | y ---+--- 0 | 0 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 (10 rows) -- Ensure we leverage incremental-sort by reordering the distinctClause -- Pathkeys to partially match the ordering of the input path EXPLAIN (COSTS OFF) SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_on_tbl ORDER BY x) s; QUERY PLAN ------------------------------------------------------------------------------------ Unique -> Incremental Sort Sort Key: s.x, s.y Presorted Key: s.x -> Subquery Scan on s -> Index Only Scan using distinct_on_tbl_x_y_idx on distinct_on_tbl (6 rows) SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_on_tbl ORDER BY x) s; x | y ---+--- 0 | 0 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 (10 rows) -- Ensure we reorder the distinctClause Pathkeys to match the ordering of the -- input path even if there is ORDER BY clause EXPLAIN (COSTS OFF) SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl ORDER BY y; QUERY PLAN ------------------------------------------------------------------------------ Sort Sort Key: y -> Unique -> Index Only Scan using distinct_on_tbl_x_y_idx on distinct_on_tbl (4 rows) SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl ORDER BY y; x | y ---+--- 0 | 0 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 (10 rows) -- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys EXPLAIN (COSTS OFF) SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_on_tbl order by x, z, y) s ORDER BY y, x, z; QUERY PLAN --------------------------------------------------------------------------------------------- Sort Sort Key: s.y, s.x, s.z -> Unique -> Incremental Sort Sort Key: s.x, s.y, s.z Presorted Key: s.x -> Subquery Scan on s -> Sort Sort Key: distinct_on_tbl.x, distinct_on_tbl.z, distinct_on_tbl.y -> Seq Scan on distinct_on_tbl (10 rows) SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_on_tbl order by x, z, y) s ORDER BY y, x, z; x | y ---+--- 0 | 0 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 (10 rows) RESET enable_hashagg; DROP TABLE distinct_on_tbl;