diff options
author | Álvaro Herrera <alvherre@alvh.no-ip.org> | 2025-03-18 18:56:11 +0100 |
---|---|---|
committer | Álvaro Herrera <alvherre@alvh.no-ip.org> | 2025-03-18 18:56:11 +0100 |
commit | 62d712ecfd940f60e68bde5b6972b6859937c412 (patch) | |
tree | cad7cd2202f4dd3ad75c152e6aca98246ef661cd /contrib/pg_stat_statements | |
parent | 247ce06b883d7b3a40d08312dc03dfb37fbff212 (diff) | |
download | postgresql-62d712ecfd940f60e68bde5b6972b6859937c412.tar.gz postgresql-62d712ecfd940f60e68bde5b6972b6859937c412.zip |
Introduce squashing of constant lists in query jumbling
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is individually jumbled. Most of the time that's undesirable,
especially if the list becomes too large.
Fix this by introducing a new GUC query_id_squash_values which modifies
the node jumbling code to only consider the first and last element of a
list of constants, rather than each list element individually. This
affects both the query_id generated by query jumbling, as well as
pg_stat_statements query normalization so that it suppresses printing of
the individual elements of such a list.
The default value is off, meaning the previous behavior is maintained.
Author: Dmitry Dolgov <9erthalion6@gmail.com>
Reviewed-by: Sergey Dudoladov (mysterious, off-list)
Reviewed-by: David Geier <geidav.pg@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Sutou Kouhei <kou@clear-code.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Marcos Pegoraro <marcos@f10.com.br>
Reviewed-by: Julien Rouhaud <rjuju123@gmail.com>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Tested-by: Yasuo Honda <yasuo.honda@gmail.com>
Tested-by: Sergei Kornilov <sk@zsrv.org>
Tested-by: Maciek Sakrejda <m.sakrejda@gmail.com>
Tested-by: Chengxi Sun <sunchengxi@highgo.com>
Tested-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Discussion: https://postgr.es/m/CA+q6zcWtUbT_Sxj0V6HY6EZ89uv5wuG5aefpe_9n0Jr3VwntFg@mail.gmail.com
Diffstat (limited to 'contrib/pg_stat_statements')
-rw-r--r-- | contrib/pg_stat_statements/Makefile | 2 | ||||
-rw-r--r-- | contrib/pg_stat_statements/expected/squashing.out | 464 | ||||
-rw-r--r-- | contrib/pg_stat_statements/meson.build | 1 | ||||
-rw-r--r-- | contrib/pg_stat_statements/pg_stat_statements.c | 76 | ||||
-rw-r--r-- | contrib/pg_stat_statements/sql/squashing.sql | 180 |
5 files changed, 712 insertions, 11 deletions
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index 241c02587bc..b2bd8794d2a 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf REGRESS = select dml cursors utility level_tracking planning \ user_activity wal entry_timestamp privileges extended \ - parallel cleanup oldextversions + parallel cleanup oldextversions squashing # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients). NO_INSTALLCHECK = 1 diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out new file mode 100644 index 00000000000..55aa5109433 --- /dev/null +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -0,0 +1,464 @@ +-- +-- Const squashing functionality +-- +CREATE EXTENSION pg_stat_statements; +CREATE TABLE test_squash (id int, data int); +-- IN queries +-- No squashing is performed, as a baseline result +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1 + SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1 + SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(4 rows) + +-- Normal scenario, too many simple constants for an IN query +SET query_id_squash_values = on; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT * FROM test_squash WHERE id IN ($1) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 4 + SELECT * FROM test_squash WHERE id IN ($1) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- More conditions in the query +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +---------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) AND data = $2 | 3 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Multiple squashed intervals +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)+| 3 + AND data IN ($2 /*, ... */) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- No constants simplification for OpExpr +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- In the following two queries the operator expressions (+) and (@) have +-- different oppno, and will be given different query_id if squashed, even though +-- the normalized query will be the same +SELECT * FROM test_squash WHERE id IN + (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id IN + (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN +| 1 + ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | + SELECT * FROM test_squash WHERE id IN +| 1 + (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +-- FuncExpr +-- Verify multiple type representation end up with the same query_id +CREATE TABLE test_float (data float); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT data FROM test_float WHERE data IN (1, 2); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN (1, '2'); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN ('1', 2); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN ('1', '2'); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN (1.0, 1.0); + data +------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-----------------------------------------------------------+------- + SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Numeric type, implicit cast is squashed +CREATE TABLE test_squash_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-----------------------------------------------------------------+------- + SELECT * FROM test_squash_numeric WHERE data IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, implicit cast is squashed +CREATE TABLE test_squash_bigint (id int, data bigint); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------+------- + SELECT * FROM test_squash_bigint WHERE data IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, explicit cast is not squashed +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint WHERE data IN + (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_bigint WHERE data IN +| 1 + ($1 /*, ... */::bigint) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint WHERE id IN + (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100)))); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------------------------+------- + SELECT * FROM test_squash_bigint WHERE id IN +| 1 + (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+| + abs($8), abs($9), abs($10), ((abs($11)))) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO, SubLink instead of a Const +CREATE TABLE test_squash_jsonb (id int, data jsonb); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_jsonb WHERE data IN + ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------+------- + SELECT * FROM test_squash_jsonb WHERE data IN +| 1 + ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+| + (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+| + (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+| + (SELECT $10)::jsonb) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO +-- Create some dummy type to force CoerceViaIO +CREATE TYPE casttesttype; +CREATE FUNCTION casttesttype_in(cstring) + RETURNS casttesttype + AS 'textin' + LANGUAGE internal STRICT IMMUTABLE; +NOTICE: return type casttesttype is only a shell +CREATE FUNCTION casttesttype_out(casttesttype) + RETURNS cstring + AS 'textout' + LANGUAGE internal STRICT IMMUTABLE; +NOTICE: argument type casttesttype is only a shell +LINE 1: CREATE FUNCTION casttesttype_out(casttesttype) + ^ +CREATE TYPE casttesttype ( + internallength = variable, + input = casttesttype_in, + output = casttesttype_out, + alignment = int4 +); +CREATE CAST (int4 AS casttesttype) WITH INOUT; +CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype) +returns boolean language sql immutable as $$ + SELECT true +$$; +CREATE OPERATOR = ( + leftarg = casttesttype, + rightarg = casttesttype, + procedure = casttesttype_eq, + commutator = =); +CREATE TABLE test_squash_cast (id int, data casttesttype); +-- Use the introduced type to construct a list of CoerceViaIO around Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_cast WHERE data IN + (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_cast WHERE data IN +| 1 + ($1 /*, ... */::int4::casttesttype) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_jsonb WHERE data IN + (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_jsonb WHERE data IN +| 1 + (($1 /*, ... */)::jsonb) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- RelabelType +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Test constants evaluation in a CTE, which was causing issues in the past +WITH cte AS ( + SELECT 'const' as const FROM test_squash +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + result +-------- +(0 rows) + +-- Simple array would be squashed as well +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + array +------------------------ + {1,2,3,4,5,6,7,8,9,10} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[$1 /*, ... */] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +RESET query_id_squash_values; diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 4446af58c58..01a6cbdcf61 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -56,6 +56,7 @@ tests += { 'parallel', 'cleanup', 'oldextversions', + 'squashing', ], 'regress_args': ['--temp-config', files('pg_stat_statements.conf')], # Disabled because these tests require diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index b245d04097d..8ab9ad58e1c 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -296,7 +296,6 @@ static bool pgss_track_planning = false; /* whether to track planning * duration */ static bool pgss_save = true; /* whether to save stats across shutdown */ - #define pgss_enabled(level) \ (!IsParallelWorker() && \ (pgss_track == PGSS_TRACK_ALL || \ @@ -2823,6 +2822,10 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool in_squashed = false; /* in a run of squashed consts? */ + int skipped_constants = 0; /* Position adjustment of later + * constants after squashed ones */ + /* * Get constants' lengths (core system only gives us locations). Note @@ -2836,6 +2839,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We * could refine that limit based on the max value of n for the current * query, but it hardly seems worth any extra effort to do so. + * + * Note this also gives enough room for the commented-out ", ..." list + * syntax used by constant squashing. */ norm_query_buflen = query_len + jstate->clocations_count * 10; @@ -2848,6 +2854,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, tok_len; /* Length (in bytes) of that tok */ off = jstate->clocations[i].location; + /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; @@ -2856,18 +2863,67 @@ generate_normalized_query(JumbleState *jstate, const char *query, if (tok_len < 0) continue; /* ignore any duplicates */ - /* Copy next chunk (what precedes the next constant) */ - len_to_wrt = off - last_off; - len_to_wrt -= last_tok_len; + /* + * What to do next depends on whether we're squashing constant lists, + * and whether we're already in a run of such constants. + */ + if (!jstate->clocations[i].squashed) + { + /* + * This location corresponds to a constant not to be squashed. + * Print what comes before the constant ... + */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + + Assert(len_to_wrt >= 0); + + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; - Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; + /* ... and then a param symbol replacing the constant itself */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id - skipped_constants); - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + /* In case previous constants were merged away, stop doing that */ + in_squashed = false; + } + else if (!in_squashed) + { + /* + * This location is the start position of a run of constants to be + * squashed, so we need to print the representation of starting a + * group of stashed constants. + * + * Print what comes before the constant ... + */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + Assert(len_to_wrt >= 0); + Assert(i + 1 < jstate->clocations_count); + Assert(jstate->clocations[i + 1].squashed); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* ... and then start a run of squashed constants */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", + i + 1 + jstate->highest_extern_param_id - skipped_constants); + + /* The next location will match the block below, to end the run */ + in_squashed = true; + + skipped_constants++; + } + else + { + /* + * The second location of a run of squashable elements; this + * indicates its end. + */ + in_squashed = false; + } + /* Otherwise the constant is squashed away -- move forward */ quer_loc = off + tok_len; last_off = off; last_tok_len = tok_len; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql new file mode 100644 index 00000000000..56ee8ccb9a1 --- /dev/null +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -0,0 +1,180 @@ +-- +-- Const squashing functionality +-- +CREATE EXTENSION pg_stat_statements; + +CREATE TABLE test_squash (id int, data int); + +-- IN queries + +-- No squashing is performed, as a baseline result +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Normal scenario, too many simple constants for an IN query +SET query_id_squash_values = on; + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash WHERE id IN (1); +SELECT * FROM test_squash WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- More conditions in the query +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2; +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2; +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Multiple squashed intervals +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- No constants simplification for OpExpr +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +-- In the following two queries the operator expressions (+) and (@) have +-- different oppno, and will be given different query_id if squashed, even though +-- the normalized query will be the same +SELECT * FROM test_squash WHERE id IN + (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9); +SELECT * FROM test_squash WHERE id IN + (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- FuncExpr + +-- Verify multiple type representation end up with the same query_id +CREATE TABLE test_float (data float); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT data FROM test_float WHERE data IN (1, 2); +SELECT data FROM test_float WHERE data IN (1, '2'); +SELECT data FROM test_float WHERE data IN ('1', 2); +SELECT data FROM test_float WHERE data IN ('1', '2'); +SELECT data FROM test_float WHERE data IN (1.0, 1.0); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Numeric type, implicit cast is squashed +CREATE TABLE test_squash_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, implicit cast is squashed +CREATE TABLE test_squash_bigint (id int, data bigint); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, explicit cast is not squashed +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint WHERE data IN + (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint WHERE id IN + (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100)))); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO, SubLink instead of a Const +CREATE TABLE test_squash_jsonb (id int, data jsonb); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_jsonb WHERE data IN + ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO + +-- Create some dummy type to force CoerceViaIO +CREATE TYPE casttesttype; + +CREATE FUNCTION casttesttype_in(cstring) + RETURNS casttesttype + AS 'textin' + LANGUAGE internal STRICT IMMUTABLE; + +CREATE FUNCTION casttesttype_out(casttesttype) + RETURNS cstring + AS 'textout' + LANGUAGE internal STRICT IMMUTABLE; + +CREATE TYPE casttesttype ( + internallength = variable, + input = casttesttype_in, + output = casttesttype_out, + alignment = int4 +); + +CREATE CAST (int4 AS casttesttype) WITH INOUT; + +CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype) +returns boolean language sql immutable as $$ + SELECT true +$$; + +CREATE OPERATOR = ( + leftarg = casttesttype, + rightarg = casttesttype, + procedure = casttesttype_eq, + commutator = =); + +CREATE TABLE test_squash_cast (id int, data casttesttype); + +-- Use the introduced type to construct a list of CoerceViaIO around Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_cast WHERE data IN + (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_jsonb WHERE data IN + (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- RelabelType +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Test constants evaluation in a CTE, which was causing issues in the past +WITH cte AS ( + SELECT 'const' as const FROM test_squash +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + +-- Simple array would be squashed as well +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +RESET query_id_squash_values; |