aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2021-04-06 16:12:37 +0200
committerTomas Vondra <tomas.vondra@postgresql.org>2021-04-06 16:56:06 +0200
commit518442c7f334f3b05ea28b7ef50f1b551cfcc23e (patch)
tree5a24b7e2dea054ed0fb094afbb8dcbd7fa441d70 /src
parent7ab96cf6b312cfcd79cdc1a69c6bdb75de0ed30f (diff)
downloadpostgresql-518442c7f334f3b05ea28b7ef50f1b551cfcc23e.tar.gz
postgresql-518442c7f334f3b05ea28b7ef50f1b551cfcc23e.zip
Fix handling of clauses incompatible with extended statistics
Handling of incompatible clauses while applying extended statistics was a bit confused - while handling a mix of compatible and incompatible clauses it sometimes incorrectly treated the incompatible clauses as compatible, resulting in a crash. Fixed by reworking the code applying the selected statistics object to make it easier to understand, and adding a proper compatibility check. Reported-by: David Rowley Discussion: https://postgr.es/m/CAApHDvpYT10-nkSp8xXe-nbO3jmoaRyRFHbzh-RWMfAJynqgpQ%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/statistics/extended_stats.c86
-rw-r--r--src/backend/statistics/mcv.c4
-rw-r--r--src/test/regress/expected/stats_ext.out19
-rw-r--r--src/test/regress/sql/stats_ext.sql19
4 files changed, 100 insertions, 28 deletions
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index dd3c84a91c0..463d44a68a4 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1255,6 +1255,10 @@ choose_best_statistics(List *stats, char requiredkind,
/*
* Collect attributes and expressions in remaining (unestimated)
* clauses fully covered by this statistic object.
+ *
+ * We know already estimated clauses have both clause_attnums and
+ * clause_exprs set to NULL. We leave the pointers NULL if already
+ * estimated, or we reset them to NULL after estimating the clause.
*/
for (i = 0; i < nclauses; i++)
{
@@ -1758,39 +1762,65 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
/* record which clauses are simple (single column or expression) */
simple_clauses = NULL;
- listidx = 0;
+ listidx = -1;
foreach(l, clauses)
{
+ /* Increment the index before we decide if to skip the clause. */
+ listidx++;
+
/*
- * If the clause is not already estimated and is compatible with
- * the selected statistics object (all attributes and expressions
- * covered), mark it as estimated and add it to the list to
- * estimate.
+ * Ignore clauses from which we did not extract any attnums or
+ * expressions (this needs to be consistent with what we do in
+ * choose_best_statistics).
+ *
+ * This also eliminates already estimated clauses - both those
+ * estimated before and during applying extended statistics.
+ *
+ * XXX This check is needed because both bms_is_subset and
+ * stat_covers_expressions return true for empty attnums and
+ * expressions.
*/
- if (!bms_is_member(listidx, *estimatedclauses) &&
- bms_is_subset(list_attnums[listidx], stat->keys) &&
- stat_covers_expressions(stat, list_exprs[listidx], NULL))
- {
- /* record simple clauses (single column or expression) */
- if ((list_attnums[listidx] == NULL &&
- list_length(list_exprs[listidx]) == 1) ||
- (list_exprs[listidx] == NIL &&
- bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
- simple_clauses = bms_add_member(simple_clauses,
- list_length(stat_clauses));
-
- /* add clause to list and mark as estimated */
- stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
- *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
-
- bms_free(list_attnums[listidx]);
- list_attnums[listidx] = NULL;
-
- list_free(list_exprs[listidx]);
- list_exprs[listidx] = NULL;
- }
+ if (!list_attnums[listidx] && !list_exprs[listidx])
+ continue;
- listidx++;
+ /*
+ * The clause was not estimated yet, and we've extracted either
+ * attnums of expressions from it. Ignore it if it's not fully
+ * covered by the chosen statistics.
+ *
+ * We need to check both attributes and expressions, and reject
+ * if either is not covered.
+ */
+ if (!bms_is_subset(list_attnums[listidx], stat->keys) ||
+ !stat_covers_expressions(stat, list_exprs[listidx], NULL))
+ continue;
+
+ /*
+ * Now we know the clause is compatible (we have either atttnums
+ * or expressions extracted from it), and was not estimated yet.
+ */
+
+ /* record simple clauses (single column or expression) */
+ if ((list_attnums[listidx] == NULL &&
+ list_length(list_exprs[listidx]) == 1) ||
+ (list_exprs[listidx] == NIL &&
+ bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
+ simple_clauses = bms_add_member(simple_clauses,
+ list_length(stat_clauses));
+
+ /* add clause to list and mark it as estimated */
+ stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+ *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
+
+ /*
+ * Reset the pointers, so that choose_best_statistics knows this
+ * clause was estimated and does not consider it again.
+ */
+ bms_free(list_attnums[listidx]);
+ list_attnums[listidx] = NULL;
+
+ list_free(list_exprs[listidx]);
+ list_exprs[listidx] = NULL;
}
if (is_or)
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 2a00fb48483..9ab3e81a91d 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1575,6 +1575,8 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
(idx <= bms_num_members(keys) + list_length(exprs)));
}
+ Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+
return idx;
}
@@ -1654,6 +1656,8 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
/* match the attribute/expression to a dimension of the statistic */
idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+ Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+
/*
* Walk through the MCV items and evaluate the current clause. We
* can skip items that were already ruled out, and terminate if
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 679fd2d64d4..8c214d8dfc5 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -2938,6 +2938,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b
(1 row)
DROP TABLE expr_stats;
+-- test handling of a mix of compatible and incompatible expressions
+CREATE TABLE expr_stats_incompatible_test (
+ c0 double precision,
+ c1 boolean NOT NULL
+);
+CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test;
+INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true);
+ANALYZE expr_stats_incompatible_test;
+SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE
+(
+ upper('x') LIKE ('x'||('[0,1]'::int4range))
+ AND
+ (c0 IN (0, 1) OR c1)
+);
+ c0
+----
+(0 rows)
+
+DROP TABLE expr_stats_incompatible_test;
-- Permission tests. Users should not be able to see specific data values in
-- the extended statistics, if they lack permission to see those values in
-- the underlying table.
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 7e092571ca0..e033080d4fb 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1470,6 +1470,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b
DROP TABLE expr_stats;
+-- test handling of a mix of compatible and incompatible expressions
+CREATE TABLE expr_stats_incompatible_test (
+ c0 double precision,
+ c1 boolean NOT NULL
+);
+
+CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test;
+
+INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true);
+ANALYZE expr_stats_incompatible_test;
+
+SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE
+(
+ upper('x') LIKE ('x'||('[0,1]'::int4range))
+ AND
+ (c0 IN (0, 1) OR c1)
+);
+
+DROP TABLE expr_stats_incompatible_test;
-- Permission tests. Users should not be able to see specific data values in
-- the extended statistics, if they lack permission to see those values in