diff options
-rw-r--r-- | src/backend/statistics/dependencies.c | 116 | ||||
-rw-r--r-- | src/test/regress/expected/stats_ext.out | 57 | ||||
-rw-r--r-- | src/test/regress/sql/stats_ext.sql | 35 |
3 files changed, 169 insertions, 39 deletions
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index e97f2b4bfec..e2f6c5bb979 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum); static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum); -static MVDependency *find_strongest_dependency(StatisticExtInfo *stats, - MVDependencies *dependencies, +static MVDependency *find_strongest_dependency(MVDependencies **dependencies, + int ndependencies, Bitmapset *attnums); static void @@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) * (see the comment in dependencies_clauselist_selectivity). */ static MVDependency * -find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, +find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums) { - int i; + int i, j; MVDependency *strongest = NULL; /* number of attnums in clauses */ @@ -876,36 +876,39 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, * fully-matched dependencies. We do the cheap checks first, before * matching it against the attnums. */ - for (i = 0; i < dependencies->ndeps; i++) + for (i = 0; i < ndependencies; i++) { - MVDependency *dependency = dependencies->deps[i]; - - /* - * Skip dependencies referencing more attributes than available - * clauses, as those can't be fully matched. - */ - if (dependency->nattributes > nattnums) - continue; - - if (strongest) + for (j = 0; j < dependencies[i]->ndeps; j++) { - /* skip dependencies on fewer attributes than the strongest. */ - if (dependency->nattributes < strongest->nattributes) - continue; + MVDependency *dependency = dependencies[i]->deps[j]; - /* also skip weaker dependencies when attribute count matches */ - if (strongest->nattributes == dependency->nattributes && - strongest->degree > dependency->degree) + /* + * Skip dependencies referencing more attributes than available + * clauses, as those can't be fully matched. + */ + if (dependency->nattributes > nattnums) continue; - } - /* - * this dependency is stronger, but we must still check that it's - * fully matched to these attnums. We perform this check last as it's - * slightly more expensive than the previous checks. - */ - if (dependency_is_fully_matched(dependency, attnums)) - strongest = dependency; /* save new best match */ + if (strongest) + { + /* skip dependencies on fewer attributes than the strongest. */ + if (dependency->nattributes < strongest->nattributes) + continue; + + /* also skip weaker dependencies when attribute count matches */ + if (strongest->nattributes == dependency->nattributes && + strongest->degree > dependency->degree) + continue; + } + + /* + * this dependency is stronger, but we must still check that it's + * fully matched to these attnums. We perform this check last as it's + * slightly more expensive than the previous checks. + */ + if (dependency_is_fully_matched(dependency, attnums)) + strongest = dependency; /* save new best match */ + } } return strongest; @@ -949,10 +952,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root, Selectivity s1 = 1.0; ListCell *l; Bitmapset *clauses_attnums = NULL; - StatisticExtInfo *stat; - MVDependencies *dependencies; Bitmapset **list_attnums; int listidx; + MVDependencies **dependencies = NULL; + int ndependencies = 0; + int i; /* check if there's any stats that might be useful for us. */ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) @@ -1001,20 +1005,50 @@ dependencies_clauselist_selectivity(PlannerInfo *root, return 1.0; } - /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES, - list_attnums, list_length(clauses)); + /* + * Load all functional dependencies matching at least two parameters. We + * can simply consider all dependencies at once, without having to search + * for the best statistics object. + * + * To not waste cycles and memory, we deserialize dependencies only for + * statistics that match at least two attributes. The array is allocated + * with the assumption that all objects match - we could grow the array + * to make it just the right size, but it's likely wasteful anyway thanks + * to moving the freed chunks to freelists etc. + */ + ndependencies = 0; + dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) * + list_length(rel->statlist)); + + foreach(l,rel->statlist) + { + StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l); + Bitmapset *matched; + int num_matched; + + /* skip statistics that are not of the correct type */ + if (stat->kind != STATS_EXT_DEPENDENCIES) + continue; + + matched = bms_intersect(clauses_attnums, stat->keys); + num_matched = bms_num_members(matched); + bms_free(matched); + + /* skip objects matching fewer than two attributes from clauses */ + if (num_matched < 2) + continue; + + dependencies[ndependencies++] + = statext_dependencies_load(stat->statOid); + } /* if no matching stats could be found then we've nothing to do */ - if (!stat) + if (!ndependencies) { pfree(list_attnums); return 1.0; } - /* load the dependency items stored in the statistics object */ - dependencies = statext_dependencies_load(stat->statOid); - /* * Apply the dependencies recursively, starting with the widest/strongest * ones, and proceeding to the smaller/weaker ones. At the end of each @@ -1027,7 +1061,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, MVDependency *dependency; /* the widest/strongest dependency, fully matched by clauses */ - dependency = find_strongest_dependency(stat, dependencies, + dependency = find_strongest_dependency(dependencies, ndependencies, clauses_attnums); /* if no suitable dependency was found, we're done */ @@ -1097,6 +1131,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root, s1 *= (dependency->degree + (1 - dependency->degree) * s2); } + /* free deserialized functional dependencies (and then the array) */ + for (i = 0; i < ndependencies; i++) + pfree(dependencies[i]); + pfree(dependencies); pfree(list_attnums); diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6fb1aeb596a..d467c0dd7d9 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -451,6 +451,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 50 | 50 (1 row) +-- check the ability to use multiple functional dependencies +CREATE TABLE functional_dependencies_multi ( + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +); +INSERT INTO functional_dependencies_multi (a, b, c, d) + SELECT + mod(i,7), + mod(i,7), + mod(i,11), + mod(i,11) + FROM generate_series(1,5000) s(i); +ANALYZE functional_dependencies_multi; +-- estimates without any functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 102 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 41 | 454 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 1 | 64 +(1 row) + +-- create separate functional dependencies +CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi; +CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi; +ANALYZE functional_dependencies_multi; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 714 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 454 | 454 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 65 | 64 +(1 row) + +DROP TABLE functional_dependencies_multi; -- MCV lists CREATE TABLE mcv_lists ( filler1 TEXT, diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 4999d89c8cd..734b1f60c94 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -291,6 +291,41 @@ ANALYZE functional_dependencies; SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); +-- check the ability to use multiple functional dependencies +CREATE TABLE functional_dependencies_multi ( + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +); + +INSERT INTO functional_dependencies_multi (a, b, c, d) + SELECT + mod(i,7), + mod(i,7), + mod(i,11), + mod(i,11) + FROM generate_series(1,5000) s(i); + +ANALYZE functional_dependencies_multi; + +-- estimates without any functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + +-- create separate functional dependencies +CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi; +CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi; + +ANALYZE functional_dependencies_multi; + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + +DROP TABLE functional_dependencies_multi; + -- MCV lists CREATE TABLE mcv_lists ( filler1 TEXT, |