diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/commands/analyze.c | 16 | ||||
-rw-r--r-- | src/backend/tsearch/ts_typanalyze.c | 2 | ||||
-rw-r--r-- | src/backend/utils/adt/rangetypes_typanalyze.c | 4 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 12 |
4 files changed, 23 insertions, 11 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 5fcedd78554..9ac71220a2a 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -2049,8 +2049,11 @@ compute_distinct_stats(VacAttrStatsP stats,
 
 		if (nmultiple == 0)
 		{
-			/* If we found no repeated values, assume it's a unique column */
-			stats->stadistinct = -1.0;
+			/*
+			 * If we found no repeated non-null values, assume it's a unique
+			 * column; but be sure to discount for any nulls we found.
+			 */
+			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
 		}
 		else if (track_cnt < track_max && toowide_cnt == 0 &&
 				 nmultiple == track_cnt)
@@ -2426,8 +2429,11 @@ compute_scalar_stats(VacAttrStatsP stats,
 
 		if (nmultiple == 0)
 		{
-			/* If we found no repeated values, assume it's a unique column */
-			stats->stadistinct = -1.0;
+			/*
+			 * If we found no repeated non-null values, assume it's a unique
+			 * column; but be sure to discount for any nulls we found.
+			 */
+			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
 		}
 		else if (toowide_cnt == 0 && nmultiple == ndistinct)
 		{
@@ -2753,7 +2759,7 @@ compute_scalar_stats(VacAttrStatsP stats,
 		else
 			stats->stawidth = stats->attrtype->typlen;
 		/* Assume all too-wide values are distinct, so it's a unique column */
-		stats->stadistinct = -1.0;
+		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
 	}
 	else if (null_cnt > 0)
 	{
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 0f851ead060..817453ce011 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -295,7 +295,7 @@ compute_tsvector_stats(VacAttrStats *stats,
 		stats->stawidth = total_width / (double) nonnull_cnt;
 
 		/* Assume it's a unique column (see notes above) */
-		stats->stadistinct = -1.0;
+		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
 
 		/*
 		 * Construct an array of the interesting hashtable items, that is,
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
index fcb71d3f36e..56504fcf3c2 100644
--- a/src/backend/utils/adt/rangetypes_typanalyze.c
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -203,7 +203,9 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
 		/* Do the simple null-frac and width stats */
 		stats->stanullfrac = (double) null_cnt / (double) samplerows;
 		stats->stawidth = total_width / (double) non_null_cnt;
-		stats->stadistinct = -1.0;
+
+		/* Estimate that non-null values are unique */
+		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
 
 		/* Must copy the target values into anl_context */
 		old_cxt = MemoryContextSwitchTo(stats->anl_context);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index cc2a9a1b6c5..56943f2a87a 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4738,6 +4738,7 @@ double
 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 {
 	double		stadistinct;
+	double		stanullfrac = 0.0;
 	double		ntuples;
 
 	*isdefault = false;
@@ -4745,7 +4746,8 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 	/*
 	 * Determine the stadistinct value to use. There are cases where we can
 	 * get an estimate even without a pg_statistic entry, or can get a better
-	 * value than is in pg_statistic.
+	 * value than is in pg_statistic. Grab stanullfrac too if we can find it
+	 * (otherwise, assume no nulls, for lack of any better idea).
 	 */
 	if (HeapTupleIsValid(vardata->statsTuple))
 	{
@@ -4754,6 +4756,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 
 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
 		stadistinct = stats->stadistinct;
+		stanullfrac = stats->stanullfrac;
 	}
 	else if (vardata->vartype == BOOLOID)
 	{
@@ -4777,7 +4780,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 		{
 			case ObjectIdAttributeNumber:
 			case SelfItemPointerAttributeNumber:
-				stadistinct = -1.0; /* unique */
+				stadistinct = -1.0; /* unique (and all non null) */
 				break;
 			case TableOidAttributeNumber:
 				stadistinct = 1.0; /* only 1 value */
@@ -4799,10 +4802,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 	 * If there is a unique index or DISTINCT clause for the variable, assume
 	 * it is unique no matter what pg_statistic says; the statistics could be
 	 * out of date, or we might have found a partial unique index that proves
-	 * the var is unique for this query.
+	 * the var is unique for this query. However, we'd better still believe
+	 * the null-fraction statistic.
 	 */
 	if (vardata->isunique)
-		stadistinct = -1.0;
+		stadistinct = -1.0 * (1.0 - stanullfrac);
 
 	/*
 	 * If we had an absolute estimate, use that.