aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-09-04 15:41:49 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-09-04 15:41:49 -0400
commit: 4c2777d0b733220d9029f78817af8ce671e4a5ad (patch)
tree: ed2001e3d7e99d05a77fbf1dac278a21fad853e4 /src
parent: 1cb108efb0e60d87e4adec38e7636b6e8efbeb57 (diff)
download: postgresql-4c2777d0b733220d9029f78817af8ce671e4a5ad.tar.gz
          postgresql-4c2777d0b733220d9029f78817af8ce671e4a5ad.zip
Change get_variable_numdistinct's API to flag default estimates explicitly.
Formerly, callers tested for DEFAULT_NUM_DISTINCT, which had the problem that a perfectly solid estimate might be mistaken for a content-free default.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/selfuncs.c 70
-rw-r--r-- src/include/utils/selfuncs.h 3
2 files changed, 45 insertions, 28 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 4f3eb8cd01e..5d999e6bfa2 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -244,6 +244,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
bool varonleft)
{
double selec;
+ bool isdefault;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
@@ -344,7 +345,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
* all the not-common values share this remaining fraction
* equally, so we divide by the number of other distinct values.
*/
- otherdistinct = get_variable_numdistinct(vardata) - nnumbers;
+ otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
if (otherdistinct > 1)
selec /= otherdistinct;
@@ -366,7 +367,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
* of distinct values and assuming they are equally common. (The guess
* is unlikely to be very good, but we do know a few special cases.)
*/
- selec = 1.0 / get_variable_numdistinct(vardata);
+ selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
/* result should be in range, but make sure... */
@@ -384,6 +385,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
bool varonleft)
{
double selec;
+ bool isdefault;
/*
* If we matched the var to a unique index, assume there is exactly one
@@ -414,7 +416,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
* idea?)
*/
selec = 1.0 - stats->stanullfrac;
- ndistinct = get_variable_numdistinct(vardata);
+ ndistinct = get_variable_numdistinct(vardata, &isdefault);
if (ndistinct > 1)
selec /= ndistinct;
@@ -441,7 +443,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
* of distinct values and assuming they are equally common. (The guess
* is unlikely to be very good, but we do know a few special cases.)
*/
- selec = 1.0 / get_variable_numdistinct(vardata);
+ selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
/* result should be in range, but make sure... */
@@ -2071,6 +2073,8 @@ eqjoinsel_inner(Oid operator,
double selec;
double nd1;
double nd2;
+ bool isdefault1;
+ bool isdefault2;
Form_pg_statistic stats1 = NULL;
Form_pg_statistic stats2 = NULL;
bool have_mcvs1 = false;
@@ -2084,8 +2088,8 @@ eqjoinsel_inner(Oid operator,
float4 *numbers2 = NULL;
int nnumbers2 = 0;
- nd1 = get_variable_numdistinct(vardata1);
- nd2 = get_variable_numdistinct(vardata2);
+ nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(vardata2, &isdefault2);
if (HeapTupleIsValid(vardata1->statsTuple))
{
@@ -2296,6 +2300,8 @@ eqjoinsel_semi(Oid operator,
double selec;
double nd1;
double nd2;
+ bool isdefault1;
+ bool isdefault2;
Form_pg_statistic stats1 = NULL;
bool have_mcvs1 = false;
Datum *values1 = NULL;
@@ -2308,8 +2314,8 @@ eqjoinsel_semi(Oid operator,
float4 *numbers2 = NULL;
int nnumbers2 = 0;
- nd1 = get_variable_numdistinct(vardata1);
- nd2 = get_variable_numdistinct(vardata2);
+ nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(vardata2, &isdefault2);
/*
* We clamp nd2 to be not more than what we estimate the inner relation's
@@ -2441,7 +2447,7 @@ eqjoinsel_semi(Oid operator,
* nd2 is default, punt and assume half of the uncertain rows have
* join partners.
*/
- if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+ if (!isdefault1 && !isdefault2)
{
nd1 -= nmatches;
nd2 -= nmatches;
@@ -2464,7 +2470,7 @@ eqjoinsel_semi(Oid operator,
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
- if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+ if (!isdefault1 && !isdefault2)
{
if (nd1 <= nd2 || nd2 < 0)
selec = 1.0 - nullfrac1;
@@ -2955,9 +2961,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
{
GroupVarInfo *varinfo;
double ndistinct;
+ bool isdefault;
ListCell *lc;
- ndistinct = get_variable_numdistinct(vardata);
+ ndistinct = get_variable_numdistinct(vardata, &isdefault);
/* cannot use foreach here because of possible list_delete */
lc = list_head(varinfos);
@@ -3292,14 +3299,23 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
stanullfrac,
mcvfreq,
avgfreq;
+ bool isdefault;
float4 *numbers;
int nnumbers;
examine_variable(root, hashkey, 0, &vardata);
- /* Get number of distinct values and fraction that are null */
- ndistinct = get_variable_numdistinct(&vardata);
+ /* Get number of distinct values */
+ ndistinct = get_variable_numdistinct(&vardata, &isdefault);
+ /* If ndistinct isn't real, punt and return 0.1, per comments above */
+ if (isdefault)
+ {
+ ReleaseVariableStats(vardata);
+ return (Selectivity) 0.1;
+ }
+
+ /* Get fraction that are null */
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
@@ -3308,19 +3324,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
stanullfrac = stats->stanullfrac;
}
else
- {
- /*
- * Believe a default ndistinct only if it came from stats. Otherwise
- * punt and return 0.1, per comments above.
- */
- if (ndistinct == DEFAULT_NUM_DISTINCT)
- {
- ReleaseVariableStats(vardata);
- return (Selectivity) 0.1;
- }
-
stanullfrac = 0.0;
- }
/* Compute avg freq of all distinct data values in raw relation */
avgfreq = (1.0 - stanullfrac) / ndistinct;
@@ -4414,16 +4418,20 @@ examine_simple_variable(PlannerInfo *root, Var *var,
* Estimate the number of distinct values of a variable.
*
* vardata: results of examine_variable
+ * *isdefault: set to TRUE if the result is a default rather than based on
+ * anything meaningful.
*
* NB: be careful to produce an integral result, since callers may compare
* the result to exact integer counts.
*/
double
-get_variable_numdistinct(VariableStatData *vardata)
+get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
{
double stadistinct;
double ntuples;
+ *isdefault = false;
+
/*
* Determine the stadistinct value to use. There are cases where we can
* get an estimate even without a pg_statistic entry, or can get a better
@@ -4496,10 +4504,16 @@ get_variable_numdistinct(VariableStatData *vardata)
* Otherwise we need to get the relation size; punt if not available.
*/
if (vardata->rel == NULL)
+ {
+ *isdefault = true;
return DEFAULT_NUM_DISTINCT;
+ }
ntuples = vardata->rel->tuples;
if (ntuples <= 0.0)
+ {
+ *isdefault = true;
return DEFAULT_NUM_DISTINCT;
+ }
/*
* If we had a relative estimate, use that.
@@ -4509,11 +4523,13 @@ get_variable_numdistinct(VariableStatData *vardata)
/*
* With no data, estimate ndistinct = ntuples if the table is small, else
- * use default.
+ * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small"
+ * so that the behavior isn't discontinuous.
*/
if (ntuples < DEFAULT_NUM_DISTINCT)
return ntuples;
+ *isdefault = true;
return DEFAULT_NUM_DISTINCT;
}
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 4208588c2eb..32d14b60290 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -121,7 +121,8 @@ extern void get_join_variables(PlannerInfo *root, List *args,
VariableStatData *vardata1,
VariableStatData *vardata2,
bool *join_is_reversed);
-extern double get_variable_numdistinct(VariableStatData *vardata);
+extern double get_variable_numdistinct(VariableStatData *vardata,
+ bool *isdefault);
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
double *sumcommonp);