diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2001-06-10 02:59:35 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2001-06-10 02:59:35 +0000 |
commit | a8fe109ac186dea6616ec207c349ef92c214bfda (patch) | |
tree | ecd7c9ac7c001c11d5e01294a2846a01c55f0b1a /src | |
parent | 26c94c5d9c7ff7753dad85184adfe774b71580ff (diff) | |
download | postgresql-a8fe109ac186dea6616ec207c349ef92c214bfda.tar.gz postgresql-a8fe109ac186dea6616ec207c349ef92c214bfda.zip |
Fix thinko in hash cost estimation: average frequency
should be computed from total number of distinct values in whole
relation, not # distinct values we expect to have after restriction
clauses are applied.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 22 |
1 files changed, 6 insertions, 16 deletions
```diff
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 65c211deaee..06793f1d8b4 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -873,6 +873,9 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	if (ndistinct < 0.0)
 		ndistinct = -ndistinct * rel->tuples;
 
+	/* Also compute avg freq of all distinct data values in raw relation */
+	avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
 	/*
 	 * Adjust ndistinct to account for restriction clauses. Observe we are
 	 * assuming that the data distribution is affected uniformly by the
@@ -884,17 +887,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	ndistinct *= rel->rows / rel->tuples;
 
 	/*
-	 * Discourage use of hash join if there seem not to be very many distinct
-	 * data values. The threshold here is somewhat arbitrary, as is the
-	 * fraction used to "discourage" the choice.
-	 */
-	if (ndistinct < 50.0)
-	{
-		ReleaseSysCache(tuple);
-		return 0.5;
-	}
-
-	/*
 	 * Form initial estimate of bucketsize fraction. Here we use rel->rows,
 	 * ie the number of rows after applying restriction clauses, because
 	 * that's what the fraction will eventually be multiplied by in
@@ -903,8 +895,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	estfract = (double) NTUP_PER_BUCKET / rel->rows;
 
 	/*
-	 * Adjust estimated bucketsize if too few distinct values to fill
-	 * all the buckets.
+	 * Adjust estimated bucketsize if too few distinct values (after
+	 * restriction clauses) to fill all the buckets.
 	 */
 	needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
 	if (ndistinct < needdistinct)
@@ -931,8 +923,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	/*
 	 * Adjust estimated bucketsize upward to account for skewed distribution.
 	 */
-	avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
-
 	if (avgfreq > 0.0 && mcvfreq > avgfreq)
 		estfract *= mcvfreq / avgfreq;
 
```