aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Alvaro Herrera <alvherre@alvh.no-ip.org> 2017-03-27 12:52:50 -0300
committer: Alvaro Herrera <alvherre@alvh.no-ip.org> 2017-03-27 13:14:23 -0300
commit: 1f171a1803c28d3ae24636c9ca3352ec82c39e5f (patch)
tree: 48fd0576fb3248f2cd6e740c8d7c0107fc39686a /src
parent: 827d6f977940952ebef4bd21fb0f97be4e20c0c4 (diff)
download: postgresql-1f171a1803c28d3ae24636c9ca3352ec82c39e5f.tar.gz
download: postgresql-1f171a1803c28d3ae24636c9ca3352ec82c39e5f.zip
Fix thinko in estimate_num_groups
The code for the reworked n-distinct estimation on commit 7b504eb282 was written differently in a previous version of the patch, prior to commit; on rewriting it, we missed updating an initializer. This caused the code to (mistakenly) apply a fudge factor even in the case where a single value is applied, leading to incorrect results. This means that the 'relvarcount' variable name is now wrong. Add a comment to try and make the situation clearer, and remove an incorrect comment I added. Problem noticed, and code patch, by Tomas Vondra. Additional commentary by Álvaro.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/selfuncs.c 8
1 files changed, 6 insertions, 2 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index cc24c8aeb56..5c382a2013e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3404,7 +3404,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
RelOptInfo *rel = varinfo1->rel;
double reldistinct = 1;
double relmaxndistinct = reldistinct;
- int relvarcount = 1;
+ int relvarcount = 0;
List *newvarinfos = NIL;
List *relvarinfos = NIL;
@@ -3436,6 +3436,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
* we multiply them together. Any remaining relvarinfos after
* no more multivariate matches are found are assumed independent too,
* so their individual ndistinct estimates are multiplied also.
+ *
+ * While iterating, count how many separate numdistinct values we
+ * apply. We apply a fudge factor below, but only if we multiplied
+ * more than one such value.
*/
while (relvarinfos)
{
@@ -3447,7 +3451,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
reldistinct *= mvndistinct;
if (relmaxndistinct < mvndistinct)
relmaxndistinct = mvndistinct;
- relvarcount++; /* inaccurate, but doesn't matter */
+ relvarcount++;
}
else
{