diff options
author | drh <> | 2022-04-27 16:41:56 +0000 |
---|---|---|
committer | drh <> | 2022-04-27 16:41:56 +0000 |
commit | 609959285bc035aa18da0d89bea484e1b5432105 (patch) | |
tree | 78ef4c354277499766150e0fb643518026b77d37 /src | |
parent | 9684d71081fd07cd79f2738f7bfd83cde3aa49a0 (diff) | |
download | sqlite-609959285bc035aa18da0d89bea484e1b5432105.tar.gz sqlite-609959285bc035aa18da0d89bea484e1b5432105.zip |
When computing STAT1 values using ANALYZE, if a ratio comes out to be between
1.0 and 1.1, then round it down to 1 rather than using the default rounding
rule of changing it to 2. This reduces the estimation error for the
case where a column value is very nearly, but not quite, unique.
FossilOrigin-Name: eb59c46a5aed69bc6fd096997bf24c082e533c1085439f6ec1fbe5ff78e8b374
Diffstat (limited to 'src')
-rw-r--r-- | src/analyze.c | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/src/analyze.c b/src/analyze.c index e8699653f..39009899a 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -847,9 +847,14 @@ static void statGet( ** * "WHERE a=? AND b=?" matches 2 rows. ** ** If D is the count of distinct values and K is the total number of - ** rows, then each estimate is computed as: + ** rows, then each estimate is usually computed as: ** ** I = (K+D-1)/D + ** + ** In other words, I is K/D rounded up to the next whole integer. + ** However, if I is between 1.0 and 1.1 (in other words if I is + ** close to 1.0 but just a little larger) then do not round up but + ** instead keep the I value at 1.0. */ sqlite3_str sStat; /* Text of the constructed "stat" line */ int i; /* Loop counter */ @@ -860,6 +865,7 @@ static void statGet( for(i=0; i<p->nKeyCol; i++){ u64 nDistinct = p->current.anDLt[i] + 1; u64 iVal = (p->nRow + nDistinct - 1) / nDistinct; + if( iVal==2 && p->nRow*10 <= nDistinct*11 ) iVal = 1; sqlite3_str_appendf(&sStat, " %llu", iVal); assert( p->current.anEq[i] ); } |