aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <>2022-04-27 16:41:56 +0000
committerdrh <>2022-04-27 16:41:56 +0000
commit609959285bc035aa18da0d89bea484e1b5432105 (patch)
tree78ef4c354277499766150e0fb643518026b77d37 /src
parent9684d71081fd07cd79f2738f7bfd83cde3aa49a0 (diff)
downloadsqlite-609959285bc035aa18da0d89bea484e1b5432105.tar.gz
sqlite-609959285bc035aa18da0d89bea484e1b5432105.zip
When computing STAT1 values using ANALYZE, if a ratio comes out to be between
1.0 and 1.1, then round it down to 1 rather than using the default rounding rule of changing it to 2. This reduces the estimation error for the case where a column value is very nearly, but not quite unique. FossilOrigin-Name: eb59c46a5aed69bc6fd096997bf24c082e533c1085439f6ec1fbe5ff78e8b374
Diffstat (limited to 'src')
-rw-r--r--src/analyze.c8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/analyze.c b/src/analyze.c
index e8699653f..39009899a 100644
--- a/src/analyze.c
+++ b/src/analyze.c
@@ -847,9 +847,14 @@ static void statGet(
** * "WHERE a=? AND b=?" matches 2 rows.
**
** If D is the count of distinct values and K is the total number of
- ** rows, then each estimate is computed as:
+ ** rows, then each estimate is usually computed as:
**
** I = (K+D-1)/D
+ **
+ ** In other words, I is K/D rounded up to the next whole integer.
+ ** However, if I is between 1.0 and 1.1 (in other words if I is
+ ** close to 1.0 but just a little larger) then do not round up but
+ ** instead keep the I value at 1.0.
*/
sqlite3_str sStat; /* Text of the constructed "stat" line */
int i; /* Loop counter */
@@ -860,6 +865,7 @@ static void statGet(
for(i=0; i<p->nKeyCol; i++){
u64 nDistinct = p->current.anDLt[i] + 1;
u64 iVal = (p->nRow + nDistinct - 1) / nDistinct;
+ if( iVal==2 && p->nRow*10 <= nDistinct*11 ) iVal = 1;
sqlite3_str_appendf(&sStat, " %llu", iVal);
assert( p->current.anEq[i] );
}