diff options
author | drh <> | 2024-02-20 12:48:00 +0000 |
---|---|---|
committer | drh <> | 2024-02-20 12:48:00 +0000 |
commit | 68d92c4ad3342a97ae523574cb8e09d1043970a5 (patch) | |
tree | 0f6ed269e065b93243259a7ea02a8ff9db3cf516 /src/pragma.c | |
parent | e0a9935be1c506646566f6b7845eb381bb219e16 (diff) | |
parent | 4189c44cff384341fb2a7780a7b5b35c734db9af (diff) | |
download | sqlite-68d92c4ad3342a97ae523574cb8e09d1043970a5.tar.gz sqlite-68d92c4ad3342a97ae523574cb8e09d1043970a5.zip |
Enhancements to PRAGMA optimize and ANALYZE. Add the 0x10000 flag to
PRAGMA optimize. ANALYZE now records zero-size partial indexes in the
sqlite_stat1 table. PRAGMA optimize looks for both growth and shrinkage
in table sizes, and uses tighter bounds (10x rather than 25x) to trigger
a re-analyze. PRAGMA optimize automatically uses are reasonable
analysis_limit to prevent excessive runtimes.
FossilOrigin-Name: 63ef234e88857a653fa3541e80d59802ceccb806ac8296e8bae79a385b7086f7
Diffstat (limited to 'src/pragma.c')
-rw-r--r-- | src/pragma.c | 196 |
1 files changed, 158 insertions, 38 deletions
diff --git a/src/pragma.c b/src/pragma.c index 872177e13..877fb3f72 100644 --- a/src/pragma.c +++ b/src/pragma.c @@ -31,6 +31,34 @@ #include "pragma.h" /* +** When the 0x10 bit of PRAGMA optimize is set, any ANALYZE commands +** will be run with an analysis_limit set to the lessor of the value of +** the following macro or to the actual analysis_limit if it is non-zero, +** in order to prevent PRAGMA optimize from running for too long. +** +** The value of 2000 is chosen emperically so that the worst-case run-time +** for PRAGMA optimize does not exceed 100 milliseconds against a variety +** of test databases on a RaspberryPI-4 compiled using -Os and without +** -DSQLITE_DEBUG. Of course, your mileage may vary. For the purpose of +** his paragraph, "worst-case" means that ANALYZE ends up being +** run on every table in the database. The worst case typically only +** happens if PRAGMA optimize is run on a database file for which ANALYZE +** has not been previously run and the 0x10000 flag is included so that +** all tables are analyzed. The usual case for PRAGMA optimize is that +** no ANALYZE commands will be run at all, or if any ANALYZE happens it +** will be against a single table, so that expected timing for PRAGMA +** optimize on a PI-4 is more like 1 millisecond or less with the 0x10000 +** flag or less than 100 microseconds without the 0x10000 flag. +** +** An analysis limit of 2000 is almost always sufficient for the query +** planner to fully characterize an index. The additional accuracy from +** a larger analysis is not usually helpful. +*/ +#ifndef SQLITE_DEFAULT_OPTIMIZE_LIMIT +# define SQLITE_DEFAULT_OPTIMIZE_LIMIT 2000 +#endif + +/* ** Interpret the given string as a safety level. Return 0 for OFF, ** 1 for ON or NORMAL, 2 for FULL, and 3 for EXTRA. Return 1 for an empty or ** unrecognized string argument. The FULL and EXTRA option is disallowed @@ -2383,44 +2411,66 @@ void sqlite3Pragma( ** ** The optional argument is a bitmask of optimizations to perform: ** - ** 0x0001 Debugging mode. Do not actually perform any optimizations - ** but instead return one line of text for each optimization - ** that would have been done. Off by default. + ** 0x00001 Debugging mode. Do not actually perform any optimizations + ** but instead return one line of text for each optimization + ** that would have been done. Off by default. ** - ** 0x0002 Run ANALYZE on tables that might benefit. On by default. - ** See below for additional information. + ** 0x00002 Run ANALYZE on tables that might benefit. On by default. + ** See below for additional information. ** - ** 0x0004 (Not yet implemented) Record usage and performance - ** information from the current session in the - ** database file so that it will be available to "optimize" - ** pragmas run by future database connections. + ** 0x00010 Run all ANALYZE operations using an analysis_limit that + ** is the lessor of the current analysis_limit and the + ** SQLITE_DEFAULT_OPTIMIZE_LIMIT compile-time option. + ** The default value of SQLITE_DEFAULT_OPTIMIZE_LIMIT is + ** currently (2024-02-19) set to 2000, which is such that + ** the worst case run-time for PRAGMA optimize on a 100MB + ** database will usually be less than 100 milliseconds on + ** a RaspberryPI-4 class machine. On by default. ** - ** 0x0008 (Not yet implemented) Create indexes that might have - ** been helpful to recent queries + ** 0x00020 Run ANALYZE on any table that has a index that lacks an + ** entry in the sqlite_stat1 table. On by default. ** - ** The default MASK is and always shall be 0xfffe. 0xfffe means perform all - ** of the optimizations listed above except Debug Mode, including new - ** optimizations that have not yet been invented. If new optimizations are - ** ever added that should be off by default, those off-by-default - ** optimizations will have bitmasks of 0x10000 or larger. + ** 0x10000 Look at tables to see if they need to be reanalyzed + ** due to growth or shrinkage even if they have not been + ** queried during the current connection. Off by default. + ** + ** The default MASK is and always shall be 0x0fffe. In the current + ** implementation, the default mask only covers the 0x00002 optimization, + ** though additional optimizations that are covered by 0x0fffe might be + ** added in the future. Optimizations that are off by default and must + ** be explicitly requested have masks of 0x10000 or greater. ** ** DETERMINATION OF WHEN TO RUN ANALYZE ** ** In the current implementation, a table is analyzed if only if all of ** the following are true: ** - ** (1) MASK bit 0x02 is set. + ** (1) MASK bit 0x00002 is set. + ** + ** (2) The table is an ordinary table, not a virtual table or view. ** - ** (2) The query planner used sqlite_stat1-style statistics for one or - ** more indexes of the table at some point during the lifetime of - ** the current connection. + ** (3) The table name does not begin with "sqlite_". ** - ** (3) One or more indexes of the table are currently unanalyzed OR - ** the number of rows in the table has increased by 25 times or more - ** since the last time ANALYZE was run. + ** (4) One or more of the following is true: + ** (4a) The 0x10000 MASK bit is set. + ** (4b) One or more indexes on the table lacks an entry + ** in the sqlite_stat1 table. + ** (4c) The query planner used sqlite_stat1-style statistics for one + ** or more indexes of the tableat some point during the lifetime + ** of the current connection. + ** + ** (5) One or more of the following is true: + ** (5a) One or more indexes on the table lacks an entry + ** in the sqlite_stat1 table. (Same as 4a) + ** (5b) The number of rows in the table has increased or decreased by + ** 10-fold. In other words, the current size of the table is + ** 10 times larger than the size in sqlite_stat1 or else the + ** current size is less than 1/10th the size in sqlite_stat1. ** ** The rules for when tables are analyzed are likely to change in - ** future releases. + ** future releases. Future versions of SQLite might accept a string + ** literal argument to this pragma that contains a mnemonic description + ** of the options rather than a bitmap. */ case PragTyp_OPTIMIZE: { int iDbLast; /* Loop termination point for the schema loop */ @@ -2432,6 +2482,11 @@ void sqlite3Pragma( LogEst szThreshold; /* Size threshold above which reanalysis needed */ char *zSubSql; /* SQL statement for the OP_SqlExec opcode */ u32 opMask; /* Mask of operations to perform */ + int nLimit; /* Analysis limit to use */ + int nCheck = 0; /* Number of tables to be optimized */ + int nBtree = 0; /* Number of btrees to scan */ + int nIndex; /* Number of indexes on the current table */ + int hasStat1; /* True if any STAT1 info available for the table */ if( zRight ){ opMask = (u32)sqlite3Atoi(zRight); @@ -2439,6 +2494,14 @@ void sqlite3Pragma( }else{ opMask = 0xfffe; } + if( (opMask & 0x10)==0 ){ + nLimit = 0; + }else if( db->nAnalysisLimit>0 + && db->nAnalysisLimit<SQLITE_DEFAULT_OPTIMIZE_LIMIT ){ + nLimit = 0; + }else{ + nLimit = SQLITE_DEFAULT_OPTIMIZE_LIMIT; + } iTabCur = pParse->nTab++; for(iDbLast = zDb?iDb:db->nDb-1; iDb<=iDbLast; iDb++){ if( iDb==1 ) continue; @@ -2447,23 +2510,64 @@ void sqlite3Pragma( for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ pTab = (Table*)sqliteHashData(k); - /* If table pTab has not been used in a way that would benefit from - ** having analysis statistics during the current session, then skip it. - ** This also has the effect of skipping virtual tables and views */ - if( (pTab->tabFlags & TF_MaybeReanalyze)==0 ) continue; + /* This only works for ordinary tables */ + if( !IsOrdinaryTable(pTab) ) continue; + + /* Do not scan system tables */ + if( 0==sqlite3StrNICmp(pTab->zName, "sqlite_", 7) ) continue; - /* Reanalyze if the table is 25 times larger than the last analysis */ - szThreshold = pTab->nRowLogEst + 46; assert( sqlite3LogEst(25)==46 ); + /* Find the size of the table as last recorded in sqlite_stat1. + ** If any index is unanalyzed, then the threshold is -1 to + ** indicate a new, unanalyzed index + */ + szThreshold = pTab->nRowLogEst; + hasStat1 = (pTab->tabFlags & TF_HasStat1)!=0; + nIndex = 0; for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - if( !pIdx->hasStat1 ){ - szThreshold = 0; /* Always analyze if any index lacks statistics */ - break; + nIndex++; + if( pIdx->hasStat1 ){ + hasStat1 = 1; + }else{ + szThreshold = -1; /* Always analyze if any index lacks statistics */ } } - if( szThreshold ){ - sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead); - sqlite3VdbeAddOp3(v, OP_IfSmaller, iTabCur, - sqlite3VdbeCurrentAddr(v)+2+(opMask&1), szThreshold); + + /* If table pTab has not been used in a way that would benefit from + ** having analysis statistics during the current session, then skip it, + ** unless the 0x10000 MASK bit is set. */ + if( (pTab->tabFlags & TF_MaybeReanalyze)!=0 ){ + /* Check for size change if stat1 has been used for a query */ + }else if( opMask & 0x10000 ){ + /* Check for size change if 0x10000 is set */ + }else if( pTab->pIndex!=0 && szThreshold<0 ){ + /* Do analysis if unanalyzed indexes exists */ + }else{ + /* Otherwise, we can skip this table */ + continue; + } + + nCheck++; + if( nCheck==2 ){ + /* If ANALYZE might be invoked two or more times, hold a write + ** transaction for efficiency */ + sqlite3BeginWriteOperation(pParse, 0, iDb); + } + nBtree += nIndex+1; + + /* Reanalyze if the table is 10 times larger or smaller than + ** the last analysis. Unconditional reanalysis if there are + ** unanalyzed indexes. */ + sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead); + if( szThreshold>=0 ){ + const LogEst iRange = 33; /* 10x size change */ + sqlite3VdbeAddOp4Int(v, OP_IfSizeBetween, iTabCur, + sqlite3VdbeCurrentAddr(v)+2+(opMask&1), + szThreshold>=iRange ? szThreshold-iRange : -1, + szThreshold+iRange); + VdbeCoverage(v); + }else{ + sqlite3VdbeAddOp2(v, OP_Rewind, iTabCur, + sqlite3VdbeCurrentAddr(v)+2+(opMask&1)); VdbeCoverage(v); } zSubSql = sqlite3MPrintf(db, "ANALYZE \"%w\".\"%w\"", @@ -2473,11 +2577,27 @@ void sqlite3Pragma( sqlite3VdbeAddOp4(v, OP_String8, 0, r1, 0, zSubSql, P4_DYNAMIC); sqlite3VdbeAddOp2(v, OP_ResultRow, r1, 1); }else{ - sqlite3VdbeAddOp4(v, OP_SqlExec, 0, 0, 0, zSubSql, P4_DYNAMIC); + sqlite3VdbeAddOp4(v, OP_SqlExec, nLimit ? 0x02 : 00, nLimit, 0, + zSubSql, P4_DYNAMIC); } } } sqlite3VdbeAddOp0(v, OP_Expire); + + /* In a schema with a large number of tables and indexes, scale back + ** the analysis_limit to avoid excess run-time in the worst case. + */ + if( !db->mallocFailed && nLimit>0 && nBtree>100 ){ + int iAddr, iEnd; + VdbeOp *aOp; + nLimit = 100*nLimit/nBtree; + if( nLimit<100 ) nLimit = 100; + aOp = sqlite3VdbeGetOp(v, 0); + iEnd = sqlite3VdbeCurrentAddr(v); + for(iAddr=0; iAddr<iEnd; iAddr++){ + if( aOp[iAddr].opcode==OP_SqlExec ) aOp[iAddr].p2 = nLimit; + } + } break; } |