diff options
-rw-r--r-- | main.mk | 8 | ||||
-rw-r--r-- | manifest | 51 | ||||
-rw-r--r-- | manifest.uuid | 2 | ||||
-rw-r--r-- | src/log.c | 1659 | ||||
-rw-r--r-- | src/log.h | 63 | ||||
-rw-r--r-- | src/os_unix.c | 6 | ||||
-rw-r--r-- | src/pager.c | 745 | ||||
-rw-r--r-- | src/pager.h | 2 | ||||
-rw-r--r-- | src/pragma.c | 8 | ||||
-rw-r--r-- | src/vdbe.c | 11 | ||||
-rw-r--r-- | test/lock2.test | 63 | ||||
-rw-r--r-- | test/lock_common.tcl | 77 | ||||
-rw-r--r-- | test/quick.test | 4 | ||||
-rw-r--r-- | test/tester.tcl | 1 | ||||
-rw-r--r-- | test/thread_common.tcl | 2 | ||||
-rw-r--r-- | test/wal.test | 700 | ||||
-rw-r--r-- | test/walcrash.test | 251 | ||||
-rw-r--r-- | test/walslow.test | 71 | ||||
-rw-r--r-- | test/walthread.test | 198 | ||||
-rw-r--r-- | tool/mksqlite3c.tcl | 2 |
20 files changed, 3559 insertions, 365 deletions
@@ -56,7 +56,7 @@ LIBOBJ+= alter.o analyze.o attach.o auth.o \ fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \ fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o fts3_write.o \ func.o global.o hash.o \ - icu.o insert.o journal.o legacy.o loadext.o \ + icu.o insert.o journal.o legacy.o loadext.o log.o \ main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \ memjournal.o \ mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \ @@ -101,6 +101,8 @@ SRC = \ $(TOP)/src/journal.c \ $(TOP)/src/legacy.c \ $(TOP)/src/loadext.c \ + $(TOP)/src/log.c \ + $(TOP)/src/log.h \ $(TOP)/src/main.c \ $(TOP)/src/malloc.c \ $(TOP)/src/mem0.c \ @@ -255,8 +257,8 @@ TESTSRC = \ TESTSRC2 = \ $(TOP)/src/attach.c $(TOP)/src/backup.c $(TOP)/src/btree.c \ $(TOP)/src/build.c $(TOP)/src/date.c \ - $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/mem5.c \ - $(TOP)/src/os.c \ + $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/log.c \ + $(TOP)/src/mem5.c $(TOP)/src/os.c \ $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c \ $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c \ $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c \ @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Change\ssqlite3_step()\sso\sthat\sit\sautomatically\scalls\ssqlite3_reset()\sinstead\nof\sreturning\sSQLITE_MISUSE\swhen\sinvoked\son\sa\sprepared\sstatement\sthat\npreviously\sreturned\sany\svalue\sother\sthan\sSQLITE_ROW. -D 2010-04-17T12:53:20 +C Merge\swith\strunk\scommit\s[3e646e3f4c]. 
+D 2010-04-17T15:45:35 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -92,7 +89,7 @@ F ext/rtree/tkt3363.test 2bf324f7908084a5f463de3109db9c6e607feb1b F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 -F main.mk d286b99eb87db41cfc5394e346604ef49509867d +F main.mk f12991ace528dd01d018420988ff053350ae81f8 F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac @@ -134,6 +131,8 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 +F src/log.c a72baea84cecef9a4e45308b1504e6fe69c8284e +F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9 F src/main.c c0e7192bad5b90544508b241eb2487ac661de890 F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 @@ -153,15 +152,15 @@ F src/os.c 8bc63cf91e9802e2b807198e54e50227fa889306 F src/os.h 534b082c3cb349ad05fa6fa0b06087e022af282c F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30 F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f -F src/os_unix.c 148d2f625db3727250c0b880481ae7630b6d0eb0 +F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1 F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053 -F src/pager.c da5ed17bb729c27a16c45fe38e9531c240a1c6a4 -F src/pager.h ef8a2cf10084f60ab45ee2dfded8bf8b0c655ddf +F src/pager.c 751ada65b9a4aa0b31c36ffa3f6548200a55ca16 +F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/pcache.c 
ace8f6a5ecd4711cc66a1b23053be7109bd437cf F src/pcache.h c683390d50f856d4cd8e24342ae62027d1bb6050 F src/pcache1.c 6dc1871ce8ead9187161c370a58cd06c84221f76 -F src/pragma.c e166ea41544f8e57a08db86dbe87212b7d378fe8 +F src/pragma.c f12cb58a8aa0d80cfed282ef87a285ed71beb793 F src/prepare.c fd1398cb1da54385ba5bd68d93928f10d10a1d9c F src/printf.c 5f5b65a83e63f2096a541a340722a509fa0240a7 F src/random.c cd4a67b3953b88019f8cd4ccd81394a8ddfaba50 @@ -215,7 +214,7 @@ F src/update.c c0dc6b75ad28b76b619042d934f337b02acee208 F src/utf.c 1baeeac91707a4df97ccc6141ec0f808278af685 F src/util.c 32aebf04c10e51ad3977a928b7416bed671b620b F src/vacuum.c b1d542c8919d4d11119f78069e1906a1ad07e0ee -F src/vdbe.c 2abd931ea2aec3eacc6426677f40cc5a1071d34e +F src/vdbe.c 2e2aaa765de667dd15e0462cf853efd1b2f97998 F src/vdbe.h 471f6a3dcec4817ca33596fe7f6654d56c0e75f3 F src/vdbeInt.h 19ebc8c2a2e938340051ee65af3f377fb99102d1 F src/vdbeapi.c 11bcc381e81e797fcf3e81fa6a14ec16a04801cc @@ -474,12 +473,13 @@ F test/limit.test 2db7b3b34fb925b8e847d583d2eb67531d0ce67e F test/loadext.test 0393ce12d9616aa87597dd0ec88181de181f6db0 F test/loadext2.test 0bcaeb4d81cd5b6e883fdfea3c1bdbe1f173cbca F test/lock.test 842e80b6be816c79525a20b098cca066989feed7 -F test/lock2.test 7bb642551df59b3de135291d62ee82409420181e +F test/lock2.test ec208a5f394d92affaf599fde3f374361657d0ff F test/lock3.test f271375930711ae044080f4fe6d6eda930870d00 F test/lock4.test f4f36271aa5ae1da449646bf43c7341f6b2b4c4e F test/lock5.test 6b1f78f09ad1522843dad571b76b321e6f439bf7 F test/lock6.test 862aa71e97b288d6b3f92ba3313f51bd0b003776 F test/lock7.test 64006c84c1c616657e237c7ad6532b765611cf64 +F test/lock_common.tcl 58aa21f38c28223cc1107b5b2c9d7d61aa428e79 F test/lookaside.test 1dd350dc6dff015c47c07fcc5a727a72fc5bae02 F test/main.test 2be2352ac77ac5b238c6337a5469aeeef57677e6 F test/make-where7.tcl 05c16b5d4f5d6512881dfec560cb793915932ef9 @@ -538,7 +538,7 @@ F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47 F test/printf.test 
05970cde31b1a9f54bd75af60597be75a5c54fea F test/progress.test 5b075c3c790c7b2a61419bc199db87aaf48b8301 F test/ptrchng.test ef1aa72d6cf35a2bbd0869a649b744e9d84977fc -F test/quick.test d6591e74f3ac19da7fd076845f06dca48fd43cff +F test/quick.test 6f202befe1cfae0b63df96b3120a8022ab11f574 F test/quote.test 215897dbe8de1a6f701265836d6601cc6ed103e6 F test/randexpr1.tcl 40dec52119ed3a2b8b2a773bce24b63a3a746459 F test/randexpr1.test 1084050991e9ba22c1c10edd8d84673b501cc25a @@ -603,7 +603,7 @@ F test/tclsqlite.test bf4227eb236a4c097aa7974a2bf7d3225acf34be F test/tempdb.test 1bf52da28a9c24e29717362a87722dff08feb72b F test/temptable.test f42121a0d29a62f00f93274464164177ab1cc24a F test/temptrigger.test b0273db072ce5f37cf19140ceb1f0d524bbe9f05 -F test/tester.tcl e1f581c7a2648a0aaa51135c4d2e7be68f4b9292 +F test/tester.tcl 49d76f12940160d623da104f995530fc6ee8f46f F test/thread001.test a3e6a7254d1cb057836cb3145b60c10bf5b7e60f F test/thread002.test afd20095e6e845b405df4f2c920cb93301ca69db F test/thread003.test b824d4f52b870ae39fc5bae4d8070eca73085dca @@ -611,7 +611,7 @@ F test/thread004.test f51dfc3936184aaf73ee85f315224baad272a87f F test/thread005.test bf5c374ca65dd89fd56c8fe511ccfb46875bda5e F test/thread1.test 862dd006d189e8b0946935db17399dcac2f8ef91 F test/thread2.test 6e0997f7beabb6a7e471bd18740ed04805c785f4 -F test/thread_common.tcl b65e6b1d1d90dc885e10ad080896c6c56eef0819 +F test/thread_common.tcl 0b07423d29ddb73d4bacbac69268c8d37b6cc5d2 F test/threadtest1.c 6029d9c5567db28e6dc908a0c63099c3ba6c383b F test/threadtest2.c ace893054fa134af3fc8d6e7cfecddb8e3acefb9 F test/tkt-02a8e81d44.test 58494de77be2cf249228ada3f313fa399821c6ab @@ -758,6 +758,10 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61 F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d +F test/wal.test a56ff378f58b145fd3bf38c277fbfe792cd47bdd +F test/walcrash.test 
45cfbab30bb7cbe0b2e9d5cabe90dbcad10cb89b +F test/walslow.test 38076d5fad49e3678027be0f8110e6a32d531dc2 +F test/walthread.test 27e44ee6fd02f1f494a24f999c97086af3ab739d F test/where.test de337a3fe0a459ec7c93db16a519657a90552330 F test/where2.test 45eacc126aabb37959a387aa83e59ce1f1f03820 F test/where3.test aa44a9b29e8c9f3d7bb94a3bb3a95b31627d520d @@ -781,7 +785,7 @@ F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc F tool/mkkeywordhash.c d2e6b4a5965e23afb80fbe74bb54648cd371f309 F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 -F tool/mksqlite3c.tcl 4c6924c7e877defa8f9a12ef1e6867de614acf3f +F tool/mksqlite3c.tcl 25ec827588893857eba2d24a645ace1bb7cdab73 F tool/mksqlite3h.tcl eb100dce83f24b501b325b340f8b5eb8e5106b3b F tool/mksqlite3internalh.tcl 7b43894e21bcb1bb39e11547ce7e38a063357e87 F tool/omittest.tcl 27d6f6e3b1e95aeb26a1c140e6eb57771c6d794a @@ -801,14 +805,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P f96782b389b5b97b488dc5814f7082e0393f64cd -R b3683c8d5f87dead098717870c446ce4 -U drh -Z 68bc4fd82825f4b7f1278d3f78ee95b8 ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFLya9GoxKgR168RlERAvQaAJwLUmtTGSRsZdMt+rOX4V9Acu7enQCdFgG+ -yMxp/Ep2vaOwFANf9gUAX1Q= -=yLEB ------END PGP SIGNATURE----- +P 9bc9b6847303d0324543a9ded8dd0473490122d8 3e646e3f4cd0ca288e444561e951cecfdaee2ab5 +R 9ec1fc417b85c6217c6e7a04071a1912 +U dan +Z 5c9ba544c6cd36a35ee164445a4a1f25 diff --git a/manifest.uuid b/manifest.uuid index 9d8bb1e64..d19551428 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3e646e3f4cd0ca288e444561e951cecfdaee2ab5
\ No newline at end of file +43463970f5885fb116588695146f2a56cb22804a
\ No newline at end of file diff --git a/src/log.c b/src/log.c new file mode 100644 index 000000000..4253d659a --- /dev/null +++ b/src/log.c @@ -0,0 +1,1659 @@ + +/* +** This file contains the implementation of a log file used in +** "journal_mode=wal" mode. +*/ + +/* +** LOG FILE FORMAT +** +** A log file consists of a header followed by zero or more log frames. +** The log header is 12 bytes in size and consists of the following three +** big-endian 32-bit unsigned integer values: +** +** 0: Database page size, +** 4: Randomly selected salt value 1, +** 8: Randomly selected salt value 2. +** +** Immediately following the log header are zero or more log frames. Each +** frame itself consists of a 16-byte header followed by a <page-size> bytes +** of page data. The header is broken into 4 big-endian 32-bit unsigned +** integer values, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Checksum value 1. +** 12: Checksum value 2. +*/ + +/* +** LOG SUMMARY FORMAT +** +** TODO. +*/ + +#include "log.h" + +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> + +typedef struct LogSummaryHdr LogSummaryHdr; +typedef struct LogSummary LogSummary; +typedef struct LogIterator LogIterator; +typedef struct LogLock LogLock; + + +/* +** The following structure may be used to store the same data that +** is stored in the log-summary header. +** +** Member variables iCheck1 and iCheck2 contain the checksum for the +** last frame written to the log, or 2 and 3 respectively if the log +** is currently empty. +*/ +struct LogSummaryHdr { + u32 iChange; /* Counter incremented each transaction */ + u32 pgsz; /* Database page size in bytes */ + u32 iLastPg; /* Address of last valid frame in log */ + u32 nPage; /* Size of database in pages */ + u32 iCheck1; /* Checkpoint value 1 */ + u32 iCheck2; /* Checkpoint value 2 */ +}; + +/* Size of serialized LogSummaryHdr object. 
*/ +#define LOGSUMMARY_HDR_NFIELD (sizeof(LogSummaryHdr) / sizeof(u32)) + +#define LOGSUMMARY_FRAME_OFFSET \ + (LOGSUMMARY_HDR_NFIELD + LOG_CKSM_BYTES/sizeof(u32)) + + + +/* Size of frame header */ +#define LOG_FRAME_HDRSIZE 16 +#define LOG_HDRSIZE 12 + +/* +** Return the offset of frame iFrame in the log file, assuming a database +** page size of pgsz bytes. The offset returned is to the start of the +** log frame-header. +*/ +#define logFrameOffset(iFrame, pgsz) ( \ + LOG_HDRSIZE + ((iFrame)-1)*((pgsz)+LOG_FRAME_HDRSIZE) \ +) + +/* +** There is one instance of this structure for each log-summary object +** that this process has a connection to. They are stored in a linked +** list starting at pLogSummary (global variable). +** +** TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used +** directly in this implementation because the VFS does not support +** the required blocking file-locks. +*/ +struct LogSummary { + sqlite3_mutex *mutex; /* Mutex used to protect this object */ + int nRef; /* Number of pointers to this structure */ + int fd; /* File descriptor open on log-summary */ + char *zPath; /* Path to associated WAL file */ + LogLock *pLock; /* Linked list of locks on this object */ + LogSummary *pNext; /* Next in global list */ + int nData; /* Size of aData allocation/mapping */ + u32 *aData; /* File body */ +}; + + +/* +** The four lockable regions associated with each log-summary. A connection +** may take either a SHARED or EXCLUSIVE lock on each. An ORed combination +** of the following bitmasks is passed as the second argument to the +** logLockRegion() function. +*/ +#define LOG_REGION_A 0x01 +#define LOG_REGION_B 0x02 +#define LOG_REGION_C 0x04 +#define LOG_REGION_D 0x08 + +#define LOG_LOCK_MUTEX 12 +#define LOG_LOCK_DMH 13 +#define LOG_LOCK_REGION 14 + +/* +** A single instance of this structure is allocated as part of each +** connection to a database log. 
All structures associated with the +** same log file are linked together into a list using LogLock.pNext +** starting at LogSummary.pLock. +** +** The mLock field of the structure describes the locks (if any) +** currently held by the connection. If a SHARED lock is held on +** any of the four locking regions, then the associated LOG_REGION_X +** bit (see above) is set. If an EXCLUSIVE lock is held on the region, +** then the (LOG_REGION_X << 8) bit is set. +*/ +struct LogLock { + LogLock *pNext; /* Next lock on the same log */ + u32 mLock; /* Mask of locks */ +}; + +struct Log { + LogSummary *pSummary; /* Log file summary data */ + sqlite3_vfs *pVfs; /* The VFS used to create pFd */ + sqlite3_file *pFd; /* File handle for log file */ + int sync_flags; /* Flags to use with OsSync() */ + int isLocked; /* Non-zero if a snapshot is held open */ + int isWriteLocked; /* True if this is the writer connection */ + LogSummaryHdr hdr; /* Log summary header for current snapshot */ + LogLock lock; /* Lock held by this connection (if any) */ +}; + + +/* +** This structure is used to implement an iterator that iterates through +** all frames in the log in database page order. Where two or more frames +** correspond to the same database page, the iterator visits only the +** frame most recently written to the log. +** +** The internals of this structure are only accessed by: +** +** logIteratorInit() - Create a new iterator, +** logIteratorNext() - Step an iterator, +** logIteratorFree() - Free an iterator. +** +** This functionality is used by the checkpoint code (see logCheckpoint()). +*/ +struct LogIterator { + int nSegment; /* Size of LogIterator.aSegment[] array */ + int nFinal; /* Elements in segment nSegment-1 */ + struct LogSegment { + int iNext; /* Next aIndex index */ + u8 *aIndex; /* Pointer to index array */ + u32 *aDbPage; /* Pointer to db page array */ + } aSegment[1]; +}; + + + +/* +** List of all LogSummary objects created by this process. 
Protected by +** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex +** here instead of borrowing the LRU mutex. +*/ +#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU +static LogSummary *pLogSummary = 0; + +/* +** Generate an 8 byte checksum based on the data in array aByte[] and the +** initial values of aCksum[0] and aCksum[1]. The checksum is written into +** aCksum[] before returning. +*/ +#define LOG_CKSM_BYTES 8 +static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){ + u64 sum1 = aCksum[0]; + u64 sum2 = aCksum[1]; + u32 *a32 = (u32 *)aByte; + u32 *aEnd = (u32 *)&aByte[nByte]; + + assert( LOG_CKSM_BYTES==2*sizeof(u32) ); + assert( (nByte&0x00000003)==0 ); + + do { + sum1 += (*a32++); + sum2 += sum1; + } while( a32<aEnd ); + + aCksum[0] = sum1 + (sum1>>24); + aCksum[1] = sum2 + (sum2>>24); +} + +/* +** Argument zPath must be a nul-terminated string containing a path-name. +** This function modifies the string in-place by removing any "./" or "../" +** elements in the path. For example, the following input: +** +** "/home/user/plans/good/../evil/./world_domination.txt" +** +** is overwritten with the 'normalized' version: +** +** "/home/user/plans/evil/world_domination.txt" +*/ +static void logNormalizePath(char *zPath){ + int i, j; + char *z = zPath; + int n = strlen(z); + + while( n>1 && z[n-1]=='/' ){ n--; } + for(i=j=0; i<n; i++){ + if( z[i]=='/' ){ + if( z[i+1]=='/' ) continue; + if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ + i += 1; + continue; + } + if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ + while( j>0 && z[j-1]!='/' ){ j--; } + if( j>0 ){ j--; } + i += 2; + continue; + } + } + z[j++] = z[i]; + } + z[j] = 0; +} + +/* +** Lock the summary file pSummary->fd. 
+*/ +static int logSummaryLock(LogSummary *pSummary){ + int rc; + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = F_WRLCK; + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 1; + rc = fcntl(pSummary->fd, F_SETLKW, &f); + if( rc!=0 ){ + return SQLITE_IOERR; + } + return SQLITE_OK; +} + +/* +** Unlock the summary file pSummary->fd. +*/ +static int logSummaryUnlock(LogSummary *pSummary){ + int rc; + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = F_UNLCK; + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 1; + rc = fcntl(pSummary->fd, F_SETLK, &f); + if( rc!=0 ){ + return SQLITE_IOERR; + } + return SQLITE_OK; +} + +/* +** Memory map the first nByte bytes of the summary file opened with +** pSummary->fd at pSummary->aData. If the summary file is smaller than +** nByte bytes in size when this function is called, ftruncate() is +** used to expand it before it is mapped. +** +** It is assumed that an exclusive lock is held on the summary file +** by the caller (to protect the ftruncate()). +*/ +static int logSummaryMap(LogSummary *pSummary, int nByte){ + struct stat sStat; + int rc; + int fd = pSummary->fd; + void *pMap; + + assert( pSummary->aData==0 ); + + /* If the file is less than nByte bytes in size, cause it to grow. */ + rc = fstat(fd, &sStat); + if( rc!=0 ) return SQLITE_IOERR; + if( sStat.st_size<nByte ){ + rc = ftruncate(fd, nByte); + if( rc!=0 ) return SQLITE_IOERR; + } + + /* Map the file. */ + pMap = mmap(0, nByte, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if( pMap==MAP_FAILED ){ + return SQLITE_IOERR; + } + pSummary->aData = (u32 *)pMap; + pSummary->nData = nByte; + + return SQLITE_OK; +} + +/* +** Unmap the log-summary mapping and close the file-descriptor. If +** the isTruncate argument is non-zero, truncate the log-summary file +** region to zero bytes. +** +** Regardless of the value of isTruncate, close the file-descriptor +** opened on the log-summary file. 
+*/ +static int logSummaryUnmap(LogSummary *pSummary, int isUnlink){ + int rc = SQLITE_OK; + if( pSummary->aData ){ + assert( pSummary->fd>0 ); + munmap(pSummary->aData, pSummary->nData); + pSummary->aData = 0; + if( isUnlink ){ + char *zFile = sqlite3_mprintf("%s-summary", pSummary->zPath); + if( !zFile ){ + rc = SQLITE_NOMEM; + } + unlink(zFile); + sqlite3_free(zFile); + } + } + if( pSummary->fd>0 ){ + close(pSummary->fd); + pSummary->fd = -1; + } + return rc; +} + +static void logSummaryWriteHdr(LogSummary *pSummary, LogSummaryHdr *pHdr){ + u32 *aData = pSummary->aData; + memcpy(aData, pHdr, sizeof(LogSummaryHdr)); + aData[LOGSUMMARY_HDR_NFIELD] = 1; + aData[LOGSUMMARY_HDR_NFIELD+1] = 1; + logChecksumBytes( + (u8 *)aData, sizeof(LogSummaryHdr), &aData[LOGSUMMARY_HDR_NFIELD] + ); +} + +/* +** This function encodes a single frame header and writes it to a buffer +** supplied by the caller. A log frame-header is made up of a series of +** 4-byte big-endian integers, as follows: +** +** 0: Database page size in bytes. +** 4: Page number. +** 8: New database size (for commit frames, otherwise zero). +** 12: Frame checksum 1. +** 16: Frame checksum 2. +*/ +static void logEncodeFrame( + u32 *aCksum, /* IN/OUT: Checksum values */ + u32 iPage, /* Database page number for frame */ + u32 nTruncate, /* New db size (or 0 for non-commit frames) */ + int nData, /* Database page size (size of aData[]) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* OUT: Write encoded frame here */ +){ + assert( LOG_FRAME_HDRSIZE==16 ); + + sqlite3Put4byte(&aFrame[0], iPage); + sqlite3Put4byte(&aFrame[4], nTruncate); + + logChecksumBytes(aFrame, 8, aCksum); + logChecksumBytes(aData, nData, aCksum); + + sqlite3Put4byte(&aFrame[8], aCksum[0]); + sqlite3Put4byte(&aFrame[12], aCksum[1]); +} + +/* +** Return 1 and populate *piPage, *pnTruncate and aCksum if the +** frame checksum looks Ok. Otherwise return 0. 
+*/ +static int logDecodeFrame( + u32 *aCksum, /* IN/OUT: Checksum values */ + u32 *piPage, /* OUT: Database page number for frame */ + u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ + int nData, /* Database page size (size of aData[]) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* Frame data */ +){ + assert( LOG_FRAME_HDRSIZE==16 ); + + logChecksumBytes(aFrame, 8, aCksum); + logChecksumBytes(aData, nData, aCksum); + + if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[12]) + ){ + /* Checksum failed. */ + return 0; + } + + *piPage = sqlite3Get4byte(&aFrame[0]); + *pnTruncate = sqlite3Get4byte(&aFrame[4]); + return 1; +} + +static void logMergesort8( + Pgno *aContent, /* Pages in log */ + u8 *aBuffer, /* Buffer of at least *pnList items to use */ + u8 *aList, /* IN/OUT: List to sort */ + int *pnList /* IN/OUT: Number of elements in aList[] */ +){ + int nList = *pnList; + if( nList>1 ){ + int nLeft = nList / 2; /* Elements in left list */ + int nRight = nList - nLeft; /* Elements in right list */ + u8 *aLeft = aList; /* Left list */ + u8 *aRight = &aList[nLeft]; /* Right list */ + int iLeft = 0; /* Current index in aLeft */ + int iRight = 0; /* Current index in aright */ + int iOut = 0; /* Current index in output buffer */ + + /* TODO: Change to non-recursive version. 
*/ + logMergesort8(aContent, aBuffer, aLeft, &nLeft); + logMergesort8(aContent, aBuffer, aRight, &nRight); + + while( iRight<nRight || iLeft<nLeft ){ + u8 logpage; + Pgno dbpage; + + if( (iLeft<nLeft) + && (iRight>=nRight || aContent[aLeft[iLeft]]<aContent[aRight[iRight]]) + ){ + logpage = aLeft[iLeft++]; + }else{ + logpage = aRight[iRight++]; + } + dbpage = aContent[logpage]; + + aBuffer[iOut++] = logpage; + if( iLeft<nLeft && aContent[aLeft[iLeft]]==dbpage ) iLeft++; + + assert( iLeft>=nLeft || aContent[aLeft[iLeft]]>dbpage ); + assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); + } + memcpy(aList, aBuffer, sizeof(aList[0])*iOut); + *pnList = iOut; + } + +#ifdef SQLITE_DEBUG + { + int i; + for(i=1; i<*pnList; i++){ + assert( aContent[aList[i]] > aContent[aList[i-1]] ); + } + } +#endif +} + + +/* +** Return the index in the LogSummary.aData array that corresponds to +** frame iFrame. The log-summary file consists of a header, followed by +** alternating "map" and "index" blocks. +*/ +static int logSummaryEntry(u32 iFrame){ + return ((((iFrame-1)>>8)<<6) + iFrame-1 + 2 + LOGSUMMARY_HDR_NFIELD); +} + + +/* +** Set an entry in the log-summary map to map log frame iFrame to db +** page iPage. Values are always appended to the log-summary (i.e. the +** value of iFrame is always exactly one more than the value passed to +** the previous call), but that restriction is not enforced or asserted +** here. +*/ +static void logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){ + u32 iSlot = logSummaryEntry(iFrame); + + /* Set the log-summary entry itself */ + pSummary->aData[iSlot] = iPage; + + /* If the frame number is a multiple of 256 (frames are numbered starting + ** at 1), build an index of the most recently added 256 frames. 
+ */ + if( (iFrame&0x000000FF)==0 ){ + int i; /* Iterator used while initializing aIndex */ + u32 *aFrame; /* Pointer to array of 256 frames */ + int nIndex; /* Number of entries in index */ + u8 *aIndex; /* 256 bytes to build index in */ + u8 *aTmp; /* Scratch space to use while sorting */ + + aFrame = &pSummary->aData[iSlot-255]; + aIndex = (u8 *)&pSummary->aData[iSlot+1]; + aTmp = &aIndex[256]; + + nIndex = 256; + for(i=0; i<256; i++) aIndex[i] = (u8)i; + logMergesort8(aFrame, aTmp, aIndex, &nIndex); + memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex); + } +} + + +/* +** Recover the log-summary by reading the log file. The caller must hold +** an exclusive lock on the log-summary file. +*/ +static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ + int rc; /* Return Code */ + i64 nSize; /* Size of log file */ + LogSummaryHdr hdr; /* Recovered log-summary header */ + + memset(&hdr, 0, sizeof(hdr)); + + rc = sqlite3OsFileSize(pFd, &nSize); + if( rc!=SQLITE_OK ){ + return rc; + } + + if( nSize>LOG_FRAME_HDRSIZE ){ + u8 aBuf[LOG_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int nFrame; /* Number of bytes at aFrame */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int iFrame; /* Index of last frame read */ + i64 iOffset; /* Next offset to read from log file */ + int nPgsz; /* Page size according to the log */ + u32 aCksum[2]; /* Running checksum */ + + /* Read in the first frame header in the file (to determine the + ** database page size). + */ + rc = sqlite3OsRead(pFd, aBuf, LOG_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data. 
+ */ + nPgsz = sqlite3Get4byte(&aBuf[0]); + if( nPgsz&(nPgsz-1) || nPgsz>SQLITE_MAX_PAGE_SIZE ){ + goto finished; + } + aCksum[0] = sqlite3Get4byte(&aBuf[4]); + aCksum[1] = sqlite3Get4byte(&aBuf[8]); + + /* Malloc a buffer to read frames into. */ + nFrame = nPgsz + LOG_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc(nFrame); + if( !aFrame ){ + return SQLITE_NOMEM; + } + aData = &aFrame[LOG_FRAME_HDRSIZE]; + + /* Read all frames from the log file. */ + iFrame = 0; + for(iOffset=LOG_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + int isValid; /* True if this frame is valid */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pFd, aFrame, nFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = logDecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame); + if( !isValid ) break; + logSummaryAppend(pSummary, ++iFrame, pgno); + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + hdr.iCheck1 = aCksum[0]; + hdr.iCheck2 = aCksum[1]; + hdr.iLastPg = iFrame; + hdr.nPage = nTruncate; + hdr.pgsz = nPgsz; + } + } + + sqlite3_free(aFrame); + }else{ + hdr.iCheck1 = 2; + hdr.iCheck2 = 3; + } + +finished: + logSummaryWriteHdr(pSummary, &hdr); + return rc; +} + +/* +** Values for the third parameter to logLockRegion(). 
+*/ +#define LOG_UNLOCK 0 +#define LOG_RDLOCK 1 +#define LOG_WRLOCK 2 +#define LOG_WRLOCKW 3 + +static int logLockFd(LogSummary *pSummary, int iStart, int nByte, int op){ + int aType[4] = { + F_UNLCK, /* LOG_UNLOCK */ + F_RDLCK, /* LOG_RDLOCK */ + F_WRLCK, /* LOG_WRLOCK */ + F_WRLCK /* LOG_WRLOCKW */ + }; + int aOp[4] = { + F_SETLK, /* LOG_UNLOCK */ + F_SETLK, /* LOG_RDLOCK */ + F_SETLK, /* LOG_WRLOCK */ + F_SETLKW /* LOG_WRLOCKW */ + }; + + struct flock f; /* Locking operation */ + int rc; /* Value returned by fcntl() */ + + assert( ArraySize(aType)==ArraySize(aOp) ); + assert( op>=0 && op<ArraySize(aType) ); + + memset(&f, 0, sizeof(f)); + f.l_type = aType[op]; + f.l_whence = SEEK_SET; + f.l_start = iStart; + f.l_len = nByte; + rc = fcntl(pSummary->fd, aOp[op], &f); + return (rc==0) ? SQLITE_OK : SQLITE_BUSY; +} + +static int logLockRegion(Log *pLog, u32 mRegion, int op){ + LogSummary *pSummary = pLog->pSummary; + LogLock *p; /* Used to iterate through in-process locks */ + u32 mOther; /* Locks held by other connections */ + u32 mNew; /* New mask for pLog */ + + assert( + /* Writer lock operations */ + (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) + + /* Normal reader lock operations */ + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B)) + + /* Region D reader lock operations */ + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D)) + || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D)) + + /* Checkpointer lock operations */ + || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) + || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) + || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C)) + ); + + /* Assert that a connection never tries to go from an EXCLUSIVE to a + ** 
SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes + ** happens though (when a region D reader upgrades to a writer). + */ + assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) ); + + sqlite3_mutex_enter(pSummary->mutex); + + /* Calculate a mask of logs held by all connections in this process apart + ** from this one. The least significant byte of the mask contains a mask + ** of the SHARED logs held. The next least significant byte of the mask + ** indicates the EXCLUSIVE locks held. For example, to test if some other + ** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock + ** on region C, do: + ** + ** hasSharedOnA = (mOther & (LOG_REGION_A<<0)); + ** hasExclusiveOnC = (mOther & (LOG_REGION_C<<8)); + ** + ** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the + ** corresponding bit in the SHARED mask. + */ + mOther = 0; + for(p=pSummary->pLock; p; p=p->pNext){ + assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) ); + if( p!=&pLog->lock ){ + mOther |= p->mLock; + } + } + + /* If this call is to lock a region (not to unlock one), test if locks held + ** by any other connection in this process prevent the new locks from + ** begin granted. If so, exit the summary mutex and return SQLITE_BUSY. + */ + if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){ + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_BUSY; + } + + /* Figure out the new log mask for this connection. */ + switch( op ){ + case LOG_UNLOCK: + mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8))); + break; + case LOG_RDLOCK: + mNew = (pLog->lock.mLock | mRegion); + break; + default: + assert( op==LOG_WRLOCK ); + mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion); + break; + } + + /* Now modify the locks held on the log-summary file descriptor. This + ** file descriptor is shared by all log connections in this process. 
+ ** Therefore: + ** + ** + If one or more log connections in this process hold a SHARED lock + ** on a region, the file-descriptor should hold a SHARED lock on + ** the file region. + ** + ** + If a log connection in this process holds an EXCLUSIVE lock on a + ** region, the file-descriptor should also hold an EXCLUSIVE lock on + ** the region in question. + ** + ** If this is an LOG_UNLOCK operation, only regions for which no other + ** connection holds a lock should actually be unlocked. And if this + ** is a LOG_RDLOCK operation and other connections already hold all + ** the required SHARED locks, then no system call is required. + */ + if( op==LOG_UNLOCK ){ + mRegion = (mRegion & ~mOther); + } + if( (op==LOG_WRLOCK) + || (op==LOG_UNLOCK && mRegion) + || (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion) + ){ + struct LockMap { + int iStart; /* Byte offset to start locking operation */ + int iLen; /* Length field for locking operation */ + } aMap[] = { + /* 0000 */ {0, 0}, /* 0001 */ {4+LOG_LOCK_REGION, 1}, + /* 0010 */ {3+LOG_LOCK_REGION, 1}, /* 0011 */ {3+LOG_LOCK_REGION, 2}, + /* 0100 */ {2+LOG_LOCK_REGION, 1}, /* 0101 */ {0, 0}, + /* 0110 */ {2+LOG_LOCK_REGION, 2}, /* 0111 */ {2+LOG_LOCK_REGION, 3}, + /* 1000 */ {1+LOG_LOCK_REGION, 1}, /* 1001 */ {0, 0}, + /* 1010 */ {0, 0}, /* 1011 */ {0, 0}, + /* 1100 */ {1+LOG_LOCK_REGION, 2}, /* 1101 */ {0, 0}, + /* 1110 */ {0, 0}, /* 1111 */ {0, 0} + }; + int rc; /* Return code of logLockFd() */ + + assert( mRegion<ArraySize(aMap) && aMap[mRegion].iStart!=0 ); + + rc = logLockFd(pSummary, aMap[mRegion].iStart, aMap[mRegion].iLen, op); + if( rc!=0 ){ + sqlite3_mutex_leave(pSummary->mutex); + return rc; + } + } + + pLog->lock.mLock = mNew; + sqlite3_mutex_leave(pSummary->mutex); + return SQLITE_OK; +} + +static int logLockDMH(LogSummary *pSummary, int eLock){ + assert( eLock==LOG_RDLOCK || eLock==LOG_WRLOCK ); + return logLockFd(pSummary, LOG_LOCK_DMH, 1, eLock); +} + +static int logLockMutex(LogSummary *pSummary, int 
eLock){ + assert( eLock==LOG_WRLOCKW || eLock==LOG_UNLOCK ); + logLockFd(pSummary, LOG_LOCK_MUTEX, 1, eLock); + return SQLITE_OK; +} + + + +/* +** This function intializes the connection to the log-summary identified +** by struct pSummary. +*/ +static int logSummaryInit( + LogSummary *pSummary, /* Log summary object to initialize */ + sqlite3_file *pFd /* File descriptor open on log file */ +){ + int rc; /* Return Code */ + char *zFile; /* File name for summary file */ + + assert( pSummary->fd<0 ); + assert( pSummary->aData==0 ); + assert( pSummary->nRef>0 ); + assert( pSummary->zPath ); + + /* Open a file descriptor on the summary file. */ + zFile = sqlite3_mprintf("%s-summary", pSummary->zPath); + if( !zFile ){ + return SQLITE_NOMEM; + } + pSummary->fd = open(zFile, O_RDWR|O_CREAT, S_IWUSR|S_IRUSR); + sqlite3_free(zFile); + if( pSummary->fd<0 ){ + return SQLITE_IOERR; + } + + /* Grab an exclusive lock the summary file. Then mmap() it. + ** + ** TODO: This code needs to be enhanced to support a growable mapping. + ** For now, just make the mapping very large to start with. The + ** pages should not be allocated until they are first accessed anyhow, + ** so using a large mapping consumes no more resources than a smaller + ** one would. + */ + assert( sqlite3_mutex_held(pSummary->mutex) ); + rc = logLockMutex(pSummary, LOG_WRLOCKW); + if( rc!=SQLITE_OK ) return rc; + rc = logSummaryMap(pSummary, 512*1024); + if( rc!=SQLITE_OK ) goto out; + + /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this + ** is possible, the contents of the log-summary file (if any) may not + ** be trusted. Zero the log-summary header before continuing. 
+ */ + rc = logLockDMH(pSummary, LOG_WRLOCK); + if( rc==SQLITE_OK ){ + memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) ); + } + rc = logLockDMH(pSummary, LOG_RDLOCK); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR; + } + + out: + logLockMutex(pSummary, LOG_UNLOCK); + return rc; +} + +/* +** Open a connection to the log file associated with database zDb. The +** database file does not actually have to exist. zDb is used only to +** figure out the name of the log file to open. If the log file does not +** exist it is created by this call. +** +** A SHARED lock should be held on the database file when this function +** is called. The purpose of this SHARED lock is to prevent any other +** client from unlinking the log or log-summary file. If another process +** were to do this just after this client opened one of these files, the +** system would be badly broken. +*/ +int sqlite3LogOpen( + sqlite3_vfs *pVfs, /* vfs module to open log file with */ + const char *zDb, /* Name of database file */ + Log **ppLog /* OUT: Allocated Log handle */ +){ + int rc = SQLITE_OK; /* Return Code */ + Log *pRet; /* Object to allocate and return */ + LogSummary *pSummary = 0; /* Summary object */ + sqlite3_mutex *mutex = 0; /* LOG_SUMMARY_MUTEX mutex */ + int flags; /* Flags passed to OsOpen() */ + char *zWal = 0; /* Path to WAL file */ + int nWal; /* Length of zWal in bytes */ + + assert( zDb ); + + /* Allocate an instance of struct Log to return. */ + *ppLog = 0; + pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile); + if( !pRet ) goto out; + pRet->pVfs = pVfs; + pRet->pFd = (sqlite3_file *)&pRet[1]; + pRet->sync_flags = SQLITE_SYNC_NORMAL; + + /* Normalize the path name. 
*/ + zWal = sqlite3_mprintf("%s-wal", zDb); + if( !zWal ){ + /* rc is still SQLITE_OK here; without this the caller would be handed a + ** Log object that has no LogSummary attached. */ + rc = SQLITE_NOMEM; + goto out; + } + logNormalizePath(zWal); + flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL); + nWal = sqlite3Strlen30(zWal); + + /* Enter the mutex that protects the linked-list of LogSummary structures */ + if( sqlite3GlobalConfig.bCoreMutex ){ + mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); + } + sqlite3_mutex_enter(mutex); + + /* Search for an existing log summary object in the linked list. If one + ** cannot be found, allocate and initialize a new object. + */ + for(pSummary=pLogSummary; pSummary; pSummary=pSummary->pNext){ + int nPath = sqlite3Strlen30(pSummary->zPath); + if( nWal==nPath && 0==memcmp(pSummary->zPath, zWal, nPath) ) break; + } + if( !pSummary ){ + int nByte = sizeof(LogSummary) + nWal + 1; + pSummary = (LogSummary *)sqlite3MallocZero(nByte); + if( !pSummary ){ + rc = SQLITE_NOMEM; + goto out; + } + if( sqlite3GlobalConfig.bCoreMutex ){ + pSummary->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); + } + pSummary->zPath = (char *)&pSummary[1]; + pSummary->fd = -1; + memcpy(pSummary->zPath, zWal, nWal); + pSummary->pNext = pLogSummary; + pLogSummary = pSummary; + } + pSummary->nRef++; + pRet->pSummary = pSummary; + + /* Exit the mutex protecting the linked-list of LogSummary objects. */ + sqlite3_mutex_leave(mutex); + mutex = 0; + + /* Open file handle on the log file. */ + rc = sqlite3OsOpen(pVfs, pSummary->zPath, pRet->pFd, flags, &flags); + if( rc!=SQLITE_OK ) goto out; + + /* Object pSummary is shared between all connections to the database made + ** by this process. So at this point it may or may not be connected to + ** the log-summary. If it is not, connect it. 
+ */ + sqlite3_mutex_enter(pSummary->mutex); + mutex = pSummary->mutex; + if( pSummary->fd<0 ){ + rc = logSummaryInit(pSummary, pRet->pFd); + } + + /* Only link the new connection into the list of locks once it is known + ** that the open has succeeded. Otherwise the list would be left pointing + ** at the Log object that is freed below. */ + if( rc==SQLITE_OK ){ + pRet->lock.pNext = pSummary->pLock; + pSummary->pLock = &pRet->lock; + } + + out: + sqlite3_mutex_leave(mutex); + sqlite3_free(zWal); + if( rc!=SQLITE_OK ){ + if( pRet ){ + sqlite3OsClose(pRet->pFd); + sqlite3_free(pRet); + pRet = 0; /* Never hand a freed pointer back via *ppLog */ + } + if( pSummary ){ + /* pSummary is non-zero only if its reference count was incremented + ** above. It may be shared with other connections in this process, so + ** drop that reference and unlink and free the object only if it was + ** the last one. + ** + ** NOTE(review): if logSummaryInit() failed part way through, whatever + ** it opened is not released here - confirm whether logSummaryUnmap() + ** should be called first. */ + mutex = 0; + if( sqlite3GlobalConfig.bCoreMutex ){ + mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); + } + sqlite3_mutex_enter(mutex); + pSummary->nRef--; + if( pSummary->nRef==0 ){ + LogSummary **pp; + for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); + *pp = pSummary->pNext; + sqlite3_mutex_free(pSummary->mutex); + sqlite3_free(pSummary); + } + sqlite3_mutex_leave(mutex); + } + } + *ppLog = pRet; + return rc; +} + +/* +** Find the smallest database page number that is greater than *piPage and +** appears in any segment of iterator p. Store it in *piPage and store the +** log frame it was found at in *piFrame. Return non-zero if there is no +** such page (in which case *piPage is set to 0xFFFFFFFF). +*/ +static int logIteratorNext( + LogIterator *p, /* Iterator */ + u32 *piPage, /* OUT: Next db page to write */ + u32 *piFrame /* OUT: Log frame to read from */ +){ + u32 iMin = *piPage; + u32 iRet = 0xFFFFFFFF; + int i; + int nBlock = p->nFinal; + + for(i=p->nSegment-1; i>=0; i--){ + struct LogSegment *pSegment = &p->aSegment[i]; + while( pSegment->iNext<nBlock ){ + u32 iPg = pSegment->aDbPage[pSegment->aIndex[pSegment->iNext]]; + if( iPg>iMin ){ + if( iPg<iRet ){ + iRet = iPg; + *piFrame = i*256 + 1 + pSegment->aIndex[pSegment->iNext]; + } + break; + } + pSegment->iNext++; + } + + nBlock = 256; + } + + *piPage = iRet; + return (iRet==0xFFFFFFFF); +} + +/* +** Allocate and return an iterator over the frames currently in log pLog. +** Return 0 if the allocation fails. +*/ +static LogIterator *logIteratorInit(Log *pLog){ + u32 *aData = pLog->pSummary->aData; + LogIterator *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + int nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + int nFinal; /* Number of unindexed entries */ + struct LogSegment *pFinal; /* Final (unindexed) segment */ + u8 *aTmp; /* Temp space used by merge-sort */ + + iLast = pLog->hdr.iLastPg; + nSegment = (iLast >> 8) + 1; + nFinal = (iLast & 0x000000FF); + + nByte = sizeof(LogIterator) + (nSegment-1)*sizeof(struct LogSegment) + 512; + p = (LogIterator *)sqlite3_malloc(nByte); + if( !p ){ + /* The code below dereferences p unconditionally. */ + return 0; + } + memset(p, 0, nByte); + p->nSegment = nSegment; + p->nFinal = nFinal; + + for(i=0; i<nSegment-1; i++){ + p->aSegment[i].aDbPage = &aData[logSummaryEntry(i*256+1)]; 
+ p->aSegment[i].aIndex = (u8 *)&aData[logSummaryEntry(i*256+1)+256]; + } + pFinal = &p->aSegment[nSegment-1]; + + pFinal->aDbPage = &aData[logSummaryEntry((nSegment-1)*256+1)]; + pFinal->aIndex = (u8 *)&pFinal[1]; + aTmp = &pFinal->aIndex[256]; + for(i=0; i<nFinal; i++){ + pFinal->aIndex[i] = i; + } + logMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); + p->nFinal = nFinal; + + return p; +} + +/* +** Free a log iterator allocated by logIteratorInit(). +*/ +static void logIteratorFree(LogIterator *p){ + sqlite3_free(p); +} + +/* +** Checkpoint the contents of the log file. +*/ +static int logCheckpoint( + Log *pLog, /* Log connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + u8 *zBuf /* Temporary buffer to use */ +){ + int rc; /* Return code */ + int pgsz = pLog->hdr.pgsz; /* Database page-size */ + LogIterator *pIter = 0; /* Log iterator context */ + u32 iDbpage = 0; /* Next database page to write */ + u32 iFrame = 0; /* Log frame containing data for iDbpage */ + + if( pLog->hdr.iLastPg==0 ){ + return SQLITE_OK; + } + + /* Allocate the iterator */ + pIter = logIteratorInit(pLog); + if( !pIter ) return SQLITE_NOMEM; + + /* Sync the log file to disk */ + rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); + if( rc!=SQLITE_OK ) goto out; + + /* Iterate through the contents of the log, copying data to the db file. */ + while( 0==logIteratorNext(pIter, &iDbpage, &iFrame) ){ + rc = sqlite3OsRead(pLog->pFd, zBuf, pgsz, + logFrameOffset(iFrame, pgsz) + LOG_FRAME_HDRSIZE + ); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)*pgsz); + if( rc!=SQLITE_OK ) goto out; + } + + /* Truncate the database file */ + rc = sqlite3OsTruncate(pFd, ((i64)pLog->hdr.nPage*(i64)pgsz)); + if( rc!=SQLITE_OK ) goto out; + + /* Sync the database file. If successful, update the log-summary. 
*/ + rc = sqlite3OsSync(pFd, pLog->sync_flags); + if( rc!=SQLITE_OK ) goto out; + pLog->hdr.iLastPg = 0; + pLog->hdr.iCheck1 = 2; + pLog->hdr.iCheck2 = 3; + logSummaryWriteHdr(pLog->pSummary, &pLog->hdr); + + /* TODO: If a crash occurs and the current log is copied into the + ** database there is no problem. However, if a crash occurs while + ** writing the next transaction into the start of the log, such that: + ** + ** * The first transaction currently in the log is left intact, but + ** * The second (or subsequent) transaction is damaged, + ** + ** then the database could become corrupt. + ** + ** The easiest thing to do would be to write and sync a dummy header + ** into the log at this point. Unfortunately, that turns out to be + ** an unwelcome performance hit. Alternatives are... + */ +#if 0 + memset(zBuf, 0, LOG_FRAME_HDRSIZE); + rc = sqlite3OsWrite(pLog->pFd, zBuf, LOG_FRAME_HDRSIZE, 0); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); +#endif + + out: + logIteratorFree(pIter); + return rc; +} + +/* +** Close a connection to a log file. +*/ +int sqlite3LogClose( + Log *pLog, /* Log to close */ + sqlite3_file *pFd, /* Database file */ + u8 *zBuf /* Buffer of at least page-size bytes */ +){ + int rc = SQLITE_OK; + if( pLog ){ + LogLock **ppL; + LogSummary *pSummary = pLog->pSummary; + sqlite3_mutex *mutex = 0; + + sqlite3_mutex_enter(pSummary->mutex); + for(ppL=&pSummary->pLock; *ppL!=&pLog->lock; ppL=&(*ppL)->pNext); + *ppL = pLog->lock.pNext; + sqlite3_mutex_leave(pSummary->mutex); + + if( sqlite3GlobalConfig.bCoreMutex ){ + mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); + } + sqlite3_mutex_enter(mutex); + + /* Decrement the reference count on the log summary. If this is the last + ** reference to the log summary object in this process, the object will + ** be freed. If this is also the last connection to the database, then + ** checkpoint the database and truncate the log and log-summary files + ** to zero bytes in size. 
+ **/ + pSummary->nRef--; + if( pSummary->nRef==0 ){ + int rc; + LogSummary **pp; + for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + + sqlite3_mutex_leave(mutex); + + rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE); + if( rc==SQLITE_OK ){ + + /* This is the last connection to the database (including other + ** processes). Do three things: + ** + ** 1. Checkpoint the db. + ** 2. Truncate the log file. + ** 3. Unlink the log-summary file. + */ + rc = logCheckpoint(pLog, pFd, zBuf); + if( rc==SQLITE_OK ){ + rc = sqlite3OsDelete(pLog->pVfs, pSummary->zPath, 0); + } + + logSummaryUnmap(pSummary, 1); + }else{ + if( rc==SQLITE_BUSY ){ + rc = SQLITE_OK; + } + logSummaryUnmap(pSummary, 0); + } + sqlite3OsUnlock(pFd, SQLITE_LOCK_NONE); + + sqlite3_mutex_free(pSummary->mutex); + sqlite3_free(pSummary); + }else{ + sqlite3_mutex_leave(mutex); + } + + /* Close the connection to the log file and free the Log handle. */ + sqlite3OsClose(pLog->pFd); + sqlite3_free(pLog); + } + return rc; +} + +/* +** Set the flags to pass to the sqlite3OsSync() function when syncing +** the log file. +*/ +#if 0 +void sqlite3LogSetSyncflags(Log *pLog, int sync_flags){ + assert( sync_flags==SQLITE_SYNC_NORMAL || sync_flags==SQLITE_SYNC_FULL ); + pLog->sync_flags = sync_flags; +} +#endif + +/* +** Enter and leave the log-summary mutex. In this context, entering the +** log-summary mutex means: +** +** 1. Obtaining mutex pLog->pSummary->mutex, and +** 2. Taking an exclusive lock on the log-summary file. +** +** i.e. this mutex locks out other processes as well as other threads +** hosted in this address space. 
+*/ +static int logEnterMutex(Log *pLog){ + LogSummary *pSummary = pLog->pSummary; + int rc; + + sqlite3_mutex_enter(pSummary->mutex); + rc = logLockMutex(pSummary, LOG_WRLOCKW); + if( rc!=SQLITE_OK ){ + sqlite3_mutex_leave(pSummary->mutex); + } + return rc; +} +static void logLeaveMutex(Log *pLog){ + LogSummary *pSummary = pLog->pSummary; + logLockMutex(pSummary, LOG_UNLOCK); + sqlite3_mutex_leave(pSummary->mutex); +} + +/* +** Try to read the log-summary header. Attempt to verify the header +** checksum. If the checksum can be verified, copy the log-summary +** header into structure pLog->hdr. If the contents of pLog->hdr are +** modified by this and pChanged is not NULL, set *pChanged to 1. +** Otherwise leave *pChanged unmodified. +** +** If the checksum cannot be verified return SQLITE_ERROR. +*/ +int logSummaryTryHdr(Log *pLog, int *pChanged){ + u32 aCksum[2] = {1, 1}; + u32 aHdr[LOGSUMMARY_HDR_NFIELD+2]; + + /* First try to read the header without a lock. Verify the checksum + ** before returning. This will almost always work. + */ + memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr)); + logChecksumBytes((u8*)aHdr, sizeof(u32)*LOGSUMMARY_HDR_NFIELD, aCksum); + if( aCksum[0]!=aHdr[LOGSUMMARY_HDR_NFIELD] + || aCksum[1]!=aHdr[LOGSUMMARY_HDR_NFIELD+1] + ){ + return SQLITE_ERROR; + } + + if( memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)) ){ + if( pChanged ){ + *pChanged = 1; + } + memcpy(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)); + } + return SQLITE_OK; +} + +/* +** Read the log-summary header from the log-summary file into structure +** pLog->hdr. If attempting to verify the header checksum fails, try +** to recover the log before returning. +** +** If the log-summary header is successfully read, return SQLITE_OK. +** Otherwise an SQLite error code. +*/ +int logSummaryReadHdr(Log *pLog, int *pChanged){ + int rc; + + /* First try to read the header without a lock. Verify the checksum + ** before returning. This will almost always work. 
+ */ + if( SQLITE_OK==logSummaryTryHdr(pLog, pChanged) ){ + return SQLITE_OK; + } + + /* If the first attempt to read the header failed, lock the log-summary + ** file and try again. If the header checksum verification fails this + ** time as well, run log recovery. + */ + if( SQLITE_OK==(rc = logEnterMutex(pLog)) ){ + if( SQLITE_OK!=logSummaryTryHdr(pLog, pChanged) ){ + if( pChanged ){ + *pChanged = 1; + } + rc = logSummaryRecover(pLog->pSummary, pLog->pFd); + if( rc==SQLITE_OK ){ + rc = logSummaryTryHdr(pLog, 0); + } + } + logLeaveMutex(pLog); + } + + return rc; +} + +/* +** Lock a snapshot. +** +** If this call obtains a new read-lock and the database contents have been +** modified since the most recent call to LogCloseSnapshot() on this Log +** connection, then *pChanged is set to 1 before returning. Otherwise, it +** is left unmodified. This is used by the pager layer to determine whether +** or not any cached pages may be safely reused. +*/ +int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){ + int rc = SQLITE_OK; + if( pLog->isLocked==0 ){ + int nAttempt; + + /* Obtain a snapshot-lock on the log-summary file. The procedure + ** for obtaining the snapshot log is: + ** + ** 1. Attempt a SHARED lock on regions A and B. + ** 2a. If step 1 is successful, drop the lock on region B. + ** 2b. If step 1 is unsuccessful, attempt a SHARED lock on region D. + ** 3. Repeat the above until the lock attempt in step 1 or 2b is + ** successful. + ** + ** If neither of the locks can be obtained after 5 tries, presumably + ** something is wrong (i.e. a process not following the locking protocol). + ** Return an error code in this case. 
+ */ + rc = SQLITE_BUSY; + for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){ + rc = logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B, LOG_RDLOCK); + if( rc==SQLITE_BUSY ){ + rc = logLockRegion(pLog, LOG_REGION_D, LOG_RDLOCK); + if( rc==SQLITE_OK ) pLog->isLocked = LOG_REGION_D; + }else{ + logLockRegion(pLog, LOG_REGION_B, LOG_UNLOCK); + pLog->isLocked = LOG_REGION_A; + } + } + if( rc!=SQLITE_OK ){ + return rc; + } + + rc = logSummaryReadHdr(pLog, pChanged); + if( rc!=SQLITE_OK ){ + /* An error occured while attempting log recovery. */ + sqlite3LogCloseSnapshot(pLog); + } + } + return rc; +} + +/* +** Unlock the current snapshot. +*/ +void sqlite3LogCloseSnapshot(Log *pLog){ + if( pLog->isLocked ){ + assert( pLog->isLocked==LOG_REGION_A || pLog->isLocked==LOG_REGION_D ); + logLockRegion(pLog, pLog->isLocked, LOG_UNLOCK); + } + pLog->isLocked = 0; +} + +/* +** Read a page from the log, if it is present. +*/ +int sqlite3LogRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ + u32 iRead = 0; + u32 *aData = pLog->pSummary->aData; + int iFrame = (pLog->hdr.iLastPg & 0xFFFFFF00); + + assert( pLog->isLocked ); + + /* Do a linear search of the unindexed block of page-numbers (if any) + ** at the end of the log-summary. An alternative to this would be to + ** build an index in private memory each time a read transaction is + ** opened on a new snapshot. 
+ */ + if( pLog->hdr.iLastPg ){ + u32 *pi = &aData[logSummaryEntry(pLog->hdr.iLastPg)]; + u32 *piStop = pi - (pLog->hdr.iLastPg & 0xFF); + while( *pi!=pgno && pi!=piStop ) pi--; + if( pi!=piStop ){ + iRead = (pi-piStop) + iFrame; + } + } + assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno ); + + while( iRead==0 && iFrame>0 ){ + int iLow = 0; + int iHigh = 255; + u32 *aFrame; + u8 *aIndex; + + iFrame -= 256; + aFrame = &aData[logSummaryEntry(iFrame+1)]; + aIndex = (u8 *)&aFrame[256]; + + while( iLow<=iHigh ){ + int iTest = (iLow+iHigh)>>1; + u32 iPg = aFrame[aIndex[iTest]]; + + if( iPg==pgno ){ + iRead = iFrame + 1 + aIndex[iTest]; + break; + } + else if( iPg<pgno ){ + iLow = iTest+1; + }else{ + iHigh = iTest-1; + } + } + } + assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno ); + + /* If iRead is non-zero, then it is the log frame number that contains the + ** required page. Read and return data from the log file. + */ + if( iRead ){ + i64 iOffset = logFrameOffset(iRead, pLog->hdr.pgsz) + LOG_FRAME_HDRSIZE; + *pInLog = 1; + return sqlite3OsRead(pLog->pFd, pOut, pLog->hdr.pgsz, iOffset); + } + + *pInLog = 0; + return SQLITE_OK; +} + + +/* +** Set *pPgno to the size of the database file (or zero, if unknown). +*/ +void sqlite3LogMaxpgno(Log *pLog, Pgno *pPgno){ + assert( pLog->isLocked ); + *pPgno = pLog->hdr.nPage; +} + +/* +** This function returns SQLITE_OK if the caller may write to the database. +** Otherwise, if the caller is operating on a snapshot that has already +** been overwritten by another writer, SQLITE_BUSY is returned. +*/ +int sqlite3LogWriteLock(Log *pLog, int op){ + assert( pLog->isLocked ); + if( op ){ + + /* Obtain the writer lock */ + int rc = logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_WRLOCK); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If this is connection is a region D reader, then the SHARED lock on + ** region D has just been upgraded to EXCLUSIVE. But no lock at all is + ** held on region A. 
This means that if the write-transaction is committed + ** and this connection downgrades to a reader, it will be left with no + ** lock at all. And so its snapshot could get clobbered by a checkpoint + ** operation. + ** + ** To stop this from happening, grab a SHARED lock on region A now. + ** This should always be successful, as the only time a client holds + ** an EXCLUSIVE lock on region A, it must also be holding an EXCLUSIVE + ** lock on region C (a checkpointer does this). This is not possible, + ** as this connection currently has the EXCLUSIVE lock on region C. + */ + if( pLog->isLocked==LOG_REGION_D ){ + logLockRegion(pLog, LOG_REGION_A, LOG_RDLOCK); + pLog->isLocked = LOG_REGION_A; + } + + /* If this connection is not reading the most recent database snapshot, + ** it is not possible to write to the database. In this case release + ** the write locks and return SQLITE_BUSY. + */ + if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){ + logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK); + return SQLITE_BUSY; + } + pLog->isWriteLocked = 1; + + }else if( pLog->isWriteLocked ){ + logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK); + memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)); + pLog->isWriteLocked = 0; + } + return SQLITE_OK; +} + +/* +** Write a set of frames to the log. The caller must hold at least a +** RESERVED lock on the database file. +*/ +int sqlite3LogFrames( + Log *pLog, /* Log handle to write to */ + int nPgsz, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int isSync /* True to sync the log file */ +){ + int rc; /* Used to catch return codes */ + u32 iFrame; /* Next frame address */ + u8 aFrame[LOG_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ + PgHdr *p; /* Iterator to run through pList with. 
*/ + u32 aCksum[2]; /* Checksums */ + PgHdr *pLast; /* Last frame in list */ + int nLast = 0; /* Number of extra copies of last page */ + + assert( LOG_FRAME_HDRSIZE==(4 * 2 + LOG_CKSM_BYTES) ); + assert( pList ); + + /* If this is the first frame written into the log, write the log + ** header to the start of the log file. See comments at the top of + ** this file for a description of the log-header format. + */ + assert( LOG_FRAME_HDRSIZE>=LOG_HDRSIZE ); + iFrame = pLog->hdr.iLastPg; + if( iFrame==0 ){ + sqlite3Put4byte(aFrame, nPgsz); + sqlite3_randomness(8, &aFrame[4]); + pLog->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); + pLog->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); + rc = sqlite3OsWrite(pLog->pFd, aFrame, LOG_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + aCksum[0] = pLog->hdr.iCheck1; + aCksum[1] = pLog->hdr.iCheck2; + + /* Write the log file. */ + for(p=pList; p; p=p->pDirty){ + u32 nDbsize; /* Db-size field for frame header */ + i64 iOffset; /* Write offset in log file */ + + iOffset = logFrameOffset(++iFrame, nPgsz); + + /* Populate and write the frame header */ + nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; + logEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); + rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Write the page data */ + rc = sqlite3OsWrite(pLog->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame)); + if( rc!=SQLITE_OK ){ + return rc; + } + pLast = p; + } + + /* Sync the log file if the 'isSync' flag was specified. 
*/ + if( isSync ){ + i64 iSegment = sqlite3OsSectorSize(pLog->pFd); + i64 iOffset = logFrameOffset(iFrame+1, nPgsz); + + assert( isCommit ); + + if( iSegment<SQLITE_DEFAULT_SECTOR_SIZE ){ + iSegment = SQLITE_DEFAULT_SECTOR_SIZE; + } + iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); + while( iOffset<iSegment ){ + logEncodeFrame(aCksum,pLast->pgno,nTruncate,nPgsz,pLast->pData,aFrame); + rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + + iOffset += LOG_FRAME_HDRSIZE; + rc = sqlite3OsWrite(pLog->pFd, pLast->pData, nPgsz, iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + nLast++; + iOffset += nPgsz; + } + + rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + /* Append data to the log summary. It is not necessary to lock the + ** log-summary to do this as the RESERVED lock held on the db file + ** guarantees that there are no other writers, and no data that may + ** be in use by existing readers is being overwritten. + */ + iFrame = pLog->hdr.iLastPg; + for(p=pList; p; p=p->pDirty){ + iFrame++; + logSummaryAppend(pLog->pSummary, iFrame, p->pgno); + } + while( nLast>0 ){ + iFrame++; + nLast--; + logSummaryAppend(pLog->pSummary, iFrame, pLast->pgno); + } + + /* Update the private copy of the header. */ + pLog->hdr.pgsz = nPgsz; + pLog->hdr.iLastPg = iFrame; + if( isCommit ){ + pLog->hdr.iChange++; + pLog->hdr.nPage = nTruncate; + } + pLog->hdr.iCheck1 = aCksum[0]; + pLog->hdr.iCheck2 = aCksum[1]; + + /* If this is a commit, update the log-summary header too. If the + ** log-summary mutex cannot be obtained the shared header has not been + ** updated, so report the failure to the caller instead of SQLITE_OK. */ + rc = SQLITE_OK; + if( isCommit ){ + rc = logEnterMutex(pLog); + if( rc==SQLITE_OK ){ + logSummaryWriteHdr(pLog->pSummary, &pLog->hdr); + logLeaveMutex(pLog); + } + } + + return rc; +} + +/* +** Checkpoint the database: +** +** 1. Wait for an EXCLUSIVE lock on regions B and C. +** 2. Wait for an EXCLUSIVE lock on region A. +** 3. Copy the contents of the log into the database file. +** 4. 
Zero the log-summary header (so new readers will ignore the log). +** 5. Drop the locks obtained in steps 1 and 2. +*/ +int sqlite3LogCheckpoint( + Log *pLog, /* Log connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + u8 *zBuf, /* Temporary buffer to use */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ +){ + int rc; /* Return code */ + + assert( !pLog->isLocked ); + + /* Wait for an EXCLUSIVE lock on regions B and C. */ + do { + rc = logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_WRLOCK); + }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); + if( rc!=SQLITE_OK ) return rc; + + /* Wait for an EXCLUSIVE lock on region A. */ + do { + rc = logLockRegion(pLog, LOG_REGION_A, LOG_WRLOCK); + }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); + if( rc!=SQLITE_OK ){ + logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK); + return rc; + } + + /* Copy data from the log to the database file. */ + rc = logSummaryReadHdr(pLog, 0); + if( rc==SQLITE_OK ){ + rc = logCheckpoint(pLog, pFd, zBuf); + } + + /* Release the locks. */ + logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK); + return rc; +} + diff --git a/src/log.h b/src/log.h new file mode 100644 index 000000000..816f9354e --- /dev/null +++ b/src/log.h @@ -0,0 +1,63 @@ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface to the write-ahead logging +** system. Refer to the comments below and the header comment attached to +** the implementation of each function in log.c for further details. 
+*/ + +#ifndef _LOG_H_ +#define _LOG_H_ + +#include "sqliteInt.h" + +/* Flags that may be set in the 'flags' argument to sqlite3LogWrite(): */ +#define LOG_MASK_COMMIT 0x08 +#define LOG_MASK_MASTERJOURNAL 0x10 +#define LOG_MASK_TRUNCATE 0x20 + + +#define LOG_TRUNCATE_BIT 0x80000000 + +/* Connection to a log file. There is one object of this type for each pager. */ +typedef struct Log Log; + +/* Open and close a connection to a log file. */ +int sqlite3LogOpen(sqlite3_vfs*, const char *zDb, Log **ppLog); +int sqlite3LogClose(Log *pLog, sqlite3_file *pFd, u8 *zBuf); + +/* Configure the log connection. */ +void sqlite3LogSetSyncflags(Log *, int sync_flags); + +/* Used by readers to open (lock) and close (unlock) a database snapshot. */ +int sqlite3LogOpenSnapshot(Log *pLog, int *); +void sqlite3LogCloseSnapshot(Log *pLog); + +/* Read a page from the log, if it is present. */ +int sqlite3LogRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut); +void sqlite3LogMaxpgno(Log *pLog, Pgno *pPgno); + +/* Obtain or release the WRITER lock. */ +int sqlite3LogWriteLock(Log *pLog, int op); + +/* Write a segment to the log. */ +int sqlite3LogFrames(Log *pLog, int, PgHdr *, Pgno, int, int); + +/* Copy pages from the log to the database file */ +int sqlite3LogCheckpoint( + Log *pLog, /* Log connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + u8 *zBuf, /* Temporary buffer to use */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ +); + +#endif /* _LOG_H_ */ diff --git a/src/os_unix.c b/src/os_unix.c index 769e75df3..80ce9e0b0 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -1536,9 +1536,11 @@ static int _posixUnlock(sqlite3_file *id, int locktype, int handleNFSUnlock){ ** the file has changed and hence might not know to flush their ** cache. The use of a stale cache can lead to database corruption. 
*/ +#if 0 assert( pFile->inNormalWrite==0 || pFile->dbUpdate==0 || pFile->transCntrChng==1 ); +#endif pFile->inNormalWrite = 0; #endif @@ -2956,10 +2958,12 @@ static int unixRead( /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ +#if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); +#endif got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ @@ -3031,10 +3035,12 @@ static int unixWrite( /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ +#if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); +#endif #ifndef NDEBUG /* If we are doing a normal write to a database file (as opposed to diff --git a/src/pager.c b/src/pager.c index d5c236e24..68d561400 100644 --- a/src/pager.c +++ b/src/pager.c @@ -20,6 +20,7 @@ */ #ifndef SQLITE_OMIT_DISKIO #include "sqliteInt.h" +#include "log.h" /* ******************** NOTES ON THE DESIGN OF THE PAGER ************************ @@ -397,6 +398,7 @@ struct Pager { char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ PCache *pPCache; /* Pointer to page cache object */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ + Log *pLog; /* Log used by "journal_mode=wal" */ }; /* @@ -489,6 +491,7 @@ static int assert_pager_state(Pager *pPager){ } #endif + /* ** Return true if it is necessary to write page *pPg into the sub-journal. ** A page needs to be written into the sub-journal if there exists one @@ -1186,6 +1189,14 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ } /* +** Return true if this pager uses a write-ahead log instead of the usual +** rollback journal. Otherwise false. +*/ +static int pagerUseLog(Pager *pPager){ + return (pPager->pLog!=0); +} + +/* ** Unlock the database file. 
This function is a no-op if the pager ** is in exclusive mode. ** @@ -1197,7 +1208,7 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ */ static void pager_unlock(Pager *pPager){ if( !pPager->exclusiveMode ){ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ /* Always close the journal file when dropping the database lock. ** Otherwise, another connection with journal_mode=delete might @@ -1216,7 +1227,11 @@ static void pager_unlock(Pager *pPager){ */ pPager->dbSizeValid = 0; - rc = osUnlock(pPager->fd, NO_LOCK); + if( pagerUseLog(pPager) ){ + sqlite3LogCloseSnapshot(pPager->pLog); + }else{ + rc = osUnlock(pPager->fd, NO_LOCK); + } if( rc ){ pPager->errCode = rc; } @@ -1365,6 +1380,7 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ assert( isOpen(pPager->jfd) || pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ + assert( !pagerUseLog(pPager) ); /* Finalize the journal file. */ if( sqlite3IsMemJournal(pPager->jfd) ){ @@ -1408,7 +1424,10 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); - if( !pPager->exclusiveMode ){ + if( pagerUseLog(pPager) ){ + rc2 = sqlite3LogWriteLock(pPager->pLog, 0); + pPager->state = PAGER_SHARED; + }else if( !pPager->exclusiveMode ){ rc2 = osUnlock(pPager->fd, SHARED_LOCK); pPager->state = PAGER_SHARED; pPager->changeCountDone = 0; @@ -2120,6 +2139,9 @@ end_playback: if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){ rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); } + if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){ + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); + } if( rc==SQLITE_OK ){ rc = pager_end_transaction(pPager, zMaster[0]!='\0'); testcase( rc!=SQLITE_OK ); @@ -2140,6 +2162,97 @@ end_playback: return rc; } + +/* +** Read the content for page pPg out of the database file and into +** pPg->pData. 
A shared lock or greater must be held on the database +** file before this function is called. +** +** If page 1 is read, then the value of Pager.dbFileVers[] is set to +** the value read from the database file. +** +** If an IO error occurs, then the IO error is returned to the caller. +** Otherwise, SQLITE_OK is returned. +*/ +static int readDbPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ + Pgno pgno = pPg->pgno; /* Page number to read */ + int rc = SQLITE_OK; /* Return code */ + i64 iOffset; /* Byte offset of file to read from */ + int isInLog = 0; /* True if page is in log file */ + + assert( pPager->state>=PAGER_SHARED && !MEMDB ); + assert( isOpen(pPager->fd) ); + + if( NEVER(!isOpen(pPager->fd)) ){ + assert( pPager->tempFile ); + memset(pPg->pData, 0, pPager->pageSize); + return SQLITE_OK; + } + + if( pagerUseLog(pPager) ){ + /* Try to pull the page from the write-ahead log. */ + rc = sqlite3LogRead(pPager->pLog, pgno, &isInLog, pPg->pData); + } + if( rc==SQLITE_OK && !isInLog ){ + iOffset = (pgno-1)*(i64)pPager->pageSize; + rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + + if( pgno==1 ){ + if( rc ){ + /* If the read is unsuccessful, set the dbFileVers[] to something + ** that will never be a valid file version. dbFileVers[] is a copy + ** of bytes 24..39 of the database. Bytes 28..31 should always be + ** zero. Bytes 32..35 and 35..39 should be page numbers which are + ** never 0xffffffff. So filling pPager->dbFileVers[] with all 0xff + ** bytes should suffice. + ** + ** For an encrypted database, the situation is more complex: bytes + ** 24..39 of the database are white noise. But the probability of + ** white noising equaling 16 bytes of 0xff is vanishingly small so + ** we should still be ok. 
+ */ + memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); + }else{ + u8 *dbFileVers = &((u8*)pPg->pData)[24]; + memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); + } + } + CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); + + PAGER_INCR(sqlite3_pager_readdb_count); + PAGER_INCR(pPager->nRead); + IOTRACE(("PGIN %p %d\n", pPager, pgno)); + PAGERTRACE(("FETCH %d page %d hash(%08x)\n", + PAGERID(pPager), pgno, pager_pagehash(pPg))); + + return rc; +} + +static int pagerRollbackLog(Pager *pPager){ + int rc = SQLITE_OK; + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + pPager->dbSize = pPager->dbOrigSize; + while( pList && rc==SQLITE_OK ){ + PgHdr *pNext = pList->pDirty; + if( sqlite3PcachePageRefcount(pList)==0 ){ + sqlite3PagerLookup(pPager, pList->pgno); + sqlite3PcacheDrop(pList); + }else{ + rc = readDbPage(pList); + if( rc==SQLITE_OK ){ + pPager->xReiniter(pList); + } + } + pList = pNext; + } + return rc; +} + /* ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback ** the entire master journal file. The case pSavepoint==NULL occurs when @@ -2197,12 +2310,17 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ */ pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; + if( !pSavepoint && pagerUseLog(pPager) ){ + return pagerRollbackLog(pPager); + } + /* Use pPager->journalOff as the effective size of the main rollback ** journal. The actual file might be larger than this in ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything ** past pPager->journalOff is off-limits to us. */ szJ = pPager->journalOff; + assert( pagerUseLog(pPager)==0 || szJ==0 ); /* Begin by rolling back records from the main journal starting at ** PagerSavepoint.iOffset and continuing to the next journal header. @@ -2211,7 +2329,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ ** will be skipped automatically. 
Pages are added to pDone as they ** are played back. */ - if( pSavepoint ){ + if( pSavepoint && !pagerUseLog(pPager) ){ iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; pPager->journalOff = pSavepoint->iOffset; while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){ @@ -2558,7 +2676,7 @@ int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ ** and *pnPage is set to the number of pages in the database. */ int sqlite3PagerPagecount(Pager *pPager, int *pnPage){ - Pgno nPage; /* Value to return via *pnPage */ + Pgno nPage = 0; /* Value to return via *pnPage */ /* Determine the number of pages in the file. Store this in nPage. */ if( pPager->dbSizeValid ){ @@ -2567,15 +2685,23 @@ int sqlite3PagerPagecount(Pager *pPager, int *pnPage){ int rc; /* Error returned by OsFileSize() */ i64 n = 0; /* File size in bytes returned by OsFileSize() */ - assert( isOpen(pPager->fd) || pPager->tempFile ); - if( isOpen(pPager->fd) && (0 != (rc = sqlite3OsFileSize(pPager->fd, &n))) ){ - pager_error(pPager, rc); - return rc; + if( pagerUseLog(pPager) ){ + sqlite3LogMaxpgno(pPager->pLog, &nPage); } - if( n>0 && n<pPager->pageSize ){ - nPage = 1; - }else{ - nPage = (Pgno)(n / pPager->pageSize); + + if( nPage==0 ){ + assert( isOpen(pPager->fd) || pPager->tempFile ); + if( isOpen(pPager->fd) ){ + if( SQLITE_OK!=(rc = sqlite3OsFileSize(pPager->fd, &n)) ){ + pager_error(pPager, rc); + return rc; + } + } + if( n>0 && n<pPager->pageSize ){ + nPage = 1; + }else{ + nPage = (Pgno)(n / pPager->pageSize); + } } if( pPager->state!=PAGER_UNLOCK ){ pPager->dbSize = nPage; @@ -2698,6 +2824,7 @@ void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ assertTruncateConstraint(pPager); } + /* ** This function is called before attempting a hot-journal rollback. It ** syncs the journal file to disk, then sets pPager->journalHdr to the @@ -2738,10 +2865,14 @@ static int pagerSyncHotJournal(Pager *pPager){ ** to the caller. 
*/ int sqlite3PagerClose(Pager *pPager){ + u8 *pTmp = (u8 *)pPager->pTmpSpace; + disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); pPager->errCode = 0; pPager->exclusiveMode = 0; + sqlite3LogClose(pPager->pLog, pPager->fd, pTmp); + pPager->pLog = 0; pager_reset(pPager); if( MEMDB ){ pager_unlock(pPager); @@ -2762,7 +2893,7 @@ int sqlite3PagerClose(Pager *pPager){ PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); IOTRACE(("CLOSE %p\n", pPager)) sqlite3OsClose(pPager->fd); - sqlite3PageFree(pPager->pTmpSpace); + sqlite3PageFree(pTmp); sqlite3PcacheClose(pPager->pPCache); #ifdef SQLITE_HAS_CODEC @@ -2978,6 +3109,7 @@ static int pager_write_pagelist(PgHdr *pList){ ** EXCLUSIVE, it means the database file has been changed and any rollback ** will require a journal playback. */ + assert( !pagerUseLog(pList->pPager) ); assert( pPager->state>=PAGER_RESERVED ); rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); @@ -3066,7 +3198,10 @@ static int subjournalPage(PgHdr *pPg){ CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); - assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize ); + assert( pagerUseLog(pPager) + || pageInJournal(pPg) + || pPg->pgno>pPager->dbOrigSize + ); rc = write32bits(pPager->sjfd, offset, pPg->pgno); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); @@ -3107,74 +3242,79 @@ static int pagerStress(void *p, PgHdr *pPg){ assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); - /* The doNotSync flag is set by the sqlite3PagerWrite() function while it - ** is journalling a set of two or more database pages that are stored - ** on the same disk sector. Syncing the journal is not allowed while - ** this is happening as it is important that all members of such a - ** set of pages are synced to disk together. 
So, if the page this function - ** is trying to make clean will require a journal sync and the doNotSync - ** flag is set, return without doing anything. The pcache layer will - ** just have to go ahead and allocate a new page buffer instead of - ** reusing pPg. - ** - ** Similarly, if the pager has already entered the error state, do not - ** try to write the contents of pPg to disk. - */ - if( NEVER(pPager->errCode) - || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC) - ){ - return SQLITE_OK; - } - - /* Sync the journal file if required. */ - if( pPg->flags&PGHDR_NEED_SYNC ){ - rc = syncJournal(pPager); - if( rc==SQLITE_OK && pPager->fullSync && - !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) && - !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) + pPg->pDirty = 0; + if( pagerUseLog(pPager) ){ + /* Write a single frame for this page to the log. */ + rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pPg, 0, 0, 0); + }else{ + /* The doNotSync flag is set by the sqlite3PagerWrite() function while it + ** is journalling a set of two or more database pages that are stored + ** on the same disk sector. Syncing the journal is not allowed while + ** this is happening as it is important that all members of such a + ** set of pages are synced to disk together. So, if the page this function + ** is trying to make clean will require a journal sync and the doNotSync + ** flag is set, return without doing anything. The pcache layer will + ** just have to go ahead and allocate a new page buffer instead of + ** reusing pPg. + ** + ** Similarly, if the pager has already entered the error state, do not + ** try to write the contents of pPg to disk. + */ + if( NEVER(pPager->errCode) + || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC) ){ - pPager->nRec = 0; - rc = writeJournalHdr(pPager); + return SQLITE_OK; + } + + /* Sync the journal file if required. 
*/ + if( pPg->flags&PGHDR_NEED_SYNC ){ + rc = syncJournal(pPager); + if( rc==SQLITE_OK && pPager->fullSync && + !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) && + !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) + ){ + pPager->nRec = 0; + rc = writeJournalHdr(pPager); + } + } + + /* If the page number of this page is larger than the current size of + ** the database image, it may need to be written to the sub-journal. + ** This is because the call to pager_write_pagelist() below will not + ** actually write data to the file in this case. + ** + ** Consider the following sequence of events: + ** + ** BEGIN; + ** <journal page X> + ** <modify page X> + ** SAVEPOINT sp; + ** <shrink database file to Y pages> + ** pagerStress(page X) + ** ROLLBACK TO sp; + ** + ** If (X>Y), then when pagerStress is called page X will not be written + ** out to the database file, but will be dropped from the cache. Then, + ** following the "ROLLBACK TO sp" statement, reading page X will read + ** data from the database file. This will be the copy of page X as it + ** was when the transaction started, not as it was when "SAVEPOINT sp" + ** was executed. + ** + ** The solution is to write the current data for page X into the + ** sub-journal file now (if it is not already there), so that it will + ** be restored to its current value when the "ROLLBACK TO sp" is + ** executed. + */ + if( NEVER( + rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) + ) ){ + rc = subjournalPage(pPg); + } + + /* Write the contents of the page out to the database file. */ + if( rc==SQLITE_OK ){ + rc = pager_write_pagelist(pPg); } - } - - /* If the page number of this page is larger than the current size of - ** the database image, it may need to be written to the sub-journal. - ** This is because the call to pager_write_pagelist() below will not - ** actually write data to the file in this case. 
- ** - ** Consider the following sequence of events: - ** - ** BEGIN; - ** <journal page X> - ** <modify page X> - ** SAVEPOINT sp; - ** <shrink database file to Y pages> - ** pagerStress(page X) - ** ROLLBACK TO sp; - ** - ** If (X>Y), then when pagerStress is called page X will not be written - ** out to the database file, but will be dropped from the cache. Then, - ** following the "ROLLBACK TO sp" statement, reading page X will read - ** data from the database file. This will be the copy of page X as it - ** was when the transaction started, not as it was when "SAVEPOINT sp" - ** was executed. - ** - ** The solution is to write the current data for page X into the - ** sub-journal file now (if it is not already there), so that it will - ** be restored to its current value when the "ROLLBACK TO sp" is - ** executed. - */ - if( NEVER( - rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) - ) ){ - rc = subjournalPage(pPg); - } - - /* Write the contents of the page out to the database file. */ - if( rc==SQLITE_OK ){ - pPg->pDirty = 0; - rc = pager_write_pagelist(pPg); } /* Mark the page as clean. */ @@ -3583,66 +3723,54 @@ static int hasHotJournal(Pager *pPager, int *pExists){ } /* -** Read the content for page pPg out of the database file and into -** pPg->pData. A shared lock or greater must be held on the database -** file before this function is called. -** -** If page 1 is read, then the value of Pager.dbFileVers[] is set to -** the value read from the database file. -** -** If an IO error occurs, then the IO error is returned to the caller. -** Otherwise, SQLITE_OK is returned. +** Open a connection to the write-ahead log file for pager pPager. If +** the log connection is already open, this function is a no-op. 
*/ -static int readDbPage(PgHdr *pPg){ - Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ - Pgno pgno = pPg->pgno; /* Page number to read */ - int rc; /* Return code */ - i64 iOffset; /* Byte offset of file to read from */ - - assert( pPager->state>=PAGER_SHARED && !MEMDB ); - assert( isOpen(pPager->fd) ); +static int pagerOpenLog(Pager *pPager){ + if( !pPager->pLog ){ + int rc; /* Return code */ + + /* Before opening the log file, obtain a SHARED lock on the database + ** file. This lock will not be released until after the log file + ** connection has been closed. The purpose of this lock is to stop + ** any other process from unlinking the log or log-summary files while + ** this connection still has them open. An EXCLUSIVE lock on the + ** database file is required to unlink either of those two files. + */ + assert( pPager->state==PAGER_UNLOCK ); + rc = pager_wait_on_lock(pPager, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + assert( pPager->state==PAGER_UNLOCK ); + return pager_error(pPager, rc); + } + assert( pPager->state>=SHARED_LOCK ); - if( NEVER(!isOpen(pPager->fd)) ){ - assert( pPager->tempFile ); - memset(pPg->pData, 0, pPager->pageSize); - return SQLITE_OK; - } - iOffset = (pgno-1)*(i64)pPager->pageSize; - rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); - if( rc==SQLITE_IOERR_SHORT_READ ){ - rc = SQLITE_OK; - } - if( pgno==1 ){ - if( rc ){ - /* If the read is unsuccessful, set the dbFileVers[] to something - ** that will never be a valid file version. dbFileVers[] is a copy - ** of bytes 24..39 of the database. Bytes 28..31 should always be - ** zero. Bytes 32..35 and 35..39 should be page numbers which are - ** never 0xffffffff. So filling pPager->dbFileVers[] with all 0xff - ** bytes should suffice. - ** - ** For an encrypted database, the situation is more complex: bytes - ** 24..39 of the database are white noise. 
But the probability of - ** white noising equaling 16 bytes of 0xff is vanishingly small so - ** we should still be ok. - */ - memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); - }else{ - u8 *dbFileVers = &((u8*)pPg->pData)[24]; - memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); + /* Open the connection to the log file. If this operation fails, + ** (e.g. due to malloc() failure), unlock the database file and + ** return an error code. + */ + rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog); + if( rc!=SQLITE_OK ){ + osUnlock(pPager->fd, SQLITE_LOCK_NONE); + pPager->state = PAGER_UNLOCK; + return rc; } + }else{ + /* If the log file was already open, check that the pager is still holding + ** the required SHARED lock on the database file. + */ +#ifdef SQLITE_DEBUG + int locktype; + sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_LOCKSTATE, &locktype); + assert( locktype==SQLITE_LOCK_SHARED ); +#endif + pPager->state = PAGER_SHARED; } - CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); - - PAGER_INCR(sqlite3_pager_readdb_count); - PAGER_INCR(pPager->nRead); - IOTRACE(("PGIN %p %d\n", pPager, pgno)); - PAGERTRACE(("FETCH %d page %d hash(%08x)\n", - PAGERID(pPager), pgno, pager_pagehash(pPg))); - return rc; + return SQLITE_OK; } + /* ** This function is called to obtain a shared lock on the database file. ** It is illegal to call sqlite3PagerAcquire() until after this function @@ -3696,7 +3824,27 @@ int sqlite3PagerSharedLock(Pager *pPager){ pager_reset(pPager); } - if( pPager->state==PAGER_UNLOCK || isErrorReset ){ + + if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ + int changed = 0; /* True if the cache must be flushed */ + + /* Open the log file, if it is not already open. */ + rc = pagerOpenLog(pPager); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Open a log snapshot to read from. 
*/ + rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed); + if( rc==SQLITE_OK ){ + int dummy; + if( changed ){ + pager_reset(pPager); + assert( pPager->errCode || pPager->dbSizeValid==0 ); + } + rc = sqlite3PagerPagecount(pPager, &dummy); + } + }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; assert( !MEMDB ); @@ -3785,7 +3933,7 @@ int sqlite3PagerSharedLock(Pager *pPager){ pPager->journalOff = 0; pPager->setMaster = 0; pPager->journalHdr = 0; - + /* Make sure the journal file has been synced to disk. */ /* Playback and delete the journal. Drop the database write @@ -3992,8 +4140,8 @@ int sqlite3PagerAcquire( if( MEMDB || nMax<(int)pgno || noContent || !isOpen(pPager->fd) ){ if( pgno>pPager->mxPgno ){ - rc = SQLITE_FULL; - goto pager_acquire_err; + rc = SQLITE_FULL; + goto pager_acquire_err; } if( noContent ){ /* Failure to set the bits in the InJournal bit-vectors is benign. @@ -4088,7 +4236,7 @@ void sqlite3PagerUnref(DbPage *pPg){ */ static int openSubJournal(Pager *pPager){ int rc = SQLITE_OK; - if( isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ){ + if( (pagerUseLog(pPager) || isOpen(pPager->jfd)) && !isOpen(pPager->sjfd) ){ if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ sqlite3MemJournalOpen(pPager->sjfd); }else{ @@ -4224,16 +4372,29 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ assert( pPager->pInJournal==0 ); assert( !MEMDB && !pPager->tempFile ); - /* Obtain a RESERVED lock on the database file. If the exFlag parameter - ** is true, then immediately upgrade this to an EXCLUSIVE lock. The - ** busy-handler callback can be used when upgrading to the EXCLUSIVE - ** lock, but not when obtaining the RESERVED lock. 
- */ - rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); - if( rc==SQLITE_OK ){ - pPager->state = PAGER_RESERVED; - if( exFlag ){ - rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + if( pagerUseLog(pPager) ){ + /* Grab the write lock on the log file. If successful, upgrade to + ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller. + ** The busy-handler is not invoked if another connection already + ** holds the write-lock. If possible, the upper layer will call it. + */ + rc = sqlite3LogWriteLock(pPager->pLog, 1); + if( rc==SQLITE_OK ){ + pPager->dbOrigSize = pPager->dbSize; + pPager->state = PAGER_RESERVED; + } + }else{ + /* Obtain a RESERVED lock on the database file. If the exFlag parameter + ** is true, then immediately upgrade this to an EXCLUSIVE lock. The + ** busy-handler callback can be used when upgrading to the EXCLUSIVE + ** lock, but not when obtaining the RESERVED lock. + */ + rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); + if( rc==SQLITE_OK ){ + pPager->state = PAGER_RESERVED; + if( exFlag ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } } } @@ -4249,6 +4410,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ ** kept open and either was truncated to 0 bytes or its header was ** overwritten with zeros. 
*/ + assert( pagerUseLog(pPager)==0 ); assert( pPager->nRec==0 ); assert( pPager->dbOrigSize==0 ); assert( pPager->pInJournal==0 ); @@ -4303,6 +4465,7 @@ static int pager_write(PgHdr *pPg){ */ sqlite3PcacheMakeDirty(pPg); if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){ + assert( !pagerUseLog(pPager) ); pPager->dbModified = 1; }else{ @@ -4318,7 +4481,10 @@ static int pager_write(PgHdr *pPg){ if( rc!=SQLITE_OK ){ return rc; } - if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + if( !isOpen(pPager->jfd) + && pPager->journalMode!=PAGER_JOURNALMODE_OFF + && pPager->journalMode!=PAGER_JOURNALMODE_WAL + ){ assert( pPager->useJournal ); rc = pager_open_journal(pPager); if( rc!=SQLITE_OK ) return rc; @@ -4330,6 +4496,7 @@ static int pager_write(PgHdr *pPg){ ** the transaction journal if it is not there already. */ if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){ + assert( !pagerUseLog(pPager) ); if( pPg->pgno<=pPager->dbOrigSize ){ u32 cksum; char *pData2; @@ -4710,129 +4877,138 @@ int sqlite3PagerCommitPhaseOne( */ sqlite3BackupRestart(pPager->pBackup); }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){ - - /* The following block updates the change-counter. Exactly how it - ** does this depends on whether or not the atomic-update optimization - ** was enabled at compile time, and if this transaction meets the - ** runtime criteria to use the operation: - ** - ** * The file-system supports the atomic-write property for - ** blocks of size page-size, and - ** * This commit is not part of a multi-file transaction, and - ** * Exactly one page has been modified and store in the journal file. - ** - ** If the optimization was not enabled at compile time, then the - ** pager_incr_changecounter() function is called to update the change - ** counter in 'indirect-mode'. 
If the optimization is compiled in but - ** is not applicable to this transaction, call sqlite3JournalCreate() - ** to make sure the journal file has actually been created, then call - ** pager_incr_changecounter() to update the change-counter in indirect - ** mode. - ** - ** Otherwise, if the optimization is both enabled and applicable, - ** then call pager_incr_changecounter() to update the change-counter - ** in 'direct' mode. In this case the journal file will never be - ** created for this transaction. - */ -#ifdef SQLITE_ENABLE_ATOMIC_WRITE - PgHdr *pPg; - assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF ); - if( !zMaster && isOpen(pPager->jfd) - && pPager->journalOff==jrnlBufferSize(pPager) - && pPager->dbSize>=pPager->dbFileSize - && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) - ){ - /* Update the db file change counter via the direct-write method. The - ** following call will modify the in-memory representation of page 1 - ** to include the updated change counter and then write page 1 - ** directly to the database file. Because of the atomic-write - ** property of the host file-system, this is safe. - */ - rc = pager_incr_changecounter(pPager, 1); + if( pagerUseLog(pPager) ){ + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + if( pList ){ + rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pList, + pPager->dbSize, 1, pPager->fullSync + ); + } + sqlite3PcacheCleanAll(pPager->pPCache); }else{ - rc = sqlite3JournalCreate(pPager->jfd); - if( rc==SQLITE_OK ){ - rc = pager_incr_changecounter(pPager, 0); + /* The following block updates the change-counter. 
Exactly how it + ** does this depends on whether or not the atomic-update optimization + ** was enabled at compile time, and if this transaction meets the + ** runtime criteria to use the operation: + ** + ** * The file-system supports the atomic-write property for + ** blocks of size page-size, and + ** * This commit is not part of a multi-file transaction, and + ** * Exactly one page has been modified and store in the journal file. + ** + ** If the optimization was not enabled at compile time, then the + ** pager_incr_changecounter() function is called to update the change + ** counter in 'indirect-mode'. If the optimization is compiled in but + ** is not applicable to this transaction, call sqlite3JournalCreate() + ** to make sure the journal file has actually been created, then call + ** pager_incr_changecounter() to update the change-counter in indirect + ** mode. + ** + ** Otherwise, if the optimization is both enabled and applicable, + ** then call pager_incr_changecounter() to update the change-counter + ** in 'direct' mode. In this case the journal file will never be + ** created for this transaction. + */ + #ifdef SQLITE_ENABLE_ATOMIC_WRITE + PgHdr *pPg; + assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF ); + if( !zMaster && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbFileSize + && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. 
+ */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); + } } - } -#else - rc = pager_incr_changecounter(pPager, 0); -#endif - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* If this transaction has made the database smaller, then all pages - ** being discarded by the truncation must be written to the journal - ** file. This can only happen in auto-vacuum mode. - ** - ** Before reading the pages with page numbers larger than the - ** current value of Pager.dbSize, set dbSize back to the value - ** that it took at the start of the transaction. Otherwise, the - ** calls to sqlite3PagerGet() return zeroed pages instead of - ** reading data from the database file. - ** - ** When journal_mode==OFF the dbOrigSize is always zero, so this - ** block never runs if journal_mode=OFF. - */ -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPager->dbSize<pPager->dbOrigSize - && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF) - ){ - Pgno i; /* Iterator variable */ - const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ - const Pgno dbSize = pPager->dbSize; /* Database image size */ - pPager->dbSize = pPager->dbOrigSize; - for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ - if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ - PgHdr *pPage; /* Page to journal */ - rc = sqlite3PagerGet(pPager, i, &pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - rc = sqlite3PagerWrite(pPage); - sqlite3PagerUnref(pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + #else + rc = pager_incr_changecounter(pPager, 0); + #endif + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* If this transaction has made the database smaller, then all pages + ** being discarded by the truncation must be written to the journal + ** file. This can only happen in auto-vacuum mode. 
+ ** + ** Before reading the pages with page numbers larger than the + ** current value of Pager.dbSize, set dbSize back to the value + ** that it took at the start of the transaction. Otherwise, the + ** calls to sqlite3PagerGet() return zeroed pages instead of + ** reading data from the database file. + ** + ** When journal_mode==OFF the dbOrigSize is always zero, so this + ** block never runs if journal_mode=OFF. + */ + #ifndef SQLITE_OMIT_AUTOVACUUM + if( pPager->dbSize<pPager->dbOrigSize + && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF) + ){ + Pgno i; /* Iterator variable */ + const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ + const Pgno dbSize = pPager->dbSize; /* Database image size */ + pPager->dbSize = pPager->dbOrigSize; + for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ + if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ + PgHdr *pPage; /* Page to journal */ + rc = sqlite3PagerGet(pPager, i, &pPage); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + rc = sqlite3PagerWrite(pPage); + sqlite3PagerUnref(pPage); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } } + pPager->dbSize = dbSize; } - pPager->dbSize = dbSize; - } -#endif - - /* Write the master journal name into the journal file. If a master - ** journal file name has already been written to the journal file, - ** or if zMaster is NULL (no master journal), then this call is a no-op. - */ - rc = writeMasterJournal(pPager, zMaster); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* Sync the journal file. If the atomic-update optimization is being - ** used, this call will not create the journal file or perform any - ** real IO. - */ - rc = syncJournal(pPager); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* Write all dirty pages to the database file. 
*/ - rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache)); - if( rc!=SQLITE_OK ){ - assert( rc!=SQLITE_IOERR_BLOCKED ); - goto commit_phase_one_exit; - } - sqlite3PcacheCleanAll(pPager->pPCache); - - /* If the file on disk is not the same size as the database image, - ** then use pager_truncate to grow or shrink the file here. - */ - if( pPager->dbSize!=pPager->dbFileSize ){ - Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); - assert( pPager->state>=PAGER_EXCLUSIVE ); - rc = pager_truncate(pPager, nNew); + #endif + + /* Write the master journal name into the journal file. If a master + ** journal file name has already been written to the journal file, + ** or if zMaster is NULL (no master journal), then this call is a no-op. + */ + rc = writeMasterJournal(pPager, zMaster); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Sync the journal file. If the atomic-update optimization is being + ** used, this call will not create the journal file or perform any + ** real IO. + */ + rc = syncJournal(pPager); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Write all dirty pages to the database file. */ + rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache)); + if( rc!=SQLITE_OK ){ + assert( rc!=SQLITE_IOERR_BLOCKED ); + goto commit_phase_one_exit; + } + sqlite3PcacheCleanAll(pPager->pPCache); + + /* If the file on disk is not the same size as the database image, + ** then use pager_truncate to grow or shrink the file here. + */ + if( pPager->dbSize!=pPager->dbFileSize ){ + Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); + assert( pPager->state>=PAGER_EXCLUSIVE ); + rc = pager_truncate(pPager, nNew); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + + /* Finally, sync the database file. */ + if( !pPager->noSync && !noSync ){ + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); + } + IOTRACE(("DBSYNC %p\n", pPager)) } - /* Finally, sync the database file. 
*/ - if( !pPager->noSync && !noSync ){ - rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); - } - IOTRACE(("DBSYNC %p\n", pPager)) - pPager->state = PAGER_SYNCED; } @@ -4940,7 +5116,12 @@ int sqlite3PagerCommitPhaseTwo(Pager *pPager){ int sqlite3PagerRollback(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); - if( !pPager->dbModified || !isOpen(pPager->jfd) ){ + if( pagerUseLog(pPager) ){ + int rc2; + rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); + rc2 = pager_end_transaction(pPager, pPager->setMaster); + if( rc==SQLITE_OK ) rc = rc2; + }else if( !pPager->dbModified || !isOpen(pPager->jfd) ){ rc = pager_end_transaction(pPager, pPager->setMaster); }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ if( pPager->state>=PAGER_EXCLUSIVE ){ @@ -5158,7 +5339,7 @@ int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ ** not yet been opened. In this case there have been no changes to ** the database file, so the playback operation can be skipped. */ - else if( isOpen(pPager->jfd) ){ + else if( pagerUseLog(pPager) || isOpen(pPager->jfd) ){ PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; rc = pagerPlaybackSavepoint(pPager, pSavepoint); assert(rc!=SQLITE_DONE); @@ -5435,6 +5616,7 @@ int sqlite3PagerLockingMode(Pager *pPager, int eMode){ ** PAGER_JOURNALMODE_PERSIST ** PAGER_JOURNALMODE_OFF ** PAGER_JOURNALMODE_MEMORY +** PAGER_JOURNALMODE_WAL ** ** If the parameter is not _QUERY, then the journal_mode is set to the ** value specified if the change is allowed. 
The change is disallowed @@ -5453,11 +5635,12 @@ int sqlite3PagerJournalMode(Pager *pPager, int eMode){ || eMode==PAGER_JOURNALMODE_TRUNCATE || eMode==PAGER_JOURNALMODE_PERSIST || eMode==PAGER_JOURNALMODE_OFF + || eMode==PAGER_JOURNALMODE_WAL || eMode==PAGER_JOURNALMODE_MEMORY ); assert( PAGER_JOURNALMODE_QUERY<0 ); if( eMode>=0 - && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY - || eMode==PAGER_JOURNALMODE_OFF) + && (pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL) + && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY||eMode==PAGER_JOURNALMODE_OFF) && !pPager->dbModified && (!isOpen(pPager->jfd) || 0==pPager->journalOff) ){ @@ -5473,6 +5656,14 @@ int sqlite3PagerJournalMode(Pager *pPager, int eMode){ && !pPager->exclusiveMode ){ sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } + + /* Switching into WAL mode can only take place when no + ** locks are held on the database file. + */ + if( eMode==PAGER_JOURNALMODE_WAL && pPager->state!=PAGER_UNLOCK ){ + return (int)pPager->journalMode; + } + pPager->journalMode = (u8)eMode; } return (int)pPager->journalMode; @@ -5501,4 +5692,18 @@ sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ return &pPager->pBackup; } +/* +** This function is called when the user invokes "PRAGMA checkpoint". +*/ +int sqlite3PagerCheckpoint(Pager *pPager){ + int rc = SQLITE_OK; + if( pPager->pLog ){ + u8 *zBuf = (u8 *)pPager->pTmpSpace; + rc = sqlite3LogCheckpoint(pPager->pLog, pPager->fd, + zBuf, pPager->xBusyHandler, pPager->pBusyHandlerArg + ); + } + return rc; +} + #endif /* SQLITE_OMIT_DISKIO */ diff --git a/src/pager.h b/src/pager.h index 7d778c82c..1e14d2ea6 100644 --- a/src/pager.h +++ b/src/pager.h @@ -76,6 +76,7 @@ typedef struct PgHdr DbPage; #define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. 
*/ #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ +#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ /* ** The remainder of this file contains the declarations of the functions @@ -132,6 +133,7 @@ int sqlite3PagerRollback(Pager*); int sqlite3PagerOpenSavepoint(Pager *pPager, int n); int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); int sqlite3PagerSharedLock(Pager *pPager); +int sqlite3PagerCheckpoint(Pager *pPager); /* Functions used to query pager state and configuration. */ u8 sqlite3PagerIsreadonly(Pager*); diff --git a/src/pragma.c b/src/pragma.c index f03078f24..137ff510d 100644 --- a/src/pragma.c +++ b/src/pragma.c @@ -515,7 +515,7 @@ void sqlite3Pragma( if( sqlite3StrICmp(zLeft,"journal_mode")==0 ){ int eMode; static char * const azModeName[] = { - "delete", "persist", "off", "truncate", "memory" + "delete", "persist", "off", "truncate", "memory", "wal" }; if( zRight==0 ){ @@ -561,6 +561,7 @@ void sqlite3Pragma( || eMode==PAGER_JOURNALMODE_TRUNCATE || eMode==PAGER_JOURNALMODE_PERSIST || eMode==PAGER_JOURNALMODE_OFF + || eMode==PAGER_JOURNALMODE_WAL || eMode==PAGER_JOURNALMODE_MEMORY ); sqlite3VdbeSetNumCols(v, 1); sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC); @@ -1383,6 +1384,11 @@ void sqlite3Pragma( }else #endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + if( sqlite3StrICmp(zLeft, "checkpoint")==0 ){ + sqlite3VdbeUsesBtree(v, iDb); + sqlite3VdbeAddOp3(v, OP_Checkpoint, iDb, 0, 0); + }else + #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) /* ** Report the current state of file logs for all databases diff --git a/src/vdbe.c b/src/vdbe.c index c1b0eea31..42562cee0 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -5186,6 +5186,17 @@ case OP_AggFinal: { break; } +/* Opcode: Checkpoint P1 * * * * +*/ +case OP_Checkpoint: { + Btree *pBt; /* Btree to checkpoint */ + + assert( pOp->p1>=0 && pOp->p1<db->nDb ); + assert( 
(p->btreeMask & (1<<pOp->p1))!=0 ); + pBt = db->aDb[pOp->p1].pBt; + rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt)); + break; +}; #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* Opcode: Vacuum * * * * * diff --git a/test/lock2.test b/test/lock2.test index a2b75ca31..63319535d 100644 --- a/test/lock2.test +++ b/test/lock2.test @@ -16,69 +16,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +source $testdir/lock_common.tcl -# Launch another testfixture process to be controlled by this one. A -# channel name is returned that may be passed as the first argument to proc -# 'testfixture' to execute a command. The child testfixture process is shut -# down by closing the channel. -proc launch_testfixture {} { - set prg [info nameofexec] - if {$prg eq ""} { - set prg [file join . testfixture] - } - set chan [open "|$prg tf_main.tcl" r+] - fconfigure $chan -buffering line - return $chan -} - -# Execute a command in a child testfixture process, connected by two-way -# channel $chan. Return the result of the command, or an error message. -proc testfixture {chan cmd} { - puts $chan $cmd - puts $chan OVER - set r "" - while { 1 } { - set line [gets $chan] - if { $line == "OVER" } { - return $r - } - if {[eof $chan]} { - return "ERROR: Child process hung up" - } - append r $line - } -} - -# Write the main loop for the child testfixture processes into file -# tf_main.tcl. The parent (this script) interacts with the child processes -# via a two way pipe. The parent writes a script to the stdin of the child -# process, followed by the word "OVER" on a line of its own. The child -# process evaluates the script and writes the results to stdout, followed -# by an "OVER" of its own. 
-set f [open tf_main.tcl w] -puts $f { - set l [open log w] - set script "" - while {![eof stdin]} { - flush stdout - set line [gets stdin] - puts $l "READ $line" - if { $line == "OVER" } { - catch {eval $script} result - puts $result - puts $l "WRITE $result" - puts OVER - puts $l "WRITE OVER" - flush stdout - set script "" - } else { - append script $line - append script " ; " - } - } - close $l -} -close $f # Simple locking test case: # diff --git a/test/lock_common.tcl b/test/lock_common.tcl new file mode 100644 index 000000000..31c04e853 --- /dev/null +++ b/test/lock_common.tcl @@ -0,0 +1,77 @@ +# 2010 April 14 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file contains code used by several different test scripts. The +# code in this file allows testfixture to control another process (or +# processes) to test locking. +# + +# Launch another testfixture process to be controlled by this one. A +# channel name is returned that may be passed as the first argument to proc +# 'testfixture' to execute a command. The child testfixture process is shut +# down by closing the channel. +proc launch_testfixture {} { + set prg [info nameofexec] + if {$prg eq ""} { + set prg [file join . testfixture] + } + set chan [open "|$prg tf_main.tcl" r+] + fconfigure $chan -buffering line + return $chan +} + +# Execute a command in a child testfixture process, connected by two-way +# channel $chan. Return the result of the command, or an error message. 
+proc testfixture {chan cmd} { + puts $chan $cmd + puts $chan OVER + set r "" + while { 1 } { + set line [gets $chan] + if { $line == "OVER" } { + return $r + } + if {[eof $chan]} { + return "ERROR: Child process hung up" + } + append r $line + } +} + +# Write the main loop for the child testfixture processes into file +# tf_main.tcl. The parent (this script) interacts with the child processes +# via a two way pipe. The parent writes a script to the stdin of the child +# process, followed by the word "OVER" on a line of its own. The child +# process evaluates the script and writes the results to stdout, followed +# by an "OVER" of its own. +set f [open tf_main.tcl w] +puts $f { + set l [open log w] + set script "" + while {![eof stdin]} { + flush stdout + set line [gets stdin] + puts $l "READ $line" + if { $line == "OVER" } { + catch {eval $script} result + puts $result + puts $l "WRITE $result" + puts OVER + puts $l "WRITE OVER" + flush stdout + set script "" + } else { + append script $line + append script " ; " + } + } + close $l +} +close $f diff --git a/test/quick.test b/test/quick.test index 431b829ef..044951f70 100644 --- a/test/quick.test +++ b/test/quick.test @@ -101,6 +101,10 @@ set EXCLUDE { vtab_err.test veryquick.test mallocAll.test + + walslow.test + walcrash.test + walthread.test } if {[sqlite3 -has-codec]} { diff --git a/test/tester.tcl b/test/tester.tcl index 8fe877ec0..44798a709 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -143,6 +143,7 @@ proc reset_db {} { catch {db close} file delete -force test.db file delete -force test.db-journal + file delete -force test.db-wal sqlite3 db ./test.db set ::DB [sqlite3_connection_pointer db] if {[info exists ::SETUP_SQL]} { diff --git a/test/thread_common.tcl b/test/thread_common.tcl index bbd9389ea..673afdd80 100644 --- a/test/thread_common.tcl +++ b/test/thread_common.tcl @@ -80,7 +80,7 @@ set thread_procs { } proc thread_spawn {varname args} { - sqlthread spawn $varname [join $args ;] + sqlthread 
spawn $varname [join $args {;}] } # Return true if this build can run the multi-threaded tests. diff --git a/test/wal.test b/test/wal.test new file mode 100644 index 000000000..fb21d820f --- /dev/null +++ b/test/wal.test @@ -0,0 +1,700 @@ +# 2010 April 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl + +proc reopen_db {} { + catch { db close } + file delete -force test.db test.db-wal + sqlite3_wal db test.db +} + +set ::blobcnt 0 +proc blob {nByte} { + incr ::blobcnt + return [string range [string repeat "${::blobcnt}x" $nByte] 1 $nByte] +} + +proc sqlite3_wal {args} { + eval sqlite3 $args + [lindex $args 0] eval { PRAGMA journal_mode = wal } + [lindex $args 0] eval { PRAGMA synchronous = normal } + [lindex $args 0] function blob blob +} + +proc log_file_size {nFrame pgsz} { + expr {12 + ($pgsz+16)*$nFrame} +} + +proc log_deleted {logfile} { + return [expr [file exists $logfile]==0] +} + +# +# These are 'warm-body' tests used while developing the WAL code. They +# serve to prove that a few really simple cases work: +# +# wal-1.*: Read and write the database. +# wal-2.*: Test MVCC with one reader, one writer. +# wal-3.*: Test transaction rollback. +# wal-4.*: Test savepoint/statement rollback. +# wal-5.*: Test the temp database. +# wal-6.*: Test creating databases with different page sizes. 
+# + +do_test wal-0.1 { + execsql { PRAGMA synchronous = normal } + execsql { PRAGMA journal_mode = wal } +} {wal} + +do_test wal-1.0 { + execsql { + BEGIN; + CREATE TABLE t1(a, b); + } + list [file exists test.db-journal] [file exists test.db-wal] +} {0 1} +do_test wal-1.1 { + execsql COMMIT + list [file exists test.db-journal] [file exists test.db-wal] +} {0 1} +do_test wal-1.2 { + # There are now two pages in the log. + file size test.db-wal +} [log_file_size 2 1024] + +do_test wal-1.3 { + execsql { SELECT * FROM sqlite_master } +} {table t1 t1 2 {CREATE TABLE t1(a, b)}} + +do_test wal-1.4 { + execsql { INSERT INTO t1 VALUES(1, 2) } + execsql { INSERT INTO t1 VALUES(3, 4) } + execsql { INSERT INTO t1 VALUES(5, 6) } + execsql { INSERT INTO t1 VALUES(7, 8) } + execsql { INSERT INTO t1 VALUES(9, 10) } +} {} + +do_test wal-1.5 { + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.1 { + sqlite3_wal db2 ./test.db + execsql { BEGIN; SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.2 { + execsql { INSERT INTO t1 VALUES(11, 12) } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12} + +do_test wal-2.3 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.4 { + execsql { INSERT INTO t1 VALUES(13, 14) } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + +do_test wal-2.5 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.6 { + execsql { COMMIT; SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + +do_test wal-3.1 { + execsql { BEGIN; DELETE FROM t1 } + execsql { SELECT * FROM t1 } +} {} +do_test wal-3.2 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} +do_test wal-3.3 { + execsql { ROLLBACK } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} +db2 close + +do_test wal-4.1 { + execsql { + DELETE FROM t1; + BEGIN; + INSERT INTO t1 VALUES('a', 'b'); + SAVEPOINT sp; + INSERT INTO t1 VALUES('c', 'd'); + 
SELECT * FROM t1; + } +} {a b c d} +do_test wal-4.2 { + execsql { + ROLLBACK TO sp; + SELECT * FROM t1; + } +} {a b} +do_test wal-4.3 { + execsql { + COMMIT; + SELECT * FROM t1; + } +} {a b} + +do_test wal-5.1 { + execsql { + CREATE TEMP TABLE t2(a, b); + INSERT INTO t2 VALUES(1, 2); + } +} {} +do_test wal-5.2 { + execsql { + BEGIN; + INSERT INTO t2 VALUES(3, 4); + SELECT * FROM t2; + } +} {1 2 3 4} +do_test wal-5.3 { + execsql { + ROLLBACK; + SELECT * FROM t2; + } +} {1 2} +do_test wal-5.4 { + execsql { + CREATE TEMP TABLE t3(x UNIQUE); + BEGIN; + INSERT INTO t2 VALUES(3, 4); + INSERT INTO t3 VALUES('abc'); + } + catchsql { INSERT INTO t3 VALUES('abc') } +} {1 {column x is not unique}} +do_test wal-5.5 { + execsql { + COMMIT; + SELECT * FROM t2; + } +} {1 2 3 4} +db close + +foreach sector {512 4096} { + sqlite3_simulate_device -sectorsize $sector + foreach pgsz {512 1024 2048 4096} { + file delete -force test.db test.db-wal + do_test wal-6.$sector.$pgsz.1 { + sqlite3_wal db test.db -vfs devsym + execsql " + PRAGMA page_size = $pgsz ; + " + execsql " + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + " + db close + file size test.db + } [expr $pgsz*2] + + do_test wal-6.$sector.$pgsz.2 { + log_deleted test.db-wal + } {1} + } +} + +do_test wal-7.1 { + file delete -force test.db test.db-wal + sqlite3_wal db test.db + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + } + list [file size test.db] [file size test.db-wal] +} [list 0 [log_file_size 3 1024]] +do_test wal-7.2 { + execsql { PRAGMA checkpoint } + list [file size test.db] [file size test.db-wal] +} [list 2048 [log_file_size 3 1024]] + +# Execute some transactions in auto-vacuum mode to test database file +# truncation. 
+# +do_test wal-8.1 { + reopen_db + execsql { + PRAGMA auto_vacuum = 1; + PRAGMA auto_vacuum; + } +} {1} +do_test wal-8.2 { + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 SELECT blob(900) FROM t1; /* 4 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 8 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 16 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 32 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 64 */ + PRAGMA checkpoint; + } + file size test.db +} [expr 68*1024] +do_test wal-8.3 { + execsql { + DELETE FROM t1 WHERE rowid<54; + PRAGMA checkpoint; + } + file size test.db +} [expr 14*1024] + +# Run some "warm-body" tests to ensure that log-summary files with more +# than 256 entries (log summaries that contain index blocks) work Ok. +# +do_test wal-9.1 { + reopen_db + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 SELECT blob(900) FROM t1; /* 4 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 8 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 16 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 32 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 64 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 128 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 256 */ + } + file size test.db +} 0 +do_test wal-9.2 { + sqlite3_wal db2 test.db + execsql {PRAGMA integrity_check } db2 +} {ok} + +do_test wal-9.3 { + file delete -force test2.db test2.db-wal + file copy test.db test2.db + file copy test.db-wal test2.db-wal + sqlite3_wal db3 test2.db + execsql {PRAGMA integrity_check } db3 +} {ok} +db3 close + +do_test wal-9.4 { + execsql { PRAGMA checkpoint } + db2 close + sqlite3_wal db2 test.db + execsql {PRAGMA integrity_check } db2 +} {ok} + +foreach handle {db db2 db3} { catch { $handle close } } +unset handle + 
+#------------------------------------------------------------------------- +# The following block of tests - wal-10.* - test that the WAL locking +# scheme works in simple cases. This block of tests is run twice. Once +# using multiple connections in the address space of the current process, +# and once with all connections except one running in external processes. +# +foreach code [list { + set ::code2_chan [launch_testfixture] + set ::code3_chan [launch_testfixture] + proc code2 {tcl} { testfixture $::code2_chan $tcl } + proc code3 {tcl} { testfixture $::code3_chan $tcl } + set tn 1 +} { + proc code2 {tcl} { uplevel #0 $tcl } + proc code3 {tcl} { uplevel #0 $tcl } + set tn 2 +}] { + + eval $code + reopen_db + + # Open connections [db2] and [db3]. Depending on which iteration this + # is, the connections may be created in this interpreter, or in + # interpreters running in other OS processes. As such, the [db2] and [db3] + # commands should only be accessed within [code2] and [code3] blocks, + # respectively. + # + code2 { sqlite3 db2 test.db ; db2 eval { PRAGMA journal_mode = WAL } } + code3 { sqlite3 db3 test.db ; db3 eval { PRAGMA journal_mode = WAL } } + + # Shorthand commands. Execute SQL using database connection [db2] or + # [db3]. Return the results. + # + proc sql2 {sql} { code2 [list db2 eval $sql] } + proc sql3 {sql} { code3 [list db3 eval $sql] } + + # Initialize the database schema and contents. + # + do_test wal-10.$tn.1 { + execsql { + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + SELECT * FROM t1; + } + } {1 2} + + # Open a transaction and write to the database using [db]. Check that [db2] + # is still able to read the snapshot before the transaction was opened. + # + do_test wal-10.$tn.2 { + execsql { BEGIN; INSERT INTO t1 VALUES(3, 4); } + sql2 {SELECT * FROM t1} + } {1 2} + + # Have [db] commit the transaction. Check that [db2] is now seeing the + # new, updated snapshot. 
+ # + do_test wal-10.$tn.3 { + execsql { COMMIT } + sql2 {SELECT * FROM t1} + } {1 2 3 4} + + # Have [db2] open a read transaction. Then write to the db via [db]. Check + # that [db2] is still seeing the original snapshot. Then read with [db3]. + # [db3] should see the newly committed data. + # + do_test wal-10.$tn.4 { + sql2 { BEGIN ; SELECT * FROM t1} + } {1 2 3 4} + do_test wal-10.$tn.5 { + execsql { INSERT INTO t1 VALUES(5, 6); } + sql2 {SELECT * FROM t1} + } {1 2 3 4} + do_test wal-10.$tn.6 { + sql3 {SELECT * FROM t1} + } {1 2 3 4 5 6} + do_test wal-10.$tn.7 { + sql2 COMMIT + } {} + + # Have [db2] open a write transaction. Then attempt to write to the + # database via [db]. This should fail (writer lock cannot be obtained). + # + # Then open a read-transaction with [db]. Commit the [db2] transaction + # to disk. Verify that [db] still cannot write to the database (because + # it is reading an old snapshot). + # + # Close the current [db] transaction. Open a new one. [db] can now write + # to the database (as it is not locked and [db] is reading the latest + # snapshot). + # + do_test wal-10.$tn.7 { + sql2 { BEGIN; INSERT INTO t1 VALUES(7, 8) ; } + catchsql { INSERT INTO t1 VALUES(9, 10) } + } {1 {database is locked}} + do_test wal-10.$tn.8 { + execsql { BEGIN ; SELECT * FROM t1 } + } {1 2 3 4 5 6} + do_test wal-10.$tn.9 { + sql2 COMMIT + catchsql { INSERT INTO t1 VALUES(9, 10) } + } {1 {database is locked}} + do_test wal-10.$tn.10 { + execsql { COMMIT; BEGIN; INSERT INTO t1 VALUES(9, 10); COMMIT; } + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10} + + # Open a read transaction with [db2]. Check that this prevents [db] from + # checkpointing the database. But not from writing to it. 
+ # + do_test wal-10.$tn.11 { + sql2 { BEGIN; SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10} + do_test wal-10.$tn.12 { + catchsql { PRAGMA checkpoint } + } {1 {database is locked}} + do_test wal-10.$tn.13 { + execsql { INSERT INTO t1 VALUES(11, 12) } + sql2 {SELECT * FROM t1} + } {1 2 3 4 5 6 7 8 9 10} + + # Connection [db2] is holding a lock on a snapshot, preventing [db] from + # checkpointing the database. Add a busy-handler to [db]. If [db2] completes + # its transaction from within the busy-handler, [db] is able to complete + # the checkpoint operation. + # + proc busyhandler x { + if {$x==4} { sql2 COMMIT } + if {$x<5} { return 0 } + return 1 + } + db busy busyhandler + do_test wal-10.$tn.14 { + execsql { PRAGMA checkpoint } + } {} + + # Similar to the test above. Except this time, a new read transaction is + # started (db3) while the checkpointer is waiting for an old one (db2) to + # finish. The checkpointer can finish, but any subsequent write operations + # must wait until after db3 has closed the read transaction, as db3 is a + # "region D" writer. 
+ # + db busy {} + do_test wal-10.$tn.15 { + sql2 { BEGIN; SELECT * FROM t1; } + } {1 2 3 4 5 6 7 8 9 10 11 12} + do_test wal-10.$tn.16 { + catchsql { PRAGMA checkpoint } + } {1 {database is locked}} + proc busyhandler x { + if {$x==3} { sql3 { BEGIN; SELECT * FROM t1 } } + if {$x==4} { sql2 COMMIT } + if {$x<5} { return 0 } + return 1 + } + db busy busyhandler + do_test wal-10.$tn.17 { + execsql { PRAGMA checkpoint } + } {} + do_test wal-10.$tn.18 { + sql3 { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12} + do_test wal-10.$tn.19 { + catchsql { INSERT INTO t1 VALUES(13, 14) } + } {1 {database is locked}} + do_test wal-10.$tn.20 { + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12} + do_test wal-10.$tn.21 { + sql3 COMMIT + } {} + do_test wal-10.$tn.22 { + execsql { INSERT INTO t1 VALUES(13, 14) } + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + + # Set [db3] up as a "region D" reader again. Then upgrade it to a writer + # and back down to a reader. Then, check that a checkpoint is not possible + # (as [db3] still has a snapshot locked). 
+ # + do_test wal-10.$tn.23 { + execsql { PRAGMA checkpoint } + } {} + do_test wal-10.$tn.24 { + sql2 { BEGIN; SELECT * FROM t1; } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + do_test wal-10.$tn.25 { + execsql { PRAGMA checkpoint } + } {} + do_test wal-10.$tn.26 { + catchsql { INSERT INTO t1 VALUES(15, 16) } + } {1 {database is locked}} + do_test wal-10.$tn.27 { + sql3 { INSERT INTO t1 VALUES(15, 16) } + } {} + do_test wal-10.$tn.28 { + code3 { + set ::STMT [sqlite3_prepare db3 "SELECT * FROM t1" -1 TAIL] + sqlite3_step $::STMT + } + sql3 COMMIT + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} + db busy {} + do_test wal-10.$tn.29 { + execsql { INSERT INTO t1 VALUES(17, 18) } + catchsql { PRAGMA checkpoint } + } {1 {database is locked}} + do_test wal-10.$tn.30 { + code3 { sqlite3_finalize $::STMT } + execsql { PRAGMA checkpoint } + } {} + + # At one point, if a reader failed to upgrade to a writer because it + # was reading an old snapshot, the write-locks were not being released. + # Test that this bug has been fixed. + # + do_test wal-10.$tn.31 { + execsql { BEGIN ; SELECT * FROM t1 } + sql2 { INSERT INTO t1 VALUES(19, 20) } + catchsql { INSERT INTO t1 VALUES(21, 22) } + } {1 {database is locked}} + do_test wal-10.$tn.32 { + # This statement would fail when the bug was present. 
+ sql2 { INSERT INTO t1 VALUES(21, 22) } + } {} + do_test wal-10.$tn.33 { + execsql { SELECT * FROM t1 ; COMMIT } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} + do_test wal-10.$tn.34 { + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22} + + catch { db close } + catch { code2 { db2 close } } + catch { code3 { db3 close } } + catch { close $::code2_chan } + catch { close $::code3_chan } +} + +#------------------------------------------------------------------------- +# This block of tests, wal-11.*, test that nothing goes terribly wrong +# if frames must be written to the log file before a transaction is +# committed (in order to free up memory). +# +do_test wal-11.1 { + reopen_db + execsql { + PRAGMA cache_size = 10; + PRAGMA page_size = 1024; + CREATE TABLE t1(x PRIMARY KEY); + } + list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] +} {0 3} +do_test wal-11.2 { + execsql { PRAGMA checkpoint } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 3 1024]] +do_test wal-11.3 { + execsql { INSERT INTO t1 VALUES( blob(900) ) } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 4 1024]] + +do_test wal-11.4 { + execsql { + BEGIN; + INSERT INTO t1 SELECT blob(900) FROM t1; -- 2 + INSERT INTO t1 SELECT blob(900) FROM t1; -- 4 + INSERT INTO t1 SELECT blob(900) FROM t1; -- 8 + INSERT INTO t1 SELECT blob(900) FROM t1; -- 16 + } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 32 1024]] +do_test wal-11.5 { + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {16 ok} +do_test wal-11.6 { + execsql COMMIT + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 41 1024]] +do_test wal-11.7 { + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {16 ok} +do_test wal-11.8 { + execsql { PRAGMA checkpoint } + list [expr [file size 
test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 41 1024]] +do_test wal-11.9 { + db close + list [expr [file size test.db]/1024] [log_deleted test.db-wal] +} {37 1} +sqlite3_wal db test.db +do_test wal-11.10 { + execsql { + PRAGMA cache_size = 10; + BEGIN; + INSERT INTO t1 SELECT blob(900) FROM t1; -- 32 + SELECT count(*) FROM t1; + } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 35 1024]] +do_test wal-11.11 { + execsql { + SELECT count(*) FROM t1; + ROLLBACK; + SELECT count(*) FROM t1; + } +} {32 16} +do_test wal-11.12 { + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 35 1024]] +do_test wal-11.13 { + execsql { + INSERT INTO t1 VALUES( blob(900) ); + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {17 ok} +do_test wal-11.14 { + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 35 1024]] + + +#------------------------------------------------------------------------- +# This block of tests, wal-12.*, tests the fix for a problem that +# could occur if a log that is a prefix of an older log is written +# into a reused log file. 
+# +reopen_db +do_test wal-12.1 { + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x, y); + CREATE TABLE t2(x, y); + INSERT INTO t1 VALUES('A', 1); + } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 0 [log_file_size 5 1024]] +do_test wal-12.2 { + db close + sqlite3_wal db test.db + execsql { + UPDATE t1 SET y = 0 WHERE x = 'A'; + } + list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] +} {3 1} +do_test wal-12.3 { + execsql { INSERT INTO t2 VALUES('B', 1) } + list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] +} {3 2} + +do_test wal-12.4 { + file copy -force test.db test2.db + file copy -force test.db-wal test2.db-wal + sqlite3_wal db2 test2.db +breakpoint + execsql { SELECT * FROM t2 } db2 +} {B 1} +db2 close + +file copy -force test.db-wal A +do_test wal-12.5 { + execsql { + PRAGMA checkpoint; + UPDATE t2 SET y = 2 WHERE x = 'B'; + PRAGMA checkpoint; + UPDATE t1 SET y = 1 WHERE x = 'A'; + PRAGMA checkpoint; + UPDATE t1 SET y = 0 WHERE x = 'A'; + SELECT * FROM t2; + } +} {B 2} +file copy -force test.db-wal B + +do_test wal-12.4 { + file copy -force test.db test2.db + file copy -force test.db-wal test2.db-wal + sqlite3_wal db2 test2.db + execsql { SELECT * FROM t2 } db2 +} {B 2} +db2 close + + +finish_test + diff --git a/test/walcrash.test b/test/walcrash.test new file mode 100644 index 000000000..dd4c57294 --- /dev/null +++ b/test/walcrash.test @@ -0,0 +1,251 @@ +# 2010 February 8 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. 
The +# focus of this file is testing the operation of the library when +# recovering a database following a simulated system failure in +# "PRAGMA journal_mode=WAL" mode. +# + +# +# These are 'warm-body' tests of database recovery used while developing +# the WAL code. They serve to prove that a few really simple cases work: +# +# walcrash-1.*: Recover a database. +# walcrash-2.*: Recover a database where the failed transaction spanned more +# than one page. +# walcrash-3.*: Recover multiple databases where the failed transaction +# was a multi-file transaction. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +db close + +set seed 0 +set REPEATS 100 + +proc sqlite3_wal {args} { + eval sqlite3 $args + [lindex $args 0] eval { PRAGMA journal_mode = wal } +} + +# walcrash-1.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + do_test walcrash-1.$i.1 { + crashsql -delay 4 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 3); + INSERT INTO t1 VALUES(3, 6); + } + } {1 {child process exited abnormally}} + do_test walcrash-1.$i.2 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-1.$i.3 + db close + + do_test walcrash-1.$i.4 { + crashsql -delay 2 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + PRAGMA journal_mode = WAL; + INSERT INTO t1 VALUES(4, (SELECT sum(a) FROM t1) + 4); + INSERT INTO t1 VALUES(5, (SELECT sum(a) FROM t1) + 5); + } + } {1 {child process exited abnormally}} + do_test walcrash-1.$i.5 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-1.$i.6 + db close +} + +# walcrash-2.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + do_test walcrash-2.$i.1 { + crashsql -delay 4 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + 
CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + INSERT INTO t1 VALUES(5, 9); + } + } {1 {child process exited abnormally}} + do_test walcrash-2.$i.2 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-2.$i.3 + db close + + do_test walcrash-2.$i.4 { + crashsql -delay 2 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + INSERT INTO t1 VALUES(6, (SELECT sum(a) FROM t1) + 6); + INSERT INTO t1 VALUES(7, (SELECT sum(a) FROM t1) + 7); + } + } {1 {child process exited abnormally}} + do_test walcrash-2.$i.5 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-2.$i.6 + db close +} + +# walcrash-3.* +# (disabled: every line of this block is commented out) +# for {set i 1} {$i < $REPEATS} {incr i} { +# file delete -force test.db test.db-wal +# file delete -force test2.db test2.db-wal +# +# do_test walcrash-3.$i.1 { +# crashsql -delay 2 -file test2.db-wal -seed [incr seed] { +# PRAGMA journal_mode = WAL; +# ATTACH 'test2.db' AS aux; +# CREATE TABLE t1(a PRIMARY KEY, b); +# CREATE TABLE aux.t2(a PRIMARY KEY, b); +# BEGIN; +# INSERT INTO t1 VALUES(1, 2); +# INSERT INTO t2 VALUES(1, 2); +# COMMIT; +# } +# } {1 {child process exited abnormally}} +# +# do_test walcrash-3.$i.2 { +# sqlite3_wal db test.db +# execsql { +# ATTACH 'test2.db' AS aux; +# SELECT * FROM t1 EXCEPT SELECT * FROM t2; +# } +# } {} +# do_test walcrash-3.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} +# do_test walcrash-3.$i.4 { execsql { PRAGMA aux.integrity_check } } {ok} +# +# db close +# } + +# walcrash-4.* +# Run a WAL-mode script under crashsql against test.db-wal (-delay 3, -blocksize 4096, page_size 1024) and expect the child to exit abnormally; the reopened database must still return row (1, 2) and pass integrity_check. +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-4.$i.1 { + crashsql -delay 3 -file test.db-wal -seed [incr seed] -blocksize 4096 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 
4); + } + } {1 {child process exited abnormally}} + + do_test walcrash-4.$i.2 { + sqlite3_wal db test.db + execsql { + SELECT * FROM t1 WHERE a = 1; + } + } {1 2} + do_test walcrash-4.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + + db close +} + +# walcrash-5.* +# Same pattern with -delay 11 and -blocksize 4096: the script grows t1 to 32 rows, runs PRAGMA checkpoint, then inserts three more; the count(*) check must see 33 or 34 rows and integrity_check must pass. +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-5.$i.1 { + crashsql -delay 11 -file test.db-wal -seed [incr seed] -blocksize 4096 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + BEGIN; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + COMMIT; + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 12 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 20 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 24 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 28 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 32 */ + + PRAGMA checkpoint; + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + } + } {1 {child process exited abnormally}} + + do_test walcrash-5.$i.2 { + sqlite3_wal db test.db + execsql { SELECT count(*)==33 OR count(*)==34 FROM t1 WHERE x != 1 } + } {1} + do_test walcrash-5.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + + db close +} + +# walcrash-6.* +# Same script as walcrash-5 but with -delay 12 and -blocksize 512; the count(*) check must see 34 or 35 rows and integrity_check must pass. +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-6.$i.1 { + crashsql -delay 12 -file test.db-wal -seed [incr seed] -blocksize 512 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + BEGIN; + CREATE TABLE t1(x PRIMARY KEY); + INSERT 
INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + COMMIT; + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 12 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 20 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 24 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 28 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 32 */ + + PRAGMA checkpoint; + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + } + } {1 {child process exited abnormally}} + + do_test walcrash-6.$i.2 { + sqlite3_wal db test.db + execsql { SELECT count(*)==34 OR count(*)==35 FROM t1 WHERE x != 1 } + } {1} + do_test walcrash-6.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + + db close +} + +finish_test + diff --git a/test/walslow.test b/test/walslow.test new file mode 100644 index 000000000..73f93a48d --- /dev/null +++ b/test/walslow.test @@ -0,0 +1,71 @@ +# 2010 March 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. The tests in this file use +# brute force methods, so may take a while to run. 
+# For each of 9 PRNG seeds, 99 iterations insert one row of two random-length blobs (lengths below 2000), checkpoint, then copy test.db and test.db-wal to testX.db/testX.db-wal and check that the copy passes integrity_check and matches the original's count of rows with a!=b. + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +proc reopen_db {} { + catch { db close } + file delete -force test.db test.db-wal + sqlite3 db test.db + execsql { PRAGMA journal_mode = wal } +} + +db close +save_prng_state +for {set seed 1} {$seed<10} {incr seed} { + expr srand($seed) + restore_prng_state + reopen_db + do_test walslow-1.seed=$seed.0 { + execsql { CREATE TABLE t1(a, b) } + execsql { CREATE INDEX i1 ON t1(a) } + execsql { CREATE INDEX i2 ON t1(b) } + } {} + + for {set iTest 1} {$iTest < 100} {incr iTest} { + + do_test walslow-1.seed=$seed.$iTest.1 { + set w [expr int(rand()*2000)] + set x [expr int(rand()*2000)] + execsql { INSERT INTO t1 VALUES(randomblob($w), randomblob($x)) } + execsql { PRAGMA integrity_check } + } {ok} + + do_test walslow-1.seed=$seed.$iTest.2 { + execsql "PRAGMA checkpoint;" + execsql { PRAGMA integrity_check } + } {ok} + + do_test walslow-1.seed=$seed.$iTest.3 { + file delete -force testX.db testX.db-wal + file copy test.db testX.db + file copy test.db-wal testX.db-wal + + sqlite3 db2 testX.db + execsql { PRAGMA journal_mode = WAL } db2 + execsql { PRAGMA integrity_check } db2 + } {ok} + + do_test walslow-1.seed=$seed.$iTest.4 { + execsql { SELECT count(*) FROM t1 WHERE a!=b } db2 + } [execsql { SELECT count(*) FROM t1 WHERE a!=b }] + db2 close + } +} + + +finish_test diff --git a/test/walthread.test b/test/walthread.test new file mode 100644 index 000000000..08219a7f2 --- /dev/null +++ b/test/walthread.test @@ -0,0 +1,198 @@ +# 2010 April 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. 
The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# + +set testdir [file dirname $argv0] + +source $testdir/tester.tcl +if {[run_thread_tests]==0} { finish_test ; return } + +do_test walthread-1.1 { + execsql { + PRAGMA journal_mode = WAL; + PRAGMA lock_status; + CREATE TABLE t1(x PRIMARY KEY); + PRAGMA lock_status; + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 SELECT md5sum(x) FROM t1; + } +} {wal main unlocked temp closed main shared temp closed} +do_test walthread-1.2 { + execsql { + SELECT (SELECT count(*) FROM t1), ( + SELECT md5sum(x) FROM t1 WHERE oid != (SELECT max(oid) FROM t1) + ) == ( + SELECT x FROM t1 WHERE oid = (SELECT max(oid) FROM t1) + ) + } +} {3 1} +do_test walthread-1.3 { + execsql { PRAGMA integrity_check } +} {ok} +do_test walthread-1.4 { + execsql { PRAGMA lock_status } +} {main shared temp unknown} + +#-------------------------------------------------------------------------- +# Start NTHREAD worker threads. Each thread performs both read and write transactions. +# Each read transaction consists of: +# +# 1) Reading the md5sum of all but the last table row, +# 2) Running integrity check. +# 3) Reading the value stored in the last table row, +# 4) Check that the values read in steps 1 and 3 are the same, and that +# the md5sum of all but the last table row has not changed. +# +# Each write transaction consists of: +# +# 1) Inserting two rows of random data into t1. +# 2) Appending a new row to the table containing the md5sum() of all +# rows in the table. +# +# Each worker thread runs N read transactions followed by a single write +# transaction in a loop, resting 1 ms after each write. +# +# There is also a single checkpointer thread. It runs the following loop: +# +# 1) Execute "PRAGMA checkpoint" +# 2) Sleep for 1000 ms. 
+# thread_program below is the script passed to every thread_spawn call: $role selects the worker or checkpointer loop, which runs until ::finished is set after $seconds seconds; the script's last value is OK or the caught error message (presumably what thread_spawn stores in finished(...) - confirm against its definition). + +set thread_program { + proc rest {ms} { + set ::rest 0 + after $ms {set ::rest 1} + vwait ::rest + } + + proc dosql {DB sql} { + set res "" + set stmt [sqlite3_prepare_v2 $DB $sql -1 dummy_tail] + set rc [sqlite3_step $stmt] + if {$rc eq "SQLITE_ROW"} { + set res [sqlite3_column_text $stmt 0] + } + set rc [sqlite3_finalize $stmt] + + if {$rc ne "SQLITE_OK"} { + error "$rc: [sqlite3_errmsg $DB]" + } + return $res + } + + proc read_transaction {DB} { + dosql $DB BEGIN + + set md5_1 [dosql $DB { + SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1) + }] + set check [dosql $DB { PRAGMA integrity_check }] + set md5_2 [dosql $DB { + SELECT x FROM t1 WHERE rowid = (SELECT max(rowid) FROM t1) + }] + set md5_3 [dosql $DB { + SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1) + }] + + dosql $DB COMMIT + + if {$check ne "ok" + || $md5_1 ne $md5_2 + || $md5_2 ne $md5_3 + } { + error "Failed read transaction $check $md5_1 $md5_2 $md5_3" + } + } + + proc write_transaction {DB} { + dosql $DB BEGIN + dosql $DB "INSERT INTO t1 VALUES(randomblob(100))" + dosql $DB "INSERT INTO t1 VALUES(randomblob(100))" + dosql $DB "INSERT INTO t1 SELECT md5sum(x) FROM t1" + dosql $DB COMMIT + } + + proc checkpointer {DB} { + while { !$::finished } { + dosql $DB "PRAGMA checkpoint" + rest 1000 + } + } + + proc worker {DB N} { + set j 0 + while { !$::finished } { + for {set i 0} {$i < $N} {incr i} { read_transaction $DB } + write_transaction $DB + rest 1 + } + } + + set ::finished 0 + after [expr $seconds*1000] {set ::finished 1} + + set ::DB [sqlthread open test.db] + dosql $::DB { PRAGMA journal_mode = WAL } + + + set rc [catch { + if {$role eq "worker"} { worker $DB $N } + if {$role eq "checkpointer"} { checkpointer $DB } + } msg] + + sqlite3_close $::DB + + if {$rc==0} { set msg OK } + set msg +} + +set NTHREAD 6 +set SECONDS 30 + +#set prg "set N $NTHREAD ; set seconds $SECONDS" +set prg "set N 1 ; set seconds $SECONDS" + +array unset finished +for {set i 
0} {$i < $NTHREAD} {incr i} { + thread_spawn finished($i) {set role worker} $prg $thread_program +} +thread_spawn finished(C) {set role checkpointer} $prg $thread_program +#set finished(C) 1 + +puts "... test runs for approximately $SECONDS seconds ..." +for {set i 0} {$i < $::NTHREAD} {incr i} { + if {![info exists finished($i)]} { + vwait finished($i) + } + do_test walthread-2.$i { + set ::finished($i) + } OK +} +do_test walthread-2.C { + if {![info exists finished(C)]} { vwait finished(C) } + set ::finished(C) +} OK + +set logsize 0 + +set rows [execsql { SELECT count(*) FROM t1 }] +catch { set logsize [expr [file size test.db-wal] / 1024] } +set dbsize [expr [file size test.db] / 1024] + +puts "rows=$rows db=${dbsize}K log=${logsize}K" + +finish_test + + diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 38cee50ec..12e4a5ce0 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -93,6 +93,7 @@ foreach hdr { hash.h hwtime.h keywordhash.h + log.h mutex.h opcodes.h os_common.h @@ -243,6 +244,7 @@ foreach file { pcache.c pcache1.c rowset.c + log.c pager.c btmutex.c |