diff options
author | dan <dan@noemail.net> | 2015-01-01 18:03:49 +0000 |
---|---|---|
committer | dan <dan@noemail.net> | 2015-01-01 18:03:49 +0000 |
commit | 37db72f1f7ca41c61b1c084bdb75f3111ceba6f8 (patch) | |
tree | 5a01dcb28b6513353a64e93b11d6243845e69bc7 /src/wal.c | |
parent | 6024772ba292a9abc6810dd0b12767d02b47ccf1 (diff) | |
parent | a0de826c9ff84bd19de76ebbc0d4bdafa9686d3a (diff) | |
download | sqlite-37db72f1f7ca41c61b1c084bdb75f3111ceba6f8.tar.gz sqlite-37db72f1f7ca41c61b1c084bdb75f3111ceba6f8.zip |
Merge latest trunk changes with this branch.
FossilOrigin-Name: 4b3651677e7132c4c45605bc1f216fc08ef31198
Diffstat (limited to 'src/wal.c')
-rw-r--r-- | src/wal.c | 146 |
1 files changed, 99 insertions, 47 deletions
@@ -574,7 +574,7 @@ static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ ** The argument to this macro must be of type u32. On a little-endian ** architecture, it returns the u32 value that results from interpreting ** the 4 bytes as a big-endian value. On a big-endian architecture, it -** returns the value that would be produced by intepreting the 4 bytes +** returns the value that would be produced by interpreting the 4 bytes ** of the input value as a little-endian integer. */ #define BYTESWAP32(x) ( \ @@ -988,7 +988,7 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ assert( idx <= HASHTABLE_NSLOT/2 + 1 ); /* If this is the first entry to be added to this hash-table, zero the - ** entire hash table and aPgno[] array before proceding. + ** entire hash table and aPgno[] array before proceeding. */ if( idx==1 ){ int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]); @@ -1504,7 +1504,7 @@ static void walMergesort( ** Free an iterator allocated by walIteratorInit(). */ static void walIteratorFree(WalIterator *p){ - sqlite3ScratchFree(p); + sqlite3_free(p); } /* @@ -1539,7 +1539,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + iLast*sizeof(ht_slot); - p = (WalIterator *)sqlite3ScratchMalloc(nByte); + p = (WalIterator *)sqlite3_malloc(nByte); if( !p ){ return SQLITE_NOMEM; } @@ -1549,7 +1549,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ /* Allocate temporary space used by the merge-sort routine. This block ** of memory will be freed before this function returns. */ - aTmp = (ht_slot *)sqlite3ScratchMalloc( + aTmp = (ht_slot *)sqlite3_malloc( sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) ); if( !aTmp ){ @@ -1586,7 +1586,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ p->aSegment[i].aPgno = (u32 *)aPgno; } } - sqlite3ScratchFree(aTmp); + sqlite3_free(aTmp); if( rc!=SQLITE_OK ){ walIteratorFree(p); @@ -1624,6 +1624,38 @@ static int walPagesize(Wal *pWal){ } /* +** The following is guaranteed when this function is called: +** +** a) the WRITER lock is held, +** b) the entire log file has been checkpointed, and +** c) any existing readers are reading exclusively from the database +** file - there are no readers that may attempt to read a frame from +** the log file. +** +** This function updates the shared-memory structures so that the next +** client to write to the database (which may be this one) does so by +** writing frames into the start of the log file. +** +** The value of parameter salt1 is used as the aSalt[1] value in the +** new wal-index header. It should be passed a pseudo-random value (i.e. +** one obtained from sqlite3_randomness()). +*/ +static void walRestartHdr(Wal *pWal, u32 salt1){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + int i; /* Loop counter */ + u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ + pWal->nCkpt++; + pWal->hdr.mxFrame = 0; + sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); + memcpy(&pWal->hdr.aSalt[1], &salt1, 4); + walIndexWriteHdr(pWal); + pInfo->nBackfill = 0; + pInfo->aReadMark[1] = 0; + for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; + assert( pInfo->aReadMark[0]==0 ); +} + +/* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. ** @@ -1646,7 +1678,7 @@ static int walPagesize(Wal *pWal){ ** database file. ** ** This routine uses and updates the nBackfill field of the wal-index header. -** This is the only routine tha will increase the value of nBackfill. +** This is the only routine that will increase the value of nBackfill. ** (A WAL reset or recovery will revert nBackfill to zero, but not increase ** its value.) ** @@ -1657,7 +1689,7 @@ static int walPagesize(Wal *pWal){ static int walCheckpoint( Wal *pWal, /* Wal connection */ int eMode, /* One of PASSIVE, FULL or RESTART */ - int (*xBusyCall)(void*), /* Function to call when busy */ + int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags for OsSync() (or 0) */ u8 *zBuf /* Temporary buffer to use */ @@ -1671,7 +1703,6 @@ static int walCheckpoint( u32 mxPage; /* Max database page to write */ int i; /* Loop counter */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ - int (*xBusy)(void*) = 0; /* Function to call when waiting for locks */ szPage = walPagesize(pWal); testcase( szPage<=32768 ); @@ -1686,7 +1717,9 @@ static int walCheckpoint( } assert( pIter ); - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ) xBusy = xBusyCall; + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); /* Compute in mxSafeFrame the index of the last frame of the WAL that is ** safe to write into the database. Frames beyond mxSafeFrame might @@ -1775,19 +1808,38 @@ static int walCheckpoint( rc = SQLITE_OK; } - /* If this is an SQLITE_CHECKPOINT_RESTART operation, and the entire wal - ** file has been copied into the database file, then block until all - ** readers have finished using the wal file. This ensures that the next - ** process to write to the database restarts the wal file. + /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the + ** entire wal file has been copied into the database file, then block + ** until all readers have finished using the wal file. This ensures that + ** the next process to write to the database restarts the wal file. */ if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ assert( pWal->writeLock ); if( pInfo->nBackfill<pWal->hdr.mxFrame ){ rc = SQLITE_BUSY; - }else if( eMode==SQLITE_CHECKPOINT_RESTART ){ + }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ + u32 salt1; + sqlite3_randomness(4, &salt1); assert( mxSafeFrame==pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ + if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ + /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as + ** SQLITE_CHECKPOINT_RESTART with the addition that it also + ** truncates the log file to zero bytes just prior to a + ** successful return. + ** + ** In theory, it might be safe to do this without updating the + ** wal-index header in shared memory, as all subsequent reader or + ** writer clients should see that the entire log file has been + ** checkpointed and behave accordingly. This seems unsafe though, + ** as it would leave the system in a state where the contents of + ** the wal-index header do not match the contents of the + ** file-system. To avoid this, update the wal-index header to + ** indicate that the log file contains zero valid frames. */ + walRestartHdr(pWal, salt1); + rc = sqlite3OsTruncate(pWal->pWalFd, 0); + } walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } @@ -1950,7 +2002,7 @@ static int walIndexTryHdr(Wal *pWal, int *pChanged){ ** wal-index from the WAL before returning. ** ** Set *pChanged to 1 if the wal-index header value in pWal->hdr is -** changed by this opertion. If pWal->hdr is unchanged, set *pChanged +** changed by this operation. If pWal->hdr is unchanged, set *pChanged ** to 0. ** ** If the wal-index header is successfully read, return SQLITE_OK. @@ -2154,7 +2206,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ ** may have been appended to the log before READ_LOCK(0) was obtained. ** When holding READ_LOCK(0), the reader ignores the entire log file, ** which implies that the database file contains a trustworthy - ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from + ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from ** happening, this is usually correct. ** ** However, if frames have been appended to the log (or if the log @@ -2360,7 +2412,7 @@ int sqlite3WalFindFrame( for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ - /* assert( iFrame>iRead ); -- not true if there is corruption */ + assert( iFrame>iRead || CORRUPT_DB ); iRead = iFrame; } if( (nCollide--)==0 ){ @@ -2525,7 +2577,6 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ } if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); } - assert( rc==SQLITE_OK ); return rc; } @@ -2574,7 +2625,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ return rc; } - /* ** This function is called just before writing a set of frames to the log ** file (see sqlite3WalFrames()). It checks to see if, instead of appending @@ -2607,20 +2657,8 @@ static int walRestartLog(Wal *pWal){ ** In theory it would be Ok to update the cache of the header only ** at this point. But updating the actual wal-index header is also ** safe and means there is no special case for sqlite3WalUndo() - ** to handle if this transaction is rolled back. - */ - int i; /* Loop counter */ - u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ - - pWal->nCkpt++; - pWal->hdr.mxFrame = 0; - sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); - aSalt[1] = salt1; - walIndexWriteHdr(pWal); - pInfo->nBackfill = 0; - pInfo->aReadMark[1] = 0; - for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; - assert( pInfo->aReadMark[0]==0 ); + ** to handle if this transaction is rolled back. */ + walRestartHdr(pWal, salt1); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); }else if( rc!=SQLITE_BUSY ){ return rc; @@ -2822,7 +2860,7 @@ int sqlite3WalFrames( ** ** Padding and syncing only occur if this set of frames complete a ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL - ** or synchonous==OFF, then no padding or syncing are needed. + ** or synchronous==OFF, then no padding or syncing are needed. ** ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not ** needed and only the sync is done. If padding is needed, then the @@ -2908,7 +2946,7 @@ int sqlite3WalFrames( */ int sqlite3WalCheckpoint( Wal *pWal, /* Wal connection */ - int eMode, /* PASSIVE, FULL or RESTART */ + int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags to sync db file with (or 0) */ @@ -2920,29 +2958,42 @@ int sqlite3WalCheckpoint( int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ + int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ assert( pWal->ckptLock==0 ); assert( pWal->writeLock==0 ); + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); + if( pWal->readOnly ) return SQLITE_READONLY; WALTRACE(("WAL%p: checkpoint begins\n", pWal)); + + /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive + ** "checkpoint" lock on the database file. */ rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); if( rc ){ - /* Usually this is SQLITE_BUSY meaning that another thread or process - ** is already running a checkpoint, or maybe a recovery. But it might - ** also be SQLITE_IOERR. */ + /* EVIDENCE-OF: R-10421-19736 If any other process is running a + ** checkpoint operation at the same time, the lock cannot be obtained and + ** SQLITE_BUSY is returned. + ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, + ** it will not be invoked in this case. + */ + testcase( rc==SQLITE_BUSY ); + testcase( xBusy!=0 ); return rc; } pWal->ckptLock = 1; - /* If this is a blocking-checkpoint, then obtain the write-lock as well - ** to prevent any writers from running while the checkpoint is underway. - ** This has to be done before the call to walIndexReadHdr() below. + /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and + ** TRUNCATE modes also obtain the exclusive "writer" lock on the database + ** file. ** - ** If the writer lock cannot be obtained, then a passive checkpoint is - ** run instead. Since the checkpointer is not holding the writer lock, - ** there is no point in blocking waiting for any readers. Assuming no - ** other error occurs, this function will return SQLITE_BUSY to the caller. + ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained + ** immediately, and a busy-handler is configured, it is invoked and the + ** writer lock retried until either the busy-handler returns 0 or the + ** lock is successfully obtained. */ if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); @@ -2950,6 +3001,7 @@ int sqlite3WalCheckpoint( pWal->writeLock = 1; }else if( rc==SQLITE_BUSY ){ eMode2 = SQLITE_CHECKPOINT_PASSIVE; + xBusy2 = 0; rc = SQLITE_OK; } } @@ -2967,7 +3019,7 @@ int sqlite3WalCheckpoint( if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf); + rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); } /* If no error occurred, set the output variables. */ |