diff options
Diffstat (limited to 'src/wal.c')
-rw-r--r-- | src/wal.c | 136 |
1 files changed, 107 insertions, 29 deletions
@@ -2004,6 +2004,19 @@ static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ } #ifdef SQLITE_ENABLE_SETLK_TIMEOUT + + +/* +** Attempt to enable blocking locks that block for nMs ms. Return 1 if +** blocking locks are successfully enabled, or 0 otherwise. +*/ +static int walEnableBlockingMs(Wal *pWal, int nMs){ + int rc = sqlite3OsFileControl( + pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&nMs + ); + return (rc==SQLITE_OK); +} + /* ** Attempt to enable blocking locks. Blocking locks are enabled only if (a) ** they are supported by the VFS, and (b) the database handle is configured @@ -2015,11 +2028,7 @@ static int walEnableBlocking(Wal *pWal){ if( pWal->db ){ int tmout = pWal->db->busyTimeout; if( tmout ){ - int rc; - rc = sqlite3OsFileControl( - pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout - ); - res = (rc==SQLITE_OK); + res = walEnableBlockingMs(pWal, tmout); } } return res; @@ -2068,20 +2077,10 @@ void sqlite3WalDb(Wal *pWal, sqlite3 *db){ pWal->db = db; } -/* -** Take an exclusive WRITE lock. Blocking if so configured. -*/ -static int walLockWriter(Wal *pWal){ - int rc; - walEnableBlocking(pWal); - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - walDisableBlocking(pWal); - return rc; -} #else # define walEnableBlocking(x) 0 # define walDisableBlocking(x) -# define walLockWriter(pWal) walLockExclusive((pWal), WAL_WRITE_LOCK, 1) +# define walEnableBlockingMs(pWal, ms) 0 # define sqlite3WalDb(pWal, db) #endif /* ifdef SQLITE_ENABLE_SETLK_TIMEOUT */ @@ -2682,7 +2681,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ } }else{ int bWriteLock = pWal->writeLock; - if( bWriteLock || SQLITE_OK==(rc = walLockWriter(pWal)) ){ + if( bWriteLock + || SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) + ){ pWal->writeLock = 1; if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); @@ -2690,7 +2691,8 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ /* If the wal-index header is still malformed even while holding ** a WRITE lock, it can only mean that the header is corrupted and ** needs to be reconstructed. So run recovery to do exactly that. - */ + ** Disable blocking locks first. */ + walDisableBlocking(pWal); rc = walIndexRecover(pWal); *pChanged = 1; } @@ -2901,6 +2903,37 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ } /* +** The final argument passed to walTryBeginRead() is of type (int*). The +** caller should invoke walTryBeginRead as follows: +** +** int cnt = 0; +** do { +** rc = walTryBeginRead(..., &cnt); +** }while( rc==WAL_RETRY ); +** +** The final value of "cnt" is of no use to the caller. It is used by +** the implementation of walTryBeginRead() as follows: +** +** + Each time walTryBeginRead() is called, it is incremented. Once +** it reaches WAL_RETRY_PROTOCOL_LIMIT - indicating that walTryBeginRead() +** has many times been invoked and failed with WAL_RETRY - walTryBeginRead() +** returns SQLITE_PROTOCOL. +** +** + If SQLITE_ENABLE_SETLK_TIMEOUT is defined and walTryBeginRead() failed +** because a blocking lock timed out (SQLITE_BUSY_TIMEOUT from the OS +** layer), the WAL_RETRY_BLOCKED_MASK bit is set in "cnt". In this case +** the next invocation of walTryBeginRead() may omit an expected call to +** sqlite3OsSleep(). There has already been a delay when the previous call +** waited on a lock. +*/ +#define WAL_RETRY_PROTOCOL_LIMIT 100 +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT +# define WAL_RETRY_BLOCKED_MASK 0x10000000 +#else +# define WAL_RETRY_BLOCKED_MASK 0 +#endif + +/* ** Attempt to start a read transaction. This might fail due to a race or ** other transient condition. When that happens, it returns WAL_RETRY to ** indicate to the caller that it is safe to retry immediately. @@ -2950,13 +2983,16 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ ** so it takes care to hold an exclusive lock on the corresponding ** WAL_READ_LOCK() while changing values. */ -static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ +static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ int rc = SQLITE_OK; /* Return code */ u32 mxFrame; /* Wal frame to lock to */ +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + int nBlockTmout = 0; +#endif assert( pWal->readLock<0 ); /* Not currently locked */ @@ -2980,14 +3016,34 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. ** The total delay time before giving up is less than 10 seconds. */ - if( cnt>5 ){ + (*pCnt)++; + if( *pCnt>5 ){ int nDelay = 1; /* Pause time in microseconds */ - if( cnt>100 ){ + int cnt = (*pCnt & ~WAL_RETRY_BLOCKED_MASK); + if( cnt>WAL_RETRY_PROTOCOL_LIMIT ){ VVA_ONLY( pWal->lockError = 1; ) return SQLITE_PROTOCOL; } - if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; + if( *pCnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + /* In SQLITE_ENABLE_SETLK_TIMEOUT builds, configure the file-descriptor + ** to block for locks for approximately nDelay us. This affects three + ** locks: (a) the shared lock taken on the DMS slot in os_unix.c (if + ** using os_unix.c), (b) the WRITER lock taken in walIndexReadHdr() if the + ** first attempted read fails, and (c) the shared lock taken on the + ** read-mark. + ** + ** If the previous call failed due to an SQLITE_BUSY_TIMEOUT error, + ** then sleep for the minimum of 1us. The previous call already provided + ** an extra delay while it was blocking on the lock. + */ + nBlockTmout = (nDelay+998) / 1000; + if( !useWal && walEnableBlockingMs(pWal, nBlockTmout) ){ + if( *pCnt & WAL_RETRY_BLOCKED_MASK ) nDelay = 1; + } +#endif sqlite3OsSleep(pWal->pVfs, nDelay); + *pCnt &= ~WAL_RETRY_BLOCKED_MASK; } if( !useWal ){ @@ -2995,6 +3051,13 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ if( pWal->bShmUnreliable==0 ){ rc = walIndexReadHdr(pWal, pChanged); } +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + walDisableBlocking(pWal); + if( rc==SQLITE_BUSY_TIMEOUT ){ + rc = SQLITE_BUSY; + *pCnt |= WAL_RETRY_BLOCKED_MASK; + } +#endif if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to @@ -3109,9 +3172,19 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT; } + (void)walEnableBlockingMs(pWal, nBlockTmout); rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); + walDisableBlocking(pWal); if( rc ){ - return rc==SQLITE_BUSY ? WAL_RETRY : rc; +#ifdef SQLITE_ENABLE_SETLK_TIMEOUT + if( rc==SQLITE_BUSY_TIMEOUT ){ + *pCnt |= WAL_RETRY_BLOCKED_MASK; + } +#else + assert( rc!=SQLITE_BUSY_TIMEOUT ); +#endif + assert( (rc&0xFF)!=SQLITE_BUSY||rc==SQLITE_BUSY||rc==SQLITE_BUSY_TIMEOUT ); + return (rc&0xFF)==SQLITE_BUSY ? WAL_RETRY : rc; } /* Now that the read-lock has been obtained, check that neither the ** value in the aReadMark[] array or the contents of the wal-index @@ -3299,7 +3372,7 @@ static int walBeginReadTransaction(Wal *pWal, int *pChanged){ #endif do{ - rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); + rc = walTryBeginRead(pWal, pChanged, 0, &cnt); }while( rc==WAL_RETRY ); testcase( (rc&0xff)==SQLITE_BUSY ); testcase( (rc&0xff)==SQLITE_IOERR ); @@ -3480,6 +3553,7 @@ static int walFindFrame( iRead = iFrame; } if( (nCollide--)==0 ){ + *piRead = 0; return SQLITE_CORRUPT_BKPT; } iKey = walNextHash(iKey); @@ -3783,7 +3857,7 @@ static int walRestartLog(Wal *pWal){ cnt = 0; do{ int notUsed; - rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); + rc = walTryBeginRead(pWal, ¬Used, 1, &cnt); }while( rc==WAL_RETRY ); assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ testcase( (rc&0xff)==SQLITE_IOERR ); @@ -4204,10 +4278,9 @@ int sqlite3WalCheckpoint( if( pWal->readOnly ) return SQLITE_READONLY; WALTRACE(("WAL%p: checkpoint begins\n", pWal)); - /* Enable blocking locks, if possible. If blocking locks are successfully - ** enabled, set xBusy2=0 so that the busy-handler is never invoked. */ + /* Enable blocking locks, if possible. */ sqlite3WalDb(pWal, db); - (void)walEnableBlocking(pWal); + if( xBusy2 ) (void)walEnableBlocking(pWal); /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive ** "checkpoint" lock on the database file. @@ -4248,9 +4321,14 @@ int sqlite3WalCheckpoint( /* Read the wal-index header. */ SEH_TRY { if( rc==SQLITE_OK ){ + /* For a passive checkpoint, do not re-enable blocking locks after + ** reading the wal-index header. A passive checkpoint should not block + ** or invoke the busy handler. The only lock such a checkpoint may + ** attempt to obtain is a lock on a read-slot, and it should give up + ** immediately and do a partial checkpoint if it cannot obtain it. */ walDisableBlocking(pWal); rc = walIndexReadHdr(pWal, &isChanged); - (void)walEnableBlocking(pWal); + if( eMode2!=SQLITE_CHECKPOINT_PASSIVE ) (void)walEnableBlocking(pWal); if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ sqlite3OsUnfetch(pWal->pDbFd, 0, 0); } |