aboutsummaryrefslogtreecommitdiff
path: root/src/wal.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/wal.c')
-rw-r--r--src/wal.c136
1 files changed, 107 insertions, 29 deletions
diff --git a/src/wal.c b/src/wal.c
index d63c13ffc..fd2eabfd9 100644
--- a/src/wal.c
+++ b/src/wal.c
@@ -2004,6 +2004,19 @@ static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){
}
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+
+
+/*
+** Attempt to enable blocking locks that block for nMs ms. Return 1 if
+** blocking locks are successfully enabled, or 0 otherwise.
+*/
+static int walEnableBlockingMs(Wal *pWal, int nMs){
+ int rc = sqlite3OsFileControl(
+ pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&nMs
+ );
+ return (rc==SQLITE_OK);
+}
+
/*
** Attempt to enable blocking locks. Blocking locks are enabled only if (a)
** they are supported by the VFS, and (b) the database handle is configured
@@ -2015,11 +2028,7 @@ static int walEnableBlocking(Wal *pWal){
if( pWal->db ){
int tmout = pWal->db->busyTimeout;
if( tmout ){
- int rc;
- rc = sqlite3OsFileControl(
- pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout
- );
- res = (rc==SQLITE_OK);
+ res = walEnableBlockingMs(pWal, tmout);
}
}
return res;
@@ -2068,20 +2077,10 @@ void sqlite3WalDb(Wal *pWal, sqlite3 *db){
pWal->db = db;
}
-/*
-** Take an exclusive WRITE lock. Blocking if so configured.
-*/
-static int walLockWriter(Wal *pWal){
- int rc;
- walEnableBlocking(pWal);
- rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
- walDisableBlocking(pWal);
- return rc;
-}
#else
# define walEnableBlocking(x) 0
# define walDisableBlocking(x)
-# define walLockWriter(pWal) walLockExclusive((pWal), WAL_WRITE_LOCK, 1)
+# define walEnableBlockingMs(pWal, ms) 0
# define sqlite3WalDb(pWal, db)
#endif /* ifdef SQLITE_ENABLE_SETLK_TIMEOUT */
@@ -2682,7 +2681,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
}
}else{
int bWriteLock = pWal->writeLock;
- if( bWriteLock || SQLITE_OK==(rc = walLockWriter(pWal)) ){
+ if( bWriteLock
+ || SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1))
+ ){
pWal->writeLock = 1;
if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
badHdr = walIndexTryHdr(pWal, pChanged);
@@ -2690,7 +2691,8 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
/* If the wal-index header is still malformed even while holding
** a WRITE lock, it can only mean that the header is corrupted and
** needs to be reconstructed. So run recovery to do exactly that.
- */
+ ** Disable blocking locks first. */
+ walDisableBlocking(pWal);
rc = walIndexRecover(pWal);
*pChanged = 1;
}
@@ -2901,6 +2903,37 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){
}
/*
+** The final argument passed to walTryBeginRead() is of type (int*). The
+** caller should invoke walTryBeginRead as follows:
+**
+** int cnt = 0;
+** do {
+** rc = walTryBeginRead(..., &cnt);
+** }while( rc==WAL_RETRY );
+**
+** The final value of "cnt" is of no use to the caller. It is used by
+** the implementation of walTryBeginRead() as follows:
+**
+** + Each time walTryBeginRead() is called, it is incremented. Once
+** it reaches WAL_RETRY_PROTOCOL_LIMIT - indicating that walTryBeginRead()
+** has many times been invoked and failed with WAL_RETRY - walTryBeginRead()
+** returns SQLITE_PROTOCOL.
+**
+** + If SQLITE_ENABLE_SETLK_TIMEOUT is defined and walTryBeginRead() failed
+** because a blocking lock timed out (SQLITE_BUSY_TIMEOUT from the OS
+** layer), the WAL_RETRY_BLOCKED_MASK bit is set in "cnt". In this case
+** the next invocation of walTryBeginRead() may omit an expected call to
+** sqlite3OsSleep(). There has already been a delay when the previous call
+** waited on a lock.
+*/
+#define WAL_RETRY_PROTOCOL_LIMIT 100
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+# define WAL_RETRY_BLOCKED_MASK 0x10000000
+#else
+# define WAL_RETRY_BLOCKED_MASK 0
+#endif
+
+/*
** Attempt to start a read transaction. This might fail due to a race or
** other transient condition. When that happens, it returns WAL_RETRY to
** indicate to the caller that it is safe to retry immediately.
@@ -2950,13 +2983,16 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){
** so it takes care to hold an exclusive lock on the corresponding
** WAL_READ_LOCK() while changing values.
*/
-static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
+static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){
volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */
u32 mxReadMark; /* Largest aReadMark[] value */
int mxI; /* Index of largest aReadMark[] value */
int i; /* Loop counter */
int rc = SQLITE_OK; /* Return code */
u32 mxFrame; /* Wal frame to lock to */
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ int nBlockTmout = 0;
+#endif
assert( pWal->readLock<0 ); /* Not currently locked */
@@ -2980,14 +3016,34 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
** so that on the 100th (and last) RETRY we delay for 323 milliseconds.
** The total delay time before giving up is less than 10 seconds.
*/
- if( cnt>5 ){
+ (*pCnt)++;
+ if( *pCnt>5 ){
int nDelay = 1; /* Pause time in microseconds */
- if( cnt>100 ){
+ int cnt = (*pCnt & ~WAL_RETRY_BLOCKED_MASK);
+ if( cnt>WAL_RETRY_PROTOCOL_LIMIT ){
VVA_ONLY( pWal->lockError = 1; )
return SQLITE_PROTOCOL;
}
- if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
+ if( *pCnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ /* In SQLITE_ENABLE_SETLK_TIMEOUT builds, configure the file-descriptor
+ ** to block for locks for approximately nDelay us. This affects three
+ ** locks: (a) the shared lock taken on the DMS slot in os_unix.c (if
+ ** using os_unix.c), (b) the WRITER lock taken in walIndexReadHdr() if the
+ ** first attempted read fails, and (c) the shared lock taken on the
+ ** read-mark.
+ **
+ ** If the previous call failed due to an SQLITE_BUSY_TIMEOUT error,
+ ** then sleep for the minimum of 1us. The previous call already provided
+ ** an extra delay while it was blocking on the lock.
+ */
+ nBlockTmout = (nDelay+998) / 1000;
+ if( !useWal && walEnableBlockingMs(pWal, nBlockTmout) ){
+ if( *pCnt & WAL_RETRY_BLOCKED_MASK ) nDelay = 1;
+ }
+#endif
sqlite3OsSleep(pWal->pVfs, nDelay);
+ *pCnt &= ~WAL_RETRY_BLOCKED_MASK;
}
if( !useWal ){
@@ -2995,6 +3051,13 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
if( pWal->bShmUnreliable==0 ){
rc = walIndexReadHdr(pWal, pChanged);
}
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ walDisableBlocking(pWal);
+ if( rc==SQLITE_BUSY_TIMEOUT ){
+ rc = SQLITE_BUSY;
+ *pCnt |= WAL_RETRY_BLOCKED_MASK;
+ }
+#endif
if( rc==SQLITE_BUSY ){
/* If there is not a recovery running in another thread or process
** then convert BUSY errors to WAL_RETRY. If recovery is known to
@@ -3109,9 +3172,19 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT;
}
+ (void)walEnableBlockingMs(pWal, nBlockTmout);
rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
+ walDisableBlocking(pWal);
if( rc ){
- return rc==SQLITE_BUSY ? WAL_RETRY : rc;
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ if( rc==SQLITE_BUSY_TIMEOUT ){
+ *pCnt |= WAL_RETRY_BLOCKED_MASK;
+ }
+#else
+ assert( rc!=SQLITE_BUSY_TIMEOUT );
+#endif
+ assert( (rc&0xFF)!=SQLITE_BUSY||rc==SQLITE_BUSY||rc==SQLITE_BUSY_TIMEOUT );
+ return (rc&0xFF)==SQLITE_BUSY ? WAL_RETRY : rc;
}
/* Now that the read-lock has been obtained, check that neither the
** value in the aReadMark[] array or the contents of the wal-index
@@ -3299,7 +3372,7 @@ static int walBeginReadTransaction(Wal *pWal, int *pChanged){
#endif
do{
- rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
+ rc = walTryBeginRead(pWal, pChanged, 0, &cnt);
}while( rc==WAL_RETRY );
testcase( (rc&0xff)==SQLITE_BUSY );
testcase( (rc&0xff)==SQLITE_IOERR );
@@ -3480,6 +3553,7 @@ static int walFindFrame(
iRead = iFrame;
}
if( (nCollide--)==0 ){
+ *piRead = 0;
return SQLITE_CORRUPT_BKPT;
}
iKey = walNextHash(iKey);
@@ -3783,7 +3857,7 @@ static int walRestartLog(Wal *pWal){
cnt = 0;
do{
int notUsed;
- rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
+ rc = walTryBeginRead(pWal, &notUsed, 1, &cnt);
}while( rc==WAL_RETRY );
assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
testcase( (rc&0xff)==SQLITE_IOERR );
@@ -4204,10 +4278,9 @@ int sqlite3WalCheckpoint(
if( pWal->readOnly ) return SQLITE_READONLY;
WALTRACE(("WAL%p: checkpoint begins\n", pWal));
- /* Enable blocking locks, if possible. If blocking locks are successfully
- ** enabled, set xBusy2=0 so that the busy-handler is never invoked. */
+ /* Enable blocking locks, if possible. */
sqlite3WalDb(pWal, db);
- (void)walEnableBlocking(pWal);
+ if( xBusy2 ) (void)walEnableBlocking(pWal);
/* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive
** "checkpoint" lock on the database file.
@@ -4248,9 +4321,14 @@ int sqlite3WalCheckpoint(
/* Read the wal-index header. */
SEH_TRY {
if( rc==SQLITE_OK ){
+ /* For a passive checkpoint, do not re-enable blocking locks after
+ ** reading the wal-index header. A passive checkpoint should not block
+ ** or invoke the busy handler. The only lock such a checkpoint may
+ ** attempt to obtain is a lock on a read-slot, and it should give up
+ ** immediately and do a partial checkpoint if it cannot obtain it. */
walDisableBlocking(pWal);
rc = walIndexReadHdr(pWal, &isChanged);
- (void)walEnableBlocking(pWal);
+ if( eMode2!=SQLITE_CHECKPOINT_PASSIVE ) (void)walEnableBlocking(pWal);
if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
}