aboutsummaryrefslogtreecommitdiff
path: root/src/wal.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/wal.c')
-rw-r--r--src/wal.c418
1 files changed, 270 insertions, 148 deletions
diff --git a/src/wal.c b/src/wal.c
index 3e4f4acfd..c6d4476e7 100644
--- a/src/wal.c
+++ b/src/wal.c
@@ -467,6 +467,9 @@ struct Wal {
#ifdef SQLITE_ENABLE_SNAPSHOT
WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */
#endif
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ sqlite3 *db;
+#endif
};
/*
@@ -565,7 +568,7 @@ static SQLITE_NOINLINE int walIndexPageRealloc(
if( pWal->nWiData<=iPage ){
sqlite3_int64 nByte = sizeof(u32*)*(iPage+1);
volatile u32 **apNew;
- apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte);
+ apNew = (volatile u32 **)sqlite3Realloc((void *)pWal->apWiData, nByte);
if( !apNew ){
*ppPage = 0;
return SQLITE_NOMEM_BKPT;
@@ -686,6 +689,10 @@ static void walChecksumBytes(
aOut[1] = s2;
}
+/*
+** If there is the possibility of concurrent access to the SHM file
+** from multiple threads and/or processes, then do a memory barrier.
+*/
static void walShmBarrier(Wal *pWal){
if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
sqlite3OsShmBarrier(pWal->pDbFd);
@@ -693,11 +700,24 @@ static void walShmBarrier(Wal *pWal){
}
/*
+** Add the SQLITE_NO_TSAN as part of the return-type of a function
+** definition as a hint that the function contains constructs that
+** might give false-positive TSAN warnings.
+**
+** See tag-20200519-1.
+*/
+#if defined(__clang__) && !defined(SQLITE_NO_TSAN)
+# define SQLITE_NO_TSAN __attribute__((no_sanitize_thread))
+#else
+# define SQLITE_NO_TSAN
+#endif
+
+/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
-static void walIndexWriteHdr(Wal *pWal){
+static SQLITE_NO_TSAN void walIndexWriteHdr(Wal *pWal){
volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
const int nCksum = offsetof(WalIndexHdr, aCksum);
@@ -705,6 +725,7 @@ static void walIndexWriteHdr(Wal *pWal){
pWal->hdr.isInit = 1;
pWal->hdr.iVersion = WALINDEX_MAX_VERSION;
walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);
+ /* Possible TSAN false-positive. See tag-20200519-1 */
memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr));
walShmBarrier(pWal);
memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr));
@@ -840,7 +861,7 @@ static int walLockShared(Wal *pWal, int lockIdx){
SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
walLockName(lockIdx), rc ? "failed" : "ok"));
- VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
+ VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); )
return rc;
}
static void walUnlockShared(Wal *pWal, int lockIdx){
@@ -856,7 +877,7 @@ static int walLockExclusive(Wal *pWal, int lockIdx, int n){
SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
walLockName(lockIdx), n, rc ? "failed" : "ok"));
- VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
+ VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); )
return rc;
}
static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
@@ -1075,7 +1096,7 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT;
}
sLoc.aPgno[idx] = iPage;
- sLoc.aHash[iKey] = (ht_slot)idx;
+ AtomicStore(&sLoc.aHash[iKey], (ht_slot)idx);
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Verify that the number of entries in the hash table exactly equals
@@ -1128,11 +1149,6 @@ static int walIndexRecover(Wal *pWal){
u32 aFrameCksum[2] = {0, 0};
int iLock; /* Lock offset to lock for checkpoint */
-#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
- int tmout = 0;
- sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout);
-#endif
-
/* Obtain an exclusive lock on all byte in the locking range not already
** locked by the caller. The caller is guaranteed to have locked the
** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
@@ -1681,6 +1697,89 @@ static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){
return rc;
}
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+/*
+** Attempt to enable blocking locks. Blocking locks are enabled only if (a)
+** they are supported by the VFS, and (b) the database handle is configured
+** with a busy-timeout. Return 1 if blocking locks are successfully enabled,
+** or 0 otherwise.
+*/
+static int walEnableBlocking(Wal *pWal){
+ int res = 0;
+ if( pWal->db ){
+ int tmout = pWal->db->busyTimeout;
+ if( tmout ){
+ int rc;
+ rc = sqlite3OsFileControl(
+ pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout
+ );
+ res = (rc==SQLITE_OK);
+ }
+ }
+ return res;
+}
+
+/*
+** Disable blocking locks.
+*/
+static void walDisableBlocking(Wal *pWal){
+ int tmout = 0;
+ sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout);
+}
+
+/*
+** If parameter bLock is true, attempt to enable blocking locks, take
+** the WRITER lock, and then disable blocking locks. If blocking locks
+** cannot be enabled, no attempt to obtain the WRITER lock is made. Return
+** an SQLite error code if an error occurs, or SQLITE_OK otherwise. It is not
+** an error if blocking locks can not be enabled.
+**
+** If the bLock parameter is false and the WRITER lock is held, release it.
+*/
+int sqlite3WalWriteLock(Wal *pWal, int bLock){
+ int rc = SQLITE_OK;
+ assert( pWal->readLock<0 || bLock==0 );
+ if( bLock ){
+ assert( pWal->db );
+ if( walEnableBlocking(pWal) ){
+ rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
+ if( rc==SQLITE_OK ){
+ pWal->writeLock = 1;
+ }
+ walDisableBlocking(pWal);
+ }
+ }else if( pWal->writeLock ){
+ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
+ pWal->writeLock = 0;
+ }
+ return rc;
+}
+
+/*
+** Set the database handle used to determine if blocking locks are required.
+*/
+void sqlite3WalDb(Wal *pWal, sqlite3 *db){
+ pWal->db = db;
+}
+
+/*
+** Take an exclusive WRITE lock. Blocking if so configured.
+*/
+static int walLockWriter(Wal *pWal){
+ int rc;
+ walEnableBlocking(pWal);
+ rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
+ walDisableBlocking(pWal);
+ return rc;
+}
+#else
+# define walEnableBlocking(x) 0
+# define walDisableBlocking(x)
+# define walLockWriter(pWal) walLockExclusive((pWal), WAL_WRITE_LOCK, 1)
+# define sqlite3WalDb(pWal, db)
+#endif /* ifdef SQLITE_ENABLE_SETLK_TIMEOUT */
+
+
/*
** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and
** n. If the attempt fails and parameter xBusy is not NULL, then it is a
@@ -1698,6 +1797,12 @@ static int walBusyLock(
do {
rc = walLockExclusive(pWal, lockIdx, n);
}while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) );
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ if( rc==SQLITE_BUSY_TIMEOUT ){
+ walDisableBlocking(pWal);
+ rc = SQLITE_BUSY;
+ }
+#endif
return rc;
}
@@ -1735,7 +1840,7 @@ static void walRestartHdr(Wal *pWal, u32 salt1){
sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
memcpy(&pWal->hdr.aSalt[1], &salt1, 4);
walIndexWriteHdr(pWal);
- pInfo->nBackfill = 0;
+ AtomicStore(&pInfo->nBackfill, 0);
pInfo->nBackfillAttempted = 0;
pInfo->aReadMark[1] = 0;
for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
@@ -1810,32 +1915,13 @@ static int walCheckpoint(
mxSafeFrame = pWal->hdr.mxFrame;
mxPage = pWal->hdr.nPage;
for(i=1; i<WAL_NREADER; i++){
- /* Thread-sanitizer reports that the following is an unsafe read,
- ** as some other thread may be in the process of updating the value
- ** of the aReadMark[] slot. The assumption here is that if that is
- ** happening, the other client may only be increasing the value,
- ** not decreasing it. So assuming either that either the "old" or
- ** "new" version of the value is read, and not some arbitrary value
- ** that would never be written by a real client, things are still
- ** safe.
- **
- ** Astute readers have pointed out that the assumption stated in the
- ** last sentence of the previous paragraph is not guaranteed to be
- ** true for all conforming systems. However, the assumption is true
- ** for all compilers and architectures in common use today (circa
- ** 2019-11-27) and the alternatives are both slow and complex, and
- ** so we will continue to go with the current design for now. If this
- ** bothers you, or if you really are running on a system where aligned
- ** 32-bit reads and writes are not atomic, then you can simply avoid
- ** the use of WAL mode, or only use WAL mode together with
- ** PRAGMA locking_mode=EXCLUSIVE and all will be well.
- */
- u32 y = pInfo->aReadMark[i];
+ u32 y = AtomicLoad(pInfo->aReadMark+i);
if( mxSafeFrame>y ){
assert( y<=pWal->hdr.mxFrame );
rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
if( rc==SQLITE_OK ){
- pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
+ u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
+ AtomicStore(pInfo->aReadMark+i, iMark);
walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
}else if( rc==SQLITE_BUSY ){
mxSafeFrame = y;
@@ -1853,7 +1939,7 @@ static int walCheckpoint(
}
if( pIter
- && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
+ && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
){
u32 nBackfill = pInfo->nBackfill;
@@ -1868,6 +1954,7 @@ static int walCheckpoint(
if( rc==SQLITE_OK ){
i64 nReq = ((i64)mxPage * szPage);
i64 nSize; /* Current size of database file */
+ sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_START, 0);
rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
if( rc==SQLITE_OK && nSize<nReq ){
sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq);
@@ -1895,6 +1982,7 @@ static int walCheckpoint(
rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
if( rc!=SQLITE_OK ) break;
}
+ sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_DONE, 0);
/* If work was actually accomplished... */
if( rc==SQLITE_OK ){
@@ -1907,11 +1995,7 @@ static int walCheckpoint(
}
}
if( rc==SQLITE_OK ){
- rc = sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_DONE, 0);
- if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
- }
- if( rc==SQLITE_OK ){
- pInfo->nBackfill = mxSafeFrame;
+ AtomicStore(&pInfo->nBackfill, mxSafeFrame);
}
}
@@ -2070,7 +2154,7 @@ int sqlite3WalClose(
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
-static int walIndexTryHdr(Wal *pWal, int *pChanged){
+static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){
u32 aCksum[2]; /* Checksum on the header content */
WalIndexHdr h1, h2; /* Two copies of the header content */
WalIndexHdr volatile *aHdr; /* Header in shared memory */
@@ -2083,13 +2167,19 @@ static int walIndexTryHdr(Wal *pWal, int *pChanged){
** meaning it is possible that an inconsistent snapshot is read
** from the file. If this happens, return non-zero.
**
+ ** tag-20200519-1:
** There are two copies of the header at the beginning of the wal-index.
** When reading, read [0] first then [1]. Writes are in the reverse order.
** Memory barriers are used to prevent the compiler or the hardware from
- ** reordering the reads and writes.
+ ** reordering the reads and writes. TSAN and similar tools can sometimes
+ ** give false-positive warnings about these accesses because the tools do not
+ ** account for the double-read and the memory barrier. The use of mutexes
+ ** here would be problematic as the memory being accessed is potentially
+ ** shared among multiple processes and not all mutex implementions work
+ ** reliably in that environment.
*/
aHdr = walIndexHdr(pWal);
- memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
+ memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); /* Possible TSAN false-positive */
walShmBarrier(pWal);
memcpy(&h2, (void *)&aHdr[1], sizeof(h2));
@@ -2179,28 +2269,32 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
/* If the first attempt failed, it might have been due to a race
** with a writer. So get a WRITE lock and try again.
*/
- assert( badHdr==0 || pWal->writeLock==0 );
if( badHdr ){
if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){
if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
walUnlockShared(pWal, WAL_WRITE_LOCK);
rc = SQLITE_READONLY_RECOVERY;
}
- }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
- pWal->writeLock = 1;
- if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
- badHdr = walIndexTryHdr(pWal, pChanged);
- if( badHdr ){
- /* If the wal-index header is still malformed even while holding
- ** a WRITE lock, it can only mean that the header is corrupted and
- ** needs to be reconstructed. So run recovery to do exactly that.
- */
- rc = walIndexRecover(pWal);
- *pChanged = 1;
+ }else{
+ int bWriteLock = pWal->writeLock;
+ if( bWriteLock || SQLITE_OK==(rc = walLockWriter(pWal)) ){
+ pWal->writeLock = 1;
+ if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
+ badHdr = walIndexTryHdr(pWal, pChanged);
+ if( badHdr ){
+ /* If the wal-index header is still malformed even while holding
+ ** a WRITE lock, it can only mean that the header is corrupted and
+ ** needs to be reconstructed. So run recovery to do exactly that.
+ */
+ rc = walIndexRecover(pWal);
+ *pChanged = 1;
+ }
+ }
+ if( bWriteLock==0 ){
+ pWal->writeLock = 0;
+ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
}
}
- pWal->writeLock = 0;
- walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
}
}
@@ -2530,7 +2624,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
assert( pWal->nWiData>0 );
assert( pWal->apWiData[0]!=0 );
pInfo = walCkptInfo(pWal);
- if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame
+ if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame
#ifdef SQLITE_ENABLE_SNAPSHOT
&& (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0)
#endif
@@ -2697,7 +2791,7 @@ int sqlite3WalSnapshotRecover(Wal *pWal){
rc = SQLITE_NOMEM;
}else{
u32 i = pInfo->nBackfillAttempted;
- for(i=pInfo->nBackfillAttempted; i>pInfo->nBackfill; i--){
+ for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){
WalHashLoc sLoc; /* Hash table location */
u32 pgno; /* Page number in db file */
i64 iDbOff; /* Offset of db file entry */
@@ -2752,22 +2846,36 @@ int sqlite3WalSnapshotRecover(Wal *pWal){
int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
int rc; /* Return code */
int cnt = 0; /* Number of TryBeginRead attempts */
-#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
- int tmout = 0;
-#endif
-
#ifdef SQLITE_ENABLE_SNAPSHOT
int bChanged = 0;
WalIndexHdr *pSnapshot = pWal->pSnapshot;
- if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
- bChanged = 1;
- }
#endif
-#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
- /* Disable blocking locks. They are not useful when trying to open a
- ** read-transaction, and blocking may cause deadlock anyway. */
- sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout);
+ assert( pWal->ckptLock==0 );
+
+#ifdef SQLITE_ENABLE_SNAPSHOT
+ if( pSnapshot ){
+ if( memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
+ bChanged = 1;
+ }
+
+ /* It is possible that there is a checkpointer thread running
+ ** concurrent with this code. If this is the case, it may be that the
+ ** checkpointer has already determined that it will checkpoint
+ ** snapshot X, where X is later in the wal file than pSnapshot, but
+ ** has not yet set the pInfo->nBackfillAttempted variable to indicate
+ ** its intent. To avoid the race condition this leads to, ensure that
+ ** there is no checkpointer process by taking a shared CKPT lock
+ ** before checking pInfo->nBackfillAttempted. */
+ (void)walEnableBlocking(pWal);
+ rc = walLockShared(pWal, WAL_CKPT_LOCK);
+ walDisableBlocking(pWal);
+
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
+ pWal->ckptLock = 1;
+ }
#endif
do{
@@ -2778,16 +2886,6 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
testcase( rc==SQLITE_PROTOCOL );
testcase( rc==SQLITE_OK );
-#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
- /* If they were disabled earlier and the read-transaction has been
- ** successfully opened, re-enable blocking locks. This is because the
- ** connection may attempt to upgrade to a write-transaction, which does
- ** benefit from using blocking locks. */
- if( rc==SQLITE_OK ){
- sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout);
- }
-#endif
-
#ifdef SQLITE_ENABLE_SNAPSHOT
if( rc==SQLITE_OK ){
if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
@@ -2809,48 +2907,42 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 );
assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );
- /* It is possible that there is a checkpointer thread running
- ** concurrent with this code. If this is the case, it may be that the
- ** checkpointer has already determined that it will checkpoint
- ** snapshot X, where X is later in the wal file than pSnapshot, but
- ** has not yet set the pInfo->nBackfillAttempted variable to indicate
- ** its intent. To avoid the race condition this leads to, ensure that
- ** there is no checkpointer process by taking a shared CKPT lock
- ** before checking pInfo->nBackfillAttempted.
- **
- ** TODO: Does the aReadMark[] lock prevent a checkpointer from doing
- ** this already?
- */
- rc = walLockShared(pWal, WAL_CKPT_LOCK);
-
- if( rc==SQLITE_OK ){
- /* Check that the wal file has not been wrapped. Assuming that it has
- ** not, also check that no checkpointer has attempted to checkpoint any
- ** frames beyond pSnapshot->mxFrame. If either of these conditions are
- ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr
- ** with *pSnapshot and set *pChanged as appropriate for opening the
- ** snapshot. */
- if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
- && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
- ){
- assert( pWal->readLock>0 );
- memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
- *pChanged = bChanged;
- }else{
- rc = SQLITE_ERROR_SNAPSHOT;
- }
-
- /* Release the shared CKPT lock obtained above. */
- walUnlockShared(pWal, WAL_CKPT_LOCK);
- pWal->minFrame = 1;
+ /* Check that the wal file has not been wrapped. Assuming that it has
+ ** not, also check that no checkpointer has attempted to checkpoint any
+ ** frames beyond pSnapshot->mxFrame. If either of these conditions are
+ ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr
+ ** with *pSnapshot and set *pChanged as appropriate for opening the
+ ** snapshot. */
+ if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
+ && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
+ ){
+ assert( pWal->readLock>0 );
+ memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
+ *pChanged = bChanged;
+ }else{
+ rc = SQLITE_ERROR_SNAPSHOT;
}
+ /* A client using a non-current snapshot may not ignore any frames
+ ** from the start of the wal file. This is because, for a system
+ ** where (minFrame < iSnapshot < maxFrame), a checkpointer may
+ ** have omitted to checkpoint a frame earlier than minFrame in
+ ** the file because there exists a frame after iSnapshot that
+ ** is the same database page. */
+ pWal->minFrame = 1;
if( rc!=SQLITE_OK ){
sqlite3WalEndReadTransaction(pWal);
}
}
}
+
+ /* Release the shared CKPT lock obtained above. */
+ if( pWal->ckptLock ){
+ assert( pSnapshot );
+ walUnlockShared(pWal, WAL_CKPT_LOCK);
+ pWal->ckptLock = 0;
+ }
#endif
return rc;
}
@@ -2930,14 +3022,15 @@ int sqlite3WalFindFrame(
int iKey; /* Hash slot index */
int nCollide; /* Number of hash collisions remaining */
int rc; /* Error code */
+ u32 iH;
rc = walHashGet(pWal, iHash, &sLoc);
if( rc!=SQLITE_OK ){
return rc;
}
nCollide = HASHTABLE_NSLOT;
- for(iKey=walHash(pgno); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){
- u32 iH = sLoc.aHash[iKey];
+ iKey = walHash(pgno);
+ while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){
u32 iFrame = iH + sLoc.iZero;
if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH]==pgno ){
assert( iFrame>iRead || CORRUPT_DB );
@@ -2946,6 +3039,7 @@ int sqlite3WalFindFrame(
if( (nCollide--)==0 ){
return SQLITE_CORRUPT_BKPT;
}
+ iKey = walNextHash(iKey);
}
if( iRead ) break;
}
@@ -3021,6 +3115,16 @@ Pgno sqlite3WalDbsize(Wal *pWal){
int sqlite3WalBeginWriteTransaction(Wal *pWal){
int rc;
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ /* If the write-lock is already held, then it was obtained before the
+ ** read-transaction was even opened, making this call a no-op.
+ ** Return early. */
+ if( pWal->writeLock ){
+ assert( !memcmp(&pWal->hdr,(void *)walIndexHdr(pWal),sizeof(WalIndexHdr)) );
+ return SQLITE_OK;
+ }
+#endif
+
/* Cannot start a write transaction without first holding a read
** transaction. */
assert( pWal->readLock>=0 );
@@ -3597,45 +3701,52 @@ int sqlite3WalCheckpoint(
if( pWal->readOnly ) return SQLITE_READONLY;
WALTRACE(("WAL%p: checkpoint begins\n", pWal));
+ /* Enable blocking locks, if possible. If blocking locks are successfully
+ ** enabled, set xBusy2=0 so that the busy-handler is never invoked. */
+ sqlite3WalDb(pWal, db);
+ (void)walEnableBlocking(pWal);
+
/* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive
- ** "checkpoint" lock on the database file. */
+ ** "checkpoint" lock on the database file.
+ ** EVIDENCE-OF: R-10421-19736 If any other process is running a
+ ** checkpoint operation at the same time, the lock cannot be obtained and
+ ** SQLITE_BUSY is returned.
+ ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured,
+ ** it will not be invoked in this case.
+ */
rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
- if( rc ){
- /* EVIDENCE-OF: R-10421-19736 If any other process is running a
- ** checkpoint operation at the same time, the lock cannot be obtained and
- ** SQLITE_BUSY is returned.
- ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured,
- ** it will not be invoked in this case.
- */
- testcase( rc==SQLITE_BUSY );
- testcase( xBusy!=0 );
- return rc;
- }
- pWal->ckptLock = 1;
+ testcase( rc==SQLITE_BUSY );
+ testcase( rc!=SQLITE_OK && xBusy2!=0 );
+ if( rc==SQLITE_OK ){
+ pWal->ckptLock = 1;
- /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and
- ** TRUNCATE modes also obtain the exclusive "writer" lock on the database
- ** file.
- **
- ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained
- ** immediately, and a busy-handler is configured, it is invoked and the
- ** writer lock retried until either the busy-handler returns 0 or the
- ** lock is successfully obtained.
- */
- if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){
- rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1);
- if( rc==SQLITE_OK ){
- pWal->writeLock = 1;
- }else if( rc==SQLITE_BUSY ){
- eMode2 = SQLITE_CHECKPOINT_PASSIVE;
- xBusy2 = 0;
- rc = SQLITE_OK;
+ /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and
+ ** TRUNCATE modes also obtain the exclusive "writer" lock on the database
+ ** file.
+ **
+ ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained
+ ** immediately, and a busy-handler is configured, it is invoked and the
+ ** writer lock retried until either the busy-handler returns 0 or the
+ ** lock is successfully obtained.
+ */
+ if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){
+ rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1);
+ if( rc==SQLITE_OK ){
+ pWal->writeLock = 1;
+ }else if( rc==SQLITE_BUSY ){
+ eMode2 = SQLITE_CHECKPOINT_PASSIVE;
+ xBusy2 = 0;
+ rc = SQLITE_OK;
+ }
}
}
+
/* Read the wal-index header. */
if( rc==SQLITE_OK ){
+ walDisableBlocking(pWal);
rc = walIndexReadHdr(pWal, &isChanged);
+ (void)walEnableBlocking(pWal);
if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
}
@@ -3667,11 +3778,19 @@ int sqlite3WalCheckpoint(
memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
}
+ walDisableBlocking(pWal);
+ sqlite3WalDb(pWal, 0);
+
/* Release the locks. */
sqlite3WalEndWriteTransaction(pWal);
- walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
- pWal->ckptLock = 0;
+ if( pWal->ckptLock ){
+ walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
+ pWal->ckptLock = 0;
+ }
WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
+#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
+ if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY;
+#endif
return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc);
}
@@ -3788,7 +3907,10 @@ int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){
/* Try to open on pSnapshot when the next read-transaction starts
*/
-void sqlite3WalSnapshotOpen(Wal *pWal, sqlite3_snapshot *pSnapshot){
+void sqlite3WalSnapshotOpen(
+ Wal *pWal,
+ sqlite3_snapshot *pSnapshot
+){
pWal->pSnapshot = (WalIndexHdr*)pSnapshot;
}