aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/main.c2
-rw-r--r--src/os_unix.c110
-rw-r--r--src/os_win.c107
-rw-r--r--src/printf.c7
-rw-r--r--src/sqlite.h.in2
-rw-r--r--src/wal.c290
6 files changed, 430 insertions, 88 deletions
diff --git a/src/main.c b/src/main.c
index 49613f6c7..d090845ae 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1314,7 +1314,7 @@ const char *sqlite3ErrName(int rc){
case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break;
case SQLITE_READONLY: zName = "SQLITE_READONLY"; break;
case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break;
- case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break;
+ case SQLITE_READONLY_CANTINIT: zName = "SQLITE_READONLY_CANTINIT"; break;
case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break;
case SQLITE_READONLY_DBMOVED: zName = "SQLITE_READONLY_DBMOVED"; break;
case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break;
diff --git a/src/os_unix.c b/src/os_unix.c
index c33d21fd9..c5d9aca2c 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -4108,6 +4108,7 @@ struct unixShmNode {
int szRegion; /* Size of shared-memory regions */
u16 nRegion; /* Size of array apRegion */
u8 isReadonly; /* True if read-only */
+ u8 isUnlocked; /* True if no DMS lock held */
char **apRegion; /* Array of mapped shared-memory regions */
int nRef; /* Number of unixShm objects pointing to this */
unixShm *pFirst; /* All unixShm objects pointing to this */
@@ -4271,6 +4272,64 @@ static void unixShmPurge(unixFile *pFd){
}
/*
+** The DMS lock has not yet been taken on shm file pShmNode. Attempt to
+** take it now. Return SQLITE_OK if successful, or an SQLite error
+** code otherwise.
+**
+** If the DMS cannot be locked because this is a readonly_shm=1
+** connection and no other process already holds a lock, return
+** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1.
+*/
+static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){
+ struct flock lock;
+ int rc = SQLITE_OK;
+
+ /* Use F_GETLK to determine the locks other processes are holding
+ ** on the DMS byte. If it indicates that another process is holding
+ ** a SHARED lock, then this process may also take a SHARED lock
+ ** and proceed with opening the *-shm file.
+ **
+ ** Or, if no other process is holding any lock, then this process
+ ** is the first to open it. In this case take an EXCLUSIVE lock on the
+ ** DMS byte and truncate the *-shm file to zero bytes in size. Then
+ ** downgrade to a SHARED lock on the DMS byte.
+ **
+ ** If another process is holding an EXCLUSIVE lock on the DMS byte,
+ ** return SQLITE_BUSY to the caller (it will try again). An earlier
+ ** version of this code attempted the SHARED lock at this point. But
+ ** this introduced a subtle race condition: if the process holding
+ ** EXCLUSIVE failed just before truncating the *-shm file, then this
+ ** process might open and use the *-shm file without truncating it.
+ ** And if the *-shm file has been corrupted by a power failure or
+ ** system crash, the database itself may also become corrupt. */
+ lock.l_whence = SEEK_SET;
+ lock.l_start = UNIX_SHM_DMS;
+ lock.l_len = 1;
+ lock.l_type = F_WRLCK;
+ if( osFcntl(pShmNode->h, F_GETLK, &lock)!=0 ) {
+ rc = SQLITE_IOERR_LOCK;
+ }else if( lock.l_type==F_UNLCK ){
+ if( pShmNode->isReadonly ){
+ pShmNode->isUnlocked = 1;
+ rc = SQLITE_READONLY_CANTINIT;
+ }else{
+ rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1);
+ if( rc==SQLITE_OK && robust_ftruncate(pShmNode->h, 0) ){
+ rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate",pShmNode->zFilename);
+ }
+ }
+ }else if( lock.l_type==F_WRLCK ){
+ rc = SQLITE_BUSY;
+ }
+
+ if( rc==SQLITE_OK ){
+ assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK );
+ rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1);
+ }
+ return rc;
+}
+
+/*
** Open a shared-memory area associated with open database file pDbFd.
** This particular implementation uses mmapped files.
**
@@ -4308,9 +4367,9 @@ static void unixShmPurge(unixFile *pFd){
static int unixOpenSharedMemory(unixFile *pDbFd){
struct unixShm *p = 0; /* The connection to be opened */
struct unixShmNode *pShmNode; /* The underlying mmapped file */
- int rc; /* Result code */
+ int rc = SQLITE_OK; /* Result code */
unixInodeInfo *pInode; /* The inode of fd */
- char *zShmFilename; /* Name of the file used for SHM */
+ char *zShm; /* Name of the file used for SHM */
int nShmFilename; /* Size of the SHM filename in bytes */
/* Allocate space for the new unixShm object. */
@@ -4351,14 +4410,14 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
goto shm_open_err;
}
memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
- zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];
+ zShm = pShmNode->zFilename = (char*)&pShmNode[1];
#ifdef SQLITE_SHM_DIRECTORY
- sqlite3_snprintf(nShmFilename, zShmFilename,
+ sqlite3_snprintf(nShmFilename, zShm,
SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
(u32)sStat.st_ino, (u32)sStat.st_dev);
#else
- sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", zBasePath);
- sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);
+ sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath);
+ sqlite3FileSuffix3(pDbFd->zPath, zShm);
#endif
pShmNode->h = -1;
pDbFd->pInode->pShmNode = pShmNode;
@@ -4372,15 +4431,16 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
}
if( pInode->bProcessLock==0 ){
- int openFlags = O_RDWR | O_CREAT;
- if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
- openFlags = O_RDONLY;
- pShmNode->isReadonly = 1;
+ if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
+ pShmNode->h = robust_open(zShm, O_RDWR|O_CREAT, (sStat.st_mode&0777));
}
- pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
if( pShmNode->h<0 ){
- rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
- goto shm_open_err;
+ pShmNode->h = robust_open(zShm, O_RDONLY, (sStat.st_mode&0777));
+ if( pShmNode->h<0 ){
+ rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm);
+ goto shm_open_err;
+ }
+ pShmNode->isReadonly = 1;
}
/* If this process is running as root, make sure that the SHM file
@@ -4388,20 +4448,9 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
** the original owner will not be able to connect.
*/
robustFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
-
- /* Check to see if another process is holding the dead-man switch.
- ** If not, truncate the file to zero length.
- */
- rc = SQLITE_OK;
- if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
- if( robust_ftruncate(pShmNode->h, 0) ){
- rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
- }
- }
- if( rc==SQLITE_OK ){
- rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1);
- }
- if( rc ) goto shm_open_err;
+
+ rc = unixLockSharedMemory(pDbFd, pShmNode);
+ if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err;
}
}
@@ -4425,7 +4474,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
p->pNext = pShmNode->pFirst;
pShmNode->pFirst = p;
sqlite3_mutex_leave(pShmNode->mutex);
- return SQLITE_OK;
+ return rc;
/* Jump here on any error */
shm_open_err:
@@ -4477,6 +4526,11 @@ static int unixShmMap(
p = pDbFd->pShm;
pShmNode = p->pShmNode;
sqlite3_mutex_enter(pShmNode->mutex);
+ if( pShmNode->isUnlocked ){
+ rc = unixLockSharedMemory(pDbFd, pShmNode);
+ if( rc!=SQLITE_OK ) goto shmpage_out;
+ pShmNode->isUnlocked = 0;
+ }
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
assert( pShmNode->pInode==pDbFd->pInode );
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
diff --git a/src/os_win.c b/src/os_win.c
index 245a86045..086bbf90a 100644
--- a/src/os_win.c
+++ b/src/os_win.c
@@ -3673,6 +3673,9 @@ struct winShmNode {
int szRegion; /* Size of shared-memory regions */
int nRegion; /* Size of array apRegion */
+ u8 isReadonly; /* True if read-only */
+ u8 isUnlocked; /* True if no DMS lock held */
+
struct ShmRegion {
HANDLE hMap; /* File handle from CreateFileMapping */
void *pMap;
@@ -3821,6 +3824,37 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){
}
/*
+** The DMS lock has not yet been taken on shm file pShmNode. Attempt to
+** take it now. Return SQLITE_OK if successful, or an SQLite error
+** code otherwise.
+**
+** If the DMS cannot be locked because this is a readonly_shm=1
+** connection and no other process already holds a lock, return
+** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1.
+*/
+static int winLockSharedMemory(winShmNode *pShmNode){
+ int rc = winShmSystemLock(pShmNode, WINSHM_WRLCK, WIN_SHM_DMS, 1);
+
+ if( rc==SQLITE_OK ){
+ if( pShmNode->isReadonly ){
+ pShmNode->isUnlocked = 1;
+ winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1);
+ return SQLITE_READONLY_CANTINIT;
+ }else if( winTruncate((sqlite3_file*)&pShmNode->hFile, 0) ){
+ winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1);
+ return winLogError(SQLITE_IOERR_SHMOPEN, osGetLastError(),
+ "winLockSharedMemory", pShmNode->zFilename);
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1);
+ }
+
+ return winShmSystemLock(pShmNode, WINSHM_RDLCK, WIN_SHM_DMS, 1);
+}
+
+/*
** Open the shared-memory area associated with database file pDbFd.
**
** When opening a new shared-memory file, if no other instances of that
@@ -3829,9 +3863,10 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){
*/
static int winOpenSharedMemory(winFile *pDbFd){
struct winShm *p; /* The connection to be opened */
- struct winShmNode *pShmNode = 0; /* The underlying mmapped file */
- int rc; /* Result code */
- struct winShmNode *pNew; /* Newly allocated winShmNode */
+ winShmNode *pShmNode = 0; /* The underlying mmapped file */
+ int rc = SQLITE_OK; /* Result code */
+ int rc2 = SQLITE_ERROR; /* winOpen result code */
+ winShmNode *pNew; /* Newly allocated winShmNode */
int nName; /* Size of zName in bytes */
assert( pDbFd->pShm==0 ); /* Not previously opened */
@@ -3878,30 +3913,29 @@ static int winOpenSharedMemory(winFile *pDbFd){
}
}
- rc = winOpen(pDbFd->pVfs,
- pShmNode->zFilename, /* Name of the file (UTF-8) */
- (sqlite3_file*)&pShmNode->hFile, /* File handle here */
- SQLITE_OPEN_WAL | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE,
- 0);
- if( SQLITE_OK!=rc ){
- goto shm_open_err;
+ if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
+ rc2 = winOpen(pDbFd->pVfs,
+ pShmNode->zFilename,
+ (sqlite3_file*)&pShmNode->hFile,
+ SQLITE_OPEN_WAL|SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE,
+ 0);
}
-
- /* Check to see if another process is holding the dead-man switch.
- ** If not, truncate the file to zero length.
- */
- if( winShmSystemLock(pShmNode, WINSHM_WRLCK, WIN_SHM_DMS, 1)==SQLITE_OK ){
- rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0);
- if( rc!=SQLITE_OK ){
- rc = winLogError(SQLITE_IOERR_SHMOPEN, osGetLastError(),
- "winOpenShm", pDbFd->zPath);
+ if( rc2!=SQLITE_OK ){
+ rc2 = winOpen(pDbFd->pVfs,
+ pShmNode->zFilename,
+ (sqlite3_file*)&pShmNode->hFile,
+ SQLITE_OPEN_WAL|SQLITE_OPEN_READONLY,
+ 0);
+ if( rc2!=SQLITE_OK ){
+ rc = winLogError(rc2, osGetLastError(), "winOpenShm",
+ pShmNode->zFilename);
+ goto shm_open_err;
}
+ pShmNode->isReadonly = 1;
}
- if( rc==SQLITE_OK ){
- winShmSystemLock(pShmNode, WINSHM_UNLCK, WIN_SHM_DMS, 1);
- rc = winShmSystemLock(pShmNode, WINSHM_RDLCK, WIN_SHM_DMS, 1);
- }
- if( rc ) goto shm_open_err;
+
+ rc = winLockSharedMemory(pShmNode);
+ if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err;
}
/* Make the new connection a child of the winShmNode */
@@ -3924,7 +3958,7 @@ static int winOpenSharedMemory(winFile *pDbFd){
p->pNext = pShmNode->pFirst;
pShmNode->pFirst = p;
sqlite3_mutex_leave(pShmNode->mutex);
- return SQLITE_OK;
+ return rc;
/* Jump here on any error */
shm_open_err:
@@ -4128,6 +4162,8 @@ static int winShmMap(
winFile *pDbFd = (winFile*)fd;
winShm *pShm = pDbFd->pShm;
winShmNode *pShmNode;
+ DWORD protect = PAGE_READWRITE;
+ DWORD flags = FILE_MAP_WRITE | FILE_MAP_READ;
int rc = SQLITE_OK;
if( !pShm ){
@@ -4138,6 +4174,11 @@ static int winShmMap(
pShmNode = pShm->pShmNode;
sqlite3_mutex_enter(pShmNode->mutex);
+ if( pShmNode->isUnlocked ){
+ rc = winLockSharedMemory(pShmNode);
+ if( rc!=SQLITE_OK ) goto shmpage_out;
+ pShmNode->isUnlocked = 0;
+ }
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
if( pShmNode->nRegion<=iRegion ){
@@ -4184,21 +4225,26 @@ static int winShmMap(
}
pShmNode->aRegion = apNew;
+ if( pShmNode->isReadonly ){
+ protect = PAGE_READONLY;
+ flags = FILE_MAP_READ;
+ }
+
while( pShmNode->nRegion<=iRegion ){
HANDLE hMap = NULL; /* file-mapping handle */
void *pMap = 0; /* Mapped memory region */
#if SQLITE_OS_WINRT
hMap = osCreateFileMappingFromApp(pShmNode->hFile.h,
- NULL, PAGE_READWRITE, nByte, NULL
+ NULL, protect, nByte, NULL
);
#elif defined(SQLITE_WIN32_HAS_WIDE)
hMap = osCreateFileMappingW(pShmNode->hFile.h,
- NULL, PAGE_READWRITE, 0, nByte, NULL
+ NULL, protect, 0, nByte, NULL
);
#elif defined(SQLITE_WIN32_HAS_ANSI) && SQLITE_WIN32_CREATEFILEMAPPINGA
hMap = osCreateFileMappingA(pShmNode->hFile.h,
- NULL, PAGE_READWRITE, 0, nByte, NULL
+ NULL, protect, 0, nByte, NULL
);
#endif
OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n",
@@ -4208,11 +4254,11 @@ static int winShmMap(
int iOffset = pShmNode->nRegion*szRegion;
int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity;
#if SQLITE_OS_WINRT
- pMap = osMapViewOfFileFromApp(hMap, FILE_MAP_WRITE | FILE_MAP_READ,
+ pMap = osMapViewOfFileFromApp(hMap, flags,
iOffset - iOffsetShift, szRegion + iOffsetShift
);
#else
- pMap = osMapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ,
+ pMap = osMapViewOfFile(hMap, flags,
0, iOffset - iOffsetShift, szRegion + iOffsetShift
);
#endif
@@ -4243,6 +4289,7 @@ shmpage_out:
}else{
*pp = 0;
}
+ if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
sqlite3_mutex_leave(pShmNode->mutex);
return rc;
}
diff --git a/src/printf.c b/src/printf.c
index 9427844e0..ca8d26e4f 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -1092,8 +1092,15 @@ void sqlite3DebugPrintf(const char *zFormat, ...){
sqlite3VXPrintf(&acc, zFormat, ap);
va_end(ap);
sqlite3StrAccumFinish(&acc);
+#ifdef SQLITE_OS_TRACE_PROC
+ {
+ extern void SQLITE_OS_TRACE_PROC(const char *zBuf, int nBuf);
+ SQLITE_OS_TRACE_PROC(zBuf, sizeof(zBuf));
+ }
+#else
fprintf(stdout,"%s", zBuf);
fflush(stdout);
+#endif
}
#endif
diff --git a/src/sqlite.h.in b/src/sqlite.h.in
index f2a379d55..443406442 100644
--- a/src/sqlite.h.in
+++ b/src/sqlite.h.in
@@ -508,11 +508,13 @@ int sqlite3_exec(
#define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8))
#define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8))
#define SQLITE_CANTOPEN_CONVPATH (SQLITE_CANTOPEN | (4<<8))
+#define SQLITE_CANTOPEN_DIRTYWAL (SQLITE_CANTOPEN | (5<<8))
#define SQLITE_CORRUPT_VTAB (SQLITE_CORRUPT | (1<<8))
#define SQLITE_READONLY_RECOVERY (SQLITE_READONLY | (1<<8))
#define SQLITE_READONLY_CANTLOCK (SQLITE_READONLY | (2<<8))
#define SQLITE_READONLY_ROLLBACK (SQLITE_READONLY | (3<<8))
#define SQLITE_READONLY_DBMOVED (SQLITE_READONLY | (4<<8))
+#define SQLITE_READONLY_CANTINIT (SQLITE_READONLY | (5<<8))
#define SQLITE_ABORT_ROLLBACK (SQLITE_ABORT | (2<<8))
#define SQLITE_CONSTRAINT_CHECK (SQLITE_CONSTRAINT | (1<<8))
#define SQLITE_CONSTRAINT_COMMITHOOK (SQLITE_CONSTRAINT | (2<<8))
diff --git a/src/wal.c b/src/wal.c
index 19c9ea0a0..376151e1a 100644
--- a/src/wal.c
+++ b/src/wal.c
@@ -455,6 +455,7 @@ struct Wal {
u8 truncateOnCommit; /* True to truncate WAL file on commit */
u8 syncHeader; /* Fsync the WAL header if true */
u8 padToSectorBoundary; /* Pad transactions out to the next sector */
+ u8 bShmUnreliable; /* SHM content is read-only and unreliable */
WalIndexHdr hdr; /* Wal-index header for current transaction */
u32 minFrame; /* Ignore wal frames before this one */
u32 iReCksum; /* On commit, recalculate checksums from here */
@@ -544,6 +545,11 @@ struct WalIterator {
** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
** numbered from zero.
**
+** If the wal-index is currently smaller the iPage pages then the size
+** of the wal-index might be increased, but only if it is safe to do
+** so. It is safe to enlarge the wal-index if pWal->writeLock is true
+** or pWal->exclusiveMode==WAL_HEAPMEMORY_MODE.
+**
** If this call is successful, *ppPage is set to point to the wal-index
** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
** then an SQLite error code is returned and *ppPage is set to 0.
@@ -575,9 +581,13 @@ static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ,
pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
);
- if( rc==SQLITE_READONLY ){
+ assert( pWal->apWiData[iPage]!=0 || rc!=SQLITE_OK || pWal->writeLock==0 );
+ testcase( pWal->apWiData[iPage]==0 && rc==SQLITE_OK );
+ if( (rc&0xff)==SQLITE_READONLY ){
pWal->readOnly |= WAL_SHM_RDONLY;
- rc = SQLITE_OK;
+ if( rc==SQLITE_READONLY ){
+ rc = SQLITE_OK;
+ }
}
}
}
@@ -1099,7 +1109,6 @@ static int walIndexRecover(Wal *pWal){
i64 nSize; /* Size of log file */
u32 aFrameCksum[2] = {0, 0};
int iLock; /* Lock offset to lock for checkpoint */
- int nLock; /* Number of locks to hold */
/* Obtain an exclusive lock on all byte in the locking range not already
** locked by the caller. The caller is guaranteed to have locked the
@@ -1112,11 +1121,17 @@ static int walIndexRecover(Wal *pWal){
assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
assert( pWal->writeLock );
iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
- nLock = SQLITE_SHM_NLOCK - iLock;
- rc = walLockExclusive(pWal, iLock, nLock);
+ rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
+ if( rc==SQLITE_OK ){
+ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
+ if( rc!=SQLITE_OK ){
+ walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
+ }
+ }
if( rc ){
return rc;
}
+
WALTRACE(("WAL%p: recovery begin...\n", pWal));
memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
@@ -1254,7 +1269,8 @@ finished:
recovery_error:
WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
- walUnlockExclusive(pWal, iLock, nLock);
+ walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
+ walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
return rc;
}
@@ -1262,13 +1278,14 @@ recovery_error:
** Close an open wal-index.
*/
static void walIndexClose(Wal *pWal, int isDelete){
- if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
+ if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){
int i;
for(i=0; i<pWal->nWiData; i++){
sqlite3_free((void *)pWal->apWiData[i]);
pWal->apWiData[i] = 0;
}
- }else{
+ }
+ if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
}
}
@@ -2062,6 +2079,12 @@ static int walIndexTryHdr(Wal *pWal, int *pChanged){
}
/*
+** This is the value that walTryBeginRead returns when it needs to
+** be retried.
+*/
+#define WAL_RETRY (-1)
+
+/*
** Read the wal-index header from the wal-index and into pWal->hdr.
** If the wal-header appears to be corrupt, try to reconstruct the
** wal-index from the WAL before returning.
@@ -2084,9 +2107,29 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
assert( pChanged );
rc = walIndexPage(pWal, 0, &page0);
if( rc!=SQLITE_OK ){
- return rc;
- };
- assert( page0 || pWal->writeLock==0 );
+ assert( rc!=SQLITE_READONLY ); /* READONLY changed to OK in walIndexPage */
+ if( rc==SQLITE_READONLY_CANTINIT ){
+ /* The SQLITE_READONLY_CANTINIT return means that the shared-memory
+ ** was openable but is not writable, and this thread is unable to
+ ** confirm that another write-capable connection has the shared-memory
+ ** open, and hence the content of the shared-memory is unreliable,
+ ** since the shared-memory might be inconsistent with the WAL file
+ ** and there is no writer on hand to fix it. */
+ assert( page0==0 );
+ assert( pWal->writeLock==0 );
+ assert( pWal->readOnly & WAL_SHM_RDONLY );
+ pWal->bShmUnreliable = 1;
+ pWal->exclusiveMode = WAL_HEAPMEMORY_MODE;
+ *pChanged = 1;
+ }else{
+ return rc; /* Any other non-OK return is just an error */
+ }
+ }else{
+ /* page0 can be NULL if the SHM is zero bytes in size and pWal->writeLock
+ ** is zero, which prevents the SHM from growing */
+ testcase( page0!=0 );
+ }
+ assert( page0!=0 || pWal->writeLock==0 );
/* If the first page of the wal-index has been mapped, try to read the
** wal-index header immediately, without holding any lock. This usually
@@ -2100,7 +2143,7 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
*/
assert( badHdr==0 || pWal->writeLock==0 );
if( badHdr ){
- if( pWal->readOnly & WAL_SHM_RDONLY ){
+ if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){
if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
walUnlockShared(pWal, WAL_WRITE_LOCK);
rc = SQLITE_READONLY_RECOVERY;
@@ -2130,15 +2173,193 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
rc = SQLITE_CANTOPEN_BKPT;
}
+ if( pWal->bShmUnreliable ){
+ if( rc!=SQLITE_OK ){
+ walIndexClose(pWal, 0);
+ pWal->bShmUnreliable = 0;
+ assert( pWal->nWiData>0 && pWal->apWiData[0]==0 );
+ /* walIndexRecover() might have returned SHORT_READ if a concurrent
+ ** writer truncated the WAL out from under it. If that happens, it
+ ** indicates that a writer has fixed the SHM file for us, so retry */
+ if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY;
+ }
+ pWal->exclusiveMode = WAL_NORMAL_MODE;
+ }
return rc;
}
/*
-** This is the value that walTryBeginRead returns when it needs to
-** be retried.
-*/
-#define WAL_RETRY (-1)
+** Open a transaction in a connection where the shared-memory is read-only
+** and where we cannot verify that there is a separate write-capable connection
+** on hand to keep the shared-memory up-to-date with the WAL file.
+**
+** This can happen, for example, when the shared-memory is implemented by
+** memory-mapping a *-shm file, where a prior writer has shut down and
+** left the *-shm file on disk, and now the present connection is trying
+** to use that database but lacks write permission on the *-shm file.
+** Other scenarios are also possible, depending on the VFS implementation.
+**
+** Precondition:
+**
+** The *-wal file has been read and an appropriate wal-index has been
+** constructed in pWal->apWiData[] using heap memory instead of shared
+** memory.
+**
+** If this function returns SQLITE_OK, then the read transaction has
+** been successfully opened. In this case output variable (*pChanged)
+** is set to true before returning if the caller should discard the
+** contents of the page cache before proceeding. Or, if it returns
+** WAL_RETRY, then the heap memory wal-index has been discarded and
+** the caller should retry opening the read transaction from the
+** beginning (including attempting to map the *-shm file).
+**
+** If an error occurs, an SQLite error code is returned.
+*/
+static int walBeginShmUnreliable(Wal *pWal, int *pChanged){
+ i64 szWal; /* Size of wal file on disk in bytes */
+ i64 iOffset; /* Current offset when reading wal file */
+ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */
+ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */
+ int szFrame; /* Number of bytes in buffer aFrame[] */
+ u8 *aData; /* Pointer to data part of aFrame buffer */
+ volatile void *pDummy; /* Dummy argument for xShmMap */
+ int rc; /* Return code */
+ u32 aSaveCksum[2]; /* Saved copy of pWal->hdr.aFrameCksum */
+
+ assert( pWal->bShmUnreliable );
+ assert( pWal->readOnly & WAL_SHM_RDONLY );
+ assert( pWal->nWiData>0 && pWal->apWiData[0] );
+
+ /* Take WAL_READ_LOCK(0). This has the effect of preventing any
+ ** writers from running a checkpoint, but does not stop them
+ ** from running recovery. */
+ rc = walLockShared(pWal, WAL_READ_LOCK(0));
+ if( rc!=SQLITE_OK ){
+ if( rc==SQLITE_BUSY ) rc = WAL_RETRY;
+ goto begin_unreliable_shm_out;
+ }
+ pWal->readLock = 0;
+
+ /* Check to see if a separate writer has attached to the shared-memory area,
+ ** thus making the shared-memory "reliable" again. Do this by invoking
+ ** the xShmMap() routine of the VFS and looking to see if the return
+ ** is SQLITE_READONLY instead of SQLITE_READONLY_CANTINIT.
+ **
+ ** If the shared-memory is now "reliable" return WAL_RETRY, which will
+ ** cause the heap-memory WAL-index to be discarded and the actual
+ ** shared memory to be used in its place.
+ **
+ ** This step is important because, even though this connection is holding
+ ** the WAL_READ_LOCK(0) which prevents a checkpoint, a writer might
+ ** have already checkpointed the WAL file and, while the current
+ ** is active, wrap the WAL and start overwriting frames that this
+ ** process wants to use.
+ **
+ ** Once sqlite3OsShmMap() has been called for an sqlite3_file and has
+ ** returned any SQLITE_READONLY value, it must return only SQLITE_READONLY
+ ** or SQLITE_READONLY_CANTINIT or some error for all subsequent invocations,
+ ** even if some external agent does a "chmod" to make the shared-memory
+ ** writable by us, until sqlite3OsShmUnmap() has been called.
+ ** This is a requirement on the VFS implementation.
+ */
+ rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy);
+ assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */
+ if( rc!=SQLITE_READONLY_CANTINIT ){
+ rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc);
+ goto begin_unreliable_shm_out;
+ }
+
+ /* We reach this point only if the real shared-memory is still unreliable.
+ ** Assume the in-memory WAL-index substitute is correct and load it
+ ** into pWal->hdr.
+ */
+ memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr));
+
+ /* Make sure some writer hasn't come in and changed the WAL file out
+ ** from under us, then disconnected, while we were not looking.
+ */
+ rc = sqlite3OsFileSize(pWal->pWalFd, &szWal);
+ if( rc!=SQLITE_OK ){
+ goto begin_unreliable_shm_out;
+ }
+ if( szWal<WAL_HDRSIZE ){
+ /* If the wal file is too small to contain a wal-header and the
+ ** wal-index header has mxFrame==0, then it must be safe to proceed
+ ** reading the database file only. However, the page cache cannot
+ ** be trusted, as a read/write connection may have connected, written
+ ** the db, run a checkpoint, truncated the wal file and disconnected
+ ** since this client's last read transaction. */
+ *pChanged = 1;
+ rc = (pWal->hdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY);
+ goto begin_unreliable_shm_out;
+ }
+
+ /* Check the salt keys at the start of the wal file still match. */
+ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
+ if( rc!=SQLITE_OK ){
+ goto begin_unreliable_shm_out;
+ }
+ if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){
+ /* Some writer has wrapped the WAL file while we were not looking.
+ ** Return WAL_RETRY which will cause the in-memory WAL-index to be
+ ** rebuilt. */
+ rc = WAL_RETRY;
+ goto begin_unreliable_shm_out;
+ }
+
+ /* Allocate a buffer to read frames into */
+ szFrame = pWal->hdr.szPage + WAL_FRAME_HDRSIZE;
+ aFrame = (u8 *)sqlite3_malloc64(szFrame);
+ if( aFrame==0 ){
+ rc = SQLITE_NOMEM_BKPT;
+ goto begin_unreliable_shm_out;
+ }
+ aData = &aFrame[WAL_FRAME_HDRSIZE];
+
+ /* Check to see if a complete transaction has been appended to the
+ ** wal file since the heap-memory wal-index was created. If so, the
+ ** heap-memory wal-index is discarded and WAL_RETRY returned to
+ ** the caller. */
+ aSaveCksum[0] = pWal->hdr.aFrameCksum[0];
+ aSaveCksum[1] = pWal->hdr.aFrameCksum[1];
+ for(iOffset=walFrameOffset(pWal->hdr.mxFrame+1, pWal->hdr.szPage);
+ iOffset+szFrame<=szWal;
+ iOffset+=szFrame
+ ){
+ u32 pgno; /* Database page number for frame */
+ u32 nTruncate; /* dbsize field from frame header */
+
+ /* Read and decode the next log frame. */
+ rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
+ if( rc!=SQLITE_OK ) break;
+ if( !walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame) ) break;
+
+ /* If nTruncate is non-zero, then a complete transaction has been
+ ** appended to this wal file. Set rc to WAL_RETRY and break out of
+ ** the loop. */
+ if( nTruncate ){
+ rc = WAL_RETRY;
+ break;
+ }
+ }
+ pWal->hdr.aFrameCksum[0] = aSaveCksum[0];
+ pWal->hdr.aFrameCksum[1] = aSaveCksum[1];
+
+ begin_unreliable_shm_out:
+ sqlite3_free(aFrame);
+ if( rc!=SQLITE_OK ){
+ int i;
+ for(i=0; i<pWal->nWiData; i++){
+ sqlite3_free((void*)pWal->apWiData[i]);
+ pWal->apWiData[i] = 0;
+ }
+ pWal->bShmUnreliable = 0;
+ sqlite3WalEndReadTransaction(pWal);
+ *pChanged = 1;
+ }
+ return rc;
+}
/*
** Attempt to start a read transaction. This might fail due to a race or
@@ -2200,6 +2421,9 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
assert( pWal->readLock<0 ); /* Not currently locked */
+ /* useWal may only be set for read/write connections */
+ assert( (pWal->readOnly & WAL_SHM_RDONLY)==0 || useWal==0 );
+
/* Take steps to avoid spinning forever if there is a protocol error.
**
** Circumstances that cause a RETRY should only last for the briefest
@@ -2228,7 +2452,10 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
}
if( !useWal ){
- rc = walIndexReadHdr(pWal, pChanged);
+ assert( rc==SQLITE_OK );
+ if( pWal->bShmUnreliable==0 ){
+ rc = walIndexReadHdr(pWal, pChanged);
+ }
if( rc==SQLITE_BUSY ){
/* If there is not a recovery running in another thread or process
** then convert BUSY errors to WAL_RETRY. If recovery is known to
@@ -2257,10 +2484,15 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
if( rc!=SQLITE_OK ){
return rc;
}
+ else if( pWal->bShmUnreliable ){
+ return walBeginShmUnreliable(pWal, pChanged);
+ }
}
+ assert( pWal->nWiData>0 );
+ assert( pWal->apWiData[0]!=0 );
pInfo = walCkptInfo(pWal);
- if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame
+ if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame
#ifdef SQLITE_ENABLE_SNAPSHOT
&& (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0
|| 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr)))
@@ -2334,7 +2566,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
}
if( mxI==0 ){
assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
- return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
+ return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT;
}
rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
@@ -2606,7 +2838,7 @@ int sqlite3WalFindFrame(
** then the WAL is ignored by the reader so return early, as if the
** WAL were empty.
*/
- if( iLast==0 || pWal->readLock==0 ){
+ if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){
*piRead = 0;
return SQLITE_OK;
}
@@ -2669,8 +2901,8 @@ int sqlite3WalFindFrame(
{
u32 iRead2 = 0;
u32 iTest;
- assert( pWal->minFrame>0 );
- for(iTest=iLast; iTest>=pWal->minFrame; iTest--){
+ assert( pWal->bShmUnreliable || pWal->minFrame>0 );
+ for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){
if( walFramePgno(pWal, iTest)==pgno ){
iRead2 = iTest;
break;
@@ -3446,24 +3678,24 @@ int sqlite3WalExclusiveMode(Wal *pWal, int op){
assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) );
if( op==0 ){
- if( pWal->exclusiveMode ){
- pWal->exclusiveMode = 0;
+ if( pWal->exclusiveMode!=WAL_NORMAL_MODE ){
+ pWal->exclusiveMode = WAL_NORMAL_MODE;
if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
- pWal->exclusiveMode = 1;
+ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
}
- rc = pWal->exclusiveMode==0;
+ rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
}else{
/* Already in locking_mode=NORMAL */
rc = 0;
}
}else if( op>0 ){
- assert( pWal->exclusiveMode==0 );
+ assert( pWal->exclusiveMode==WAL_NORMAL_MODE );
assert( pWal->readLock>=0 );
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
- pWal->exclusiveMode = 1;
+ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
rc = 1;
}else{
- rc = pWal->exclusiveMode==0;
+ rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
}
return rc;
}