aboutsummaryrefslogtreecommitdiff
path: root/src/os_unix.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/os_unix.c')
-rw-r--r--src/os_unix.c833
1 files changed, 831 insertions, 2 deletions
diff --git a/src/os_unix.c b/src/os_unix.c
index 769e75df3..986963a18 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -119,6 +119,7 @@
#include <time.h>
#include <sys/time.h>
#include <errno.h>
+#include <sys/mman.h>
#if SQLITE_ENABLE_LOCKING_STYLE
# include <sys/ioctl.h>
@@ -1536,9 +1537,11 @@ static int _posixUnlock(sqlite3_file *id, int locktype, int handleNFSUnlock){
** the file has changed and hence might not know to flush their
** cache. The use of a stale cache can lead to database corruption.
*/
+#if 0
assert( pFile->inNormalWrite==0
|| pFile->dbUpdate==0
|| pFile->transCntrChng==1 );
+#endif
pFile->inNormalWrite = 0;
#endif
@@ -2956,10 +2959,12 @@ static int unixRead(
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
+#if 0
assert( pFile->pUnused==0
|| offset>=PENDING_BYTE+512
|| offset+amt<=PENDING_BYTE
);
+#endif
got = seekAndRead(pFile, offset, pBuf, amt);
if( got==amt ){
@@ -3031,10 +3036,12 @@ static int unixWrite(
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
+#if 0
assert( pFile->pUnused==0
|| offset>=PENDING_BYTE+512
|| offset+amt<=PENDING_BYTE
);
+#endif
#ifndef NDEBUG
/* If we are doing a normal write to a database file (as opposed to
@@ -4555,6 +4562,820 @@ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
return 0;
}
+#ifndef SQLITE_OMIT_WAL
+
+/* Forward reference */
+typedef struct unixShm unixShm;
+typedef struct unixShmFile unixShmFile;
+
+/*
+** Object used to represent a single file opened and mmapped to provide
+** shared memory. When multiple threads all reference the same
+** log-summary, each thread has its own unixFile object, but they all
+** point to a single instance of this object. In other words, each
+** log-summary is opened only once per process.
+**
+** unixMutexHeld() must be true when creating or destroying
+** this object or while reading or writing the following fields:
+**
+** nRef
+** pNext
+**
+** The following fields are read-only after the object is created:
+**
+** fid
+** zFilename
+**
+** Either unixShmFile.mutex must be held or unixShmFile.nRef==0 and
+** unixMutexHeld() is true when reading or writing any other field
+** in this structure.
+**
+** To avoid deadlocks, mutex and mutexBuf are always released in the
+** reverse order that they are acquired. mutexBuf is always acquired
+** first and released last. This invariant is checked by asserting
+** sqlite3_mutex_notheld() on mutex whenever mutexBuf is acquired or
+** released.
+*/
+struct unixShmFile {
+ struct unixFileId fid; /* Unique file identifier */
+ sqlite3_mutex *mutex; /* Mutex to access this object */
+ /* mutexBuf is taken by unixShmGet() and dropped by unixShmRelease() so
+ ** that the mapping cannot be remapped while a caller is using it. */
+ sqlite3_mutex *mutexBuf; /* Mutex to access the mapped buffer pMMapBuf */
+ /* zFilename points at storage allocated immediately after this struct
+ ** (see unixShmOpen()), so it is released together with the struct. */
+ char *zFilename; /* Name of the file */
+ int h; /* Open file descriptor. -1 if not open */
+ int szMap; /* Size of the mapping of file into memory */
+ char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */
+ int nRef; /* Number of unixShm objects pointing to this */
+ unixShm *pFirst; /* Head of list of unixShm objects pointing to this */
+ unixShmFile *pNext; /* Next in list of all unixShmFile objects */
+#ifdef SQLITE_DEBUG
+ /* Debug-only mirror of the system-level locks, maintained by
+ ** unixShmSystemLock() and used for tracing. */
+ u8 exclMask; /* Mask of exclusive locks held */
+ u8 sharedMask; /* Mask of shared locks held */
+ u8 nextShmId; /* Next available unixShm.id value */
+#endif
+};
+
+/*
+** A global linked list of all unixShmFile objects.
+**
+** The unixMutexHeld() must be true while reading or writing this list.
+*/
+static unixShmFile *unixShmFileList = 0;
+
+/*
+** Structure used internally by this VFS to record the state of an
+** open shared memory connection.
+**
+** unixShm.pFile->mutex must be held while reading or writing the
+** unixShm.pNext, unixShm.sharedMask and unixShm.exclMask elements.
+**
+** The unixShm.pFile element is initialized when the object is created
+** and is read-only thereafter.
+*/
+struct unixShm {
+ unixShmFile *pFile; /* The underlying unixShmFile object */
+ unixShm *pNext; /* Next unixShm with the same unixShmFile */
+ u8 lockState; /* Current lock state (an SQLITE_SHM_xxxxx value) */
+ /* readLock remembers whether the last read lock obtained was
+ ** SQLITE_SHM_READ or SQLITE_SHM_READ_FULL; unixShmLock() restores
+ ** lockState from it when dropping from RECOVER back to READ. */
+ u8 readLock; /* Which of the two read-lock states to use */
+ /* NOTE(review): hasMutex is not set or tested by any of the
+ ** shared-memory routines next to this struct - confirm it is needed. */
+ u8 hasMutex; /* True if holding the unixShmFile mutex */
+ u8 hasMutexBuf; /* True if holding pFile->mutexBuf */
+ u8 sharedMask; /* Mask of shared locks held */
+ u8 exclMask; /* Mask of exclusive locks held */
+#ifdef SQLITE_DEBUG
+ u8 id; /* Id of this connection with its unixShmFile */
+#endif
+};
+
+/*
+** Size increment by which shared memory grows
+*/
+#define SQLITE_UNIX_SHM_INCR 4096
+
+/*
+** Constants used for locking
+**
+** Each lock is one bit of a u8 mask. unixShmSystemLock() maps bit
+** number N of a mask onto the single lock byte at file offset
+** UNIX_SHM_BASE+N, so a run of adjacent bits becomes one contiguous
+** byte-range lock.
+*/
+#define UNIX_SHM_BASE 32 /* Byte offset of the first lock byte */
+#define UNIX_SHM_MUTEX 0x01 /* Mask for MUTEX lock */
+#define UNIX_SHM_DMS 0x04 /* Mask for Dead-Man-Switch lock */
+#define UNIX_SHM_A 0x10 /* Mask for region locks... */
+#define UNIX_SHM_B 0x20
+#define UNIX_SHM_C 0x40
+#define UNIX_SHM_D 0x80
+
+#ifdef SQLITE_DEBUG
+/*
+** Render a locking mask as the six-character string "MSABCD", where
+** 'M' is the MUTEX lock, 'S' the dead-man-switch (DMS) lock and 'A'
+** through 'D' the region locks. Every lock that is absent from the
+** mask is shown as '.' in its position.
+**
+** The result lives in static storage that is handed out in rotating
+** 8-byte slots, so a handful of results can be alive at once (for
+** example as several arguments of one trace call). The storage is
+** not protected against concurrent callers.
+**
+** Debugging aid only; not part of a production build.
+*/
+static const char *unixShmLockString(u8 mask){
+  static const struct {
+    u8 bit;                    /* Lock bit tested in the mask */
+    char ch;                   /* Letter shown when the bit is set */
+  } aLock[] = {
+    { UNIX_SHM_MUTEX, 'M' },
+    { UNIX_SHM_DMS,   'S' },
+    { UNIX_SHM_A,     'A' },
+    { UNIX_SHM_B,     'B' },
+    { UNIX_SHM_C,     'C' },
+    { UNIX_SHM_D,     'D' },
+  };
+  static char zRing[48];       /* Six rotating 8-byte output slots */
+  static int iNext = 0;        /* Offset of the next slot to hand out */
+  char *zOut = &zRing[iNext];
+  int k;
+
+  /* Advance to the following slot, wrapping at the end of the ring */
+  iNext += 8;
+  if( iNext>=(int)sizeof(zRing) ) iNext = 0;
+
+  for(k=0; k<6; k++){
+    zOut[k] = (mask & aLock[k].bit) ? aLock[k].ch : '.';
+  }
+  zOut[6] = 0;
+  return zOut;
+}
+#endif /* SQLITE_DEBUG */
+
+/*
+** Apply posix advisory locks for all bytes identified in lockMask.
+**
+** lockMask might contain multiple bits but all bits are guaranteed
+** to be contiguous.
+**
+** Locks block if the UNIX_SHM_MUTEX bit is set and are non-blocking
+** otherwise.
+*/
+static int unixShmSystemLock(
+ unixShmFile *pFile, /* Apply locks to this open shared-memory segment */
+ int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */
+ u8 lockMask /* Which bytes to lock or unlock */
+){
+ struct flock f; /* The posix advisory locking structure */
+ int lockOp; /* The opcode for fcntl() */
+ int i; /* Offset into the locking byte range */
+ int rc; /* Result code from fcntl() */
+ u8 mask; /* Mask of bits in lockMask */
+
+ /* Access to the unixShmFile object is serialized by the caller */
+ assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 );
+
+ /* Initialize the locking parameters */
+ memset(&f, 0, sizeof(f));
+ f.l_type = lockType;
+ f.l_whence = SEEK_SET;
+ /* Only acquiring (not releasing) the MUTEX lock is allowed to block */
+ if( (lockMask & UNIX_SHM_MUTEX)!=0 && lockType!=F_UNLCK ){
+ lockOp = F_SETLKW;
+ OSTRACE(("SHM-LOCK requesting blocking lock\n"));
+ }else{
+ lockOp = F_SETLK;
+ }
+
+ /* Find the first bit in lockMask that is set */
+ for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){}
+ assert( mask!=0 );
+ f.l_start = i+UNIX_SHM_BASE;
+ f.l_len = 1;
+
+ /* Extend the locking range for each additional bit that is set */
+ /* mask is a u8, so shifting the top bit out leaves 0 and ends the loop */
+ mask <<= 1;
+ while( mask!=0 && (lockMask & mask)!=0 ){
+ f.l_len++;
+ mask <<= 1;
+ }
+
+ /* Verify that all bits set in lockMask are contiguous */
+ assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 );
+
+ /* Acquire the system-level lock */
+ rc = fcntl(pFile->h, lockOp, &f);
+ /* Every fcntl() failure, whatever the errno, is reported as SQLITE_BUSY */
+ rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
+
+ /* Update the global lock state and do debug tracing */
+#ifdef SQLITE_DEBUG
+ OSTRACE(("SHM-LOCK "));
+ if( rc==SQLITE_OK ){
+ if( lockType==F_UNLCK ){
+ OSTRACE(("unlock ok"));
+ pFile->exclMask &= ~lockMask;
+ pFile->sharedMask &= ~lockMask;
+ }else if( lockType==F_RDLCK ){
+ OSTRACE(("read-lock ok"));
+ pFile->exclMask &= ~lockMask;
+ pFile->sharedMask |= lockMask;
+ }else{
+ assert( lockType==F_WRLCK );
+ OSTRACE(("write-lock ok"));
+ pFile->exclMask |= lockMask;
+ pFile->sharedMask &= ~lockMask;
+ }
+ }else{
+ if( lockType==F_UNLCK ){
+ OSTRACE(("unlock failed"));
+ }else if( lockType==F_RDLCK ){
+ OSTRACE(("read-lock failed"));
+ }else{
+ assert( lockType==F_WRLCK );
+ OSTRACE(("write-lock failed"));
+ }
+ }
+ OSTRACE((" - change requested %s - afterwards %s:%s\n",
+ unixShmLockString(lockMask),
+ unixShmLockString(pFile->sharedMask),
+ unixShmLockString(pFile->exclMask)));
+#endif
+
+ return rc;
+}
+
+/*
+** Release, on behalf of connection p, every lock named in unlockMask.
+**
+** A system-level unlock is issued only for those bits that no sibling
+** connection on the same unixShmFile still holds as a shared lock.
+** The local masks of p are cleared only if the system-level step
+** succeeded (or was not needed). Returns SQLITE_OK or the error from
+** unixShmSystemLock().
+*/
+static int unixShmUnlock(
+  unixShmFile *pFile,   /* The underlying shared-memory file */
+  unixShm *p,           /* The connection to be unlocked */
+  u8 unlockMask         /* Mask of locks to be unlocked */
+){
+  unixShm *pSib;            /* Iterator over sibling connections */
+  u8 heldByOthers = 0;      /* Shared locks held by connections other than p */
+  int rc = SQLITE_OK;       /* Result code */
+
+  /* The caller serializes access to the unixShmFile object */
+  assert( sqlite3_mutex_held(pFile->mutex) );
+
+  /* Collect the shared locks of every sibling */
+  for(pSib=pFile->pFirst; pSib!=0; pSib=pSib->pNext){
+    if( pSib!=p ){
+      assert( (pSib->exclMask & (p->exclMask|p->sharedMask))==0 );
+      heldByOthers |= pSib->sharedMask;
+    }
+  }
+
+  /* Drop the system-level locks that no sibling still needs */
+  if( (unlockMask & heldByOthers)!=unlockMask ){
+    rc = unixShmSystemLock(pFile, F_UNLCK, unlockMask & ~heldByOthers);
+  }
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Forget the locks locally */
+  p->exclMask &= ~unlockMask;
+  p->sharedMask &= ~unlockMask;
+  return SQLITE_OK;
+}
+
+/*
+** Acquire shared (reader) locks for connection p on every lock named
+** in readMask.
+**
+** Returns SQLITE_BUSY immediately if a sibling connection holds any of
+** the requested locks exclusively. The system-level lock is requested
+** only when at least one requested bit is not already share-locked by
+** a sibling. On success the bits are added to p->sharedMask.
+*/
+static int unixShmSharedLock(
+  unixShmFile *pFile,   /* The underlying shared-memory file */
+  unixShm *p,           /* The connection to get the shared locks */
+  u8 readMask           /* Mask of shared locks to be acquired */
+){
+  unixShm *pSib;            /* Iterator over sibling connections */
+  u8 sharedByOthers = 0;    /* Shared locks held by connections other than p */
+  int rc = SQLITE_OK;       /* Result code */
+
+  /* The caller serializes access to the unixShmFile object */
+  assert( sqlite3_mutex_held(pFile->mutex) );
+
+  /* A sibling's exclusive lock blocks us; otherwise gather shared locks */
+  for(pSib=pFile->pFirst; pSib!=0; pSib=pSib->pNext){
+    if( pSib!=p ){
+      if( (pSib->exclMask & readMask)!=0 ){
+        return SQLITE_BUSY;
+      }
+      sharedByOthers |= pSib->sharedMask;
+    }
+  }
+
+  /* Go to the system level only if some requested bit is not yet shared */
+  if( (readMask & ~sharedByOthers)!=0 ){
+    rc = unixShmSystemLock(pFile, F_RDLCK, readMask);
+  }
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Record the shared locks locally */
+  p->sharedMask |= readMask;
+  return SQLITE_OK;
+}
+
+/*
+** Acquire exclusive locks for connection p on every lock named in
+** writeMask.
+**
+** Returns SQLITE_BUSY immediately if any sibling connection holds one
+** of the requested locks, shared or exclusive. Otherwise the lock is
+** requested at the system level and, on success, the bits move from
+** p->sharedMask to p->exclMask.
+*/
+static int unixShmExclusiveLock(
+  unixShmFile *pFile,   /* The underlying shared-memory file */
+  unixShm *p,           /* The connection to get the exclusive locks */
+  u8 writeMask          /* Mask of exclusive locks to be acquired */
+){
+  unixShm *pSib;        /* Iterator over sibling connections */
+  int rc;               /* Result code */
+
+  /* The caller serializes access to the unixShmFile object */
+  assert( sqlite3_mutex_held(pFile->mutex) );
+
+  /* Any lock held by a sibling on these bits is a conflict */
+  for(pSib=pFile->pFirst; pSib!=0; pSib=pSib->pNext){
+    if( pSib!=p && ((pSib->exclMask | pSib->sharedMask) & writeMask)!=0 ){
+      return SQLITE_BUSY;
+    }
+  }
+
+  /* Take the system-level lock first, then mirror it locally */
+  rc = unixShmSystemLock(pFile, F_WRLCK, writeMask);
+  if( rc!=SQLITE_OK ) return rc;
+  p->sharedMask &= ~writeMask;
+  p->exclMask |= writeMask;
+  return SQLITE_OK;
+}
+
+/*
+** Purge the unixShmFileList list of all entries with unixShmFile.nRef==0.
+**
+** For every purged entry this releases, in turn: both mutexes, the
+** memory mapping of the file (if one exists), the file descriptor and
+** finally the object itself (the filename is stored in the same
+** allocation).
+**
+** The caller must hold the global unix mutex (unixMutexHeld()).
+**
+** This is not a VFS shared-memory method; it is a utility function called
+** by VFS shared-memory methods.
+*/
+static void unixShmPurge(void){
+  unixShmFile **pp;
+  unixShmFile *p;
+  assert( unixMutexHeld() );
+  pp = &unixShmFileList;
+  while( (p = *pp)!=0 ){
+    if( p->nRef==0 ){
+      if( p->mutex ) sqlite3_mutex_free(p->mutex);
+      if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);
+      /* Unmap the file so the mapping does not outlive the object. The
+      ** MAP_FAILED and size tests protect against a mapping attempt that
+      ** failed and left a non-NULL failure value in pMMapBuf. */
+      if( p->pMMapBuf!=0 && (void*)p->pMMapBuf!=MAP_FAILED && p->szMap>0 ){
+        munmap(p->pMMapBuf, p->szMap);
+      }
+      if( p->h>=0 ) close(p->h);
+      /* Unlink before freeing; pp keeps pointing at the same link */
+      *pp = p->pNext;
+      sqlite3_free(p);
+    }else{
+      pp = &p->pNext;
+    }
+  }
+}
+
+/*
+** Open a shared-memory area. This particular implementation uses
+** mmapped files.
+**
+** zName is a filename used to identify the shared-memory area. The
+** implementation does not (and perhaps should not) use this name
+** directly, but rather use it as a template for finding an appropriate
+** name for the shared-memory storage. In this implementation, the
+** string "-index" is appended to zName and used as the name of the
+** mmapped file.
+**
+** When opening a new shared-memory file, if no other instances of that
+** file are currently open, in this process or in other processes, then
+** the file must be truncated to zero length or have its header cleared.
+**
+** On success SQLITE_OK is returned and *pShm points at the new
+** connection. On failure an error code is returned; *pShm is set to
+** NULL on every failure except the very first allocation failure, where
+** it is left untouched.
+*/
+static int unixShmOpen(
+  sqlite3_vfs *pVfs,    /* The VFS */
+  const char *zName,    /* Base name of file to mmap */
+  sqlite3_shm **pShm    /* Write the unixShm object created here */
+){
+  struct unixShm *p = 0;             /* The connection to be opened */
+  struct unixShmFile *pFile = 0;     /* The underlying mmapped file */
+  int rc;                            /* Result code */
+  struct unixFileId fid;             /* Unix file identifier */
+  struct unixShmFile *pNew;          /* Newly allocated pFile */
+  struct stat sStat;                 /* Result from stat() and fstat() */
+  int nName;                         /* Size of zName in bytes */
+
+  /* Allocate space for the new sqlite3_shm object.  Also speculatively
+  ** allocate space for a new unixShmFile and filename.
+  */
+  p = sqlite3_malloc( sizeof(*p) );
+  if( p==0 ) return SQLITE_NOMEM;
+  memset(p, 0, sizeof(*p));
+  nName = strlen(zName);
+  pNew = sqlite3_malloc( sizeof(*pFile) + nName + 10 );
+  if( pNew==0 ){
+    /* The global mutex has not been entered yet, so this failure must
+    ** not go through shm_open_err, which purges the list and leaves
+    ** the mutex. */
+    sqlite3_free(p);
+    *pShm = 0;
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, sizeof(*pNew));
+  /* The filename is stored directly behind the structure */
+  pNew->zFilename = (char*)&pNew[1];
+  sqlite3_snprintf(nName+10, pNew->zFilename, "%s-index", zName);
+
+  /* Look to see if there is an existing unixShmFile that can be used.
+  ** If no matching unixShmFile currently exists, create a new one.
+  */
+  unixEnterMutex();
+  rc = stat(pNew->zFilename, &sStat);
+  if( rc==0 ){
+    memset(&fid, 0, sizeof(fid));
+    fid.dev = sStat.st_dev;
+    fid.ino = sStat.st_ino;
+    for(pFile = unixShmFileList; pFile; pFile=pFile->pNext){
+      if( memcmp(&pFile->fid, &fid, sizeof(fid))==0 ) break;
+    }
+  }
+  if( pFile ){
+    sqlite3_free(pNew);
+  }else{
+    /* From here on the new object is owned by unixShmFileList.  It has
+    ** nRef==0, so on any error unixShmPurge() releases it together with
+    ** its mutexes and file descriptor. */
+    pFile = pNew;
+    pNew = 0;
+    pFile->h = -1;
+    pFile->pNext = unixShmFileList;
+    unixShmFileList = pFile;
+
+    pFile->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
+    if( pFile->mutex==0 ){
+      rc = SQLITE_NOMEM;
+      goto shm_open_err;
+    }
+    pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
+    if( pFile->mutexBuf==0 ){
+      rc = SQLITE_NOMEM;
+      goto shm_open_err;
+    }
+
+    pFile->h = open(pFile->zFilename, O_RDWR|O_CREAT, 0664);
+    if( pFile->h<0 ){
+      rc = SQLITE_CANTOPEN_BKPT;
+      goto shm_open_err;
+    }
+
+    rc = fstat(pFile->h, &sStat);
+    if( rc ){
+      rc = SQLITE_CANTOPEN_BKPT;
+      goto shm_open_err;
+    }
+    pFile->fid.dev = sStat.st_dev;
+    pFile->fid.ino = sStat.st_ino;
+
+    /* Check to see if another process is holding the dead-man switch.
+    ** If not, truncate the file to zero length.
+    */
+    if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_MUTEX) ){
+      rc = SQLITE_IOERR_LOCK;
+      goto shm_open_err;
+    }
+    if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){
+      if( ftruncate(pFile->h, 0) ){
+        rc = SQLITE_IOERR;
+      }
+    }
+    if( rc==SQLITE_OK ){
+      /* Hold the dead-man switch as a shared lock from now on */
+      rc = unixShmSystemLock(pFile, F_RDLCK, UNIX_SHM_DMS);
+    }
+    unixShmSystemLock(pFile, F_UNLCK, UNIX_SHM_MUTEX);
+    if( rc ) goto shm_open_err;
+  }
+
+  /* Make the new connection a child of the unixShmFile.
+  **
+  ** NOTE(review): the connection list is modified here under the global
+  ** unix mutex only, while the lock routines walk it under pFile->mutex.
+  ** Confirm whether pFile->mutex should also be held at this point.
+  */
+  p->pFile = pFile;
+  p->pNext = pFile->pFirst;
+#ifdef SQLITE_DEBUG
+  p->id = pFile->nextShmId++;
+#endif
+  pFile->pFirst = p;
+  pFile->nRef++;
+  *pShm = (sqlite3_shm*)p;
+  unixLeaveMutex();
+  return SQLITE_OK;
+
+  /* Jump here on any error that occurs while the global mutex is held.
+  ** At this point pFile is always a freshly created object that is linked
+  ** into unixShmFileList with nRef==0 and pNew is always NULL.  The purge
+  ** therefore frees pFile; it must not be freed a second time here.
+  */
+shm_open_err:
+  unixShmPurge();
+  sqlite3_free(p);
+  *pShm = 0;
+  unixLeaveMutex();
+  return rc;
+}
+
+/*
+** Close one connection to a shared-memory area. A NULL connection
+** is a harmless no-op.
+**
+** The connection is detached from its unixShmFile and freed. When it
+** was the last connection on that file, the unixShmFile itself is
+** purged, after first unlinking the backing file if deleteFlag is true.
+** The connection must not hold any locks. Always returns SQLITE_OK.
+*/
+static int unixShmClose(sqlite3_shm *pSharedMem, int deleteFlag){
+  unixShm *pConn = (struct unixShm*)pSharedMem;  /* Connection to close */
+  unixShmFile *pShmFile;                         /* Its shared-memory file */
+  unixShm **ppLink;                              /* Link that points at pConn */
+
+  if( pConn==0 ) return SQLITE_OK;
+  pShmFile = pConn->pFile;
+
+  /* A connection being closed must already have dropped all its locks */
+  assert( pConn->exclMask==0 );
+  assert( pConn->sharedMask==0 );
+
+  /* Detach pConn from the connection list of the file and free it */
+  sqlite3_mutex_enter(pShmFile->mutex);
+  ppLink = &pShmFile->pFirst;
+  while( *ppLink!=pConn ){
+    ppLink = &(*ppLink)->pNext;
+  }
+  *ppLink = pConn->pNext;
+  sqlite3_free(pConn);
+  sqlite3_mutex_leave(pShmFile->mutex);
+
+  /* Drop the reference; the last one out tears down the file object */
+  unixEnterMutex();
+  assert( pShmFile->nRef>0 );
+  if( --pShmFile->nRef==0 ){
+    if( deleteFlag ){
+      unlink(pShmFile->zFilename);
+    }
+    unixShmPurge();
+  }
+  unixLeaveMutex();
+
+  return SQLITE_OK;
+}
+
+/*
+** Query and/or change the size of the underlying storage for
+** a shared-memory segment. The reqSize parameter is the new size
+** of the underlying storage, or -1 to do just a query. The size
+** of the underlying storage (after resizing if resizing occurs) is
+** written into pNewSize.
+**
+** This routine does not (necessarily) change the size of the mapping
+** of the underlying storage into memory. Use xShmGet() to change
+** the mapping size.
+**
+** The reqSize parameter is the minimum size requested. The implementation
+** is free to expand the storage to some larger amount if it chooses.
+** Here a non-negative reqSize is rounded up to the next multiple of
+** SQLITE_UNIX_SHM_INCR before the file is resized.
+**
+** Returns SQLITE_OK on success. Returns SQLITE_IOERR if the file could
+** not be resized or its size could not be read; when the size cannot
+** be read, *pNewSize is set to 0.
+*/
+static int unixShmSize(
+  sqlite3_shm *pSharedMem,  /* Pointer returned by unixShmOpen() */
+  int reqSize,              /* Requested size.  -1 for query only */
+  int *pNewSize             /* Write new size here */
+){
+  unixShm *p = (unixShm*)pSharedMem;
+  unixShmFile *pFile = p->pFile;
+  int rc = SQLITE_OK;
+  struct stat sStat;
+
+  if( reqSize>=0 ){
+    /* Round the request up to a whole number of size increments */
+    reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR;
+    reqSize *= SQLITE_UNIX_SHM_INCR;
+    /* ftruncate() reports failure as -1, which is not an SQLite result
+    ** code, so translate it rather than returning it directly. */
+    if( ftruncate(pFile->h, reqSize)!=0 ){
+      rc = SQLITE_IOERR;
+    }
+  }
+  /* Report the actual size even if the resize failed */
+  if( fstat(pFile->h, &sStat)==0 ){
+    *pNewSize = (int)sStat.st_size;
+  }else{
+    *pNewSize = 0;
+    rc = SQLITE_IOERR;
+  }
+  return rc;
+}
+
+
+/*
+** Map the shared storage into memory. The minimum size of the
+** mapping should be reqMapSize if reqMapSize is positive. If
+** reqMapSize is zero or negative, the implementation can choose
+** whatever mapping size is convenient.
+**
+** *ppBuf is made to point to the memory which is a mapping of the
+** underlying storage. A mutex is acquired to prevent other threads
+** from running while *ppBuf is in use in order to prevent other threads
+** remapping *ppBuf out from under this thread. The unixShmRelease()
+** call will release the mutex. However, if the lock state is CHECKPOINT,
+** the mutex is not acquired because CHECKPOINT will never remap the
+** buffer. RECOVER might remap, though, so CHECKPOINT will acquire
+** the mutex if and when it promotes to RECOVER.
+**
+** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from
+** being remapped also prevents more than one thread from being in
+** RECOVER at a time. But, RECOVER sometimes wants to remap itself.
+** To prevent RECOVER from losing its lock while remapping, the
+** mutex is not released by unixShmRelease() when in RECOVER.
+**
+** *pNewMapSize is set to the size of the mapping.
+**
+** *ppBuf and *pNewMapSize might be NULL and zero if no space has
+** yet been allocated to the underlying storage.
+**
+** Returns SQLITE_OK on success. If the file cannot be mapped,
+** SQLITE_IOERR is returned, *ppBuf is NULL and *pNewMapSize is zero.
+** The buffer mutex, if it was acquired, stays held in either case
+** until unixShmRelease() is called.
+*/
+static int unixShmGet(
+  sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */
+  int reqMapSize,          /* Requested size of mapping. -1 means don't care */
+  int *pNewMapSize,        /* Write new size of mapping here */
+  void **ppBuf             /* Write mapping buffer origin here */
+){
+  unixShm *p = (unixShm*)pSharedMem;
+  unixShmFile *pFile = p->pFile;
+  int rc = SQLITE_OK;
+
+  /* mutexBuf is always taken before mutex; see the locking-order rule
+  ** documented on struct unixShmFile. */
+  if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){
+    assert( sqlite3_mutex_notheld(pFile->mutex) );
+    sqlite3_mutex_enter(pFile->mutexBuf);
+    p->hasMutexBuf = 1;
+  }
+  sqlite3_mutex_enter(pFile->mutex);
+  if( pFile->szMap==0 || reqMapSize>pFile->szMap ){
+    int actualSize;
+
+    /* Never map less than what the file currently holds */
+    if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK
+     && reqMapSize<actualSize
+    ){
+      reqMapSize = actualSize;
+    }
+
+    /* Discard the old mapping, leaving the object in the "unmapped"
+    ** state until a new mapping has actually been established. */
+    if( pFile->pMMapBuf ){
+      munmap(pFile->pMMapBuf, pFile->szMap);
+      pFile->pMMapBuf = 0;
+      pFile->szMap = 0;
+    }
+
+    /* mmap() rejects a zero length, so an empty file with no explicit
+    ** size request simply stays unmapped (NULL buffer, zero size). */
+    if( reqMapSize>0 ){
+      void *pMap = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED,
+                        pFile->h, 0);
+      /* mmap() signals failure with MAP_FAILED, not with NULL */
+      if( pMap==MAP_FAILED ){
+        rc = SQLITE_IOERR;
+      }else{
+        pFile->pMMapBuf = (char*)pMap;
+        pFile->szMap = reqMapSize;
+      }
+    }
+  }
+  *pNewMapSize = pFile->szMap;
+  *ppBuf = pFile->pMMapBuf;
+  sqlite3_mutex_leave(pFile->mutex);
+  return rc;
+}
+
+/*
+** Drop the buffer mutex taken by unixShmGet() so that other threads
+** are free to resize (remap) the shared memory segment if necessary.
+**
+** This routine is a harmless no-op in two situations: when this
+** connection does not currently hold the buffer mutex, and when the
+** connection is in lock state RECOVER, in which case the mutex is
+** deliberately kept.
+**
+** Always returns SQLITE_OK.
+*/
+static int unixShmRelease(sqlite3_shm *pSharedMem){
+  unixShm *pConn = (unixShm*)pSharedMem;
+  unixShmFile *pShmFile;
+
+  /* Nothing to release, or RECOVER wants to keep the mutex */
+  if( !pConn->hasMutexBuf ) return SQLITE_OK;
+  if( pConn->lockState==SQLITE_SHM_RECOVER ) return SQLITE_OK;
+
+  pShmFile = pConn->pFile;
+  assert( sqlite3_mutex_notheld(pShmFile->mutex) );
+  sqlite3_mutex_leave(pShmFile->mutexBuf);
+  pConn->hasMutexBuf = 0;
+  return SQLITE_OK;
+}
+
+/*
+** Symbolic names for LOCK states used for debugging.
+*/
+#ifdef SQLITE_DEBUG
+/* Indexed by lock-state value in the OSTRACE calls of unixShmLock().
+** NOTE(review): the order presumably mirrors the numeric values of the
+** SQLITE_SHM_xxxxx constants - confirm against their definitions. */
+static const char *azLkName[] = {
+ "UNLOCK",
+ "READ",
+ "READ_FULL",
+ "WRITE",
+ "PENDING",
+ "CHECKPOINT",
+ "RECOVER"
+};
+#endif
+
+
+/*
+** Change the lock state for a shared-memory segment.
+*/
+static int unixShmLock(
+ sqlite3_shm *pSharedMem, /* Pointer from unixShmOpen() */
+ int desiredLock, /* One of SQLITE_SHM_xxxxx locking states */
+ int *pGotLock /* The lock you actually got */
+){
+ unixShm *p = (unixShm*)pSharedMem;
+ unixShmFile *pFile = p->pFile;
+ int rc = SQLITE_PROTOCOL;
+
+ /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never
+ ** directly requested; they are side effects from requesting
+ ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively.
+ */
+ assert( desiredLock==SQLITE_SHM_QUERY
+ || desiredLock==SQLITE_SHM_UNLOCK
+ || desiredLock==SQLITE_SHM_READ
+ || desiredLock==SQLITE_SHM_WRITE
+ || desiredLock==SQLITE_SHM_CHECKPOINT
+ || desiredLock==SQLITE_SHM_RECOVER );
+
+ /* Return directly if this is just a lock state query, or if
+ ** the connection is already in the desired locking state.
+ */
+ if( desiredLock==SQLITE_SHM_QUERY
+ || desiredLock==p->lockState
+ || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL)
+ ){
+ OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s and got %s\n",
+ p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState]));
+ if( pGotLock ) *pGotLock = p->lockState;
+ return SQLITE_OK;
+ }
+
+ OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n",
+ p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock]));
+
+ /* RECOVER requires the buffer mutex. It is taken before pFile->mutex
+ ** to respect the mutexBuf-first locking order. */
+ if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){
+ assert( sqlite3_mutex_notheld(pFile->mutex) );
+ sqlite3_mutex_enter(pFile->mutexBuf);
+ p->hasMutexBuf = 1;
+ }
+ sqlite3_mutex_enter(pFile->mutex);
+ switch( desiredLock ){
+ case SQLITE_SHM_UNLOCK: {
+ assert( p->lockState!=SQLITE_SHM_RECOVER );
+ /* Drop all four region locks. The result of the unlock is ignored
+ ** and the state always becomes UNLOCK. */
+ unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D);
+ rc = SQLITE_OK;
+ p->lockState = SQLITE_SHM_UNLOCK;
+ break;
+ }
+ case SQLITE_SHM_READ: {
+ if( p->lockState==SQLITE_SHM_UNLOCK ){
+ int nAttempt;
+ rc = SQLITE_BUSY;
+ assert( p->lockState==SQLITE_SHM_UNLOCK );
+ /* Up to five attempts: try shared A+B and, if that works, drop B
+ ** again and become READ. If A+B is busy, fall back to shared D
+ ** and become READ_FULL. */
+ for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){
+ rc = unixShmSharedLock(pFile, p, UNIX_SHM_A|UNIX_SHM_B);
+ if( rc==SQLITE_BUSY ){
+ rc = unixShmSharedLock(pFile, p, UNIX_SHM_D);
+ if( rc==SQLITE_OK ){
+ p->lockState = p->readLock = SQLITE_SHM_READ_FULL;
+ }
+ }else{
+ unixShmUnlock(pFile, p, UNIX_SHM_B);
+ p->lockState = p->readLock = SQLITE_SHM_READ;
+ }
+ }
+ }else if( p->lockState==SQLITE_SHM_WRITE ){
+ /* Downgrade from WRITE: shared A, then release C and D.
+ ** NOTE(review): the state is set to READ even if the shared lock
+ ** on A failed; rc still carries that failure to the caller. */
+ rc = unixShmSharedLock(pFile, p, UNIX_SHM_A);
+ unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D);
+ p->lockState = p->readLock = SQLITE_SHM_READ;
+ }else{
+ /* Leaving RECOVER: release the MUTEX lock and return to whichever
+ ** read state was recorded in readLock. */
+ assert( p->lockState==SQLITE_SHM_RECOVER );
+ unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);
+ p->lockState = p->readLock;
+ rc = SQLITE_OK;
+ }
+ break;
+ }
+ case SQLITE_SHM_WRITE: {
+ assert( p->lockState==SQLITE_SHM_READ
+ || p->lockState==SQLITE_SHM_READ_FULL );
+ rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_C|UNIX_SHM_D);
+ if( rc==SQLITE_OK ){
+ p->lockState = SQLITE_SHM_WRITE;
+ }
+ break;
+ }
+ case SQLITE_SHM_CHECKPOINT: {
+ assert( p->lockState==SQLITE_SHM_UNLOCK
+ || p->lockState==SQLITE_SHM_PENDING
+ || p->lockState==SQLITE_SHM_RECOVER );
+ /* The three tests below form a ladder: each successful step changes
+ ** lockState so that the next test can apply in the same call.
+ ** UNLOCK -> (exclusive B+C) -> PENDING -> (exclusive A) -> CHECKPOINT.
+ ** Coming from RECOVER only the MUTEX lock is released. */
+ if( p->lockState==SQLITE_SHM_RECOVER ){
+ unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);
+ p->lockState = SQLITE_SHM_CHECKPOINT;
+ rc = SQLITE_OK;
+ }
+ if( p->lockState==SQLITE_SHM_UNLOCK ){
+ rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C);
+ if( rc==SQLITE_OK ){
+ p->lockState = SQLITE_SHM_PENDING;
+ }
+ }
+ if( p->lockState==SQLITE_SHM_PENDING ){
+ rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_A);
+ if( rc==SQLITE_OK ){
+ p->lockState = SQLITE_SHM_CHECKPOINT;
+ }
+ }
+ break;
+ }
+ default: {
+ assert( desiredLock==SQLITE_SHM_RECOVER );
+ assert( p->lockState==SQLITE_SHM_READ
+ || p->lockState==SQLITE_SHM_READ_FULL
+ || p->lockState==SQLITE_SHM_CHECKPOINT );
+ assert( sqlite3_mutex_held(pFile->mutexBuf) );
+ /* The MUTEX lock is the one lock that unixShmSystemLock() requests
+ ** in blocking mode, and pFile->mutex is held while it waits. */
+ rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_MUTEX);
+ if( rc==SQLITE_OK ){
+ p->lockState = SQLITE_SHM_RECOVER;
+ }
+ break;
+ }
+ }
+ sqlite3_mutex_leave(pFile->mutex);
+ OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n",
+ p->id, getpid(), azLkName[p->lockState]));
+ /* The state actually reached is reported even when rc is an error */
+ if( pGotLock ) *pGotLock = p->lockState;
+ return rc;
+}
+
+#else
+/* When WAL support is omitted the shared-memory routines do not exist.
+** Their names are defined as 0 so that the UNIXVFS() initializer, which
+** names them for the xShm* method slots, stores null pointers there. */
+# define unixShmOpen 0
+# define unixShmSize 0
+# define unixShmGet 0
+# define unixShmRelease 0
+# define unixShmLock 0
+# define unixShmClose 0
+#endif /* #ifndef SQLITE_OMIT_WAL */
+
/*
************************ End of sqlite3_vfs methods ***************************
******************************************************************************/
@@ -5755,7 +6576,7 @@ int sqlite3_os_init(void){
** that filesystem time.
*/
#define UNIXVFS(VFSNAME, FINDER) { \
- 1, /* iVersion */ \
+ 2, /* iVersion */ \
sizeof(unixFile), /* szOsFile */ \
MAX_PATHNAME, /* mxPathname */ \
0, /* pNext */ \
@@ -5772,7 +6593,15 @@ int sqlite3_os_init(void){
unixRandomness, /* xRandomness */ \
unixSleep, /* xSleep */ \
unixCurrentTime, /* xCurrentTime */ \
- unixGetLastError /* xGetLastError */ \
+ unixGetLastError, /* xGetLastError */ \
+ unixShmOpen, /* xShmOpen */ \
+ unixShmSize, /* xShmSize */ \
+ unixShmGet, /* xShmGet */ \
+ unixShmRelease, /* xShmRelease */ \
+ unixShmLock, /* xShmLock */ \
+ unixShmClose, /* xShmClose */ \
+ 0, /* xRename */ \
+ 0, /* xCurrentTimeInt64 */ \
}
/*