about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/os.c 18
-rw-r--r-- src/os.h 4
-rw-r--r-- src/os_unix.c 275
-rw-r--r-- src/os_win.c 268
-rw-r--r-- src/sqlite.h.in 4
-rw-r--r-- src/test6.c 30
-rw-r--r-- src/test_devsym.c 37
-rw-r--r-- src/test_osinst.c 69
-rw-r--r-- src/test_vfs.c 161
-rw-r--r-- src/wal.c 759
10 files changed, 705 insertions, 920 deletions
diff --git a/src/os.c b/src/os.c
index 0b17a6b63..8fa10a9c0 100644
--- a/src/os.c
+++ b/src/os.c
@@ -101,15 +101,6 @@ int sqlite3OsDeviceCharacteristics(sqlite3_file *id){
int sqlite3OsShmOpen(sqlite3_file *id){
return id->pMethods->xShmOpen(id);
}
-int sqlite3OsShmSize(sqlite3_file *id, int reqSize, int *pNewSize){
- return id->pMethods->xShmSize(id, reqSize, pNewSize);
-}
-int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){
- return id->pMethods->xShmGet(id, reqSize, pSize, pp);
-}
-int sqlite3OsShmRelease(sqlite3_file *id){
- return id->pMethods->xShmRelease(id);
-}
int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){
return id->pMethods->xShmLock(id, offset, n, flags);
}
@@ -119,6 +110,15 @@ void sqlite3OsShmBarrier(sqlite3_file *id){
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
return id->pMethods->xShmClose(id, deleteFlag);
}
+/*
+** Forward a shared-memory mapping request to the xShmMap method of the
+** I/O methods object attached to file handle id. Every argument is handed
+** through unchanged and the method's return code is returned as-is.
+*/
+int sqlite3OsShmMap(
+  sqlite3_file *id,
+  int iPage,
+  int pgsz,
+  int isWrite,
+  void volatile **pp
+){
+  int rc = id->pMethods->xShmMap(id, iPage, pgsz, isWrite, pp);
+  return rc;
+}
/*
** The next group of routines are convenience wrappers around the
diff --git a/src/os.h b/src/os.h
index 001732798..d51eec3ab 100644
--- a/src/os.h
+++ b/src/os.h
@@ -248,12 +248,10 @@ int sqlite3OsFileControl(sqlite3_file*,int,void*);
int sqlite3OsSectorSize(sqlite3_file *id);
int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
int sqlite3OsShmOpen(sqlite3_file *id);
-int sqlite3OsShmSize(sqlite3_file *id, int, int*);
-int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**);
-int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int);
+int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **);
/*
** Functions for accessing sqlite3_vfs methods
diff --git a/src/os_unix.c b/src/os_unix.c
index dadc3c98a..fcccead72 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -3128,21 +3128,15 @@ static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and
** unixMutexHeld() is true when reading or writing any other field
** in this structure.
-**
-** To avoid deadlocks, mutex and mutexBuf are always released in the
-** reverse order that they are acquired. mutexBuf is always acquired
-** first and released last. This invariant is check by asserting
-** sqlite3_mutex_notheld() on mutex whenever mutexBuf is acquired or
-** released.
*/
struct unixShmNode {
unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */
sqlite3_mutex *mutex; /* Mutex to access this object */
- sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */
char *zFilename; /* Name of the mmapped file */
int h; /* Open file descriptor */
- int szMap; /* Size of the mapping into memory */
- char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */
+ int szRegion; /* Size of shared-memory regions */
+ int nRegion; /* Size of array apRegion */
+ char **apRegion; /* Array of mapped shared-memory regions */
int nRef; /* Number of unixShm objects pointing to this */
unixShm *pFirst; /* All unixShm objects pointing to this */
#ifdef SQLITE_DEBUG
@@ -3169,7 +3163,6 @@ struct unixShm {
unixShmNode *pShmNode; /* The underlying unixShmNode object */
unixShm *pNext; /* Next unixShm with the same unixShmNode */
u8 hasMutex; /* True if holding the unixShmNode mutex */
- u8 hasMutexBuf; /* True if holding pFile->mutexBuf */
u16 sharedMask; /* Mask of shared locks held */
u16 exclMask; /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
@@ -3266,10 +3259,13 @@ static void unixShmPurge(unixFile *pFd){
unixShmNode *p = pFd->pInode->pShmNode;
assert( unixMutexHeld() );
if( p && p->nRef==0 ){
+ int i;
assert( p->pInode==pFd->pInode );
if( p->mutex ) sqlite3_mutex_free(p->mutex);
- if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);
- if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap);
+ for(i=0; i<p->nRegion; i++){
+ munmap(p->apRegion[i], p->szRegion);
+ }
+ sqlite3_free(p->apRegion);
if( p->h>=0 ) close(p->h);
p->pInode->pShmNode = 0;
sqlite3_free(p);
@@ -3345,11 +3341,6 @@ static int unixShmOpen(
rc = SQLITE_NOMEM;
goto shm_open_err;
}
- pShmNode->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
- if( pShmNode->mutexBuf==0 ){
- rc = SQLITE_NOMEM;
- goto shm_open_err;
- }
pShmNode->h = open(pShmNode->zFilename, O_RDWR|O_CREAT, 0664);
if( pShmNode->h<0 ){
@@ -3420,7 +3411,6 @@ static int unixShmClose(
*pp = p->pNext;
/* Free the connection p */
- assert( p->hasMutexBuf==0 );
sqlite3_free(p);
pDbFd->pShm = 0;
sqlite3_mutex_leave(pShmNode->mutex);
@@ -3440,148 +3430,6 @@ static int unixShmClose(
}
/*
-** Changes the size of the underlying storage for a shared-memory segment.
-**
-** The reqSize parameter is the new requested size of the shared memory.
-** This implementation is free to increase the shared memory size to
-** any amount greater than or equal to reqSize. If the shared memory is
-** already as big or bigger as reqSize, this routine is a no-op.
-**
-** The reqSize parameter is the minimum size requested. The implementation
-** is free to expand the storage to some larger amount if it chooses.
-*/
-static int unixShmSize(
- sqlite3_file *fd, /* The open database file holding SHM */
- int reqSize, /* Requested size. -1 for query only */
- int *pNewSize /* Write new size here */
-){
- unixFile *pDbFd = (unixFile*)fd;
- unixShm *p = pDbFd->pShm;
- unixShmNode *pShmNode = p->pShmNode;
- int rc = SQLITE_OK;
- struct stat sStat;
-
- assert( pShmNode==pDbFd->pInode->pShmNode );
- assert( pShmNode->pInode==pDbFd->pInode );
-
- while( 1 ){
- if( fstat(pShmNode->h, &sStat)==0 ){
- *pNewSize = (int)sStat.st_size;
- if( reqSize<=(int)sStat.st_size ) break;
- }else{
- *pNewSize = 0;
- rc = SQLITE_IOERR_SHMSIZE;
- break;
- }
- rc = ftruncate(pShmNode->h, reqSize);
- reqSize = -1;
- }
- return rc;
-}
-
-/*
-** Release the lock held on the shared memory segment to that other
-** threads are free to resize it if necessary.
-**
-** If the lock is not currently held, this routine is a harmless no-op.
-**
-** If the shared-memory object is in lock state RECOVER, then we do not
-** really want to release the lock, so in that case too, this routine
-** is a no-op.
-*/
-static int unixShmRelease(sqlite3_file *fd){
- unixFile *pDbFd = (unixFile*)fd;
- unixShm *p = pDbFd->pShm;
-
- if( p->hasMutexBuf ){
- assert( sqlite3_mutex_notheld(p->pShmNode->mutex) );
- sqlite3_mutex_leave(p->pShmNode->mutexBuf);
- p->hasMutexBuf = 0;
- }
- return SQLITE_OK;
-}
-
-/*
-** Map the shared storage into memory.
-**
-** If reqMapSize is positive, then an attempt is made to make the
-** mapping at least reqMapSize bytes in size. However, the mapping
-** will never be larger than the size of the underlying shared memory
-** as set by prior calls to xShmSize().
-**
-** *ppBuf is made to point to the memory which is a mapping of the
-** underlying storage. A mutex is acquired to prevent other threads
-** from running while *ppBuf is in use in order to prevent other threads
-** remapping *ppBuf out from under this thread. The unixShmRelease()
-** call will release the mutex. However, if the lock state is CHECKPOINT,
-** the mutex is not acquired because CHECKPOINT will never remap the
-** buffer. RECOVER might remap, though, so CHECKPOINT will acquire
-** the mutex if and when it promotes to RECOVER.
-**
-** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from
-** being remapped also prevents more than one thread from being in
-** RECOVER at a time. But, RECOVER sometimes wants to remap itself.
-** To prevent RECOVER from losing its lock while remapping, the
-** mutex is not released by unixShmRelease() when in RECOVER.
-**
-** *pNewMapSize is set to the size of the mapping. Usually *pNewMapSize
-** will be reqMapSize or larger, though it could be smaller if the
-** underlying shared memory has never been enlarged to reqMapSize bytes
-** by prior calls to xShmSize().
-**
-** *ppBuf might be NULL and zero if no space has
-** yet been allocated to the underlying storage.
-*/
-static int unixShmGet(
- sqlite3_file *fd, /* Database file holding shared memory */
- int reqMapSize, /* Requested size of mapping. -1 means don't care */
- int *pNewMapSize, /* Write new size of mapping here */
- void volatile **ppBuf /* Write mapping buffer origin here */
-){
- unixFile *pDbFd = (unixFile*)fd;
- unixShm *p = pDbFd->pShm;
- unixShmNode *pShmNode = p->pShmNode;
- int rc = SQLITE_OK;
-
- assert( pShmNode==pDbFd->pInode->pShmNode );
- assert( pShmNode->pInode==pDbFd->pInode );
-
- if( p->hasMutexBuf==0 ){
- assert( sqlite3_mutex_notheld(pShmNode->mutex) );
- sqlite3_mutex_enter(pShmNode->mutexBuf);
- p->hasMutexBuf = 1;
- }
- sqlite3_mutex_enter(pShmNode->mutex);
- if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
- int actualSize;
- if( unixShmSize(fd, -1, &actualSize)!=SQLITE_OK ){
- actualSize = 0;
- }
- reqMapSize = actualSize;
- if( pShmNode->pMMapBuf || reqMapSize<=0 ){
- munmap(pShmNode->pMMapBuf, pShmNode->szMap);
- }
- if( reqMapSize>0 ){
- pShmNode->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED,
- pShmNode->h, 0);
- pShmNode->szMap = pShmNode->pMMapBuf ? reqMapSize : 0;
- }else{
- pShmNode->pMMapBuf = 0;
- pShmNode->szMap = 0;
- }
- }
- *pNewMapSize = pShmNode->szMap;
- *ppBuf = pShmNode->pMMapBuf;
- sqlite3_mutex_leave(pShmNode->mutex);
- if( *ppBuf==0 ){
- /* Do not hold the mutex if a NULL pointer is being returned. */
- unixShmRelease(fd);
- }
- return rc;
-}
-
-
-/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
@@ -3700,21 +3548,114 @@ static int unixShmLock(
** any load or store begun after the barrier.
*/
static void unixShmBarrier(
- sqlite3_file *fd /* Database file holding the shared memory */
+ sqlite3_file *fd /* Database file holding the shared memory */
){
unixEnterMutex();
unixLeaveMutex();
}
+/*
+** This function is called to obtain a pointer to region iRegion of the
+** shared-memory associated with the database file fd. Shared-memory regions
+** are numbered starting from zero. Each shared-memory region is szRegion
+** bytes in size.
+**
+** If an error occurs, an error code is returned and *pp is set to NULL.
+**
+** Otherwise, if the isWrite parameter is 0 and the requested shared-memory
+** region has not been allocated (by any client, including one running in a
+** separate process), then *pp is set to NULL and SQLITE_OK returned. If
+** isWrite is non-zero and the requested shared-memory region has not yet
+** been allocated, it is allocated by this function.
+**
+** If the shared-memory region has already been allocated or is allocated by
+** this call as described above, then it is mapped into this process's
+** address space (if it is not already), *pp is set to point to the mapped
+** memory and SQLITE_OK returned.
+**
+** When iRegion is beyond the last region currently mapped, every region
+** from the first unmapped one up to and including iRegion is mapped by
+** this call, each at its own offset within the file, so that
+** apRegion[i] always refers to bytes [i*szRegion, (i+1)*szRegion).
+*/
+static int unixShmMap(
+  sqlite3_file *fd,               /* Handle open on database file */
+  int iRegion,                    /* Region to retrieve */
+  int szRegion,                   /* Size of regions */
+  int isWrite,                    /* True to extend file if necessary */
+  void volatile **pp              /* OUT: Mapped memory */
+){
+  unixFile *pDbFd = (unixFile*)fd;
+  unixShm *p = pDbFd->pShm;
+  unixShmNode *pShmNode = p->pShmNode;
+  int rc = SQLITE_OK;
+
+  /* All reads and writes of the apRegion[] bookkeeping happen under the
+  ** node mutex, which is released at shmpage_out. */
+  sqlite3_mutex_enter(pShmNode->mutex);
+  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
+
+  if( pShmNode->nRegion<=iRegion ){
+    char **apNew;                      /* New apRegion[] array */
+    int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
+    struct stat sStat;                 /* Used by fstat() */
+
+    pShmNode->szRegion = szRegion;
+
+    /* The requested region is not mapped into this process's address space.
+    ** Check to see if it has been allocated (i.e. if the wal-index file is
+    ** large enough to contain the requested region).
+    */
+    if( fstat(pShmNode->h, &sStat) ){
+      rc = SQLITE_IOERR_SHMSIZE;
+      goto shmpage_out;
+    }
+
+    if( sStat.st_size<nByte ){
+      /* The requested memory region does not exist. If isWrite is set to
+      ** zero, exit early. *pp will be set to NULL and SQLITE_OK returned.
+      **
+      ** Alternatively, if isWrite is non-zero, use ftruncate() to allocate
+      ** the requested memory region.
+      */
+      if( !isWrite ) goto shmpage_out;
+      if( ftruncate(pShmNode->h, nByte) ){
+        rc = SQLITE_IOERR_SHMSIZE;
+        goto shmpage_out;
+      }
+    }
+
+    /* Grow the apRegion[] array so that it has a slot for region iRegion.
+    ** The old array pointer is only overwritten once realloc has succeeded.
+    */
+    apNew = (char **)sqlite3_realloc(
+        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
+    );
+    if( !apNew ){
+      rc = SQLITE_IOERR_NOMEM;
+      goto shmpage_out;
+    }
+    pShmNode->apRegion = apNew;
+
+    /* Map every region that is still missing. Each mapping must start at
+    ** the offset of the region being filled in (nRegion), not at the offset
+    ** of the region that was requested (iRegion). Otherwise, when more than
+    ** one region is mapped by a single call, all of the new slots would
+    ** alias the last region.
+    */
+    while( pShmNode->nRegion<=iRegion ){
+      void *pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE,
+          MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion
+      );
+      if( pMem==MAP_FAILED ){
+        rc = SQLITE_IOERR;
+        goto shmpage_out;
+      }
+      pShmNode->apRegion[pShmNode->nRegion] = pMem;
+      pShmNode->nRegion++;
+    }
+  }
+
+shmpage_out:
+  /* nRegion only exceeds iRegion when the requested region is mapped, so
+  ** every early exit above leaves *pp set to NULL. */
+  if( pShmNode->nRegion>iRegion ){
+    *pp = pShmNode->apRegion[iRegion];
+  }else{
+    *pp = 0;
+  }
+  sqlite3_mutex_leave(pShmNode->mutex);
+  return rc;
+}
#else
# define unixShmOpen 0
-# define unixShmSize 0
-# define unixShmGet 0
-# define unixShmRelease 0
# define unixShmLock 0
# define unixShmBarrier 0
# define unixShmClose 0
+# define unixShmMap 0
#endif /* #ifndef SQLITE_OMIT_WAL */
/*
@@ -3773,12 +3714,10 @@ static const sqlite3_io_methods METHOD = { \
unixSectorSize, /* xSectorSize */ \
unixDeviceCharacteristics, /* xDeviceCapabilities */ \
unixShmOpen, /* xShmOpen */ \
- unixShmSize, /* xShmSize */ \
- unixShmGet, /* xShmGet */ \
- unixShmRelease, /* xShmRelease */ \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
- unixShmClose /* xShmClose */ \
+ unixShmClose, /* xShmClose */ \
+ unixShmMap /* xShmMap */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
diff --git a/src/os_win.c b/src/os_win.c
index 1a9994b08..3a6b47771 100644
--- a/src/os_win.c
+++ b/src/os_win.c
@@ -1216,13 +1216,17 @@ static int winShmMutexHeld(void) {
*/
struct winShmNode {
sqlite3_mutex *mutex; /* Mutex to access this object */
- sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */
char *zFilename; /* Name of the file */
winFile hFile; /* File handle from winOpen */
- HANDLE hMap; /* File handle from CreateFileMapping */
+
+ int szRegion; /* Size of shared-memory regions */
+ int nRegion; /* Size of array apRegion */
+ struct ShmRegion {
+ HANDLE hMap; /* File handle from CreateFileMapping */
+ void *pMap;
+ } *aRegion;
DWORD lastErrno; /* The Windows errno from the last I/O error */
- int szMap; /* Size of the mapping of file into memory */
- char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */
+
int nRef; /* Number of winShm objects pointing to this */
winShm *pFirst; /* All winShm objects pointing to this */
winShmNode *pNext; /* Next in list of all winShmNode objects */
@@ -1325,19 +1329,18 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){
pp = &winShmNodeList;
while( (p = *pp)!=0 ){
if( p->nRef==0 ){
+ int i;
if( p->mutex ) sqlite3_mutex_free(p->mutex);
- if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);
- if( p->pMMapBuf ){
- UnmapViewOfFile(p->pMMapBuf);
- }
- if( INVALID_HANDLE_VALUE != p->hMap ){
- CloseHandle(p->hMap);
+ for(i=0; i<p->nRegion; i++){
+ UnmapViewOfFile(p->aRegion[i].pMap);
+ CloseHandle(p->aRegion[i].hMap);
}
if( p->hFile.h != INVALID_HANDLE_VALUE ) {
winClose((sqlite3_file *)&p->hFile);
}
if( deleteFlag ) winDelete(pVfs, p->zFilename, 0);
*pp = p->pNext;
+ sqlite3_free(p->aRegion);
sqlite3_free(p);
}else{
pp = &p->pNext;
@@ -1404,8 +1407,6 @@ static int winShmOpen(
}else{
pShmNode = pNew;
pNew = 0;
- pShmNode->pMMapBuf = NULL;
- pShmNode->hMap = INVALID_HANDLE_VALUE;
((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE;
pShmNode->pNext = winShmNodeList;
winShmNodeList = pShmNode;
@@ -1415,11 +1416,6 @@ static int winShmOpen(
rc = SQLITE_NOMEM;
goto shm_open_err;
}
- pShmNode->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
- if( pShmNode->mutexBuf==0 ){
- rc = SQLITE_NOMEM;
- goto shm_open_err;
- }
rc = winOpen(pDbFd->pVfs,
pShmNode->zFilename, /* Name of the file (UTF-8) */
(sqlite3_file*)&pShmNode->hFile, /* File handle here */
@@ -1507,171 +1503,113 @@ static int winShmClose(
}
/*
-** Increase the size of the underlying storage for a shared-memory segment.
+** This function is called to obtain a pointer to region iRegion of the
+** shared-memory associated with the database file fd. Shared-memory regions
+** are numbered starting from zero. Each shared-memory region is szRegion
+** bytes in size.
**
-** The reqSize parameter is the new requested minimum size of the underlying
-** shared memory. This routine may choose to make the shared memory larger
-** than this value (for example to round the shared memory size up to an
-** operating-system dependent page size.)
+** If an error occurs, an error code is returned and *pp is set to NULL.
**
-** This routine will only grow the size of shared memory. A request for
-** a smaller size is a no-op.
-*/
-static int winShmSize(
- sqlite3_file *fd, /* Database holding the shared memory */
- int reqSize, /* Requested size. -1 for query only */
- int *pNewSize /* Write new size here */
+** Otherwise, if the isWrite parameter is 0 and the requested shared-memory
+** region has not been allocated (by any client, including one running in a
+** separate process), then *pp is set to NULL and SQLITE_OK returned. If
+** isWrite is non-zero and the requested shared-memory region has not yet
+** been allocated, it is allocated by this function.
+**
+** If the shared-memory region has already been allocated or is allocated by
+** this call as described above, then it is mapped into this processes
+** address space (if it is not already), *pp is set to point to the mapped
+** memory and SQLITE_OK returned.
+*/
+static int winShmMap(
+ sqlite3_file *fd, /* Handle open on database file */
+ int iRegion, /* Region to retrieve */
+ int szRegion, /* Size of regions */
+ int isWrite, /* True to extend file if necessary */
+ void volatile **pp /* OUT: Mapped memory */
){
winFile *pDbFd = (winFile*)fd;
winShm *p = pDbFd->pShm;
winShmNode *pShmNode = p->pShmNode;
int rc = SQLITE_OK;
- *pNewSize = 0;
- if( reqSize>=0 ){
- sqlite3_int64 sz;
- rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
- if( SQLITE_OK==rc && reqSize>sz ){
- rc = winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize);
- }
- }
- if( SQLITE_OK==rc ){
- sqlite3_int64 sz;
+ sqlite3_mutex_enter(pShmNode->mutex);
+ assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
+
+ if( pShmNode->nRegion<=iRegion ){
+ struct ShmRegion *apNew; /* New aRegion[] array */
+ int nByte = (iRegion+1)*szRegion; /* Minimum required file size */
+ sqlite3_int64 sz; /* Current size of wal-index file */
+
+ pShmNode->szRegion = szRegion;
+
+ /* The requested region is not mapped into this processes address space.
+ ** Check to see if it has been allocated (i.e. if the wal-index file is
+ ** large enough to contain the requested region).
+ */
rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
- if( SQLITE_OK==rc ){
- *pNewSize = (int)sz;
- }else{
- rc = SQLITE_IOERR;
+ if( rc!=SQLITE_OK ){
+ goto shmpage_out;
}
- }
- return rc;
-}
+ if( sz<nByte ){
+ /* The requested memory region does not exist. If isWrite is set to
+ ** zero, exit early. *pp will be set to NULL and SQLITE_OK returned.
+ **
+ ** Alternatively, if isWrite is non-zero, use winTruncate() to extend
+ ** the file and so allocate the requested memory region.
+ */
+ if( !isWrite ) goto shmpage_out;
+ rc = winTruncate((sqlite3_file *)&pShmNode->hFile, nByte);
+ if( rc!=SQLITE_OK ){
+ goto shmpage_out;
+ }
+ }
-/*
-** Map the shared storage into memory. The minimum size of the
-** mapping should be reqMapSize if reqMapSize is positive. If
-** reqMapSize is zero or negative, the implementation can choose
-** whatever mapping size is convenient.
-**
-** *ppBuf is made to point to the memory which is a mapping of the
-** underlying storage. A mutex is acquired to prevent other threads
-** from running while *ppBuf is in use in order to prevent other threads
-** remapping *ppBuf out from under this thread. The winShmRelease()
-** call will release the mutex. However, if the lock state is CHECKPOINT,
-** the mutex is not acquired because CHECKPOINT will never remap the
-** buffer. RECOVER might remap, though, so CHECKPOINT will acquire
-** the mutex if and when it promotes to RECOVER.
-**
-** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from
-** being remapped also prevents more than one thread from being in
-** RECOVER at a time. But, RECOVER sometimes wants to remap itself.
-** To prevent RECOVER from losing its lock while remapping, the
-** mutex is not released by winShmRelease() when in RECOVER.
-**
-** *pNewMapSize is set to the size of the mapping.
-**
-** *ppBuf and *pNewMapSize might be NULL and zero if no space has
-** yet been allocated to the underlying storage.
-*/
-static int winShmGet(
- sqlite3_file *fd, /* The database file holding the shared memory */
- int reqMapSize, /* Requested size of mapping. -1 means don't care */
- int *pNewMapSize, /* Write new size of mapping here */
- void volatile **ppBuf /* Write mapping buffer origin here */
-){
- winFile *pDbFd = (winFile*)fd;
- winShm *p = pDbFd->pShm;
- winShmNode *pShmNode = p->pShmNode;
- int rc = SQLITE_OK;
-
- if( p->hasMutexBuf==0 ){
- assert( sqlite3_mutex_notheld(pShmNode->mutex) );
- sqlite3_mutex_enter(pShmNode->mutexBuf);
- p->hasMutexBuf = 1;
- }
- sqlite3_mutex_enter(pShmNode->mutex);
- if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
- int actualSize;
- if( winShmSize(fd, -1, &actualSize)==SQLITE_OK
- && reqMapSize<actualSize
- ){
- reqMapSize = actualSize;
+ /* Map the requested memory region into this processes address space. */
+ apNew = (struct ShmRegion *)sqlite3_realloc(
+ pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0])
+ );
+ if( !apNew ){
+ rc = SQLITE_IOERR_NOMEM;
+ goto shmpage_out;
}
- if( pShmNode->pMMapBuf ){
- if( !UnmapViewOfFile(pShmNode->pMMapBuf) ){
+ pShmNode->aRegion = apNew;
+
+ while( pShmNode->nRegion<=iRegion ){
+ HANDLE hMap; /* file-mapping handle */
+ void *pMap = 0; /* Mapped memory region */
+
+ hMap = CreateFileMapping(pShmNode->hFile.h,
+ NULL, PAGE_READWRITE, 0, nByte, NULL
+ );
+ if( hMap ){
+ pMap = MapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ,
+ 0, 0, nByte
+ );
+ }
+ if( !pMap ){
pShmNode->lastErrno = GetLastError();
rc = SQLITE_IOERR;
+ if( hMap ) CloseHandle(hMap);
+ goto shmpage_out;
}
- CloseHandle(pShmNode->hMap);
- pShmNode->hMap = INVALID_HANDLE_VALUE;
- }
- if( SQLITE_OK == rc ){
- pShmNode->pMMapBuf = 0;
- if( reqMapSize == 0 ){
- /* can't create 0 byte file mapping in Windows */
- pShmNode->szMap = 0;
- }else{
- /* create the file mapping object */
- if( INVALID_HANDLE_VALUE == pShmNode->hMap ){
- /* TBD provide an object name to each file
- ** mapping so it can be re-used across processes.
- */
- pShmNode->hMap = CreateFileMapping(pShmNode->hFile.h,
- NULL,
- PAGE_READWRITE,
- 0,
- reqMapSize,
- NULL);
- }
- if( NULL==pShmNode->hMap ){
- pShmNode->lastErrno = GetLastError();
- rc = SQLITE_IOERR;
- pShmNode->szMap = 0;
- pShmNode->hMap = INVALID_HANDLE_VALUE;
- }else{
- pShmNode->pMMapBuf = MapViewOfFile(pShmNode->hMap,
- FILE_MAP_WRITE | FILE_MAP_READ,
- 0,
- 0,
- reqMapSize);
- if( !pShmNode->pMMapBuf ){
- pShmNode->lastErrno = GetLastError();
- rc = SQLITE_IOERR;
- pShmNode->szMap = 0;
- }else{
- pShmNode->szMap = reqMapSize;
- }
- }
- }
+
+ pShmNode->aRegion[pShmNode->nRegion].pMap = pMap;
+ pShmNode->aRegion[pShmNode->nRegion].hMap = hMap;
+ pShmNode->nRegion++;
}
}
- *pNewMapSize = pShmNode->szMap;
- *ppBuf = pShmNode->pMMapBuf;
- sqlite3_mutex_leave(pShmNode->mutex);
- return rc;
-}
-/*
-** Release the lock held on the shared memory segment so that other
-** threads are free to resize it if necessary.
-**
-** If the lock is not currently held, this routine is a harmless no-op.
-**
-** If the shared-memory object is in lock state RECOVER, then we do not
-** really want to release the lock, so in that case too, this routine
-** is a no-op.
-*/
-static int winShmRelease(sqlite3_file *fd){
- winFile *pDbFd = (winFile*)fd;
- winShm *p = pDbFd->pShm;
- if( p->hasMutexBuf ){
- winShmNode *pShmNode = p->pShmNode;
- assert( sqlite3_mutex_notheld(pShmNode->mutex) );
- sqlite3_mutex_leave(pShmNode->mutexBuf);
- p->hasMutexBuf = 0;
+shmpage_out:
+ if( pShmNode->nRegion>iRegion ){
+ char *p = (char *)pShmNode->aRegion[iRegion].pMap;
+ *pp = (void *)&p[iRegion*szRegion];
+ }else{
+ *pp = 0;
}
- return SQLITE_OK;
+ sqlite3_mutex_leave(pShmNode->mutex);
+ return rc;
}
/*
@@ -1756,12 +1694,10 @@ static const sqlite3_io_methods winIoMethod = {
winSectorSize,
winDeviceCharacteristics,
winShmOpen, /* xShmOpen */
- winShmSize, /* xShmSize */
- winShmGet, /* xShmGet */
- winShmRelease, /* xShmRelease */
winShmLock, /* xShmLock */
winShmBarrier, /* xShmBarrier */
- winShmClose /* xShmClose */
+ winShmClose, /* xShmClose */
+ winShmMap /* xShmMap */
};
/***************************************************************************
diff --git a/src/sqlite.h.in b/src/sqlite.h.in
index e583e47d9..f6ec12b98 100644
--- a/src/sqlite.h.in
+++ b/src/sqlite.h.in
@@ -660,12 +660,10 @@ struct sqlite3_io_methods {
int (*xDeviceCharacteristics)(sqlite3_file*);
/* Methods above are valid for version 1 */
int (*xShmOpen)(sqlite3_file*);
- int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize);
- int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**);
- int (*xShmRelease)(sqlite3_file*);
int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
void (*xShmBarrier)(sqlite3_file*);
int (*xShmClose)(sqlite3_file*, int deleteFlag);
+ int (*xShmMap)(sqlite3_file*, int iPage, int pgsz, int, void volatile**);
/* Methods above are valid for version 2 */
/* Additional methods may be added in future releases */
};
diff --git a/src/test6.c b/src/test6.c
index 1dded82ef..d6e6db2c1 100644
--- a/src/test6.c
+++ b/src/test6.c
@@ -526,20 +526,6 @@ static int cfDeviceCharacteristics(sqlite3_file *pFile){
static int cfShmOpen(sqlite3_file *pFile){
return sqlite3OsShmOpen(((CrashFile*)pFile)->pRealFile);
}
-static int cfShmSize(sqlite3_file *pFile, int reqSize, int *pNew){
- return sqlite3OsShmSize(((CrashFile*)pFile)->pRealFile, reqSize, pNew);
-}
-static int cfShmGet(
- sqlite3_file *pFile,
- int reqSize,
- int *pSize,
- void volatile **pp
-){
- return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp);
-}
-static int cfShmRelease(sqlite3_file *pFile){
- return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile);
-}
static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, ofst, n, flags);
}
@@ -549,7 +535,15 @@ static void cfShmBarrier(sqlite3_file *pFile){
static int cfShmClose(sqlite3_file *pFile, int delFlag){
return sqlite3OsShmClose(((CrashFile*)pFile)->pRealFile, delFlag);
}
-
+/*
+** xShmMap method for crash-test file handles. The request is handed
+** straight to the real file underneath the CrashFile wrapper.
+*/
+static int cfShmMap(
+  sqlite3_file *pFile,            /* Handle open on database file */
+  int iRegion,                    /* Region to retrieve */
+  int sz,                         /* Size of regions */
+  int w,                          /* True to extend file if necessary */
+  void volatile **pp              /* OUT: Mapped memory */
+){
+  CrashFile *pCrash = (CrashFile*)pFile;
+  return sqlite3OsShmMap(pCrash->pRealFile, iRegion, sz, w, pp);
+}
static const sqlite3_io_methods CrashFileVtab = {
2, /* iVersion */
@@ -566,12 +560,10 @@ static const sqlite3_io_methods CrashFileVtab = {
cfSectorSize, /* xSectorSize */
cfDeviceCharacteristics, /* xDeviceCharacteristics */
cfShmOpen, /* xShmOpen */
- cfShmSize, /* xShmSize */
- cfShmGet, /* xShmGet */
- cfShmRelease, /* xShmRelease */
cfShmLock, /* xShmLock */
cfShmBarrier, /* xShmBarrier */
- cfShmClose /* xShmClose */
+ cfShmClose, /* xShmClose */
+ cfShmMap /* xShmMap */
};
/*
diff --git a/src/test_devsym.c b/src/test_devsym.c
index 046480493..98d6e2a30 100644
--- a/src/test_devsym.c
+++ b/src/test_devsym.c
@@ -51,12 +51,10 @@ static int devsymFileControl(sqlite3_file*, int op, void *pArg);
static int devsymSectorSize(sqlite3_file*);
static int devsymDeviceCharacteristics(sqlite3_file*);
static int devsymShmOpen(sqlite3_file*);
-static int devsymShmSize(sqlite3_file*,int,int*);
-static int devsymShmGet(sqlite3_file*,int,int*,volatile void**);
-static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int,int);
static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int);
+static int devsymShmMap(sqlite3_file*,int,int,int, void volatile **);
/*
** Method declarations for devsym_vfs.
@@ -120,12 +118,10 @@ static sqlite3_io_methods devsym_io_methods = {
devsymSectorSize, /* xSectorSize */
devsymDeviceCharacteristics, /* xDeviceCharacteristics */
devsymShmOpen, /* xShmOpen */
- devsymShmSize, /* xShmSize */
- devsymShmGet, /* xShmGet */
- devsymShmRelease, /* xShmRelease */
devsymShmLock, /* xShmLock */
devsymShmBarrier, /* xShmBarrier */
- devsymShmClose /* xShmClose */
+ devsymShmClose, /* xShmClose */
+ devsymShmMap /* xShmMap */
};
struct DevsymGlobal {
@@ -246,23 +242,6 @@ static int devsymShmOpen(sqlite3_file *pFile){
devsym_file *p = (devsym_file *)pFile;
return sqlite3OsShmOpen(p->pReal);
}
-static int devsymShmSize(sqlite3_file *pFile, int reqSize, int *pSize){
- devsym_file *p = (devsym_file *)pFile;
- return sqlite3OsShmSize(p->pReal, reqSize, pSize);
-}
-static int devsymShmGet(
- sqlite3_file *pFile,
- int reqSz,
- int *pSize,
- void volatile **pp
-){
- devsym_file *p = (devsym_file *)pFile;
- return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp);
-}
-static int devsymShmRelease(sqlite3_file *pFile){
- devsym_file *p = (devsym_file *)pFile;
- return sqlite3OsShmRelease(p->pReal);
-}
static int devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
devsym_file *p = (devsym_file *)pFile;
return sqlite3OsShmLock(p->pReal, ofst, n, flags);
@@ -275,6 +254,16 @@ static int devsymShmClose(sqlite3_file *pFile, int delFlag){
devsym_file *p = (devsym_file *)pFile;
return sqlite3OsShmClose(p->pReal, delFlag);
}
+/*
+** xShmMap method for devsym file handles. The call is passed through
+** to the wrapped file handle with all arguments unchanged.
+*/
+static int devsymShmMap(
+  sqlite3_file *pFile,
+  int iRegion,
+  int szRegion,
+  int isWrite,
+  void volatile **pp
+){
+  devsym_file *pDevsym = (devsym_file *)pFile;
+  int rc = sqlite3OsShmMap(pDevsym->pReal, iRegion, szRegion, isWrite, pp);
+  return rc;
+}
diff --git a/src/test_osinst.c b/src/test_osinst.c
index b60f84a57..f97822a1a 100644
--- a/src/test_osinst.c
+++ b/src/test_osinst.c
@@ -100,11 +100,9 @@
#define OS_WRITE 20
#define OS_SHMOPEN 21
#define OS_SHMCLOSE 22
-#define OS_SHMGET 23
-#define OS_SHMRELEASE 24
+#define OS_SHMMAP 23
#define OS_SHMLOCK 25
#define OS_SHMBARRIER 26
-#define OS_SHMSIZE 27
#define OS_ANNOTATE 28
#define OS_NUMEVENTS 29
@@ -152,12 +150,10 @@ static int vfslogSectorSize(sqlite3_file*);
static int vfslogDeviceCharacteristics(sqlite3_file*);
static int vfslogShmOpen(sqlite3_file *pFile);
-static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize);
-static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **);
-static int vfslogShmRelease(sqlite3_file *pFile);
static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags);
static void vfslogShmBarrier(sqlite3_file*);
static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag);
+static int vfslogShmMap(sqlite3_file *pFile,int,int,int,volatile void **);
/*
** Method declarations for vfslog_vfs.
@@ -216,12 +212,10 @@ static sqlite3_io_methods vfslog_io_methods = {
vfslogSectorSize, /* xSectorSize */
vfslogDeviceCharacteristics, /* xDeviceCharacteristics */
vfslogShmOpen, /* xShmOpen */
- vfslogShmSize, /* xShmSize */
- vfslogShmGet, /* xShmGet */
- vfslogShmRelease, /* xShmRelease */
vfslogShmLock, /* xShmLock */
vfslogShmBarrier, /* xShmBarrier */
- vfslogShmClose /* xShmClose */
+ vfslogShmClose, /* xShmClose */
+ vfslogShmMap /* xShmMap */
};
#if defined(SQLITE_OS_UNIX) && !defined(NO_GETTOD)
@@ -441,41 +435,6 @@ static int vfslogShmOpen(sqlite3_file *pFile){
vfslog_call(p->pVfslog, OS_SHMOPEN, p->iFileId, t, rc, 0, 0);
return rc;
}
-static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize){
- int rc;
- sqlite3_uint64 t;
- VfslogFile *p = (VfslogFile *)pFile;
- t = vfslog_time();
- rc = p->pReal->pMethods->xShmSize(p->pReal, reqSize, pNewSize);
- t = vfslog_time() - t;
- vfslog_call(p->pVfslog, OS_SHMSIZE, p->iFileId, t, rc, 0, 0);
- return rc;
-}
-static int vfslogShmGet(
- sqlite3_file *pFile,
- int req,
- int *pSize,
- volatile void **pp
-){
- int rc;
- sqlite3_uint64 t;
- VfslogFile *p = (VfslogFile *)pFile;
- t = vfslog_time();
- rc = p->pReal->pMethods->xShmGet(p->pReal, req, pSize, pp);
- t = vfslog_time() - t;
- vfslog_call(p->pVfslog, OS_SHMGET, p->iFileId, t, rc, 0, 0);
- return rc;
-}
-static int vfslogShmRelease(sqlite3_file *pFile){
- int rc;
- sqlite3_uint64 t;
- VfslogFile *p = (VfslogFile *)pFile;
- t = vfslog_time();
- rc = p->pReal->pMethods->xShmRelease(p->pReal);
- t = vfslog_time() - t;
- vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0);
- return rc;
-}
static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
int rc;
sqlite3_uint64 t;
@@ -504,6 +463,22 @@ static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag){
vfslog_call(p->pVfslog, OS_SHMCLOSE, p->iFileId, t, rc, 0, 0);
return rc;
}
+/*
+** xShmMap method for vfslog files. Invoke the xShmMap method of the
+** underlying file, measure how long the call took, and record it as an
+** OS_SHMMAP event. The underlying method's return code is passed back.
+*/
+static int vfslogShmMap(
+ sqlite3_file *pFile, /* vfslog file handle */
+ int iRegion, /* Forwarded unchanged */
+ int szRegion, /* Forwarded unchanged */
+ int isWrite, /* Forwarded unchanged */
+ volatile void **pp /* Forwarded unchanged */
+){
+ VfslogFile *pLog = (VfslogFile *)pFile;
+ sqlite3_uint64 tStart; /* vfslog_time() before the call */
+ sqlite3_uint64 tElapsed; /* Duration of the call */
+ int rc;
+
+ tStart = vfslog_time();
+ rc = pLog->pReal->pMethods->xShmMap(
+ pLog->pReal, iRegion, szRegion, isWrite, pp
+ );
+ tElapsed = vfslog_time() - tStart;
+ vfslog_call(pLog->pVfslog, OS_SHMMAP, pLog->iFileId, tElapsed, rc, 0, 0);
+ return rc;
+}
/*
@@ -826,11 +801,9 @@ static const char *vfslog_eventname(int eEvent){
case OS_SHMCLOSE: zEvent = "xShmClose"; break;
case OS_SHMOPEN: zEvent = "xShmOpen"; break;
- case OS_SHMGET: zEvent = "xShmGet"; break;
- case OS_SHMSIZE: zEvent = "xShmSize"; break;
- case OS_SHMRELEASE: zEvent = "xShmRelease"; break;
case OS_SHMLOCK: zEvent = "xShmLock"; break;
case OS_SHMBARRIER: zEvent = "xShmBarrier"; break;
+ case OS_SHMMAP: zEvent = "xShmMap"; break;
case OS_ANNOTATE: zEvent = "annotation"; break;
}
diff --git a/src/test_vfs.c b/src/test_vfs.c
index 1083080eb..89cc842ca 100644
--- a/src/test_vfs.c
+++ b/src/test_vfs.c
@@ -69,16 +69,17 @@ struct Testvfs {
** + Invoking the Tcl callback script.
*/
#define TESTVFS_SHMOPEN_MASK 0x00000001
-#define TESTVFS_SHMSIZE_MASK 0x00000002
-#define TESTVFS_SHMGET_MASK 0x00000004
-#define TESTVFS_SHMRELEASE_MASK 0x00000008
#define TESTVFS_SHMLOCK_MASK 0x00000010
#define TESTVFS_SHMBARRIER_MASK 0x00000020
#define TESTVFS_SHMCLOSE_MASK 0x00000040
+#define TESTVFS_SHMPAGE_MASK 0x00000080
-#define TESTVFS_OPEN_MASK 0x00000080
-#define TESTVFS_SYNC_MASK 0x00000100
-#define TESTVFS_ALL_MASK 0x000001FF
+#define TESTVFS_OPEN_MASK 0x00000100
+#define TESTVFS_SYNC_MASK 0x00000200
+#define TESTVFS_ALL_MASK 0x000003FF
+
+
+#define TESTVFS_MAX_PAGES 256
/*
** A shared-memory buffer. There is one of these objects for each shared
@@ -87,8 +88,8 @@ struct Testvfs {
*/
struct TestvfsBuffer {
char *zFile; /* Associated file name */
- int n; /* Size of allocated buffer in bytes */
- u8 *a; /* Buffer allocated using ckalloc() */
+ int pgsz; /* Page size */
+ u8 *aPage[TESTVFS_MAX_PAGES]; /* Array of ckalloc'd pages */
TestvfsFile *pFile; /* List of open handles */
TestvfsBuffer *pNext; /* Next in linked list of all buffers */
};
@@ -133,12 +134,10 @@ static int tvfsSleep(sqlite3_vfs*, int microseconds);
static int tvfsCurrentTime(sqlite3_vfs*, double*);
static int tvfsShmOpen(sqlite3_file*);
-static int tvfsShmSize(sqlite3_file*, int , int *);
-static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **);
-static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int, int);
static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int);
+static int tvfsShmPage(sqlite3_file*,int,int,int, void volatile **);
static sqlite3_io_methods tvfs_io_methods = {
2, /* iVersion */
@@ -155,12 +154,10 @@ static sqlite3_io_methods tvfs_io_methods = {
tvfsSectorSize, /* xSectorSize */
tvfsDeviceCharacteristics, /* xDeviceCharacteristics */
tvfsShmOpen, /* xShmOpen */
- tvfsShmSize, /* xShmSize */
- tvfsShmGet, /* xShmGet */
- tvfsShmRelease, /* xShmRelease */
tvfsShmLock, /* xShmLock */
tvfsShmBarrier, /* xShmBarrier */
- tvfsShmClose /* xShmClose */
+ tvfsShmClose, /* xShmClose */
+ tvfsShmPage /* xShmPage */
};
static int tvfsResultCode(Testvfs *p, int *pRc){
@@ -443,12 +440,10 @@ static int tvfsOpen(
memcpy(pMethods, &tvfs_io_methods, sizeof(sqlite3_io_methods));
if( ((Testvfs *)pVfs->pAppData)->isNoshm ){
pMethods->xShmOpen = 0;
- pMethods->xShmGet = 0;
- pMethods->xShmSize = 0;
- pMethods->xShmRelease = 0;
pMethods->xShmClose = 0;
pMethods->xShmLock = 0;
pMethods->xShmBarrier = 0;
+ pMethods->xShmMap = 0;
}
pFile->pMethods = pMethods;
}
@@ -547,16 +542,6 @@ static int tvfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){
return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut);
}
-static void tvfsGrowBuffer(TestvfsFile *pFd, int reqSize, int *pNewSize){
- TestvfsBuffer *pBuffer = pFd->pShm;
- if( reqSize>pBuffer->n ){
- pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, reqSize);
- memset(&pBuffer->a[pBuffer->n], 0x55, reqSize-pBuffer->n);
- pBuffer->n = reqSize;
- }
- *pNewSize = pBuffer->n;
-}
-
static int tvfsInjectIoerr(Testvfs *p){
int ret = 0;
if( p->ioerr ){
@@ -619,71 +604,51 @@ static int tvfsShmOpen(
return SQLITE_OK;
}
-static int tvfsShmSize(
- sqlite3_file *pFile,
- int reqSize,
- int *pNewSize
-){
- int rc = SQLITE_OK;
- TestvfsFile *pFd = (TestvfsFile *)pFile;
- Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
-
- if( p->pScript && p->mask&TESTVFS_SHMSIZE_MASK ){
- tvfsExecTcl(p, "xShmSize",
- Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0
- );
- tvfsResultCode(p, &rc);
- }
- if( rc==SQLITE_OK && p->mask&TESTVFS_SHMSIZE_MASK && tvfsInjectIoerr(p) ){
- rc = SQLITE_IOERR;
- }
- if( rc==SQLITE_OK ){
- tvfsGrowBuffer(pFd, reqSize, pNewSize);
+static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){
+ assert( iPage<TESTVFS_MAX_PAGES ); /* aPage[] has TESTVFS_MAX_PAGES slots */
+ if( p->aPage[iPage]==0 ){ /* Populate slot iPage only if it is still empty */
+ p->aPage[iPage] = (u8 *)ckalloc(pgsz);
+ memset(p->aPage[iPage], 0, pgsz); /* New pages start out zero-filled */
+ p->pgsz = pgsz; /* Remember the size of the page just allocated */
}
- return rc;
}
-static int tvfsShmGet(
- sqlite3_file *pFile,
- int reqMapSize,
- int *pMapSize,
- volatile void **pp
+static int tvfsShmPage(
+ sqlite3_file *pFile, /* Handle open on database file */
+ int iPage, /* Page to retrieve */
+ int pgsz, /* Size of pages */
+ int isWrite, /* True to extend file if necessary */
+ void volatile **pp /* OUT: Mapped memory */
){
int rc = SQLITE_OK;
TestvfsFile *pFd = (TestvfsFile *)pFile;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
- if( p->pScript && p->mask&TESTVFS_SHMGET_MASK ){
- tvfsExecTcl(p, "xShmGet",
- Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId,
- Tcl_NewIntObj(reqMapSize)
+ if( p->pScript && p->mask&TESTVFS_SHMPAGE_MASK ){
+ Tcl_Obj *pArg = Tcl_NewObj();
+ Tcl_IncrRefCount(pArg);
+ Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(iPage));
+ Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(pgsz));
+ Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(isWrite));
+ tvfsExecTcl(p, "xShmPage",
+ Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, pArg
);
tvfsResultCode(p, &rc);
+ Tcl_DecrRefCount(pArg);
}
- if( rc==SQLITE_OK && p->mask&TESTVFS_SHMGET_MASK && tvfsInjectIoerr(p) ){
+ if( rc==SQLITE_OK && p->mask&TESTVFS_SHMPAGE_MASK && tvfsInjectIoerr(p) ){
rc = SQLITE_IOERR;
}
- *pMapSize = pFd->pShm->n;
- *pp = pFd->pShm->a;
- return rc;
-}
-
-static int tvfsShmRelease(sqlite3_file *pFile){
- int rc = SQLITE_OK;
- TestvfsFile *pFd = (TestvfsFile *)pFile;
- Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
-
- if( p->pScript && p->mask&TESTVFS_SHMRELEASE_MASK ){
- tvfsExecTcl(p, "xShmRelease",
- Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0
- );
- tvfsResultCode(p, &rc);
+ /* aPage[] holds exactly TESTVFS_MAX_PAGES slots. tvfsAllocPage() only
+ ** assert()s this, and only on the write path, so reject an out-of-range
+ ** page here rather than indexing past the end of the array. */
+ if( iPage<0 || iPage>=TESTVFS_MAX_PAGES ){
+ *pp = 0;
+ return rc==SQLITE_OK ? SQLITE_IOERR : rc;
+ }
+ if( rc==SQLITE_OK && isWrite && !pFd->pShm->aPage[iPage] ){
+ tvfsAllocPage(pFd->pShm, iPage, pgsz);
}
+ *pp = (void volatile *)pFd->pShm->aPage[iPage];
return rc;
}
+
static int tvfsShmLock(
sqlite3_file *pFile,
int ofst,
@@ -782,10 +747,13 @@ static int tvfsShmClose(
*ppFd = pFd->pNext;
if( pBuffer->pFile==0 ){
+ int i;
TestvfsBuffer **pp;
for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext));
*pp = (*pp)->pNext;
- ckfree((char *)pBuffer->a);
+ /* Visit every slot: tvfsShmPage() may populate slots in any order, so
+ ** an empty slot does not mean the later ones are empty too, and the
+ ** array is not NULL-terminated. */
+ for(i=0; i<TESTVFS_MAX_PAGES; i++){
+ if( pBuffer->aPage[i] ){
+ ckfree((char *)pBuffer->aPage[i]);
+ }
+ }
ckfree((char *)pBuffer);
}
pFd->pShm = 0;
@@ -821,28 +789,46 @@ static int testvfs_obj_cmd(
switch( (enum DB_enum)i ){
case CMD_SHM: {
+ Tcl_Obj *pObj;
+ int i;
TestvfsBuffer *pBuffer;
char *zName;
if( objc!=3 && objc!=4 ){
Tcl_WrongNumArgs(interp, 2, objv, "FILE ?VALUE?");
return TCL_ERROR;
}
- zName = Tcl_GetString(objv[2]);
+ zName = ckalloc(p->pParent->mxPathname);
+ p->pParent->xFullPathname(
+ p->pParent, Tcl_GetString(objv[2]),
+ p->pParent->mxPathname, zName
+ );
for(pBuffer=p->pBuffer; pBuffer; pBuffer=pBuffer->pNext){
if( 0==strcmp(pBuffer->zFile, zName) ) break;
}
+ ckfree(zName);
if( !pBuffer ){
- Tcl_AppendResult(interp, "no such file: ", zName, 0);
+ Tcl_AppendResult(interp, "no such file: ", Tcl_GetString(objv[2]), 0);
return TCL_ERROR;
}
if( objc==4 ){
int n;
u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n);
- pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, n);
- pBuffer->n = n;
- memcpy(pBuffer->a, a, n);
+ assert( pBuffer->pgsz==0 || pBuffer->pgsz==32768 );
+ for(i=0; i<TESTVFS_MAX_PAGES && i*32768<n; i++){
+ /* Copy only what is left of the input into the final, partial page.
+ ** Copying n bytes there would overrun both a[] and the 32768-byte
+ ** page whenever that final page is not page 0. */
+ int nByte = n - i*32768;
+ if( nByte>32768 ){
+ nByte = 32768;
+ }
+ tvfsAllocPage(pBuffer, i, 32768);
+ memcpy(pBuffer->aPage[i], &a[i*32768], nByte);
+ }
}
- Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(pBuffer->a, pBuffer->n));
+
+ pObj = Tcl_NewObj();
+ /* aPage[] is not NULL-terminated, so bound the scan explicitly. */
+ for(i=0; i<TESTVFS_MAX_PAGES && pBuffer->aPage[i]; i++){
+ /* Tcl_AppendObjToObj() copies from its second argument without taking
+ ** ownership of it, so hold and then drop a reference to the temporary
+ ** object; otherwise it is never freed. */
+ Tcl_Obj *pPage = Tcl_NewByteArrayObj(pBuffer->aPage[i], 32768);
+ Tcl_IncrRefCount(pPage);
+ Tcl_AppendObjToObj(pObj, pPage);
+ Tcl_DecrRefCount(pPage);
+ }
+ Tcl_SetObjResult(interp, pObj);
break;
}
@@ -852,12 +838,10 @@ static int testvfs_obj_cmd(
int mask;
} vfsmethod [] = {
{ "xShmOpen", TESTVFS_SHMOPEN_MASK },
- { "xShmSize", TESTVFS_SHMSIZE_MASK },
- { "xShmGet", TESTVFS_SHMGET_MASK },
- { "xShmRelease", TESTVFS_SHMRELEASE_MASK },
{ "xShmLock", TESTVFS_SHMLOCK_MASK },
{ "xShmBarrier", TESTVFS_SHMBARRIER_MASK },
{ "xShmClose", TESTVFS_SHMCLOSE_MASK },
+ { "xShmPage", TESTVFS_SHMPAGE_MASK },
{ "xSync", TESTVFS_SYNC_MASK },
{ "xOpen", TESTVFS_OPEN_MASK },
};
@@ -899,6 +883,7 @@ static int testvfs_obj_cmd(
ckfree((char *)p->apScript);
p->apScript = 0;
p->nScript = 0;
+ p->pScript = 0;
}
Tcl_GetStringFromObj(objv[2], &nByte);
if( nByte>0 ){
@@ -1071,6 +1056,13 @@ static int testvfs_cmd(
p = (Testvfs *)ckalloc(nByte);
memset(p, 0, nByte);
+ /* Create the new object command before querying SQLite for a default VFS
+ ** to use for 'real' IO operations. This is because creating the new VFS
+ ** may delete an existing [testvfs] VFS of the same name. If such a VFS
+ ** is currently the default, the new [testvfs] may end up calling the
+ ** methods of a deleted object.
+ */
+ Tcl_CreateObjCommand(interp, zVfs, testvfs_obj_cmd, p, testvfs_obj_del);
p->pParent = sqlite3_vfs_find(0);
p->interp = interp;
@@ -1087,7 +1079,6 @@ static int testvfs_cmd(
p->isNoshm = isNoshm;
p->mask = TESTVFS_ALL_MASK;
- Tcl_CreateObjCommand(interp, zVfs, testvfs_obj_cmd, p, testvfs_obj_del);
sqlite3_vfs_register(pVfs, isDefault);
return TCL_OK;
diff --git a/src/wal.c b/src/wal.c
index 017b84490..775f9556b 100644
--- a/src/wal.c
+++ b/src/wal.c
@@ -141,21 +141,33 @@
** more index blocks.
**
** The wal-index header contains the total number of frames within the WAL
-** in the the mxFrame field. Each index block contains information on
-** HASHTABLE_NPAGE frames. Each index block contains two sections, a
-** mapping which is a database page number for each frame, and a hash
-** table used to look up frames by page number. The mapping section is
-** an array of HASHTABLE_NPAGE 32-bit page numbers. The first entry on the
-** array is the page number for the first frame; the second entry is the
-** page number for the second frame; and so forth. The last index block
-** holds a total of (mxFrame%HASHTABLE_NPAGE) page numbers. All index
-** blocks other than the last are completely full with HASHTABLE_NPAGE
-** page numbers. All index blocks are the same size; the mapping section
-** of the last index block merely contains unused entries if mxFrame is
-** not an even multiple of HASHTABLE_NPAGE.
+** in the mxFrame field.
+**
+** Each index block except for the first contains information on
+** HASHTABLE_NPAGE frames. The first index block contains information on
+** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and
+** HASHTABLE_NPAGE are selected so that together the wal-index header and
+** first index block are the same size as all other index blocks in the
+** wal-index.
+**
+** Each index block contains two sections, a page-mapping that contains the
+** database page number associated with each wal frame, and a hash-table
+** that allows users to query an index block for a specific page number.
+** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE
+** for the first index block) 32-bit page numbers. The first entry in the
+** first index-block contains the database page number corresponding to the
+** first frame in the WAL file. The first entry in the second index block
+** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in
+** the log, and so on.
+**
+** The last index block in a wal-index usually contains less than the full
+** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers,
+** depending on the contents of the WAL file. This does not change the
+** allocated size of the page-mapping array - the page-mapping array merely
+** contains unused entries.
**
** Even without using the hash table, the last frame for page P
-** can be found by scanning the mapping sections of each index block
+** can be found by scanning the page-mapping sections of each index block
** starting with the last index block and moving toward the first, and
** within each index block, starting at the end and moving toward the
** beginning. The first entry that equals P corresponds to the frame
@@ -370,8 +382,8 @@ struct Wal {
sqlite3_file *pDbFd; /* File handle for the database file */
sqlite3_file *pWalFd; /* File handle for WAL file */
u32 iCallback; /* Value to pass to log callback (or 0) */
- int szWIndex; /* Size of the wal-index that is mapped in mem */
- volatile u32 *pWiData; /* Pointer to wal-index content in memory */
+ int nWiData; /* Size of array apWiData */
+ volatile u32 **apWiData; /* Pointer to wal-index content in memory */
u16 szPage; /* Database page size */
i16 readLock; /* Which read lock is being held. -1 for none */
u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */
@@ -387,13 +399,10 @@ struct Wal {
};
/*
-** Return a pointer to the WalCkptInfo structure in the wal-index.
+** Each page of the wal-index mapping contains a hash-table made up of
+** an array of HASHTABLE_NSLOT elements of the following type.
*/
-static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
- assert( pWal->pWiData!=0 );
- return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2];
-}
-
+typedef u16 ht_slot;
/*
** This structure is used to implement an iterator that loops through
@@ -411,17 +420,96 @@ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
** This functionality is used by the checkpoint code (see walCheckpoint()).
*/
struct WalIterator {
- int iPrior; /* Last result returned from the iterator */
- int nSegment; /* Size of the aSegment[] array */
- int nFinal; /* Elements in aSegment[nSegment-1] */
+ int iPrior; /* Last result returned from the iterator */
+ int nSegment; /* Size of the aSegment[] array */
struct WalSegment {
- int iNext; /* Next slot in aIndex[] not previously returned */
- u8 *aIndex; /* i0, i1, i2... such that aPgno[iN] ascending */
- u32 *aPgno; /* 256 page numbers. Pointer to Wal.pWiData */
- } aSegment[1]; /* One for every 256 entries in the WAL */
+ int iNext; /* Next slot in aIndex[] not yet returned */
+ ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */
+ u32 *aPgno; /* Array of page numbers. */
+ int nEntry; /* Max size of aPgno[] and aIndex[] arrays */
+ int iZero; /* Frame number associated with aPgno[0] */
+ } aSegment[1]; /* One for every 32KB page in the wal-index */
};
/*
+** Define the parameters of the hash tables in the wal-index file. There
+** is a hash-table following every HASHTABLE_NPAGE page numbers in the
+** wal-index.
+**
+** Changing any of these constants will alter the wal-index format and
+** create incompatibilities.
+*/
+#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */
+#define HASHTABLE_HASH_1 383 /* Should be prime */
+#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
+
+/*
+** The block of page numbers associated with the first hash-table in a
+** wal-index is smaller than usual. This is so that there is a complete
+** hash-table on each aligned 32KB page of the wal-index.
+*/
+#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))
+
+/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */
+#define WALINDEX_PGSZ ( \
+ sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
+)
+
+/*
+** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
+** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
+** numbered from zero.
+**
+** If this call is successful, *ppPage is set to point to the wal-index
+** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
+** then an SQLite error code is returned and *ppPage is set to 0.
+*/
+static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
+ int rc = SQLITE_OK;
+
+ /* Grow pWal->apWiData[] when it has no slot for iPage yet. The slots
+ ** added by the resize are zeroed, which marks them as not yet mapped. */
+ if( iPage>=pWal->nWiData ){
+ int nSlot = iPage+1;
+ int nByte = sizeof(u32 *)*nSlot;
+ volatile u32 **apGrown;
+ apGrown = (volatile u32 **)sqlite3_realloc(pWal->apWiData, nByte);
+ if( apGrown==0 ){
+ *ppPage = 0;
+ return SQLITE_NOMEM;
+ }
+ memset(&apGrown[pWal->nWiData], 0, sizeof(u32 *)*(nSlot-pWal->nWiData));
+ pWal->apWiData = apGrown;
+ pWal->nWiData = nSlot;
+ }
+
+ /* Ask the VFS for the page only if no pointer to it is cached yet */
+ if( !pWal->apWiData[iPage] ){
+ rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ,
+ pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
+ );
+ }
+
+ /* Hand back whatever is now stored in the slot */
+ *ppPage = pWal->apWiData[iPage];
+ assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
+ return rc;
+}
+
+/*
+** Return a pointer to the WalCkptInfo structure in the wal-index.
+*/
+static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
+ volatile u32 *aPage0; /* Wal-index page 0 */
+ assert( pWal->nWiData>0 && pWal->apWiData[0] );
+ aPage0 = pWal->apWiData[0];
+ /* The structure sits sizeof(WalIndexHdr)/2 u32 elements into page 0 */
+ return (volatile WalCkptInfo*)&aPage0[sizeof(WalIndexHdr)/2];
+}
+
+/*
+** Return a pointer to the WalIndexHdr structure in the wal-index.
+*/
+static volatile WalIndexHdr *walIndexHdr(Wal *pWal){
+ volatile u32 *aPage0; /* Wal-index page 0 */
+ assert( pWal->nWiData>0 && pWal->apWiData[0] );
+ aPage0 = pWal->apWiData[0];
+ /* The header is located at the very start of page 0 */
+ return (volatile WalIndexHdr*)aPage0;
+}
+
+/*
** The argument to this macro must be of type u32. On a little-endian
** architecture, it returns the u32 value that results from interpreting
** the 4 bytes as a big-endian value. On a big-endian architecture, it
@@ -486,16 +574,15 @@ static void walChecksumBytes(
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
- WalIndexHdr *aHdr;
+ volatile WalIndexHdr *aHdr = walIndexHdr(pWal); /* Start of wal-index page 0 */
+ const int nCksum = offsetof(WalIndexHdr, aCksum); /* Header bytes before aCksum */
assert( pWal->writeLock );
pWal->hdr.isInit = 1;
- walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum),
- 0, pWal->hdr.aCksum);
- aHdr = (WalIndexHdr*)pWal->pWiData;
- memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr));
+ walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); /* Refresh hdr.aCksum */
+ memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr)); /* Copy at index 1 is written first */
sqlite3OsShmBarrier(pWal->pDbFd);
- memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr));
+ memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr)); /* Copy at index 0 follows the barrier */
}
/*
@@ -586,19 +673,6 @@ static int walDecodeFrame(
return 1;
}
-/*
-** Define the parameters of the hash tables in the wal-index file. There
-** is a hash-table following every HASHTABLE_NPAGE page numbers in the
-** wal-index.
-**
-** Changing any of these constants will alter the wal-index format and
-** create incompatibilities.
-*/
-#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */
-#define HASHTABLE_DATATYPE u16
-#define HASHTABLE_HASH_1 383 /* Should be prime */
-#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
-#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
@@ -664,96 +738,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
}
/*
-** Return the index in the Wal.pWiData array that corresponds to
-** frame iFrame.
-**
-** Wal.pWiData is an array of u32 elements that is the wal-index.
-** The array begins with a header and is then followed by alternating
-** "map" and "hash-table" blocks. Each "map" block consists of
-** HASHTABLE_NPAGE u32 elements which are page numbers corresponding
-** to frames in the WAL file.
-**
-** This routine returns an index X such that Wal.pWiData[X] is part
-** of a "map" block that contains the page number of the iFrame-th
-** frame in the WAL file.
-*/
-static int walIndexEntry(u32 iFrame){
- return (
- (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
- + (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32)
- + (iFrame-1)
- );
-}
-
-/*
-** Return the minimum size of the shared-memory, in bytes, that is needed
-** to support a wal-index containing frame iFrame. The value returned
-** includes the wal-index header and the complete "block" containing iFrame,
-** including the hash table segment that follows the block.
-*/
-static int walMappingSize(u32 iFrame){
- const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ;
- return ( WALINDEX_LOCK_OFFSET
- + WALINDEX_LOCK_RESERVED
- + nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE)
- );
-}
-
-/*
-** Release our reference to the wal-index memory map, if we are holding
-** it.
-*/
-static void walIndexUnmap(Wal *pWal){
- if( pWal->pWiData ){
- sqlite3OsShmRelease(pWal->pDbFd);
- }
- pWal->pWiData = 0;
- pWal->szWIndex = -1;
-}
-
-/*
-** Map the wal-index file into memory if it isn't already.
-**
-** The reqSize parameter is the requested size of the mapping. The
-** mapping will be at least this big if the underlying storage is
-** that big. But the mapping will never grow larger than the underlying
-** storage. Use the walIndexRemap() to enlarget the storage space.
-*/
-static int walIndexMap(Wal *pWal, int reqSize){
- int rc = SQLITE_OK;
- if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){
- walIndexUnmap(pWal);
- rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex,
- (void volatile**)(char volatile*)&pWal->pWiData);
- if( rc!=SQLITE_OK ){
- walIndexUnmap(pWal);
- }
- }
- return rc;
-}
-
-/*
-** Enlarge the wal-index to be at least enlargeTo bytes in size and
-** Remap the wal-index so that the mapping covers the full size
-** of the underlying file.
-**
-** If enlargeTo is non-negative, then increase the size of the underlying
-** storage to be at least as big as enlargeTo before remapping.
-*/
-static int walIndexRemap(Wal *pWal, int enlargeTo){
- int rc;
- int sz;
- assert( pWal->writeLock );
- rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz);
- if( rc==SQLITE_OK && sz>pWal->szWIndex ){
- walIndexUnmap(pWal);
- rc = walIndexMap(pWal, sz);
- }
- assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK );
- return rc;
-}
-
-/*
** Compute a hash on a page number. The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances
** the hash to the next value in the event of a collision.
@@ -767,10 +751,10 @@ static int walNextHash(int iPriorHash){
return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
}
-
/*
-** Find the hash table and (section of the) page number array used to
-** store data for WAL frame iFrame.
+** Return pointers to the hash table and page number array stored on
+** page iHash of the wal-index. The wal-index is broken into 32KB pages
+** numbered starting from 0.
**
** Set output variable *paHash to point to the start of the hash table
** in the wal-index file. Set *piZero to one less than the frame
@@ -778,38 +762,67 @@ static int walNextHash(int iPriorHash){
** slot in the hash table is set to N, it refers to frame number
** (*piZero+N) in the log.
**
-** Finally, set *paPgno such that for all frames F between (*piZero+1) and
-** (*piZero+HASHTABLE_NPAGE), (*paPgno)[F] is the database page number
-** associated with frame F.
+** Finally, set *paPgno so that (*paPgno)[1] is the page number of the
+** first frame indexed by the hash table, frame (*piZero+1).
*/
-static void walHashFind(
+static int walHashGet(
Wal *pWal, /* WAL handle */
- u32 iFrame, /* Find the hash table indexing this frame */
- volatile HASHTABLE_DATATYPE **paHash, /* OUT: Pointer to hash index */
+ int iHash, /* Find the iHash'th table */
+ volatile ht_slot **paHash, /* OUT: Pointer to hash index */
volatile u32 **paPgno, /* OUT: Pointer to page number array */
u32 *piZero /* OUT: Frame associated with *paPgno[0] */
){
- u32 iZero;
+ int rc; /* Return code */
volatile u32 *aPgno;
- volatile HASHTABLE_DATATYPE *aHash;
- iZero = ((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NPAGE;
- aPgno = &pWal->pWiData[walIndexEntry(iZero+1)-iZero-1];
- aHash = (HASHTABLE_DATATYPE *)&aPgno[iZero+HASHTABLE_NPAGE+1];
+ rc = walIndexPage(pWal, iHash, &aPgno); /* aPgno now points at the start of page iHash */
+ assert( rc==SQLITE_OK || iHash>0 ); /* A lookup of page 0 is asserted never to fail */
- /* Assert that:
- **
- ** + the mapping is large enough for this hash-table, and
- **
- ** + that aPgno[iZero+1] really is the database page number associated
- ** with the first frame indexed by this hash table.
- */
- assert( (u32*)(&aHash[HASHTABLE_NSLOT])<=&pWal->pWiData[pWal->szWIndex/4] );
- assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) );
+ if( rc==SQLITE_OK ){
+ u32 iZero;
+ volatile ht_slot *aHash;
+
+ aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE]; /* Hash table begins HASHTABLE_NPAGE u32 elements into the page */
+ if( iHash==0 ){
+ aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; /* On page 0, skip WALINDEX_HDR_SIZE bytes to reach the page numbers */
+ iZero = 0;
+ }else{
+ iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; /* Number of frames covered by pages 0..iHash-1 */
+ }
+
+ *paPgno = &aPgno[-1]; /* Biased by one element so that (*paPgno)[1] is the first entry */
+ *paHash = aHash;
+ *piZero = iZero;
+ }
+ return rc; /* The three OUT parameters are written only when rc==SQLITE_OK */
+}
- *paHash = aHash;
- *paPgno = aPgno;
- *piZero = iZero;
+/*
+** Return the number of the wal-index page that contains the hash-table
+** and page-number array that contain entries corresponding to WAL frame
+** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages
+** are numbered starting from 0.
+*/
+static int walFramePage(u32 iFrame){
+ int iPage = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
+
+ /* Spot-check the result against the frame ranges of pages 0, 1 and 2:
+ ** page 0 holds frames 1..HASHTABLE_NPAGE_ONE and each later page holds
+ ** the next HASHTABLE_NPAGE frames. */
+ assert( iPage==0 || iFrame>HASHTABLE_NPAGE_ONE );
+ assert( iPage>=1 || iFrame<=HASHTABLE_NPAGE_ONE );
+ assert( iPage<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) );
+ assert( iPage>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE );
+ assert( iPage<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE) );
+ return iPage;
+}
+
+/*
+** Return the page number associated with frame iFrame in this WAL.
+*/
+static u32 walFramePgno(Wal *pWal, u32 iFrame){
+ int iPage = walFramePage(iFrame); /* Wal-index page holding the entry */
+ volatile u32 *aPage = pWal->apWiData[iPage];
+ if( iPage!=0 ){
+ /* Later pages: the page-number array starts at element 0 */
+ return aPage[(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
+ }
+ /* Page 0: the page numbers follow the WALINDEX_HDR_SIZE-byte header */
+ return aPage[WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
+}
/*
@@ -825,35 +838,44 @@ static void walHashFind(
** actually needed.
*/
static void walCleanupHash(Wal *pWal){
- volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */
- volatile u32 *aPgno; /* Unused return from walHashFind() */
- u32 iZero; /* frame == (aHash[x]+iZero) */
- int iLimit = 0; /* Zero values greater than this */
+ volatile ht_slot *aHash; /* Pointer to hash table to clear */
+ volatile u32 *aPgno; /* Page number array for hash table */
+ u32 iZero; /* frame == (aHash[x]+iZero) */
+ int iLimit = 0; /* Zero values greater than this */
+ int nByte; /* Number of bytes to zero in aPgno[] */
+ int i; /* Used to iterate through aHash[] */
assert( pWal->writeLock );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 );
- if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){
- int nByte; /* Number of bytes to zero in aPgno[] */
- int i; /* Used to iterate through aHash[] */
-
- walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
- iLimit = pWal->hdr.mxFrame - iZero;
- assert( iLimit>0 );
- for(i=0; i<HASHTABLE_NSLOT; i++){
- if( aHash[i]>iLimit ){
- aHash[i] = 0;
- }
- }
- /* Zero the entries in the aPgno array that correspond to frames with
- ** frame numbers greater than pWal->hdr.mxFrame.
- */
- nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit);
- memset((void *)&aPgno[iZero+iLimit+1], 0, nByte);
- assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash );
+ if( pWal->hdr.mxFrame==0 ) return;
+
+ /* Obtain pointers to the hash-table and page-number array containing
+ ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed
+ ** that the page said hash-table and array reside on is already mapped.
+ */
+ assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
+ assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
+ walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero);
+
+ /* Zero all hash-table entries that correspond to frame numbers greater
+ ** than pWal->hdr.mxFrame.
+ */
+ iLimit = pWal->hdr.mxFrame - iZero;
+ assert( iLimit>0 );
+ for(i=0; i<HASHTABLE_NSLOT; i++){
+ if( aHash[i]>iLimit ){
+ aHash[i] = 0;
+ }
}
+
+ /* Zero the entries in the aPgno array that correspond to frames with
+ ** frame numbers greater than pWal->hdr.mxFrame.
+ */
+ nByte = ((char *)aHash - (char *)&aPgno[iLimit+1]);
+ memset((void *)&aPgno[iLimit+1], 0, nByte);
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Verify that the every entry in the mapping region is still reachable
@@ -863,7 +885,7 @@ static void walCleanupHash(Wal *pWal){
int i; /* Loop counter */
int iKey; /* Hash key */
for(i=1; i<=iLimit; i++){
- for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){
+ for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
if( aHash[iKey]==i ) break;
}
assert( aHash[iKey]==i );
@@ -879,50 +901,47 @@ static void walCleanupHash(Wal *pWal){
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
int rc; /* Return code */
- int nMapping; /* Required mapping size in bytes */
-
- /* Make sure the wal-index is mapped. Enlarge the mapping if required. */
- nMapping = walMappingSize(iFrame);
- rc = walIndexMap(pWal, nMapping);
- while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){
- rc = walIndexRemap(pWal, nMapping);
- }
+ u32 iZero; /* One less than frame number of aPgno[1] */
+ volatile u32 *aPgno; /* Page number array */
+ volatile ht_slot *aHash; /* Hash table */
+
+ rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero);
- /* Assuming the wal-index file was successfully mapped, find the hash
- ** table and section of of the page number array that pertain to frame
- ** iFrame of the WAL. Then populate the page number array and the hash
- ** table entry.
+ /* Assuming the wal-index file was successfully mapped, populate the
+ ** page number array and hash table entry.
*/
if( rc==SQLITE_OK ){
int iKey; /* Hash table key */
- u32 iZero; /* One less than frame number of aPgno[1] */
- volatile u32 *aPgno; /* Page number array */
- volatile HASHTABLE_DATATYPE *aHash; /* Hash table */
- int idx; /* Value to write to hash-table slot */
- TESTONLY( int nCollide = 0; /* Number of hash collisions */ )
+ int idx; /* Value to write to hash-table slot */
+ TESTONLY( int nCollide = 0; /* Number of hash collisions */ )
- walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
idx = iFrame - iZero;
+ assert( idx <= HASHTABLE_NSLOT/2 + 1 );
+
+ /* If this is the first entry to be added to this hash-table, zero the
+ ** entire hash table and aPgno[] array before proceeding.
+ */
if( idx==1 ){
- memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32));
- memset((void*)aHash, 0, HASHTABLE_NBYTE);
+ int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1];
+ memset((void*)&aPgno[1], 0, nByte);
}
- assert( idx <= HASHTABLE_NSLOT/2 + 1 );
- if( aPgno[iFrame] ){
- /* If the entry in aPgno[] is already set, then the previous writer
- ** must have exited unexpectedly in the middle of a transaction (after
- ** writing one or more dirty pages to the WAL to free up memory).
- ** Remove the remnants of that writers uncommitted transaction from
- ** the hash-table before writing any new entries.
- */
+ /* If the entry in aPgno[] is already set, then the previous writer
+ ** must have exited unexpectedly in the middle of a transaction (after
+ ** writing one or more dirty pages to the WAL to free up memory).
+ ** Remove the remnants of that writer's uncommitted transaction from
+ ** the hash-table before writing any new entries.
+ */
+ if( aPgno[idx] ){
walCleanupHash(pWal);
- assert( !aPgno[iFrame] );
+ assert( !aPgno[idx] );
}
- aPgno[iFrame] = iPage;
+
+ /* Write the aPgno[] array entry and the hash-table slot. */
for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
assert( nCollide++ < idx );
}
+ aPgno[idx] = iPage;
aHash[iKey] = idx;
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
@@ -944,7 +963,7 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
if( (idx&0x3ff)==0 ){
int i; /* Loop counter */
for(i=1; i<=idx; i++){
- for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){
+ for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
if( aHash[iKey]==i ) break;
}
assert( aHash[iKey]==i );
@@ -1076,9 +1095,6 @@ static int walIndexRecover(Wal *pWal){
}
finished:
- if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){
- rc = walIndexRemap(pWal, walMappingSize(1));
- }
if( rc==SQLITE_OK ){
volatile WalCkptInfo *pInfo;
int i;
@@ -1164,7 +1180,6 @@ int sqlite3WalOpen(
pRet->pVfs = pVfs;
pRet->pWalFd = (sqlite3_file *)&pRet[1];
pRet->pDbFd = pDbFd;
- pRet->szWIndex = -1;
pRet->readLock = -1;
sqlite3_randomness(8, &pRet->hdr.aSalt);
pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
@@ -1207,24 +1222,22 @@ static int walIteratorNext(
u32 iMin; /* Result pgno must be greater than iMin */
u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */
int i; /* For looping through segments */
- int nBlock = p->nFinal; /* Number of entries in current segment */
iMin = p->iPrior;
assert( iMin<0xffffffff );
for(i=p->nSegment-1; i>=0; i--){
struct WalSegment *pSegment = &p->aSegment[i];
- while( pSegment->iNext<nBlock ){
+ while( pSegment->iNext<pSegment->nEntry ){
u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]];
if( iPg>iMin ){
if( iPg<iRet ){
iRet = iPg;
- *piFrame = i*256 + 1 + pSegment->aIndex[pSegment->iNext];
+ *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext];
}
break;
}
pSegment->iNext++;
}
- nBlock = 256;
}
*piPage = p->iPrior = iRet;
@@ -1232,28 +1245,28 @@ static int walIteratorNext(
}
-static void walMergesort8(
- Pgno *aContent, /* Pages in wal */
- u8 *aBuffer, /* Buffer of at least *pnList items to use */
- u8 *aList, /* IN/OUT: List to sort */
+static void walMergesort(
+ u32 *aContent, /* Pages in wal */
+ ht_slot *aBuffer, /* Buffer of at least *pnList items to use */
+ ht_slot *aList, /* IN/OUT: List to sort */
int *pnList /* IN/OUT: Number of elements in aList[] */
){
int nList = *pnList;
if( nList>1 ){
int nLeft = nList / 2; /* Elements in left list */
int nRight = nList - nLeft; /* Elements in right list */
- u8 *aLeft = aList; /* Left list */
- u8 *aRight = &aList[nLeft]; /* Right list */
int iLeft = 0; /* Current index in aLeft */
int iRight = 0; /* Current index in aright */
int iOut = 0; /* Current index in output buffer */
+ ht_slot *aLeft = aList; /* Left list */
+ ht_slot *aRight = aList+nLeft;/* Right list */
/* TODO: Change to non-recursive version. */
- walMergesort8(aContent, aBuffer, aLeft, &nLeft);
- walMergesort8(aContent, aBuffer, aRight, &nRight);
+ walMergesort(aContent, aBuffer, aLeft, &nLeft);
+ walMergesort(aContent, aBuffer, aRight, &nRight);
while( iRight<nRight || iLeft<nLeft ){
- u8 logpage;
+ ht_slot logpage;
Pgno dbpage;
if( (iLeft<nLeft)
@@ -1285,6 +1298,13 @@ static void walMergesort8(
#endif
}
+/*
+** Free an iterator allocated by walIteratorInit().
+**
+** This definition is placed ahead of walIteratorInit() so that the error
+** path in that routine can call it to release a partially built iterator.
+** walIteratorInit() obtains the iterator, its segment array and its index
+** arrays from a single sqlite3_malloc() call, so one sqlite3_free() is
+** sufficient here.
+*/
+static void walIteratorFree(WalIterator *p){
+ sqlite3_free(p);
+}
+
/*
** Map the wal-index into memory owned by this thread, if it is not
** mapped already. Then construct a WalInterator object that can be
@@ -1300,71 +1320,71 @@ static void walMergesort8(
** prior to the WalIterator object being destroyed.
*/
static int walIteratorInit(Wal *pWal, WalIterator **pp){
- u32 *aData; /* Content of the wal-index file */
- WalIterator *p; /* Return value */
- int nSegment; /* Number of segments to merge */
- u32 iLast; /* Last frame in log */
- int nByte; /* Number of bytes to allocate */
- int i; /* Iterator variable */
- int nFinal; /* Number of unindexed entries */
- u8 *aTmp; /* Temp space used by merge-sort */
- u8 *aSpace; /* Surplus space on the end of the allocation */
-
- /* Make sure the wal-index is mapped into local memory */
- assert( pWal->pWiData && pWal->szWIndex>=walMappingSize(pWal->hdr.mxFrame) );
+ WalIterator *p; /* Return value */
+ int nSegment; /* Number of segments to merge */
+ u32 iLast; /* Last frame in log */
+ int nByte; /* Number of bytes to allocate */
+ int i; /* Iterator variable */
+ ht_slot *aTmp; /* Temp space used by merge-sort */
+ ht_slot *aSpace; /* Space at the end of the allocation */
/* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other
** thread is able to write to shared memory while this routine is
** running (or, indeed, while the WalIterator object exists). Hence,
- ** we can cast off the volatile qualifacation from shared memory
+ ** we can cast off the volatile qualification from shared memory
*/
assert( pWal->ckptLock );
- aData = (u32*)pWal->pWiData;
+ iLast = pWal->hdr.mxFrame;
/* Allocate space for the WalIterator object */
- iLast = pWal->hdr.mxFrame;
- nSegment = (iLast >> 8) + 1;
- nFinal = (iLast & 0x000000FF);
- nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256);
+ nSegment = walFramePage(iLast) + 1;
+ /* The allocation holds the WalIterator header, nSegment WalSegment
+ ** structures, and (nSegment+1) arrays of HASHTABLE_NPAGE ht_slot values:
+ ** one index array per segment plus one more that is used as scratch
+ ** space (aTmp) by the merge-sort below.
+ */
+ nByte = sizeof(WalIterator)
+ + nSegment*(sizeof(struct WalSegment))
+ + (nSegment+1)*(HASHTABLE_NPAGE * sizeof(ht_slot));
p = (WalIterator *)sqlite3_malloc(nByte);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, nByte);
- /* Initialize the WalIterator object. Each 256-entry segment is
- ** presorted in order to make iterating through all entries much
- ** faster.
- */
+ /* Initialize the WalIterator object. The index array of each segment
+ ** is presorted with walMergesort() in order to make iterating through
+ ** all entries much faster. aSpace points just past the aSegment[] array;
+ ** aTmp is the scratch array that follows the nSegment index arrays.
+ */
p->nSegment = nSegment;
- aSpace = (u8 *)&p->aSegment[nSegment];
- aTmp = &aSpace[nSegment*256];
+ aSpace = (ht_slot *)&p->aSegment[nSegment];
+ aTmp = &aSpace[HASHTABLE_NPAGE*nSegment];
for(i=0; i<nSegment; i++){
+ volatile ht_slot *aHash;
int j;
- int nIndex = (i==nSegment-1) ? nFinal : 256;
- p->aSegment[i].aPgno = &aData[walIndexEntry(i*256+1)];
- p->aSegment[i].aIndex = aSpace;
- for(j=0; j<nIndex; j++){
+ u32 iZero;
+ int nEntry;
+ volatile u32 *aPgno;
+ int rc;
+
+ /* Fetch the hash table and page-number array for segment i. On failure
+ ** release the partially built iterator and propagate the error code.
+ */
+ rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero);
+ if( rc!=SQLITE_OK ){
+ walIteratorFree(p);
+ return rc;
+ }
+ /* Other users of walHashGet() index aPgno[] starting at 1, with iZero
+ ** being one less than the frame number of aPgno[1]. Advance aPgno so
+ ** that aPgno[0] is the first entry, matching the 0-based indexes
+ ** written to aSpace[] below. nEntry is computed between the two
+ ** increments on purpose: for the final segment it is iLast minus the
+ ** original (not yet incremented) iZero; for any earlier segment it is
+ ** the number of u32 slots between the adjusted aPgno and aHash.
+ ** iZero is then incremented so that iZero+index is a frame number,
+ ** which is how walIteratorNext() uses it.
+ */
+ aPgno++;
+ nEntry = ((i+1)==nSegment)?iLast-iZero:(u32 *)aHash-(u32 *)aPgno;
+ iZero++;
+
+ for(j=0; j<nEntry; j++){
aSpace[j] = j;
}
- walMergesort8(p->aSegment[i].aPgno, aTmp, aSpace, &nIndex);
- memset(&aSpace[nIndex], aSpace[nIndex-1], 256-nIndex);
- aSpace += 256;
- p->nFinal = nIndex;
+ /* nEntry is passed by address: walMergesort() declares its list-length
+ ** argument as IN/OUT, so the value stored in the segment is whatever
+ ** the sort leaves there.
+ */
+ walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry);
+ p->aSegment[i].iZero = iZero;
+ p->aSegment[i].nEntry = nEntry;
+ p->aSegment[i].aIndex = aSpace;
+ p->aSegment[i].aPgno = (u32 *)aPgno;
+ aSpace += HASHTABLE_NPAGE;
}
+ /* Every per-segment index array has been handed out, so aSpace must now
+ ** point at the start of the scratch array.
+ */
+ assert( aSpace==aTmp );
- /* Return the fully initializd WalIterator object */
+ /* Return the fully initialized WalIterator object */
*pp = p;
return SQLITE_OK ;
}
-/*
-** Free an iterator allocated by walIteratorInit().
-*/
-static void walIteratorFree(WalIterator *p){
- sqlite3_free(p);
-}
-
/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
@@ -1409,7 +1429,6 @@ static int walCheckpoint(
u32 iFrame = 0; /* Wal frame containing data for iDbpage */
u32 mxSafeFrame; /* Max frame that can be backfilled */
int i; /* Loop counter */
- volatile WalIndexHdr *pHdr; /* The actual wal-index header in SHM */
volatile WalCkptInfo *pInfo; /* The checkpoint status information */
/* Allocate the iterator */
@@ -1430,9 +1449,7 @@ static int walCheckpoint(
** cannot be backfilled from the WAL.
*/
mxSafeFrame = pWal->hdr.mxFrame;
- pHdr = (volatile WalIndexHdr*)pWal->pWiData;
- pInfo = (volatile WalCkptInfo*)&pHdr[2];
- assert( pInfo==walCkptInfo(pWal) );
+ pInfo = walCkptInfo(pWal);
for(i=1; i<WAL_NREADER; i++){
u32 y = pInfo->aReadMark[i];
if( mxSafeFrame>=y ){
@@ -1461,6 +1478,7 @@ static int walCheckpoint(
/* Iterate through the contents of the WAL, copying data to the db file. */
while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
+ assert( walFramePgno(pWal, iFrame)==iDbpage );
if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue;
rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage,
walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE
@@ -1472,7 +1490,7 @@ static int walCheckpoint(
/* If work was actually accomplished... */
if( rc==SQLITE_OK ){
- if( mxSafeFrame==pHdr[0].mxFrame ){
+ if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));
if( rc==SQLITE_OK && sync_flags ){
rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
@@ -1525,7 +1543,6 @@ int sqlite3WalClose(
if( rc==SQLITE_OK ){
isDelete = 1;
}
- walIndexUnmap(pWal);
}
walIndexClose(pWal, isDelete);
@@ -1534,6 +1551,7 @@ int sqlite3WalClose(
sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
}
WALTRACE(("WAL%p: closed\n", pWal));
+ sqlite3_free(pWal->apWiData);
sqlite3_free(pWal);
}
return rc;
@@ -1557,16 +1575,12 @@ int sqlite3WalClose(
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
- u32 aCksum[2]; /* Checksum on the header content */
- WalIndexHdr h1, h2; /* Two copies of the header content */
- WalIndexHdr *aHdr; /* Header in shared memory */
+ u32 aCksum[2]; /* Checksum on the header content */
+ WalIndexHdr h1, h2; /* Two copies of the header content */
+ WalIndexHdr volatile *aHdr; /* Header in shared memory */
- if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
- /* The wal-index is not large enough to hold the header, then assume
- ** header is invalid. */
- return 1;
- }
- assert( pWal->pWiData );
+ /* The first page of the wal-index must be mapped at this point. */
+ assert( pWal->nWiData>0 && pWal->apWiData[0] );
/* Read the header. This might happen currently with a write to the
** same area of shared memory on a different CPU in a SMP,
@@ -1578,10 +1592,10 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
** Memory barriers are used to prevent the compiler or the hardware from
** reordering the reads and writes.
*/
- aHdr = (WalIndexHdr*)pWal->pWiData;
- memcpy(&h1, &aHdr[0], sizeof(h1));
+ aHdr = walIndexHdr(pWal);
+ memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
sqlite3OsShmBarrier(pWal->pDbFd);
- memcpy(&h2, &aHdr[1], sizeof(h2));
+ memcpy(&h2, (void *)&aHdr[1], sizeof(h2));
if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
return 1; /* Dirty read */
@@ -1625,26 +1639,32 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
static int walIndexReadHdr(Wal *pWal, int *pChanged){
int rc; /* Return code */
int badHdr; /* True if a header read failed */
+ volatile u32 *page0;
+ /* Ensure that page 0 of the wal-index (the page that contains the
+ ** wal-index header) is mapped. Return early if an error occurs here.
+ */
assert( pChanged );
- rc = walIndexMap(pWal, walMappingSize(1));
+ rc = walIndexPage(pWal, 0, &page0);
if( rc!=SQLITE_OK ){
return rc;
- }
+ };
+ assert( page0 || pWal->writeLock==0 );
- /* Try once to read the header straight out. This works most of the
- ** time.
+ /* If the first page of the wal-index has been mapped, try to read the
+ ** wal-index header immediately, without holding any lock. This usually
+ ** works, but may fail if the wal-index header is corrupt or currently
+ ** being modified by another user.
*/
- badHdr = walIndexTryHdr(pWal, pChanged);
+ badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);
/* If the first attempt failed, it might have been due to a race
** with a writer. So get a WRITE lock and try again.
*/
assert( badHdr==0 || pWal->writeLock==0 );
- if( badHdr ){
- rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
- if( rc==SQLITE_OK ){
- pWal->writeLock = 1;
+ if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
+ pWal->writeLock = 1;
+ if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
badHdr = walIndexTryHdr(pWal, pChanged);
if( badHdr ){
/* If the wal-index header is still malformed even while holding
@@ -1654,17 +1674,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
rc = walIndexRecover(pWal);
*pChanged = 1;
}
- walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
- pWal->writeLock = 0;
- }
- }
-
- /* Make sure the mapping is large enough to cover the entire wal-index */
- if( rc==SQLITE_OK ){
- int szWanted = walMappingSize(pWal->hdr.mxFrame);
- if( pWal->szWIndex<szWanted ){
- rc = walIndexMap(pWal, szWanted);
}
+ pWal->writeLock = 0;
+ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
}
return rc;
@@ -1705,12 +1717,11 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
** WAL_READ_LOCK() while changing values.
*/
static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
- volatile WalIndexHdr *pHdr; /* Header of the wal-index */
volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */
u32 mxReadMark; /* Largest aReadMark[] value */
int mxI; /* Index of largest aReadMark[] value */
int i; /* Loop counter */
- int rc; /* Return code */
+ int rc = SQLITE_OK; /* Return code */
assert( pWal->readLock<0 ); /* Not currently locked */
@@ -1739,16 +1750,12 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
rc = SQLITE_BUSY_RECOVERY;
}
}
- }else{
- rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
}
if( rc!=SQLITE_OK ){
return rc;
}
- pHdr = (volatile WalIndexHdr*)pWal->pWiData;
- pInfo = (volatile WalCkptInfo*)&pHdr[2];
- assert( pInfo==walCkptInfo(pWal) );
+ pInfo = walCkptInfo(pWal);
if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
/* The WAL has been completely backfilled (or it is empty).
** and can be safely ignored.
@@ -1756,7 +1763,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
rc = walLockShared(pWal, WAL_READ_LOCK(0));
sqlite3OsShmBarrier(pWal->pDbFd);
if( rc==SQLITE_OK ){
- if( memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){
+ if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
/* It is not safe to allow the reader to continue here if frames
** may have been appended to the log before READ_LOCK(0) was obtained.
** When holding READ_LOCK(0), the reader ignores the entire log file,
@@ -1850,7 +1857,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
*/
sqlite3OsShmBarrier(pWal->pDbFd);
if( pInfo->aReadMark[mxI]!=mxReadMark
- || memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr))
+ || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
){
walUnlockShared(pWal, WAL_READ_LOCK(mxI));
return WAL_RETRY;
@@ -1883,7 +1890,6 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
do{
rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
}while( rc==WAL_RETRY );
- walIndexUnmap(pWal);
return rc;
}
@@ -1913,7 +1919,6 @@ int sqlite3WalRead(
int nOut, /* Size of buffer pOut in bytes */
u8 *pOut /* Buffer to write page data to */
){
- int rc; /* Return code */
u32 iRead = 0; /* If !=0, WAL frame to return data from */
u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */
int iHash; /* Used to loop through N hash tables */
@@ -1932,12 +1937,6 @@ int sqlite3WalRead(
return SQLITE_OK;
}
- /* Ensure the wal-index is mapped. */
- rc = walIndexMap(pWal, walMappingSize(iLast));
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
/* Search the hash table or tables for an entry matching page number
** pgno. Each iteration of the following for() loop searches one
** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
@@ -1963,25 +1962,25 @@ int sqlite3WalRead(
** This condition filters out entries that were added to the hash
** table after the current read-transaction had started.
*/
- for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){
- volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */
- volatile u32 *aPgno; /* Pointer to array of page numbers */
+ for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
+ volatile ht_slot *aHash; /* Pointer to hash table */
+ volatile u32 *aPgno; /* Pointer to array of page numbers */
u32 iZero; /* Frame number corresponding to aPgno[0] */
int iKey; /* Hash slot index */
- int mxHash; /* upper bound on aHash[] values */
+ int rc;
- walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
- mxHash = iLast - iZero;
- if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE;
+ rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
u32 iFrame = aHash[iKey] + iZero;
- if( iFrame<=iLast && aPgno[iFrame]==pgno ){
+ if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){
assert( iFrame>iRead );
iRead = iFrame;
}
}
}
- assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* If expensive assert() statements are available, do a linear search
@@ -1991,7 +1990,7 @@ int sqlite3WalRead(
u32 iRead2 = 0;
u32 iTest;
for(iTest=iLast; iTest>0; iTest--){
- if( pWal->pWiData[walIndexEntry(iTest)]==pgno ){
+ if( walFramePgno(pWal, iTest)==pgno ){
iRead2 = iTest;
break;
}
@@ -2003,7 +2002,6 @@ int sqlite3WalRead(
/* If iRead is non-zero, then it is the log frame number that contains the
** required page. Read and return data from the log file.
*/
- walIndexUnmap(pWal);
if( iRead ){
i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE;
*pInWal = 1;
@@ -2057,19 +2055,12 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){
** time the read transaction on this connection was started, then
** the write is disallowed.
*/
- rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
- if( rc ){
- walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
- pWal->writeLock = 0;
- return rc;
- }
- if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
+ if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
pWal->writeLock = 0;
rc = SQLITE_BUSY;
}
- walIndexUnmap(pWal);
return rc;
}
@@ -2098,39 +2089,35 @@ int sqlite3WalEndWriteTransaction(Wal *pWal){
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
int rc = SQLITE_OK;
if( pWal->writeLock ){
- int unused;
Pgno iMax = pWal->hdr.mxFrame;
Pgno iFrame;
- assert( pWal->pWiData==0 );
- rc = walIndexReadHdr(pWal, &unused);
- if( rc==SQLITE_OK ){
- rc = walIndexMap(pWal, walMappingSize(iMax));
- }
- if( rc==SQLITE_OK ){
- for(iFrame=pWal->hdr.mxFrame+1;
- ALWAYS(rc==SQLITE_OK) && iFrame<=iMax;
- iFrame++
- ){
- /* This call cannot fail. Unless the page for which the page number
- ** is passed as the second argument is (a) in the cache and
- ** (b) has an outstanding reference, then xUndo is either a no-op
- ** (if (a) is false) or simply expels the page from the cache (if (b)
- ** is false).
- **
- ** If the upper layer is doing a rollback, it is guaranteed that there
- ** are no outstanding references to any page other than page 1. And
- ** page 1 is never written to the log until the transaction is
- ** committed. As a result, the call to xUndo may not fail.
- */
- assert( pWal->writeLock );
- assert( pWal->pWiData[walIndexEntry(iFrame)]!=1 );
- rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
- }
- walCleanupHash(pWal);
+ /* Restore the client's cache of the wal-index header to the state it
+ ** was in before the client began writing to the database.
+ */
+ memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
+
+ for(iFrame=pWal->hdr.mxFrame+1;
+ ALWAYS(rc==SQLITE_OK) && iFrame<=iMax;
+ iFrame++
+ ){
+ /* This call cannot fail. Unless the page for which the page number
+ ** is passed as the second argument is (a) in the cache and
+ ** (b) has an outstanding reference, then xUndo is either a no-op
+ ** (if (a) is false) or simply expels the page from the cache (if (b)
+ ** is false).
+ **
+ ** If the upper layer is doing a rollback, it is guaranteed that there
+ ** are no outstanding references to any page other than page 1. And
+ ** page 1 is never written to the log until the transaction is
+ ** committed. As a result, the call to xUndo may not fail.
+ */
+ assert( walFramePgno(pWal, iFrame)!=1 );
+ rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
}
- walIndexUnmap(pWal);
+ walCleanupHash(pWal);
}
+ assert( rc==SQLITE_OK );
return rc;
}
@@ -2170,16 +2157,12 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
}
if( aWalData[0]<pWal->hdr.mxFrame ){
- rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
pWal->hdr.mxFrame = aWalData[0];
pWal->hdr.aFrameCksum[0] = aWalData[1];
pWal->hdr.aFrameCksum[1] = aWalData[2];
- if( rc==SQLITE_OK ){
- walCleanupHash(pWal);
- }
+ walCleanupHash(pWal);
}
- walIndexUnmap(pWal);
return rc;
}
@@ -2199,9 +2182,7 @@ static int walRestartLog(Wal *pWal){
int rc = SQLITE_OK;
int cnt;
- if( pWal->readLock==0
- && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)))
- ){
+ if( pWal->readLock==0 ){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
assert( pInfo->nBackfill==pWal->hdr.mxFrame );
if( pInfo->nBackfill>0 ){
@@ -2237,11 +2218,6 @@ static int walRestartLog(Wal *pWal){
int notUsed;
rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
}while( rc==WAL_RETRY );
-
- /* Unmap the wal-index before returning. Otherwise the VFS layer may
- ** hold a mutex for the duration of the IO performed by WalFrames().
- */
- walIndexUnmap(pWal);
}
return rc;
}
@@ -2267,7 +2243,6 @@ int sqlite3WalFrames(
assert( pList );
assert( pWal->writeLock );
- assert( pWal->pWiData==0 );
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
{ int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
@@ -2280,10 +2255,8 @@ int sqlite3WalFrames(
** log file, instead of appending to it at pWal->hdr.mxFrame.
*/
if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
- assert( pWal->pWiData==0 );
return rc;
}
- assert( pWal->pWiData==0 && pWal->readLock>0 );
/* If this is the first frame written into the log, write the WAL
** header to the start of the WAL file. See comments at the top of
@@ -2358,7 +2331,6 @@ int sqlite3WalFrames(
rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
}
- assert( pWal->pWiData==0 );
/* Append data to the wal-index. It is not necessary to lock the
** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
@@ -2391,7 +2363,6 @@ int sqlite3WalFrames(
}
}
- walIndexUnmap(pWal);
WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
return rc;
}
@@ -2412,7 +2383,6 @@ int sqlite3WalCheckpoint(
int rc; /* Return code */
int isChanged = 0; /* True if a new wal-index header is loaded */
- assert( pWal->pWiData==0 );
assert( pWal->ckptLock==0 );
WALTRACE(("WAL%p: checkpoint begins\n", pWal));
@@ -2441,7 +2411,6 @@ int sqlite3WalCheckpoint(
}
/* Release the locks. */
- walIndexUnmap(pWal);
walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
pWal->ckptLock = 0;
WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));