diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/os.c | 18 | ||||
-rw-r--r-- | src/os.h | 4 | ||||
-rw-r--r-- | src/os_unix.c | 275 | ||||
-rw-r--r-- | src/os_win.c | 268 | ||||
-rw-r--r-- | src/sqlite.h.in | 4 | ||||
-rw-r--r-- | src/test6.c | 30 | ||||
-rw-r--r-- | src/test_devsym.c | 37 | ||||
-rw-r--r-- | src/test_osinst.c | 69 | ||||
-rw-r--r-- | src/test_vfs.c | 161 | ||||
-rw-r--r-- | src/wal.c | 759 |
10 files changed, 705 insertions, 920 deletions
@@ -101,15 +101,6 @@ int sqlite3OsDeviceCharacteristics(sqlite3_file *id){ int sqlite3OsShmOpen(sqlite3_file *id){ return id->pMethods->xShmOpen(id); } -int sqlite3OsShmSize(sqlite3_file *id, int reqSize, int *pNewSize){ - return id->pMethods->xShmSize(id, reqSize, pNewSize); -} -int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){ - return id->pMethods->xShmGet(id, reqSize, pSize, pp); -} -int sqlite3OsShmRelease(sqlite3_file *id){ - return id->pMethods->xShmRelease(id); -} int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ return id->pMethods->xShmLock(id, offset, n, flags); } @@ -119,6 +110,15 @@ void sqlite3OsShmBarrier(sqlite3_file *id){ int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){ return id->pMethods->xShmClose(id, deleteFlag); } +int sqlite3OsShmMap( + sqlite3_file *id, + int iPage, + int pgsz, + int isWrite, + void volatile **pp +){ + return id->pMethods->xShmMap(id, iPage, pgsz, isWrite, pp); +} /* ** The next group of routines are convenience wrappers around the @@ -248,12 +248,10 @@ int sqlite3OsFileControl(sqlite3_file*,int,void*); int sqlite3OsSectorSize(sqlite3_file *id); int sqlite3OsDeviceCharacteristics(sqlite3_file *id); int sqlite3OsShmOpen(sqlite3_file *id); -int sqlite3OsShmSize(sqlite3_file *id, int, int*); -int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**); -int sqlite3OsShmRelease(sqlite3_file *id); int sqlite3OsShmLock(sqlite3_file *id, int, int, int); void sqlite3OsShmBarrier(sqlite3_file *id); int sqlite3OsShmClose(sqlite3_file *id, int); +int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); /* ** Functions for accessing sqlite3_vfs methods diff --git a/src/os_unix.c b/src/os_unix.c index dadc3c98a..fcccead72 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -3128,21 +3128,15 @@ static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and ** unixMutexHeld() is true when reading or 
writing any other field ** in this structure. -** -** To avoid deadlocks, mutex and mutexBuf are always released in the -** reverse order that they are acquired. mutexBuf is always acquired -** first and released last. This invariant is check by asserting -** sqlite3_mutex_notheld() on mutex whenever mutexBuf is acquired or -** released. */ struct unixShmNode { unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ sqlite3_mutex *mutex; /* Mutex to access this object */ - sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ char *zFilename; /* Name of the mmapped file */ int h; /* Open file descriptor */ - int szMap; /* Size of the mapping into memory */ - char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ + int szRegion; /* Size of shared-memory regions */ + int nRegion; /* Size of array apRegion */ + char **apRegion; /* Array of mapped shared-memory regions */ int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ #ifdef SQLITE_DEBUG @@ -3169,7 +3163,6 @@ struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode mutex */ - u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_DEBUG @@ -3266,10 +3259,13 @@ static void unixShmPurge(unixFile *pFd){ unixShmNode *p = pFd->pInode->pShmNode; assert( unixMutexHeld() ); if( p && p->nRef==0 ){ + int i; assert( p->pInode==pFd->pInode ); if( p->mutex ) sqlite3_mutex_free(p->mutex); - if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); - if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap); + for(i=0; i<p->nRegion; i++){ + munmap(p->apRegion[i], p->szRegion); + } + sqlite3_free(p->apRegion); if( p->h>=0 ) close(p->h); p->pInode->pShmNode = 0; sqlite3_free(p); @@ -3345,11 +3341,6 @@ static int 
unixShmOpen( rc = SQLITE_NOMEM; goto shm_open_err; } - pShmNode->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); - if( pShmNode->mutexBuf==0 ){ - rc = SQLITE_NOMEM; - goto shm_open_err; - } pShmNode->h = open(pShmNode->zFilename, O_RDWR|O_CREAT, 0664); if( pShmNode->h<0 ){ @@ -3420,7 +3411,6 @@ static int unixShmClose( *pp = p->pNext; /* Free the connection p */ - assert( p->hasMutexBuf==0 ); sqlite3_free(p); pDbFd->pShm = 0; sqlite3_mutex_leave(pShmNode->mutex); @@ -3440,148 +3430,6 @@ static int unixShmClose( } /* -** Changes the size of the underlying storage for a shared-memory segment. -** -** The reqSize parameter is the new requested size of the shared memory. -** This implementation is free to increase the shared memory size to -** any amount greater than or equal to reqSize. If the shared memory is -** already as big or bigger as reqSize, this routine is a no-op. -** -** The reqSize parameter is the minimum size requested. The implementation -** is free to expand the storage to some larger amount if it chooses. -*/ -static int unixShmSize( - sqlite3_file *fd, /* The open database file holding SHM */ - int reqSize, /* Requested size. -1 for query only */ - int *pNewSize /* Write new size here */ -){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - unixShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_OK; - struct stat sStat; - - assert( pShmNode==pDbFd->pInode->pShmNode ); - assert( pShmNode->pInode==pDbFd->pInode ); - - while( 1 ){ - if( fstat(pShmNode->h, &sStat)==0 ){ - *pNewSize = (int)sStat.st_size; - if( reqSize<=(int)sStat.st_size ) break; - }else{ - *pNewSize = 0; - rc = SQLITE_IOERR_SHMSIZE; - break; - } - rc = ftruncate(pShmNode->h, reqSize); - reqSize = -1; - } - return rc; -} - -/* -** Release the lock held on the shared memory segment to that other -** threads are free to resize it if necessary. -** -** If the lock is not currently held, this routine is a harmless no-op. 
-** -** If the shared-memory object is in lock state RECOVER, then we do not -** really want to release the lock, so in that case too, this routine -** is a no-op. -*/ -static int unixShmRelease(sqlite3_file *fd){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - - if( p->hasMutexBuf ){ - assert( sqlite3_mutex_notheld(p->pShmNode->mutex) ); - sqlite3_mutex_leave(p->pShmNode->mutexBuf); - p->hasMutexBuf = 0; - } - return SQLITE_OK; -} - -/* -** Map the shared storage into memory. -** -** If reqMapSize is positive, then an attempt is made to make the -** mapping at least reqMapSize bytes in size. However, the mapping -** will never be larger than the size of the underlying shared memory -** as set by prior calls to xShmSize(). -** -** *ppBuf is made to point to the memory which is a mapping of the -** underlying storage. A mutex is acquired to prevent other threads -** from running while *ppBuf is in use in order to prevent other threads -** remapping *ppBuf out from under this thread. The unixShmRelease() -** call will release the mutex. However, if the lock state is CHECKPOINT, -** the mutex is not acquired because CHECKPOINT will never remap the -** buffer. RECOVER might remap, though, so CHECKPOINT will acquire -** the mutex if and when it promotes to RECOVER. -** -** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from -** being remapped also prevents more than one thread from being in -** RECOVER at a time. But, RECOVER sometimes wants to remap itself. -** To prevent RECOVER from losing its lock while remapping, the -** mutex is not released by unixShmRelease() when in RECOVER. -** -** *pNewMapSize is set to the size of the mapping. Usually *pNewMapSize -** will be reqMapSize or larger, though it could be smaller if the -** underlying shared memory has never been enlarged to reqMapSize bytes -** by prior calls to xShmSize(). -** -** *ppBuf might be NULL and zero if no space has -** yet been allocated to the underlying storage. 
-*/ -static int unixShmGet( - sqlite3_file *fd, /* Database file holding shared memory */ - int reqMapSize, /* Requested size of mapping. -1 means don't care */ - int *pNewMapSize, /* Write new size of mapping here */ - void volatile **ppBuf /* Write mapping buffer origin here */ -){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - unixShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_OK; - - assert( pShmNode==pDbFd->pInode->pShmNode ); - assert( pShmNode->pInode==pDbFd->pInode ); - - if( p->hasMutexBuf==0 ){ - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_enter(pShmNode->mutexBuf); - p->hasMutexBuf = 1; - } - sqlite3_mutex_enter(pShmNode->mutex); - if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){ - int actualSize; - if( unixShmSize(fd, -1, &actualSize)!=SQLITE_OK ){ - actualSize = 0; - } - reqMapSize = actualSize; - if( pShmNode->pMMapBuf || reqMapSize<=0 ){ - munmap(pShmNode->pMMapBuf, pShmNode->szMap); - } - if( reqMapSize>0 ){ - pShmNode->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED, - pShmNode->h, 0); - pShmNode->szMap = pShmNode->pMMapBuf ? reqMapSize : 0; - }else{ - pShmNode->pMMapBuf = 0; - pShmNode->szMap = 0; - } - } - *pNewMapSize = pShmNode->szMap; - *ppBuf = pShmNode->pMMapBuf; - sqlite3_mutex_leave(pShmNode->mutex); - if( *ppBuf==0 ){ - /* Do not hold the mutex if a NULL pointer is being returned. */ - unixShmRelease(fd); - } - return rc; -} - - -/* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little @@ -3700,21 +3548,114 @@ static int unixShmLock( ** any load or store begun after the barrier. 
*/ static void unixShmBarrier( - sqlite3_file *fd /* Database file holding the shared memory */ + sqlite3_file *fd /* Database file holding the shared memory */ ){ unixEnterMutex(); unixLeaveMutex(); } +/* +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the isWrite parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** isWrite is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int unixShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + unixFile *pDbFd = (unixFile*)fd; + unixShm *p = pDbFd->pShm; + unixShmNode *pShmNode = p->pShmNode; + int rc = SQLITE_OK; + + sqlite3_mutex_enter(pShmNode->mutex); + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + + if( pShmNode->nRegion<=iRegion ){ + char **apNew; /* New apRegion[] array */ + int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + struct stat sStat; /* Used by fstat() */ + + pShmNode->szRegion = szRegion; + + /* The requested region is not mapped into this processes address space. 
+ ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + if( fstat(pShmNode->h, &sStat) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + + if( sStat.st_size<nByte ){ + /* The requested memory region does not exist. If isWrite is set to + ** zero, exit early. *pp will be set to NULL and SQLITE_OK returned. + ** + ** Alternatively, if isWrite is non-zero, use ftruncate() to allocate + ** the requested memory region. + */ + if( !isWrite ) goto shmpage_out; + if( ftruncate(pShmNode->h, nByte) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + } + + /* Map the requested memory region into this processes address space. */ + apNew = (char **)sqlite3_realloc( + pShmNode->apRegion, (iRegion+1)*sizeof(char *) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; + } + pShmNode->apRegion = apNew; + while(pShmNode->nRegion<=iRegion){ + void *pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, + MAP_SHARED, pShmNode->h, iRegion*szRegion + ); + if( pMem==MAP_FAILED ){ + rc = SQLITE_IOERR; + goto shmpage_out; + } + pShmNode->apRegion[pShmNode->nRegion] = pMem; + pShmNode->nRegion++; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + *pp = pShmNode->apRegion[iRegion]; + }else{ + *pp = 0; + } + sqlite3_mutex_leave(pShmNode->mutex); + return rc; +} #else # define unixShmOpen 0 -# define unixShmSize 0 -# define unixShmGet 0 -# define unixShmRelease 0 # define unixShmLock 0 # define unixShmBarrier 0 # define unixShmClose 0 +# define unixShmMap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ /* @@ -3773,12 +3714,10 @@ static const sqlite3_io_methods METHOD = { \ unixSectorSize, /* xSectorSize */ \ unixDeviceCharacteristics, /* xDeviceCapabilities */ \ unixShmOpen, /* xShmOpen */ \ - unixShmSize, /* xShmSize */ \ - unixShmGet, /* xShmGet */ \ - unixShmRelease, /* xShmRelease */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmClose /* xShmClose */ \ + 
unixShmClose, /* xShmClose */ \ + unixShmMap /* xShmMap */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ diff --git a/src/os_win.c b/src/os_win.c index 1a9994b08..3a6b47771 100644 --- a/src/os_win.c +++ b/src/os_win.c @@ -1216,13 +1216,17 @@ static int winShmMutexHeld(void) { */ struct winShmNode { sqlite3_mutex *mutex; /* Mutex to access this object */ - sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ char *zFilename; /* Name of the file */ winFile hFile; /* File handle from winOpen */ - HANDLE hMap; /* File handle from CreateFileMapping */ + + int szRegion; /* Size of shared-memory regions */ + int nRegion; /* Size of array apRegion */ + struct ShmRegion { + HANDLE hMap; /* File handle from CreateFileMapping */ + void *pMap; + } *aRegion; DWORD lastErrno; /* The Windows errno from the last I/O error */ - int szMap; /* Size of the mapping of file into memory */ - char *pMMapBuf; /* Where currently mmapped(). 
NULL if unmapped */ + int nRef; /* Number of winShm objects pointing to this */ winShm *pFirst; /* All winShm objects pointing to this */ winShmNode *pNext; /* Next in list of all winShmNode objects */ @@ -1325,19 +1329,18 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ pp = &winShmNodeList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ + int i; if( p->mutex ) sqlite3_mutex_free(p->mutex); - if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); - if( p->pMMapBuf ){ - UnmapViewOfFile(p->pMMapBuf); - } - if( INVALID_HANDLE_VALUE != p->hMap ){ - CloseHandle(p->hMap); + for(i=0; i<p->nRegion; i++){ + UnmapViewOfFile(p->aRegion[i].pMap); + CloseHandle(p->aRegion[i].hMap); } if( p->hFile.h != INVALID_HANDLE_VALUE ) { winClose((sqlite3_file *)&p->hFile); } if( deleteFlag ) winDelete(pVfs, p->zFilename, 0); *pp = p->pNext; + sqlite3_free(p->aRegion); sqlite3_free(p); }else{ pp = &p->pNext; @@ -1404,8 +1407,6 @@ static int winShmOpen( }else{ pShmNode = pNew; pNew = 0; - pShmNode->pMMapBuf = NULL; - pShmNode->hMap = INVALID_HANDLE_VALUE; ((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE; pShmNode->pNext = winShmNodeList; winShmNodeList = pShmNode; @@ -1415,11 +1416,6 @@ static int winShmOpen( rc = SQLITE_NOMEM; goto shm_open_err; } - pShmNode->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); - if( pShmNode->mutexBuf==0 ){ - rc = SQLITE_NOMEM; - goto shm_open_err; - } rc = winOpen(pDbFd->pVfs, pShmNode->zFilename, /* Name of the file (UTF-8) */ (sqlite3_file*)&pShmNode->hFile, /* File handle here */ @@ -1507,171 +1503,113 @@ static int winShmClose( } /* -** Increase the size of the underlying storage for a shared-memory segment. +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. 
** -** The reqSize parameter is the new requested minimum size of the underlying -** shared memory. This routine may choose to make the shared memory larger -** than this value (for example to round the shared memory size up to an -** operating-system dependent page size.) +** If an error occurs, an error code is returned and *pp is set to NULL. ** -** This routine will only grow the size of shared memory. A request for -** a smaller size is a no-op. -*/ -static int winShmSize( - sqlite3_file *fd, /* Database holding the shared memory */ - int reqSize, /* Requested size. -1 for query only */ - int *pNewSize /* Write new size here */ +** Otherwise, if the isWrite parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** isWrite is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. 
+*/ +static int winShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ ){ winFile *pDbFd = (winFile*)fd; winShm *p = pDbFd->pShm; winShmNode *pShmNode = p->pShmNode; int rc = SQLITE_OK; - *pNewSize = 0; - if( reqSize>=0 ){ - sqlite3_int64 sz; - rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); - if( SQLITE_OK==rc && reqSize>sz ){ - rc = winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize); - } - } - if( SQLITE_OK==rc ){ - sqlite3_int64 sz; + sqlite3_mutex_enter(pShmNode->mutex); + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + + if( pShmNode->nRegion<=iRegion ){ + struct ShmRegion *apNew; /* New aRegion[] array */ + int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + sqlite3_int64 sz; /* Current size of wal-index file */ + + pShmNode->szRegion = szRegion; + + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); - if( SQLITE_OK==rc ){ - *pNewSize = (int)sz; - }else{ - rc = SQLITE_IOERR; + if( rc!=SQLITE_OK ){ + goto shmpage_out; } - } - return rc; -} + if( sz<nByte ){ + /* The requested memory region does not exist. If isWrite is set to + ** zero, exit early. *pp will be set to NULL and SQLITE_OK returned. + ** + ** Alternatively, if isWrite is non-zero, use ftruncate() to allocate + ** the requested memory region. + */ + if( !isWrite ) goto shmpage_out; + rc = winTruncate((sqlite3_file *)&pShmNode->hFile, nByte); + if( rc!=SQLITE_OK ){ + goto shmpage_out; + } + } -/* -** Map the shared storage into memory. The minimum size of the -** mapping should be reqMapSize if reqMapSize is positive. 
If -** reqMapSize is zero or negative, the implementation can choose -** whatever mapping size is convenient. -** -** *ppBuf is made to point to the memory which is a mapping of the -** underlying storage. A mutex is acquired to prevent other threads -** from running while *ppBuf is in use in order to prevent other threads -** remapping *ppBuf out from under this thread. The winShmRelease() -** call will release the mutex. However, if the lock state is CHECKPOINT, -** the mutex is not acquired because CHECKPOINT will never remap the -** buffer. RECOVER might remap, though, so CHECKPOINT will acquire -** the mutex if and when it promotes to RECOVER. -** -** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from -** being remapped also prevents more than one thread from being in -** RECOVER at a time. But, RECOVER sometimes wants to remap itself. -** To prevent RECOVER from losing its lock while remapping, the -** mutex is not released by winShmRelease() when in RECOVER. -** -** *pNewMapSize is set to the size of the mapping. -** -** *ppBuf and *pNewMapSize might be NULL and zero if no space has -** yet been allocated to the underlying storage. -*/ -static int winShmGet( - sqlite3_file *fd, /* The database file holding the shared memory */ - int reqMapSize, /* Requested size of mapping. 
-1 means don't care */ - int *pNewMapSize, /* Write new size of mapping here */ - void volatile **ppBuf /* Write mapping buffer origin here */ -){ - winFile *pDbFd = (winFile*)fd; - winShm *p = pDbFd->pShm; - winShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_OK; - - if( p->hasMutexBuf==0 ){ - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_enter(pShmNode->mutexBuf); - p->hasMutexBuf = 1; - } - sqlite3_mutex_enter(pShmNode->mutex); - if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){ - int actualSize; - if( winShmSize(fd, -1, &actualSize)==SQLITE_OK - && reqMapSize<actualSize - ){ - reqMapSize = actualSize; + /* Map the requested memory region into this processes address space. */ + apNew = (struct ShmRegion *)sqlite3_realloc( + pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0]) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; } - if( pShmNode->pMMapBuf ){ - if( !UnmapViewOfFile(pShmNode->pMMapBuf) ){ + pShmNode->aRegion = apNew; + + while( pShmNode->nRegion<=iRegion ){ + HANDLE hMap; /* file-mapping handle */ + void *pMap = 0; /* Mapped memory region */ + + hMap = CreateFileMapping(pShmNode->hFile.h, + NULL, PAGE_READWRITE, 0, nByte, NULL + ); + if( hMap ){ + pMap = MapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ, + 0, 0, nByte + ); + } + if( !pMap ){ pShmNode->lastErrno = GetLastError(); rc = SQLITE_IOERR; + if( hMap ) CloseHandle(hMap); + goto shmpage_out; } - CloseHandle(pShmNode->hMap); - pShmNode->hMap = INVALID_HANDLE_VALUE; - } - if( SQLITE_OK == rc ){ - pShmNode->pMMapBuf = 0; - if( reqMapSize == 0 ){ - /* can't create 0 byte file mapping in Windows */ - pShmNode->szMap = 0; - }else{ - /* create the file mapping object */ - if( INVALID_HANDLE_VALUE == pShmNode->hMap ){ - /* TBD provide an object name to each file - ** mapping so it can be re-used across processes. 
- */ - pShmNode->hMap = CreateFileMapping(pShmNode->hFile.h, - NULL, - PAGE_READWRITE, - 0, - reqMapSize, - NULL); - } - if( NULL==pShmNode->hMap ){ - pShmNode->lastErrno = GetLastError(); - rc = SQLITE_IOERR; - pShmNode->szMap = 0; - pShmNode->hMap = INVALID_HANDLE_VALUE; - }else{ - pShmNode->pMMapBuf = MapViewOfFile(pShmNode->hMap, - FILE_MAP_WRITE | FILE_MAP_READ, - 0, - 0, - reqMapSize); - if( !pShmNode->pMMapBuf ){ - pShmNode->lastErrno = GetLastError(); - rc = SQLITE_IOERR; - pShmNode->szMap = 0; - }else{ - pShmNode->szMap = reqMapSize; - } - } - } + + pShmNode->aRegion[pShmNode->nRegion].pMap = pMap; + pShmNode->aRegion[pShmNode->nRegion].hMap = hMap; + pShmNode->nRegion++; } } - *pNewMapSize = pShmNode->szMap; - *ppBuf = pShmNode->pMMapBuf; - sqlite3_mutex_leave(pShmNode->mutex); - return rc; -} -/* -** Release the lock held on the shared memory segment so that other -** threads are free to resize it if necessary. -** -** If the lock is not currently held, this routine is a harmless no-op. -** -** If the shared-memory object is in lock state RECOVER, then we do not -** really want to release the lock, so in that case too, this routine -** is a no-op. 
-*/ -static int winShmRelease(sqlite3_file *fd){ - winFile *pDbFd = (winFile*)fd; - winShm *p = pDbFd->pShm; - if( p->hasMutexBuf ){ - winShmNode *pShmNode = p->pShmNode; - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_leave(pShmNode->mutexBuf); - p->hasMutexBuf = 0; +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + char *p = (char *)pShmNode->aRegion[iRegion].pMap; + *pp = (void *)&p[iRegion*szRegion]; + }else{ + *pp = 0; } - return SQLITE_OK; + sqlite3_mutex_leave(pShmNode->mutex); + return rc; } /* @@ -1756,12 +1694,10 @@ static const sqlite3_io_methods winIoMethod = { winSectorSize, winDeviceCharacteristics, winShmOpen, /* xShmOpen */ - winShmSize, /* xShmSize */ - winShmGet, /* xShmGet */ - winShmRelease, /* xShmRelease */ winShmLock, /* xShmLock */ winShmBarrier, /* xShmBarrier */ - winShmClose /* xShmClose */ + winShmClose, /* xShmClose */ + winShmMap /* xShmMap */ }; /*************************************************************************** diff --git a/src/sqlite.h.in b/src/sqlite.h.in index e583e47d9..f6ec12b98 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -660,12 +660,10 @@ struct sqlite3_io_methods { int (*xDeviceCharacteristics)(sqlite3_file*); /* Methods above are valid for version 1 */ int (*xShmOpen)(sqlite3_file*); - int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize); - int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**); - int (*xShmRelease)(sqlite3_file*); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmClose)(sqlite3_file*, int deleteFlag); + int (*xShmMap)(sqlite3_file*, int iPage, int pgsz, int, void volatile**); /* Methods above are valid for version 2 */ /* Additional methods may be added in future releases */ }; diff --git a/src/test6.c b/src/test6.c index 1dded82ef..d6e6db2c1 100644 --- a/src/test6.c +++ b/src/test6.c @@ -526,20 +526,6 @@ static int cfDeviceCharacteristics(sqlite3_file *pFile){ static int 
cfShmOpen(sqlite3_file *pFile){ return sqlite3OsShmOpen(((CrashFile*)pFile)->pRealFile); } -static int cfShmSize(sqlite3_file *pFile, int reqSize, int *pNew){ - return sqlite3OsShmSize(((CrashFile*)pFile)->pRealFile, reqSize, pNew); -} -static int cfShmGet( - sqlite3_file *pFile, - int reqSize, - int *pSize, - void volatile **pp -){ - return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp); -} -static int cfShmRelease(sqlite3_file *pFile){ - return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile); -} static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, ofst, n, flags); } @@ -549,7 +535,15 @@ static void cfShmBarrier(sqlite3_file *pFile){ static int cfShmClose(sqlite3_file *pFile, int delFlag){ return sqlite3OsShmClose(((CrashFile*)pFile)->pRealFile, delFlag); } - +static int cfShmMap( + sqlite3_file *pFile, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int sz, /* Size of regions */ + int w, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + return sqlite3OsShmMap(((CrashFile*)pFile)->pRealFile, iRegion, sz, w, pp); +} static const sqlite3_io_methods CrashFileVtab = { 2, /* iVersion */ @@ -566,12 +560,10 @@ static const sqlite3_io_methods CrashFileVtab = { cfSectorSize, /* xSectorSize */ cfDeviceCharacteristics, /* xDeviceCharacteristics */ cfShmOpen, /* xShmOpen */ - cfShmSize, /* xShmSize */ - cfShmGet, /* xShmGet */ - cfShmRelease, /* xShmRelease */ cfShmLock, /* xShmLock */ cfShmBarrier, /* xShmBarrier */ - cfShmClose /* xShmClose */ + cfShmClose, /* xShmClose */ + cfShmMap /* xShmMap */ }; /* diff --git a/src/test_devsym.c b/src/test_devsym.c index 046480493..98d6e2a30 100644 --- a/src/test_devsym.c +++ b/src/test_devsym.c @@ -51,12 +51,10 @@ static int devsymFileControl(sqlite3_file*, int op, void *pArg); static int devsymSectorSize(sqlite3_file*); static int 
devsymDeviceCharacteristics(sqlite3_file*); static int devsymShmOpen(sqlite3_file*); -static int devsymShmSize(sqlite3_file*,int,int*); -static int devsymShmGet(sqlite3_file*,int,int*,volatile void**); -static int devsymShmRelease(sqlite3_file*); static int devsymShmLock(sqlite3_file*,int,int,int); static void devsymShmBarrier(sqlite3_file*); static int devsymShmClose(sqlite3_file*,int); +static int devsymShmMap(sqlite3_file*,int,int,int, void volatile **); /* ** Method declarations for devsym_vfs. @@ -120,12 +118,10 @@ static sqlite3_io_methods devsym_io_methods = { devsymSectorSize, /* xSectorSize */ devsymDeviceCharacteristics, /* xDeviceCharacteristics */ devsymShmOpen, /* xShmOpen */ - devsymShmSize, /* xShmSize */ - devsymShmGet, /* xShmGet */ - devsymShmRelease, /* xShmRelease */ devsymShmLock, /* xShmLock */ devsymShmBarrier, /* xShmBarrier */ - devsymShmClose /* xShmClose */ + devsymShmClose, /* xShmClose */ + devsymShmMap /* xShmMap */ }; struct DevsymGlobal { @@ -246,23 +242,6 @@ static int devsymShmOpen(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmOpen(p->pReal); } -static int devsymShmSize(sqlite3_file *pFile, int reqSize, int *pSize){ - devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmSize(p->pReal, reqSize, pSize); -} -static int devsymShmGet( - sqlite3_file *pFile, - int reqSz, - int *pSize, - void volatile **pp -){ - devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp); -} -static int devsymShmRelease(sqlite3_file *pFile){ - devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmRelease(p->pReal); -} static int devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmLock(p->pReal, ofst, n, flags); @@ -275,6 +254,16 @@ static int devsymShmClose(sqlite3_file *pFile, int delFlag){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmClose(p->pReal, delFlag); } +static int devsymShmMap( + 
sqlite3_file *pFile, + int iRegion, + int szRegion, + int isWrite, + void volatile **pp +){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsShmMap(p->pReal, iRegion, szRegion, isWrite, pp); +} diff --git a/src/test_osinst.c b/src/test_osinst.c index b60f84a57..f97822a1a 100644 --- a/src/test_osinst.c +++ b/src/test_osinst.c @@ -100,11 +100,9 @@ #define OS_WRITE 20 #define OS_SHMOPEN 21 #define OS_SHMCLOSE 22 -#define OS_SHMGET 23 -#define OS_SHMRELEASE 24 +#define OS_SHMMAP 23 #define OS_SHMLOCK 25 #define OS_SHMBARRIER 26 -#define OS_SHMSIZE 27 #define OS_ANNOTATE 28 #define OS_NUMEVENTS 29 @@ -152,12 +150,10 @@ static int vfslogSectorSize(sqlite3_file*); static int vfslogDeviceCharacteristics(sqlite3_file*); static int vfslogShmOpen(sqlite3_file *pFile); -static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize); -static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **); -static int vfslogShmRelease(sqlite3_file *pFile); static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags); static void vfslogShmBarrier(sqlite3_file*); static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag); +static int vfslogShmMap(sqlite3_file *pFile,int,int,int,volatile void **); /* ** Method declarations for vfslog_vfs. 
@@ -216,12 +212,10 @@ static sqlite3_io_methods vfslog_io_methods = { vfslogSectorSize, /* xSectorSize */ vfslogDeviceCharacteristics, /* xDeviceCharacteristics */ vfslogShmOpen, /* xShmOpen */ - vfslogShmSize, /* xShmSize */ - vfslogShmGet, /* xShmGet */ - vfslogShmRelease, /* xShmRelease */ vfslogShmLock, /* xShmLock */ vfslogShmBarrier, /* xShmBarrier */ - vfslogShmClose /* xShmClose */ + vfslogShmClose, /* xShmClose */ + vfslogShmMap /* xShmMap */ }; #if defined(SQLITE_OS_UNIX) && !defined(NO_GETTOD) @@ -441,41 +435,6 @@ static int vfslogShmOpen(sqlite3_file *pFile){ vfslog_call(p->pVfslog, OS_SHMOPEN, p->iFileId, t, rc, 0, 0); return rc; } -static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize){ - int rc; - sqlite3_uint64 t; - VfslogFile *p = (VfslogFile *)pFile; - t = vfslog_time(); - rc = p->pReal->pMethods->xShmSize(p->pReal, reqSize, pNewSize); - t = vfslog_time() - t; - vfslog_call(p->pVfslog, OS_SHMSIZE, p->iFileId, t, rc, 0, 0); - return rc; -} -static int vfslogShmGet( - sqlite3_file *pFile, - int req, - int *pSize, - volatile void **pp -){ - int rc; - sqlite3_uint64 t; - VfslogFile *p = (VfslogFile *)pFile; - t = vfslog_time(); - rc = p->pReal->pMethods->xShmGet(p->pReal, req, pSize, pp); - t = vfslog_time() - t; - vfslog_call(p->pVfslog, OS_SHMGET, p->iFileId, t, rc, 0, 0); - return rc; -} -static int vfslogShmRelease(sqlite3_file *pFile){ - int rc; - sqlite3_uint64 t; - VfslogFile *p = (VfslogFile *)pFile; - t = vfslog_time(); - rc = p->pReal->pMethods->xShmRelease(p->pReal); - t = vfslog_time() - t; - vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0); - return rc; -} static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ int rc; sqlite3_uint64 t; @@ -504,6 +463,22 @@ static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag){ vfslog_call(p->pVfslog, OS_SHMCLOSE, p->iFileId, t, rc, 0, 0); return rc; } +static int vfslogShmMap( + sqlite3_file *pFile, + int iRegion, + int szRegion, + int 
isWrite, + volatile void **pp +){ + int rc; + sqlite3_uint64 t; + VfslogFile *p = (VfslogFile *)pFile; + t = vfslog_time(); + rc = p->pReal->pMethods->xShmMap(p->pReal, iRegion, szRegion, isWrite, pp); + t = vfslog_time() - t; + vfslog_call(p->pVfslog, OS_SHMMAP, p->iFileId, t, rc, 0, 0); + return rc; +} /* @@ -826,11 +801,9 @@ static const char *vfslog_eventname(int eEvent){ case OS_SHMCLOSE: zEvent = "xShmClose"; break; case OS_SHMOPEN: zEvent = "xShmOpen"; break; - case OS_SHMGET: zEvent = "xShmGet"; break; - case OS_SHMSIZE: zEvent = "xShmSize"; break; - case OS_SHMRELEASE: zEvent = "xShmRelease"; break; case OS_SHMLOCK: zEvent = "xShmLock"; break; case OS_SHMBARRIER: zEvent = "xShmBarrier"; break; + case OS_SHMMAP: zEvent = "xShmMap"; break; case OS_ANNOTATE: zEvent = "annotation"; break; } diff --git a/src/test_vfs.c b/src/test_vfs.c index 1083080eb..89cc842ca 100644 --- a/src/test_vfs.c +++ b/src/test_vfs.c @@ -69,16 +69,17 @@ struct Testvfs { ** + Invoking the Tcl callback script. */ #define TESTVFS_SHMOPEN_MASK 0x00000001 -#define TESTVFS_SHMSIZE_MASK 0x00000002 -#define TESTVFS_SHMGET_MASK 0x00000004 -#define TESTVFS_SHMRELEASE_MASK 0x00000008 #define TESTVFS_SHMLOCK_MASK 0x00000010 #define TESTVFS_SHMBARRIER_MASK 0x00000020 #define TESTVFS_SHMCLOSE_MASK 0x00000040 +#define TESTVFS_SHMPAGE_MASK 0x00000080 -#define TESTVFS_OPEN_MASK 0x00000080 -#define TESTVFS_SYNC_MASK 0x00000100 -#define TESTVFS_ALL_MASK 0x000001FF +#define TESTVFS_OPEN_MASK 0x00000100 +#define TESTVFS_SYNC_MASK 0x00000200 +#define TESTVFS_ALL_MASK 0x000003FF + + +#define TESTVFS_MAX_PAGES 256 /* ** A shared-memory buffer. 
There is one of these objects for each shared @@ -87,8 +88,8 @@ struct Testvfs { */ struct TestvfsBuffer { char *zFile; /* Associated file name */ - int n; /* Size of allocated buffer in bytes */ - u8 *a; /* Buffer allocated using ckalloc() */ + int pgsz; /* Page size */ + u8 *aPage[TESTVFS_MAX_PAGES]; /* Array of ckalloc'd pages */ TestvfsFile *pFile; /* List of open handles */ TestvfsBuffer *pNext; /* Next in linked list of all buffers */ }; @@ -133,12 +134,10 @@ static int tvfsSleep(sqlite3_vfs*, int microseconds); static int tvfsCurrentTime(sqlite3_vfs*, double*); static int tvfsShmOpen(sqlite3_file*); -static int tvfsShmSize(sqlite3_file*, int , int *); -static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **); -static int tvfsShmRelease(sqlite3_file*); static int tvfsShmLock(sqlite3_file*, int , int, int); static void tvfsShmBarrier(sqlite3_file*); static int tvfsShmClose(sqlite3_file*, int); +static int tvfsShmPage(sqlite3_file*,int,int,int, void volatile **); static sqlite3_io_methods tvfs_io_methods = { 2, /* iVersion */ @@ -155,12 +154,10 @@ static sqlite3_io_methods tvfs_io_methods = { tvfsSectorSize, /* xSectorSize */ tvfsDeviceCharacteristics, /* xDeviceCharacteristics */ tvfsShmOpen, /* xShmOpen */ - tvfsShmSize, /* xShmSize */ - tvfsShmGet, /* xShmGet */ - tvfsShmRelease, /* xShmRelease */ tvfsShmLock, /* xShmLock */ tvfsShmBarrier, /* xShmBarrier */ - tvfsShmClose /* xShmClose */ + tvfsShmClose, /* xShmClose */ + tvfsShmPage /* xShmPage */ }; static int tvfsResultCode(Testvfs *p, int *pRc){ @@ -443,12 +440,10 @@ static int tvfsOpen( memcpy(pMethods, &tvfs_io_methods, sizeof(sqlite3_io_methods)); if( ((Testvfs *)pVfs->pAppData)->isNoshm ){ pMethods->xShmOpen = 0; - pMethods->xShmGet = 0; - pMethods->xShmSize = 0; - pMethods->xShmRelease = 0; pMethods->xShmClose = 0; pMethods->xShmLock = 0; pMethods->xShmBarrier = 0; + pMethods->xShmMap = 0; } pFile->pMethods = pMethods; } @@ -547,16 +542,6 @@ static int tvfsCurrentTime(sqlite3_vfs *pVfs, 
double *pTimeOut){ return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut); } -static void tvfsGrowBuffer(TestvfsFile *pFd, int reqSize, int *pNewSize){ - TestvfsBuffer *pBuffer = pFd->pShm; - if( reqSize>pBuffer->n ){ - pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, reqSize); - memset(&pBuffer->a[pBuffer->n], 0x55, reqSize-pBuffer->n); - pBuffer->n = reqSize; - } - *pNewSize = pBuffer->n; -} - static int tvfsInjectIoerr(Testvfs *p){ int ret = 0; if( p->ioerr ){ @@ -619,71 +604,51 @@ static int tvfsShmOpen( return SQLITE_OK; } -static int tvfsShmSize( - sqlite3_file *pFile, - int reqSize, - int *pNewSize -){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMSIZE_MASK ){ - tvfsExecTcl(p, "xShmSize", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0 - ); - tvfsResultCode(p, &rc); - } - if( rc==SQLITE_OK && p->mask&TESTVFS_SHMSIZE_MASK && tvfsInjectIoerr(p) ){ - rc = SQLITE_IOERR; - } - if( rc==SQLITE_OK ){ - tvfsGrowBuffer(pFd, reqSize, pNewSize); +static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){ + assert( iPage<TESTVFS_MAX_PAGES ); + if( p->aPage[iPage]==0 ){ + p->aPage[iPage] = (u8 *)ckalloc(pgsz); + memset(p->aPage[iPage], 0, pgsz); + p->pgsz = pgsz; } - return rc; } -static int tvfsShmGet( - sqlite3_file *pFile, - int reqMapSize, - int *pMapSize, - volatile void **pp +static int tvfsShmPage( + sqlite3_file *pFile, /* Handle open on database file */ + int iPage, /* Page to retrieve */ + int pgsz, /* Size of pages */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ ){ int rc = SQLITE_OK; TestvfsFile *pFd = (TestvfsFile *)pFile; Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - if( p->pScript && p->mask&TESTVFS_SHMGET_MASK ){ - tvfsExecTcl(p, "xShmGet", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, - Tcl_NewIntObj(reqMapSize) + if( p->pScript && 
p->mask&TESTVFS_SHMPAGE_MASK ){ + Tcl_Obj *pArg = Tcl_NewObj(); + Tcl_IncrRefCount(pArg); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(iPage)); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(pgsz)); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(isWrite)); + tvfsExecTcl(p, "xShmPage", + Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, pArg ); tvfsResultCode(p, &rc); + Tcl_DecrRefCount(pArg); } - if( rc==SQLITE_OK && p->mask&TESTVFS_SHMGET_MASK && tvfsInjectIoerr(p) ){ + if( rc==SQLITE_OK && p->mask&TESTVFS_SHMPAGE_MASK && tvfsInjectIoerr(p) ){ rc = SQLITE_IOERR; } - *pMapSize = pFd->pShm->n; - *pp = pFd->pShm->a; - return rc; -} - -static int tvfsShmRelease(sqlite3_file *pFile){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMRELEASE_MASK ){ - tvfsExecTcl(p, "xShmRelease", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0 - ); - tvfsResultCode(p, &rc); + if( rc==SQLITE_OK && isWrite && !pFd->pShm->aPage[iPage] ){ + tvfsAllocPage(pFd->pShm, iPage, pgsz); } + *pp = (void volatile *)pFd->pShm->aPage[iPage]; return rc; } + static int tvfsShmLock( sqlite3_file *pFile, int ofst, @@ -782,10 +747,13 @@ static int tvfsShmClose( *ppFd = pFd->pNext; if( pBuffer->pFile==0 ){ + int i; TestvfsBuffer **pp; for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext)); *pp = (*pp)->pNext; - ckfree((char *)pBuffer->a); + for(i=0; pBuffer->aPage[i]; i++){ + ckfree((char *)pBuffer->aPage[i]); + } ckfree((char *)pBuffer); } pFd->pShm = 0; @@ -821,28 +789,46 @@ static int testvfs_obj_cmd( switch( (enum DB_enum)i ){ case CMD_SHM: { + Tcl_Obj *pObj; + int i; TestvfsBuffer *pBuffer; char *zName; if( objc!=3 && objc!=4 ){ Tcl_WrongNumArgs(interp, 2, objv, "FILE ?VALUE?"); return TCL_ERROR; } - zName = Tcl_GetString(objv[2]); + zName = ckalloc(p->pParent->mxPathname); + p->pParent->xFullPathname( + p->pParent, Tcl_GetString(objv[2]), + 
p->pParent->mxPathname, zName + ); for(pBuffer=p->pBuffer; pBuffer; pBuffer=pBuffer->pNext){ if( 0==strcmp(pBuffer->zFile, zName) ) break; } + ckfree(zName); if( !pBuffer ){ - Tcl_AppendResult(interp, "no such file: ", zName, 0); + Tcl_AppendResult(interp, "no such file: ", Tcl_GetString(objv[2]), 0); return TCL_ERROR; } if( objc==4 ){ int n; u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n); - pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, n); - pBuffer->n = n; - memcpy(pBuffer->a, a, n); + assert( pBuffer->pgsz==0 || pBuffer->pgsz==32768 ); + for(i=0; i*32768<n; i++){ + int nByte = 32768; + tvfsAllocPage(pBuffer, i, 32768); + if( n-i*32768<32768 ){ + nByte = n; + } + memcpy(pBuffer->aPage[i], &a[i*32768], nByte); + } } - Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(pBuffer->a, pBuffer->n)); + + pObj = Tcl_NewObj(); + for(i=0; pBuffer->aPage[i]; i++){ + Tcl_AppendObjToObj(pObj, Tcl_NewByteArrayObj(pBuffer->aPage[i], 32768)); + } + Tcl_SetObjResult(interp, pObj); break; } @@ -852,12 +838,10 @@ static int testvfs_obj_cmd( int mask; } vfsmethod [] = { { "xShmOpen", TESTVFS_SHMOPEN_MASK }, - { "xShmSize", TESTVFS_SHMSIZE_MASK }, - { "xShmGet", TESTVFS_SHMGET_MASK }, - { "xShmRelease", TESTVFS_SHMRELEASE_MASK }, { "xShmLock", TESTVFS_SHMLOCK_MASK }, { "xShmBarrier", TESTVFS_SHMBARRIER_MASK }, { "xShmClose", TESTVFS_SHMCLOSE_MASK }, + { "xShmPage", TESTVFS_SHMPAGE_MASK }, { "xSync", TESTVFS_SYNC_MASK }, { "xOpen", TESTVFS_OPEN_MASK }, }; @@ -899,6 +883,7 @@ static int testvfs_obj_cmd( ckfree((char *)p->apScript); p->apScript = 0; p->nScript = 0; + p->pScript = 0; } Tcl_GetStringFromObj(objv[2], &nByte); if( nByte>0 ){ @@ -1071,6 +1056,13 @@ static int testvfs_cmd( p = (Testvfs *)ckalloc(nByte); memset(p, 0, nByte); + /* Create the new object command before querying SQLite for a default VFS + ** to use for 'real' IO operations. This is because creating the new VFS + ** may delete an existing [testvfs] VFS of the same name. 
If such a VFS + ** is currently the default, the new [testvfs] may end up calling the + ** methods of a deleted object. + */ + Tcl_CreateObjCommand(interp, zVfs, testvfs_obj_cmd, p, testvfs_obj_del); p->pParent = sqlite3_vfs_find(0); p->interp = interp; @@ -1087,7 +1079,6 @@ static int testvfs_cmd( p->isNoshm = isNoshm; p->mask = TESTVFS_ALL_MASK; - Tcl_CreateObjCommand(interp, zVfs, testvfs_obj_cmd, p, testvfs_obj_del); sqlite3_vfs_register(pVfs, isDefault); return TCL_OK; @@ -141,21 +141,33 @@ ** more index blocks. ** ** The wal-index header contains the total number of frames within the WAL -** in the the mxFrame field. Each index block contains information on -** HASHTABLE_NPAGE frames. Each index block contains two sections, a -** mapping which is a database page number for each frame, and a hash -** table used to look up frames by page number. The mapping section is -** an array of HASHTABLE_NPAGE 32-bit page numbers. The first entry on the -** array is the page number for the first frame; the second entry is the -** page number for the second frame; and so forth. The last index block -** holds a total of (mxFrame%HASHTABLE_NPAGE) page numbers. All index -** blocks other than the last are completely full with HASHTABLE_NPAGE -** page numbers. All index blocks are the same size; the mapping section -** of the last index block merely contains unused entries if mxFrame is -** not an even multiple of HASHTABLE_NPAGE. +** in the mxFrame field. +** +** Each index block except for the first contains information on +** HASHTABLE_NPAGE frames. The first index block contains information on +** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and +** HASHTABLE_NPAGE are selected so that together the wal-index header and +** first index block are the same size as all other index blocks in the +** wal-index. 
+** +** Each index block contains two sections, a page-mapping that contains the +** database page number associated with each wal frame, and a hash-table +** that allows users to query an index block for a specific page number. +** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE +** for the first index block) 32-bit page numbers. The first entry in the +** first index-block contains the database page number corresponding to the +** first frame in the WAL file. The first entry in the second index block +** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in +** the log, and so on. +** +** The last index block in a wal-index usually contains less than the full +** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers, +** depending on the contents of the WAL file. This does not change the +** allocated size of the page-mapping array - the page-mapping array merely +** contains unused entries. ** ** Even without using the hash table, the last frame for page P -** can be found by scanning the mapping sections of each index block +** can be found by scanning the page-mapping sections of each index block ** starting with the last index block and moving toward the first, and ** within each index block, starting at the end and moving toward the ** beginning. The first entry that equals P corresponds to the frame @@ -370,8 +382,8 @@ struct Wal { sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pWalFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ - int szWIndex; /* Size of the wal-index that is mapped in mem */ - volatile u32 *pWiData; /* Pointer to wal-index content in memory */ + int nWiData; /* Size of array apWiData */ + volatile u32 **apWiData; /* Pointer to wal-index content in memory */ u16 szPage; /* Database page size */ i16 readLock; /* Which read lock is being held. 
-1 for none */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ @@ -387,13 +399,10 @@ struct Wal { }; /* -** Return a pointer to the WalCkptInfo structure in the wal-index. +** Each page of the wal-index mapping contains a hash-table made up of +** an array of HASHTABLE_NSLOT elements of the following type. */ -static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ - assert( pWal->pWiData!=0 ); - return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2]; -} - +typedef u16 ht_slot; /* ** This structure is used to implement an iterator that loops through @@ -411,17 +420,96 @@ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ ** This functionality is used by the checkpoint code (see walCheckpoint()). */ struct WalIterator { - int iPrior; /* Last result returned from the iterator */ - int nSegment; /* Size of the aSegment[] array */ - int nFinal; /* Elements in aSegment[nSegment-1] */ + int iPrior; /* Last result returned from the iterator */ + int nSegment; /* Size of the aSegment[] array */ struct WalSegment { - int iNext; /* Next slot in aIndex[] not previously returned */ - u8 *aIndex; /* i0, i1, i2... such that aPgno[iN] ascending */ - u32 *aPgno; /* 256 page numbers. Pointer to Wal.pWiData */ - } aSegment[1]; /* One for every 256 entries in the WAL */ + int iNext; /* Next slot in aIndex[] not yet returned */ + ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ + u32 *aPgno; /* Array of page numbers. */ + int nEntry; /* Max size of aPgno[] and aIndex[] arrays */ + int iZero; /* Frame number associated with aPgno[0] */ + } aSegment[1]; /* One for every 32KB page in the WAL */ }; /* +** Define the parameters of the hash tables in the wal-index file. There +** is a hash-table following every HASHTABLE_NPAGE page numbers in the +** wal-index. +** +** Changing any of these constants will alter the wal-index format and +** create incompatibilities. 
+*/ +#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ +#define HASHTABLE_HASH_1 383 /* Should be prime */ +#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ + +/* +** The block of page numbers associated with the first hash-table in a +** wal-index is smaller than usual. This is so that there is a complete +** hash-table on each aligned 32KB page of the wal-index. +*/ +#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32))) + +/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */ +#define WALINDEX_PGSZ ( \ + sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ +) + +/* +** Obtain a pointer to the iPage'th page of the wal-index. The wal-index +** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are +** numbered from zero. +** +** If this call is successful, *ppPage is set to point to the wal-index +** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, +** then an SQLite error code is returned and *ppPage is set to 0. +*/ +static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ + int rc = SQLITE_OK; + + /* Enlarge the pWal->apWiData[] array if required */ + if( pWal->nWiData<=iPage ){ + int nByte = sizeof(u32 *)*(iPage+1); + volatile u32 **apNew; + apNew = (volatile u32 **)sqlite3_realloc(pWal->apWiData, nByte); + if( !apNew ){ + *ppPage = 0; + return SQLITE_NOMEM; + } + memset(&apNew[pWal->nWiData], 0, sizeof(u32 *)*(iPage+1-pWal->nWiData)); + pWal->apWiData = apNew; + pWal->nWiData = iPage+1; + } + + /* Request a pointer to the required page from the VFS */ + if( pWal->apWiData[iPage]==0 ){ + rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, + pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] + ); + } + + *ppPage = pWal->apWiData[iPage]; + assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); + return rc; +} + +/* +** Return a pointer to the WalCkptInfo structure in the wal-index. 
+*/ +static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); +} + +/* +** Return a pointer to the WalIndexHdr structure in the wal-index. +*/ +static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalIndexHdr*)pWal->apWiData[0]; +} + +/* ** The argument to this macro must be of type u32. On a little-endian ** architecture, it returns the u32 value that results from interpreting ** the 4 bytes as a big-endian value. On a big-endian architecture, it @@ -486,16 +574,15 @@ static void walChecksumBytes( ** The checksum on pWal->hdr is updated before it is written. */ static void walIndexWriteHdr(Wal *pWal){ - WalIndexHdr *aHdr; + volatile WalIndexHdr *aHdr = walIndexHdr(pWal); + const int nCksum = offsetof(WalIndexHdr, aCksum); assert( pWal->writeLock ); pWal->hdr.isInit = 1; - walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum), - 0, pWal->hdr.aCksum); - aHdr = (WalIndexHdr*)pWal->pWiData; - memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr)); + walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); + memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr)); sqlite3OsShmBarrier(pWal->pDbFd); - memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr)); + memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr)); } /* @@ -586,19 +673,6 @@ static int walDecodeFrame( return 1; } -/* -** Define the parameters of the hash tables in the wal-index file. There -** is a hash-table following every HASHTABLE_NPAGE page numbers in the -** wal-index. -** -** Changing any of these constants will alter the wal-index format and -** create incompatibilities. 
-*/ -#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */ -#define HASHTABLE_DATATYPE u16 -#define HASHTABLE_HASH_1 383 /* Should be prime */ -#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ -#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) /* @@ -664,96 +738,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ } /* -** Return the index in the Wal.pWiData array that corresponds to -** frame iFrame. -** -** Wal.pWiData is an array of u32 elements that is the wal-index. -** The array begins with a header and is then followed by alternating -** "map" and "hash-table" blocks. Each "map" block consists of -** HASHTABLE_NPAGE u32 elements which are page numbers corresponding -** to frames in the WAL file. -** -** This routine returns an index X such that Wal.pWiData[X] is part -** of a "map" block that contains the page number of the iFrame-th -** frame in the WAL file. -*/ -static int walIndexEntry(u32 iFrame){ - return ( - (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) - + (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32) - + (iFrame-1) - ); -} - -/* -** Return the minimum size of the shared-memory, in bytes, that is needed -** to support a wal-index containing frame iFrame. The value returned -** includes the wal-index header and the complete "block" containing iFrame, -** including the hash table segment that follows the block. -*/ -static int walMappingSize(u32 iFrame){ - const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ; - return ( WALINDEX_LOCK_OFFSET - + WALINDEX_LOCK_RESERVED - + nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE) - ); -} - -/* -** Release our reference to the wal-index memory map, if we are holding -** it. 
-*/ -static void walIndexUnmap(Wal *pWal){ - if( pWal->pWiData ){ - sqlite3OsShmRelease(pWal->pDbFd); - } - pWal->pWiData = 0; - pWal->szWIndex = -1; -} - -/* -** Map the wal-index file into memory if it isn't already. -** -** The reqSize parameter is the requested size of the mapping. The -** mapping will be at least this big if the underlying storage is -** that big. But the mapping will never grow larger than the underlying -** storage. Use the walIndexRemap() to enlarget the storage space. -*/ -static int walIndexMap(Wal *pWal, int reqSize){ - int rc = SQLITE_OK; - if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){ - walIndexUnmap(pWal); - rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex, - (void volatile**)(char volatile*)&pWal->pWiData); - if( rc!=SQLITE_OK ){ - walIndexUnmap(pWal); - } - } - return rc; -} - -/* -** Enlarge the wal-index to be at least enlargeTo bytes in size and -** Remap the wal-index so that the mapping covers the full size -** of the underlying file. -** -** If enlargeTo is non-negative, then increase the size of the underlying -** storage to be at least as big as enlargeTo before remapping. -*/ -static int walIndexRemap(Wal *pWal, int enlargeTo){ - int rc; - int sz; - assert( pWal->writeLock ); - rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz); - if( rc==SQLITE_OK && sz>pWal->szWIndex ){ - walIndexUnmap(pWal); - rc = walIndexMap(pWal, sz); - } - assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK ); - return rc; -} - -/* ** Compute a hash on a page number. The resulting hash value must land ** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances ** the hash to the next value in the event of a collision. @@ -767,10 +751,10 @@ static int walNextHash(int iPriorHash){ return (iPriorHash+1)&(HASHTABLE_NSLOT-1); } - /* -** Find the hash table and (section of the) page number array used to -** store data for WAL frame iFrame. 
+** Return pointers to the hash table and page number array stored on +** page iHash of the wal-index. The wal-index is broken into 32KB pages +** numbered starting from 0. ** ** Set output variable *paHash to point to the start of the hash table ** in the wal-index file. Set *piZero to one less than the frame @@ -778,38 +762,67 @@ static int walNextHash(int iPriorHash){ ** slot in the hash table is set to N, it refers to frame number ** (*piZero+N) in the log. ** -** Finally, set *paPgno such that for all frames F between (*piZero+1) and -** (*piZero+HASHTABLE_NPAGE), (*paPgno)[F] is the database page number -** associated with frame F. +** Finally, set *paPgno so that *paPgno[1] is the page number of the +** first frame indexed by the hash table, frame (*piZero+1). */ -static void walHashFind( +static int walHashGet( Wal *pWal, /* WAL handle */ - u32 iFrame, /* Find the hash table indexing this frame */ - volatile HASHTABLE_DATATYPE **paHash, /* OUT: Pointer to hash index */ + int iHash, /* Find the iHash'th table */ + volatile ht_slot **paHash, /* OUT: Pointer to hash index */ volatile u32 **paPgno, /* OUT: Pointer to page number array */ u32 *piZero /* OUT: Frame associated with *paPgno[0] */ ){ - u32 iZero; + int rc; /* Return code */ volatile u32 *aPgno; - volatile HASHTABLE_DATATYPE *aHash; - iZero = ((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NPAGE; - aPgno = &pWal->pWiData[walIndexEntry(iZero+1)-iZero-1]; - aHash = (HASHTABLE_DATATYPE *)&aPgno[iZero+HASHTABLE_NPAGE+1]; + rc = walIndexPage(pWal, iHash, &aPgno); + assert( rc==SQLITE_OK || iHash>0 ); - /* Assert that: - ** - ** + the mapping is large enough for this hash-table, and - ** - ** + that aPgno[iZero+1] really is the database page number associated - ** with the first frame indexed by this hash table. 
- */ - assert( (u32*)(&aHash[HASHTABLE_NSLOT])<=&pWal->pWiData[pWal->szWIndex/4] ); - assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) ); + if( rc==SQLITE_OK ){ + u32 iZero; + volatile ht_slot *aHash; + + aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE]; + if( iHash==0 ){ + aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; + iZero = 0; + }else{ + iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; + } + + *paPgno = &aPgno[-1]; + *paHash = aHash; + *piZero = iZero; + } + return rc; +} - *paHash = aHash; - *paPgno = aPgno; - *piZero = iZero; +/* +** Return the number of the wal-index page that contains the hash-table +** and page-number array that contain entries corresponding to WAL frame +** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages +** are numbered starting from 0. +*/ +static int walFramePage(u32 iFrame){ + int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; + assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) + && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) + && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) + && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) + && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) + ); + return iHash; +} + +/* +** Return the page number associated with frame iFrame in this WAL. +*/ +static u32 walFramePgno(Wal *pWal, u32 iFrame){ + int iHash = walFramePage(iFrame); + if( iHash==0 ){ + return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; + } + return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; } /* @@ -825,35 +838,44 @@ static void walHashFind( ** actually needed. 
*/ static void walCleanupHash(Wal *pWal){ - volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */ - volatile u32 *aPgno; /* Unused return from walHashFind() */ - u32 iZero; /* frame == (aHash[x]+iZero) */ - int iLimit = 0; /* Zero values greater than this */ + volatile ht_slot *aHash; /* Pointer to hash table to clear */ + volatile u32 *aPgno; /* Page number array for hash table */ + u32 iZero; /* frame == (aHash[x]+iZero) */ + int iLimit = 0; /* Zero values greater than this */ + int nByte; /* Number of bytes to zero in aPgno[] */ + int i; /* Used to iterate through aHash[] */ assert( pWal->writeLock ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 ); - if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){ - int nByte; /* Number of bytes to zero in aPgno[] */ - int i; /* Used to iterate through aHash[] */ - - walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); - iLimit = pWal->hdr.mxFrame - iZero; - assert( iLimit>0 ); - for(i=0; i<HASHTABLE_NSLOT; i++){ - if( aHash[i]>iLimit ){ - aHash[i] = 0; - } - } - /* Zero the entries in the aPgno array that correspond to frames with - ** frame numbers greater than pWal->hdr.mxFrame. - */ - nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit); - memset((void *)&aPgno[iZero+iLimit+1], 0, nByte); - assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash ); + if( pWal->hdr.mxFrame==0 ) return; + + /* Obtain pointers to the hash-table and page-number array containing + ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed + ** that the page said hash-table and array reside on is already mapped. 
+ */ + assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); + assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); + walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero); + + /* Zero all hash-table entries that correspond to frame numbers greater + ** than pWal->hdr.mxFrame. + */ + iLimit = pWal->hdr.mxFrame - iZero; + assert( iLimit>0 ); + for(i=0; i<HASHTABLE_NSLOT; i++){ + if( aHash[i]>iLimit ){ + aHash[i] = 0; + } } + + /* Zero the entries in the aPgno array that correspond to frames with + ** frame numbers greater than pWal->hdr.mxFrame. + */ + nByte = ((char *)aHash - (char *)&aPgno[iLimit+1]); + memset((void *)&aPgno[iLimit+1], 0, nByte); #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* Verify that the every entry in the mapping region is still reachable @@ -863,7 +885,7 @@ static void walCleanupHash(Wal *pWal){ int i; /* Loop counter */ int iKey; /* Hash key */ for(i=1; i<=iLimit; i++){ - for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){ + for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ if( aHash[iKey]==i ) break; } assert( aHash[iKey]==i ); @@ -879,50 +901,47 @@ static void walCleanupHash(Wal *pWal){ */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ - int nMapping; /* Required mapping size in bytes */ - - /* Make sure the wal-index is mapped. Enlarge the mapping if required. */ - nMapping = walMappingSize(iFrame); - rc = walIndexMap(pWal, nMapping); - while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){ - rc = walIndexRemap(pWal, nMapping); - } + u32 iZero; /* One less than frame number of aPgno[1] */ + volatile u32 *aPgno; /* Page number array */ + volatile ht_slot *aHash; /* Hash table */ + + rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero); - /* Assuming the wal-index file was successfully mapped, find the hash - ** table and section of of the page number array that pertain to frame - ** iFrame of the WAL. 
Then populate the page number array and the hash - ** table entry. + /* Assuming the wal-index file was successfully mapped, populate the + ** page number array and hash table entry. */ if( rc==SQLITE_OK ){ int iKey; /* Hash table key */ - u32 iZero; /* One less than frame number of aPgno[1] */ - volatile u32 *aPgno; /* Page number array */ - volatile HASHTABLE_DATATYPE *aHash; /* Hash table */ - int idx; /* Value to write to hash-table slot */ - TESTONLY( int nCollide = 0; /* Number of hash collisions */ ) + int idx; /* Value to write to hash-table slot */ + TESTONLY( int nCollide = 0; /* Number of hash collisions */ ) - walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero); idx = iFrame - iZero; + assert( idx <= HASHTABLE_NSLOT/2 + 1 ); + + /* If this is the first entry to be added to this hash-table, zero the + ** entire hash table and aPgno[] array before proceeding. + */ if( idx==1 ){ - memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32)); - memset((void*)aHash, 0, HASHTABLE_NBYTE); + int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]; + memset((void*)&aPgno[1], 0, nByte); } - assert( idx <= HASHTABLE_NSLOT/2 + 1 ); - if( aPgno[iFrame] ){ - /* If the entry in aPgno[] is already set, then the previous writer - ** must have exited unexpectedly in the middle of a transaction (after - ** writing one or more dirty pages to the WAL to free up memory). - ** Remove the remnants of that writers uncommitted transaction from - ** the hash-table before writing any new entries. - */ + /* If the entry in aPgno[] is already set, then the previous writer + ** must have exited unexpectedly in the middle of a transaction (after + ** writing one or more dirty pages to the WAL to free up memory). + ** Remove the remnants of that writer's uncommitted transaction from + ** the hash-table before writing any new entries. 
+ */ + if( aPgno[idx] ){ walCleanupHash(pWal); - assert( !aPgno[iFrame] ); + assert( !aPgno[idx] ); } - aPgno[iFrame] = iPage; + + /* Write the aPgno[] array entry and the hash-table slot. */ for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){ assert( nCollide++ < idx ); } + aPgno[idx] = iPage; aHash[iKey] = idx; #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT @@ -944,7 +963,7 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ if( (idx&0x3ff)==0 ){ int i; /* Loop counter */ for(i=1; i<=idx; i++){ - for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){ + for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ if( aHash[iKey]==i ) break; } assert( aHash[iKey]==i ); @@ -1076,9 +1095,6 @@ static int walIndexRecover(Wal *pWal){ } finished: - if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){ - rc = walIndexRemap(pWal, walMappingSize(1)); - } if( rc==SQLITE_OK ){ volatile WalCkptInfo *pInfo; int i; @@ -1164,7 +1180,6 @@ int sqlite3WalOpen( pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; - pRet->szWIndex = -1; pRet->readLock = -1; sqlite3_randomness(8, &pRet->hdr.aSalt); pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; @@ -1207,24 +1222,22 @@ static int walIteratorNext( u32 iMin; /* Result pgno must be greater than iMin */ u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ int i; /* For looping through segments */ - int nBlock = p->nFinal; /* Number of entries in current segment */ iMin = p->iPrior; assert( iMin<0xffffffff ); for(i=p->nSegment-1; i>=0; i--){ struct WalSegment *pSegment = &p->aSegment[i]; - while( pSegment->iNext<nBlock ){ + while( pSegment->iNext<pSegment->nEntry ){ u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ if( iPg<iRet ){ iRet = iPg; - *piFrame = i*256 + 1 + pSegment->aIndex[pSegment->iNext]; + *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext]; } break; } pSegment->iNext++; } - nBlock = 256; } *piPage = 
p->iPrior = iRet; @@ -1232,28 +1245,28 @@ static int walIteratorNext( } -static void walMergesort8( - Pgno *aContent, /* Pages in wal */ - u8 *aBuffer, /* Buffer of at least *pnList items to use */ - u8 *aList, /* IN/OUT: List to sort */ +static void walMergesort( + u32 *aContent, /* Pages in wal */ + ht_slot *aBuffer, /* Buffer of at least *pnList items to use */ + ht_slot *aList, /* IN/OUT: List to sort */ int *pnList /* IN/OUT: Number of elements in aList[] */ ){ int nList = *pnList; if( nList>1 ){ int nLeft = nList / 2; /* Elements in left list */ int nRight = nList - nLeft; /* Elements in right list */ - u8 *aLeft = aList; /* Left list */ - u8 *aRight = &aList[nLeft]; /* Right list */ int iLeft = 0; /* Current index in aLeft */ int iRight = 0; /* Current index in aright */ int iOut = 0; /* Current index in output buffer */ + ht_slot *aLeft = aList; /* Left list */ + ht_slot *aRight = aList+nLeft;/* Right list */ /* TODO: Change to non-recursive version. */ - walMergesort8(aContent, aBuffer, aLeft, &nLeft); - walMergesort8(aContent, aBuffer, aRight, &nRight); + walMergesort(aContent, aBuffer, aLeft, &nLeft); + walMergesort(aContent, aBuffer, aRight, &nRight); while( iRight<nRight || iLeft<nLeft ){ - u8 logpage; + ht_slot logpage; Pgno dbpage; if( (iLeft<nLeft) @@ -1285,6 +1298,13 @@ static void walMergesort8( #endif } +/* +** Free an iterator allocated by walIteratorInit(). +*/ +static void walIteratorFree(WalIterator *p){ + sqlite3_free(p); +} + /* ** Map the wal-index into memory owned by this thread, if it is not ** mapped already. Then construct a WalInterator object that can be @@ -1300,71 +1320,71 @@ static void walMergesort8( ** prior to the WalIterator object being destroyed. 
*/ static int walIteratorInit(Wal *pWal, WalIterator **pp){ - u32 *aData; /* Content of the wal-index file */ - WalIterator *p; /* Return value */ - int nSegment; /* Number of segments to merge */ - u32 iLast; /* Last frame in log */ - int nByte; /* Number of bytes to allocate */ - int i; /* Iterator variable */ - int nFinal; /* Number of unindexed entries */ - u8 *aTmp; /* Temp space used by merge-sort */ - u8 *aSpace; /* Surplus space on the end of the allocation */ - - /* Make sure the wal-index is mapped into local memory */ - assert( pWal->pWiData && pWal->szWIndex>=walMappingSize(pWal->hdr.mxFrame) ); + WalIterator *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + int nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + ht_slot *aTmp; /* Temp space used by merge-sort */ + ht_slot *aSpace; /* Space at the end of the allocation */ /* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other ** thread is able to write to shared memory while this routine is ** running (or, indeed, while the WalIterator object exists). Hence, - ** we can cast off the volatile qualifacation from shared memory + ** we can cast off the volatile qualification from shared memory */ assert( pWal->ckptLock ); - aData = (u32*)pWal->pWiData; + iLast = pWal->hdr.mxFrame; /* Allocate space for the WalIterator object */ - iLast = pWal->hdr.mxFrame; - nSegment = (iLast >> 8) + 1; - nFinal = (iLast & 0x000000FF); - nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256); + nSegment = walFramePage(iLast) + 1; + nByte = sizeof(WalIterator) + + nSegment*(sizeof(struct WalSegment)) + + (nSegment+1)*(HASHTABLE_NPAGE * sizeof(ht_slot)); p = (WalIterator *)sqlite3_malloc(nByte); if( !p ){ return SQLITE_NOMEM; } memset(p, 0, nByte); - /* Initialize the WalIterator object. Each 256-entry segment is - ** presorted in order to make iterating through all entries much - ** faster. 
- */ + /* Allocate space for the WalIterator object */ p->nSegment = nSegment; - aSpace = (u8 *)&p->aSegment[nSegment]; - aTmp = &aSpace[nSegment*256]; + aSpace = (ht_slot *)&p->aSegment[nSegment]; + aTmp = &aSpace[HASHTABLE_NPAGE*nSegment]; for(i=0; i<nSegment; i++){ + volatile ht_slot *aHash; int j; - int nIndex = (i==nSegment-1) ? nFinal : 256; - p->aSegment[i].aPgno = &aData[walIndexEntry(i*256+1)]; - p->aSegment[i].aIndex = aSpace; - for(j=0; j<nIndex; j++){ + u32 iZero; + int nEntry; + volatile u32 *aPgno; + int rc; + + rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero); + if( rc!=SQLITE_OK ){ + walIteratorFree(p); + return rc; + } + aPgno++; + nEntry = ((i+1)==nSegment)?iLast-iZero:(u32 *)aHash-(u32 *)aPgno; + iZero++; + + for(j=0; j<nEntry; j++){ aSpace[j] = j; } - walMergesort8(p->aSegment[i].aPgno, aTmp, aSpace, &nIndex); - memset(&aSpace[nIndex], aSpace[nIndex-1], 256-nIndex); - aSpace += 256; - p->nFinal = nIndex; + walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry); + p->aSegment[i].iZero = iZero; + p->aSegment[i].nEntry = nEntry; + p->aSegment[i].aIndex = aSpace; + p->aSegment[i].aPgno = (u32 *)aPgno; + aSpace += HASHTABLE_NPAGE; } + assert( aSpace==aTmp ); - /* Return the fully initializd WalIterator object */ + /* Return the fully initialized WalIterator object */ *pp = p; return SQLITE_OK ; } -/* -** Free an iterator allocated by walIteratorInit(). -*/ -static void walIteratorFree(WalIterator *p){ - sqlite3_free(p); -} - /* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. 
@@ -1409,7 +1429,6 @@ static int walCheckpoint( u32 iFrame = 0; /* Wal frame containing data for iDbpage */ u32 mxSafeFrame; /* Max frame that can be backfilled */ int i; /* Loop counter */ - volatile WalIndexHdr *pHdr; /* The actual wal-index header in SHM */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ /* Allocate the iterator */ @@ -1430,9 +1449,7 @@ static int walCheckpoint( ** cannot be backfilled from the WAL. */ mxSafeFrame = pWal->hdr.mxFrame; - pHdr = (volatile WalIndexHdr*)pWal->pWiData; - pInfo = (volatile WalCkptInfo*)&pHdr[2]; - assert( pInfo==walCkptInfo(pWal) ); + pInfo = walCkptInfo(pWal); for(i=1; i<WAL_NREADER; i++){ u32 y = pInfo->aReadMark[i]; if( mxSafeFrame>=y ){ @@ -1461,6 +1478,7 @@ static int walCheckpoint( /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + assert( walFramePgno(pWal, iFrame)==iDbpage ); if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue; rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE @@ -1472,7 +1490,7 @@ static int walCheckpoint( /* If work was actually accomplished... */ if( rc==SQLITE_OK ){ - if( mxSafeFrame==pHdr[0].mxFrame ){ + if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage)); if( rc==SQLITE_OK && sync_flags ){ rc = sqlite3OsSync(pWal->pDbFd, sync_flags); @@ -1525,7 +1543,6 @@ int sqlite3WalClose( if( rc==SQLITE_OK ){ isDelete = 1; } - walIndexUnmap(pWal); } walIndexClose(pWal, isDelete); @@ -1534,6 +1551,7 @@ int sqlite3WalClose( sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); } WALTRACE(("WAL%p: closed\n", pWal)); + sqlite3_free(pWal->apWiData); sqlite3_free(pWal); } return rc; @@ -1557,16 +1575,12 @@ int sqlite3WalClose( ** is read successfully and the checksum verified, return zero. 
*/ int walIndexTryHdr(Wal *pWal, int *pChanged){ - u32 aCksum[2]; /* Checksum on the header content */ - WalIndexHdr h1, h2; /* Two copies of the header content */ - WalIndexHdr *aHdr; /* Header in shared memory */ + u32 aCksum[2]; /* Checksum on the header content */ + WalIndexHdr h1, h2; /* Two copies of the header content */ + WalIndexHdr volatile *aHdr; /* Header in shared memory */ - if( pWal->szWIndex < WALINDEX_HDR_SIZE ){ - /* The wal-index is not large enough to hold the header, then assume - ** header is invalid. */ - return 1; - } - assert( pWal->pWiData ); + /* The first page of the wal-index must be mapped at this point. */ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); /* Read the header. This might happen currently with a write to the ** same area of shared memory on a different CPU in a SMP, @@ -1578,10 +1592,10 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ ** Memory barriers are used to prevent the compiler or the hardware from ** reordering the reads and writes. */ - aHdr = (WalIndexHdr*)pWal->pWiData; - memcpy(&h1, &aHdr[0], sizeof(h1)); + aHdr = walIndexHdr(pWal); + memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); sqlite3OsShmBarrier(pWal->pDbFd); - memcpy(&h2, &aHdr[1], sizeof(h2)); + memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ return 1; /* Dirty read */ @@ -1625,26 +1639,32 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ int badHdr; /* True if a header read failed */ + volatile u32 *page0; + /* Ensure that page 0 of the wal-index (the page that contains the + ** wal-index header) is mapped. Return early if an error occurs here. + */ assert( pChanged ); - rc = walIndexMap(pWal, walMappingSize(1)); + rc = walIndexPage(pWal, 0, &page0); if( rc!=SQLITE_OK ){ return rc; - } + }; + assert( page0 || pWal->writeLock==0 ); - /* Try once to read the header straight out. This works most of the - ** time. 
+ /* If the first page of the wal-index has been mapped, try to read the + ** wal-index header immediately, without holding any lock. This usually + ** works, but may fail if the wal-index header is corrupt or currently + ** being modified by another user. */ - badHdr = walIndexTryHdr(pWal, pChanged); + badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); /* If the first attempt failed, it might have been due to a race ** with a writer. So get a WRITE lock and try again. */ assert( badHdr==0 || pWal->writeLock==0 ); - if( badHdr ){ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - if( rc==SQLITE_OK ){ - pWal->writeLock = 1; + if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ + pWal->writeLock = 1; + if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding @@ -1654,17 +1674,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ rc = walIndexRecover(pWal); *pChanged = 1; } - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - } - } - - /* Make sure the mapping is large enough to cover the entire wal-index */ - if( rc==SQLITE_OK ){ - int szWanted = walMappingSize(pWal->hdr.mxFrame); - if( pWal->szWIndex<szWanted ){ - rc = walIndexMap(pWal, szWanted); } + pWal->writeLock = 0; + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); } return rc; @@ -1705,12 +1717,11 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ ** WAL_READ_LOCK() while changing values. 
*/ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ - volatile WalIndexHdr *pHdr; /* Header of the wal-index */ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ assert( pWal->readLock<0 ); /* Not currently locked */ @@ -1739,16 +1750,12 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ rc = SQLITE_BUSY_RECOVERY; } } - }else{ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); } if( rc!=SQLITE_OK ){ return rc; } - pHdr = (volatile WalIndexHdr*)pWal->pWiData; - pInfo = (volatile WalCkptInfo*)&pHdr[2]; - assert( pInfo==walCkptInfo(pWal) ); + pInfo = walCkptInfo(pWal); if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ /* The WAL has been completely backfilled (or it is empty). ** and can be safely ignored. @@ -1756,7 +1763,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ rc = walLockShared(pWal, WAL_READ_LOCK(0)); sqlite3OsShmBarrier(pWal->pDbFd); if( rc==SQLITE_OK ){ - if( memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){ + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ /* It is not safe to allow the reader to continue here if frames ** may have been appended to the log before READ_LOCK(0) was obtained. 
** When holding READ_LOCK(0), the reader ignores the entire log file, @@ -1850,7 +1857,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ */ sqlite3OsShmBarrier(pWal->pDbFd); if( pInfo->aReadMark[mxI]!=mxReadMark - || memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) + || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); return WAL_RETRY; @@ -1883,7 +1890,6 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ do{ rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); }while( rc==WAL_RETRY ); - walIndexUnmap(pWal); return rc; } @@ -1913,7 +1919,6 @@ int sqlite3WalRead( int nOut, /* Size of buffer pOut in bytes */ u8 *pOut /* Buffer to write page data to */ ){ - int rc; /* Return code */ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ @@ -1932,12 +1937,6 @@ int sqlite3WalRead( return SQLITE_OK; } - /* Ensure the wal-index is mapped. */ - rc = walIndexMap(pWal, walMappingSize(iLast)); - if( rc!=SQLITE_OK ){ - return rc; - } - /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). @@ -1963,25 +1962,25 @@ int sqlite3WalRead( ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. 
*/ - for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){ - volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */ - volatile u32 *aPgno; /* Pointer to array of page numbers */ + for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){ + volatile ht_slot *aHash; /* Pointer to hash table */ + volatile u32 *aPgno; /* Pointer to array of page numbers */ u32 iZero; /* Frame number corresponding to aPgno[0] */ int iKey; /* Hash slot index */ - int mxHash; /* upper bound on aHash[] values */ + int rc; - walHashFind(pWal, iHash, &aHash, &aPgno, &iZero); - mxHash = iLast - iZero; - if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE; + rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); + if( rc!=SQLITE_OK ){ + return rc; + } for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; - if( iFrame<=iLast && aPgno[iFrame]==pgno ){ + if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ assert( iFrame>iRead ); iRead = iFrame; } } } - assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno ); #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search @@ -1991,7 +1990,7 @@ int sqlite3WalRead( u32 iRead2 = 0; u32 iTest; for(iTest=iLast; iTest>0; iTest--){ - if( pWal->pWiData[walIndexEntry(iTest)]==pgno ){ + if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; break; } @@ -2003,7 +2002,6 @@ int sqlite3WalRead( /* If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. */ - walIndexUnmap(pWal); if( iRead ){ i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE; *pInWal = 1; @@ -2057,19 +2055,12 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){ ** time the read transaction on this connection was started, then ** the write is disallowed. 
*/ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); - if( rc ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - return rc; - } - if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){ + if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; rc = SQLITE_BUSY; } - walIndexUnmap(pWal); return rc; } @@ -2098,39 +2089,35 @@ int sqlite3WalEndWriteTransaction(Wal *pWal){ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; if( pWal->writeLock ){ - int unused; Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; - assert( pWal->pWiData==0 ); - rc = walIndexReadHdr(pWal, &unused); - if( rc==SQLITE_OK ){ - rc = walIndexMap(pWal, walMappingSize(iMax)); - } - if( rc==SQLITE_OK ){ - for(iFrame=pWal->hdr.mxFrame+1; - ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; - iFrame++ - ){ - /* This call cannot fail. Unless the page for which the page number - ** is passed as the second argument is (a) in the cache and - ** (b) has an outstanding reference, then xUndo is either a no-op - ** (if (a) is false) or simply expels the page from the cache (if (b) - ** is false). - ** - ** If the upper layer is doing a rollback, it is guaranteed that there - ** are no outstanding references to any page other than page 1. And - ** page 1 is never written to the log until the transaction is - ** committed. As a result, the call to xUndo may not fail. - */ - assert( pWal->writeLock ); - assert( pWal->pWiData[walIndexEntry(iFrame)]!=1 ); - rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); - } - walCleanupHash(pWal); + /* Restore the clients cache of the wal-index header to the state it + ** was in before the client began writing to the database. 
+ */ + memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + for(iFrame=pWal->hdr.mxFrame+1; + ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; + iFrame++ + ){ + /* This call cannot fail. Unless the page for which the page number + ** is passed as the second argument is (a) in the cache and + ** (b) has an outstanding reference, then xUndo is either a no-op + ** (if (a) is false) or simply expels the page from the cache (if (b) + ** is false). + ** + ** If the upper layer is doing a rollback, it is guaranteed that there + ** are no outstanding references to any page other than page 1. And + ** page 1 is never written to the log until the transaction is + ** committed. As a result, the call to xUndo may not fail. + */ + assert( walFramePgno(pWal, iFrame)!=1 ); + rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); } - walIndexUnmap(pWal); + walCleanupHash(pWal); } + assert( rc==SQLITE_OK ); return rc; } @@ -2170,16 +2157,12 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ } if( aWalData[0]<pWal->hdr.mxFrame ){ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; - if( rc==SQLITE_OK ){ - walCleanupHash(pWal); - } + walCleanupHash(pWal); } - walIndexUnmap(pWal); return rc; } @@ -2199,9 +2182,7 @@ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; int cnt; - if( pWal->readLock==0 - && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame))) - ){ + if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ @@ -2237,11 +2218,6 @@ static int walRestartLog(Wal *pWal){ int notUsed; rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt); }while( rc==WAL_RETRY ); - - /* Unmap the wal-index before returning. Otherwise the VFS layer may - ** hold a mutex for the duration of the IO performed by WalFrames(). 
- */ - walIndexUnmap(pWal); } return rc; } @@ -2267,7 +2243,6 @@ int sqlite3WalFrames( assert( pList ); assert( pWal->writeLock ); - assert( pWal->pWiData==0 ); #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} @@ -2280,10 +2255,8 @@ int sqlite3WalFrames( ** log file, instead of appending to it at pWal->hdr.mxFrame. */ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ - assert( pWal->pWiData==0 ); return rc; } - assert( pWal->pWiData==0 && pWal->readLock>0 ); /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of @@ -2358,7 +2331,6 @@ int sqlite3WalFrames( rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - assert( pWal->pWiData==0 ); /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index @@ -2391,7 +2363,6 @@ int sqlite3WalFrames( } } - walIndexUnmap(pWal); WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } @@ -2412,7 +2383,6 @@ int sqlite3WalCheckpoint( int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ - assert( pWal->pWiData==0 ); assert( pWal->ckptLock==0 ); WALTRACE(("WAL%p: checkpoint begins\n", pWal)); @@ -2441,7 +2411,6 @@ int sqlite3WalCheckpoint( } /* Release the locks. */ - walIndexUnmap(pWal); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); pWal->ckptLock = 0; WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); |