aboutsummaryrefslogtreecommitdiff
path: root/src/os_unix.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/os_unix.c')
-rw-r--r-- src/os_unix.c | 213
1 files changed, 129 insertions, 84 deletions
diff --git a/src/os_unix.c b/src/os_unix.c
index 2f03c3967..334719200 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -249,9 +249,14 @@ struct unixFile {
unsigned char transCntrChng; /* True if the transaction counter changed */
unsigned char dbUpdate; /* True if any part of database file changed */
unsigned char inNormalWrite; /* True if in a normal write operation */
- sqlite3_int64 mmapSize; /* Size of xMremap() */
- void *pMapRegion; /* Area memory mapped */
+
#endif
+ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
+ sqlite3_int64 mmapOrigsize; /* Actual size of mapping at pMapRegion */
+ sqlite3_int64 mmapLimit; /* Configured FCNTL_MMAP_SIZE value */
+ void *pMapRegion; /* Memory mapped region */
+ int nFetchOut; /* Number of outstanding xFetch refs */
+
#ifdef SQLITE_TEST
/* In test mode, increase the size of this structure a bit so that
** it is larger than the struct CrashFile defined in test6.c.
@@ -1805,6 +1810,9 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
return posixUnlock(id, eFileLock, 0);
}
+static int unixMapfile(unixFile *pFd, i64 nByte);
+static void unixUnmapfile(unixFile *pFd);
+
/*
** This function performs the parts of the "close file" operation
** common to all locking schemes. It closes the directory and file
@@ -1817,6 +1825,7 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
*/
static int closeUnixFile(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
+ unixUnmapfile(pFile);
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
pFile->h = -1;
@@ -3074,7 +3083,6 @@ static int unixRead(
unixFile *pFile = (unixFile *)id;
int got;
assert( id );
- assert( offset>=pFile->mmapSize ); /* Never read from the mmapped region */
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
@@ -3085,6 +3093,21 @@ static int unixRead(
);
#endif
+ /* Deal with as much of this read request as possible by transferring
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+
got = seekAndRead(pFile, offset, pBuf, amt);
if( got==amt ){
return SQLITE_OK;
@@ -3157,7 +3180,6 @@ static int unixWrite(
int wrote = 0;
assert( id );
assert( amt>0 );
- assert( offset>=pFile->mmapSize ); /* Never write into the mmapped region */
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
@@ -3190,6 +3212,21 @@ static int unixWrite(
}
#endif
+ /* Deal with as much of this write request as possible by transferring
+ ** data to the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+
while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
amt -= wrote;
offset += wrote;
@@ -3470,6 +3507,7 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){
if( pFile->inNormalWrite && nByte==0 ){
pFile->transCntrChng = 1;
}
+#endif
/* If the file was just truncated to a size smaller than the currently
** mapped region, reduce the effective mapping size as well. SQLite will
@@ -3478,7 +3516,6 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){
if( nByte<pFile->mmapSize ){
pFile->mmapSize = nByte;
}
-#endif
return SQLITE_OK;
}
@@ -3568,6 +3605,19 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
}
}
+ if( pFile->mmapLimit>0 ){
+ int rc;
+ if( pFile->szChunk<=0 ){
+ if( robust_ftruncate(pFile->h, nByte) ){
+ pFile->lastErrno = errno;
+ return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
+ }
+ }
+
+ rc = unixMapfile(pFile, nByte);
+ return rc;
+ }
+
return SQLITE_OK;
}
@@ -3635,8 +3685,8 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
}
return SQLITE_OK;
}
- case SQLITE_FCNTL_GETFD: {
- *(int*)pArg = pFile->h;
+ case SQLITE_FCNTL_MMAP_SIZE: {
+ pFile->mmapLimit = *(i64*)pArg;
return SQLITE_OK;
}
#ifdef SQLITE_DEBUG
@@ -4451,91 +4501,86 @@ static int unixShmUnmap(
*/
#define ROUNDUP(x,y) (((x)+y-1)&~(y-1))
-/*
-** Map, remap or unmap part of the database file.
-*/
-static int unixMremap(
- sqlite3_file *fd, /* Main database file */
- int flags, /* Mask of SQLITE_MREMAP_XXX flags */
- sqlite3_int64 iOff, /* Offset to start mapping at */
- sqlite3_int64 nOld, /* Size of old mapping, or zero */
- sqlite3_int64 nNew, /* Size of new mapping, or zero */
- void **ppMap /* IN/OUT: Old/new mappings */
-){
- unixFile *p = (unixFile *)fd; /* The underlying database file */
- int rc = SQLITE_OK; /* Return code */
- void *pNew = 0; /* New mapping */
- i64 nNewRnd; /* nNew rounded up */
- i64 nOldRnd; /* nOld rounded up */
-
- assert( iOff==0 );
- /* assert( p->mmapSize==nOld ); */
- assert( p->pMapRegion==0 || p->pMapRegion==(*ppMap) );
-
- /* If the SQLITE_MREMAP_EXTEND flag is set, then the size of the requested
- ** mapping (nNew bytes) may be greater than the size of the database file.
- ** If this is the case, extend the file on disk using ftruncate(). */
- assert( nNew>0 || (flags & SQLITE_MREMAP_EXTEND)==0 );
- if( flags & SQLITE_MREMAP_EXTEND ){
+static void unixUnmapfile(unixFile *pFd){
+ assert( pFd->nFetchOut==0 );
+ if( pFd->pMapRegion ){
+ munmap(pFd->pMapRegion, pFd->mmapOrigsize);
+ pFd->pMapRegion = 0;
+ pFd->mmapSize = 0;
+ pFd->mmapOrigsize = 0;
+ }
+}
+
+static int unixMapfile(unixFile *pFd, i64 nByte){
+ i64 nMap = nByte;
+ int rc;
+
+ assert( nMap>=0 || pFd->nFetchOut==0 );
+ if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+ if( nMap<0 ){
struct stat statbuf; /* Low-level file information */
- rc = osFstat(p->h, &statbuf);
- if( rc==SQLITE_OK && nNew>statbuf.st_size ){
- rc = robust_ftruncate(p->h, nNew);
+ rc = osFstat(pFd->h, &statbuf);
+ if( rc!=SQLITE_OK ){
+ return SQLITE_IOERR_FSTAT;
}
- if( rc!=SQLITE_OK ) return rc;
+ nMap = statbuf.st_size;
+ }
+ if( nMap>pFd->mmapLimit ){
+ nMap = pFd->mmapLimit;
}
- /* According to some sources, the effect of changing the size of the
- ** underlying file on mapped regions that correspond to the added or
- ** removed pages is undefined. However, there is reason to believe that
- ** on modern platforms like Linux or OSX, things just work. For example,
- ** it is possible to create a mapping larger than the file on disk and
- ** extend the file on disk later on.
- **
- ** Exploit this on Linux and OSX to reduce the number of munmap()/mmap()
- ** calls required if the file size is changing. In this case all mappings
- ** are rounded up to the nearest 4MB. And if a new mapping is requested
- ** that has the same rounded size as an old mapping, the old mapping can
- ** be reused as is. */
-#if defined(__APPLE__) || defined(__linux__)
- nNewRnd = ROUNDUP(nNew, 4096*1024);
- nOldRnd = ROUNDUP(nOld, 4096*1024);
-#else
- nNewRnd = ROUNDUP(nNew, 4096*1);
- nOldRnd = ROUNDUP(nOld, 4096*1);
-#endif
+ if( nMap!=pFd->mmapSize ){
+ void *pNew;
+ unixUnmapfile(pFd);
- /* On OSX or Linux, reuse the old mapping if it is the right size. */
-#if defined(__APPLE__) || defined(__linux__)
- if( nNewRnd==nOldRnd ){
- VVA_ONLY( p->mmapSize = nNew; )
- return SQLITE_OK;
+ if( nMap>0 ){
+ void *pNew;
+ int flags = PROT_READ;
+ if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
+ pNew = mmap(0, ROUNDUP(nMap, 4096), flags, MAP_SHARED, pFd->h, 0);
+ if( pNew==MAP_FAILED ){
+ return SQLITE_IOERR_MREMAP;
+ }
+
+ pFd->pMapRegion = pNew;
+ pFd->mmapOrigsize = pFd->mmapSize = nMap;
+ }
}
-#endif
- /* If we get this far, unmap any old mapping. */
- if( nOldRnd!=0 ){
- void *pOld = *ppMap;
- munmap(pOld, nOldRnd);
- VVA_ONLY( p->mmapSize = 0; p->pMapRegion = 0; );
- }
-
- /* And, if required, use mmap() to create a new mapping. */
- if( nNewRnd>0 ){
- int flags = PROT_READ;
- if( (p->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
- pNew = mmap(0, nNewRnd, flags, MAP_SHARED, p->h, iOff);
- if( pNew==MAP_FAILED ){
- pNew = 0;
- VVA_ONLY( p->mmapSize = 0; p->pMapRegion = 0; )
- rc = SQLITE_IOERR_MREMAP;
- }else{
- VVA_ONLY( p->mmapSize = nNew; p->pMapRegion = pNew; )
+ return SQLITE_OK;
+}
+
+static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+ *pp = 0;
+
+ if( pFd->mmapLimit>0 ){
+ if( pFd->pMapRegion==0 ){
+ int rc = unixMapfile(pFd, -1);
+ if( rc!=SQLITE_OK ) return rc;
}
+ if( pFd->mmapSize >= iOff+nAmt ){
+ *pp = &((u8 *)pFd->pMapRegion)[iOff];
+ pFd->nFetchOut++;
+ }
+ }
+ return SQLITE_OK;
+}
+
+static int unixUnfetch(sqlite3_file *fd, void *p){
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+
+ assert( (p==0)==(pFd->nFetchOut==0) );
+
+ if( p ){
+ pFd->nFetchOut--;
+ }else{
+ unixUnmapfile(pFd);
}
- *ppMap = pNew;
- return rc;
+ assert( pFd->nFetchOut>=0 );
+ return SQLITE_OK;
}
/*
@@ -4597,7 +4642,8 @@ static const sqlite3_io_methods METHOD = { \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
unixShmUnmap, /* xShmUnmap */ \
- unixMremap, /* xMremap */ \
+ unixFetch, /* xFetch */ \
+ unixUnfetch, /* xUnfetch */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
@@ -4865,7 +4911,6 @@ static int fillInUnixFile(
pNew->pVfs = pVfs;
pNew->zPath = zFilename;
pNew->ctrlFlags = (u8)ctrlFlags;
- VVA_ONLY( pNew->mmapSize = 0; )
if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
"psow", SQLITE_POWERSAFE_OVERWRITE) ){
pNew->ctrlFlags |= UNIXFILE_PSOW;