aboutsummaryrefslogtreecommitdiff
path: root/src/os_unix.c
diff options
context:
space:
mode:
authordrh <drh@noemail.net>2013-04-22 23:59:06 +0000
committerdrh <drh@noemail.net>2013-04-22 23:59:06 +0000
commit16fb176814bd2b8fe46f7ed0e50a0b3d33db89a0 (patch)
tree95791389bfc2d16f158af35b89e36fd39b23cc53 /src/os_unix.c
parent8bc8bfcb05ab15cac3c84b137b572350beb4de65 (diff)
parentda8caa0b2da85ebaf75fce1b19f5d645246f3eba (diff)
downloadsqlite-16fb176814bd2b8fe46f7ed0e50a0b3d33db89a0.tar.gz
sqlite-16fb176814bd2b8fe46f7ed0e50a0b3d33db89a0.zip
Merge the latest trunk changes into the sessions branch.
FossilOrigin-Name: 6994826c0784280f2e9728dfa4185848846d03df
Diffstat (limited to 'src/os_unix.c')
-rw-r--r--src/os_unix.c395
1 files changed, 388 insertions, 7 deletions
diff --git a/src/os_unix.c b/src/os_unix.c
index 84b1594ac..7448afe4c 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -225,6 +225,11 @@ struct unixFile {
const char *zPath; /* Name of the file */
unixShm *pShm; /* Shared memory segment information */
int szChunk; /* Configured by FCNTL_CHUNK_SIZE */
+ int nFetchOut; /* Number of outstanding xFetch refs */
+ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
+ sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */
+ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */
+ void *pMapRegion; /* Memory mapped region */
#ifdef __QNXNTO__
int sectorSize; /* Device sector size */
int deviceCharacteristics; /* Precomputed device characteristics */
@@ -249,7 +254,9 @@ struct unixFile {
unsigned char transCntrChng; /* True if the transaction counter changed */
unsigned char dbUpdate; /* True if any part of database file changed */
unsigned char inNormalWrite; /* True if in a normal write operation */
+
#endif
+
#ifdef SQLITE_TEST
/* In test mode, increase the size of this structure a bit so that
** it is larger than the struct CrashFile defined in test6.c.
@@ -273,6 +280,7 @@ struct unixFile {
#define UNIXFILE_DELETE 0x20 /* Delete on close */
#define UNIXFILE_URI 0x40 /* Filename might have query parameters */
#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */
+#define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings have been issued */
/*
** Include code that is common to all os_*.c files
@@ -307,6 +315,17 @@ struct unixFile {
#endif
/*
+** HAVE_MREMAP defaults to true on Linux and false everywhere else.
+*/
+#if !defined(HAVE_MREMAP)
+# if defined(__linux__) && defined(_GNU_SOURCE)
+# define HAVE_MREMAP 1
+# else
+# define HAVE_MREMAP 0
+# endif
+#endif
+
+/*
** Different Unix systems declare open() in different ways. Same use
** open(const char*,int,mode_t). Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
@@ -437,6 +456,19 @@ static struct unix_syscall {
{ "fchown", (sqlite3_syscall_ptr)posixFchown, 0 },
#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
+ { "mmap", (sqlite3_syscall_ptr)mmap, 0 },
+#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)
+
+ { "munmap", (sqlite3_syscall_ptr)munmap, 0 },
+#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)
+
+#if HAVE_MREMAP
+ { "mremap", (sqlite3_syscall_ptr)mremap, 0 },
+#else
+ { "mremap", (sqlite3_syscall_ptr)0, 0 },
+#endif
+#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
+
}; /* End of the overrideable system calls */
/*
@@ -768,7 +800,6 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
}
-
/******************************************************************************
****************** Begin Unique File ID Utility Used By VxWorks ***************
**
@@ -1104,7 +1135,6 @@ static int unixLogErrorAtLine(
zErr = strerror(iErrno);
#endif
- assert( errcode!=SQLITE_OK );
if( zPath==0 ) zPath = "";
sqlite3_log(errcode,
"os_unix.c:%d: (%d) %s(%s) - %s",
@@ -1271,6 +1301,50 @@ static int findInodeInfo(
/*
+** Check a unixFile that is a database. Verify the following:
+**
+** (1) There is exactly one hard link on the file
+** (2) The file is not a symbolic link
+** (3) The file has not been renamed or unlinked
+**
+** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
+*/
+static void verifyDbFile(unixFile *pFile){
+ struct stat buf;
+ int rc;
+ if( pFile->ctrlFlags & UNIXFILE_WARNED ){
+ /* One or more of the following warnings have already been issued. Do not
+ ** repeat them so as not to clutter the error log */
+ return;
+ }
+ rc = osFstat(pFile->h, &buf);
+ if( rc!=0 ){
+ sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+ if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
+ sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+ if( buf.st_nlink>1 ){
+ sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+ if( pFile->pInode!=0
+ && ((rc = osStat(pFile->zPath, &buf))!=0
+ || buf.st_ino!=pFile->pInode->fileId.ino)
+ ){
+ sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+}
+
+
+/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
@@ -1800,9 +1874,13 @@ end_unlock:
** the requested locking level, this routine is a no-op.
*/
static int unixUnlock(sqlite3_file *id, int eFileLock){
+ assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
return posixUnlock(id, eFileLock, 0);
}
+static int unixMapfile(unixFile *pFd, i64 nByte);
+static void unixUnmapfile(unixFile *pFd);
+
/*
** This function performs the parts of the "close file" operation
** common to all locking schemes. It closes the directory and file
@@ -1815,6 +1893,7 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
*/
static int closeUnixFile(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
+ unixUnmapfile(pFile);
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
pFile->h = -1;
@@ -1841,6 +1920,7 @@ static int closeUnixFile(sqlite3_file *id){
static int unixClose(sqlite3_file *id){
int rc = SQLITE_OK;
unixFile *pFile = (unixFile *)id;
+ verifyDbFile(pFile);
unixUnlock(id, NO_LOCK);
unixEnterMutex();
@@ -3082,6 +3162,23 @@ static int unixRead(
);
#endif
+#if SQLITE_MAX_MMAP_SIZE>0
+ /* Deal with as much of this read request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+#endif
+
got = seekAndRead(pFile, offset, pBuf, amt);
if( got==amt ){
return SQLITE_OK;
@@ -3186,6 +3283,23 @@ static int unixWrite(
}
#endif
+#if SQLITE_MAX_MMAP_SIZE>0
+ /* Deal with as much of this write request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+#endif
+
while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
amt -= wrote;
offset += wrote;
@@ -3468,6 +3582,14 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){
}
#endif
+ /* If the file was just truncated to a size smaller than the currently
+ ** mapped region, reduce the effective mapping size as well. SQLite will
+ ** use read() and write() to access data beyond this point from now on.
+ */
+ if( nByte<pFile->mmapSize ){
+ pFile->mmapSize = nByte;
+ }
+
return SQLITE_OK;
}
}
@@ -3556,6 +3678,19 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
}
}
+ if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
+ int rc;
+ if( pFile->szChunk<=0 ){
+ if( robust_ftruncate(pFile->h, nByte) ){
+ pFile->lastErrno = errno;
+ return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
+ }
+ }
+
+ rc = unixMapfile(pFile, nByte);
+ return rc;
+ }
+
return SQLITE_OK;
}
@@ -3623,6 +3758,18 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
}
return SQLITE_OK;
}
+ case SQLITE_FCNTL_MMAP_SIZE: {
+ i64 newLimit = *(i64*)pArg;
+ if( newLimit>sqlite3GlobalConfig.mxMmap ){
+ newLimit = sqlite3GlobalConfig.mxMmap;
+ }
+ *(i64*)pArg = pFile->mmapSizeMax;
+ if( newLimit>=0 ){
+ pFile->mmapSizeMax = newLimit;
+ if( newLimit<pFile->mmapSize ) pFile->mmapSize = newLimit;
+ }
+ return SQLITE_OK;
+ }
#ifdef SQLITE_DEBUG
/* The pager calls this method to signal that it has done
** a rollback and that the database is therefore unchanged and
@@ -3935,7 +4082,7 @@ static void unixShmPurge(unixFile *pFd){
sqlite3_mutex_free(p->mutex);
for(i=0; i<p->nRegion; i++){
if( p->h>=0 ){
- munmap(p->apRegion[i], p->szRegion);
+ osMunmap(p->apRegion[i], p->szRegion);
}else{
sqlite3_free(p->apRegion[i]);
}
@@ -4208,7 +4355,7 @@ static int unixShmMap(
while(pShmNode->nRegion<=iRegion){
void *pMem;
if( pShmNode->h>=0 ){
- pMem = mmap(0, szRegion,
+ pMem = osMmap(0, szRegion,
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
);
@@ -4426,6 +4573,236 @@ static int unixShmUnmap(
#endif /* #ifndef SQLITE_OMIT_WAL */
/*
+** If it is currently memory mapped, unmap file pFd.
+*/
+static void unixUnmapfile(unixFile *pFd){
+ assert( pFd->nFetchOut==0 );
+#if SQLITE_MAX_MMAP_SIZE>0
+ if( pFd->pMapRegion ){
+ osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
+ pFd->pMapRegion = 0;
+ pFd->mmapSize = 0;
+ pFd->mmapSizeActual = 0;
+ }
+#endif
+}
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/*
+** Return the system page size.
+*/
+static int unixGetPagesize(void){
+#if HAVE_MREMAP
+ return 512;
+#elif defined(_BSD_SOURCE)
+ return getpagesize();
+#else
+ return (int)sysconf(_SC_PAGESIZE);
+#endif
+}
+#endif /* SQLITE_MAX_MMAP_SIZE>0 */
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/*
+** Attempt to set the size of the memory mapping maintained by file
+** descriptor pFd to nNew bytes. Any existing mapping is discarded.
+**
+** If successful, this function sets the following variables:
+**
+** unixFile.pMapRegion
+** unixFile.mmapSize
+** unixFile.mmapSizeActual
+**
+** If unsuccessful, an error message is logged via sqlite3_log() and
+** the three variables above are zeroed. In this case SQLite should
+** continue accessing the database using the xRead() and xWrite()
+** methods.
+*/
+static void unixRemapfile(
+ unixFile *pFd, /* File descriptor object */
+ i64 nNew /* Required mapping size */
+){
+ const char *zErr = "mmap";
+ int h = pFd->h; /* File descriptor open on db file */
+ u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */
+ i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */
+ u8 *pNew = 0; /* Location of new mapping */
+ int flags = PROT_READ; /* Flags to pass to mmap() */
+
+ assert( pFd->nFetchOut==0 );
+ assert( nNew>pFd->mmapSize );
+ assert( nNew<=pFd->mmapSizeMax );
+ assert( nNew>0 );
+ assert( pFd->mmapSizeActual>=pFd->mmapSize );
+ assert( MAP_FAILED!=0 );
+
+ if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
+
+ if( pOrig ){
+ const int szSyspage = unixGetPagesize();
+ i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
+ u8 *pReq = &pOrig[nReuse];
+
+ /* Unmap any pages of the existing mapping that cannot be reused. */
+ if( nReuse!=nOrig ){
+ osMunmap(pReq, nOrig-nReuse);
+ }
+
+#if HAVE_MREMAP
+ pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
+ zErr = "mremap";
+#else
+ pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
+ if( pNew!=MAP_FAILED ){
+ if( pNew!=pReq ){
+ osMunmap(pNew, nNew - nReuse);
+ pNew = 0;
+ }else{
+ pNew = pOrig;
+ }
+ }
+#endif
+
+ /* The attempt to extend the existing mapping failed. Free it. */
+ if( pNew==MAP_FAILED || pNew==0 ){
+ osMunmap(pOrig, nReuse);
+ }
+ }
+
+ /* If pNew is still NULL, try to create an entirely new mapping. */
+ if( pNew==0 ){
+ pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
+ }
+
+ if( pNew==MAP_FAILED ){
+ pNew = 0;
+ nNew = 0;
+ unixLogError(SQLITE_OK, zErr, pFd->zPath);
+
+ /* If the mmap() above failed, assume that all subsequent mmap() calls
+ ** will probably fail too. Fall back to using xRead/xWrite exclusively
+ ** in this case. */
+ pFd->mmapSizeMax = 0;
+ }
+ pFd->pMapRegion = (void *)pNew;
+ pFd->mmapSize = pFd->mmapSizeActual = nNew;
+}
+#endif
+
+/*
+** Memory map or remap the file opened by file-descriptor pFd (if the file
+** is already mapped, the existing mapping is replaced by the new). Or, if
+** there already exists a mapping for this file, and there are still
+** outstanding xFetch() references to it, this function is a no-op.
+**
+** If parameter nByte is non-negative, then it is the requested size of
+** the mapping to create. Otherwise, if nByte is less than zero, then the
+** requested size is the size of the file on disk. The actual size of the
+** created mapping is either the requested size or the value configured
+** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
+**
+** SQLITE_OK is returned if no error occurs (even if the mapping is not
+** recreated as a result of outstanding references) or an SQLite error
+** code otherwise.
+*/
+static int unixMapfile(unixFile *pFd, i64 nByte){
+#if SQLITE_MAX_MMAP_SIZE>0
+ i64 nMap = nByte;
+ int rc;
+
+ assert( nMap>=0 || pFd->nFetchOut==0 );
+ if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+ if( nMap<0 ){
+ struct stat statbuf; /* Low-level file information */
+ rc = osFstat(pFd->h, &statbuf);
+ if( rc!=SQLITE_OK ){
+ return SQLITE_IOERR_FSTAT;
+ }
+ nMap = statbuf.st_size;
+ }
+ if( nMap>pFd->mmapSizeMax ){
+ nMap = pFd->mmapSizeMax;
+ }
+
+ if( nMap!=pFd->mmapSize ){
+ if( nMap>0 ){
+ unixRemapfile(pFd, nMap);
+ }else{
+ unixUnmapfile(pFd);
+ }
+ }
+#endif
+
+ return SQLITE_OK;
+}
+
+/*
+** If possible, return a pointer to a mapping of file fd starting at offset
+** iOff. The mapping must be valid for at least nAmt bytes.
+**
+** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
+** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
+** Finally, if an error does occur, return an SQLite error code. The final
+** value of *pp is undefined in this case.
+**
+** If this function does return a pointer, the caller must eventually
+** release the reference by calling unixUnfetch().
+*/
+static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+#if SQLITE_MAX_MMAP_SIZE>0
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+#endif
+ *pp = 0;
+
+#if SQLITE_MAX_MMAP_SIZE>0
+ if( pFd->mmapSizeMax>0 ){
+ if( pFd->pMapRegion==0 ){
+ int rc = unixMapfile(pFd, -1);
+ if( rc!=SQLITE_OK ) return rc;
+ }
+ if( pFd->mmapSize >= iOff+nAmt ){
+ *pp = &((u8 *)pFd->pMapRegion)[iOff];
+ pFd->nFetchOut++;
+ }
+ }
+#endif
+ return SQLITE_OK;
+}
+
+/*
+** If the third argument is non-NULL, then this function releases a
+** reference obtained by an earlier call to unixFetch(). The second
+** argument passed to this function must be the same as the corresponding
+** argument that was passed to the unixFetch() invocation.
+**
+** Or, if the third argument is NULL, then this function is being called
+** to inform the VFS layer that, according to POSIX, any existing mapping
+** may now be invalid and should be unmapped.
+*/
+static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+ UNUSED_PARAMETER(iOff);
+
+ /* If p==0 (unmap the entire file) then there must be no outstanding
+ ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
+ ** then there must be at least one outstanding. */
+ assert( (p==0)==(pFd->nFetchOut==0) );
+
+ /* If p!=0, it must match the iOff value. */
+ assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
+
+ if( p ){
+ pFd->nFetchOut--;
+ }else{
+ unixUnmapfile(pFd);
+ }
+
+ assert( pFd->nFetchOut>=0 );
+ return SQLITE_OK;
+}
+
+/*
** Here ends the implementation of all sqlite3_file methods.
**
********************** End sqlite3_file Methods *******************************
@@ -4483,7 +4860,9 @@ static const sqlite3_io_methods METHOD = { \
unixShmMap, /* xShmMap */ \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
- unixShmUnmap /* xShmUnmap */ \
+ unixShmUnmap, /* xShmUnmap */ \
+ unixFetch, /* xFetch */ \
+ unixUnfetch, /* xUnfetch */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
@@ -4500,7 +4879,7 @@ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \
IOMETHODS(
posixIoFinder, /* Finder function name */
posixIoMethods, /* sqlite3_io_methods object name */
- 2, /* shared memory is enabled */
+ 3, /* shared memory and mmap are enabled */
unixClose, /* xClose method */
unixLock, /* xLock method */
unixUnlock, /* xUnlock method */
@@ -4751,6 +5130,7 @@ static int fillInUnixFile(
pNew->pVfs = pVfs;
pNew->zPath = zFilename;
pNew->ctrlFlags = (u8)ctrlFlags;
+ pNew->mmapSizeMax = sqlite3GlobalConfig.mxMmap;
if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
"psow", SQLITE_POWERSAFE_OVERWRITE) ){
pNew->ctrlFlags |= UNIXFILE_PSOW;
@@ -4895,6 +5275,7 @@ static int fillInUnixFile(
}else{
pNew->pMethod = pLockingStyle;
OpenCounter(+1);
+ verifyDbFile(pNew);
}
return rc;
}
@@ -6988,7 +7369,7 @@ int sqlite3_os_init(void){
/* Double-check that the aSyscall[] array has been constructed
** correctly. See ticket [bb3a86e890c8e96ab] */
- assert( ArraySize(aSyscall)==21 );
+ assert( ArraySize(aSyscall)==24 );
/* Register all VFSes defined in the aVfs[] array */
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){