diff options
author | drh <drh@noemail.net> | 2009-09-03 16:23:44 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2009-09-03 16:23:44 +0000 |
commit | 0c2694b744f33a5725fb0659b4f4c72d9e54b9da (patch) | |
tree | 1607800a5502d6bac69dc618c608a7872302fc1f /src/os_unix.c | |
parent | d7d385dde05651804f48880e8d793c554cf8f51a (diff) | |
download | sqlite-0c2694b744f33a5725fb0659b4f4c72d9e54b9da.tar.gz sqlite-0c2694b744f33a5725fb0659b4f4c72d9e54b9da.zip |
Add the "unix-wfl" VFS, which does whole-file locking in order to help NFS
maintain cache coherency.
FossilOrigin-Name: 2aeab80e5b84f5e94c5c99b4adeca805601c844b
Diffstat (limited to 'src/os_unix.c')
-rw-r--r-- | src/os_unix.c | 160 |
1 files changed, 118 insertions, 42 deletions
diff --git a/src/os_unix.c b/src/os_unix.c index 15bd8cc63..83e9ce231 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -195,6 +195,7 @@ struct unixFile { int lastErrno; /* The unix errno from the last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ + int fileFlags; /* Miscellaneous flags */ #if SQLITE_ENABLE_LOCKING_STYLE int openFlags; /* The flags specified at open() */ #endif @@ -226,6 +227,11 @@ struct unixFile { }; /* +** The following macros define bits in unixFile.fileFlags +*/ +#define SQLITE_WHOLE_FILE_LOCKING 0x0001 /* Use whole-file locking */ + +/* ** Include code that is common to all os_*.c files */ #include "os_common.h" @@ -1150,6 +1156,62 @@ static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ } /* +** Perform a file locking operation on a range of bytes in a file. +** The "op" parameter should be one of F_RDLCK, F_WRLCK, or F_UNLCK. +** Return 0 on success or -1 for failure. On failure, write the error +** code into *pErrcode. +** +** If the SQLITE_WHOLE_FILE_LOCKING bit is clear, then only lock +** the range of bytes on the locking page starting at SHARED_FIRST and +** spanning SHARED_SIZE bytes. If SQLITE_WHOLE_FILE_LOCKING is set, then lock all +** bytes from 0 up to but not including PENDING_BYTE, and all bytes +** from SHARED_FIRST onward. +** +** In other words, if SQLITE_WHOLE_FILE_LOCKING is false (the historical +** default case) then only lock a small range of bytes from SHARED_FIRST +** through SHARED_FIRST+SHARED_SIZE-1. But if SQLITE_WHOLE_FILE_LOCKING is +** true then lock every byte in the file except for PENDING_BYTE and +** RESERVED_BYTE. +** +** SQLITE_WHOLE_FILE_LOCKING=true overlaps SQLITE_WHOLE_FILE_LOCKING=false +** and so the locking schemes are compatible. One type of lock will +** effectively exclude the other type. 
The reason for using +** SQLITE_WHOLE_FILE_LOCKING=true is that by indicating the full range +** of bytes to be read or written, we give hints to NFS to help it +** maintain cache coherency. On the other hand, whole file locking +** is slower, so we don't want to use it except for NFS. +*/ +static int rangeLock(unixFile *pFile, int op, int *pErrcode){ + struct flock lock; + int rc; + lock.l_type = op; + lock.l_start = SHARED_FIRST; + lock.l_whence = SEEK_SET; + if( (pFile->fileFlags & SQLITE_WHOLE_FILE_LOCKING)==0 ){ + lock.l_len = SHARED_SIZE; + rc = fcntl(pFile->h, F_SETLK, &lock); + *pErrcode = errno; + }else{ + lock.l_len = 0; + rc = fcntl(pFile->h, F_SETLK, &lock); + *pErrcode = errno; + if( NEVER(op==F_UNLCK) || rc!=(-1) ){ + lock.l_start = 0; + lock.l_len = PENDING_BYTE; + rc = fcntl(pFile->h, F_SETLK, &lock); + if( ALWAYS(op!=F_UNLCK) && rc==(-1) ){ + *pErrcode = errno; + lock.l_type = F_UNLCK; + lock.l_start = SHARED_FIRST; + lock.l_len = 0; + fcntl(pFile->h, F_SETLK, &lock); + } + } + } + return rc; +} + +/* ** Lock the file with the lock specified by parameter locktype - one ** of the following: ** @@ -1217,6 +1279,7 @@ static int unixLock(sqlite3_file *id, int locktype){ struct unixLockInfo *pLock = pFile->pLock; struct flock lock; int s; + int tErrno; assert( pFile ); OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h, @@ -1233,7 +1296,10 @@ static int unixLock(sqlite3_file *id, int locktype){ return SQLITE_OK; } - /* Make sure the locking sequence is correct + /* Make sure the locking sequence is correct. + ** (1) We never move from unlocked to anything higher than shared lock. + ** (2) SQLite never explicitly requests a pending lock. + ** (3) A shared lock is always held when a reserved lock is requested. 
*/ assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); assert( locktype!=PENDING_LOCK ); @@ -1277,14 +1343,13 @@ static int unixLock(sqlite3_file *id, int locktype){ goto end_lock; } - lock.l_len = 1L; - - lock.l_whence = SEEK_SET; /* A PENDING lock is needed before acquiring a SHARED lock and before ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will ** be released. */ + lock.l_len = 1L; + lock.l_whence = SEEK_SET; if( locktype==SHARED_LOCK || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK) ){ @@ -1292,7 +1357,7 @@ static int unixLock(sqlite3_file *id, int locktype){ lock.l_start = PENDING_BYTE; s = fcntl(pFile->h, F_SETLK, &lock); if( s==(-1) ){ - int tErrno = errno; + tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1306,16 +1371,12 @@ static int unixLock(sqlite3_file *id, int locktype){ ** operating system calls for the specified lock. */ if( locktype==SHARED_LOCK ){ - int tErrno = 0; assert( pLock->cnt==0 ); assert( pLock->locktype==0 ); /* Now get the read-lock */ - lock.l_start = SHARED_FIRST; - lock.l_len = SHARED_SIZE; - if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){ - tErrno = errno; - } + s = rangeLock(pFile, F_RDLCK, &tErrno); + /* Drop the temporary PENDING lock */ lock.l_start = PENDING_BYTE; lock.l_len = 1L; @@ -1355,17 +1416,16 @@ static int unixLock(sqlite3_file *id, int locktype){ switch( locktype ){ case RESERVED_LOCK: lock.l_start = RESERVED_BYTE; + s = fcntl(pFile->h, F_SETLK, &lock); + tErrno = errno; break; case EXCLUSIVE_LOCK: - lock.l_start = SHARED_FIRST; - lock.l_len = SHARED_SIZE; + s = rangeLock(pFile, F_WRLCK, &tErrno); break; default: assert(0); } - s = fcntl(pFile->h, F_SETLK, &lock); if( s==(-1) ){ - int tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1457,11 +1517,12 @@ static void setPendingFd(unixFile *pFile){ ** the requested 
locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int locktype){ - struct unixLockInfo *pLock; - struct flock lock; - int rc = SQLITE_OK; - unixFile *pFile = (unixFile*)id; - int h; + unixFile *pFile = (unixFile*)id; /* The open file */ + struct unixLockInfo *pLock; /* Structure describing current lock state */ + struct flock lock; /* Information passed into fcntl() */ + int rc = SQLITE_OK; /* Return code from this interface */ + int h; /* The underlying file descriptor */ + int tErrno; /* Error code from system call errors */ assert( pFile ); OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype, @@ -1501,12 +1562,7 @@ static int unixUnlock(sqlite3_file *id, int locktype){ if( locktype==SHARED_LOCK ){ - lock.l_type = F_RDLCK; - lock.l_whence = SEEK_SET; - lock.l_start = SHARED_FIRST; - lock.l_len = SHARED_SIZE; - if( fcntl(h, F_SETLK, &lock)==(-1) ){ - int tErrno = errno; + if( rangeLock(pFile, F_RDLCK, &tErrno)==(-1) ){ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1521,7 +1577,7 @@ static int unixUnlock(sqlite3_file *id, int locktype){ if( fcntl(h, F_SETLK, &lock)!=(-1) ){ pLock->locktype = SHARED_LOCK; }else{ - int tErrno = errno; + tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1547,7 +1603,7 @@ static int unixUnlock(sqlite3_file *id, int locktype){ if( fcntl(h, F_SETLK, &lock)!=(-1) ){ pLock->locktype = NO_LOCK; }else{ - int tErrno = errno; + tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ pFile->lastErrno = tErrno; @@ -1696,7 +1752,7 @@ static int nolockClose(sqlite3_file *id) { /****************************************************************************** ************************* Begin dot-file Locking ****************************** ** -** The dotfile locking implementation uses the existing of separate 
lock +** The dotfile locking implementation uses the existence of separate lock ** files in order to control access to the database. This works on just ** about every filesystem imaginable. But there are serious downsides: ** @@ -3199,11 +3255,11 @@ static const sqlite3_io_methods METHOD = { \ unixSectorSize, /* xSectorSize */ \ unixDeviceCharacteristics /* xDeviceCapabilities */ \ }; \ -static const sqlite3_io_methods *FINDER##Impl(const char *z, int h){ \ - UNUSED_PARAMETER(z); UNUSED_PARAMETER(h); \ +static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ + UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ return &METHOD; \ } \ -static const sqlite3_io_methods *(*const FINDER)(const char*,int) \ +static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ = FINDER##Impl; /* @@ -3270,6 +3326,23 @@ IOMETHODS( #endif /* +** The "Whole File Locking" finder returns the same set of methods as +** the posix locking finder. But it also sets the SQLITE_WHOLE_FILE_LOCKING +** flag to force the posix advisory locks to cover the whole file instead +** of just a small span of bytes near the 1GiB boundary. Whole File Locking +** is useful on NFS-mounted files since it helps NFS to maintain cache +** coherency. But it is a detriment to other filesystems since it runs +** slower. 
+*/ +static const sqlite3_io_methods *posixWflIoFinderImpl(const char*z, unixFile*p){ + UNUSED_PARAMETER(z); + p->fileFlags = SQLITE_WHOLE_FILE_LOCKING; + return &posixIoMethods; +} +static const sqlite3_io_methods + *(*const posixWflIoFinder)(const char*,unixFile *p) = posixWflIoFinderImpl; + +/* ** The proxy locking method is a "super-method" in the sense that it ** opens secondary file descriptors for the conch and lock files and ** it uses proxy, dot-file, AFP, and flock() locking methods on those @@ -3304,7 +3377,7 @@ IOMETHODS( */ static const sqlite3_io_methods *autolockIoFinderImpl( const char *filePath, /* name of the database file */ - int fd /* file descriptor open on the database file */ + unixFile *pNew /* open file object for the database file */ ){ static const struct Mapping { const char *zFilesystem; /* Filesystem type name */ @@ -3349,14 +3422,15 @@ static const sqlite3_io_methods *autolockIoFinderImpl( lockInfo.l_start = 0; lockInfo.l_whence = SEEK_SET; lockInfo.l_type = F_RDLCK; - if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) { + if( fcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { + pNew->fileFlags = SQLITE_WHOLE_FILE_LOCKING; return &posixIoMethods; }else{ return &dotlockIoMethods; } } -static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,int) - = autolockIoFinderImpl; +static const sqlite3_io_methods + *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ @@ -3370,7 +3444,7 @@ static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,int) */ static const sqlite3_io_methods *autolockIoFinderImpl( const char *filePath, /* name of the database file */ - int fd /* file descriptor open on the database file */ + unixFile *pNew /* the open file object */ ){ struct flock lockInfo; @@ -3387,21 +3461,21 @@ static const sqlite3_io_methods *autolockIoFinderImpl( lockInfo.l_start = 0; lockInfo.l_whence = SEEK_SET; lockInfo.l_type = F_RDLCK; - if( 
fcntl(fd, F_GETLK, &lockInfo)!=-1 ) { + if( fcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { return &posixIoMethods; }else{ return &semIoMethods; } } -static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,int) - = autolockIoFinderImpl; +static const sqlite3_io_methods + *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; #endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */ /* ** An abstract type for a pointer to a IO method finder function: */ -typedef const sqlite3_io_methods *(*finder_type)(const char*,int); +typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); /**************************************************************************** @@ -3439,6 +3513,7 @@ static int fillInUnixFile( pNew->h = h; pNew->dirfd = dirfd; SET_THREADID(pNew); + pNew->fileFlags = 0; #if OS_VXWORKS pNew->pId = vxworksFindFileId(zFilename); @@ -3451,7 +3526,7 @@ static int fillInUnixFile( if( noLock ){ pLockingStyle = &nolockIoMethods; }else{ - pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, h); + pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); #if SQLITE_ENABLE_LOCKING_STYLE /* Cache zFilename in the locking context (AFP and dotlock override) for ** proxyLock activation is possible (remote proxy is based on db name) @@ -5266,6 +5341,7 @@ int sqlite3_os_init(void){ #endif UNIXVFS("unix-none", nolockIoFinder ), UNIXVFS("unix-dotfile", dotlockIoFinder ), + UNIXVFS("unix-wfl", posixWflIoFinder ), #if OS_VXWORKS UNIXVFS("unix-namedsem", semIoFinder ), #endif |