diff options
author | drh <drh@noemail.net> | 2012-01-05 13:02:36 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2012-01-05 13:02:36 +0000 |
commit | 54cced185c6affa94c321847a18a0c72185c8fc7 (patch) | |
tree | ac91465d586e4cf42dac3b3365dd07da0091ca53 /src/os_unix.c | |
parent | f004e1d69ed564e38dea5d870fd75782f3819831 (diff) | |
parent | e0711b47b12cf0312690c7240d6ee09b67ce4822 (diff) | |
download | sqlite-54cced185c6affa94c321847a18a0c72185c8fc7.tar.gz sqlite-54cced185c6affa94c321847a18a0c72185c8fc7.zip |
Merge all of the latest trunk changes into the sessions branch.
FossilOrigin-Name: a9bcb432f58b96f079a73c456efd4851c582221e
Diffstat (limited to 'src/os_unix.c')
-rw-r--r-- | src/os_unix.c | 174 |
1 files changed, 113 insertions, 61 deletions
diff --git a/src/os_unix.c b/src/os_unix.c index 0ea6daf27..4f263bcdd 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -123,6 +123,7 @@ #include <sys/mman.h> #endif + #if SQLITE_ENABLE_LOCKING_STYLE # include <sys/ioctl.h> # if OS_VXWORKS @@ -206,6 +207,7 @@ struct UnixUnusedFd { typedef struct unixFile unixFile; struct unixFile { sqlite3_io_methods const *pMethod; /* Always the first entry */ + sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ unsigned char eFileLock; /* The type of lock held on this fd */ @@ -257,6 +259,7 @@ struct unixFile { #else # define UNIXFILE_DIRSYNC 0x00 #endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ /* ** Include code that is common to all os_*.c files @@ -407,6 +410,12 @@ static struct unix_syscall { { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) + { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, +#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) + + { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, +#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) + }; /* End of the overrideable system calls */ /* @@ -1845,8 +1854,8 @@ static int nolockClose(sqlite3_file *id) { ************************* Begin dot-file Locking ****************************** ** ** The dotfile locking implementation uses the existance of separate lock -** files in order to control access to the database. This works on just -** about every filesystem imaginable. But there are serious downsides: +** files (really a directory) to control access to the database. This works +** on just about every filesystem imaginable. But there are serious downsides: ** ** (1) There is zero concurrency. A single reader blocks all other ** connections from reading or writing the database. @@ -1857,15 +1866,15 @@ static int nolockClose(sqlite3_file *id) { ** Nevertheless, a dotlock is an appropriate locking mode for use if no ** other locking strategy is available. ** -** Dotfile locking works by creating a file in the same directory as the -** database and with the same name but with a ".lock" extension added. -** The existance of a lock file implies an EXCLUSIVE lock. All other lock -** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. +** Dotfile locking works by creating a subdirectory in the same directory as +** the database and with the same name but with a ".lock" extension added. +** The existance of a lock directory implies an EXCLUSIVE lock. All other +** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. */ /* ** The file suffix added to the data base filename in order to create the -** lock file. +** lock directory. */ #define DOTLOCK_SUFFIX ".lock" @@ -1932,7 +1941,6 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { */ static int dotlockLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; - int fd; char *zLockFile = (char *)pFile->lockingContext; int rc = SQLITE_OK; @@ -1952,9 +1960,9 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { } /* grab an exclusive lock */ - fd = robust_open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600); - if( fd<0 ){ - /* failed to open/create the file, someone else may have stolen the lock */ + rc = osMkdir(zLockFile, 0777); + if( rc<0 ){ + /* failed to open/create the lock directory */ int tErrno = errno; if( EEXIST == tErrno ){ rc = SQLITE_BUSY; @@ -1966,7 +1974,6 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { } return rc; } - robust_close(pFile, fd, __LINE__); /* got it, set the type and return ok */ pFile->eFileLock = eFileLock; @@ -1985,6 +1992,7 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { static int dotlockUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; char *zLockFile = (char *)pFile->lockingContext; + int rc; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, @@ -2006,9 +2014,11 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) { /* To fully unlock the database, delete the lock file */ assert( eFileLock==NO_LOCK ); - if( osUnlink(zLockFile) ){ - int rc = 0; + rc = osRmdir(zLockFile); + if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile); + if( rc<0 ){ int tErrno = errno; + rc = 0; if( ENOENT != tErrno ){ rc = SQLITE_IOERR_UNLOCK; } @@ -2944,35 +2954,48 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){ */ static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ int got; + int prior = 0; #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) i64 newOffset; #endif TIMER_START; + do{ #if defined(USE_PREAD) - do{ got = osPread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); - SimulateIOError( got = -1 ); + got = osPread(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); #elif defined(USE_PREAD64) - do{ got = osPread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR); - SimulateIOError( got = -1 ); + got = osPread64(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); #else - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError( newOffset-- ); + if( newOffset!=offset ){ + if( newOffset == -1 ){ + ((unixFile*)id)->lastErrno = errno; + }else{ + ((unixFile*)id)->lastErrno = 0; + } + return -1; } - return -1; - } - do{ got = osRead(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); + got = osRead(id->h, pBuf, cnt); #endif + if( got==cnt ) break; + if( got<0 ){ + if( errno==EINTR ){ got = 1; continue; } + prior = 0; + ((unixFile*)id)->lastErrno = errno; + break; + }else if( got>0 ){ + cnt -= got; + offset += got; + prior += got; + pBuf = (void*)(got + (char*)pBuf); + } + }while( got>0 ); TIMER_END; - if( got<0 ){ - ((unixFile*)id)->lastErrno = errno; - } - OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); - return got; + OSTRACE(("READ %-3d %5d %7lld %llu\n", + id->h, got+prior, offset-prior, TIMER_ELAPSED)); + return got+prior; } /* @@ -3478,6 +3501,22 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ } /* +** If *pArg is inititially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ @@ -3503,14 +3542,15 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return rc; } case SQLITE_FCNTL_PERSIST_WAL: { - int bPersist = *(int*)pArg; - if( bPersist<0 ){ - *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0; - }else if( bPersist==0 ){ - pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL; - }else{ - pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL; - } + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); return SQLITE_OK; } #ifndef NDEBUG @@ -3530,9 +3570,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return proxyFileControl(id,op,pArg); } #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ - case SQLITE_FCNTL_SYNC_OMITTED: { - return SQLITE_OK; /* A no-op */ - } } return SQLITE_NOTFOUND; } @@ -3547,17 +3584,31 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ ** a database and its journal file) that the sector size will be the ** same for both. */ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); +static int unixSectorSize(sqlite3_file *pFile){ + (void)pFile; return SQLITE_DEFAULT_SECTOR_SIZE; } /* -** Return the device characteristics for the file. This is always 0 for unix. +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is contraversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. +** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and URI query parameter available to turn it off. */ -static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return 0; +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *p = (unixFile*)id; + if( p->ctrlFlags & UNIXFILE_PSOW ){ + return SQLITE_IOCAP_POWERSAFE_OVERWRITE; + }else{ + return 0; + } } #ifndef SQLITE_OMIT_WAL @@ -3812,9 +3863,9 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ } #ifdef SQLITE_SHM_DIRECTORY - nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30; + nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; #else - nShmFilename = 5 + (int)strlen(pDbFd->zPath); + nShmFilename = 6 + (int)strlen(pDbFd->zPath); #endif pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename ); if( pShmNode==0 ){ @@ -3841,10 +3892,8 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ } if( pInode->bProcessLock==0 ){ - const char *zRO; int openFlags = O_RDWR | O_CREAT; - zRO = sqlite3_uri_parameter(pDbFd->zPath, "readonly_shm"); - if( zRO && sqlite3GetBoolean(zRO) ){ + if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ openFlags = O_RDONLY; pShmNode->isReadonly = 1; } @@ -4540,11 +4589,14 @@ static int fillInUnixFile( OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; + pNew->pVfs = pVfs; pNew->zPath = zFilename; + pNew->ctrlFlags = 0; + if( sqlite3_uri_boolean(zFilename, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; + } if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ - pNew->ctrlFlags = UNIXFILE_EXCL; - }else{ - pNew->ctrlFlags = 0; + pNew->ctrlFlags |= UNIXFILE_EXCL; } if( isReadOnly ){ pNew->ctrlFlags |= UNIXFILE_RDONLY; @@ -4879,7 +4931,7 @@ static int findCreateFileMode( */ nDb = sqlite3Strlen30(zPath) - 1; #ifdef SQLITE_ENABLE_8_3_NAMES - while( nDb>0 && !sqlite3Isalnum(zPath[nDb]) ) nDb--; + while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--; if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK; #else while( zPath[nDb]!='-' ){ @@ -5715,7 +5767,7 @@ static int proxyCreateLockPath(const char *lockPath){ if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ buf[i]='\0'; - if( mkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ + if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ int err=errno; if( err!=EEXIST ) { OSTRACE(("CREATELOCKPATH FAILED creating %s, " @@ -6751,7 +6803,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==18 ); + assert( ArraySize(aSyscall)==20 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ |