aboutsummaryrefslogtreecommitdiff
path: root/src/os_unix.c
diff options
context:
space:
mode:
authordrh <drh@noemail.net>2012-01-05 13:02:36 +0000
committerdrh <drh@noemail.net>2012-01-05 13:02:36 +0000
commit54cced185c6affa94c321847a18a0c72185c8fc7 (patch)
treeac91465d586e4cf42dac3b3365dd07da0091ca53 /src/os_unix.c
parentf004e1d69ed564e38dea5d870fd75782f3819831 (diff)
parente0711b47b12cf0312690c7240d6ee09b67ce4822 (diff)
downloadsqlite-54cced185c6affa94c321847a18a0c72185c8fc7.tar.gz
sqlite-54cced185c6affa94c321847a18a0c72185c8fc7.zip
Merge all of the latest trunk changes into the sessions branch.
FossilOrigin-Name: a9bcb432f58b96f079a73c456efd4851c582221e
Diffstat (limited to 'src/os_unix.c')
-rw-r--r--src/os_unix.c174
1 files changed, 113 insertions, 61 deletions
diff --git a/src/os_unix.c b/src/os_unix.c
index 0ea6daf27..4f263bcdd 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -123,6 +123,7 @@
#include <sys/mman.h>
#endif
+
#if SQLITE_ENABLE_LOCKING_STYLE
# include <sys/ioctl.h>
# if OS_VXWORKS
@@ -206,6 +207,7 @@ struct UnixUnusedFd {
typedef struct unixFile unixFile;
struct unixFile {
sqlite3_io_methods const *pMethod; /* Always the first entry */
+ sqlite3_vfs *pVfs; /* The VFS that created this unixFile */
unixInodeInfo *pInode; /* Info about locks on this inode */
int h; /* The file descriptor */
unsigned char eFileLock; /* The type of lock held on this fd */
@@ -257,6 +259,7 @@ struct unixFile {
#else
# define UNIXFILE_DIRSYNC 0x00
#endif
+#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
/*
** Include code that is common to all os_*.c files
@@ -407,6 +410,12 @@ static struct unix_syscall {
{ "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
+ { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
+#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
+
+ { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
+#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
+
}; /* End of the overrideable system calls */
/*
@@ -1845,8 +1854,8 @@ static int nolockClose(sqlite3_file *id) {
************************* Begin dot-file Locking ******************************
**
** The dotfile locking implementation uses the existance of separate lock
-** files in order to control access to the database. This works on just
-** about every filesystem imaginable. But there are serious downsides:
+** files (really a directory) to control access to the database. This works
+** on just about every filesystem imaginable. But there are serious downsides:
**
** (1) There is zero concurrency. A single reader blocks all other
** connections from reading or writing the database.
@@ -1857,15 +1866,15 @@ static int nolockClose(sqlite3_file *id) {
** Nevertheless, a dotlock is an appropriate locking mode for use if no
** other locking strategy is available.
**
-** Dotfile locking works by creating a file in the same directory as the
-** database and with the same name but with a ".lock" extension added.
-** The existance of a lock file implies an EXCLUSIVE lock. All other lock
-** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
+** Dotfile locking works by creating a subdirectory in the same directory as
+** the database and with the same name but with a ".lock" extension added.
+** The existance of a lock directory implies an EXCLUSIVE lock. All other
+** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
*/
/*
** The file suffix added to the data base filename in order to create the
-** lock file.
+** lock directory.
*/
#define DOTLOCK_SUFFIX ".lock"
@@ -1932,7 +1941,6 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
*/
static int dotlockLock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
- int fd;
char *zLockFile = (char *)pFile->lockingContext;
int rc = SQLITE_OK;
@@ -1952,9 +1960,9 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) {
}
/* grab an exclusive lock */
- fd = robust_open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
- if( fd<0 ){
- /* failed to open/create the file, someone else may have stolen the lock */
+ rc = osMkdir(zLockFile, 0777);
+ if( rc<0 ){
+ /* failed to open/create the lock directory */
int tErrno = errno;
if( EEXIST == tErrno ){
rc = SQLITE_BUSY;
@@ -1966,7 +1974,6 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) {
}
return rc;
}
- robust_close(pFile, fd, __LINE__);
/* got it, set the type and return ok */
pFile->eFileLock = eFileLock;
@@ -1985,6 +1992,7 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) {
static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
char *zLockFile = (char *)pFile->lockingContext;
+ int rc;
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,
@@ -2006,9 +2014,11 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
/* To fully unlock the database, delete the lock file */
assert( eFileLock==NO_LOCK );
- if( osUnlink(zLockFile) ){
- int rc = 0;
+ rc = osRmdir(zLockFile);
+ if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile);
+ if( rc<0 ){
int tErrno = errno;
+ rc = 0;
if( ENOENT != tErrno ){
rc = SQLITE_IOERR_UNLOCK;
}
@@ -2944,35 +2954,48 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){
*/
static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
int got;
+ int prior = 0;
#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
i64 newOffset;
#endif
TIMER_START;
+ do{
#if defined(USE_PREAD)
- do{ got = osPread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
- SimulateIOError( got = -1 );
+ got = osPread(id->h, pBuf, cnt, offset);
+ SimulateIOError( got = -1 );
#elif defined(USE_PREAD64)
- do{ got = osPread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR);
- SimulateIOError( got = -1 );
+ got = osPread64(id->h, pBuf, cnt, offset);
+ SimulateIOError( got = -1 );
#else
- newOffset = lseek(id->h, offset, SEEK_SET);
- SimulateIOError( newOffset-- );
- if( newOffset!=offset ){
- if( newOffset == -1 ){
- ((unixFile*)id)->lastErrno = errno;
- }else{
- ((unixFile*)id)->lastErrno = 0;
+ newOffset = lseek(id->h, offset, SEEK_SET);
+ SimulateIOError( newOffset-- );
+ if( newOffset!=offset ){
+ if( newOffset == -1 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }else{
+ ((unixFile*)id)->lastErrno = 0;
+ }
+ return -1;
}
- return -1;
- }
- do{ got = osRead(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
+ got = osRead(id->h, pBuf, cnt);
#endif
+ if( got==cnt ) break;
+ if( got<0 ){
+ if( errno==EINTR ){ got = 1; continue; }
+ prior = 0;
+ ((unixFile*)id)->lastErrno = errno;
+ break;
+ }else if( got>0 ){
+ cnt -= got;
+ offset += got;
+ prior += got;
+ pBuf = (void*)(got + (char*)pBuf);
+ }
+ }while( got>0 );
TIMER_END;
- if( got<0 ){
- ((unixFile*)id)->lastErrno = errno;
- }
- OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED));
- return got;
+ OSTRACE(("READ %-3d %5d %7lld %llu\n",
+ id->h, got+prior, offset-prior, TIMER_ELAPSED));
+ return got+prior;
}
/*
@@ -3478,6 +3501,22 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
}
/*
+** If *pArg is inititially negative then this is a query. Set *pArg to
+** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
+**
+** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
+*/
+static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){
+ if( *pArg<0 ){
+ *pArg = (pFile->ctrlFlags & mask)!=0;
+ }else if( (*pArg)==0 ){
+ pFile->ctrlFlags &= ~mask;
+ }else{
+ pFile->ctrlFlags |= mask;
+ }
+}
+
+/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
@@ -3503,14 +3542,15 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
return rc;
}
case SQLITE_FCNTL_PERSIST_WAL: {
- int bPersist = *(int*)pArg;
- if( bPersist<0 ){
- *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0;
- }else if( bPersist==0 ){
- pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL;
- }else{
- pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL;
- }
+ unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
+ return SQLITE_OK;
+ }
+ case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
+ unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
+ return SQLITE_OK;
+ }
+ case SQLITE_FCNTL_VFSNAME: {
+ *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
return SQLITE_OK;
}
#ifndef NDEBUG
@@ -3530,9 +3570,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
return proxyFileControl(id,op,pArg);
}
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
- case SQLITE_FCNTL_SYNC_OMITTED: {
- return SQLITE_OK; /* A no-op */
- }
}
return SQLITE_NOTFOUND;
}
@@ -3547,17 +3584,31 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
** a database and its journal file) that the sector size will be the
** same for both.
*/
-static int unixSectorSize(sqlite3_file *NotUsed){
- UNUSED_PARAMETER(NotUsed);
+static int unixSectorSize(sqlite3_file *pFile){
+ (void)pFile;
return SQLITE_DEFAULT_SECTOR_SIZE;
}
/*
-** Return the device characteristics for the file. This is always 0 for unix.
+** Return the device characteristics for the file.
+**
+** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
+** However, that choice is contraversial since technically the underlying
+** file system does not always provide powersafe overwrites. (In other
+** words, after a power-loss event, parts of the file that were never
+** written might end up being altered.) However, non-PSOW behavior is very,
+** very rare. And asserting PSOW makes a large reduction in the amount
+** of required I/O for journaling, since a lot of padding is eliminated.
+** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
+** available to turn it off and URI query parameter available to turn it off.
*/
-static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
- UNUSED_PARAMETER(NotUsed);
- return 0;
+static int unixDeviceCharacteristics(sqlite3_file *id){
+ unixFile *p = (unixFile*)id;
+ if( p->ctrlFlags & UNIXFILE_PSOW ){
+ return SQLITE_IOCAP_POWERSAFE_OVERWRITE;
+ }else{
+ return 0;
+ }
}
#ifndef SQLITE_OMIT_WAL
@@ -3812,9 +3863,9 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
}
#ifdef SQLITE_SHM_DIRECTORY
- nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30;
+ nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
#else
- nShmFilename = 5 + (int)strlen(pDbFd->zPath);
+ nShmFilename = 6 + (int)strlen(pDbFd->zPath);
#endif
pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename );
if( pShmNode==0 ){
@@ -3841,10 +3892,8 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
}
if( pInode->bProcessLock==0 ){
- const char *zRO;
int openFlags = O_RDWR | O_CREAT;
- zRO = sqlite3_uri_parameter(pDbFd->zPath, "readonly_shm");
- if( zRO && sqlite3GetBoolean(zRO) ){
+ if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
openFlags = O_RDONLY;
pShmNode->isReadonly = 1;
}
@@ -4540,11 +4589,14 @@ static int fillInUnixFile(
OSTRACE(("OPEN %-3d %s\n", h, zFilename));
pNew->h = h;
+ pNew->pVfs = pVfs;
pNew->zPath = zFilename;
+ pNew->ctrlFlags = 0;
+ if( sqlite3_uri_boolean(zFilename, "psow", SQLITE_POWERSAFE_OVERWRITE) ){
+ pNew->ctrlFlags |= UNIXFILE_PSOW;
+ }
if( memcmp(pVfs->zName,"unix-excl",10)==0 ){
- pNew->ctrlFlags = UNIXFILE_EXCL;
- }else{
- pNew->ctrlFlags = 0;
+ pNew->ctrlFlags |= UNIXFILE_EXCL;
}
if( isReadOnly ){
pNew->ctrlFlags |= UNIXFILE_RDONLY;
@@ -4879,7 +4931,7 @@ static int findCreateFileMode(
*/
nDb = sqlite3Strlen30(zPath) - 1;
#ifdef SQLITE_ENABLE_8_3_NAMES
- while( nDb>0 && !sqlite3Isalnum(zPath[nDb]) ) nDb--;
+ while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--;
if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK;
#else
while( zPath[nDb]!='-' ){
@@ -5715,7 +5767,7 @@ static int proxyCreateLockPath(const char *lockPath){
if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/')
|| (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){
buf[i]='\0';
- if( mkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
+ if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
int err=errno;
if( err!=EEXIST ) {
OSTRACE(("CREATELOCKPATH FAILED creating %s, "
@@ -6751,7 +6803,7 @@ int sqlite3_os_init(void){
/* Double-check that the aSyscall[] array has been constructed
** correctly. See ticket [bb3a86e890c8e96ab] */
- assert( ArraySize(aSyscall)==18 );
+ assert( ArraySize(aSyscall)==20 );
/* Register all VFSes defined in the aVfs[] array */
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){