diff options
author | drh <drh@noemail.net> | 2017-08-01 14:16:15 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2017-08-01 14:16:15 +0000 |
commit | b7673ede37708db59ada16666f679b27fae84c5b (patch) | |
tree | d5ad8ad5e009dfb79912c6daf756f2b5f0afa4bc /src/os_unix.c | |
parent | e9e1074d27e03e3a54f6547251923b27d7e8b51b (diff) | |
parent | 2fc2f92ed6cad3822f5bacf57040e2d4c61cd859 (diff) | |
download | sqlite-b7673ede37708db59ada16666f679b27fae84c5b.tar.gz sqlite-b7673ede37708db59ada16666f679b27fae84c5b.zip |
Take advantage of atomic-write capabilities in the F2FS filesystem when the
database is stored on such a filesystem. This is a compile-time option
activated using SQLITE_ENABLE_BATCH_ATOMIC_WRITE.
FossilOrigin-Name: 24190b221f73472dafaead6de101b4debc2c91c1ca28d70b45a38df5bb61fb39
Diffstat (limited to 'src/os_unix.c')
-rw-r--r-- | src/os_unix.c | 118 |
1 files changed, 84 insertions, 34 deletions
diff --git a/src/os_unix.c b/src/os_unix.c index 7f0ebdba6..157be3c3a 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -90,6 +90,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <sys/ioctl.h> #include <unistd.h> #include <time.h> #include <sys/time.h> @@ -220,10 +221,8 @@ struct unixFile { sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ void *pMapRegion; /* Memory mapped region */ #endif -#ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ -#endif #if SQLITE_ENABLE_LOCKING_STYLE int openFlags; /* The flags specified at open() */ #endif @@ -328,6 +327,20 @@ static pid_t randomnessPid = 0; # define lseek lseek64 #endif +#ifdef __linux__ +/* +** Linux-specific IOCTL magic numbers used for controlling F2FS +*/ +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#endif /* __linux__ */ + + /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). @@ -500,6 +513,9 @@ static struct unix_syscall { #endif #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) + { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, +#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) + }; /* End of the overrideable system calls */ @@ -3777,6 +3793,21 @@ static int unixGetTempname(int nBuf, char *zBuf); static int unixFileControl(sqlite3_file *id, int op, void *pArg){ unixFile *pFile = (unixFile*)id; switch( op ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); + return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; @@ -3860,30 +3891,41 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } /* -** Return the sector size in bytes of the underlying block device for -** the specified file. This is almost always 512 bytes, but may be -** larger for some devices. +** If pFd->sectorSize is non-zero when this function is called, it is a +** no-op. Otherwise, the values of pFd->sectorSize and +** pFd->deviceCharacteristics are set according to the file-system +** characteristics. ** -** SQLite code assumes this function cannot fail. It also assumes that -** if two files are created in the same file-system directory (i.e. -** a database and its journal file) that the sector size will be the -** same for both. +** There are two versions of this function. One for QNX and one for all +** other systems. */ -#ifndef __QNXNTO__ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return SQLITE_DEFAULT_SECTOR_SIZE; -} -#endif +#ifndef __QNXNTO__ +static void setDeviceCharacteristics(unixFile *pFd){ + assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); + if( pFd->sectorSize==0 ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int res; + u32 f = 0; -/* -** The following version of unixSectorSize() is optimized for QNX. -*/ -#ifdef __QNXNTO__ + /* Check for support for F2FS atomic batch writes. */ + res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); + if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ + pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + + /* Set the POWERSAFE_OVERWRITE flag if requested. */ + if( pFd->ctrlFlags & UNIXFILE_PSOW ){ + pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; + } + + pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } +} +#else #include <sys/dcmd_blk.h> #include <sys/statvfs.h> -static int unixSectorSize(sqlite3_file *id){ - unixFile *pFile = (unixFile*)id; +static void setDeviceCharacteristics(unixFile *pFile){ if( pFile->sectorSize == 0 ){ struct statvfs fsInfo; @@ -3952,9 +3994,24 @@ static int unixSectorSize(sqlite3_file *id){ pFile->deviceCharacteristics = 0; pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } - return pFile->sectorSize; } -#endif /* __QNXNTO__ */ +#endif + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +static int unixSectorSize(sqlite3_file *id){ + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->sectorSize; +} /* ** Return the device characteristics for the file. @@ -3970,16 +4027,9 @@ static int unixSectorSize(sqlite3_file *id){ ** available to turn it off and URI query parameter available to turn it off. */ static int unixDeviceCharacteristics(sqlite3_file *id){ - unixFile *p = (unixFile*)id; - int rc = 0; -#ifdef __QNXNTO__ - if( p->sectorSize==0 ) unixSectorSize(id); - rc = p->deviceCharacteristics; -#endif - if( p->ctrlFlags & UNIXFILE_PSOW ){ - rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; - } - return rc; + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->deviceCharacteristics; } #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 @@ -7598,7 +7648,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==28 ); + assert( ArraySize(aSyscall)==29 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ |