diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ctime.c | 3 | ||||
-rw-r--r-- | src/memjournal.c | 32 | ||||
-rw-r--r-- | src/os_unix.c | 118 | ||||
-rw-r--r-- | src/pager.c | 152 | ||||
-rw-r--r-- | src/sqlite.h.in | 47 | ||||
-rw-r--r-- | src/sqliteInt.h | 12 | ||||
-rw-r--r-- | src/test1.c | 41 | ||||
-rw-r--r-- | src/test6.c | 25 | ||||
-rw-r--r-- | src/test_devsym.c | 218 |
9 files changed, 509 insertions, 139 deletions
diff --git a/src/ctime.c b/src/ctime.c index ef2523a9b..e8f4e7f90 100644 --- a/src/ctime.c +++ b/src/ctime.c @@ -184,6 +184,9 @@ static const char * const sqlite3azCompileOpt[] = { #if SQLITE_ENABLE_ATOMIC_WRITE "ENABLE_ATOMIC_WRITE", #endif +#if SQLITE_ENABLE_BATCH_ATOMIC_WRITE + "ENABLE_BATCH_ATOMIC_WRITE", +#endif #if SQLITE_ENABLE_CEROD "ENABLE_CEROD", #endif diff --git a/src/memjournal.c b/src/memjournal.c index cd8b87d8a..3b0e7a672 100644 --- a/src/memjournal.c +++ b/src/memjournal.c @@ -96,7 +96,8 @@ static int memjrnlRead( int iChunkOffset; FileChunk *pChunk; -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) if( (iAmt+iOfst)>p->endpoint.iOffset ){ return SQLITE_IOERR_SHORT_READ; } @@ -215,7 +216,8 @@ static int memjrnlWrite( ** atomic-write optimization. In this case the first 28 bytes of the ** journal file may be written as part of committing the transaction. */ assert( iOfst==p->endpoint.iOffset || iOfst==0 ); -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) if( iOfst==0 && p->pFirst ){ assert( p->nChunkSize>iAmt ); memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); @@ -384,17 +386,31 @@ void sqlite3MemJournalOpen(sqlite3_file *pJfd){ sqlite3JournalOpen(0, 0, pJfd, 0, -1); } -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) /* ** If the argument p points to a MemJournal structure that is not an ** in-memory-only journal file (i.e. is one that was opened with a +ve -** nSpill parameter), and the underlying file has not yet been created, -** create it now. +** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying +** file has not yet been created, create it now. */ -int sqlite3JournalCreate(sqlite3_file *p){ +int sqlite3JournalCreate(sqlite3_file *pJfd){ int rc = SQLITE_OK; - if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){ - rc = memjrnlCreateFile((MemJournal*)p); + MemJournal *p = (MemJournal*)pJfd; + if( p->pMethod==&MemJournalMethods && ( +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + p->nSpill>0 +#else + /* While this appears to not be possible without ATOMIC_WRITE, the + ** paths are complex, so it seems prudent to leave the test in as + ** a NEVER(), in case our analysis is subtly flawed. */ + NEVER(p->nSpill>0) +#endif +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + || (p->flags & SQLITE_OPEN_MAIN_JOURNAL) +#endif + )){ + rc = memjrnlCreateFile(p); } return rc; } diff --git a/src/os_unix.c b/src/os_unix.c index 7f0ebdba6..157be3c3a 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -90,6 +90,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <sys/ioctl.h> #include <unistd.h> #include <time.h> #include <sys/time.h> @@ -220,10 +221,8 @@ struct unixFile { sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ void *pMapRegion; /* Memory mapped region */ #endif -#ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ -#endif #if SQLITE_ENABLE_LOCKING_STYLE int openFlags; /* The flags specified at open() */ #endif @@ -328,6 +327,20 @@ static pid_t randomnessPid = 0; # define lseek lseek64 #endif +#ifdef __linux__ +/* +** Linux-specific IOCTL magic numbers used for controlling F2FS +*/ +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#endif /* __linux__ */ + + /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). @@ -500,6 +513,9 @@ static struct unix_syscall { #endif #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) + { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, +#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) + }; /* End of the overrideable system calls */ @@ -3777,6 +3793,21 @@ static int unixGetTempname(int nBuf, char *zBuf); static int unixFileControl(sqlite3_file *id, int op, void *pArg){ unixFile *pFile = (unixFile*)id; switch( op ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); + return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; @@ -3860,30 +3891,41 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } /* -** Return the sector size in bytes of the underlying block device for -** the specified file. This is almost always 512 bytes, but may be -** larger for some devices. +** If pFd->sectorSize is non-zero when this function is called, it is a +** no-op. Otherwise, the values of pFd->sectorSize and +** pFd->deviceCharacteristics are set according to the file-system +** characteristics. ** -** SQLite code assumes this function cannot fail. It also assumes that -** if two files are created in the same file-system directory (i.e. -** a database and its journal file) that the sector size will be the -** same for both. +** There are two versions of this function. One for QNX and one for all +** other systems. */ -#ifndef __QNXNTO__ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return SQLITE_DEFAULT_SECTOR_SIZE; -} -#endif +#ifndef __QNXNTO__ +static void setDeviceCharacteristics(unixFile *pFd){ + assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); + if( pFd->sectorSize==0 ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int res; + u32 f = 0; -/* -** The following version of unixSectorSize() is optimized for QNX. -*/ -#ifdef __QNXNTO__ + /* Check for support for F2FS atomic batch writes. */ + res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); + if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ + pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + + /* Set the POWERSAFE_OVERWRITE flag if requested. */ + if( pFd->ctrlFlags & UNIXFILE_PSOW ){ + pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; + } + + pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } +} +#else #include <sys/dcmd_blk.h> #include <sys/statvfs.h> -static int unixSectorSize(sqlite3_file *id){ - unixFile *pFile = (unixFile*)id; +static void setDeviceCharacteristics(unixFile *pFile){ if( pFile->sectorSize == 0 ){ struct statvfs fsInfo; @@ -3952,9 +3994,24 @@ static int unixSectorSize(sqlite3_file *id){ pFile->deviceCharacteristics = 0; pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } - return pFile->sectorSize; } -#endif /* __QNXNTO__ */ +#endif + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +static int unixSectorSize(sqlite3_file *id){ + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->sectorSize; +} /* ** Return the device characteristics for the file. @@ -3970,16 +4027,9 @@ static int unixSectorSize(sqlite3_file *id){ ** available to turn it off and URI query parameter available to turn it off. */ static int unixDeviceCharacteristics(sqlite3_file *id){ - unixFile *p = (unixFile*)id; - int rc = 0; -#ifdef __QNXNTO__ - if( p->sectorSize==0 ) unixSectorSize(id); - rc = p->deviceCharacteristics; -#endif - if( p->ctrlFlags & UNIXFILE_PSOW ){ - rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; - } - return rc; + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->deviceCharacteristics; } #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 @@ -7598,7 +7648,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==28 ); + assert( ArraySize(aSyscall)==29 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ diff --git a/src/pager.c b/src/pager.c index aee50e531..51661044d 100644 --- a/src/pager.c +++ b/src/pager.c @@ -947,6 +947,7 @@ static int assert_pager_state(Pager *p){ assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) ); assert( pPager->dbOrigSize<=pPager->dbHintSize ); break; @@ -958,6 +959,7 @@ static int assert_pager_state(Pager *p){ assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) ); break; @@ -1168,34 +1170,45 @@ static int pagerLockDb(Pager *pPager, int eLock){ } /* -** This function determines whether or not the atomic-write optimization -** can be used with this pager. The optimization can be used if: +** This function determines whether or not the atomic-write or +** atomic-batch-write optimizations can be used with this pager. The +** atomic-write optimization can be used if: ** ** (a) the value returned by OsDeviceCharacteristics() indicates that ** a database page may be written atomically, and ** (b) the value returned by OsSectorSize() is less than or equal ** to the page size. ** -** The optimization is also always enabled for temporary files. It is -** an error to call this function if pPager is opened on an in-memory -** database. +** If it can be used, then the value returned is the size of the journal +** file when it contains rollback data for exactly one page. ** -** If the optimization cannot be used, 0 is returned. If it can be used, -** then the value returned is the size of the journal file when it -** contains rollback data for exactly one page. +** The atomic-batch-write optimization can be used if OsDeviceCharacteristics() +** returns a value with the SQLITE_IOCAP_BATCH_ATOMIC bit set. -1 is +** returned in this case. +** +** If neither optimization can be used, 0 is returned. */ -#ifdef SQLITE_ENABLE_ATOMIC_WRITE static int jrnlBufferSize(Pager *pPager){ assert( !MEMDB ); - if( !pPager->tempFile ){ - int dc; /* Device characteristics */ - int nSector; /* Sector size */ - int szPage; /* Page size */ - assert( isOpen(pPager->fd) ); - dc = sqlite3OsDeviceCharacteristics(pPager->fd); - nSector = pPager->sectorSize; - szPage = pPager->pageSize; +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int dc; /* Device characteristics */ + + assert( isOpen(pPager->fd) ); + dc = sqlite3OsDeviceCharacteristics(pPager->fd); +#endif + +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( dc&SQLITE_IOCAP_BATCH_ATOMIC ){ + return -1; + } +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + { + int nSector = pPager->sectorSize; + int szPage = pPager->pageSize; assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); @@ -1205,11 +1218,11 @@ static int jrnlBufferSize(Pager *pPager){ } return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); -} -#else -# define jrnlBufferSize(x) 0 #endif + return 0; +} + /* ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking ** on the cache using a hash function. This is used for testing @@ -2012,7 +2025,9 @@ static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){ } releaseAllSavepoints(pPager); - assert( isOpen(pPager->jfd) || pPager->pInJournal==0 ); + assert( isOpen(pPager->jfd) || pPager->pInJournal==0 + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_BATCH_ATOMIC) + ); if( isOpen(pPager->jfd) ){ assert( !pagerUseWal(pPager) ); @@ -4567,6 +4582,13 @@ static int pagerStress(void *p, PgHdr *pPg){ rc = pagerWalFrames(pPager, pPg, 0, 0); } }else{ + +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( pPager->tempFile==0 ){ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc!=SQLITE_OK ) return pager_error(pPager, rc); + } +#endif /* Sync the journal file if required. */ if( pPg->flags&PGHDR_NEED_SYNC @@ -6347,6 +6369,21 @@ int sqlite3PagerCommitPhaseOne( sqlite3PcacheCleanAll(pPager->pPCache); } }else{ + /* The bBatch boolean is true if the batch-atomic-write commit method + ** should be used. No rollback journal is created if batch-atomic-write + ** is enabled. + */ + sqlite3_file *fd = pPager->fd; +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + const int bBatch = zMaster==0 /* An SQLITE_IOCAP_BATCH_ATOMIC commit */ + && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC) + && !pPager->noSync + && sqlite3JournalIsInMemory(pPager->jfd); +#else +# define bBatch 0 +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE /* The following block updates the change-counter. Exactly how it ** does this depends on whether or not the atomic-update optimization ** was enabled at compile time, and if this transaction meets the @@ -6370,33 +6407,40 @@ int sqlite3PagerCommitPhaseOne( ** in 'direct' mode. In this case the journal file will never be ** created for this transaction. */ - #ifdef SQLITE_ENABLE_ATOMIC_WRITE - PgHdr *pPg; - assert( isOpen(pPager->jfd) - || pPager->journalMode==PAGER_JOURNALMODE_OFF - || pPager->journalMode==PAGER_JOURNALMODE_WAL - ); - if( !zMaster && isOpen(pPager->jfd) - && pPager->journalOff==jrnlBufferSize(pPager) - && pPager->dbSize>=pPager->dbOrigSize - && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) - ){ - /* Update the db file change counter via the direct-write method. The - ** following call will modify the in-memory representation of page 1 - ** to include the updated change counter and then write page 1 - ** directly to the database file. Because of the atomic-write - ** property of the host file-system, this is safe. - */ - rc = pager_incr_changecounter(pPager, 1); - }else{ - rc = sqlite3JournalCreate(pPager->jfd); - if( rc==SQLITE_OK ){ - rc = pager_incr_changecounter(pPager, 0); + if( bBatch==0 ){ + PgHdr *pPg; + assert( isOpen(pPager->jfd) + || pPager->journalMode==PAGER_JOURNALMODE_OFF + || pPager->journalMode==PAGER_JOURNALMODE_WAL + ); + if( !zMaster && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbOrigSize + && (!(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. + */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); + } } } - #else +#else +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( zMaster ){ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } +#endif rc = pager_incr_changecounter(pPager, 0); - #endif +#endif if( rc!=SQLITE_OK ) goto commit_phase_one_exit; /* Write the master journal name into the journal file. If a master @@ -6419,8 +6463,24 @@ int sqlite3PagerCommitPhaseOne( */ rc = syncJournal(pPager, 0); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - + + if( bBatch ){ + /* The pager is now in DBMOD state. But regardless of what happens + ** next, attempting to play the journal back into the database would + ** be unsafe. Close it now to make sure that does not happen. */ + sqlite3OsClose(pPager->jfd); + rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache)); + if( bBatch ){ + if( rc==SQLITE_OK ){ + rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0); + }else{ + sqlite3OsFileControl(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0); + } + } + if( rc!=SQLITE_OK ){ assert( rc!=SQLITE_IOERR_BLOCKED ); goto commit_phase_one_exit; diff --git a/src/sqlite.h.in b/src/sqlite.h.in index e8f34e4ab..ad97b9a0d 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -494,6 +494,9 @@ int sqlite3_exec( #define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8)) #define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8)) #define SQLITE_IOERR_AUTH (SQLITE_IOERR | (28<<8)) +#define SQLITE_IOERR_BEGIN_ATOMIC (SQLITE_IOERR | (29<<8)) +#define SQLITE_IOERR_COMMIT_ATOMIC (SQLITE_IOERR | (30<<8)) +#define SQLITE_IOERR_ROLLBACK_ATOMIC (SQLITE_IOERR | (31<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8)) @@ -580,6 +583,11 @@ int sqlite3_exec( ** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on ** read-only media and cannot be changed even by processes with ** elevated privileges. +** +** The SQLITE_IOCAP_BATCH_ATOMIC property means that the underlying +** filesystem supports doing multiple write operations atomically when those +** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and +** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -595,6 +603,7 @@ int sqlite3_exec( #define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 +#define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 /* ** CAPI3REF: File Locking Levels @@ -729,6 +738,7 @@ struct sqlite3_file { ** <li> [SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN] ** <li> [SQLITE_IOCAP_POWERSAFE_OVERWRITE] ** <li> [SQLITE_IOCAP_IMMUTABLE] +** <li> [SQLITE_IOCAP_BATCH_ATOMIC] ** </ul> ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of @@ -1012,6 +1022,40 @@ struct sqlite3_io_methods { ** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by ** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for ** this opcode. +** +** <li>[[SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]] +** If the [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] opcode returns SQLITE_OK, then +** the file descriptor is placed in "batch write mode", which +** means all subsequent write operations will be deferred and done +** atomically at the next [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. Systems +** that do not support batch atomic writes will return SQLITE_NOTFOUND. +** ^Following a successful SQLITE_FCNTL_BEGIN_ATOMIC_WRITE and prior to +** the closing [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE] or +** [SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE], SQLite will make +** no VFS interface calls on the same [sqlite3_file] file descriptor +** except for calls to the xWrite method and the xFileControl method +** with [SQLITE_FCNTL_SIZE_HINT]. +** +** <li>[[SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]] +** The [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE] opcode causes all write +** operations since the previous successful call to +** [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] to be performed atomically. +** This file control returns [SQLITE_OK] if and only if the writes were +** all performed successfully and have been committed to persistent storage. +** ^Regardless of whether or not it is successful, this file control takes +** the file descriptor out of batch write mode so that all subsequent +** write operations are independent. +** ^SQLite will never invoke SQLITE_FCNTL_COMMIT_ATOMIC_WRITE without +** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]. +** +** <li>[[SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE]] +** The [SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE] opcode causes all write +** operations since the previous successful call to +** [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] to be rolled back. +** ^This file control takes the file descriptor out of batch write mode +** so that all subsequent write operations are independent. +** ^SQLite will never invoke SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE without +** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]. ** </ul> */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1043,6 +1087,9 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_JOURNAL_POINTER 28 #define SQLITE_FCNTL_WIN32_GET_HANDLE 29 #define SQLITE_FCNTL_PDB 30 +#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE 31 +#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE 32 +#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE 33 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 724cd5312..b6085152c 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -631,6 +631,15 @@ #endif /* +** The compile-time options SQLITE_MMAP_READWRITE and +** SQLITE_ENABLE_BATCH_ATOMIC_WRITE are not compatible with one another. +** You must choose one or the other (or neither) but not both. +*/ +#if defined(SQLITE_MMAP_READWRITE) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +#error Cannot use both SQLITE_MMAP_READWRITE and SQLITE_ENABLE_BATCH_ATOMIC_WRITE +#endif + +/* ** GCC does not define the offsetof() macro so we'll have to do it ** ourselves. */ @@ -4287,7 +4296,8 @@ int sqlite3FindInIndex(Parse *, Expr *, u32, int*, int*); int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int); int sqlite3JournalSize(sqlite3_vfs *); -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) int sqlite3JournalCreate(sqlite3_file *); #endif diff --git a/src/test1.c b/src/test1.c index df7685f24..7a6ff2163 100644 --- a/src/test1.c +++ b/src/test1.c @@ -2554,6 +2554,46 @@ static int SQLITE_TCLAPI test_delete_database( } /* +** Usage: atomic_batch_write PATH +*/ +static int SQLITE_TCLAPI test_atomic_batch_write( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + char *zFile = 0; /* Path to file to test */ + sqlite3 *db = 0; /* Database handle */ + sqlite3_file *pFd = 0; /* SQLite fd open on zFile */ + int bRes = 0; /* Integer result of this command */ + int dc = 0; /* Device-characteristics mask */ + int rc; /* sqlite3_open() return code */ + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "PATH"); + return TCL_ERROR; + } + zFile = Tcl_GetString(objv[1]); + + rc = sqlite3_open(zFile, &db); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, sqlite3_errmsg(db), 0); + sqlite3_close(db); + return TCL_ERROR; + } + + rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, (void*)&pFd); + dc = pFd->pMethods->xDeviceCharacteristics(pFd); + if( dc & SQLITE_IOCAP_BATCH_ATOMIC ){ + bRes = 1; + } + + Tcl_SetObjResult(interp, Tcl_NewIntObj(bRes)); + sqlite3_close(db); + return TCL_OK; +} + +/* ** Usage: sqlite3_next_stmt DB STMT ** ** Return the next statment in sequence after STMT. @@ -7645,6 +7685,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "sqlite3_snapshot_cmp_blob", test_snapshot_cmp_blob, 0 }, #endif { "sqlite3_delete_database", test_delete_database, 0 }, + { "atomic_batch_write", test_atomic_batch_write, 0 }, }; static int bitmask_size = sizeof(Bitmask)*8; static int longdouble_size = sizeof(LONGDOUBLE_TYPE); diff --git a/src/test6.c b/src/test6.c index 849cdeb3b..9a3aa093f 100644 --- a/src/test6.c +++ b/src/test6.c @@ -736,6 +736,7 @@ static int processDevSymArgs( { "sequential", SQLITE_IOCAP_SEQUENTIAL }, { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, { "powersafe_overwrite", SQLITE_IOCAP_POWERSAFE_OVERWRITE }, + { "batch-atomic", SQLITE_IOCAP_BATCH_ATOMIC }, { 0, 0 } }; @@ -976,7 +977,30 @@ static int SQLITE_TCLAPI devSymObjCmd( devsym_register(iDc, iSectorSize); return TCL_OK; +} +/* +** tclcmd: sqlite3_crash_on_write N +*/ +static int SQLITE_TCLAPI writeCrashObjCmd( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + void devsym_crash_on_write(int); + int nWrite = 0; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "NWRITE"); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[1], &nWrite) ){ + return TCL_ERROR; + } + + devsym_crash_on_write(nWrite); + return TCL_OK; } /* @@ -1068,6 +1092,7 @@ int Sqlitetest6_Init(Tcl_Interp *interp){ Tcl_CreateObjCommand(interp, "sqlite3_crashparams", crashParamsObjCmd, 0, 0); Tcl_CreateObjCommand(interp, "sqlite3_crash_now", crashNowCmd, 0, 0); Tcl_CreateObjCommand(interp, "sqlite3_simulate_device", devSymObjCmd, 0, 0); + Tcl_CreateObjCommand(interp, "sqlite3_crash_on_write", writeCrashObjCmd,0,0); Tcl_CreateObjCommand(interp, "unregister_devsim", dsUnregisterObjCmd, 0, 0); Tcl_CreateObjCommand(interp, "register_jt_vfs", jtObjCmd, 0, 0); Tcl_CreateObjCommand(interp, "unregister_jt_vfs", jtUnregisterObjCmd, 0, 0); diff --git a/src/test_devsym.c b/src/test_devsym.c index 9a1ba09d6..0da6671cf 100644 --- a/src/test_devsym.c +++ b/src/test_devsym.c @@ -28,6 +28,7 @@ ** Name used to identify this VFS. */ #define DEVSYM_VFS_NAME "devsym" +#define WRITECRASH_NAME "writecrash" typedef struct devsym_file devsym_file; struct devsym_file { @@ -72,61 +73,13 @@ static int devsymRandomness(sqlite3_vfs*, int nByte, char *zOut); static int devsymSleep(sqlite3_vfs*, int microseconds); static int devsymCurrentTime(sqlite3_vfs*, double*); -static sqlite3_vfs devsym_vfs = { - 2, /* iVersion */ - sizeof(devsym_file), /* szOsFile */ - DEVSYM_MAX_PATHNAME, /* mxPathname */ - 0, /* pNext */ - DEVSYM_VFS_NAME, /* zName */ - 0, /* pAppData */ - devsymOpen, /* xOpen */ - devsymDelete, /* xDelete */ - devsymAccess, /* xAccess */ - devsymFullPathname, /* xFullPathname */ -#ifndef SQLITE_OMIT_LOAD_EXTENSION - devsymDlOpen, /* xDlOpen */ - devsymDlError, /* xDlError */ - devsymDlSym, /* xDlSym */ - devsymDlClose, /* xDlClose */ -#else - 0, /* xDlOpen */ - 0, /* xDlError */ - 0, /* xDlSym */ - 0, /* xDlClose */ -#endif /* SQLITE_OMIT_LOAD_EXTENSION */ - devsymRandomness, /* xRandomness */ - devsymSleep, /* xSleep */ - devsymCurrentTime, /* xCurrentTime */ - 0, /* xGetLastError */ - 0 /* xCurrentTimeInt64 */ -}; - -static sqlite3_io_methods devsym_io_methods = { - 2, /* iVersion */ - devsymClose, /* xClose */ - devsymRead, /* xRead */ - devsymWrite, /* xWrite */ - devsymTruncate, /* xTruncate */ - devsymSync, /* xSync */ - devsymFileSize, /* xFileSize */ - devsymLock, /* xLock */ - devsymUnlock, /* xUnlock */ - devsymCheckReservedLock, /* xCheckReservedLock */ - devsymFileControl, /* xFileControl */ - devsymSectorSize, /* xSectorSize */ - devsymDeviceCharacteristics, /* xDeviceCharacteristics */ - devsymShmMap, /* xShmMap */ - devsymShmLock, /* xShmLock */ - devsymShmBarrier, /* xShmBarrier */ - devsymShmUnmap /* xShmUnmap */ -}; - struct DevsymGlobal { sqlite3_vfs *pVfs; int iDeviceChar; int iSectorSize; + int nWriteCrash; }; -struct DevsymGlobal g = {0, 0, 512}; +struct DevsymGlobal g = {0, 0, 512, 0}; /* ** Close an devsym-file. @@ -271,6 +224,26 @@ static int devsymOpen( int flags, int *pOutFlags ){ +static sqlite3_io_methods devsym_io_methods = { + 2, /* iVersion */ + devsymClose, /* xClose */ + devsymRead, /* xRead */ + devsymWrite, /* xWrite */ + devsymTruncate, /* xTruncate */ + devsymSync, /* xSync */ + devsymFileSize, /* xFileSize */ + devsymLock, /* xLock */ + devsymUnlock, /* xUnlock */ + devsymCheckReservedLock, /* xCheckReservedLock */ + devsymFileControl, /* xFileControl */ + devsymSectorSize, /* xSectorSize */ + devsymDeviceCharacteristics, /* xDeviceCharacteristics */ + devsymShmMap, /* xShmMap */ + devsymShmLock, /* xShmLock */ + devsymShmBarrier, /* xShmBarrier */ + devsymShmUnmap /* xShmUnmap */ +}; + int rc; devsym_file *p = (devsym_file *)pFile; p->pReal = (sqlite3_file *)&p[1]; @@ -372,6 +345,137 @@ static int devsymCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ return g.pVfs->xCurrentTime(g.pVfs, pTimeOut); } +/* +** Return the sector-size in bytes for an writecrash-file. +*/ +static int writecrashSectorSize(sqlite3_file *pFile){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsSectorSize(p->pReal); +} + +/* +** Return the device characteristic flags supported by an writecrash-file. +*/ +static int writecrashDeviceCharacteristics(sqlite3_file *pFile){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsDeviceCharacteristics(p->pReal); +} + +/* +** Write data to an writecrash-file. +*/ +static int writecrashWrite( + sqlite3_file *pFile, + const void *zBuf, + int iAmt, + sqlite_int64 iOfst +){ + devsym_file *p = (devsym_file *)pFile; + if( g.nWriteCrash>0 ){ + g.nWriteCrash--; + if( g.nWriteCrash==0 ) abort(); + } + return sqlite3OsWrite(p->pReal, zBuf, iAmt, iOfst); +} + +/* +** Open an writecrash file handle. +*/ +static int writecrashOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_file *pFile, + int flags, + int *pOutFlags +){ +static sqlite3_io_methods writecrash_io_methods = { + 2, /* iVersion */ + devsymClose, /* xClose */ + devsymRead, /* xRead */ + writecrashWrite, /* xWrite */ + devsymTruncate, /* xTruncate */ + devsymSync, /* xSync */ + devsymFileSize, /* xFileSize */ + devsymLock, /* xLock */ + devsymUnlock, /* xUnlock */ + devsymCheckReservedLock, /* xCheckReservedLock */ + devsymFileControl, /* xFileControl */ + writecrashSectorSize, /* xSectorSize */ + writecrashDeviceCharacteristics, /* xDeviceCharacteristics */ + devsymShmMap, /* xShmMap */ + devsymShmLock, /* xShmLock */ + devsymShmBarrier, /* xShmBarrier */ + devsymShmUnmap /* xShmUnmap */ +}; + + int rc; + devsym_file *p = (devsym_file *)pFile; + p->pReal = (sqlite3_file *)&p[1]; + rc = sqlite3OsOpen(g.pVfs, zName, p->pReal, flags, pOutFlags); + if( p->pReal->pMethods ){ + pFile->pMethods = &writecrash_io_methods; + } + return rc; +} + +static sqlite3_vfs devsym_vfs = { + 2, /* iVersion */ + sizeof(devsym_file), /* szOsFile */ + DEVSYM_MAX_PATHNAME, /* mxPathname */ + 0, /* pNext */ + DEVSYM_VFS_NAME, /* zName */ + 0, /* pAppData */ + devsymOpen, /* xOpen */ + devsymDelete, /* xDelete */ + devsymAccess, /* xAccess */ + devsymFullPathname, /* xFullPathname */ +#ifndef SQLITE_OMIT_LOAD_EXTENSION + devsymDlOpen, /* xDlOpen */ + devsymDlError, /* xDlError */ + devsymDlSym, /* xDlSym */ + devsymDlClose, /* xDlClose */ +#else + 0, /* xDlOpen */ + 0, /* xDlError */ + 0, /* xDlSym */ + 0, /* xDlClose */ +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ + devsymRandomness, /* xRandomness */ + devsymSleep, /* xSleep */ + devsymCurrentTime, /* xCurrentTime */ + 0, /* xGetLastError */ + 0 /* xCurrentTimeInt64 */ +}; + +static sqlite3_vfs writecrash_vfs = { + 2, /* iVersion */ + sizeof(devsym_file), /* szOsFile */ + DEVSYM_MAX_PATHNAME, /* mxPathname */ + 0, /* pNext */ + WRITECRASH_NAME, /* zName */ + 0, /* pAppData */ + writecrashOpen, /* xOpen */ + devsymDelete, /* xDelete */ + devsymAccess, /* xAccess */ + devsymFullPathname, /* xFullPathname */ +#ifndef SQLITE_OMIT_LOAD_EXTENSION + devsymDlOpen, /* xDlOpen */ + devsymDlError, /* xDlError */ + devsymDlSym, /* xDlSym */ + devsymDlClose, /* xDlClose */ +#else + 0, /* xDlOpen */ + 0, /* xDlError */ + 0, /* xDlSym */ + 0, /* xDlClose */ +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ + devsymRandomness, /* xRandomness */ + devsymSleep, /* xSleep */ + devsymCurrentTime, /* xCurrentTime */ + 0, /* xGetLastError */ + 0 /* xCurrentTimeInt64 */ +}; + /* ** This procedure registers the devsym vfs with SQLite. If the argument is @@ -379,10 +483,13 @@ static int devsymCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ ** available function in this file. */ void devsym_register(int iDeviceChar, int iSectorSize){ + if( g.pVfs==0 ){ g.pVfs = sqlite3_vfs_find(0); devsym_vfs.szOsFile += g.pVfs->szOsFile; + writecrash_vfs.szOsFile += g.pVfs->szOsFile; sqlite3_vfs_register(&devsym_vfs, 0); + sqlite3_vfs_register(&writecrash_vfs, 0); } if( iDeviceChar>=0 ){ g.iDeviceChar = iDeviceChar; @@ -403,4 +510,15 @@ void devsym_unregister(){ g.iSectorSize = 0; } +void devsym_crash_on_write(int nWrite){ + if( g.pVfs==0 ){ + g.pVfs = sqlite3_vfs_find(0); + devsym_vfs.szOsFile += g.pVfs->szOsFile; + writecrash_vfs.szOsFile += g.pVfs->szOsFile; + sqlite3_vfs_register(&devsym_vfs, 0); + sqlite3_vfs_register(&writecrash_vfs, 0); + } + g.nWriteCrash = nWrite; +} + #endif |