aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <drh@noemail.net>2013-04-04 00:40:17 +0000
committerdrh <drh@noemail.net>2013-04-04 00:40:17 +0000
commita478b3fa8034302ab417dcec4ea7eae0980862bc (patch)
treeb6768c0d2d39f553f18e790d70e1b250db9bb9b7 /src
parent936c6d7e9e640e1269545ed9e878186a9b2b83f2 (diff)
parentb0a8ea046a5083b6f0e862e5e5ddcb50c6957334 (diff)
downloadsqlite-a478b3fa8034302ab417dcec4ea7eae0980862bc.tar.gz
sqlite-a478b3fa8034302ab417dcec4ea7eae0980862bc.zip
Try to use mmap() to speed access to the database file on windows, linux,
and mac. FossilOrigin-Name: fff2be60779571c8fb89158db976ec3755e9a223
Diffstat (limited to 'src')
-rw-r--r--src/backup.c3
-rw-r--r--src/btree.c113
-rw-r--r--src/btree.h1
-rw-r--r--src/ctime.c3
-rw-r--r--src/global.c1
-rw-r--r--src/main.c8
-rw-r--r--src/os.c8
-rw-r--r--src/os.h2
-rw-r--r--src/os_unix.c324
-rw-r--r--src/os_win.c268
-rw-r--r--src/pager.c219
-rw-r--r--src/pager.h7
-rw-r--r--src/pcache.h2
-rw-r--r--src/pragma.c34
-rw-r--r--src/shell.c53
-rw-r--r--src/sqlite.h.in25
-rw-r--r--src/sqliteInt.h2
-rw-r--r--src/sqliteLimit.h20
-rw-r--r--src/test1.c28
-rw-r--r--src/test_syscall.c35
-rw-r--r--src/wal.c67
-rw-r--r--src/wal.h4
22 files changed, 1112 insertions, 115 deletions
diff --git a/src/backup.c b/src/backup.c
index 71a8a1a3e..252f61cfc 100644
--- a/src/backup.c
+++ b/src/backup.c
@@ -397,7 +397,8 @@ int sqlite3_backup_step(sqlite3_backup *p, int nPage){
const Pgno iSrcPg = p->iNext; /* Source page number */
if( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ){
DbPage *pSrcPg; /* Source page object */
- rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
+ rc = sqlite3PagerAcquire(pSrcPager, iSrcPg, &pSrcPg,
+ PAGER_ACQUIRE_READONLY);
if( rc==SQLITE_OK ){
rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0);
sqlite3PagerUnref(pSrcPg);
diff --git a/src/btree.c b/src/btree.c
index 96140d68c..c13f0194f 100644
--- a/src/btree.c
+++ b/src/btree.c
@@ -1581,13 +1581,17 @@ static int btreeGetPage(
BtShared *pBt, /* The btree */
Pgno pgno, /* Number of the page to fetch */
MemPage **ppPage, /* Return the page in this parameter */
- int noContent /* Do not load page content if true */
+ int noContent, /* Do not load page content if true */
+ int bReadonly /* True if a read-only (mmap) page is ok */
){
int rc;
DbPage *pDbPage;
+ int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0)
+ | (bReadonly ? PAGER_ACQUIRE_READONLY : 0);
+ assert( noContent==0 || bReadonly==0 );
assert( sqlite3_mutex_held(pBt->mutex) );
- rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
+ rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
if( rc ) return rc;
*ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
return SQLITE_OK;
@@ -1630,9 +1634,10 @@ u32 sqlite3BtreeLastPage(Btree *p){
** may remain unchanged, or it may be set to an invalid value.
*/
static int getAndInitPage(
- BtShared *pBt, /* The database file */
- Pgno pgno, /* Number of the page to get */
- MemPage **ppPage /* Write the page pointer here */
+ BtShared *pBt, /* The database file */
+ Pgno pgno, /* Number of the page to get */
+ MemPage **ppPage, /* Write the page pointer here */
+ int bReadonly /* True if a read-only (mmap) page is ok */
){
int rc;
assert( sqlite3_mutex_held(pBt->mutex) );
@@ -1640,7 +1645,7 @@ static int getAndInitPage(
if( pgno>btreePagecount(pBt) ){
rc = SQLITE_CORRUPT_BKPT;
}else{
- rc = btreeGetPage(pBt, pgno, ppPage, 0);
+ rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly);
if( rc==SQLITE_OK ){
rc = btreeInitPage(*ppPage);
if( rc!=SQLITE_OK ){
@@ -1871,6 +1876,7 @@ int sqlite3BtreeOpen(
rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
EXTRA_SIZE, flags, vfsFlags, pageReinit);
if( rc==SQLITE_OK ){
+ sqlite3PagerSetMmapLimit(pBt->pPager, db->mxMmap);
rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
}
if( rc!=SQLITE_OK ){
@@ -2138,6 +2144,19 @@ int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
}
/*
+** Change the limit on the amount of the database file that may be
+** memory mapped.
+*/
+int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 mxMmap){
+ BtShared *pBt = p->pBt;
+ assert( sqlite3_mutex_held(p->db->mutex) );
+ sqlite3BtreeEnter(p);
+ sqlite3PagerSetMmapLimit(pBt->pPager, mxMmap);
+ sqlite3BtreeLeave(p);
+ return SQLITE_OK;
+}
+
+/*
** Change the way data is synced to disk in order to increase or decrease
** how well the database resists damage due to OS crashes and power
** failures. Level 1 is the same as asynchronous (no syncs() occur and
@@ -2362,7 +2381,7 @@ static int lockBtree(BtShared *pBt){
assert( pBt->pPage1==0 );
rc = sqlite3PagerSharedLock(pBt->pPager);
if( rc!=SQLITE_OK ) return rc;
- rc = btreeGetPage(pBt, 1, &pPage1, 0);
+ rc = btreeGetPage(pBt, 1, &pPage1, 0, 0);
if( rc!=SQLITE_OK ) return rc;
/* Do some checking to help insure the file we opened really is
@@ -2921,7 +2940,7 @@ static int relocatePage(
** iPtrPage.
*/
if( eType!=PTRMAP_ROOTPAGE ){
- rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
+ rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -3005,7 +3024,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */
Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */
- rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
+ rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -3097,8 +3116,11 @@ int sqlite3BtreeIncrVacuum(Btree *p){
if( nOrig<nFin ){
rc = SQLITE_CORRUPT_BKPT;
}else if( nFree>0 ){
- invalidateAllOverflowCache(pBt);
- rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+ rc = saveAllCursors(pBt, 0, 0);
+ if( rc==SQLITE_OK ){
+ invalidateAllOverflowCache(pBt);
+ rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+ }
if( rc==SQLITE_OK ){
rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
put4byte(&pBt->pPage1->aData[28], pBt->nPage);
@@ -3146,7 +3168,9 @@ static int autoVacuumCommit(BtShared *pBt){
nFree = get4byte(&pBt->pPage1->aData[36]);
nFin = finalDbSize(pBt, nOrig, nFree);
if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
-
+ if( nFin<nOrig ){
+ rc = saveAllCursors(pBt, 0, 0);
+ }
for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
rc = incrVacuumStep(pBt, nFin, iFree, 1);
}
@@ -3163,7 +3187,7 @@ static int autoVacuumCommit(BtShared *pBt){
}
}
- assert( nRef==sqlite3PagerRefcount(pPager) );
+ assert( nRef>=sqlite3PagerRefcount(pPager) );
return rc;
}
@@ -3419,7 +3443,7 @@ int sqlite3BtreeRollback(Btree *p, int tripCode){
/* The rollback may have destroyed the pPage1->aData value. So
** call btreeGetPage() on page 1 again to make
** sure pPage1->aData is set correctly. */
- if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
+ if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){
int nPage = get4byte(28+(u8*)pPage1->aData);
testcase( nPage==0 );
if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage);
@@ -3853,7 +3877,7 @@ static int getOverflowPage(
assert( next==0 || rc==SQLITE_DONE );
if( rc==SQLITE_OK ){
- rc = btreeGetPage(pBt, ovfl, &pPage, 0);
+ rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0));
assert( rc==SQLITE_OK || pPage==0 );
if( rc==SQLITE_OK ){
next = get4byte(pPage->aData);
@@ -4074,7 +4098,9 @@ static int accessPayload(
{
DbPage *pDbPage;
- rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
+ rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage,
+ (eOp==0 ? PAGER_ACQUIRE_READONLY : 0)
+ );
if( rc==SQLITE_OK ){
aPayload = sqlite3PagerGetData(pDbPage);
nextPage = get4byte(aPayload);
@@ -4253,10 +4279,11 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){
assert( cursorHoldsMutex(pCur) );
assert( pCur->eState==CURSOR_VALID );
assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
+ assert( pCur->iPage>=0 );
if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
return SQLITE_CORRUPT_BKPT;
}
- rc = getAndInitPage(pBt, newPgno, &pNewPage);
+ rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0));
if( rc ) return rc;
pCur->apPage[i+1] = pNewPage;
pCur->aiIdx[i+1] = 0;
@@ -4373,7 +4400,7 @@ static int moveToRoot(BtCursor *pCur){
pCur->eState = CURSOR_INVALID;
return SQLITE_OK;
}else{
- rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]);
+ rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], pCur->wrFlag==0);
if( rc!=SQLITE_OK ){
pCur->eState = CURSOR_INVALID;
return rc;
@@ -4987,7 +5014,7 @@ static int allocateBtreePage(
if( iTrunk>mxPage ){
rc = SQLITE_CORRUPT_BKPT;
}else{
- rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
+ rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0);
}
if( rc ){
pTrunk = 0;
@@ -5051,7 +5078,7 @@ static int allocateBtreePage(
goto end_allocate_page;
}
testcase( iNewTrunk==mxPage );
- rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
+ rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0);
if( rc!=SQLITE_OK ){
goto end_allocate_page;
}
@@ -5131,7 +5158,7 @@ static int allocateBtreePage(
}
put4byte(&aData[4], k-1);
noContent = !btreeGetHasContent(pBt, *pPgno);
- rc = btreeGetPage(pBt, *pPgno, ppPage, noContent);
+ rc = btreeGetPage(pBt, *pPgno, ppPage, noContent, 0);
if( rc==SQLITE_OK ){
rc = sqlite3PagerWrite((*ppPage)->pDbPage);
if( rc!=SQLITE_OK ){
@@ -5179,7 +5206,7 @@ static int allocateBtreePage(
MemPage *pPg = 0;
TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
- rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent);
+ rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0);
if( rc==SQLITE_OK ){
rc = sqlite3PagerWrite(pPg->pDbPage);
releasePage(pPg);
@@ -5193,7 +5220,7 @@ static int allocateBtreePage(
*pPgno = pBt->nPage;
assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
- rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent);
+ rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0);
if( rc ) return rc;
rc = sqlite3PagerWrite((*ppPage)->pDbPage);
if( rc!=SQLITE_OK ){
@@ -5261,7 +5288,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
/* If the secure_delete option is enabled, then
** always fully overwrite deleted information with zeros.
*/
- if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
+ if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) )
|| ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
){
goto freepage_out;
@@ -5288,7 +5315,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
u32 nLeaf; /* Initial number of leaf cells on trunk page */
iTrunk = get4byte(&pPage1->aData[32]);
- rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
+ rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0);
if( rc!=SQLITE_OK ){
goto freepage_out;
}
@@ -5334,7 +5361,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
** first trunk in the free-list is full. Either way, the page being freed
** will become the new first trunk page in the free-list.
*/
- if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){
+ if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){
goto freepage_out;
}
rc = sqlite3PagerWrite(pPage->pDbPage);
@@ -6135,7 +6162,7 @@ static int balance_nonroot(
}
pgno = get4byte(pRight);
while( 1 ){
- rc = getAndInitPage(pBt, pgno, &apOld[i]);
+ rc = getAndInitPage(pBt, pgno, &apOld[i], 0);
if( rc ){
memset(apOld, 0, (i+1)*sizeof(MemPage*));
goto balance_cleanup;
@@ -7223,10 +7250,17 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
u8 eType = 0;
Pgno iPtrPage = 0;
+ /* Save the positions of any open cursors. This is required in
+ ** case they are holding a reference to an xFetch reference
+ ** corresponding to page pgnoRoot. */
+ rc = saveAllCursors(pBt, 0, 0);
releasePage(pPageMove);
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
/* Move the page currently at pgnoRoot to pgnoMove. */
- rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
+ rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -7247,7 +7281,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
if( rc!=SQLITE_OK ){
return rc;
}
- rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
+ rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -7323,7 +7357,7 @@ static int clearDatabasePage(
return SQLITE_CORRUPT_BKPT;
}
- rc = getAndInitPage(pBt, pgno, &pPage);
+ rc = getAndInitPage(pBt, pgno, &pPage, 0);
if( rc ) return rc;
for(i=0; i<pPage->nCell; i++){
pCell = findCell(pPage, i);
@@ -7425,7 +7459,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
return SQLITE_LOCKED_SHAREDCACHE;
}
- rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
+ rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0);
if( rc ) return rc;
rc = sqlite3BtreeClearTable(p, iTable, 0);
if( rc ){
@@ -7460,7 +7494,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
*/
MemPage *pMove;
releasePage(pPage);
- rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
+ rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -7470,7 +7504,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
return rc;
}
pMove = 0;
- rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
+ rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0);
freePage(pMove, &rc);
releasePage(pMove);
if( rc!=SQLITE_OK ){
@@ -7882,7 +7916,7 @@ static int checkTreePage(
usableSize = pBt->usableSize;
if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage, zParentContext) ) return 0;
- if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
+ if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){
checkAppendMsg(pCheck, zContext,
"unable to get the page. error code=%d", rc);
return 0;
@@ -8354,6 +8388,17 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
return SQLITE_ABORT;
}
+ /* Save the positions of all other cursors open on this table. This is
+ ** required in case any of them are holding references to an xFetch
+ ** version of the b-tree page modified by the accessPayload call below.
+ **
+ ** Note that pCsr must be open on a BTREE_INTKEY table and saveCursorPosition()
+ ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence
+ ** saveAllCursors can only return SQLITE_OK.
+ */
+ VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr);
+ assert( rc==SQLITE_OK );
+
/* Check some assumptions:
** (a) the cursor is open for writing,
** (b) there is a read/write transaction open,
diff --git a/src/btree.h b/src/btree.h
index d4c9fe37d..23f6ad706 100644
--- a/src/btree.h
+++ b/src/btree.h
@@ -63,6 +63,7 @@ int sqlite3BtreeOpen(
int sqlite3BtreeClose(Btree*);
int sqlite3BtreeSetCacheSize(Btree*,int);
+int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64);
int sqlite3BtreeSetSafetyLevel(Btree*,int,int,int);
int sqlite3BtreeSyncDisabled(Btree*);
int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix);
diff --git a/src/ctime.c b/src/ctime.c
index c42454ca7..ee6d4cbe2 100644
--- a/src/ctime.c
+++ b/src/ctime.c
@@ -57,6 +57,9 @@ static const char * const azCompileOpt[] = {
#ifdef SQLITE_DEFAULT_LOCKING_MODE
"DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE),
#endif
+#ifdef SQLITE_DEFAULT_MMAP_LIMIT
+ "DEFAULT_MMAP_LIMIT=" CTIMEOPT_VAL(SQLITE_DEFAULT_MMAP_LIMIT),
+#endif
#ifdef SQLITE_DISABLE_DIRSYNC
"DISABLE_DIRSYNC",
#endif
diff --git a/src/global.c b/src/global.c
index f5da7e7f1..aec3958c1 100644
--- a/src/global.c
+++ b/src/global.c
@@ -156,6 +156,7 @@ SQLITE_WSD struct Sqlite3Config sqlite3Config = {
(void*)0, /* pHeap */
0, /* nHeap */
0, 0, /* mnHeap, mxHeap */
+ SQLITE_DEFAULT_MMAP_LIMIT, /* mxMmap */
(void*)0, /* pScratch */
0, /* szScratch */
0, /* nScratch */
diff --git a/src/main.c b/src/main.c
index 2fcb236e8..288d4535f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -496,6 +496,13 @@ int sqlite3_config(int op, ...){
}
#endif
+ case SQLITE_CONFIG_MMAP_LIMIT: {
+ sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64);
+ if( mxMmap<0 ) mxMmap = SQLITE_DEFAULT_MMAP_LIMIT;
+ sqlite3GlobalConfig.mxMmap = mxMmap;
+ break;
+ }
+
default: {
rc = SQLITE_ERROR;
break;
@@ -2316,6 +2323,7 @@ static int openDatabase(
memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit));
db->autoCommit = 1;
db->nextAutovac = -1;
+ db->mxMmap = sqlite3GlobalConfig.mxMmap;
db->nextPagesize = 0;
db->flags |= SQLITE_ShortColNames | SQLITE_AutoIndex | SQLITE_EnableTrigger
#if SQLITE_DEFAULT_FILE_FORMAT<4
diff --git a/src/os.c b/src/os.c
index b5e918a72..2ba857e2a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -141,6 +141,14 @@ int sqlite3OsShmMap(
return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp);
}
+int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){
+ DO_OS_MALLOC_TEST(id);
+ return id->pMethods->xFetch(id, iOff, iAmt, pp);
+}
+int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){
+ return id->pMethods->xUnfetch(id, iOff, p);
+}
+
/*
** The next group of routines are convenience wrappers around the
** VFS methods.
diff --git a/src/os.h b/src/os.h
index 1ec7d4ba1..82e07ee1c 100644
--- a/src/os.h
+++ b/src/os.h
@@ -259,6 +259,8 @@ int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmUnmap(sqlite3_file *id, int);
+int sqlite3OsFetch(sqlite3_file *id, i64, int, void **);
+int sqlite3OsUnfetch(sqlite3_file *, i64, void *);
/*
diff --git a/src/os_unix.c b/src/os_unix.c
index 84b1594ac..75f923601 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -225,6 +225,11 @@ struct unixFile {
const char *zPath; /* Name of the file */
unixShm *pShm; /* Shared memory segment information */
int szChunk; /* Configured by FCNTL_CHUNK_SIZE */
+ int nFetchOut; /* Number of outstanding xFetch refs */
+ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
+ sqlite3_int64 mmapOrigsize; /* Actual size of mapping at pMapRegion */
+ sqlite3_int64 mmapLimit; /* Configured FCNTL_MMAP_LIMIT value */
+ void *pMapRegion; /* Memory mapped region */
#ifdef __QNXNTO__
int sectorSize; /* Device sector size */
int deviceCharacteristics; /* Precomputed device characteristics */
@@ -249,7 +254,9 @@ struct unixFile {
unsigned char transCntrChng; /* True if the transaction counter changed */
unsigned char dbUpdate; /* True if any part of database file changed */
unsigned char inNormalWrite; /* True if in a normal write operation */
+
#endif
+
#ifdef SQLITE_TEST
/* In test mode, increase the size of this structure a bit so that
** it is larger than the struct CrashFile defined in test6.c.
@@ -307,6 +314,17 @@ struct unixFile {
#endif
/*
+** HAVE_MREMAP defaults to true on Linux and false everywhere else.
+*/
+#if !defined(HAVE_MREMAP)
+# if defined(__linux__) && defined(_GNU_SOURCE)
+# define HAVE_MREMAP 1
+# else
+# define HAVE_MREMAP 0
+# endif
+#endif
+
+/*
** Different Unix systems declare open() in different ways. Same use
** open(const char*,int,mode_t). Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
@@ -437,6 +455,19 @@ static struct unix_syscall {
{ "fchown", (sqlite3_syscall_ptr)posixFchown, 0 },
#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
+ { "mmap", (sqlite3_syscall_ptr)mmap, 0 },
+#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)
+
+ { "munmap", (sqlite3_syscall_ptr)munmap, 0 },
+#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)
+
+#if HAVE_MREMAP
+ { "mremap", (sqlite3_syscall_ptr)mremap, 0 },
+#else
+ { "mremap", (sqlite3_syscall_ptr)0, 0 },
+#endif
+#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
+
}; /* End of the overrideable system calls */
/*
@@ -1104,7 +1135,6 @@ static int unixLogErrorAtLine(
zErr = strerror(iErrno);
#endif
- assert( errcode!=SQLITE_OK );
if( zPath==0 ) zPath = "";
sqlite3_log(errcode,
"os_unix.c:%d: (%d) %s(%s) - %s",
@@ -1800,9 +1830,13 @@ end_unlock:
** the requested locking level, this routine is a no-op.
*/
static int unixUnlock(sqlite3_file *id, int eFileLock){
+ assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
return posixUnlock(id, eFileLock, 0);
}
+static int unixMapfile(unixFile *pFd, i64 nByte);
+static void unixUnmapfile(unixFile *pFd);
+
/*
** This function performs the parts of the "close file" operation
** common to all locking schemes. It closes the directory and file
@@ -1815,6 +1849,7 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
*/
static int closeUnixFile(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
+ unixUnmapfile(pFile);
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
pFile->h = -1;
@@ -3082,6 +3117,21 @@ static int unixRead(
);
#endif
+ /* Deal with as much of this read request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+
got = seekAndRead(pFile, offset, pBuf, amt);
if( got==amt ){
return SQLITE_OK;
@@ -3186,6 +3236,21 @@ static int unixWrite(
}
#endif
+ /* Deal with as much of this write request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+
while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
amt -= wrote;
offset += wrote;
@@ -3468,6 +3533,14 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){
}
#endif
+ /* If the file was just truncated to a size smaller than the currently
+ ** mapped region, reduce the effective mapping size as well. SQLite will
+ ** use read() and write() to access data beyond this point from now on.
+ */
+ if( nByte<pFile->mmapSize ){
+ pFile->mmapSize = nByte;
+ }
+
return SQLITE_OK;
}
}
@@ -3556,6 +3629,19 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
}
}
+ if( pFile->mmapLimit>0 && nByte>pFile->mmapSize ){
+ int rc;
+ if( pFile->szChunk<=0 ){
+ if( robust_ftruncate(pFile->h, nByte) ){
+ pFile->lastErrno = errno;
+ return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
+ }
+ }
+
+ rc = unixMapfile(pFile, nByte);
+ return rc;
+ }
+
return SQLITE_OK;
}
@@ -3623,6 +3709,12 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
}
return SQLITE_OK;
}
+ case SQLITE_FCNTL_MMAP_LIMIT: {
+ i64 newLimit = *(i64*)pArg;
+ *(i64*)pArg = pFile->mmapLimit;
+ if( newLimit>=0 ) pFile->mmapLimit = newLimit;
+ return SQLITE_OK;
+ }
#ifdef SQLITE_DEBUG
/* The pager calls this method to signal that it has done
** a rollback and that the database is therefore unchanged and
@@ -3935,7 +4027,7 @@ static void unixShmPurge(unixFile *pFd){
sqlite3_mutex_free(p->mutex);
for(i=0; i<p->nRegion; i++){
if( p->h>=0 ){
- munmap(p->apRegion[i], p->szRegion);
+ osMunmap(p->apRegion[i], p->szRegion);
}else{
sqlite3_free(p->apRegion[i]);
}
@@ -4208,7 +4300,7 @@ static int unixShmMap(
while(pShmNode->nRegion<=iRegion){
void *pMem;
if( pShmNode->h>=0 ){
- pMem = mmap(0, szRegion,
+ pMem = osMmap(0, szRegion,
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
);
@@ -4426,6 +4518,223 @@ static int unixShmUnmap(
#endif /* #ifndef SQLITE_OMIT_WAL */
/*
+** If it is currently memory mapped, unmap file pFd.
+*/
+static void unixUnmapfile(unixFile *pFd){
+ assert( pFd->nFetchOut==0 );
+ if( pFd->pMapRegion ){
+ osMunmap(pFd->pMapRegion, pFd->mmapOrigsize);
+ pFd->pMapRegion = 0;
+ pFd->mmapSize = 0;
+ pFd->mmapOrigsize = 0;
+ }
+}
+
+/*
+** Return the system page size.
+*/
+static int unixGetPagesize(void){
+#if HAVE_MREMAP
+ return 512;
+#elif defined(_BSD_SOURCE)
+ return getpagesize();
+#else
+ return (int)sysconf(_SC_PAGESIZE);
+#endif
+}
+
+/*
+** Attempt to set the size of the memory mapping maintained by file
+** descriptor pFd to nNew bytes. Any existing mapping is discarded.
+**
+** If successful, this function sets the following variables:
+**
+** unixFile.pMapRegion
+** unixFile.mmapSize
+** unixFile.mmapOrigsize
+**
+** If unsuccessful, an error message is logged via sqlite3_log() and
+** the three variables above are zeroed. In this case SQLite should
+** continue accessing the database using the xRead() and xWrite()
+** methods.
+*/
+static void unixRemapfile(
+ unixFile *pFd, /* File descriptor object */
+ i64 nNew /* Required mapping size */
+){
+ const char *zErr = "mmap";
+ int h = pFd->h; /* File descriptor open on db file */
+ u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */
+ i64 nOrig = pFd->mmapOrigsize; /* Size of pOrig region in bytes */
+ u8 *pNew = 0; /* Location of new mapping */
+ int flags = PROT_READ; /* Flags to pass to mmap() */
+
+ assert( pFd->nFetchOut==0 );
+ assert( nNew>pFd->mmapSize );
+ assert( nNew<=pFd->mmapLimit );
+ assert( nNew>0 );
+ assert( pFd->mmapOrigsize>=pFd->mmapSize );
+ assert( MAP_FAILED!=0 );
+
+ if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
+
+ if( pOrig ){
+ const int szSyspage = unixGetPagesize();
+ i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
+ u8 *pReq = &pOrig[nReuse];
+
+ /* Unmap any pages of the existing mapping that cannot be reused. */
+ if( nReuse!=nOrig ){
+ osMunmap(pReq, nOrig-nReuse);
+ }
+
+#if HAVE_MREMAP
+ pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
+ zErr = "mremap";
+#else
+ pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
+ if( pNew!=MAP_FAILED ){
+ if( pNew!=pReq ){
+ osMunmap(pNew, nNew - nReuse);
+ pNew = 0;
+ }else{
+ pNew = pOrig;
+ }
+ }
+#endif
+
+ /* The attempt to extend the existing mapping failed. Free it. */
+ if( pNew==MAP_FAILED || pNew==0 ){
+ osMunmap(pOrig, nReuse);
+ }
+ }
+
+ /* If pNew is still NULL, try to create an entirely new mapping. */
+ if( pNew==0 ){
+ pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
+ }
+
+ if( pNew==MAP_FAILED ){
+ pNew = 0;
+ nNew = 0;
+ unixLogError(SQLITE_OK, zErr, pFd->zPath);
+
+ /* If the mmap() above failed, assume that all subsequent mmap() calls
+ ** will probably fail too. Fall back to using xRead/xWrite exclusively
+ ** in this case. */
+ pFd->mmapLimit = 0;
+ }
+ pFd->pMapRegion = (void *)pNew;
+ pFd->mmapSize = pFd->mmapOrigsize = nNew;
+}
+
+/*
+** Memory map or remap the file opened by file-descriptor pFd (if the file
+** is already mapped, the existing mapping is replaced by the new). Or, if
+** there already exists a mapping for this file, and there are still
+** outstanding xFetch() references to it, this function is a no-op.
+**
+** If parameter nByte is non-negative, then it is the requested size of
+** the mapping to create. Otherwise, if nByte is less than zero, then the
+** requested size is the size of the file on disk. The actual size of the
+** created mapping is either the requested size or the value configured
+** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
+**
+** SQLITE_OK is returned if no error occurs (even if the mapping is not
+** recreated as a result of outstanding references) or an SQLite error
+** code otherwise.
+*/
+static int unixMapfile(unixFile *pFd, i64 nByte){
+ i64 nMap = nByte;
+ int rc;
+
+ assert( nMap>=0 || pFd->nFetchOut==0 );
+ if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+ if( nMap<0 ){
+ struct stat statbuf; /* Low-level file information */
+ rc = osFstat(pFd->h, &statbuf);
+ if( rc!=SQLITE_OK ){
+ return SQLITE_IOERR_FSTAT;
+ }
+ nMap = statbuf.st_size;
+ }
+ if( nMap>pFd->mmapLimit ){
+ nMap = pFd->mmapLimit;
+ }
+
+ if( nMap!=pFd->mmapSize ){
+ if( nMap>0 ){
+ unixRemapfile(pFd, nMap);
+ }else{
+ unixUnmapfile(pFd);
+ }
+ }
+
+ return SQLITE_OK;
+}
+
+/*
+** If possible, return a pointer to a mapping of file fd starting at offset
+** iOff. The mapping must be valid for at least nAmt bytes.
+**
+** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
+** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
+** Finally, if an error does occur, return an SQLite error code. The final
+** value of *pp is undefined in this case.
+**
+** If this function does return a pointer, the caller must eventually
+** release the reference by calling unixUnfetch().
+*/
+static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+ *pp = 0;
+
+ if( pFd->mmapLimit>0 ){
+ if( pFd->pMapRegion==0 ){
+ int rc = unixMapfile(pFd, -1);
+ if( rc!=SQLITE_OK ) return rc;
+ }
+ if( pFd->mmapSize >= iOff+nAmt ){
+ *pp = &((u8 *)pFd->pMapRegion)[iOff];
+ pFd->nFetchOut++;
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+** If the third argument is non-NULL, then this function releases a
+** reference obtained by an earlier call to unixFetch(). The second
+** argument passed to this function must be the same as the corresponding
+** argument that was passed to the unixFetch() invocation.
+**
+** Or, if the third argument is NULL, then this function is being called
+** to inform the VFS layer that, according to POSIX, any existing mapping
+** may now be invalid and should be unmapped.
+*/
+static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+
+ /* If p==0 (unmap the entire file) then there must be no outstanding
+ ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
+ ** then there must be at least one outstanding. */
+ assert( (p==0)==(pFd->nFetchOut==0) );
+
+ /* If p!=0, it must match the iOff value. */
+ assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
+
+ if( p ){
+ pFd->nFetchOut--;
+ }else{
+ unixUnmapfile(pFd);
+ }
+
+ assert( pFd->nFetchOut>=0 );
+ return SQLITE_OK;
+}
+
+/*
** Here ends the implementation of all sqlite3_file methods.
**
********************** End sqlite3_file Methods *******************************
@@ -4483,7 +4792,9 @@ static const sqlite3_io_methods METHOD = { \
unixShmMap, /* xShmMap */ \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
- unixShmUnmap /* xShmUnmap */ \
+ unixShmUnmap, /* xShmUnmap */ \
+ unixFetch, /* xFetch */ \
+ unixUnfetch, /* xUnfetch */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
@@ -4500,7 +4811,7 @@ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \
IOMETHODS(
posixIoFinder, /* Finder function name */
posixIoMethods, /* sqlite3_io_methods object name */
- 2, /* shared memory is enabled */
+ 3, /* shared memory and mmap are enabled */
unixClose, /* xClose method */
unixLock, /* xLock method */
unixUnlock, /* xUnlock method */
@@ -4751,6 +5062,7 @@ static int fillInUnixFile(
pNew->pVfs = pVfs;
pNew->zPath = zFilename;
pNew->ctrlFlags = (u8)ctrlFlags;
+ pNew->mmapLimit = sqlite3GlobalConfig.mxMmap;
if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
"psow", SQLITE_POWERSAFE_OVERWRITE) ){
pNew->ctrlFlags |= UNIXFILE_PSOW;
@@ -6988,7 +7300,7 @@ int sqlite3_os_init(void){
/* Double-check that the aSyscall[] array has been constructed
** correctly. See ticket [bb3a86e890c8e96ab] */
- assert( ArraySize(aSyscall)==21 );
+ assert( ArraySize(aSyscall)==24 );
/* Register all VFSes defined in the aVfs[] array */
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
diff --git a/src/os_win.c b/src/os_win.c
index 58ba25f30..a7966171b 100644
--- a/src/os_win.c
+++ b/src/os_win.c
@@ -150,11 +150,18 @@ struct winFile {
winceLock local; /* Locks obtained by this instance of winFile */
winceLock *shared; /* Global shared lock memory for the file */
#endif
+ int nFetchOut; /* Number of outstanding xFetch references */
+ HANDLE hMap; /* Handle for accessing memory mapping */
+ void *pMapRegion; /* Area memory mapped */
+ sqlite3_int64 mmapSize; /* Usable size of mapped region */
+ sqlite3_int64 mmapOrigsize; /* Actual size of mapped region */
+ sqlite3_int64 mmapLimit; /* Configured FCNTL_MMAP_LIMIT value */
};
/*
** Allowed values for winFile.ctrlFlags
*/
+#define WINFILE_RDONLY 0x02 /* Connection is read only */
#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
@@ -2061,6 +2068,9 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){
#endif
}
+/* Forward references to VFS methods */
+static int winUnmapfile(winFile*);
+
/*
** Close a file.
**
@@ -2082,6 +2092,10 @@ static int winClose(sqlite3_file *id){
#endif
OSTRACE(("CLOSE %d\n", pFile->h));
assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE );
+
+ rc = winUnmapfile(pFile);
+ if( rc!=SQLITE_OK ) return rc;
+
do{
rc = osCloseHandle(pFile->h);
/* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */
@@ -2130,9 +2144,25 @@ static int winRead(
int nRetry = 0; /* Number of retrys */
assert( id!=0 );
+ assert( amt>0 );
SimulateIOError(return SQLITE_IOERR_READ);
OSTRACE(("READ %d lock=%d\n", pFile->h, pFile->locktype));
+ /* Deal with as much of this read request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = (int)(pFile->mmapSize - offset);
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+
#if SQLITE_OS_WINCE
if( seekWinFile(pFile, offset) ){
return SQLITE_FULL;
@@ -2182,6 +2212,21 @@ static int winWrite(
OSTRACE(("WRITE %d lock=%d\n", pFile->h, pFile->locktype));
+ /* Deal with as much of this write request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = (int)(pFile->mmapSize - offset);
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+
#if SQLITE_OS_WINCE
rc = seekWinFile(pFile, offset);
if( rc==0 ){
@@ -2249,6 +2294,7 @@ static int winWrite(
static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){
winFile *pFile = (winFile*)id; /* File handle object */
int rc = SQLITE_OK; /* Return code for this function */
+ DWORD lastErrno;
assert( pFile );
@@ -2267,11 +2313,20 @@ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){
/* SetEndOfFile() returns non-zero when successful, or zero when it fails. */
if( seekWinFile(pFile, nByte) ){
rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
- "winTruncate1", pFile->zPath);
- }else if( 0==osSetEndOfFile(pFile->h) ){
- pFile->lastErrno = osGetLastError();
+ "winTruncate1", pFile->zPath);
+ }else if( 0==osSetEndOfFile(pFile->h) &&
+ ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){
+ pFile->lastErrno = lastErrno;
rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
- "winTruncate2", pFile->zPath);
+ "winTruncate2", pFile->zPath);
+ }
+
+ /* If the file was truncated to a size smaller than the currently
+ ** mapped region, reduce the effective mapping size as well. SQLite will
+ ** use read() and write() to access data beyond this point from now on.
+ */
+ if( pFile->pMapRegion && nByte<pFile->mmapSize ){
+ pFile->mmapSize = nByte;
}
OSTRACE(("TRUNCATE %d %lld %s\n", pFile->h, nByte, rc ? "failed" : "ok"));
@@ -2781,6 +2836,12 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){
}
return SQLITE_OK;
}
+ case SQLITE_FCNTL_MMAP_LIMIT: {
+ i64 newLimit = *(i64*)pArg;
+ *(i64*)pArg = pFile->mmapLimit;
+ if( newLimit>=0 ) pFile->mmapLimit = newLimit;
+ return SQLITE_OK;
+ }
}
return SQLITE_NOTFOUND;
}
@@ -3452,6 +3513,184 @@ shmpage_out:
#endif /* #ifndef SQLITE_OMIT_WAL */
/*
+** Cleans up the mapped region of the specified file, if any.
+*/
+static int winUnmapfile(winFile *pFile){
+ assert( pFile!=0 );
+ if( pFile->pMapRegion ){
+ if( !osUnmapViewOfFile(pFile->pMapRegion) ){
+ pFile->lastErrno = osGetLastError();
+ return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno,
+ "winUnmap1", pFile->zPath);
+ }
+ pFile->pMapRegion = 0;
+ pFile->mmapSize = 0;
+ pFile->mmapOrigsize = 0;
+ }
+ if( pFile->hMap!=NULL ){
+ if( !osCloseHandle(pFile->hMap) ){
+ pFile->lastErrno = osGetLastError();
+ return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno,
+ "winUnmap2", pFile->zPath);
+ }
+ pFile->hMap = NULL;
+ }
+ return SQLITE_OK;
+}
+
+/*
+** Memory map or remap the file opened by file-descriptor pFd (if the file
+** is already mapped, the existing mapping is replaced by the new). Or, if
+** there already exists a mapping for this file, and there are still
+** outstanding xFetch() references to it, this function is a no-op.
+**
+** If parameter nByte is non-negative, then it is the requested size of
+** the mapping to create. Otherwise, if nByte is less than zero, then the
+** requested size is the size of the file on disk. The actual size of the
+** created mapping is either the requested size or the value configured
+** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
+**
+** SQLITE_OK is returned if no error occurs (even if the mapping is not
+** recreated as a result of outstanding references) or an SQLite error
+** code otherwise.
+*/
+static int winMapfile(winFile *pFd, sqlite3_int64 nByte){
+ sqlite3_int64 nMap = nByte;
+ int rc;
+
+ assert( nMap>=0 || pFd->nFetchOut==0 );
+ if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+ if( nMap<0 ){
+ rc = winFileSize((sqlite3_file*)pFd, &nMap);
+ if( rc ){
+ return SQLITE_IOERR_FSTAT;
+ }
+ }
+ if( nMap>pFd->mmapLimit ){
+ nMap = pFd->mmapLimit;
+ }
+ nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1);
+
+ if( nMap==0 && pFd->mmapSize>0 ){
+ winUnmapfile(pFd);
+ }
+ if( nMap!=pFd->mmapSize ){
+ void *pNew = 0;
+ DWORD protect = PAGE_READONLY;
+ DWORD flags = FILE_MAP_READ;
+
+ winUnmapfile(pFd);
+ if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){
+ protect = PAGE_READWRITE;
+ flags |= FILE_MAP_WRITE;
+ }
+#if SQLITE_OS_WINRT
+ pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL);
+#elif defined(SQLITE_WIN32_HAS_WIDE)
+ pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect,
+ (DWORD)((nMap>>32) & 0xffffffff),
+ (DWORD)(nMap & 0xffffffff), NULL);
+#elif defined(SQLITE_WIN32_HAS_ANSI)
+ pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect,
+ (DWORD)((nMap>>32) & 0xffffffff),
+ (DWORD)(nMap & 0xffffffff), NULL);
+#endif
+ if( pFd->hMap==NULL ){
+ pFd->lastErrno = osGetLastError();
+ rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno,
+ "winMapfile", pFd->zPath);
+ /* Log the error, but continue normal operation using xRead/xWrite */
+ return SQLITE_OK;
+ }
+ assert( (nMap % winSysInfo.dwPageSize)==0 );
+#if SQLITE_OS_WINRT
+ pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, nMap);
+#else
+ assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff );
+ pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap);
+#endif
+ if( pNew==NULL ){
+ osCloseHandle(pFd->hMap);
+ pFd->hMap = NULL;
+ pFd->lastErrno = osGetLastError();
+ winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno,
+ "winMapfile", pFd->zPath);
+ return SQLITE_OK;
+ }
+ pFd->pMapRegion = pNew;
+ pFd->mmapSize = nMap;
+ pFd->mmapOrigsize = nMap;
+ }
+
+ return SQLITE_OK;
+}
+
+/*
+** If possible, return a pointer to a mapping of file fd starting at offset
+** iOff. The mapping must be valid for at least nAmt bytes.
+**
+** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
+** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
+** Finally, if an error does occur, return an SQLite error code. The final
+** value of *pp is undefined in this case.
+**
+** If this function does return a pointer, the caller must eventually
+** release the reference by calling unixUnfetch().
+*/
+static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+ winFile *pFd = (winFile*)fd; /* The underlying database file */
+ *pp = 0;
+
+ if( pFd->mmapLimit>0 ){
+ if( pFd->pMapRegion==0 ){
+ int rc = winMapfile(pFd, -1);
+ if( rc!=SQLITE_OK ) return rc;
+ }
+ if( pFd->mmapSize >= iOff+nAmt ){
+ *pp = &((u8 *)pFd->pMapRegion)[iOff];
+ pFd->nFetchOut++;
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+** If the third argument is non-NULL, then this function releases a
+** reference obtained by an earlier call to unixFetch(). The second
+** argument passed to this function must be the same as the corresponding
+** argument that was passed to the unixFetch() invocation.
+**
+** Or, if the third argument is NULL, then this function is being called
+** to inform the VFS layer that, according to POSIX, any existing mapping
+** may now be invalid and should be unmapped.
+*/
+static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){
+ winFile *pFd = (winFile*)fd; /* The underlying database file */
+
+ /* If p==0 (unmap the entire file) then there must be no outstanding
+ ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
+ ** then there must be at least one outstanding. */
+ assert( (p==0)==(pFd->nFetchOut==0) );
+
+ /* If p!=0, it must match the iOff value. */
+ assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
+
+ if( p ){
+ pFd->nFetchOut--;
+ }else{
+ /* FIXME: If Windows truly always prevents truncating or deleting a
+ ** file while a mapping is held, then the following winUnmapfile() call
+ ** is unnecessary can can be omitted - potentially improving
+ ** performance. */
+ winUnmapfile(pFd);
+ }
+
+ assert( pFd->nFetchOut>=0 );
+ return SQLITE_OK;
+}
+
+/*
** Here ends the implementation of all sqlite3_file methods.
**
********************** End sqlite3_file Methods *******************************
@@ -3462,7 +3701,7 @@ shmpage_out:
** sqlite3_file for win32.
*/
static const sqlite3_io_methods winIoMethod = {
- 2, /* iVersion */
+ 3, /* iVersion */
winClose, /* xClose */
winRead, /* xRead */
winWrite, /* xWrite */
@@ -3478,7 +3717,9 @@ static const sqlite3_io_methods winIoMethod = {
winShmMap, /* xShmMap */
winShmLock, /* xShmLock */
winShmBarrier, /* xShmBarrier */
- winShmUnmap /* xShmUnmap */
+ winShmUnmap, /* xShmUnmap */
+ winFetch, /* xFetch */
+ winUnfetch /* xUnfetch */
};
/****************************************************************************
@@ -3654,9 +3895,7 @@ static int winOpen(
int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
int isCreate = (flags & SQLITE_OPEN_CREATE);
-#ifndef NDEBUG
int isReadonly = (flags & SQLITE_OPEN_READONLY);
-#endif
int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
#ifndef NDEBUG
@@ -3867,11 +4106,19 @@ static int winOpen(
pFile->pMethod = &winIoMethod;
pFile->pVfs = pVfs;
pFile->h = h;
+ if( isReadonly ){
+ pFile->ctrlFlags |= WINFILE_RDONLY;
+ }
if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){
pFile->ctrlFlags |= WINFILE_PSOW;
}
pFile->lastErrno = NO_ERROR;
pFile->zPath = zName;
+ pFile->hMap = NULL;
+ pFile->pMapRegion = 0;
+ pFile->mmapSize = 0;
+ pFile->mmapOrigsize = 0;
+ pFile->mmapLimit = sqlite3GlobalConfig.mxMmap;
OpenCounter(+1);
return rc;
@@ -4500,7 +4747,6 @@ int sqlite3_os_init(void){
** correctly. See ticket [bb3a86e890c8e96ab] */
assert( ArraySize(aSyscall)==74 );
-#ifndef SQLITE_OMIT_WAL
/* get memory map allocation granularity */
memset(&winSysInfo, 0, sizeof(SYSTEM_INFO));
#if SQLITE_OS_WINRT
@@ -4508,8 +4754,8 @@ int sqlite3_os_init(void){
#else
osGetSystemInfo(&winSysInfo);
#endif
- assert(winSysInfo.dwAllocationGranularity > 0);
-#endif
+ assert( winSysInfo.dwAllocationGranularity>0 );
+ assert( winSysInfo.dwPageSize>0 );
sqlite3_vfs_register(&winVfs, 1);
return SQLITE_OK;
diff --git a/src/pager.c b/src/pager.c
index dfa179d54..ae2f10069 100644
--- a/src/pager.c
+++ b/src/pager.c
@@ -655,6 +655,11 @@ struct Pager {
PagerSavepoint *aSavepoint; /* Array of active savepoints */
int nSavepoint; /* Number of elements in aSavepoint[] */
char dbFileVers[16]; /* Changes whenever database file changes */
+
+ u8 bUseFetch; /* True to use xFetch() */
+ int nMmapOut; /* Number of mmap pages currently outstanding */
+ sqlite3_int64 mxMmap; /* Desired maximum mmap size */
+ PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */
/*
** End of the routinely-changing class members
***************************************************************************/
@@ -2252,7 +2257,7 @@ static int pager_playback_one_page(
i64 ofst = (pgno-1)*(i64)pPager->pageSize;
testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
assert( !pagerUseWal(pPager) );
- rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst);
+ rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst);
if( pgno>pPager->dbFileSize ){
pPager->dbFileSize = pgno;
}
@@ -2834,11 +2839,10 @@ end_playback:
** If an IO error occurs, then the IO error is returned to the caller.
** Otherwise, SQLITE_OK is returned.
*/
-static int readDbPage(PgHdr *pPg){
+static int readDbPage(PgHdr *pPg, u32 iFrame){
Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
Pgno pgno = pPg->pgno; /* Page number to read */
int rc = SQLITE_OK; /* Return code */
- int isInWal = 0; /* True if page is in log file */
int pgsz = pPager->pageSize; /* Number of bytes to read */
assert( pPager->eState>=PAGER_READER && !MEMDB );
@@ -2850,11 +2854,10 @@ static int readDbPage(PgHdr *pPg){
return SQLITE_OK;
}
- if( pagerUseWal(pPager) ){
+ if( iFrame ){
/* Try to pull the page from the write-ahead log. */
- rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData);
- }
- if( rc==SQLITE_OK && !isInWal ){
+ rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData);
+ }else{
i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
if( rc==SQLITE_IOERR_SHORT_READ ){
@@ -2933,12 +2936,17 @@ static int pagerUndoCallback(void *pCtx, Pgno iPg){
Pager *pPager = (Pager *)pCtx;
PgHdr *pPg;
+ assert( pagerUseWal(pPager) );
pPg = sqlite3PagerLookup(pPager, iPg);
if( pPg ){
if( sqlite3PcachePageRefcount(pPg)==1 ){
sqlite3PcacheDrop(pPg);
}else{
- rc = readDbPage(pPg);
+ u32 iFrame = 0;
+ rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame);
+ if( rc==SQLITE_OK ){
+ rc = readDbPage(pPg, iFrame);
+ }
if( rc==SQLITE_OK ){
pPager->xReiniter(pPg);
}
@@ -3082,6 +3090,7 @@ static int pagerBeginReadTransaction(Pager *pPager){
rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
if( rc!=SQLITE_OK || changed ){
pager_reset(pPager);
+ if( pPager->bUseFetch ) sqlite3OsUnfetch(pPager->fd, 0, 0);
}
return rc;
@@ -3344,6 +3353,27 @@ void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
}
/*
+** Invoke SQLITE_FCNTL_MMAP_LIMIT based on the current value of mxMmap.
+*/
+static void pagerFixMaplimit(Pager *pPager){
+ sqlite3_file *fd = pPager->fd;
+ if( isOpen(fd) ){
+ sqlite3_int64 mx;
+ pPager->bUseFetch = (fd->pMethods->iVersion>=3) && pPager->mxMmap>0;
+ mx = pPager->mxMmap;
+ sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_LIMIT, &mx);
+ }
+}
+
+/*
+** Change the maximum size of any memory mapping made of the database file.
+*/
+void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 mxMmap){
+ pPager->mxMmap = mxMmap;
+ pagerFixMaplimit(pPager);
+}
+
+/*
** Free as much memory as possible from the pager.
*/
void sqlite3PagerShrink(Pager *pPager){
@@ -3578,6 +3608,7 @@ int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){
assert( nReserve>=0 && nReserve<1000 );
pPager->nReserve = (i16)nReserve;
pagerReportSize(pPager);
+ pagerFixMaplimit(pPager);
}
return rc;
}
@@ -3804,6 +3835,81 @@ static int pagerSyncHotJournal(Pager *pPager){
}
/*
+** Obtain a reference to a memory mapped page object for page number pgno.
+** The new object will use the pointer pData, obtained from xFetch().
+** If successful, set *ppPage to point to the new page reference
+** and return SQLITE_OK. Otherwise, return an SQLite error code and set
+** *ppPage to zero.
+**
+** Page references obtained by calling this function should be released
+** by calling pagerReleaseMapPage().
+*/
+static int pagerAcquireMapPage(
+ Pager *pPager, /* Pager object */
+ Pgno pgno, /* Page number */
+ void *pData, /* xFetch()'d data for this page */
+ PgHdr **ppPage /* OUT: Acquired page object */
+){
+ PgHdr *p; /* Memory mapped page to return */
+
+ if( pPager->pMmapFreelist ){
+ *ppPage = p = pPager->pMmapFreelist;
+ pPager->pMmapFreelist = p->pDirty;
+ p->pDirty = 0;
+ memset(p->pExtra, 0, pPager->nExtra);
+ }else{
+ *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra);
+ if( p==0 ){
+ sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData);
+ return SQLITE_NOMEM;
+ }
+ p->pExtra = (void *)&p[1];
+ p->flags = PGHDR_MMAP;
+ p->nRef = 1;
+ p->pPager = pPager;
+ }
+
+ assert( p->pExtra==(void *)&p[1] );
+ assert( p->pPage==0 );
+ assert( p->flags==PGHDR_MMAP );
+ assert( p->pPager==pPager );
+ assert( p->nRef==1 );
+
+ p->pgno = pgno;
+ p->pData = pData;
+ pPager->nMmapOut++;
+
+ return SQLITE_OK;
+}
+
+/*
+** Release a reference to page pPg. pPg must have been returned by an
+** earlier call to pagerAcquireMapPage().
+*/
+static void pagerReleaseMapPage(PgHdr *pPg){
+ Pager *pPager = pPg->pPager;
+ pPager->nMmapOut--;
+ pPg->pDirty = pPager->pMmapFreelist;
+ pPager->pMmapFreelist = pPg;
+
+ assert( pPager->fd->pMethods->iVersion>=3 );
+ sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData);
+}
+
+/*
+** Free all PgHdr objects stored in the Pager.pMmapFreelist list.
+*/
+static void pagerFreeMapHdrs(Pager *pPager){
+ PgHdr *p;
+ PgHdr *pNext;
+ for(p=pPager->pMmapFreelist; p; p=pNext){
+ pNext = p->pDirty;
+ sqlite3_free(p);
+ }
+}
+
+
+/*
** Shutdown the page cache. Free all memory and close all files.
**
** If a transaction was in progress when this routine is called, that
@@ -3823,6 +3929,7 @@ int sqlite3PagerClose(Pager *pPager){
assert( assert_pager_state(pPager) );
disable_simulated_io_errors();
sqlite3BeginBenignMalloc();
+ pagerFreeMapHdrs(pPager);
/* pPager->errCode = 0; */
pPager->exclusiveMode = 0;
#ifndef SQLITE_OMIT_WAL
@@ -4084,7 +4191,9 @@ static int pager_write_pagelist(Pager *pPager, PgHdr *pList){
** file size will be.
*/
assert( rc!=SQLITE_OK || isOpen(pPager->fd) );
- if( rc==SQLITE_OK && pPager->dbSize>pPager->dbHintSize ){
+ if( rc==SQLITE_OK
+ && (pList->pDirty ? pPager->dbSize : pList->pgno+1)>pPager->dbHintSize
+ ){
sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize;
sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile);
pPager->dbHintSize = pPager->dbSize;
@@ -4638,6 +4747,7 @@ int sqlite3PagerOpen(
/* pPager->pBusyHandlerArg = 0; */
pPager->xReiniter = xReinit;
/* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
+ /* pPager->mxMmap = SQLITE_DEFAULT_MMAP_LIMIT // will be set by btree.c */
*ppPager = pPager;
return SQLITE_OK;
@@ -4929,9 +5039,11 @@ int sqlite3PagerSharedLock(Pager *pPager){
);
}
- if( !pPager->tempFile
- && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0)
- ){
+ if( !pPager->tempFile && (
+ pPager->pBackup
+ || sqlite3PcachePagecount(pPager->pPCache)>0
+ || pPager->bUseFetch
+ )){
/* The shared-lock has just been acquired on the database file
** and there are already pages in the cache (from a previous
** read or write transaction). Check to see if the database
@@ -4957,7 +5069,7 @@ int sqlite3PagerSharedLock(Pager *pPager){
if( nPage>0 ){
IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
- if( rc!=SQLITE_OK ){
+ if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
goto failed;
}
}else{
@@ -4966,6 +5078,16 @@ int sqlite3PagerSharedLock(Pager *pPager){
if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
pager_reset(pPager);
+
+ /* Unmap the database file. It is possible that external processes
+ ** may have truncated the database file and then extended it back
+ ** to its original size while this process was not holding a lock.
+ ** In this case there may exist a Pager.pMap mapping that appears
+ ** to be the right size but is not actually valid. Avoid this
+ ** possibility by unmapping the db here. */
+ if( pPager->bUseFetch ){
+ sqlite3OsUnfetch(pPager->fd, 0, 0);
+ }
}
}
@@ -5007,7 +5129,7 @@ int sqlite3PagerSharedLock(Pager *pPager){
** nothing to rollback, so this routine is a no-op.
*/
static void pagerUnlockIfUnused(Pager *pPager){
- if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
+ if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
pagerUnlockAndRollback(pPager);
}
}
@@ -5066,13 +5188,24 @@ int sqlite3PagerAcquire(
Pager *pPager, /* The pager open on the database file */
Pgno pgno, /* Page number to fetch */
DbPage **ppPage, /* Write a pointer to the page here */
- int noContent /* Do not bother reading content from disk if true */
+ int flags /* PAGER_ACQUIRE_XXX flags */
){
- int rc;
- PgHdr *pPg;
+ int rc = SQLITE_OK;
+ PgHdr *pPg = 0;
+ u32 iFrame = 0; /* Frame to read from WAL file */
+ const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT);
+
+ /* It is acceptable to use a read-only (mmap) page for any page except
+ ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
+ ** flag was specified by the caller. And so long as the db is not a
+ ** temporary or in-memory database. */
+ const int bMmapOk = (pgno!=1 && pPager->bUseFetch
+ && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY))
+ );
assert( pPager->eState>=PAGER_READER );
assert( assert_pager_state(pPager) );
+ assert( noContent==0 || bMmapOk==0 );
if( pgno==0 ){
return SQLITE_CORRUPT_BKPT;
@@ -5083,6 +5216,39 @@ int sqlite3PagerAcquire(
if( pPager->errCode!=SQLITE_OK ){
rc = pPager->errCode;
}else{
+
+ if( bMmapOk && pagerUseWal(pPager) ){
+ rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
+ if( rc!=SQLITE_OK ) goto pager_acquire_err;
+ }
+
+ if( iFrame==0 && bMmapOk ){
+ void *pData = 0;
+
+ rc = sqlite3OsFetch(pPager->fd,
+ (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData
+ );
+
+ if( rc==SQLITE_OK && pData ){
+ if( pPager->eState>PAGER_READER ){
+ (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
+ }
+ if( pPg==0 ){
+ rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg);
+ }else{
+ sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData);
+ }
+ if( pPg ){
+ assert( rc==SQLITE_OK );
+ *ppPage = pPg;
+ return SQLITE_OK;
+ }
+ }
+ if( rc!=SQLITE_OK ){
+ goto pager_acquire_err;
+ }
+ }
+
rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
}
@@ -5141,9 +5307,13 @@ int sqlite3PagerAcquire(
memset(pPg->pData, 0, pPager->pageSize);
IOTRACE(("ZERO %p %d\n", pPager, pgno));
}else{
+ if( pagerUseWal(pPager) && bMmapOk==0 ){
+ rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
+ if( rc!=SQLITE_OK ) goto pager_acquire_err;
+ }
assert( pPg->pPager==pPager );
pPager->aStat[PAGER_STAT_MISS]++;
- rc = readDbPage(pPg);
+ rc = readDbPage(pPg, iFrame);
if( rc!=SQLITE_OK ){
goto pager_acquire_err;
}
@@ -5196,7 +5366,11 @@ DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
void sqlite3PagerUnref(DbPage *pPg){
if( pPg ){
Pager *pPager = pPg->pPager;
- sqlite3PcacheRelease(pPg);
+ if( pPg->flags & PGHDR_MMAP ){
+ pagerReleaseMapPage(pPg);
+ }else{
+ sqlite3PcacheRelease(pPg);
+ }
pagerUnlockIfUnused(pPager);
}
}
@@ -5531,6 +5705,7 @@ int sqlite3PagerWrite(DbPage *pDbPage){
Pager *pPager = pPg->pPager;
Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
+ assert( (pPg->flags & PGHDR_MMAP)==0 );
assert( pPager->eState>=PAGER_WRITER_LOCKED );
assert( pPager->eState!=PAGER_ERROR );
assert( assert_pager_state(pPager) );
@@ -6087,7 +6262,7 @@ int sqlite3PagerRollback(Pager *pPager){
}
assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK );
- assert( rc==SQLITE_OK || rc==SQLITE_FULL
+ assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT
|| rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR );
/* If an error occurs during a ROLLBACK, we can no longer trust the pager
@@ -6821,11 +6996,12 @@ static int pagerOpenWal(Pager *pPager){
** (e.g. due to malloc() failure), return an error code.
*/
if( rc==SQLITE_OK ){
- rc = sqlite3WalOpen(pPager->pVfs,
+ rc = sqlite3WalOpen(pPager->pVfs,
pPager->fd, pPager->zWal, pPager->exclusiveMode,
pPager->journalSizeLimit, &pPager->pWal
);
}
+ pagerFixMaplimit(pPager);
return rc;
}
@@ -6916,6 +7092,7 @@ int sqlite3PagerCloseWal(Pager *pPager){
rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags,
pPager->pageSize, (u8*)pPager->pTmpSpace);
pPager->pWal = 0;
+ pagerFixMaplimit(pPager);
}
}
return rc;
diff --git a/src/pager.h b/src/pager.h
index 90f8e6af7..6f659136e 100644
--- a/src/pager.h
+++ b/src/pager.h
@@ -79,6 +79,12 @@ typedef struct PgHdr DbPage;
#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */
/*
+** Flags that make up the mask passed to sqlite3PagerAcquire().
+*/
+#define PAGER_ACQUIRE_NOCONTENT 0x01 /* Do not load data from disk */
+#define PAGER_ACQUIRE_READONLY 0x02 /* Read-only page is acceptable */
+
+/*
** The remainder of this file contains the declarations of the functions
** that make up the Pager sub-system API. See source code comments for
** a detailed description of each routine.
@@ -102,6 +108,7 @@ void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *);
int sqlite3PagerSetPagesize(Pager*, u32*, int);
int sqlite3PagerMaxPageCount(Pager*, int);
void sqlite3PagerSetCachesize(Pager*, int);
+void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64);
void sqlite3PagerShrink(Pager*);
void sqlite3PagerSetSafetyLevel(Pager*,int,int,int);
int sqlite3PagerLockingMode(Pager *, int);
diff --git a/src/pcache.h b/src/pcache.h
index b9135fd85..f4d4ad71c 100644
--- a/src/pcache.h
+++ b/src/pcache.h
@@ -53,6 +53,8 @@ struct PgHdr {
#define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */
#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */
+#define PGHDR_MMAP 0x040 /* This is an mmap page object */
+
/* Initialize and shutdown the page cache subsystem */
int sqlite3PcacheInitialize(void);
void sqlite3PcacheShutdown(void);
diff --git a/src/pragma.c b/src/pragma.c
index f6dadbd4a..3527c0974 100644
--- a/src/pragma.c
+++ b/src/pragma.c
@@ -745,6 +745,40 @@ void sqlite3Pragma(
}else
/*
+ ** PRAGMA [database.]mmap_limit(N)
+ **
+ ** Used to set mapping size limit. The mapping size limit is
+ ** used to limit the aggregate size of all memory mapped regions of the
+ ** database file. If this parameter is set to zero, then memory mapping
+ ** is not used at all. If N is negative, then the default memory map
+ ** limit determined by sqlite3_config(SQLITE_CONFIG_MMAP_LIMIT) is set.
+ ** The parameter N is measured in bytes.
+ **
+ ** This value is advisory. The underlying VFS is free to memory map
+ ** as little or as much as it wants. Except, if N is set to 0 then the
+ ** upper layers will never invoke the xFetch interfaces to the VFS.
+ */
+ if( sqlite3StrICmp(zLeft,"mmap_limit")==0 ){
+ sqlite3_int64 mx;
+ assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+ if( zRight ){
+ int ii;
+ sqlite3Atoi64(zRight, &mx, 1000, SQLITE_UTF8);
+ if( mx<0 ) mx = sqlite3GlobalConfig.mxMmap;
+ if( pId2->n==0 ) db->mxMmap = mx;
+ for(ii=db->nDb-1; ii>=0; ii--){
+ if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){
+ sqlite3BtreeSetMmapLimit(db->aDb[ii].pBt, mx);
+ }
+ }
+ }
+ mx = -1;
+ if( sqlite3_file_control(db,zDb,SQLITE_FCNTL_MMAP_LIMIT,&mx)==SQLITE_OK ){
+ returnSingleInt(pParse, "mmap_limit", mx);
+ }
+ }else
+
+ /*
** PRAGMA temp_store
** PRAGMA temp_store = "default"|"memory"|"file"
**
diff --git a/src/shell.c b/src/shell.c
index c6d7fa3d5..6a0e04747 100644
--- a/src/shell.c
+++ b/src/shell.c
@@ -1553,6 +1553,43 @@ static int booleanValue(char *zArg){
}
/*
+** Interpret zArg as an integer value, possibly with suffixes.
+*/
+static sqlite3_int64 integerValue(const char *zArg){
+ sqlite3_int64 v = 0;
+ static const struct { char *zSuffix; int iMult; } aMult[] = {
+ { "KiB", 1024 },
+ { "MiB", 1024*1024 },
+ { "GiB", 1024*1024*1024 },
+ { "KB", 1000 },
+ { "MB", 1000000 },
+ { "GB", 1000000000 },
+ { "K", 1000 },
+ { "M", 1000000 },
+ { "G", 1000000000 },
+ };
+ int i;
+ int isNeg = 0;
+ if( zArg[0]=='-' ){
+ isNeg = 1;
+ zArg++;
+ }else if( zArg[0]=='+' ){
+ zArg++;
+ }
+ while( isdigit(zArg[0]) ){
+ v = v*10 + zArg[0] - '0';
+ zArg++;
+ }
+ for(i=0; i<sizeof(aMult)/sizeof(aMult[0]); i++){
+ if( sqlite3_stricmp(aMult[i].zSuffix, zArg)==0 ){
+ v *= aMult[i].iMult;
+ break;
+ }
+ }
+ return isNeg? -v : v;
+}
+
+/*
** Close an output file, assuming it is not stderr or stdout
*/
static void output_file_close(FILE *f){
@@ -2469,7 +2506,7 @@ static int do_meta_command(char *zLine, struct callback_data *p){
/* sqlite3_test_control(int, uint) */
case SQLITE_TESTCTRL_PENDING_BYTE:
if( nArg==3 ){
- unsigned int opt = (unsigned int)atoi(azArg[2]);
+ unsigned int opt = (unsigned int)integerValue(azArg[2]);
rc = sqlite3_test_control(testctrl, opt);
printf("%d (0x%08x)\n", rc, rc);
} else {
@@ -2561,7 +2598,7 @@ static int do_meta_command(char *zLine, struct callback_data *p){
#if defined(SQLITE_DEBUG) && defined(SQLITE_ENABLE_WHERETRACE)
if( c=='w' && strncmp(azArg[0], "wheretrace", n)==0 ){
extern int sqlite3WhereTrace;
- sqlite3WhereTrace = atoi(azArg[1]);
+ sqlite3WhereTrace = booleanValue(azArg[1]);
}else
#endif
@@ -2882,6 +2919,7 @@ static const char zOptions[] =
" -interactive force interactive I/O\n"
" -line set output mode to 'line'\n"
" -list set output mode to 'list'\n"
+ " -mmap N default mmap size set to N\n"
#ifdef SQLITE_ENABLE_MULTIPLEX
" -multiplex enable the multiplexor VFS\n"
#endif
@@ -3001,12 +3039,7 @@ int main(int argc, char **argv){
sqlite3_int64 szHeap;
zSize = cmdline_option_value(argc, argv, ++i);
- szHeap = atoi(zSize);
- for(j=0; (c = zSize[j])!=0; j++){
- if( c=='M' ){ szHeap *= 1000000; break; }
- if( c=='K' ){ szHeap *= 1000; break; }
- if( c=='G' ){ szHeap *= 1000000000; break; }
- }
+ szHeap = integerValue(zSize);
if( szHeap>0x7fff0000 ) szHeap = 0x7fff0000;
sqlite3_config(SQLITE_CONFIG_HEAP, malloc((int)szHeap), (int)szHeap, 64);
#endif
@@ -3026,6 +3059,8 @@ int main(int argc, char **argv){
extern int sqlite3_multiple_initialize(const char*,int);
sqlite3_multiplex_initialize(0, 1);
#endif
+ }else if( strcmp(z,"-mmap")==0 ){
+ sqlite3_config(SQLITE_CONFIG_MMAP_LIMIT, integerValue(cmdline_option_value(argc,argv,++i)));
}else if( strcmp(z,"-vfs")==0 ){
sqlite3_vfs *pVfs = sqlite3_vfs_find(cmdline_option_value(argc,argv,++i));
if( pVfs ){
@@ -3111,6 +3146,8 @@ int main(int argc, char **argv){
stdin_is_interactive = 0;
}else if( strcmp(z,"-heap")==0 ){
i++;
+ }else if( strcmp(z,"-mmap")==0 ){
+ i++;
}else if( strcmp(z,"-vfs")==0 ){
i++;
#ifdef SQLITE_ENABLE_VFSTRACE
diff --git a/src/sqlite.h.in b/src/sqlite.h.in
index 5c89f7e4e..a4fb1a8a9 100644
--- a/src/sqlite.h.in
+++ b/src/sqlite.h.in
@@ -470,6 +470,7 @@ int sqlite3_exec(
#define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8))
#define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8))
#define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8))
+#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8))
#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8))
#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8))
#define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8))
@@ -728,6 +729,9 @@ struct sqlite3_io_methods {
void (*xShmBarrier)(sqlite3_file*);
int (*xShmUnmap)(sqlite3_file*, int deleteFlag);
/* Methods above are valid for version 2 */
+ int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp);
+ int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p);
+ /* Methods above are valid for version 3 */
/* Additional methods may be added in future releases */
};
@@ -882,6 +886,12 @@ struct sqlite3_io_methods {
** written into memory obtained from [sqlite3_malloc()]. The caller should
** invoke [sqlite3_free()] on the result to avoid a memory leak.
**
+** <li>[[SQLITE_FCNTL_MMAP_LIMIT]]
+** The argument is assumed to pointer to a value of type sqlite3_int64 that
+** is an advisory maximum number of bytes in the file to memory map. The
+** pointer is overwritten with the old value. The limit is not changed if
+** the original value pointed to is negative.
+**
** </ul>
*/
#define SQLITE_FCNTL_LOCKSTATE 1
@@ -900,6 +910,7 @@ struct sqlite3_io_methods {
#define SQLITE_FCNTL_PRAGMA 14
#define SQLITE_FCNTL_BUSYHANDLER 15
#define SQLITE_FCNTL_TEMPFILENAME 16
+#define SQLITE_FCNTL_MMAP_LIMIT 18
/*
** CAPI3REF: Mutex Handle
@@ -1628,6 +1639,19 @@ struct sqlite3_mem_methods {
** points to has just been executed. Or, if the fourth parameter is 2, then
** the connection being passed as the second parameter is being closed. The
** third parameter is passed NULL In this case.
+**
+** [[SQLITE_CONFIG_MMAP_LIMIT]]
+** <dt>SQLITE_CONFIG_MMAP_LIMIT
+** <dd>The sole argument should be a 64-bit integer (an sqlite3_int64) that
+** is the default maximum number of bytes of process address space that
+** should be used for accessing each database file using memory mapping.
+** The default setting can be overridden by each database connection using
+** either the [PRAGMA mmap_limit] command or the "mmaplimit" query parameter
+** on the [URI filename] when opening the databaes file or by using the
+** [SQLITE_FCNTL_MMAP_LIMIT] file control. The value set here overrides the
+** compile-time default that is set using the [SQLITE_DEFAULT_MMAP_LIMIT]
+** compile-time option. If the argument to this option is negative, then
+** the memory map limit is set to the compile-time default.
** </dl>
*/
#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */
@@ -1651,6 +1675,7 @@ struct sqlite3_mem_methods {
#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */
#define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */
#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */
+#define SQLITE_CONFIG_MMAP_LIMIT 22 /* sqlite3_int64 */
/*
** CAPI3REF: Database Connection Configuration Options
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 129c4c5ea..d89405179 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -834,6 +834,7 @@ struct sqlite3 {
int nDb; /* Number of backends currently in use */
int flags; /* Miscellaneous flags. See below */
i64 lastRowid; /* ROWID of most recent insert (see above) */
+ i64 mxMmap; /* Default mmap_limit setting */
unsigned int openFlags; /* Flags passed to sqlite3_vfs.xOpen() */
int errCode; /* Most recent error code (SQLITE_*) */
int errMask; /* & result codes with this before returning */
@@ -2505,6 +2506,7 @@ struct Sqlite3Config {
void *pHeap; /* Heap storage space */
int nHeap; /* Size of pHeap[] */
int mnReq, mxReq; /* Min and max heap requests sizes */
+ sqlite3_int64 mxMmap; /* Maximum mmap() space per open file */
void *pScratch; /* Scratch memory */
int szScratch; /* Size of each scratch buffer */
int nScratch; /* Number of scratch buffers */
diff --git a/src/sqliteLimit.h b/src/sqliteLimit.h
index c7aee53ce..d3a3b4ac9 100644
--- a/src/sqliteLimit.h
+++ b/src/sqliteLimit.h
@@ -206,3 +206,23 @@
#ifndef SQLITE_MAX_TRIGGER_DEPTH
# define SQLITE_MAX_TRIGGER_DEPTH 1000
#endif
+
+/*
+** Default maximum size of memory used by xFetch in the VFS.
+*/
+#ifdef __APPLE__
+# include <TargetConditionals.h>
+# if TARGET_OS_IPHONE
+# define SQLITE_DEFAULT_MMAP_LIMIT 0
+# endif
+#endif
+#ifndef SQLITE_DEFAULT_MMAP_LIMIT
+# if defined(__linux__) \
+ || defined(_WIN32) \
+ || (defined(__APPLE__) && defined(__MACH__)) \
+ || defined(__sun)
+# define SQLITE_DEFAULT_MMAP_LIMIT 268435456 /* = 256*1024*1024 */
+# else
+# define SQLITE_DEFAULT_MMAP_LIMIT 0
+# endif
+#endif
diff --git a/src/test1.c b/src/test1.c
index 0bace8442..fb340f92d 100644
--- a/src/test1.c
+++ b/src/test1.c
@@ -5844,6 +5844,31 @@ static int test_test_control(
return TCL_OK;
}
+#if SQLITE_OS_UNIX
+#include <sys/time.h>
+#include <sys/resource.h>
+
+static int test_getrusage(
+ void * clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+ char buf[1024];
+ struct rusage r;
+ memset(&r, 0, sizeof(r));
+ getrusage(RUSAGE_SELF, &r);
+
+ sprintf(buf, "ru_utime=%d.%06d ru_stime=%d.%06d ru_minflt=%d ru_majflt=%d",
+ (int)r.ru_utime.tv_sec, (int)r.ru_utime.tv_usec,
+ (int)r.ru_stime.tv_sec, (int)r.ru_stime.tv_usec,
+ (int)r.ru_minflt, (int)r.ru_majflt
+ );
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, -1));
+ return TCL_OK;
+}
+#endif
+
#if SQLITE_OS_WIN
/*
** Information passed from the main thread into the windows file locker
@@ -6233,6 +6258,9 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
{ "print_explain_query_plan", test_print_eqp, 0 },
#endif
{ "sqlite3_test_control", test_test_control },
+#if SQLITE_OS_UNIX
+ { "getrusage", test_getrusage },
+#endif
};
static int bitmask_size = sizeof(Bitmask)*8;
int i;
diff --git a/src/test_syscall.c b/src/test_syscall.c
index d484f22db..3d117fb2a 100644
--- a/src/test_syscall.c
+++ b/src/test_syscall.c
@@ -23,7 +23,7 @@
**
** open close access getcwd stat fstat
** ftruncate fcntl read pread pread64 write
-** pwrite pwrite64 fchmod fallocate
+** pwrite pwrite64 fchmod fallocate mmap
**
** test_syscall uninstall
** Uninstall all wrapper functions.
@@ -81,6 +81,7 @@
/* From test1.c */
extern const char *sqlite3TestErrorName(int);
+#include <sys/mman.h>
#include <sys/types.h>
#include <errno.h>
@@ -106,7 +107,8 @@ static int ts_pwrite(int fd, const void *aBuf, size_t nBuf, off_t off);
static int ts_pwrite64(int fd, const void *aBuf, size_t nBuf, off_t off);
static int ts_fchmod(int fd, mode_t mode);
static int ts_fallocate(int fd, off_t off, off_t len);
-
+static void *ts_mmap(void *, size_t, int, int, int, off_t);
+static void *ts_mremap(void*, size_t, size_t, int, ...);
struct TestSyscallArray {
const char *zName;
@@ -131,6 +133,8 @@ struct TestSyscallArray {
/* 13 */ { "pwrite64", (sqlite3_syscall_ptr)ts_pwrite64, 0, 0, 0 },
/* 14 */ { "fchmod", (sqlite3_syscall_ptr)ts_fchmod, 0, 0, 0 },
/* 15 */ { "fallocate", (sqlite3_syscall_ptr)ts_fallocate, 0, 0, 0 },
+ /* 16 */ { "mmap", (sqlite3_syscall_ptr)ts_mmap, 0, 0, 0 },
+ /* 17 */ { "mremap", (sqlite3_syscall_ptr)ts_mremap, 0, 0, 0 },
{ 0, 0, 0, 0, 0 }
};
@@ -152,6 +156,8 @@ struct TestSyscallArray {
aSyscall[13].xOrig)
#define orig_fchmod ((int(*)(int,mode_t))aSyscall[14].xOrig)
#define orig_fallocate ((int(*)(int,off_t,off_t))aSyscall[15].xOrig)
+#define orig_mmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[16].xOrig)
+#define orig_mremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[17].xOrig)
/*
** This function is called exactly once from within each invocation of a
@@ -377,6 +383,31 @@ static int ts_fallocate(int fd, off_t off, off_t len){
return orig_fallocate(fd, off, len);
}
+static void *ts_mmap(
+ void *pAddr,
+ size_t nByte,
+ int prot,
+ int flags,
+ int fd,
+ off_t iOff
+){
+ if( tsIsFailErrno("mmap") ){
+ return MAP_FAILED;
+ }
+ return orig_mmap(pAddr, nByte, prot, flags, fd, iOff);
+}
+
+static void *ts_mremap(void *a, size_t b, size_t c, int d, ...){
+ va_list ap;
+ void *pArg;
+ if( tsIsFailErrno("mremap") ){
+ return MAP_FAILED;
+ }
+ va_start(ap, d);
+ pArg = va_arg(ap, void *);
+ return orig_mremap(a, b, c, d, pArg);
+}
+
static int test_syscall_install(
void * clientData,
Tcl_Interp *interp,
diff --git a/src/wal.c b/src/wal.c
index 0d7271bf0..881f0a4c7 100644
--- a/src/wal.c
+++ b/src/wal.c
@@ -1208,7 +1208,7 @@ finished:
*/
if( pWal->hdr.nPage ){
sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s",
- pWal->hdr.nPage, pWal->zWalName
+ pWal->hdr.mxFrame, pWal->zWalName
);
}
}
@@ -1722,8 +1722,8 @@ static int walCheckpoint(
rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
}
- /* If the database file may grow as a result of this checkpoint, hint
- ** about the eventual size of the db file to the VFS layer.
+ /* If the database may grow as a result of this checkpoint, hint
+ ** about the eventual size of the db file to the VFS layer.
*/
if( rc==SQLITE_OK ){
i64 nReq = ((i64)mxPage * szPage);
@@ -1733,6 +1733,7 @@ static int walCheckpoint(
}
}
+
/* Iterate through the contents of the WAL, copying data to the db file. */
while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
i64 iOffset;
@@ -2287,19 +2288,17 @@ void sqlite3WalEndReadTransaction(Wal *pWal){
}
/*
-** Read a page from the WAL, if it is present in the WAL and if the
-** current read transaction is configured to use the WAL.
+** Search the wal file for page pgno. If found, set *piRead to the frame that
+** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
+** to zero.
**
-** The *pInWal is set to 1 if the requested page is in the WAL and
-** has been loaded. Or *pInWal is set to 0 if the page was not in
-** the WAL and needs to be read out of the database.
+** Return SQLITE_OK if successful, or an error code if an error occurs. If an
+** error does occur, the final value of *piRead is undefined.
*/
-int sqlite3WalRead(
+int sqlite3WalFindFrame(
Wal *pWal, /* WAL handle */
Pgno pgno, /* Database page number to read data for */
- int *pInWal, /* OUT: True if data is read from WAL */
- int nOut, /* Size of buffer pOut in bytes */
- u8 *pOut /* Buffer to write page data to */
+ u32 *piRead /* OUT: Frame number (or zero) */
){
u32 iRead = 0; /* If !=0, WAL frame to return data from */
u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */
@@ -2315,7 +2314,7 @@ int sqlite3WalRead(
** WAL were empty.
*/
if( iLast==0 || pWal->readLock==0 ){
- *pInWal = 0;
+ *piRead = 0;
return SQLITE_OK;
}
@@ -2386,26 +2385,31 @@ int sqlite3WalRead(
}
#endif
- /* If iRead is non-zero, then it is the log frame number that contains the
- ** required page. Read and return data from the log file.
- */
- if( iRead ){
- int sz;
- i64 iOffset;
- sz = pWal->hdr.szPage;
- sz = (sz&0xfe00) + ((sz&0x0001)<<16);
- testcase( sz<=32768 );
- testcase( sz>=65536 );
- iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
- *pInWal = 1;
- /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
- return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset);
- }
-
- *pInWal = 0;
+ *piRead = iRead;
return SQLITE_OK;
}
+/*
+** Read the contents of frame iRead from the wal file into buffer pOut
+** (which is nOut bytes in size). Return SQLITE_OK if successful, or an
+** error code otherwise.
+*/
+int sqlite3WalReadFrame(
+ Wal *pWal, /* WAL handle */
+ u32 iRead, /* Frame to read */
+ int nOut, /* Size of buffer pOut in bytes */
+ u8 *pOut /* Buffer to write page data to */
+){
+ int sz;
+ i64 iOffset;
+ sz = pWal->hdr.szPage;
+ sz = (sz&0xfe00) + ((sz&0x0001)<<16);
+ testcase( sz<=32768 );
+ testcase( sz>=65536 );
+ iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
+ /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
+ return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset);
+}
/*
** Return the size of the database in pages (or zero, if unknown).
@@ -2952,6 +2956,9 @@ int sqlite3WalCheckpoint(
/* Read the wal-index header. */
if( rc==SQLITE_OK ){
rc = walIndexReadHdr(pWal, &isChanged);
+ if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
+ sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
+ }
}
/* Copy data from the log to the database file. */
diff --git a/src/wal.h b/src/wal.h
index a848de152..ff7624af6 100644
--- a/src/wal.h
+++ b/src/wal.h
@@ -31,7 +31,6 @@
# define sqlite3WalClose(w,x,y,z) 0
# define sqlite3WalBeginReadTransaction(y,z) 0
# define sqlite3WalEndReadTransaction(z)
-# define sqlite3WalRead(v,w,x,y,z) 0
# define sqlite3WalDbsize(y) 0
# define sqlite3WalBeginWriteTransaction(y) 0
# define sqlite3WalEndWriteTransaction(x) 0
@@ -71,7 +70,8 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *);
void sqlite3WalEndReadTransaction(Wal *pWal);
/* Read a page from the write-ahead log, if it is present. */
-int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut);
+int sqlite3WalFindFrame(Wal *, Pgno, u32 *);
+int sqlite3WalReadFrame(Wal *, u32, int, u8 *);
/* If the WAL is not empty, return the size of the database. */
Pgno sqlite3WalDbsize(Wal *pWal);