aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backup.c180
-rw-r--r--src/btree.c44
-rw-r--r--src/btree.h1
-rw-r--r--src/build.c82
-rw-r--r--src/ctime.c6
-rw-r--r--src/fkey.c17
-rw-r--r--src/main.c10
-rw-r--r--src/os_unix.c266
-rw-r--r--src/os_win.c327
-rw-r--r--src/pager.c30
-rw-r--r--src/pager.h4
-rw-r--r--src/pcache1.c257
-rw-r--r--src/sqlite.h.in27
-rw-r--r--src/sqliteInt.h20
-rw-r--r--src/tclsqlite.c2
-rw-r--r--src/test1.c64
-rw-r--r--src/test6.c10
-rw-r--r--src/test_config.c12
-rw-r--r--src/test_malloc.c4
-rw-r--r--src/test_multiplex.c9
-rw-r--r--src/test_quota.c171
-rw-r--r--src/test_rtree.c2
-rw-r--r--src/test_syscall.c2
-rw-r--r--src/test_thread.c2
-rw-r--r--src/test_vfs.c21
-rw-r--r--src/vdbe.c129
-rw-r--r--src/vdbe.h8
-rw-r--r--src/vdbeInt.h23
-rw-r--r--src/vdbeaux.c26
-rw-r--r--src/vdbemem.c36
-rw-r--r--src/vdbesort.c711
-rw-r--r--src/wal.c4
-rw-r--r--src/where.c2
33 files changed, 2134 insertions, 375 deletions
diff --git a/src/backup.c b/src/backup.c
index 4d7ae3183..70a782665 100644
--- a/src/backup.c
+++ b/src/backup.c
@@ -410,102 +410,106 @@ int sqlite3_backup_step(sqlite3_backup *p, int nPage){
** the case where the source and destination databases have the
** same schema version.
*/
- if( rc==SQLITE_DONE
- && (rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1))==SQLITE_OK
- ){
- int nDestTruncate;
-
- if( p->pDestDb ){
- sqlite3ResetInternalSchema(p->pDestDb, -1);
- }
-
- /* Set nDestTruncate to the final number of pages in the destination
- ** database. The complication here is that the destination page
- ** size may be different to the source page size.
- **
- ** If the source page size is smaller than the destination page size,
- ** round up. In this case the call to sqlite3OsTruncate() below will
- ** fix the size of the file. However it is important to call
- ** sqlite3PagerTruncateImage() here so that any pages in the
- ** destination file that lie beyond the nDestTruncate page mark are
- ** journalled by PagerCommitPhaseOne() before they are destroyed
- ** by the file truncation.
- */
- assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) );
- assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) );
- if( pgszSrc<pgszDest ){
- int ratio = pgszDest/pgszSrc;
- nDestTruncate = (nSrcPage+ratio-1)/ratio;
- if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){
- nDestTruncate--;
+ if( rc==SQLITE_DONE ){
+ rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1);
+ if( rc==SQLITE_OK ){
+ if( p->pDestDb ){
+ sqlite3ResetInternalSchema(p->pDestDb, -1);
+ }
+ if( destMode==PAGER_JOURNALMODE_WAL ){
+ rc = sqlite3BtreeSetVersion(p->pDest, 2);
}
- }else{
- nDestTruncate = nSrcPage * (pgszSrc/pgszDest);
}
- sqlite3PagerTruncateImage(pDestPager, nDestTruncate);
-
- if( pgszSrc<pgszDest ){
- /* If the source page-size is smaller than the destination page-size,
- ** two extra things may need to happen:
- **
- ** * The destination may need to be truncated, and
+ if( rc==SQLITE_OK ){
+ int nDestTruncate;
+ /* Set nDestTruncate to the final number of pages in the destination
+ ** database. The complication here is that the destination page
+ ** size may be different to the source page size.
**
- ** * Data stored on the pages immediately following the
- ** pending-byte page in the source database may need to be
- ** copied into the destination database.
+ ** If the source page size is smaller than the destination page size,
+ ** round up. In this case the call to sqlite3OsTruncate() below will
+ ** fix the size of the file. However it is important to call
+ ** sqlite3PagerTruncateImage() here so that any pages in the
+ ** destination file that lie beyond the nDestTruncate page mark are
+ ** journalled by PagerCommitPhaseOne() before they are destroyed
+ ** by the file truncation.
*/
- const i64 iSize = (i64)pgszSrc * (i64)nSrcPage;
- sqlite3_file * const pFile = sqlite3PagerFile(pDestPager);
- i64 iOff;
- i64 iEnd;
-
- assert( pFile );
- assert( (i64)nDestTruncate*(i64)pgszDest >= iSize || (
- nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1)
- && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest
- ));
-
- /* This call ensures that all data required to recreate the original
- ** database has been stored in the journal for pDestPager and the
- ** journal synced to disk. So at this point we may safely modify
- ** the database file in any way, knowing that if a power failure
- ** occurs, the original database will be reconstructed from the
- ** journal file. */
- rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1);
-
- /* Write the extra pages and truncate the database file as required. */
- iEnd = MIN(PENDING_BYTE + pgszDest, iSize);
- for(
- iOff=PENDING_BYTE+pgszSrc;
- rc==SQLITE_OK && iOff<iEnd;
- iOff+=pgszSrc
- ){
- PgHdr *pSrcPg = 0;
- const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1);
- rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
- if( rc==SQLITE_OK ){
- u8 *zData = sqlite3PagerGetData(pSrcPg);
- rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff);
+ assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) );
+ assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) );
+ if( pgszSrc<pgszDest ){
+ int ratio = pgszDest/pgszSrc;
+ nDestTruncate = (nSrcPage+ratio-1)/ratio;
+ if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){
+ nDestTruncate--;
}
- sqlite3PagerUnref(pSrcPg);
- }
- if( rc==SQLITE_OK ){
- rc = backupTruncateFile(pFile, iSize);
+ }else{
+ nDestTruncate = nSrcPage * (pgszSrc/pgszDest);
}
+ sqlite3PagerTruncateImage(pDestPager, nDestTruncate);
+
+ if( pgszSrc<pgszDest ){
+ /* If the source page-size is smaller than the destination page-size,
+ ** two extra things may need to happen:
+ **
+ ** * The destination may need to be truncated, and
+ **
+ ** * Data stored on the pages immediately following the
+ ** pending-byte page in the source database may need to be
+ ** copied into the destination database.
+ */
+ const i64 iSize = (i64)pgszSrc * (i64)nSrcPage;
+ sqlite3_file * const pFile = sqlite3PagerFile(pDestPager);
+ i64 iOff;
+ i64 iEnd;
+
+ assert( pFile );
+ assert( (i64)nDestTruncate*(i64)pgszDest >= iSize || (
+ nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1)
+ && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest
+ ));
+
+ /* This call ensures that all data required to recreate the original
+ ** database has been stored in the journal for pDestPager and the
+ ** journal synced to disk. So at this point we may safely modify
+ ** the database file in any way, knowing that if a power failure
+ ** occurs, the original database will be reconstructed from the
+ ** journal file. */
+ rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1);
+
+ /* Write the extra pages and truncate the database file as required */
+ iEnd = MIN(PENDING_BYTE + pgszDest, iSize);
+ for(
+ iOff=PENDING_BYTE+pgszSrc;
+ rc==SQLITE_OK && iOff<iEnd;
+ iOff+=pgszSrc
+ ){
+ PgHdr *pSrcPg = 0;
+ const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1);
+ rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
+ if( rc==SQLITE_OK ){
+ u8 *zData = sqlite3PagerGetData(pSrcPg);
+ rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff);
+ }
+ sqlite3PagerUnref(pSrcPg);
+ }
+ if( rc==SQLITE_OK ){
+ rc = backupTruncateFile(pFile, iSize);
+ }
- /* Sync the database file to disk. */
- if( rc==SQLITE_OK ){
- rc = sqlite3PagerSync(pDestPager);
+ /* Sync the database file to disk. */
+ if( rc==SQLITE_OK ){
+ rc = sqlite3PagerSync(pDestPager);
+ }
+ }else{
+ rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0);
+ }
+
+ /* Finish committing the transaction to the destination database. */
+ if( SQLITE_OK==rc
+ && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0))
+ ){
+ rc = SQLITE_DONE;
}
- }else{
- rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0);
- }
-
- /* Finish committing the transaction to the destination database. */
- if( SQLITE_OK==rc
- && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0))
- ){
- rc = SQLITE_DONE;
}
}
diff --git a/src/btree.c b/src/btree.c
index 3d7162dbb..d77fce4c8 100644
--- a/src/btree.c
+++ b/src/btree.c
@@ -1734,11 +1734,22 @@ int sqlite3BtreeOpen(
/* A BTREE_SINGLE database is always a temporary and/or ephemeral */
assert( (flags & BTREE_SINGLE)==0 || isTempDb );
+ /* The BTREE_SORTER flag is only used if SQLITE_OMIT_MERGE_SORT is undef */
+#ifdef SQLITE_OMIT_MERGE_SORT
+ assert( (flags & BTREE_SORTER)==0 );
+#endif
+
+ /* BTREE_SORTER is always on a BTREE_SINGLE, BTREE_OMIT_JOURNAL */
+ assert( (flags & BTREE_SORTER)==0 ||
+ (flags & (BTREE_SINGLE|BTREE_OMIT_JOURNAL))
+ ==(BTREE_SINGLE|BTREE_OMIT_JOURNAL) );
+
if( db->flags & SQLITE_NoReadlock ){
flags |= BTREE_NO_READLOCK;
}
if( isMemdb ){
flags |= BTREE_MEMORY;
+ flags &= ~BTREE_SORTER;
}
if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){
vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB;
@@ -3468,7 +3479,8 @@ static int btreeCursor(
return SQLITE_READONLY;
}
if( iTable==1 && btreePagecount(pBt)==0 ){
- return SQLITE_EMPTY;
+ assert( wrFlag==0 );
+ iTable = 0;
}
/* Now that no other errors can occur, finish filling in the BtCursor
@@ -4222,6 +4234,9 @@ static int moveToRoot(BtCursor *pCur){
releasePage(pCur->apPage[i]);
}
pCur->iPage = 0;
+ }else if( pCur->pgnoRoot==0 ){
+ pCur->eState = CURSOR_INVALID;
+ return SQLITE_OK;
}else{
rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]);
if( rc!=SQLITE_OK ){
@@ -4331,7 +4346,7 @@ int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
rc = moveToRoot(pCur);
if( rc==SQLITE_OK ){
if( pCur->eState==CURSOR_INVALID ){
- assert( pCur->apPage[pCur->iPage]->nCell==0 );
+ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
*pRes = 1;
}else{
assert( pCur->apPage[pCur->iPage]->nCell>0 );
@@ -4370,7 +4385,7 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
rc = moveToRoot(pCur);
if( rc==SQLITE_OK ){
if( CURSOR_INVALID==pCur->eState ){
- assert( pCur->apPage[pCur->iPage]->nCell==0 );
+ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
*pRes = 1;
}else{
assert( pCur->eState==CURSOR_VALID );
@@ -4443,12 +4458,12 @@ int sqlite3BtreeMovetoUnpacked(
if( rc ){
return rc;
}
- assert( pCur->apPage[pCur->iPage] );
- assert( pCur->apPage[pCur->iPage]->isInit );
- assert( pCur->apPage[pCur->iPage]->nCell>0 || pCur->eState==CURSOR_INVALID );
+ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] );
+ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit );
+ assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 );
if( pCur->eState==CURSOR_INVALID ){
*pRes = -1;
- assert( pCur->apPage[pCur->iPage]->nCell==0 );
+ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
return SQLITE_OK;
}
assert( pCur->apPage[0]->intKey || pIdxKey );
@@ -7277,9 +7292,16 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
return rc;
}
int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
+ BtShared *pBt = p->pBt;
int rc;
sqlite3BtreeEnter(p);
- rc = btreeDropTable(p, iTable, piMoved);
+ if( (pBt->openFlags&BTREE_SINGLE) ){ /* BTREE_SINGLE: file holds at most one b-tree, so reset the whole file */
+ pBt->nPage = 0; /* Forget every page currently counted in the database */
+ sqlite3PagerTruncateImage(pBt->pPager, 1); /* Shrink the pager image to a single page */
+ rc = newDatabase(pBt); /* NOTE(review): *piMoved is not written on this path - confirm callers pre-initialize it */
+ }else{
+ rc = btreeDropTable(p, iTable, piMoved); /* Ordinary case: drop only table iTable */
+ }
sqlite3BtreeLeave(p);
return rc;
}
@@ -7358,6 +7380,11 @@ int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
int sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){
i64 nEntry = 0; /* Value to return in *pnEntry */
int rc; /* Return code */
+
+ if( pCur->pgnoRoot==0 ){
+ *pnEntry = 0;
+ return SQLITE_OK;
+ }
rc = moveToRoot(pCur);
/* Unless an error occurs, the following loop runs one iteration for each
@@ -8142,7 +8169,6 @@ int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
BtShared *pBt = pBtree->pBt;
int rc; /* Return code */
- assert( pBtree->inTrans==TRANS_NONE );
assert( iVersion==1 || iVersion==2 );
/* If setting the version fields to 1, do not automatically open the
diff --git a/src/btree.h b/src/btree.h
index 9e3a73b3b..ce19826ad 100644
--- a/src/btree.h
+++ b/src/btree.h
@@ -61,6 +61,7 @@ int sqlite3BtreeOpen(
#define BTREE_MEMORY 4 /* This is an in-memory DB */
#define BTREE_SINGLE 8 /* The file contains at most 1 b-tree */
#define BTREE_UNORDERED 16 /* Use of a hash implementation is OK */
+#define BTREE_SORTER 32 /* Used as accumulator in external merge sort */
int sqlite3BtreeClose(Btree*);
int sqlite3BtreeSetCacheSize(Btree*,int);
diff --git a/src/build.c b/src/build.c
index 455b35b56..29fbf9271 100644
--- a/src/build.c
+++ b/src/build.c
@@ -1674,7 +1674,7 @@ void sqlite3CreateView(
const char *z;
Token sEnd;
DbFixer sFix;
- Token *pName;
+ Token *pName = 0;
int iDb;
sqlite3 *db = pParse->db;
@@ -1981,6 +1981,29 @@ static void destroyTable(Parse *pParse, Table *pTab){
}
/*
+** Remove entries from the sqlite_stat1 and sqlite_stat2 tables
+** after a DROP INDEX or DROP TABLE command.
+**
+** For each of the two statistics tables that sqlite3FindTable() locates
+** in database iDb, a nested statement of the form
+**
+**     DELETE FROM <db>.<stat-table> WHERE <zType>=<zName>
+**
+** is generated with sqlite3NestedParse().  A statistics table that is
+** not found is silently skipped.  zType is inserted verbatim as the
+** column name of the WHERE clause; the callers in this file pass
+** "tbl" (DROP TABLE) or "idx" (DROP INDEX).
+*/
+static void sqlite3ClearStatTables(
+ Parse *pParse, /* The parsing context */
+ int iDb, /* The database number */
+ const char *zType, /* Column to match on: "idx" or "tbl" */
+ const char *zName /* Name of index or table */
+){
+ static const char *azStatTab[] = { "sqlite_stat1", "sqlite_stat2" };
+ int i;
+ const char *zDbName = pParse->db->aDb[iDb].zName;
+ for(i=0; i<ArraySize(azStatTab); i++){
+ if( sqlite3FindTable(pParse->db, azStatTab[i], zDbName) ){
+ sqlite3NestedParse(pParse,
+ "DELETE FROM %Q.%s WHERE %s=%Q",
+ zDbName, azStatTab[i], zType, zName
+ );
+ }
+ }
+}
+
+/*
** This routine is called to do the work of a DROP TABLE statement.
** pName is the name of the table to be dropped.
*/
@@ -2119,14 +2142,7 @@ void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView, int noErr){
sqlite3NestedParse(pParse,
"DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'",
pDb->zName, SCHEMA_TABLE(iDb), pTab->zName);
-
- /* Drop any statistics from the sqlite_stat1 table, if it exists */
- if( sqlite3FindTable(db, "sqlite_stat1", db->aDb[iDb].zName) ){
- sqlite3NestedParse(pParse,
- "DELETE FROM %Q.sqlite_stat1 WHERE tbl=%Q", pDb->zName, pTab->zName
- );
- }
-
+ sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName);
if( !isView && !IsVirtual(pTab) ){
destroyTable(pParse, pTab);
}
@@ -2308,6 +2324,7 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
Table *pTab = pIndex->pTable; /* The table that is indexed */
int iTab = pParse->nTab++; /* Btree cursor used for pTab */
int iIdx = pParse->nTab++; /* Btree cursor used for pIndex */
+ int iSorter = iTab; /* Cursor opened by OpenSorter (if in use) */
int addr1; /* Address of top of loop */
int tnum; /* Root page of index */
Vdbe *v; /* Generate code into this virtual machine */
@@ -2317,6 +2334,15 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
sqlite3 *db = pParse->db; /* The database connection */
int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
+ /* Set bUseSorter to use OP_OpenSorter, or clear it to insert directly
+ ** into the index. The sorter is used unless either OMIT_MERGE_SORT is
+ ** defined or the system is configured to store temp files in-memory. */
+#ifdef SQLITE_OMIT_MERGE_SORT
+ static const int bUseSorter = 0;
+#else
+ const int bUseSorter = !sqlite3TempInMemory(pParse->db);
+#endif
+
#ifndef SQLITE_OMIT_AUTHORIZATION
if( sqlite3AuthCheck(pParse, SQLITE_REINDEX, pIndex->zName, 0,
db->aDb[iDb].zName ) ){
@@ -2341,10 +2367,29 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
if( memRootPage>=0 ){
sqlite3VdbeChangeP5(v, 1);
}
+
+ /* Open the sorter cursor if we are to use one. */
+ if( bUseSorter ){
+ iSorter = pParse->nTab++;
+ sqlite3VdbeAddOp4(v, OP_OpenSorter, iSorter, 0, 0, (char*)pKey, P4_KEYINFO);
+ sqlite3VdbeChangeP5(v, BTREE_SORTER);
+ }
+
+ /* Open the table. Loop through all rows of the table, inserting index
+ ** records into the sorter. */
sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0);
regRecord = sqlite3GetTempReg(pParse);
regIdxKey = sqlite3GenerateIndexKey(pParse, pIndex, iTab, regRecord, 1);
+
+ if( bUseSorter ){
+ sqlite3VdbeAddOp2(v, OP_IdxInsert, iSorter, regRecord);
+ sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1);
+ sqlite3VdbeJumpHere(v, addr1);
+ addr1 = sqlite3VdbeAddOp2(v, OP_Sort, iSorter, 0);
+ sqlite3VdbeAddOp2(v, OP_RowKey, iSorter, regRecord);
+ }
+
if( pIndex->onError!=OE_None ){
const int regRowid = regIdxKey + pIndex->nColumn;
const int j2 = sqlite3VdbeCurrentAddr(v) + 2;
@@ -2363,13 +2408,15 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
sqlite3HaltConstraint(
pParse, OE_Abort, "indexed columns are not unique", P4_STATIC);
}
- sqlite3VdbeAddOp2(v, OP_IdxInsert, iIdx, regRecord);
+ sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, bUseSorter);
sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
sqlite3ReleaseTempReg(pParse, regRecord);
- sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1);
+ sqlite3VdbeAddOp2(v, OP_Next, iSorter, addr1+1);
sqlite3VdbeJumpHere(v, addr1);
+
sqlite3VdbeAddOp1(v, OP_Close, iTab);
sqlite3VdbeAddOp1(v, OP_Close, iIdx);
+ sqlite3VdbeAddOp1(v, OP_Close, iSorter);
}
/*
@@ -2949,15 +2996,9 @@ void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){
sqlite3BeginWriteOperation(pParse, 1, iDb);
sqlite3NestedParse(pParse,
"DELETE FROM %Q.%s WHERE name=%Q AND type='index'",
- db->aDb[iDb].zName, SCHEMA_TABLE(iDb),
- pIndex->zName
+ db->aDb[iDb].zName, SCHEMA_TABLE(iDb), pIndex->zName
);
- if( sqlite3FindTable(db, "sqlite_stat1", db->aDb[iDb].zName) ){
- sqlite3NestedParse(pParse,
- "DELETE FROM %Q.sqlite_stat1 WHERE idx=%Q",
- db->aDb[iDb].zName, pIndex->zName
- );
- }
+ sqlite3ClearStatTables(pParse, iDb, "idx", pIndex->zName);
sqlite3ChangeCookie(pParse, iDb);
destroyRootPage(pParse, pIndex->tnum, iDb);
sqlite3VdbeAddOp4(v, OP_DropIndex, iDb, 0, 0, pIndex->zName, 0);
@@ -3329,8 +3370,9 @@ void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pIndexedBy){
** operator with A. This routine shifts that operator over to B.
*/
void sqlite3SrcListShiftJoinType(SrcList *p){
- if( p && p->a ){
+ if( p ){
int i;
+ assert( p->a || p->nSrc==0 );
for(i=p->nSrc-1; i>0; i--){
p->a[i].jointype = p->a[i-1].jointype;
}
diff --git a/src/ctime.c b/src/ctime.c
index a128f61a6..77174d0da 100644
--- a/src/ctime.c
+++ b/src/ctime.c
@@ -257,6 +257,9 @@ static const char * const azCompileOpt[] = {
#ifdef SQLITE_OMIT_MEMORYDB
"OMIT_MEMORYDB",
#endif
+#ifdef SQLITE_OMIT_MERGE_SORT
+ "OMIT_MERGE_SORT",
+#endif
#ifdef SQLITE_OMIT_OR_OPTIMIZATION
"OMIT_OR_OPTIMIZATION",
#endif
@@ -323,6 +326,9 @@ static const char * const azCompileOpt[] = {
#ifdef SQLITE_OMIT_XFER_OPT
"OMIT_XFER_OPT",
#endif
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+ "PAGECACHE_BLOCKALLOC",
+#endif
#ifdef SQLITE_PERFORMANCE_TRACE
"PERFORMANCE_TRACE",
#endif
diff --git a/src/fkey.c b/src/fkey.c
index 37d4744dd..f0a9fb6ba 100644
--- a/src/fkey.c
+++ b/src/fkey.c
@@ -734,7 +734,24 @@ void sqlite3FkCheck(
pTo = sqlite3LocateTable(pParse, 0, pFKey->zTo, zDb);
}
if( !pTo || locateFkeyIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){
+ assert( isIgnoreErrors==0 || (regOld!=0 && regNew==0) );
if( !isIgnoreErrors || db->mallocFailed ) return;
+ if( pTo==0 ){
+ /* If isIgnoreErrors is true, then a table is being dropped. In this
+ ** case SQLite runs a "DELETE FROM xxx" on the table being dropped
+ ** before actually dropping it in order to check FK constraints.
+ ** If the parent table of an FK constraint on the current table is
+ ** missing, behave as if it is empty. i.e. decrement the relevant
+ ** FK counter for each row of the current table with non-NULL keys.
+ */
+ Vdbe *v = sqlite3GetVdbe(pParse);
+ int iJump = sqlite3VdbeCurrentAddr(v) + pFKey->nCol + 1;
+ for(i=0; i<pFKey->nCol; i++){
+ int iReg = pFKey->aCol[i].iFrom + regOld + 1;
+ sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iJump);
+ }
+ sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, -1);
+ }
continue;
}
assert( pFKey->nCol==1 || (aiFree && pIdx) );
diff --git a/src/main.c b/src/main.c
index b23663045..efdb2076b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -234,6 +234,16 @@ int sqlite3_initialize(void){
#endif
#endif
+ /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT
+ ** compile-time option.
+ */
+#ifdef SQLITE_EXTRA_INIT
+ if( rc==SQLITE_OK && sqlite3GlobalConfig.isInit ){
+ int SQLITE_EXTRA_INIT(void);
+ rc = SQLITE_EXTRA_INIT();
+ }
+#endif
+
return rc;
}
diff --git a/src/os_unix.c b/src/os_unix.c
index b2956c164..8abef8de8 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -208,7 +208,6 @@ struct unixFile {
sqlite3_io_methods const *pMethod; /* Always the first entry */
unixInodeInfo *pInode; /* Info about locks on this inode */
int h; /* The file descriptor */
- int dirfd; /* File descriptor for the directory */
unsigned char eFileLock; /* The type of lock held on this fd */
unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */
int lastErrno; /* The unix errno from last I/O error */
@@ -250,8 +249,14 @@ struct unixFile {
/*
** Allowed values for the unixFile.ctrlFlags bitmask:
*/
-#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
-#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
+#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
+#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
+#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
+#ifndef SQLITE_DISABLE_DIRSYNC
+# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */
+#else
+# define UNIXFILE_DIRSYNC 0x00
+#endif
/*
** Include code that is common to all os_*.c files
@@ -297,6 +302,9 @@ static int posixOpen(const char *zFile, int flags, int mode){
return open(zFile, flags, mode);
}
+/* Forward reference */
+static int openDirectory(const char*, int*);
+
/*
** Many system calls are accessed through pointer-to-functions so that
** they may be overridden at runtime to facilitate fault injection during
@@ -393,6 +401,12 @@ static struct unix_syscall {
#endif
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
+ { "unlink", (sqlite3_syscall_ptr)unlink, 0 },
+#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent)
+
+ { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
+#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
+
}; /* End of the overrideable system calls */
/*
@@ -1749,10 +1763,6 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
*/
static int closeUnixFile(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
- if( pFile->dirfd>=0 ){
- robust_close(pFile, pFile->dirfd, __LINE__);
- pFile->dirfd=-1;
- }
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
pFile->h = -1;
@@ -1760,7 +1770,7 @@ static int closeUnixFile(sqlite3_file *id){
#if OS_VXWORKS
if( pFile->pId ){
if( pFile->isDelete ){
- unlink(pFile->pId->zCanonicalName);
+ osUnlink(pFile->pId->zCanonicalName);
}
vxworksReleaseFileId(pFile->pId);
pFile->pId = 0;
@@ -2009,7 +2019,7 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
/* To fully unlock the database, delete the lock file */
assert( eFileLock==NO_LOCK );
- if( unlink(zLockFile) ){
+ if( osUnlink(zLockFile) ){
int rc = 0;
int tErrno = errno;
if( ENOENT != tErrno ){
@@ -2515,11 +2525,12 @@ static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
+ afpLockingContext *context;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
- afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+ context = (afpLockingContext *) pFile->lockingContext;
if( context->reserved ){
*pResOut = 1;
return SQLITE_OK;
@@ -2659,7 +2670,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){
** operating system calls for the specified lock.
*/
if( eFileLock==SHARED_LOCK ){
- int lrc1, lrc2, lrc1Errno;
+ int lrc1, lrc2, lrc1Errno = 0;
long lk, mask;
assert( pInode->nShared==0 );
@@ -3033,17 +3044,19 @@ static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
#elif defined(USE_PREAD64)
do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR);
#else
- newOffset = lseek(id->h, offset, SEEK_SET);
- SimulateIOError( newOffset-- );
- if( newOffset!=offset ){
- if( newOffset == -1 ){
- ((unixFile*)id)->lastErrno = errno;
- }else{
- ((unixFile*)id)->lastErrno = 0;
+ do{
+ newOffset = lseek(id->h, offset, SEEK_SET);
+ SimulateIOError( newOffset-- );
+ if( newOffset!=offset ){
+ if( newOffset == -1 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }else{
+ ((unixFile*)id)->lastErrno = 0;
+ }
+ return -1;
}
- return -1;
- }
- do{ got = osWrite(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
+ got = osWrite(id->h, pBuf, cnt);
+ }while( got<0 && errno==EINTR );
#endif
TIMER_END;
if( got<0 ){
@@ -3246,6 +3259,50 @@ static int full_fsync(int fd, int fullSync, int dataOnly){
}
/*
+** Open a file descriptor to the directory containing file zFilename.
+** If successful, *pFd is set to the opened file descriptor and
+** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
+** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
+** value.
+**
+** The directory file descriptor is used for only one thing - to
+** fsync() a directory to make sure file creation and deletion events
+** are flushed to disk. Such fsyncs are not needed on newer
+** journaling filesystems, but are required on older filesystems.
+**
+** This routine can be overridden using the xSetSysCall interface.
+** The ability to override this routine was added in support of the
+** chromium sandbox. Opening a directory is a security risk (we are
+** told) so making it overrideable allows the chromium sandbox to
+** replace this routine with a harmless no-op. To make this routine
+** a no-op, replace it with a stub that returns SQLITE_OK but leaves
+** *pFd set to a negative number.
+**
+** If SQLITE_OK is returned, the caller is responsible for closing
+** the file descriptor *pFd using close().
+*/
+static int openDirectory(const char *zFilename, int *pFd){
+  int ii;
+  int fd = -1;
+  char zDirname[MAX_PATHNAME+1];
+
+  /* Work on a private, always NUL-terminated copy of the filename. */
+  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
+
+  /* Scan backwards for the last '/' that is not the first character.
+  ** The scan must be allowed to reach index 0: stopping at index 1 would
+  ** make a name without any '/' (e.g. "test.db") look as if its directory
+  ** were its own first character ("t").
+  */
+  for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--);
+  if( ii>0 ){
+    /* Ordinary case: chop off the final path component. */
+    zDirname[ii] = '\0';
+  }else{
+    /* No usable '/' found.  A name of the form "/file" lives in the root
+    ** directory "/"; anything else is relative to the current directory,
+    ** so use ".".
+    */
+    if( zDirname[0]!='/' ) zDirname[0] = '.';
+    zDirname[1] = '\0';
+  }
+
+  fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
+  if( fd>=0 ){
+#ifdef FD_CLOEXEC
+    /* Do not leak the directory descriptor into exec()ed children. */
+    osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
+#endif
+    OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
+  }
+
+  /* *pFd is -1 on failure; the failure is also logged with the directory
+  ** name that could not be opened.
+  */
+  *pFd = fd;
+  return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
+}
+
+/*
** Make sure all writes to a particular file are committed to disk.
**
** If dataOnly==0 then both the file itself and its metadata (file
@@ -3285,28 +3342,23 @@ static int unixSync(sqlite3_file *id, int flags){
pFile->lastErrno = errno;
return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
}
- if( pFile->dirfd>=0 ){
- OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
+
+ /* Also fsync the directory containing the file if the DIRSYNC flag
+ ** is set. This is a one-time occurrence. Many systems (examples: AIX)
+ ** are unable to fsync a directory, so ignore errors on the fsync.
+ */
+ if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
+ int dirfd;
+ OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
HAVE_FULLFSYNC, isFullsync));
-#ifndef SQLITE_DISABLE_DIRSYNC
- /* The directory sync is only attempted if full_fsync is
- ** turned off or unavailable. If a full_fsync occurred above,
- ** then the directory sync is superfluous.
- */
- if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
- /*
- ** We have received multiple reports of fsync() returning
- ** errors when applied to directories on certain file systems.
- ** A failed directory sync is not a big deal. So it seems
- ** better to ignore the error. Ticket #1657
- */
- /* pFile->lastErrno = errno; */
- /* return SQLITE_IOERR; */
+ rc = osOpenDirectory(pFile->zPath, &dirfd);
+ if( rc==SQLITE_OK && dirfd>=0 ){
+ full_fsync(dirfd, 0, 0);
+ robust_close(pFile, dirfd, __LINE__);
+ }else if( rc==SQLITE_CANTOPEN ){
+ rc = SQLITE_OK;
}
-#endif
- /* Only need to sync once, so close the directory when we are done */
- robust_close(pFile, pFile->dirfd, __LINE__);
- pFile->dirfd = -1;
+ pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
}
return rc;
}
@@ -3388,11 +3440,9 @@ static int proxyFileControl(sqlite3_file*,int,void*);
/*
** This function is called to handle the SQLITE_FCNTL_SIZE_HINT
-** file-control operation.
-**
-** If the user has configured a chunk-size for this file, it could be
-** that the file needs to be extended at this point. Otherwise, the
-** SQLITE_FCNTL_SIZE_HINT operation is a no-op for Unix.
+** file-control operation. Enlarge the database to nBytes in size
+** (rounded up to the next chunk-size). If the database is already
+** nBytes or larger, this routine is a no-op.
*/
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
if( pFile->szChunk ){
@@ -3444,21 +3494,37 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
+ unixFile *pFile = (unixFile*)id;
switch( op ){
case SQLITE_FCNTL_LOCKSTATE: {
- *(int*)pArg = ((unixFile*)id)->eFileLock;
+ *(int*)pArg = pFile->eFileLock;
return SQLITE_OK;
}
case SQLITE_LAST_ERRNO: {
- *(int*)pArg = ((unixFile*)id)->lastErrno;
+ *(int*)pArg = pFile->lastErrno;
return SQLITE_OK;
}
case SQLITE_FCNTL_CHUNK_SIZE: {
- ((unixFile*)id)->szChunk = *(int *)pArg;
+ pFile->szChunk = *(int *)pArg;
return SQLITE_OK;
}
case SQLITE_FCNTL_SIZE_HINT: {
- return fcntlSizeHint((unixFile *)id, *(i64 *)pArg);
+ int rc;
+ SimulateIOErrorBenign(1);
+ rc = fcntlSizeHint(pFile, *(i64 *)pArg);
+ SimulateIOErrorBenign(0);
+ return rc;
+ }
+ case SQLITE_FCNTL_PERSIST_WAL: {
+ int bPersist = *(int*)pArg;
+ if( bPersist<0 ){
+ *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0;
+ }else if( bPersist==0 ){
+ pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL;
+ }else{
+ pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL;
+ }
+ return SQLITE_OK;
}
#ifndef NDEBUG
/* The pager calls this method to signal that it has done
@@ -4143,7 +4209,7 @@ static int unixShmUnmap(
assert( pShmNode->nRef>0 );
pShmNode->nRef--;
if( pShmNode->nRef==0 ){
- if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename);
+ if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename);
unixShmPurge(pDbFd);
}
unixLeaveMutex();
@@ -4456,7 +4522,7 @@ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
static int fillInUnixFile(
sqlite3_vfs *pVfs, /* Pointer to vfs object */
int h, /* Open file descriptor of file being opened */
- int dirfd, /* Directory file descriptor */
+ int syncDir, /* True to sync directory on first sync */
sqlite3_file *pId, /* Write to the unixFile structure here */
const char *zFilename, /* Name of the file being opened */
int noLock, /* Omit locking if true */
@@ -4487,7 +4553,6 @@ static int fillInUnixFile(
OSTRACE(("OPEN %-3d %s\n", h, zFilename));
pNew->h = h;
- pNew->dirfd = dirfd;
pNew->zPath = zFilename;
if( memcmp(pVfs->zName,"unix-excl",10)==0 ){
pNew->ctrlFlags = UNIXFILE_EXCL;
@@ -4497,6 +4562,9 @@ static int fillInUnixFile(
if( isReadOnly ){
pNew->ctrlFlags |= UNIXFILE_RDONLY;
}
+ if( syncDir ){
+ pNew->ctrlFlags |= UNIXFILE_DIRSYNC;
+ }
#if OS_VXWORKS
pNew->pId = vxworksFindFileId(zFilename);
@@ -4623,13 +4691,12 @@ static int fillInUnixFile(
if( rc!=SQLITE_OK ){
if( h>=0 ) robust_close(pNew, h, __LINE__);
h = -1;
- unlink(zFilename);
+ osUnlink(zFilename);
isDelete = 0;
}
pNew->isDelete = isDelete;
#endif
if( rc!=SQLITE_OK ){
- if( dirfd>=0 ) robust_close(pNew, dirfd, __LINE__);
if( h>=0 ) robust_close(pNew, h, __LINE__);
}else{
pNew->pMethod = pLockingStyle;
@@ -4639,37 +4706,6 @@ static int fillInUnixFile(
}
/*
-** Open a file descriptor to the directory containing file zFilename.
-** If successful, *pFd is set to the opened file descriptor and
-** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
-** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
-** value.
-**
-** If SQLITE_OK is returned, the caller is responsible for closing
-** the file descriptor *pFd using close().
-*/
-static int openDirectory(const char *zFilename, int *pFd){
- int ii;
- int fd = -1;
- char zDirname[MAX_PATHNAME+1];
-
- sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
- for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);
- if( ii>0 ){
- zDirname[ii] = '\0';
- fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
- if( fd>=0 ){
-#ifdef FD_CLOEXEC
- osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
-#endif
- OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
- }
- }
- *pFd = fd;
- return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
-}
-
-/*
** Return the name of a directory in which to put temporary files.
** If no suitable temporary file directory can be found, return NULL.
*/
@@ -4783,7 +4819,7 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
**
** Even if a subsequent open() call does succeed, the consequences of
** not searching for a resusable file descriptor are not dire. */
- if( 0==stat(zPath, &sStat) ){
+ if( 0==osStat(zPath, &sStat) ){
unixInodeInfo *pInode;
unixEnterMutex();
@@ -4859,7 +4895,7 @@ static int findCreateFileMode(
memcpy(zDb, zPath, nDb);
zDb[nDb] = '\0';
- if( 0==stat(zDb, &sStat) ){
+ if( 0==osStat(zDb, &sStat) ){
*pMode = sStat.st_mode & 0777;
}else{
rc = SQLITE_IOERR_FSTAT;
@@ -4901,7 +4937,6 @@ static int unixOpen(
){
unixFile *p = (unixFile *)pFile;
int fd = -1; /* File descriptor returned by open() */
- int dirfd = -1; /* Directory file descriptor */
int openFlags = 0; /* Flags to pass to open() */
int eType = flags&0xFFFFFF00; /* Type of file to open */
int noLock; /* True to omit locking primitives */
@@ -4915,12 +4950,15 @@ static int unixOpen(
#if SQLITE_ENABLE_LOCKING_STYLE
int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);
#endif
+#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
+ struct statfs fsInfo;
+#endif
/* If creating a master or main-file journal, this function will open
** a file-descriptor on the directory too. The first time unixSync()
** is called the directory file descriptor will be fsync()ed and close()d.
*/
- int isOpenDirectory = (isCreate && (
+ int syncDir = (isCreate && (
eType==SQLITE_OPEN_MASTER_JOURNAL
|| eType==SQLITE_OPEN_MAIN_JOURNAL
|| eType==SQLITE_OPEN_WAL
@@ -4974,7 +5012,7 @@ static int unixOpen(
p->pUnused = pUnused;
}else if( !zName ){
/* If zName is NULL, the upper layer is requesting a temp file. */
- assert(isDelete && !isOpenDirectory);
+ assert(isDelete && !syncDir);
rc = unixGetTempname(MAX_PATHNAME+1, zTmpname);
if( rc!=SQLITE_OK ){
return rc;
@@ -5030,7 +5068,7 @@ static int unixOpen(
#if OS_VXWORKS
zPath = zName;
#else
- unlink(zName);
+ osUnlink(zName);
#endif
}
#if SQLITE_ENABLE_LOCKING_STYLE
@@ -5039,19 +5077,6 @@ static int unixOpen(
}
#endif
- if( isOpenDirectory ){
- rc = openDirectory(zPath, &dirfd);
- if( rc!=SQLITE_OK ){
- /* It is safe to close fd at this point, because it is guaranteed not
- ** to be open on a database file. If it were open on a database file,
- ** it would not be safe to close as this would release any locks held
- ** on the file by this process. */
- assert( eType!=SQLITE_OPEN_MAIN_DB );
- robust_close(p, fd, __LINE__);
- goto open_finished;
- }
- }
-
#ifdef FD_CLOEXEC
osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif
@@ -5060,10 +5085,8 @@ static int unixOpen(
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
- struct statfs fsInfo;
if( fstatfs(fd, &fsInfo) == -1 ){
((unixFile*)pFile)->lastErrno = errno;
- if( dirfd>=0 ) robust_close(p, dirfd, __LINE__);
robust_close(p, fd, __LINE__);
return SQLITE_IOERR_ACCESS;
}
@@ -5085,7 +5108,6 @@ static int unixOpen(
if( envforce!=NULL ){
useProxy = atoi(envforce)>0;
}else{
- struct statfs fsInfo;
if( statfs(zPath, &fsInfo) == -1 ){
/* In theory, the close(fd) call is sub-optimal. If the file opened
** with fd is a database file, and there are other connections open
@@ -5095,9 +5117,6 @@ static int unixOpen(
** not while other file descriptors opened by the same process on
** the same file are working. */
p->lastErrno = errno;
- if( dirfd>=0 ){
- robust_close(p, dirfd, __LINE__);
- }
robust_close(p, fd, __LINE__);
rc = SQLITE_IOERR_ACCESS;
goto open_finished;
@@ -5105,7 +5124,7 @@ static int unixOpen(
useProxy = !(fsInfo.f_flags&MNT_LOCAL);
}
if( useProxy ){
- rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock,
+ rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
isDelete, isReadonly);
if( rc==SQLITE_OK ){
rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
@@ -5123,7 +5142,7 @@ static int unixOpen(
}
#endif
- rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock,
+ rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
isDelete, isReadonly);
open_finished:
if( rc!=SQLITE_OK ){
@@ -5145,13 +5164,13 @@ static int unixDelete(
int rc = SQLITE_OK;
UNUSED_PARAMETER(NotUsed);
SimulateIOError(return SQLITE_IOERR_DELETE);
- if( unlink(zPath)==(-1) && errno!=ENOENT ){
+ if( osUnlink(zPath)==(-1) && errno!=ENOENT ){
return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
}
#ifndef SQLITE_DISABLE_DIRSYNC
if( dirSync ){
int fd;
- rc = openDirectory(zPath, &fd);
+ rc = osOpenDirectory(zPath, &fd);
if( rc==SQLITE_OK ){
#if OS_VXWORKS
if( fsync(fd)==-1 )
@@ -5162,6 +5181,8 @@ static int unixDelete(
rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);
}
robust_close(0, fd, __LINE__);
+ }else if( rc==SQLITE_CANTOPEN ){
+ rc = SQLITE_OK;
}
}
#endif
@@ -5204,7 +5225,7 @@ static int unixAccess(
*pResOut = (osAccess(zPath, amode)==0);
if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){
struct stat buf;
- if( 0==stat(zPath, &buf) && buf.st_size==0 ){
+ if( 0==osStat(zPath, &buf) && buf.st_size==0 ){
*pResOut = 0;
}
}
@@ -5723,7 +5744,6 @@ static int proxyCreateUnixFile(
int islockfile /* if non zero missing dirs will be created */
) {
int fd = -1;
- int dirfd = -1;
unixFile *pNew;
int rc = SQLITE_OK;
int openFlags = O_RDWR | O_CREAT;
@@ -5788,7 +5808,7 @@ static int proxyCreateUnixFile(
pUnused->flags = openFlags;
pNew->pUnused = pUnused;
- rc = fillInUnixFile(&dummyVfs, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0, 0);
+ rc = fillInUnixFile(&dummyVfs, fd, 0, (sqlite3_file*)pNew, path, 0, 0, 0);
if( rc==SQLITE_OK ){
*ppFile = pNew;
return SQLITE_OK;
@@ -5828,6 +5848,8 @@ static int proxyGetHostID(unsigned char *pHostID, int *pError){
return SQLITE_IOERR;
}
}
+#else
+ UNUSED_PARAMETER(pError);
#endif
#ifdef SQLITE_TEST
/* simulate multiple hosts by creating unique hostid file paths */
@@ -5902,7 +5924,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
end_breaklock:
if( rc ){
if( fd>=0 ){
- unlink(tPath);
+ osUnlink(tPath);
robust_close(pFile, fd, __LINE__);
}
fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
@@ -5920,6 +5942,7 @@ static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
int nTries = 0;
struct timespec conchModTime;
+ memset(&conchModTime, 0, sizeof(conchModTime));
do {
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
nTries ++;
@@ -6151,11 +6174,12 @@ static int proxyTakeConch(unixFile *pFile){
end_takeconch:
OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
if( rc==SQLITE_OK && pFile->openFlags ){
+ int fd;
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
}
pFile->h = -1;
- int fd = robust_open(pCtx->dbPath, pFile->openFlags,
+ fd = robust_open(pCtx->dbPath, pFile->openFlags,
SQLITE_DEFAULT_FILE_PERMISSIONS);
OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
if( fd>=0 ){
@@ -6725,7 +6749,7 @@ int sqlite3_os_init(void){
/* Double-check that the aSyscall[] array has been constructed
** correctly. See ticket [bb3a86e890c8e96ab] */
- assert( ArraySize(aSyscall)==16 );
+ assert( ArraySize(aSyscall)==18 );
/* Register all VFSes defined in the aVfs[] array */
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
diff --git a/src/os_win.c b/src/os_win.c
index 5d9bf5984..02a7a0c62 100644
--- a/src/os_win.c
+++ b/src/os_win.c
@@ -102,8 +102,9 @@ struct winFile {
const sqlite3_io_methods *pMethod; /*** Must be first ***/
sqlite3_vfs *pVfs; /* The VFS used to open this file */
HANDLE h; /* Handle for accessing the file */
- unsigned char locktype; /* Type of lock currently held on this file */
+ u8 locktype; /* Type of lock currently held on this file */
short sharedLockByte; /* Randomly chosen byte used as a shared lock */
+ u8 bPersistWal; /* True to persist WAL files */
DWORD lastErrno; /* The Windows errno from the last I/O error */
DWORD sectorSize; /* Sector size of the device file is on */
winShm *pShm; /* Instance of shared memory on this file */
@@ -118,6 +119,76 @@ struct winFile {
#endif
};
+/*
+ * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the
+ * various Win32 API heap functions instead of our own.
+ */
+#ifdef SQLITE_WIN32_MALLOC
+/*
+ * The initial size of the Win32-specific heap. This value may be zero.
+ */
+#ifndef SQLITE_WIN32_HEAP_INIT_SIZE
+# define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_DEFAULT_CACHE_SIZE) * \
+ (SQLITE_DEFAULT_PAGE_SIZE) + 4194304)
+#endif
+
+/*
+ * The maximum size of the Win32-specific heap. This value may be zero.
+ */
+#ifndef SQLITE_WIN32_HEAP_MAX_SIZE
+# define SQLITE_WIN32_HEAP_MAX_SIZE (0)
+#endif
+
+/*
+ * The extra flags to use in calls to the Win32 heap APIs. This value may be
+ * zero for the default behavior.
+ */
+#ifndef SQLITE_WIN32_HEAP_FLAGS
+# define SQLITE_WIN32_HEAP_FLAGS (0)
+#endif
+
+/*
+** The winMemData structure stores information required by the Win32-specific
+** sqlite3_mem_methods implementation.
+**
+** A single static instance (win_mem_data) is defined below. The magic
+** member exists only when NDEBUG is not defined and is compared against
+** WINMEM_MAGIC by winMemAssertMagic() and winMemInit(). hHeap starts out
+** NULL and is filled in by winMemInit(); bOwned records whether
+** winMemInit() created the heap itself, in which case winMemShutdown()
+** destroys it.
+*/
+typedef struct winMemData winMemData;
+struct winMemData {
+#ifndef NDEBUG
+ u32 magic; /* Magic number to detect structure corruption. */
+#endif
+ HANDLE hHeap; /* The handle to our heap. */
+ BOOL bOwned; /* Do we own the heap (i.e. destroy it on shutdown)? */
+};
+
+#ifndef NDEBUG
+#define WINMEM_MAGIC 0x42b2830b
+#endif
+
+static struct winMemData win_mem_data = {
+#ifndef NDEBUG
+ WINMEM_MAGIC,
+#endif
+ NULL, FALSE
+};
+
+#ifndef NDEBUG
+#define winMemAssertMagic() assert( win_mem_data.magic==WINMEM_MAGIC )
+#else
+#define winMemAssertMagic()
+#endif
+
+#define winMemGetHeap() win_mem_data.hHeap
+
+static void *winMemMalloc(int nBytes);
+static void winMemFree(void *pPrior);
+static void *winMemRealloc(void *pPrior, int nBytes);
+static int winMemSize(void *p);
+static int winMemRoundup(int n);
+static int winMemInit(void *pAppData);
+static void winMemShutdown(void *pAppData);
+
+const sqlite3_mem_methods *sqlite3MemGetWin32(void);
+#endif /* SQLITE_WIN32_MALLOC */
/*
** Forward prototypes.
@@ -170,6 +241,188 @@ static int sqlite3_os_type = 0;
}
#endif /* SQLITE_OS_WINCE */
+#ifdef SQLITE_WIN32_MALLOC
+/*
+** Allocate nBytes of memory.
+**
+** The block is taken from the heap handle stored in win_mem_data (read
+** through winMemGetHeap()). nBytes must be non-negative; this is only
+** checked by an assert. If HeapAlloc() returns NULL the failure is
+** reported through sqlite3_log() with SQLITE_NOMEM and NULL is returned
+** to the caller.
+*/
+static void *winMemMalloc(int nBytes){
+ HANDLE hHeap;
+ void *p;
+
+ winMemAssertMagic();
+ hHeap = winMemGetHeap();
+ assert( hHeap!=0 );
+ assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+ assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+ assert( nBytes>=0 );
+ p = HeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes);
+ if( !p ){
+ sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%d), heap=%p",
+ nBytes, GetLastError(), (void*)hHeap);
+ }
+ return p;
+}
+
+/*
+** Free memory.
+**
+** pPrior is released back to the heap stored in win_mem_data. A NULL
+** pPrior is accepted and ignored. A failing HeapFree() is reported
+** through sqlite3_log() only; nothing is returned to the caller.
+**
+** NOTE(review): when SQLITE_WIN32_MALLOC_VALIDATE is defined, the
+** HeapValidate() assert runs before the NULL check, so it is also
+** evaluated with pPrior==NULL.
+*/
+static void winMemFree(void *pPrior){
+ HANDLE hHeap;
+
+ winMemAssertMagic();
+ hHeap = winMemGetHeap();
+ assert( hHeap!=0 );
+ assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+ assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) );
+#endif
+ if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */
+ if( !HeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){
+ sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%d), heap=%p",
+ pPrior, GetLastError(), (void*)hHeap);
+ }
+}
+
+/*
+** Change the size of an existing memory allocation
+**
+** If pPrior is NULL this behaves as a fresh allocation and calls
+** HeapAlloc(); otherwise HeapReAlloc() is called on pPrior. nBytes must
+** be non-negative (assert only). On failure the name of the API that
+** was attempted is logged through sqlite3_log() with SQLITE_NOMEM and
+** NULL is returned.
+*/
+static void *winMemRealloc(void *pPrior, int nBytes){
+ HANDLE hHeap;
+ void *p;
+
+ winMemAssertMagic();
+ hHeap = winMemGetHeap();
+ assert( hHeap!=0 );
+ assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+ assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) );
+#endif
+ assert( nBytes>=0 );
+ if( !pPrior ){
+ p = HeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes);
+ }else{
+ p = HeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes);
+ }
+ if( !p ){
+ sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%d), heap=%p",
+ pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, GetLastError(),
+ (void*)hHeap);
+ }
+ return p;
+}
+
+/*
+** Return the size of an outstanding allocation, in bytes.
+**
+** Returns 0 when p is NULL. If HeapSize() reports (SIZE_T)-1 the failure
+** is logged through sqlite3_log() and 0 is returned as well, so a zero
+** result does not distinguish an error from a NULL argument. The
+** SIZE_T result is narrowed to int on return.
+*/
+static int winMemSize(void *p){
+ HANDLE hHeap;
+ SIZE_T n;
+
+ winMemAssertMagic();
+ hHeap = winMemGetHeap();
+ assert( hHeap!=0 );
+ assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+ assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+ if( !p ) return 0;
+ n = HeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p);
+ if( n==(SIZE_T)-1 ){
+ sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%d), heap=%p",
+ p, GetLastError(), (void*)hHeap);
+ return 0;
+ }
+ return (int)n;
+}
+
+/*
+** Round up a request size to the next valid allocation size.
+**
+** This implementation performs no rounding: n is returned unchanged.
+*/
+static int winMemRoundup(int n){
+ return n;
+}
+
+/*
+** Initialize this module.
+**
+** pAppData is interpreted as a pointer to a winMemData object. Returns
+** SQLITE_ERROR if it is NULL. If the object has no heap handle yet, a
+** heap is created with HeapCreate() using SQLITE_WIN32_HEAP_FLAGS,
+** SQLITE_WIN32_HEAP_INIT_SIZE and SQLITE_WIN32_HEAP_MAX_SIZE, and bOwned
+** is set to TRUE; if creation fails the error is logged and
+** SQLITE_NOMEM is returned. A handle that is already non-NULL is used
+** as is and bOwned is left untouched. Returns SQLITE_OK otherwise.
+*/
+static int winMemInit(void *pAppData){
+ winMemData *pWinMemData = (winMemData *)pAppData;
+
+ if( !pWinMemData ) return SQLITE_ERROR;
+ assert( pWinMemData->magic==WINMEM_MAGIC );
+ if( !pWinMemData->hHeap ){
+ pWinMemData->hHeap = HeapCreate(SQLITE_WIN32_HEAP_FLAGS,
+ SQLITE_WIN32_HEAP_INIT_SIZE,
+ SQLITE_WIN32_HEAP_MAX_SIZE);
+ if( !pWinMemData->hHeap ){
+ sqlite3_log(SQLITE_NOMEM,
+ "failed to HeapCreate (%d), flags=%u, initSize=%u, maxSize=%u",
+ GetLastError(), SQLITE_WIN32_HEAP_FLAGS, SQLITE_WIN32_HEAP_INIT_SIZE,
+ SQLITE_WIN32_HEAP_MAX_SIZE);
+ return SQLITE_NOMEM;
+ }
+ pWinMemData->bOwned = TRUE;
+ }
+ assert( pWinMemData->hHeap!=0 );
+ assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+ assert( HeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+ return SQLITE_OK;
+}
+
+/*
+** Deinitialize this module.
+**
+** pAppData is interpreted as a pointer to a winMemData object; a NULL
+** pointer or a NULL heap handle makes this a no-op. The heap is passed
+** to HeapDestroy() only when bOwned is set (a failure there is logged
+** but otherwise ignored, and bOwned is cleared either way). The stored
+** handle is reset to NULL whether or not the heap was owned.
+*/
+static void winMemShutdown(void *pAppData){
+ winMemData *pWinMemData = (winMemData *)pAppData;
+
+ if( !pWinMemData ) return;
+ if( pWinMemData->hHeap ){
+ assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+ assert( HeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+ if( pWinMemData->bOwned ){
+ if( !HeapDestroy(pWinMemData->hHeap) ){
+ sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%d), heap=%p",
+ GetLastError(), (void*)pWinMemData->hHeap);
+ }
+ pWinMemData->bOwned = FALSE;
+ }
+ pWinMemData->hHeap = NULL;
+ }
+}
+
+/*
+** Return a pointer to the static sqlite3_mem_methods table whose entries
+** are the Win32 heap routines defined in this file, followed by
+** &win_mem_data as the last member (presumably the pAppData value later
+** handed to winMemInit() and winMemShutdown() -- confirm against the
+** sqlite3_mem_methods definition).
+**
+** The function takes no arguments and does not itself modify
+** sqlite3GlobalConfig; installing the table is left to the caller, e.g.
+** sqlite3MemSetDefault(), which passes it to sqlite3_config().
+*/
+const sqlite3_mem_methods *sqlite3MemGetWin32(void){
+ static const sqlite3_mem_methods winMemMethods = {
+ winMemMalloc,
+ winMemFree,
+ winMemRealloc,
+ winMemSize,
+ winMemRoundup,
+ winMemInit,
+ winMemShutdown,
+ &win_mem_data
+ };
+ return &winMemMethods;
+}
+
+void sqlite3MemSetDefault(void){ /* Install the Win32 heap allocator table */
+ sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32()); /* return code is ignored */
+}
+#endif /* SQLITE_WIN32_MALLOC */
+
/*
** Convert a UTF-8 string to microsoft unicode (UTF-16?).
**
@@ -1335,24 +1588,41 @@ static int winUnlock(sqlite3_file *id, int locktype){
** Control and query of the open file handle.
*/
static int winFileControl(sqlite3_file *id, int op, void *pArg){
+ winFile *pFile = (winFile*)id;
switch( op ){
case SQLITE_FCNTL_LOCKSTATE: {
- *(int*)pArg = ((winFile*)id)->locktype;
+ *(int*)pArg = pFile->locktype;
return SQLITE_OK;
}
case SQLITE_LAST_ERRNO: {
- *(int*)pArg = (int)((winFile*)id)->lastErrno;
+ *(int*)pArg = (int)pFile->lastErrno;
return SQLITE_OK;
}
case SQLITE_FCNTL_CHUNK_SIZE: {
- ((winFile*)id)->szChunk = *(int *)pArg;
+ pFile->szChunk = *(int *)pArg;
return SQLITE_OK;
}
case SQLITE_FCNTL_SIZE_HINT: {
- sqlite3_int64 sz = *(sqlite3_int64*)pArg;
- SimulateIOErrorBenign(1);
- winTruncate(id, sz);
- SimulateIOErrorBenign(0);
+ winFile *pFile = (winFile*)id;
+ sqlite3_int64 oldSz;
+ int rc = winFileSize(id, &oldSz);
+ if( rc==SQLITE_OK ){
+ sqlite3_int64 newSz = *(sqlite3_int64*)pArg;
+ if( newSz>oldSz ){
+ SimulateIOErrorBenign(1);
+ rc = winTruncate(id, newSz);
+ SimulateIOErrorBenign(0);
+ }
+ }
+ return rc;
+ }
+ case SQLITE_FCNTL_PERSIST_WAL: {
+ int bPersist = *(int*)pArg;
+ if( bPersist<0 ){
+ *(int*)pArg = pFile->bPersistWal;
+ }else{
+ pFile->bPersistWal = bPersist!=0;
+ }
return SQLITE_OK;
}
case SQLITE_FCNTL_SYNC_OMITTED: {
@@ -2180,6 +2450,7 @@ static int winOpen(
winFile *pFile = (winFile*)id;
void *zConverted; /* Filename in OS encoding */
const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */
+ int cnt = 0;
/* If argument zPath is a NULL pointer, this function is required to open
** a temporary file. Use this buffer to store the file name in.
@@ -2299,31 +2570,31 @@ static int winOpen(
#endif
if( isNT() ){
- h = CreateFileW((WCHAR*)zConverted,
- dwDesiredAccess,
- dwShareMode,
- NULL,
- dwCreationDisposition,
- dwFlagsAndAttributes,
- NULL
- );
+ while( (h = CreateFileW((WCHAR*)zConverted,
+ dwDesiredAccess,
+ dwShareMode, NULL,
+ dwCreationDisposition,
+ dwFlagsAndAttributes,
+ NULL))==INVALID_HANDLE_VALUE &&
+ retryIoerr(&cnt) ){}
/* isNT() is 1 if SQLITE_OS_WINCE==1, so this else is never executed.
** Since the ASCII version of these Windows API do not exist for WINCE,
** it's important to not reference them for WINCE builds.
*/
#if SQLITE_OS_WINCE==0
}else{
- h = CreateFileA((char*)zConverted,
- dwDesiredAccess,
- dwShareMode,
- NULL,
- dwCreationDisposition,
- dwFlagsAndAttributes,
- NULL
- );
+ while( (h = CreateFileA((char*)zConverted,
+ dwDesiredAccess,
+ dwShareMode, NULL,
+ dwCreationDisposition,
+ dwFlagsAndAttributes,
+ NULL))==INVALID_HANDLE_VALUE &&
+ retryIoerr(&cnt) ){}
#endif
}
+ logIoerr(cnt);
+
OSTRACE(("OPEN %d %s 0x%lx %s\n",
h, zName, dwDesiredAccess,
h==INVALID_HANDLE_VALUE ? "failed" : "ok"));
@@ -2455,9 +2726,9 @@ static int winAccess(
int cnt = 0;
WIN32_FILE_ATTRIBUTE_DATA sAttrData;
memset(&sAttrData, 0, sizeof(sAttrData));
- while( (rc = GetFileAttributesExW((WCHAR*)zConverted,
+ while( !(rc = GetFileAttributesExW((WCHAR*)zConverted,
GetFileExInfoStandard,
- &sAttrData)) && rc==0 && retryIoerr(&cnt) ){}
+ &sAttrData)) && retryIoerr(&cnt) ){}
if( rc ){
/* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file
** as if it does not exist.
@@ -2470,6 +2741,7 @@ static int winAccess(
attr = sAttrData.dwFileAttributes;
}
}else{
+ logIoerr(cnt);
if( GetLastError()!=ERROR_FILE_NOT_FOUND ){
winLogError(SQLITE_IOERR_ACCESS, "winAccess", zFilename);
free(zConverted);
@@ -2494,7 +2766,8 @@ static int winAccess(
rc = attr!=INVALID_FILE_ATTRIBUTES;
break;
case SQLITE_ACCESS_READWRITE:
- rc = (attr & FILE_ATTRIBUTE_READONLY)==0;
+ rc = attr!=INVALID_FILE_ATTRIBUTES &&
+ (attr & FILE_ATTRIBUTE_READONLY)==0;
break;
default:
assert(!"Invalid flags argument");
diff --git a/src/pager.c b/src/pager.c
index 7ff9a9a00..373d06aec 100644
--- a/src/pager.c
+++ b/src/pager.c
@@ -620,6 +620,8 @@ struct Pager {
u8 tempFile; /* zFilename is a temporary file */
u8 readOnly; /* True for a read-only database */
u8 memDb; /* True to inhibit all file I/O */
+ u8 hasSeenStress; /* pagerStress() called one or more times */
+ u8 isSorter; /* True for a PAGER_SORTER */
/**************************************************************************
** The following block contains those class members that change during
@@ -843,6 +845,15 @@ static int assert_pager_state(Pager *p){
assert( pagerUseWal(p)==0 );
}
+ /* A sorter is a temp file that never spills to disk and always has
+ ** the doNotSpill flag set
+ */
+ if( p->isSorter ){
+ assert( p->tempFile );
+ assert( p->doNotSpill );
+ assert( p->fd->pMethods==0 );
+ }
+
/* If changeCountDone is set, a RESERVED lock or greater must be held
** on the file.
*/
@@ -3739,6 +3750,7 @@ static int pagerSyncHotJournal(Pager *pPager){
int sqlite3PagerClose(Pager *pPager){
u8 *pTmp = (u8 *)pPager->pTmpSpace;
+ assert( assert_pager_state(pPager) );
disable_simulated_io_errors();
sqlite3BeginBenignMalloc();
/* pPager->errCode = 0; */
@@ -4173,6 +4185,7 @@ static int pagerStress(void *p, PgHdr *pPg){
** be called in the error state. Nevertheless, we include a NEVER()
** test for the error state as a safeguard against future changes.
*/
+ pPager->hasSeenStress = 1;
if( NEVER(pPager->errCode) ) return SQLITE_OK;
if( pPager->doNotSpill ) return SQLITE_OK;
if( pPager->doNotSyncSpill && (pPg->flags & PGHDR_NEED_SYNC)!=0 ){
@@ -4544,6 +4557,12 @@ int sqlite3PagerOpen(
/* pPager->pBusyHandlerArg = 0; */
pPager->xReiniter = xReinit;
/* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
+#ifndef SQLITE_OMIT_MERGE_SORT
+ if( flags & PAGER_SORTER ){
+ pPager->doNotSpill = 1;
+ pPager->isSorter = 1;
+ }
+#endif
*ppPager = pPager;
return SQLITE_OK;
@@ -6088,6 +6107,17 @@ int sqlite3PagerIsMemdb(Pager *pPager){
return MEMDB;
}
+#ifndef SQLITE_OMIT_MERGE_SORT
+/*
+** Return true if the pager has seen a pagerStress callback.
+**
+** The value returned is Pager.hasSeenStress, which pagerStress() sets
+** to 1 each time it is invoked. This routine may only be used on a
+** pager that has isSorter and doNotSpill set (both are asserted); those
+** flags are set together in sqlite3PagerOpen() when PAGER_SORTER is
+** passed.
+*/
+int sqlite3PagerUnderStress(Pager *pPager){
+ assert( pPager->isSorter );
+ assert( pPager->doNotSpill );
+ return pPager->hasSeenStress;
+}
+#endif
+
/*
** Check that there are at least nSavepoint savepoints open. If there are
** currently less than nSavepoints open, then open one or more savepoints
diff --git a/src/pager.h b/src/pager.h
index eab7ddaf8..ccd7467d6 100644
--- a/src/pager.h
+++ b/src/pager.h
@@ -60,6 +60,7 @@ typedef struct PgHdr DbPage;
#define PAGER_OMIT_JOURNAL 0x0001 /* Do not use a rollback journal */
#define PAGER_NO_READLOCK 0x0002 /* Omit readlocks on readonly files */
#define PAGER_MEMORY 0x0004 /* In-memory database */
+#define PAGER_SORTER 0x0020 /* Accumulator in external merge sort */
/*
** Valid values for the second argument to sqlite3PagerLockingMode().
@@ -155,6 +156,9 @@ const char *sqlite3PagerJournalname(Pager*);
int sqlite3PagerNosync(Pager*);
void *sqlite3PagerTempSpace(Pager*);
int sqlite3PagerIsMemdb(Pager*);
+#ifndef SQLITE_OMIT_MERGE_SORT
+int sqlite3PagerUnderStress(Pager*);
+#endif
/* Functions used to truncate the database file. */
void sqlite3PagerTruncateImage(Pager*,Pgno);
diff --git a/src/pcache1.c b/src/pcache1.c
index e47265a22..de96e5242 100644
--- a/src/pcache1.c
+++ b/src/pcache1.c
@@ -24,6 +24,9 @@ typedef struct PgHdr1 PgHdr1;
typedef struct PgFreeslot PgFreeslot;
typedef struct PGroup PGroup;
+typedef struct PGroupBlock PGroupBlock;
+typedef struct PGroupBlockList PGroupBlockList;
+
/* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set
** of one or more PCaches that are able to recycle each others unpinned
** pages when they are under memory pressure. A PGroup is an instance of
@@ -53,8 +56,66 @@ struct PGroup {
int mxPinned; /* nMaxpage + 10 - nMinPage */
int nCurrentPage; /* Number of purgeable pages allocated */
PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+ int isBusy; /* Do not run ReleaseMemory() if true */
+ PGroupBlockList *pBlockList; /* List of block-lists for this group */
+#endif
+};
+
+/*
+** If SQLITE_PAGECACHE_BLOCKALLOC is defined when the library is built,
+** each PGroup structure has a linked list of the following starting
+** at PGroup.pBlockList. There is one entry for each distinct page-size
+** currently used by members of the PGroup (i.e. 1024 bytes, 4096 bytes
+** etc.). Variable PGroupBlockList.nByte is set to the actual allocation
+** size requested by each pcache, which is the database page-size plus
+** the various header structures used by the pcache, pager and btree layers.
+** Usually around (pgsz+200) bytes.
+**
+** This size (pgsz+200) bytes is not allocated efficiently by some
+** implementations of malloc. In particular, some implementations are only
+** able to allocate blocks of memory chunks of 2^N bytes, where N is some
+** integer value. Since the page-size is a power of 2, this means we
+** end up wasting (pgsz-200) bytes in each allocation.
+**
+** If SQLITE_PAGECACHE_BLOCKALLOC is defined, the (pgsz+200) byte blocks
+** are not allocated directly. Instead, blocks of roughly M*(pgsz+200) bytes
+** are requested from malloc allocator. After a block is returned,
+** sqlite3MallocSize() is used to determine how many (pgsz+200) byte
+** allocations can fit in the space returned by malloc(). This value may
+** be more than M.
+**
+** The blocks are stored in a doubly-linked list. Variable PGroupBlock.nEntry
+** contains the number of allocations that will fit in the aData[] space.
+** nEntry is limited to the number of bits in bitmask mUsed. If a slot
+** within aData is in use, the corresponding bit in mUsed is set. Thus
+** when (mUsed+1==(1 << nEntry)) the block is completely full.
+**
+** Each time a slot within a block is freed, the block is moved to the start
+** of the linked-list. And if a block becomes completely full, then it is
+** moved to the end of the list. As a result, when searching for a free
+** slot, only the first block in the list need be examined. If it is full,
+** then it is guaranteed that all blocks are full.
+*/
+struct PGroupBlockList { /* One list of blocks per distinct allocation size */
+ int nByte; /* Size of each allocation in bytes */
+ PGroupBlock *pFirst; /* First PGroupBlock in list */
+ PGroupBlock *pLast; /* Last PGroupBlock in list */
+ PGroupBlockList *pNext; /* Next block-list attached to group */
+};
+
+struct PGroupBlock { /* Header of one malloc'd block holding nEntry slots */
+ Bitmask mUsed; /* Mask of used slots (bit i set => slot i in use) */
+ int nEntry; /* Maximum number of allocations in aData[] (kept below BMS) */
+ u8 *aData; /* Pointer to data block; slot i is at aData[i*pList->nByte] */
+ PGroupBlock *pNext; /* Next PGroupBlock in list */
+ PGroupBlock *pPrev; /* Previous PGroupBlock in list */
+ PGroupBlockList *pList; /* Owner list */
};
+/* Minimum value for PGroupBlock.nEntry */
+#define PAGECACHE_BLOCKALLOC_MINENTRY 15
+
/* Each page cache is an instance of the following object. Every
** open database file (including each in-memory database and each
** temporary or transient database) has a single page cache which
@@ -159,6 +220,17 @@ static SQLITE_WSD struct PCacheGlobal {
#define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage)
/*
+** Macros used by the SQLITE_PAGECACHE_BLOCKALLOC code to store/retrieve
+** a PGroupBlock pointer based on a pointer to a page buffer.
+*/
+#define PAGE_SET_BLOCKPTR(pCache, pPg, pBlock) \
+ ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) = pBlock )
+
+#define PAGE_GET_BLOCKPTR(pCache, pPg) \
+ ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) )
+
+
+/*
** Macros to enter and leave the PCache LRU mutex.
*/
#define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex)
@@ -283,13 +355,146 @@ static int pcache1MemSize(void *p){
}
#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+/*
+** The block pBlock belongs to list pList but is not currently linked in.
+** Insert it into the start of the list.
+**
+** pBlock becomes pList->pFirst and its pPrev is cleared. If the list
+** was empty beforehand (pList->pLast is asserted to be 0 in that case)
+** pBlock also becomes pList->pLast; otherwise the former first block's
+** pPrev is pointed back at pBlock.
+*/
+static void addBlockToList(PGroupBlockList *pList, PGroupBlock *pBlock){
+ pBlock->pPrev = 0;
+ pBlock->pNext = pList->pFirst;
+ pList->pFirst = pBlock;
+ if( pBlock->pNext ){
+ pBlock->pNext->pPrev = pBlock;
+ }else{
+ assert( pList->pLast==0 );
+ pList->pLast = pBlock;
+ }
+}
+
+/*
+** If there are no blocks in the list headed by pList, remove pList
+** from the pGroup->pBlockList list and free it with sqlite3_free().
+**
+** The group mutex must be held by the caller (asserted). The search
+** loop has no end-of-list check, so pList must actually be linked into
+** pGroup->pBlockList. After this call returns, pList must not be used
+** if it was empty on entry.
+*/
+static void freeListIfEmpty(PGroup *pGroup, PGroupBlockList *pList){
+ assert( sqlite3_mutex_held(pGroup->mutex) );
+ if( pList->pFirst==0 ){
+ PGroupBlockList **pp;
+ for(pp=&pGroup->pBlockList; *pp!=pList; pp=&(*pp)->pNext);
+ *pp = (*pp)->pNext;
+ sqlite3_free(pList);
+ }
+}
+#endif /* SQLITE_PAGECACHE_BLOCKALLOC */
+
/*
** Allocate a new page object initially associated with cache pCache.
*/
static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
int nByte = sizeof(PgHdr1) + pCache->szPage;
- void *pPg = pcache1Alloc(nByte);
+ void *pPg = 0;
PgHdr1 *p;
+
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+ PGroup *pGroup = pCache->pGroup;
+ PGroupBlockList *pList;
+ PGroupBlock *pBlock;
+ int i;
+
+ nByte += sizeof(PGroupBlockList *);
+ nByte = ROUND8(nByte);
+
+ for(pList=pGroup->pBlockList; pList; pList=pList->pNext){
+ if( pList->nByte==nByte ) break;
+ }
+ if( pList==0 ){
+ PGroupBlockList *pNew;
+ assert( pGroup->isBusy==0 );
+ assert( sqlite3_mutex_held(pGroup->mutex) );
+ pGroup->isBusy = 1; /* Disable sqlite3PcacheReleaseMemory() */
+ pNew = (PGroupBlockList *)sqlite3MallocZero(sizeof(PGroupBlockList));
+ pGroup->isBusy = 0; /* Reenable sqlite3PcacheReleaseMemory() */
+ if( pNew==0 ){
+ /* malloc() failure. Return early. */
+ return 0;
+ }
+#ifdef SQLITE_DEBUG
+ for(pList=pGroup->pBlockList; pList; pList=pList->pNext){
+ assert( pList->nByte!=nByte );
+ }
+#endif
+ pNew->nByte = nByte;
+ pNew->pNext = pGroup->pBlockList;
+ pGroup->pBlockList = pNew;
+ pList = pNew;
+ }
+
+ pBlock = pList->pFirst;
+ if( pBlock==0 || pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) ){
+ int sz;
+
+ /* Allocate a new block. Try to allocate enough space for the PGroupBlock
+ ** structure and MINENTRY allocations of nByte bytes each. If the
+ ** allocator returns more memory than requested, then more than MINENTRY
+ ** allocations may fit in it. */
+ assert( sqlite3_mutex_held(pGroup->mutex) );
+ pcache1LeaveMutex(pCache->pGroup);
+ sz = sizeof(PGroupBlock) + PAGECACHE_BLOCKALLOC_MINENTRY * nByte;
+ pBlock = (PGroupBlock *)sqlite3Malloc(sz);
+ pcache1EnterMutex(pCache->pGroup);
+
+ if( !pBlock ){
+ freeListIfEmpty(pGroup, pList);
+ return 0;
+ }
+ pBlock->nEntry = (sqlite3MallocSize(pBlock) - sizeof(PGroupBlock)) / nByte;
+ if( pBlock->nEntry>=BMS ){
+ pBlock->nEntry = BMS-1;
+ }
+ pBlock->pList = pList;
+ pBlock->mUsed = 0;
+ pBlock->aData = (u8 *)&pBlock[1];
+ addBlockToList(pList, pBlock);
+
+ sz = sqlite3MallocSize(pBlock);
+ sqlite3_mutex_enter(pcache1.mutex);
+ sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz);
+ sqlite3_mutex_leave(pcache1.mutex);
+ }
+
+ for(i=0; pPg==0 && ALWAYS(i<pBlock->nEntry); i++){
+ if( 0==(pBlock->mUsed & ((Bitmask)1<<i)) ){
+ pBlock->mUsed |= ((Bitmask)1<<i);
+ pPg = (void *)&pBlock->aData[pList->nByte * i];
+ }
+ }
+ assert( pPg );
+ PAGE_SET_BLOCKPTR(pCache, pPg, pBlock);
+
+ /* If the block is now full, shift it to the end of the list */
+ if( pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) && pList->pLast!=pBlock ){
+ assert( pList->pFirst==pBlock );
+ assert( pBlock->pPrev==0 );
+ assert( pList->pLast->pNext==0 );
+ pList->pFirst = pBlock->pNext;
+ pList->pFirst->pPrev = 0;
+ pBlock->pPrev = pList->pLast;
+ pBlock->pNext = 0;
+ pList->pLast->pNext = pBlock;
+ pList->pLast = pBlock;
+ }
+ p = PAGE_TO_PGHDR1(pCache, pPg);
+ if( pCache->bPurgeable ){
+ pCache->pGroup->nCurrentPage++;
+ }
+#else
+ /* The group mutex must be released before pcache1Alloc() is called. This
+ ** is because it may call sqlite3_release_memory(), which assumes that
+ ** this mutex is not held. */
+ assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
+ pcache1LeaveMutex(pCache->pGroup);
+ pPg = pcache1Alloc(nByte);
+ pcache1EnterMutex(pCache->pGroup);
if( pPg ){
p = PAGE_TO_PGHDR1(pCache, pPg);
if( pCache->bPurgeable ){
@@ -298,6 +503,7 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
}else{
p = 0;
}
+#endif
return p;
}
@@ -311,10 +517,52 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
static void pcache1FreePage(PgHdr1 *p){
if( ALWAYS(p) ){
PCache1 *pCache = p->pCache;
+ void *pPg = PGHDR1_TO_PAGE(p); /* Page pointer obtained from header p */
+ /* Release pPg: return its slot to the owning block, or pcache1Free() it. */
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+ PGroupBlock *pBlock = PAGE_GET_BLOCKPTR(pCache, pPg); /* Block holding pPg */
+ PGroupBlockList *pList = pBlock->pList; /* List that pBlock is linked into */
+ int i = ((u8 *)pPg - pBlock->aData) / pList->nByte; /* Slot index of pPg */
+
+ assert( pPg==(void *)&pBlock->aData[i*pList->nByte] );
+ assert( pBlock->mUsed & ((Bitmask)1<<i) );
+ pBlock->mUsed &= ~((Bitmask)1<<i); /* Clear the in-use bit for slot i */
+
+ /* Remove the block from the list. If it is completely empty, free it.
+ ** Or if it is not completely empty, re-insert it at the start of the
+ ** list. */
+ if( pList->pFirst==pBlock ){ /* Fix up the link that leads to pBlock */
+ pList->pFirst = pBlock->pNext;
+ if( pList->pFirst ) pList->pFirst->pPrev = 0;
+ }else{
+ pBlock->pPrev->pNext = pBlock->pNext;
+ }
+ if( pList->pLast==pBlock ){ /* Fix up the link that follows pBlock */
+ pList->pLast = pBlock->pPrev;
+ if( pList->pLast ) pList->pLast->pNext = 0;
+ }else{
+ pBlock->pNext->pPrev = pBlock->pPrev;
+ }
+
+ if( pBlock->mUsed==0 ){ /* No slot of this block is in use any more */
+ PGroup *pGroup = p->pCache->pGroup;
+
+ int sz = sqlite3MallocSize(pBlock);
+ sqlite3_mutex_enter(pcache1.mutex);
+ sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -sz); /* Subtract the block's size from the overflow statistic */
+ sqlite3_mutex_leave(pcache1.mutex);
+ freeListIfEmpty(pGroup, pList);
+ sqlite3_free(pBlock);
+ }else{
+ addBlockToList(pList, pBlock);
+ }
+#else
+ assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) );
+ pcache1Free(pPg);
+#endif
if( pCache->bPurgeable ){
pCache->pGroup->nCurrentPage--;
}
- pcache1Free(PGHDR1_TO_PAGE(p));
}
}
@@ -752,9 +1000,7 @@ static void *pcache1Fetch(sqlite3_pcache *p, unsigned int iKey, int createFlag){
*/
if( !pPage ){
if( createFlag==1 ) sqlite3BeginBenignMalloc();
- pcache1LeaveMutex(pGroup);
pPage = pcache1AllocPage(pCache);
- pcache1EnterMutex(pGroup);
if( createFlag==1 ) sqlite3EndBenignMalloc();
}
@@ -924,6 +1170,9 @@ void sqlite3PCacheSetDefault(void){
*/
int sqlite3PcacheReleaseMemory(int nReq){
int nFree = 0;
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+ if( pcache1.grp.isBusy ) return 0;
+#endif
assert( sqlite3_mutex_notheld(pcache1.grp.mutex) );
assert( sqlite3_mutex_notheld(pcache1.mutex) );
if( pcache1.pStart==0 ){
diff --git a/src/sqlite.h.in b/src/sqlite.h.in
index b8e8bfbc4..ba5c20265 100644
--- a/src/sqlite.h.in
+++ b/src/sqlite.h.in
@@ -752,6 +752,20 @@ struct sqlite3_io_methods {
** is not changed but instead the prior value of that setting is written
** into the array entry, allowing the current retry settings to be
** interrogated. The zDbName parameter is ignored.
+**
+** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the
+** persistent [WAL | Write Ahead Log] setting. By default, the auxiliary
+** write ahead log and shared memory files used for transaction control
+** are automatically deleted when the latest connection to the database
+** closes. Setting persistent WAL mode causes those files to persist after
+** close. Persisting the files is useful when other processes that do not
+** have write permission on the directory containing the database file want
+** to read the database file, as the WAL and shared memory files must exist
+** in order for the database to be readable. The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable persistent WAL mode or 1 to enable persistent
+** WAL mode. If the integer is -1, then it is overwritten with the current
+** WAL persistence setting.
**
*/
#define SQLITE_FCNTL_LOCKSTATE 1
@@ -763,6 +777,7 @@ struct sqlite3_io_methods {
#define SQLITE_FCNTL_FILE_POINTER 7
#define SQLITE_FCNTL_SYNC_OMITTED 8
#define SQLITE_FCNTL_WIN32_AV_RETRY 9
+#define SQLITE_FCNTL_PERSIST_WAL 10
/*
** CAPI3REF: Mutex Handle
@@ -1190,16 +1205,10 @@ int sqlite3_db_config(sqlite3*, int op, ...);
** order to verify that SQLite recovers gracefully from such
** conditions.
**
-** The xMalloc and xFree methods must work like the
-** malloc() and free() functions from the standard C library.
-** The xRealloc method must work like realloc() from the standard C library
-** with the exception that if the second argument to xRealloc is zero,
-** xRealloc must be a no-op - it must not perform any allocation or
-** deallocation. ^SQLite guarantees that the second argument to
+** The xMalloc, xRealloc, and xFree methods must work like the
+** malloc(), realloc() and free() functions from the standard C library.
+** ^SQLite guarantees that the second argument to
** xRealloc is always a value returned by a prior call to xRoundup.
-** And so in cases where xRoundup always returns a positive number,
-** xRealloc can perform exactly as the standard library realloc() and
-** still be in compliance with this specification.
**
** xSize should return the allocated size of a memory allocation
** previously obtained from xMalloc or xRealloc. The allocated size
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index bcf6a591a..5934c7431 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -147,19 +147,25 @@
** specify which memory allocation subsystem to use.
**
** SQLITE_SYSTEM_MALLOC // Use normal system malloc()
+** SQLITE_WIN32_MALLOC // Use Win32 native heap API
** SQLITE_MEMDEBUG // Debugging version of system malloc()
**
+** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the
+** assert() macro is enabled, each call into the Win32 native heap subsystem
+** will cause HeapValidate to be called. If heap validation should fail, an
+** assertion will be triggered.
+**
** (Historical note: There used to be several other options, but we've
** pared it down to just these two.)
**
** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as
** the default.
*/
-#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_MEMDEBUG)>1
+#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_WIN32_MALLOC)+defined(SQLITE_MEMDEBUG)>1
# error "At most one of the following compile-time configuration options\
- is allows: SQLITE_SYSTEM_MALLOC, SQLITE_MEMDEBUG"
+ is allowed: SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG"
#endif
-#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_MEMDEBUG)==0
+#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_WIN32_MALLOC)+defined(SQLITE_MEMDEBUG)==0
# define SQLITE_SYSTEM_MALLOC 1
#endif
@@ -367,6 +373,14 @@
#endif
/*
+** If all temporary storage is in-memory, then omit the external merge-sort
+** logic since it is superfluous.
+*/
+#if SQLITE_TEMP_STORE==3 && !defined(SQLITE_OMIT_MERGE_SORT)
+# define SQLITE_OMIT_MERGE_SORT
+#endif
+
+/*
** GCC does not define the offsetof() macro so we'll have to do it
** ourselves.
*/
diff --git a/src/tclsqlite.c b/src/tclsqlite.c
index 339b8967d..d2a0582e4 100644
--- a/src/tclsqlite.c
+++ b/src/tclsqlite.c
@@ -2242,6 +2242,8 @@ static int DbObjCmd(void *cd, Tcl_Interp *interp, int objc,Tcl_Obj *const*objv){
if( choice==DB_ONECOLUMN ){
if( rc==TCL_OK ){
Tcl_SetObjResult(interp, dbEvalColumnValue(&sEval, 0));
+ }else if( rc==TCL_BREAK ){
+ Tcl_ResetResult(interp);
}
}else if( rc==TCL_BREAK || rc==TCL_OK ){
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(rc==TCL_OK));
diff --git a/src/test1.c b/src/test1.c
index 3301ab740..59b570c28 100644
--- a/src/test1.c
+++ b/src/test1.c
@@ -4991,9 +4991,8 @@ static int file_control_chunksize_test(
/*
** tclcmd: file_control_sizehint_test DB DBNAME SIZE
**
-** This TCL command runs the sqlite3_file_control interface and
-** verifies correct operation of the SQLITE_GET_LOCKPROXYFILE and
-** SQLITE_SET_LOCKPROXYFILE verbs.
+** This TCL command runs the sqlite3_file_control interface
+** with SQLITE_FCNTL_SIZE_HINT
*/
static int file_control_sizehint_test(
ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
@@ -5129,6 +5128,38 @@ static int file_control_win32_av_retry(
return TCL_OK;
}
+/*
+** tclcmd:   file_control_persist_wal DB PERSIST-FLAG
+**
+** Invoke sqlite3_file_control() on DB with the SQLITE_FCNTL_PERSIST_WAL
+** opcode and PERSIST-FLAG.  The result is "RC FLAG" as left by the call.
+*/
+static int file_control_persist_wal(
+  ClientData clientData, /* Not used */
+  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
+  int objc,              /* Number of arguments */
+  Tcl_Obj *CONST objv[]  /* Command arguments */
+){
+  sqlite3 *db;           /* Database connection named by objv[1] */
+  int rc;                /* Return code from sqlite3_file_control() */
+  int bPersist;          /* In: requested flag.  Out: may be overwritten */
+  char z[100];           /* Space to render the "RC FLAG" result */
+
+  if( objc!=3 ){
+    Tcl_AppendResult(interp, "wrong # args: should be \"",
+        Tcl_GetStringFromObj(objv[0], 0), " DB FLAG", (char*)0);
+    return TCL_ERROR;
+  }
+  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){
+    return TCL_ERROR;
+  }
+  if( Tcl_GetIntFromObj(interp, objv[2], &bPersist) ) return TCL_ERROR;
+  rc = sqlite3_file_control(db, NULL, SQLITE_FCNTL_PERSIST_WAL, (void*)&bPersist);
+  sqlite3_snprintf(sizeof(z), z, "%d %d", rc, bPersist);
+  Tcl_AppendResult(interp, z, (char*)0);
+  return TCL_OK;
+}
+
/*
** tclcmd: sqlite3_vfs_list
@@ -5613,6 +5644,7 @@ static int test_test_control(
** background thread.
*/
struct win32FileLocker {
+ char *evName; /* Name of event to signal thread startup */
HANDLE h; /* Handle of the file to be locked */
int delay1; /* Delay before locking */
int delay2; /* Delay before unlocking */
@@ -5628,6 +5660,13 @@ struct win32FileLocker {
*/
static void win32_file_locker(void *pAppData){
struct win32FileLocker *p = (struct win32FileLocker*)pAppData;
+ if( p->evName ){
+ HANDLE ev = OpenEvent(EVENT_MODIFY_STATE, FALSE, p->evName);
+ if ( ev ){
+ SetEvent(ev);
+ CloseHandle(ev);
+ }
+ }
if( p->delay1 ) Sleep(p->delay1);
if( LockFile(p->h, 0, 0, 100000000, 0) ){
Sleep(p->delay2);
@@ -5656,16 +5695,18 @@ static int win32_file_lock(
int objc,
Tcl_Obj *CONST objv[]
){
- static struct win32FileLocker x = { 0, 0, 0 };
+ static struct win32FileLocker x = { "win32_file_lock", 0, 0, 0, 0, 0 };
const char *zFilename;
+ char zBuf[200];
int retry = 0;
+ HANDLE ev;
+ DWORD wResult;
if( objc!=4 && objc!=1 ){
Tcl_WrongNumArgs(interp, 1, objv, "FILENAME DELAY1 DELAY2");
return TCL_ERROR;
}
if( objc==1 ){
- char zBuf[200];
sqlite3_snprintf(sizeof(zBuf), zBuf, "%d %d %d %d %d",
x.ok, x.err, x.delay1, x.delay2, x.h);
Tcl_AppendResult(interp, zBuf, (char*)0);
@@ -5689,8 +5730,20 @@ static int win32_file_lock(
Tcl_AppendResult(interp, "cannot open file: ", zFilename, (char*)0);
return TCL_ERROR;
}
+ ev = CreateEvent(NULL, TRUE, FALSE, x.evName);
+ if ( !ev ){
+ Tcl_AppendResult(interp, "cannot create event: ", x.evName, (char*)0);
+ return TCL_ERROR;
+ }
_beginthread(win32_file_locker, 0, (void*)&x);
Sleep(0);
+ if ( (wResult = WaitForSingleObject(ev, 10000))!=WAIT_OBJECT_0 ){
+ sqlite3_snprintf(sizeof(zBuf), zBuf, "0x%x", wResult);
+ Tcl_AppendResult(interp, "wait failed: ", zBuf, (char*)0);
+ CloseHandle(ev);
+ return TCL_ERROR;
+ }
+ CloseHandle(ev);
return TCL_OK;
}
#endif
@@ -5928,6 +5981,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
{ "file_control_chunksize_test", file_control_chunksize_test, 0 },
{ "file_control_sizehint_test", file_control_sizehint_test, 0 },
{ "file_control_win32_av_retry", file_control_win32_av_retry, 0 },
+ { "file_control_persist_wal", file_control_persist_wal, 0 },
{ "sqlite3_vfs_list", vfs_list, 0 },
{ "sqlite3_create_function_v2", test_create_function_v2, 0 },
diff --git a/src/test6.c b/src/test6.c
index c9c8a4d20..23fb14c5b 100644
--- a/src/test6.c
+++ b/src/test6.c
@@ -505,6 +505,16 @@ static int cfCheckReservedLock(sqlite3_file *pFile, int *pResOut){
return sqlite3OsCheckReservedLock(((CrashFile *)pFile)->pRealFile, pResOut);
}
static int cfFileControl(sqlite3_file *pFile, int op, void *pArg){
+ if( op==SQLITE_FCNTL_SIZE_HINT ){ /* Size hints are handled here and never reach the real file */
+ CrashFile *pCrash = (CrashFile *)pFile;
+ i64 nByte = *(i64 *)pArg; /* Hinted size, read from the i64 that pArg points to */
+ if( nByte>pCrash->iSize ){ /* Only a hint larger than the recorded size has any effect */
+ if( SQLITE_OK==writeListAppend(pFile, nByte, 0, 0) ){ /* NOTE(review): presumably records a zero-length write at offset nByte - confirm */
+ pCrash->iSize = nByte;
+ }
+ }
+ return SQLITE_OK; /* Success is reported even if the append above failed */
+ }
return sqlite3OsFileControl(((CrashFile *)pFile)->pRealFile, op, pArg);
}
diff --git a/src/test_config.c b/src/test_config.c
index e8d6f88f6..5af60bafd 100644
--- a/src/test_config.c
+++ b/src/test_config.c
@@ -363,6 +363,12 @@ Tcl_SetVar2(interp, "sqlite_options", "long_double",
Tcl_SetVar2(interp, "sqlite_options", "memorymanage", "0", TCL_GLOBAL_ONLY);
#endif
+#ifdef SQLITE_OMIT_MERGE_SORT
+ Tcl_SetVar2(interp, "sqlite_options", "mergesort", "0", TCL_GLOBAL_ONLY);
+#else
+ Tcl_SetVar2(interp, "sqlite_options", "mergesort", "1", TCL_GLOBAL_ONLY);
+#endif
+
#ifdef SQLITE_OMIT_OR_OPTIMIZATION
Tcl_SetVar2(interp, "sqlite_options", "or_opt", "0", TCL_GLOBAL_ONLY);
#else
@@ -549,6 +555,12 @@ Tcl_SetVar2(interp, "sqlite_options", "long_double",
Tcl_SetVar2(interp, "sqlite_options", "yytrackmaxstackdepth", "0", TCL_GLOBAL_ONLY);
#endif
+#ifdef SQLITE_PAGECACHE_BLOCKALLOC
+ Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "1", TCL_GLOBAL_ONLY);
+#else
+ Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "0", TCL_GLOBAL_ONLY);
+#endif
+
#define LINKVAR(x) { \
static const int cv_ ## x = SQLITE_ ## x; \
Tcl_LinkVar(interp, "SQLITE_" #x, (char *)&(cv_ ## x), \
diff --git a/src/test_malloc.c b/src/test_malloc.c
index 5023dca44..46ec94d32 100644
--- a/src/test_malloc.c
+++ b/src/test_malloc.c
@@ -1222,7 +1222,7 @@ static int test_dump_memsys3(
return TCL_ERROR;
}
- switch( (int)clientData ){
+ switch( SQLITE_PTR_TO_INT(clientData) ){
case 3: {
#ifdef SQLITE_ENABLE_MEMSYS3
extern void sqlite3Memsys3Dump(const char*);
@@ -1460,7 +1460,7 @@ int Sqlitetest_malloc_Init(Tcl_Interp *interp){
};
int i;
for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
- ClientData c = (ClientData)aObjCmd[i].clientData;
+ ClientData c = (ClientData)SQLITE_INT_TO_PTR(aObjCmd[i].clientData);
Tcl_CreateObjCommand(interp, aObjCmd[i].zName, aObjCmd[i].xProc, c, 0);
}
return TCL_OK;
diff --git a/src/test_multiplex.c b/src/test_multiplex.c
index d31684794..5d29607ac 100644
--- a/src/test_multiplex.c
+++ b/src/test_multiplex.c
@@ -39,8 +39,13 @@
** URI.
**
** The multiplex VFS allows databases up to 32 GiB in size. But it splits
-** the files up into 1 GiB pieces, so that they will work even on filesystems
-** that do not support large files.
+** the files up into smaller pieces, so that they will work even on
+** filesystems that do not support large files. The default chunk size
+** is 2147418112 bytes (which is 64KiB less than 2GiB) but this can be
+** changed at compile-time by defining the SQLITE_MULTIPLEX_CHUNK_SIZE
+** macro. Use the "chunksize=NNNN" query parameter with a URI filename
+** in order to select an alternative chunk size for individual connections
+** at run-time.
*/
#include "sqlite3.h"
#include <string.h>
diff --git a/src/test_quota.c b/src/test_quota.c
index 9b0e4a9dd..74d1a6d3b 100644
--- a/src/test_quota.c
+++ b/src/test_quota.c
@@ -95,6 +95,7 @@ struct quotaFile {
quotaGroup *pGroup; /* Quota group to which this file belongs */
sqlite3_int64 iSize; /* Current size of this file */
int nRef; /* Number of times this file is open */
+ int deleteOnClose; /* True to delete this file when it closes */
quotaFile *pNext, **ppPrev; /* Linked list of files in the same group */
};
@@ -164,12 +165,45 @@ static struct {
static void quotaEnter(void){ sqlite3_mutex_enter(gQuota.pMutex); }
static void quotaLeave(void){ sqlite3_mutex_leave(gQuota.pMutex); }
+/* Return the number of files in pGroup whose reference count is non-zero.
+*/
+static int quotaGroupOpenFileCount(quotaGroup *pGroup){
+  quotaFile *pIter;             /* For walking the group's list of files */
+  int nOpen = 0;                /* Files seen so far with nRef!=0 */
+  for(pIter=pGroup->pFiles; pIter; pIter=pIter->pNext){
+    if( pIter->nRef==0 ) continue;
+    nOpen++;
+  }
+  return nOpen;
+}
+
+/* Unlink pFile from its quota group, deduct its size from the group
+** total, and free the quotaFile object. */
+static void quotaRemoveFile(quotaFile *pFile){
+  quotaFile *pAfter = pFile->pNext;       /* Entry following pFile, or NULL */
+  if( pAfter ) pAfter->ppPrev = pFile->ppPrev;
+  *pFile->ppPrev = pAfter;
+  pFile->pGroup->iSize -= pFile->iSize;
+  sqlite3_free(pFile);
+}
+
+/* Remove every file from pGroup.  All of them must already be closed. */
+static void quotaRemoveAllFiles(quotaGroup *pGroup){
+  quotaFile *pHead;             /* File currently at the head of the list */
+  /* quotaRemoveFile() unlinks the head, so re-read pGroup->pFiles each time */
+  for(pHead=pGroup->pFiles; pHead; pHead=pGroup->pFiles){
+    assert( pHead->nRef==0 );
+    quotaRemoveFile(pHead);
+  }
+}
+
/* If the reference count and threshold for a quotaGroup are both
** zero, then destroy the quotaGroup.
*/
static void quotaGroupDeref(quotaGroup *pGroup){
- if( pGroup->pFiles==0 && pGroup->iLimit==0 ){
+ if( pGroup->iLimit==0 && quotaGroupOpenFileCount(pGroup)==0 ){
+ quotaRemoveAllFiles(pGroup);
*pGroup->ppPrev = pGroup->pNext;
if( pGroup->pNext ) pGroup->pNext->ppPrev = pGroup->ppPrev;
if( pGroup->xDestroy ) pGroup->xDestroy(pGroup->pArg);
@@ -276,6 +310,17 @@ static sqlite3_file *quotaSubOpen(sqlite3_file *pConn){
return (sqlite3_file*)&p[1];
}
+/* Search pGroup for a file whose name is exactly zName.  Return the
+** matching quotaFile, or NULL if there is none.
+*/
+static quotaFile *quotaFindFile(quotaGroup *pGroup, const char *zName){
+  quotaFile *pHit;              /* Candidate being compared against zName */
+  for(pHit=pGroup->pFiles; pHit; pHit=pHit->pNext){
+    if( strcmp(pHit->zFilename, zName)==0 ) break;
+  }
+  return pHit;
+}
+
/************************* VFS Method Wrappers *****************************/
/*
** This is the xOpen method used for the "quota" VFS.
@@ -319,8 +364,7 @@ static int quotaOpen(
pSubOpen = quotaSubOpen(pConn);
rc = pOrigVfs->xOpen(pOrigVfs, zName, pSubOpen, flags, pOutFlags);
if( rc==SQLITE_OK ){
- for(pFile=pGroup->pFiles; pFile && strcmp(pFile->zFilename, zName);
- pFile=pFile->pNext){}
+ pFile = quotaFindFile(pGroup, zName);
if( pFile==0 ){
int nName = strlen(zName);
pFile = (quotaFile *)sqlite3_malloc( sizeof(*pFile) + nName + 1 );
@@ -337,6 +381,7 @@ static int quotaOpen(
pFile->ppPrev = &pGroup->pFiles;
pGroup->pFiles = pFile;
pFile->pGroup = pGroup;
+ pFile->deleteOnClose = (flags & SQLITE_OPEN_DELETEONCLOSE)!=0;
}
pFile->nRef++;
pQuotaOpen->pFile = pFile;
@@ -351,6 +396,49 @@ static int quotaOpen(
return rc;
}
+/*
+** Implementation of xDelete for the "quota" VFS.
+**
+** The delete is carried out by the original VFS.  On success, a file
+** that is tracked by a quota group is removed from that group at once
+** if it is not open, or flagged for removal when it is closed.
+*/
+static int quotaDelete(
+  sqlite3_vfs *pVfs,          /* The quota VFS */
+  const char *zName,          /* Name of file to be deleted */
+  int syncDir                 /* Do a directory sync after deleting */
+){
+  sqlite3_vfs *pRealVfs = gQuota.pOrigVfs;  /* VFS that does the real work */
+  quotaGroup *pOwner;                       /* Quota group matching zName */
+  quotaFile *pEntry;                        /* Entry for zName in pOwner */
+  int rc;                                   /* Result of the real xDelete */
+
+  /* Perform the delete itself.  If it fails, the quota bookkeeping is
+  ** left exactly as it was.
+  */
+  rc = pRealVfs->xDelete(pRealVfs, zName, syncDir);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Look the file up while holding the quota mutex.  An entry that is
+  ** still open (nRef!=0) is only marked deleteOnClose; a closed entry is
+  ** unlinked now and quotaGroupDeref() is run on its group.
+  */
+  quotaEnter();
+  pOwner = quotaGroupFind(zName);
+  pEntry = pOwner ? quotaFindFile(pOwner, zName) : 0;
+  if( pEntry==0 ){
+    /* Not under quota management: nothing to update */
+  }else if( pEntry->nRef ){
+    pEntry->deleteOnClose = 1;
+  }else{
+    quotaRemoveFile(pEntry);
+    quotaGroupDeref(pOwner);
+  }
+  quotaLeave();
+  return rc;
+}
+
+
/************************ I/O Method Wrappers *******************************/
/* xClose requests get passed through to the original VFS. But we
@@ -367,11 +455,8 @@ static int quotaClose(sqlite3_file *pConn){
pFile->nRef--;
if( pFile->nRef==0 ){
quotaGroup *pGroup = pFile->pGroup;
- pGroup->iSize -= pFile->iSize;
- if( pFile->pNext ) pFile->pNext->ppPrev = pFile->ppPrev;
- *pFile->ppPrev = pFile->pNext;
+ if( pFile->deleteOnClose ) quotaRemoveFile(pFile);
quotaGroupDeref(pGroup);
- sqlite3_free(pFile);
}
quotaLeave();
return rc;
@@ -586,6 +671,7 @@ int sqlite3_quota_initialize(const char *zOrigVfsName, int makeDefault){
gQuota.pOrigVfs = pOrigVfs;
gQuota.sThisVfs = *pOrigVfs;
gQuota.sThisVfs.xOpen = quotaOpen;
+ gQuota.sThisVfs.xDelete = quotaDelete;
gQuota.sThisVfs.szOsFile += sizeof(quotaConn);
gQuota.sThisVfs.zName = "quota";
gQuota.sIoMethodsV1.iVersion = 1;
@@ -617,19 +703,20 @@ int sqlite3_quota_initialize(const char *zOrigVfsName, int makeDefault){
** All SQLite database connections must be closed before calling this
** routine.
**
-** THIS ROUTINE IS NOT THREADSAFE. Call this routine exactly one while
+** THIS ROUTINE IS NOT THREADSAFE. Call this routine exactly once while
** shutting down in order to free all remaining quota groups.
*/
int sqlite3_quota_shutdown(void){
quotaGroup *pGroup;
if( gQuota.isInitialized==0 ) return SQLITE_MISUSE;
for(pGroup=gQuota.pGroup; pGroup; pGroup=pGroup->pNext){
- if( pGroup->pFiles ) return SQLITE_MISUSE;
+ if( quotaGroupOpenFileCount(pGroup)>0 ) return SQLITE_MISUSE;
}
while( gQuota.pGroup ){
pGroup = gQuota.pGroup;
gQuota.pGroup = pGroup->pNext;
pGroup->iLimit = 0;
+ assert( quotaGroupOpenFileCount(pGroup)==0 );
quotaGroupDeref(pGroup);
}
gQuota.isInitialized = 0;
@@ -708,6 +795,43 @@ int sqlite3_quota_set(
return SQLITE_OK;
}
+/*
+** Bring the named file under quota management, or refresh its size if it
+** is already managed. If it cannot be opened, its quota entry is removed.
+*/
+int sqlite3_quota_file(const char *zFilename){
+ char *zFull; /* Full pathname; stored just after the file handle inside fd */
+ sqlite3_file *fd; /* Temporary file handle, freed before returning */
+ int rc; /* Result code returned to the caller */
+ int outFlags = 0; /* Output flags from quotaOpen(); not examined */
+ sqlite3_int64 iSize; /* Receives the file size; not used afterwards */
+ fd = sqlite3_malloc(gQuota.sThisVfs.szOsFile + gQuota.sThisVfs.mxPathname+1); /* One allocation: handle followed by pathname buffer */
+ if( fd==0 ) return SQLITE_NOMEM;
+ zFull = gQuota.sThisVfs.szOsFile + (char*)fd;
+ rc = gQuota.pOrigVfs->xFullPathname(gQuota.pOrigVfs, zFilename,
+ gQuota.sThisVfs.mxPathname+1, zFull);
+ if( rc==SQLITE_OK ){
+ rc = quotaOpen(&gQuota.sThisVfs, zFull, fd,
+ SQLITE_OPEN_READONLY | SQLITE_OPEN_MAIN_DB, &outFlags);
+ }
+ if( rc==SQLITE_OK ){
+ fd->pMethods->xFileSize(fd, &iSize); /* NOTE(review): return value ignored; presumably called so the quota layer records the size - confirm */
+ fd->pMethods->xClose(fd);
+ }else if( rc==SQLITE_CANTOPEN ){ /* File could not be opened: drop any entry kept for it */
+ quotaGroup *pGroup;
+ quotaFile *pFile;
+ quotaEnter();
+ pGroup = quotaGroupFind(zFull);
+ if( pGroup ){
+ pFile = quotaFindFile(pGroup, zFull);
+ if( pFile ) quotaRemoveFile(pFile);
+ }
+ quotaLeave();
+ }
+ sqlite3_free(fd); /* Also releases the zFull buffer, which lives inside fd */
+ return rc;
+}
+
/***************************** Test Code ***********************************/
#ifdef SQLITE_TEST
@@ -885,6 +1009,32 @@ static int test_quota_set(
}
/*
+** tclcmd: sqlite3_quota_file FILENAME
+*/
+static int test_quota_file(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  const char *zErrName;           /* String from sqlite3TestErrorName() */
+  int rcQuota;                    /* Value returned by sqlite3_quota_file() */
+
+  /* Exactly one argument, the filename, is required. */
+  if( objc!=2 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "FILENAME");
+    return TCL_ERROR;
+  }
+
+  /* Call sqlite3_quota_file() and convert its return value to a string. */
+  rcQuota = sqlite3_quota_file(Tcl_GetString(objv[1]));
+  zErrName = sqlite3TestErrorName(rcQuota);
+
+  Tcl_SetResult(interp, (char *)zErrName, TCL_STATIC);
+  return TCL_OK;
+}
+
+/*
** tclcmd: sqlite3_quota_dump
*/
static int test_quota_dump(
@@ -917,6 +1067,8 @@ static int test_quota_dump(
Tcl_NewWideIntObj(pFile->iSize));
Tcl_ListObjAppendElement(interp, pFileTerm,
Tcl_NewWideIntObj(pFile->nRef));
+ Tcl_ListObjAppendElement(interp, pFileTerm,
+ Tcl_NewWideIntObj(pFile->deleteOnClose));
Tcl_ListObjAppendElement(interp, pGroupTerm, pFileTerm);
}
Tcl_ListObjAppendElement(interp, pResult, pGroupTerm);
@@ -939,6 +1091,7 @@ int Sqlitequota_Init(Tcl_Interp *interp){
{ "sqlite3_quota_initialize", test_quota_initialize },
{ "sqlite3_quota_shutdown", test_quota_shutdown },
{ "sqlite3_quota_set", test_quota_set },
+ { "sqlite3_quota_file", test_quota_file },
{ "sqlite3_quota_dump", test_quota_dump },
};
int i;
diff --git a/src/test_rtree.c b/src/test_rtree.c
index 5fc994ddf..9745b0054 100644
--- a/src/test_rtree.c
+++ b/src/test_rtree.c
@@ -18,6 +18,7 @@
/* Solely for the UNUSED_PARAMETER() macro. */
#include "sqliteInt.h"
+#ifdef SQLITE_ENABLE_RTREE
/*
** Type used to cache parameter information for the "circle" r-tree geometry
** callback.
@@ -230,6 +231,7 @@ static int cube_geom(
return SQLITE_OK;
}
+#endif /* SQLITE_ENABLE_RTREE */
static int register_cube_geom(
void * clientData,
diff --git a/src/test_syscall.c b/src/test_syscall.c
index b8b05c590..d484f22db 100644
--- a/src/test_syscall.c
+++ b/src/test_syscall.c
@@ -325,6 +325,7 @@ static int ts_pread64(int fd, void *aBuf, size_t nBuf, off_t off){
*/
static int ts_write(int fd, const void *aBuf, size_t nBuf){
if( tsIsFailErrno("write") ){
+ if( tsErrno("write")==EINTR ) orig_write(fd, aBuf, nBuf/2); /* For EINTR, write the first half of the buffer before reporting failure */
return -1;
}
return orig_write(fd, aBuf, nBuf);
}
@@ -671,4 +672,3 @@ int SqlitetestSyscall_Init(Tcl_Interp *interp){
return TCL_OK;
}
#endif
-
diff --git a/src/test_thread.c b/src/test_thread.c
index 3a13dd668..08df14c2c 100644
--- a/src/test_thread.c
+++ b/src/test_thread.c
@@ -305,7 +305,7 @@ static int sqlthread_id(
Tcl_Obj *CONST objv[]
){
Tcl_ThreadId id = Tcl_GetCurrentThread();
- Tcl_SetObjResult(interp, Tcl_NewIntObj((int)id));
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(SQLITE_PTR_TO_INT(id)));
UNUSED_PARAMETER(clientData);
UNUSED_PARAMETER(objc);
UNUSED_PARAMETER(objv);
diff --git a/src/test_vfs.c b/src/test_vfs.c
index a8b53526d..546cb7cf4 100644
--- a/src/test_vfs.c
+++ b/src/test_vfs.c
@@ -123,6 +123,8 @@ struct Testvfs {
#define TESTVFS_TRUNCATE_MASK 0x00002000
#define TESTVFS_ACCESS_MASK 0x00004000
#define TESTVFS_FULLPATHNAME_MASK 0x00008000
+#define TESTVFS_READ_MASK 0x00010000
+
#define TESTVFS_ALL_MASK 0x0001FFFF
@@ -325,8 +327,22 @@ static int tvfsRead(
int iAmt,
sqlite_int64 iOfst
){
- TestvfsFd *p = tvfsGetFd(pFile);
- return sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst);
+ int rc = SQLITE_OK;
+ TestvfsFd *pFd = tvfsGetFd(pFile);
+ Testvfs *p = (Testvfs *)pFd->pVfs->pAppData;
+ if( p->pScript && p->mask&TESTVFS_READ_MASK ){
+ tvfsExecTcl(p, "xRead",
+ Tcl_NewStringObj(pFd->zFilename, -1), pFd->pShmId, 0
+ );
+ tvfsResultCode(p, &rc);
+ }
+ if( rc==SQLITE_OK && p->mask&TESTVFS_READ_MASK && tvfsInjectIoerr(p) ){
+ rc = SQLITE_IOERR;
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3OsRead(pFd->pReal, zBuf, iAmt, iOfst);
+ }
+ return rc;
}
/*
@@ -1030,6 +1046,7 @@ static int testvfs_obj_cmd(
{ "xSync", TESTVFS_SYNC_MASK },
{ "xDelete", TESTVFS_DELETE_MASK },
{ "xWrite", TESTVFS_WRITE_MASK },
+ { "xRead", TESTVFS_READ_MASK },
{ "xTruncate", TESTVFS_TRUNCATE_MASK },
{ "xOpen", TESTVFS_OPEN_MASK },
{ "xClose", TESTVFS_CLOSE_MASK },
diff --git a/src/vdbe.c b/src/vdbe.c
index bec422a98..e7c7ed7fe 100644
--- a/src/vdbe.c
+++ b/src/vdbe.c
@@ -157,6 +157,13 @@ int sqlite3_found_count = 0;
*/
#define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0)
+/* Return true if the cursor was opened using the OP_OpenSorter opcode. */
+#ifdef SQLITE_OMIT_MERGE_SORT
+# define isSorter(x) 0
+#else
+# define isSorter(x) ((x)->pSorter!=0)
+#endif
+
/*
** Argument pMem points at a register that will be passed to a
** user-defined function or returned to the user as the result of a query.
@@ -666,7 +673,7 @@ int sqlite3VdbeExec(
assert( pOp->p2<=p->nMem );
pOut = &aMem[pOp->p2];
memAboutToChange(p, pOut);
- sqlite3VdbeMemReleaseExternal(pOut);
+ MemReleaseExt(pOut);
pOut->flags = MEM_Int;
}
@@ -1027,6 +1034,11 @@ case OP_Move: {
zMalloc = pOut->zMalloc;
pOut->zMalloc = 0;
sqlite3VdbeMemMove(pOut, pIn1);
+#ifdef SQLITE_DEBUG
+ if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrom<&aMem[p1+pOp->p3] ){
+ pOut->pScopyFrom += p1 - pOp->p2;
+ }
+#endif
pIn1->zMalloc = zMalloc;
REGISTER_TRACE(p2++, pOut);
pIn1++;
@@ -2106,6 +2118,7 @@ case OP_Column: {
u32 szField; /* Number of bytes in the content of a field */
int szHdr; /* Size of the header size field at start of record */
int avail; /* Number of bytes of available data */
+ u32 t; /* A type code from the record header */
Mem *pReg; /* PseudoTable input register */
@@ -2117,7 +2130,6 @@ case OP_Column: {
assert( pOp->p3>0 && pOp->p3<=p->nMem );
pDest = &aMem[pOp->p3];
memAboutToChange(p, pDest);
- MemSetTypeFlag(pDest, MEM_Null);
zRec = 0;
/* This block sets the variable payloadSize to be the total number of
@@ -2161,7 +2173,7 @@ case OP_Column: {
rc = sqlite3BtreeDataSize(pCrsr, &payloadSize);
assert( rc==SQLITE_OK ); /* DataSize() cannot fail */
}
- }else if( pC->pseudoTableReg>0 ){
+ }else if( ALWAYS(pC->pseudoTableReg>0) ){
pReg = &aMem[pC->pseudoTableReg];
assert( pReg->flags & MEM_Blob );
assert( memIsValid(pReg) );
@@ -2174,9 +2186,10 @@ case OP_Column: {
payloadSize = 0;
}
- /* If payloadSize is 0, then just store a NULL */
+ /* If payloadSize is 0, then just store a NULL. This can happen because of
+ ** nullRow or because of a corrupt database. */
if( payloadSize==0 ){
- assert( pDest->flags&MEM_Null );
+ MemSetTypeFlag(pDest, MEM_Null);
goto op_column_out;
}
assert( db->aLimit[SQLITE_LIMIT_LENGTH]>=0 );
@@ -2283,8 +2296,14 @@ case OP_Column: {
for(i=0; i<nField; i++){
if( zIdx<zEndHdr ){
aOffset[i] = offset;
- zIdx += getVarint32(zIdx, aType[i]);
- szField = sqlite3VdbeSerialTypeLen(aType[i]);
+ if( zIdx[0]<0x80 ){
+ t = zIdx[0];
+ zIdx++;
+ }else{
+ zIdx += sqlite3GetVarint32(zIdx, &t);
+ }
+ aType[i] = t;
+ szField = sqlite3VdbeSerialTypeLen(t);
offset += szField;
if( offset<szField ){ /* True if offset overflows */
zIdx = &zEndHdr[1]; /* Forces SQLITE_CORRUPT return below */
@@ -2325,7 +2344,7 @@ case OP_Column: {
if( aOffset[p2] ){
assert( rc==SQLITE_OK );
if( zRec ){
- sqlite3VdbeMemReleaseExternal(pDest);
+ MemReleaseExt(pDest);
sqlite3VdbeSerialGet((u8 *)&zRec[aOffset[p2]], aType[p2], pDest);
}else{
len = sqlite3VdbeSerialTypeLen(aType[p2]);
@@ -2342,7 +2361,7 @@ case OP_Column: {
if( pOp->p4type==P4_MEM ){
sqlite3VdbeMemShallowCopy(pDest, pOp->p4.pMem, MEM_Static);
}else{
- assert( pDest->flags&MEM_Null );
+ MemSetTypeFlag(pDest, MEM_Null);
}
}
@@ -2538,7 +2557,7 @@ case OP_Count: { /* out2-prerelease */
BtCursor *pCrsr;
pCrsr = p->apCsr[pOp->p1]->pCursor;
- if( pCrsr ){
+ if( ALWAYS(pCrsr) ){
rc = sqlite3BtreeCount(pCrsr, &nEntry);
}else{
nEntry = 0;
@@ -3100,15 +3119,9 @@ case OP_OpenWrite: {
rc = sqlite3BtreeCursor(pX, p2, wrFlag, pKeyInfo, pCur->pCursor);
pCur->pKeyInfo = pKeyInfo;
- /* Since it performs no memory allocation or IO, the only values that
- ** sqlite3BtreeCursor() may return are SQLITE_EMPTY and SQLITE_OK.
- ** SQLITE_EMPTY is only returned when attempting to open the table
- ** rooted at page 1 of a zero-byte database. */
- assert( rc==SQLITE_EMPTY || rc==SQLITE_OK );
- if( rc==SQLITE_EMPTY ){
- pCur->pCursor = 0;
- rc = SQLITE_OK;
- }
+ /* Since it performs no memory allocation or IO, the only value that
+ ** sqlite3BtreeCursor() may return is SQLITE_OK. */
+ assert( rc==SQLITE_OK );
/* Set the VdbeCursor.isTable and isIndex variables. Previous versions of
** SQLite used to check if the root-page flags were sane at this point
@@ -3119,7 +3132,7 @@ case OP_OpenWrite: {
break;
}
-/* Opcode: OpenEphemeral P1 P2 * P4 *
+/* Opcode: OpenEphemeral P1 P2 * P4 P5
**
** Open a new cursor P1 to a transient table.
** The cursor is always opened read/write even if
@@ -3136,6 +3149,11 @@ case OP_OpenWrite: {
** to a TEMP table at the SQL level, or to a table opened by
** this opcode. Then this opcode was call OpenVirtual. But
** that created confusion with the whole virtual-table idea.
+**
+** The P5 parameter can be a mask of the BTREE_* flags defined
+** in btree.h. These flags control aspects of the operation of
+** the btree. The BTREE_OMIT_JOURNAL and BTREE_SINGLE flags are
+** added automatically.
*/
/* Opcode: OpenAutoindex P1 P2 * P4 *
**
@@ -3144,6 +3162,13 @@ case OP_OpenWrite: {
** by this opcode will be used for automatically created transient
** indices in joins.
*/
+/* Opcode: OpenSorter P1 P2 * P4 *
+**
+** This opcode works like OP_OpenEphemeral except that it opens
+** a transient index that is specifically designed to sort large
+** tables using an external merge-sort algorithm.
+*/
+case OP_OpenSorter:
case OP_OpenAutoindex:
case OP_OpenEphemeral: {
VdbeCursor *pCx;
@@ -3155,6 +3180,7 @@ case OP_OpenEphemeral: {
SQLITE_OPEN_TRANSIENT_DB;
assert( pOp->p1>=0 );
+ assert( (pOp->opcode==OP_OpenSorter)==((pOp->p5 & BTREE_SORTER)!=0) );
pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
if( pCx==0 ) goto no_mem;
pCx->nullRow = 1;
@@ -3188,6 +3214,11 @@ case OP_OpenEphemeral: {
}
pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
pCx->isIndex = !pCx->isTable;
+#ifndef SQLITE_OMIT_MERGE_SORT
+ if( rc==SQLITE_OK && pOp->opcode==OP_OpenSorter ){
+ rc = sqlite3VdbeSorterInit(db, pCx);
+ }
+#endif
break;
}
@@ -3303,7 +3334,7 @@ case OP_SeekGt: { /* jump, in3 */
assert( OP_SeekGe == OP_SeekLt+2 );
assert( OP_SeekGt == OP_SeekLt+3 );
assert( pC->isOrdered );
- if( pC->pCursor!=0 ){
+ if( ALWAYS(pC->pCursor!=0) ){
oc = pOp->opcode;
pC->nullRow = 0;
if( pC->isTable ){
@@ -3661,7 +3692,7 @@ case OP_NotExists: { /* jump, in3 */
assert( pC->isTable );
assert( pC->pseudoTableReg==0 );
pCrsr = pC->pCursor;
- if( pCrsr!=0 ){
+ if( ALWAYS(pCrsr!=0) ){
res = 0;
iKey = pIn3->u.i;
rc = sqlite3BtreeMovetoUnpacked(pCrsr, 0, iKey, 0, &res);
@@ -4077,6 +4108,13 @@ case OP_RowData: {
assert( pC!=0 );
assert( pC->nullRow==0 );
assert( pC->pseudoTableReg==0 );
+
+ if( isSorter(pC) ){
+ assert( pOp->opcode==OP_RowKey );
+ rc = sqlite3VdbeSorterRowkey(pC, pOut);
+ break;
+ }
+
assert( pC->pCursor!=0 );
pCrsr = pC->pCursor;
assert( sqlite3BtreeCursorIsValid(pCrsr) );
@@ -4181,6 +4219,7 @@ case OP_NullRow: {
assert( pC!=0 );
pC->nullRow = 1;
pC->rowidIsValid = 0;
+ assert( pC->pCursor || pC->pVtabCursor );
if( pC->pCursor ){
sqlite3BtreeClearCursor(pC->pCursor);
}
@@ -4204,7 +4243,7 @@ case OP_Last: { /* jump */
pC = p->apCsr[pOp->p1];
assert( pC!=0 );
pCrsr = pC->pCursor;
- if( pCrsr==0 ){
+ if( NEVER(pCrsr==0) ){
res = 1;
}else{
rc = sqlite3BtreeLast(pCrsr, &res);
@@ -4257,7 +4296,11 @@ case OP_Rewind: { /* jump */
pC = p->apCsr[pOp->p1];
assert( pC!=0 );
res = 1;
- if( (pCrsr = pC->pCursor)!=0 ){
+ if( isSorter(pC) ){
+ rc = sqlite3VdbeSorterRewind(db, pC, &res);
+ }else{
+ pCrsr = pC->pCursor;
+ assert( pCrsr );
rc = sqlite3BtreeFirst(pCrsr, &res);
pC->atFirst = res==0 ?1:0;
pC->deferredMoveto = 0;
@@ -4272,7 +4315,7 @@ case OP_Rewind: { /* jump */
break;
}
-/* Opcode: Next P1 P2 * * P5
+/* Opcode: Next P1 P2 * P4 P5
**
** Advance cursor P1 so that it points to the next key/data pair in its
** table or index. If there are no more key/value pairs then fall through
@@ -4281,6 +4324,9 @@ case OP_Rewind: { /* jump */
**
** The P1 cursor must be for a real table, not a pseudo-table.
**
+** P4 is always of type P4_ADVANCE. The function pointer points to
+** sqlite3BtreeNext().
+**
** If P5 is positive and the jump is taken, then event counter
** number P5-1 in the prepared statement is incremented.
**
@@ -4295,13 +4341,15 @@ case OP_Rewind: { /* jump */
**
** The P1 cursor must be for a real table, not a pseudo-table.
**
+** P4 is always of type P4_ADVANCE. The function pointer points to
+** sqlite3BtreePrevious().
+**
** If P5 is positive and the jump is taken, then event counter
** number P5-1 in the prepared statement is incremented.
*/
case OP_Prev: /* jump */
case OP_Next: { /* jump */
VdbeCursor *pC;
- BtCursor *pCrsr;
int res;
CHECK_FOR_INTERRUPT;
@@ -4311,15 +4359,17 @@ case OP_Next: { /* jump */
if( pC==0 ){
break; /* See ticket #2273 */
}
- pCrsr = pC->pCursor;
- if( pCrsr==0 ){
- pC->nullRow = 1;
- break;
+ if( isSorter(pC) ){
+ assert( pOp->opcode==OP_Next );
+ rc = sqlite3VdbeSorterNext(db, pC, &res);
+ }else{
+ res = 1;
+ assert( pC->deferredMoveto==0 );
+ assert( pC->pCursor );
+ assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext );
+ assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious );
+ rc = pOp->p4.xAdvance(pC->pCursor, &res);
}
- res = 1;
- assert( pC->deferredMoveto==0 );
- rc = pOp->opcode==OP_Next ? sqlite3BtreeNext(pCrsr, &res) :
- sqlite3BtreePrevious(pCrsr, &res);
pC->nullRow = (u8)res;
pC->cacheStatus = CACHE_STALE;
if( res==0 ){
@@ -4363,10 +4413,13 @@ case OP_IdxInsert: { /* in2 */
if( rc==SQLITE_OK ){
nKey = pIn2->n;
zKey = pIn2->z;
- rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3,
- ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
- );
- assert( pC->deferredMoveto==0 );
+ rc = sqlite3VdbeSorterWrite(db, pC, nKey);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3,
+ ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
+ );
+ assert( pC->deferredMoveto==0 );
+ }
pC->cacheStatus = CACHE_STALE;
}
}
diff --git a/src/vdbe.h b/src/vdbe.h
index e66ee3024..972854851 100644
--- a/src/vdbe.h
+++ b/src/vdbe.h
@@ -61,6 +61,7 @@ struct VdbeOp {
KeyInfo *pKeyInfo; /* Used when p4type is P4_KEYINFO */
int *ai; /* Used when p4type is P4_INTARRAY */
SubProgram *pProgram; /* Used when p4type is P4_SUBPROGRAM */
+ int (*xAdvance)(BtCursor *, int *);
} p4;
#ifdef SQLITE_DEBUG
char *zComment; /* Comment to improve readability */
@@ -116,6 +117,7 @@ typedef struct VdbeOpList VdbeOpList;
#define P4_INT32 (-14) /* P4 is a 32-bit signed integer */
#define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */
#define P4_SUBPROGRAM (-18) /* P4 is a pointer to a SubProgram structure */
+#define P4_ADVANCE (-19) /* P4 is a pointer to BtreeNext() or BtreePrev() */
/* When adding a P4 argument using P4_KEYINFO, a copy of the KeyInfo structure
** is made. That copy is freed when the Vdbe is finalized. But if the
@@ -173,9 +175,9 @@ int sqlite3VdbeAddOp4(Vdbe*,int,int,int,int,const char *zP4,int);
int sqlite3VdbeAddOp4Int(Vdbe*,int,int,int,int,int);
int sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp);
void sqlite3VdbeAddParseSchemaOp(Vdbe*,int,char*);
-void sqlite3VdbeChangeP1(Vdbe*, int addr, int P1);
-void sqlite3VdbeChangeP2(Vdbe*, int addr, int P2);
-void sqlite3VdbeChangeP3(Vdbe*, int addr, int P3);
+void sqlite3VdbeChangeP1(Vdbe*, u32 addr, int P1);
+void sqlite3VdbeChangeP2(Vdbe*, u32 addr, int P2);
+void sqlite3VdbeChangeP3(Vdbe*, u32 addr, int P3);
void sqlite3VdbeChangeP5(Vdbe*, u8 P5);
void sqlite3VdbeJumpHere(Vdbe*, int addr);
void sqlite3VdbeChangeToNoop(Vdbe*, int addr, int N);
diff --git a/src/vdbeInt.h b/src/vdbeInt.h
index 0aeb3af7a..846d80707 100644
--- a/src/vdbeInt.h
+++ b/src/vdbeInt.h
@@ -30,6 +30,9 @@ typedef struct VdbeOp Op;
*/
typedef unsigned char Bool;
+/* Opaque type used by code in vdbesort.c */
+typedef struct VdbeSorter VdbeSorter;
+
/*
** A cursor is a pointer into a single BTree within a database file.
** The cursor can seek to a BTree entry with a particular key, or
@@ -61,6 +64,7 @@ struct VdbeCursor {
i64 seqCount; /* Sequence counter */
i64 movetoTarget; /* Argument to the deferred sqlite3BtreeMoveto() */
i64 lastRowid; /* Last rowid from a Next or NextIdx operation */
+ VdbeSorter *pSorter; /* Sorter object for OP_OpenSorter cursors */
/* Result of last sqlite3BtreeMoveto() done by an OP_NotExists or
** OP_IsUnique opcode on this cursor. */
@@ -380,6 +384,9 @@ int sqlite3VdbeMemNumerify(Mem*);
int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*);
void sqlite3VdbeMemRelease(Mem *p);
void sqlite3VdbeMemReleaseExternal(Mem *p);
+#define MemReleaseExt(X) \
+ if((X)->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame)) \
+ sqlite3VdbeMemReleaseExternal(X);
int sqlite3VdbeMemFinalize(Mem*, FuncDef*);
const char *sqlite3OpcodeName(int);
int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve);
@@ -388,6 +395,22 @@ void sqlite3VdbeFrameDelete(VdbeFrame*);
int sqlite3VdbeFrameRestore(VdbeFrame *);
void sqlite3VdbeMemStoreType(Mem *pMem);
+#ifdef SQLITE_OMIT_MERGE_SORT
+# define sqlite3VdbeSorterInit(Y,Z) SQLITE_OK
+# define sqlite3VdbeSorterWrite(X,Y,Z) SQLITE_OK
+# define sqlite3VdbeSorterClose(Y,Z)
+# define sqlite3VdbeSorterRowkey(Y,Z) SQLITE_OK
+# define sqlite3VdbeSorterRewind(X,Y,Z) SQLITE_OK
+# define sqlite3VdbeSorterNext(X,Y,Z) SQLITE_OK
+#else
+int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
+int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, int);
+void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
+int sqlite3VdbeSorterRowkey(VdbeCursor *, Mem *);
+int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *);
+int sqlite3VdbeSorterNext(sqlite3 *, VdbeCursor *, int *);
+#endif
+
#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
void sqlite3VdbeEnter(Vdbe*);
void sqlite3VdbeLeave(Vdbe*);
diff --git a/src/vdbeaux.c b/src/vdbeaux.c
index 989a8003d..053d89f3b 100644
--- a/src/vdbeaux.c
+++ b/src/vdbeaux.c
@@ -433,6 +433,12 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){
n = pOp[-1].p1;
if( n>nMaxArgs ) nMaxArgs = n;
#endif
+ }else if( opcode==OP_Next ){
+ pOp->p4.xAdvance = sqlite3BtreeNext;
+ pOp->p4type = P4_ADVANCE;
+ }else if( opcode==OP_Prev ){
+ pOp->p4.xAdvance = sqlite3BtreePrevious;
+ pOp->p4type = P4_ADVANCE;
}
if( (pOp->opflags & OPFLG_JUMP)!=0 && pOp->p2<0 ){
@@ -524,10 +530,9 @@ int sqlite3VdbeAddOpList(Vdbe *p, int nOp, VdbeOpList const *aOp){
** static array using sqlite3VdbeAddOpList but we want to make a
** few minor changes to the program.
*/
-void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){
+void sqlite3VdbeChangeP1(Vdbe *p, u32 addr, int val){
assert( p!=0 );
- assert( addr>=0 );
- if( p->nOp>addr ){
+ if( ((u32)p->nOp)>addr ){
p->aOp[addr].p1 = val;
}
}
@@ -536,10 +541,9 @@ void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){
** Change the value of the P2 operand for a specific instruction.
** This routine is useful for setting a jump destination.
*/
-void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){
+void sqlite3VdbeChangeP2(Vdbe *p, u32 addr, int val){
assert( p!=0 );
- assert( addr>=0 );
- if( p->nOp>addr ){
+ if( ((u32)p->nOp)>addr ){
p->aOp[addr].p2 = val;
}
}
@@ -547,10 +551,9 @@ void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){
/*
** Change the value of the P3 operand for a specific instruction.
*/
-void sqlite3VdbeChangeP3(Vdbe *p, int addr, int val){
+void sqlite3VdbeChangeP3(Vdbe *p, u32 addr, int val){
assert( p!=0 );
- assert( addr>=0 );
- if( p->nOp>addr ){
+ if( ((u32)p->nOp)>addr ){
p->aOp[addr].p3 = val;
}
}
@@ -942,6 +945,10 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){
sqlite3_snprintf(nTemp, zTemp, "program");
break;
}
+ case P4_ADVANCE: {
+ zTemp[0] = 0;
+ break;
+ }
default: {
zP4 = pOp->p4.z;
if( zP4==0 ){
@@ -1565,6 +1572,7 @@ void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){
if( pCx==0 ){
return;
}
+ sqlite3VdbeSorterClose(p->db, pCx);
if( pCx->pBt ){
sqlite3BtreeClose(pCx->pBt);
/* The pCx->pCursor will be close automatically, if it exists, by
diff --git a/src/vdbemem.c b/src/vdbemem.c
index 882c68633..d51257282 100644
--- a/src/vdbemem.c
+++ b/src/vdbemem.c
@@ -271,24 +271,18 @@ int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){
*/
void sqlite3VdbeMemReleaseExternal(Mem *p){
assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) );
- testcase( p->flags & MEM_Agg );
- testcase( p->flags & MEM_Dyn );
- testcase( p->flags & MEM_RowSet );
- testcase( p->flags & MEM_Frame );
- if( p->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame) ){
- if( p->flags&MEM_Agg ){
- sqlite3VdbeMemFinalize(p, p->u.pDef);
- assert( (p->flags & MEM_Agg)==0 );
- sqlite3VdbeMemRelease(p);
- }else if( p->flags&MEM_Dyn && p->xDel ){
- assert( (p->flags&MEM_RowSet)==0 );
- p->xDel((void *)p->z);
- p->xDel = 0;
- }else if( p->flags&MEM_RowSet ){
- sqlite3RowSetClear(p->u.pRowSet);
- }else if( p->flags&MEM_Frame ){
- sqlite3VdbeMemSetNull(p);
- }
+ if( p->flags&MEM_Agg ){
+ sqlite3VdbeMemFinalize(p, p->u.pDef);
+ assert( (p->flags & MEM_Agg)==0 );
+ sqlite3VdbeMemRelease(p);
+ }else if( p->flags&MEM_Dyn && p->xDel ){
+ assert( (p->flags&MEM_RowSet)==0 );
+ p->xDel((void *)p->z);
+ p->xDel = 0;
+ }else if( p->flags&MEM_RowSet ){
+ sqlite3RowSetClear(p->u.pRowSet);
+ }else if( p->flags&MEM_Frame ){
+ sqlite3VdbeMemSetNull(p);
}
}
@@ -298,7 +292,7 @@ void sqlite3VdbeMemReleaseExternal(Mem *p){
** (Mem.type==SQLITE_TEXT).
*/
void sqlite3VdbeMemRelease(Mem *p){
- sqlite3VdbeMemReleaseExternal(p);
+ MemReleaseExt(p);
sqlite3DbFree(p->db, p->zMalloc);
p->z = 0;
p->zMalloc = 0;
@@ -620,7 +614,7 @@ void sqlite3VdbeMemPrepareToChange(Vdbe *pVdbe, Mem *pMem){
*/
void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){
assert( (pFrom->flags & MEM_RowSet)==0 );
- sqlite3VdbeMemReleaseExternal(pTo);
+ MemReleaseExt(pTo);
memcpy(pTo, pFrom, MEMCELLSIZE);
pTo->xDel = 0;
if( (pFrom->flags&MEM_Static)==0 ){
@@ -638,7 +632,7 @@ int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){
int rc = SQLITE_OK;
assert( (pFrom->flags & MEM_RowSet)==0 );
- sqlite3VdbeMemReleaseExternal(pTo);
+ MemReleaseExt(pTo);
memcpy(pTo, pFrom, MEMCELLSIZE);
pTo->flags &= ~MEM_Dyn;
diff --git a/src/vdbesort.c b/src/vdbesort.c
new file mode 100644
index 000000000..be99d397d
--- /dev/null
+++ b/src/vdbesort.c
@@ -0,0 +1,711 @@
+/*
+** 2011 July 9
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code for the VdbeSorter object, used in concert with
+** a VdbeCursor to sort large numbers of keys (as may be required, for
+** example, by CREATE INDEX statements on tables too large to fit in main
+** memory).
+*/
+
+#include "sqliteInt.h"
+#include "vdbeInt.h"
+
+#ifndef SQLITE_OMIT_MERGE_SORT
+
+typedef struct VdbeSorterIter VdbeSorterIter;
+
+/*
+** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
+**
+** As keys are added to the sorter, they are written to disk in a series
+** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
+** the same as the cache-size allowed for temporary databases. In order
+** to allow the caller to extract keys from the sorter in sorted order,
+** all PMAs currently stored on disk must be merged together. This comment
+** describes the data structure used to do so. The structure supports
+** merging any number of arrays in a single pass with no redundant comparison
+** operations.
+**
+** The aIter[] array contains an iterator for each of the PMAs being merged.
+** An aIter[] iterator either points to a valid key or else is at EOF. For
+** the purposes of the paragraphs below, we assume that the array is actually
+** N elements in size, where N is the smallest power of 2 greater than or equal
+** to the number of iterators being merged. The extra aIter[] elements are
+** treated as if they are empty (always at EOF).
+**
+** The aTree[] array is also N elements in size. The value of N is stored in
+** the VdbeSorter.nTree variable.
+**
+** The final (N/2) elements of aTree[] contain the results of comparing
+** pairs of iterator keys together. Element i contains the result of
+** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the
+** aTree element is set to the index of it.
+**
+** For the purposes of this comparison, EOF is considered greater than any
+** other key value. If the keys are equal (only possible with two EOF
+** values), it doesn't matter which index is stored.
+**
+** The (N/4) elements of aTree[] that precede the final (N/2) described
+** above contain the index of the smallest of each block of 4 iterators.
+** And so on. So that aTree[1] contains the index of the iterator that
+** currently points to the smallest key value. aTree[0] is unused.
+**
+** Example:
+**
+** aIter[0] -> Banana
+** aIter[1] -> Feijoa
+** aIter[2] -> Elderberry
+** aIter[3] -> Currant
+** aIter[4] -> Grapefruit
+** aIter[5] -> Apple
+** aIter[6] -> Durian
+** aIter[7] -> EOF
+**
+** aTree[] = { X, 5, 0, 5, 0, 3, 5, 6 }
+**
+** The current element is "Apple" (the value of the key indicated by
+** iterator 5). When the Next() operation is invoked, iterator 5 will
+** be advanced to the next key in its segment. Say the next key is
+** "Eggplant":
+**
+** aIter[5] -> Eggplant
+**
+** The contents of aTree[] are updated first by comparing the new iterator
+** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator
+** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
+** The value of iterator 6 - "Durian" - is now smaller than that of iterator
+** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
+** so the value written into element 1 of the array is 0. As follows:
+**
+** aTree[] = { X, 0, 0, 6, 0, 3, 5, 6 }
+**
+** In other words, each time we advance to the next sorter element, log2(N)
+** key comparison operations are required, where N is the number of segments
+** being merged (rounded up to the next power of 2).
+*/
+struct VdbeSorter {
+ int nWorking; /* Start a new b-tree after this many pages */
+ int nBtree; /* Current size of b-tree contents as PMA */
+ int nTree; /* Used size of aTree/aIter (power of 2) */
+ VdbeSorterIter *aIter; /* Array of iterators to merge */
+ int *aTree; /* Current state of incremental merge */
+ i64 iWriteOff; /* Current write offset within file pTemp1 */
+ i64 iReadOff; /* Current read offset within file pTemp1 */
+ sqlite3_file *pTemp1; /* PMA file 1 */
+ int nPMA; /* Number of PMAs stored in pTemp1 */
+};
+
+/*
+** The following type is an iterator for a PMA. It caches the current key in
+** variables nKey/aKey. If the iterator is at EOF, pFile==0.
+*/
+struct VdbeSorterIter {
+ i64 iReadOff; /* Current read offset */
+ i64 iEof; /* 1 byte past EOF for this iterator */
+ sqlite3_file *pFile; /* File iterator is reading from */
+ int nAlloc; /* Bytes of space at aAlloc */
+ u8 *aAlloc; /* Allocated space */
+ int nKey; /* Number of bytes in key */
+ u8 *aKey; /* Pointer to current key */
+};
+
+/* Minimum allowable value for the VdbeSorter.nWorking variable */
+#define SORTER_MIN_WORKING 10
+
+/* Maximum number of segments to merge in a single pass. */
+#define SORTER_MAX_MERGE_COUNT 16
+
+/*
+** Free all memory belonging to the VdbeSorterIter object passed as the second
+** argument. All structure fields are set to zero before returning.
+*/
+static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){
+ sqlite3DbFree(db, pIter->aAlloc);
+ memset(pIter, 0, sizeof(VdbeSorterIter));
+}
+
+/*
+** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
+** no error occurs, or an SQLite error code if one does.
+*/
+static int vdbeSorterIterNext(
+ sqlite3 *db, /* Database handle (for sqlite3DbMalloc() ) */
+ VdbeSorterIter *pIter /* Iterator to advance */
+){
+ int rc; /* Return Code */
+ int nRead; /* Number of bytes read */
+ int nRec; /* Size of record in bytes */
+ int iOff; /* Size of serialized size varint in bytes */
+
+ nRead = pIter->iEof - pIter->iReadOff;
+ if( nRead>5 ) nRead = 5;
+ if( nRead<=0 ){
+ /* This is an EOF condition */
+ vdbeSorterIterZero(db, pIter);
+ return SQLITE_OK;
+ }
+
+ rc = sqlite3OsRead(pIter->pFile, pIter->aAlloc, nRead, pIter->iReadOff);
+ iOff = getVarint32(pIter->aAlloc, nRec);
+
+ if( rc==SQLITE_OK && (iOff+nRec)>nRead ){
+ int nRead2; /* Number of extra bytes to read */
+ if( (iOff+nRec)>pIter->nAlloc ){
+ int nNew = pIter->nAlloc*2;
+ while( (iOff+nRec)>nNew ) nNew = nNew*2;
+ pIter->aAlloc = sqlite3DbReallocOrFree(db, pIter->aAlloc, nNew);
+ if( !pIter->aAlloc ) return SQLITE_NOMEM;
+ pIter->nAlloc = nNew;
+ }
+
+ nRead2 = iOff + nRec - nRead;
+ rc = sqlite3OsRead(
+ pIter->pFile, &pIter->aAlloc[nRead], nRead2, pIter->iReadOff+nRead
+ );
+ }
+
+ assert( nRec>0 || rc!=SQLITE_OK );
+ pIter->iReadOff += iOff+nRec;
+ pIter->nKey = nRec;
+ pIter->aKey = &pIter->aAlloc[iOff];
+ return rc;
+}
+
+/*
+** Write a single varint, value iVal, to file-descriptor pFile. Return
+** SQLITE_OK if successful, or an SQLite error code if some error occurs.
+**
+** The value of *piOffset when this function is called is used as the byte
+** offset in file pFile to write to. Before returning, *piOffset is
+** incremented by the number of bytes written.
+*/
+static int vdbeSorterWriteVarint(
+ sqlite3_file *pFile, /* File to write to */
+ i64 iVal, /* Value to write as a varint */
+ i64 *piOffset /* IN/OUT: Write offset in file pFile */
+){
+ u8 aVarint[9]; /* Buffer large enough for a varint */
+ int nVarint; /* Number of used bytes in varint */
+ int rc; /* Result of write() call */
+
+ nVarint = sqlite3PutVarint(aVarint, iVal);
+ rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset);
+ *piOffset += nVarint;
+
+ return rc;
+}
+
+/*
+** Read a single varint from file-descriptor pFile. Return SQLITE_OK if
+** successful, or an SQLite error code if some error occurs.
+**
+** The value of *piOffset when this function is called is used as the
+** byte offset in file pFile from whence to read the varint. If successful
+** (i.e. if no IO error occurs), then *piOffset is set to the offset of
+** the first byte past the end of the varint before returning. *piVal is
+** set to the integer value read. If an error occurs, the final values of
+** both *piOffset and *piVal are undefined.
+*/
+static int vdbeSorterReadVarint(
+ sqlite3_file *pFile, /* File to read from */
+ i64 iEof, /* Total number of bytes in file */
+ i64 *piOffset, /* IN/OUT: Read offset in pFile */
+ i64 *piVal /* OUT: Value read from file */
+){
+ u8 aVarint[9]; /* Buffer large enough for a varint */
+ i64 iOff = *piOffset; /* Offset in file to read from */
+ int nRead = 9; /* Number of bytes to read from file */
+ int rc; /* Return code */
+
+ assert( iEof>iOff );
+ if( (iEof-iOff)<nRead ){
+ nRead = iEof-iOff;
+ }
+
+ rc = sqlite3OsRead(pFile, aVarint, nRead, iOff);
+ if( rc==SQLITE_OK ){
+ *piOffset += getVarint(aVarint, (u64 *)piVal);
+ }
+
+ return rc;
+}
+
+/*
+** Initialize iterator pIter to scan through the PMA stored in file pFile
+** starting at offset iStart and ending at offset iEof-1. This function
+** leaves the iterator pointing to the first key in the PMA (or EOF if the
+** PMA is empty).
+*/
+static int vdbeSorterIterInit(
+ sqlite3 *db, /* Database handle */
+ VdbeSorter *pSorter, /* Sorter object */
+ i64 iStart, /* Start offset in pFile */
+ VdbeSorterIter *pIter, /* Iterator to populate */
+ i64 *pnByte /* IN/OUT: Increment this value by PMA size */
+){
+ int rc;
+
+ assert( pSorter->iWriteOff>iStart );
+ assert( pIter->aAlloc==0 );
+ pIter->pFile = pSorter->pTemp1;
+ pIter->iReadOff = iStart;
+ pIter->nAlloc = 128;
+ pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
+ if( !pIter->aAlloc ){
+ rc = SQLITE_NOMEM;
+ }else{
+ i64 iEof = pSorter->iWriteOff; /* EOF of file pSorter->pTemp1 */
+ i64 nByte; /* Total size of PMA in bytes */
+ rc = vdbeSorterReadVarint(pSorter->pTemp1, iEof, &pIter->iReadOff, &nByte);
+ *pnByte += nByte;
+ pIter->iEof = pIter->iReadOff + nByte;
+ }
+ if( rc==SQLITE_OK ){
+ rc = vdbeSorterIterNext(db, pIter);
+ }
+ return rc;
+}
+
+/*
+** This function is called to compare two iterator keys when merging
+** multiple b-tree segments. Parameter iOut is the index of the aTree[]
+** value to recalculate.
+*/
+static int vdbeSorterDoCompare(VdbeCursor *pCsr, int iOut){
+ VdbeSorter *pSorter = pCsr->pSorter;
+ int i1;
+ int i2;
+ int iRes;
+ VdbeSorterIter *p1;
+ VdbeSorterIter *p2;
+
+ assert( iOut<pSorter->nTree && iOut>0 );
+
+ if( iOut>=(pSorter->nTree/2) ){
+ i1 = (iOut - pSorter->nTree/2) * 2;
+ i2 = i1 + 1;
+ }else{
+ i1 = pSorter->aTree[iOut*2];
+ i2 = pSorter->aTree[iOut*2+1];
+ }
+
+ p1 = &pSorter->aIter[i1];
+ p2 = &pSorter->aIter[i2];
+
+ if( p1->pFile==0 ){
+ iRes = i2;
+ }else if( p2->pFile==0 ){
+ iRes = i1;
+ }else{
+ char aSpace[150];
+ UnpackedRecord *r1;
+
+ r1 = sqlite3VdbeRecordUnpack(
+ pCsr->pKeyInfo, p1->nKey, p1->aKey, aSpace, sizeof(aSpace)
+ );
+ if( r1==0 ) return SQLITE_NOMEM;
+
+ if( sqlite3VdbeRecordCompare(p2->nKey, p2->aKey, r1)>=0 ){
+ iRes = i1;
+ }else{
+ iRes = i2;
+ }
+ sqlite3VdbeDeleteUnpackedRecord(r1);
+ }
+
+ pSorter->aTree[iOut] = iRes;
+ return SQLITE_OK;
+}
+
+/*
+** Initialize the temporary index cursor just opened as a sorter cursor.
+*/
+int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
+ assert( pCsr->pKeyInfo && pCsr->pBt );
+ pCsr->pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
+ return (pCsr->pSorter ? SQLITE_OK : SQLITE_NOMEM);
+}
+
+/*
+** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
+*/
+void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
+ VdbeSorter *pSorter = pCsr->pSorter;
+ if( pSorter ){
+ if( pSorter->aIter ){
+ int i;
+ for(i=0; i<pSorter->nTree; i++){
+ vdbeSorterIterZero(db, &pSorter->aIter[i]);
+ }
+ sqlite3DbFree(db, pSorter->aIter);
+ }
+ if( pSorter->pTemp1 ){
+ sqlite3OsCloseFree(pSorter->pTemp1);
+ }
+ sqlite3DbFree(db, pSorter);
+ pCsr->pSorter = 0;
+ }
+}
+
+/*
+** Allocate space for a file-handle and open a temporary file. If successful,
+** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK.
+** Otherwise, set *ppFile to 0 and return an SQLite error code.
+*/
+static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
+ int dummy;
+ return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile,
+ SQLITE_OPEN_TEMP_JOURNAL |
+ SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
+ SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &dummy
+ );
+}
+
+
+/*
+** Write the current contents of the b-tree to a PMA. Return SQLITE_OK
+** if successful, or an SQLite error code otherwise.
+**
+** The format of a PMA is:
+**
+** * A varint. This varint contains the total number of bytes of content
+** in the PMA (not including the varint itself).
+**
+** * One or more records packed end-to-end in order of ascending keys.
+** Each record consists of a varint followed by a blob of data (the
+** key). The varint is the number of bytes in the blob of data.
+*/
+static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){
+ int rc = SQLITE_OK; /* Return code */
+ VdbeSorter *pSorter = pCsr->pSorter;
+ int res = 0;
+
+ /* sqlite3BtreeFirst() cannot fail because sorter btrees are always held
+ ** in memory and so an I/O error is not possible. */
+ rc = sqlite3BtreeFirst(pCsr->pCursor, &res);
+ if( NEVER(rc!=SQLITE_OK) || res ) return rc;
+ assert( pSorter->nBtree>0 );
+
+ /* If the first temporary PMA file has not been opened, open it now. */
+ if( pSorter->pTemp1==0 ){
+ rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
+ assert( rc!=SQLITE_OK || pSorter->pTemp1 );
+ assert( pSorter->iWriteOff==0 );
+ assert( pSorter->nPMA==0 );
+ }
+
+ if( rc==SQLITE_OK ){
+ i64 iWriteOff = pSorter->iWriteOff;
+ void *aMalloc = 0; /* Array used to hold a single record */
+ int nMalloc = 0; /* Allocated size of aMalloc[] in bytes */
+
+ pSorter->nPMA++;
+ for(
+ rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff);
+ rc==SQLITE_OK && res==0;
+ rc = sqlite3BtreeNext(pCsr->pCursor, &res)
+ ){
+ i64 nKey; /* Size of this key in bytes */
+
+ /* Write the size of the record in bytes to the output file */
+ (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey);
+ rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff);
+
+ /* Make sure the aMalloc[] buffer is large enough for the record */
+ if( rc==SQLITE_OK && nKey>nMalloc ){
+ aMalloc = sqlite3DbReallocOrFree(db, aMalloc, nKey);
+ if( !aMalloc ){
+ rc = SQLITE_NOMEM;
+ }else{
+ nMalloc = nKey;
+ }
+ }
+
+ /* Write the record itself to the output file */
+ if( rc==SQLITE_OK ){
+ /* sqlite3BtreeKey() cannot fail because sorter btrees are held in memory */
+ rc = sqlite3BtreeKey(pCsr->pCursor, 0, nKey, aMalloc);
+ if( ALWAYS(rc==SQLITE_OK) ){
+ rc = sqlite3OsWrite(pSorter->pTemp1, aMalloc, nKey, iWriteOff);
+ iWriteOff += nKey;
+ }
+ }
+
+ if( rc!=SQLITE_OK ) break;
+ }
+
+ /* This assert verifies that unless an error has occurred, the size of
+ ** the PMA on disk is the same as the expected size stored in
+ ** pSorter->nBtree. */
+ assert( rc!=SQLITE_OK || pSorter->nBtree==(
+ iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree)
+ ));
+
+ pSorter->iWriteOff = iWriteOff;
+ sqlite3DbFree(db, aMalloc);
+ }
+
+ pSorter->nBtree = 0;
+ return rc;
+}
+
+/*
+** This function is called on a sorter cursor by the VDBE before each row
+** is inserted into VdbeCursor.pCursor. Argument nKey is the size of the
+** key, in bytes, about to be inserted.
+**
+** If it is determined that the temporary b-tree accessed via
+** VdbeCursor.pCursor is large enough, its contents are written to a sorted
+** PMA on disk and the tree emptied. This prevents the b-tree (which must be
+** small enough to fit entirely in the cache in order to support efficient
+** inserts) from growing too large.
+**
+** An SQLite error code is returned if an error occurs. Otherwise, SQLITE_OK.
+*/
+int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){
+ int rc = SQLITE_OK; /* Return code */
+ VdbeSorter *pSorter = pCsr->pSorter;
+ if( pSorter ){
+ Pager *pPager = sqlite3BtreePager(pCsr->pBt);
+ int nPage; /* Current size of temporary file in pages */
+
+ /* Sorters never spill to disk */
+ assert( sqlite3PagerFile(pPager)->pMethods==0 );
+
+ /* Determine how many pages the temporary b-tree has grown to */
+ sqlite3PagerPagecount(pPager, &nPage);
+
+ /* If pSorter->nWorking is still zero, but the temporary file has been
+ ** created in the file-system, then the most recent insert into the
+ ** current b-tree segment probably caused the cache to overflow (it is
+ ** also possible that sqlite3_release_memory() was called). So set the
+ ** size of the working set to a little less than the current size of the
+ ** file in pages. */
+ if( pSorter->nWorking==0 && sqlite3PagerUnderStress(pPager) ){
+ pSorter->nWorking = nPage-5;
+ if( pSorter->nWorking<SORTER_MIN_WORKING ){
+ pSorter->nWorking = SORTER_MIN_WORKING;
+ }
+ }
+
+ /* If the number of pages used by the current b-tree segment is greater
+ ** than the size of the working set (VdbeSorter.nWorking), start a new
+ ** segment b-tree. */
+ if( pSorter->nWorking && nPage>=pSorter->nWorking ){
+ BtCursor *p = pCsr->pCursor;/* Cursor structure to close and reopen */
+ int iRoot; /* Root page of new tree */
+
+ /* Copy the current contents of the b-tree into a PMA in sorted order.
+ ** Close the currently open b-tree cursor. */
+ rc = vdbeSorterBtreeToPMA(db, pCsr);
+ sqlite3BtreeCloseCursor(p);
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3BtreeDropTable(pCsr->pBt, 2, 0);
+#ifdef SQLITE_DEBUG
+ sqlite3PagerPagecount(pPager, &nPage);
+ assert( rc!=SQLITE_OK || nPage==1 );
+#endif
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3BtreeCreateTable(pCsr->pBt, &iRoot, BTREE_BLOBKEY);
+ }
+ if( rc==SQLITE_OK ){
+ assert( iRoot==2 );
+ rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p);
+ }
+ }
+
+ pSorter->nBtree += sqlite3VarintLen(nKey) + nKey;
+ }
+ return rc;
+}
+
+/*
+** Helper function for sqlite3VdbeSorterRewind().
+*/
+static int vdbeSorterInitMerge(
+ sqlite3 *db, /* Database handle */
+ VdbeCursor *pCsr, /* Cursor handle for this sorter */
+ i64 *pnByte /* Sum of bytes in all opened PMAs */
+){
+ VdbeSorter *pSorter = pCsr->pSorter;
+ int rc = SQLITE_OK; /* Return code */
+ int i; /* Used to iterate through aIter[] */
+ i64 nByte = 0; /* Total bytes in all opened PMAs */
+
+ /* Initialize the iterators. */
+ for(i=0; rc==SQLITE_OK && i<SORTER_MAX_MERGE_COUNT; i++){
+ VdbeSorterIter *pIter = &pSorter->aIter[i];
+ rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
+ pSorter->iReadOff = pIter->iEof;
+ assert( pSorter->iReadOff<=pSorter->iWriteOff || rc!=SQLITE_OK );
+ if( pSorter->iReadOff>=pSorter->iWriteOff ) break;
+ }
+
+ /* Initialize the aTree[] array. */
+ for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
+ rc = vdbeSorterDoCompare(pCsr, i);
+ }
+
+ *pnByte = nByte;
+ return rc;
+}
+
+/*
+** Once the sorter has been populated, this function is called to prepare
+** for iterating through its contents in sorted order.
+*/
+int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
+ VdbeSorter *pSorter = pCsr->pSorter;
+ int rc; /* Return code */
+ sqlite3_file *pTemp2 = 0; /* Second temp file to use */
+ i64 iWrite2 = 0; /* Write offset for pTemp2 */
+ int nIter; /* Number of iterators used */
+ int nByte; /* Bytes of space required for aIter/aTree */
+ int N = 2; /* Power of 2 >= nIter */
+
+ assert( pSorter );
+
+ /* Write the current b-tree to a PMA. Close the b-tree cursor. */
+ rc = vdbeSorterBtreeToPMA(db, pCsr);
+ sqlite3BtreeCloseCursor(pCsr->pCursor);
+ if( rc!=SQLITE_OK ) return rc;
+ if( pSorter->nPMA==0 ){
+ *pbEof = 1;
+ return SQLITE_OK;
+ }
+
+ /* Allocate space for aIter[] and aTree[]. */
+ nIter = pSorter->nPMA;
+ if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
+ assert( nIter>0 );
+ while( N<nIter ) N += N;
+ nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
+ pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
+ if( !pSorter->aIter ) return SQLITE_NOMEM;
+ pSorter->aTree = (int *)&pSorter->aIter[N];
+ pSorter->nTree = N;
+
+ do {
+ int iNew; /* Index of new, merged, PMA */
+
+ for(iNew=0;
+ rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNT<pSorter->nPMA;
+ iNew++
+ ){
+ i64 nWrite; /* Number of bytes in new PMA */
+
+ /* If there are SORTER_MAX_MERGE_COUNT or fewer PMAs in file pTemp1,
+ ** initialize an iterator for each of them and break out of the loop.
+ ** These iterators will be incrementally merged as the VDBE layer calls
+ ** sqlite3VdbeSorterNext().
+ **
+ ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
+ ** initialize iterators for SORTER_MAX_MERGE_COUNT of them. These PMAs
+ ** are merged into a single PMA that is written to file pTemp2.
+ */
+ rc = vdbeSorterInitMerge(db, pCsr, &nWrite);
+ assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
+ if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
+ break;
+ }
+
+ /* Open the second temp file, if it is not already open. */
+ if( pTemp2==0 ){
+ assert( iWrite2==0 );
+ rc = vdbeSorterOpenTempFile(db, &pTemp2);
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2);
+ }
+
+ if( rc==SQLITE_OK ){
+ int bEof = 0;
+ while( rc==SQLITE_OK && bEof==0 ){
+ int nToWrite;
+ VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
+ assert( pIter->pFile );
+ nToWrite = pIter->nKey + sqlite3VarintLen(pIter->nKey);
+ rc = sqlite3OsWrite(pTemp2, pIter->aAlloc, nToWrite, iWrite2);
+ iWrite2 += nToWrite;
+ if( rc==SQLITE_OK ){
+ rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
+ }
+ }
+ }
+ }
+
+ if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
+ break;
+ }else{
+ sqlite3_file *pTmp = pSorter->pTemp1;
+ pSorter->nPMA = iNew;
+ pSorter->pTemp1 = pTemp2;
+ pTemp2 = pTmp;
+ pSorter->iWriteOff = iWrite2;
+ pSorter->iReadOff = 0;
+ iWrite2 = 0;
+ }
+ }while( rc==SQLITE_OK );
+
+ if( pTemp2 ){
+ sqlite3OsCloseFree(pTemp2);
+ }
+ *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
+ return rc;
+}
+
+/*
+** Advance to the next element in the sorter.
+*/
+int sqlite3VdbeSorterNext(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
+ VdbeSorter *pSorter = pCsr->pSorter;
+ int iPrev = pSorter->aTree[1]; /* Index of iterator to advance */
+ int i; /* Index of aTree[] to recalculate */
+ int rc; /* Return code */
+
+ rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
+ for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){
+ rc = vdbeSorterDoCompare(pCsr, i);
+ }
+
+ *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
+ return rc;
+}
+
+/*
+** Copy the current sorter key into the memory cell pOut.
+*/
+int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){
+ VdbeSorter *pSorter = pCsr->pSorter;
+ VdbeSorterIter *pIter;
+
+ pIter = &pSorter->aIter[ pSorter->aTree[1] ];
+
+ /* Coverage testing note: As things are currently, this call will always
+ ** succeed. This is because the memory cell passed by the VDBE layer
+ ** happens to be the same one as was used to assemble the keys before they
+ ** were passed to the sorter - meaning it is always large enough for the
+ ** largest key. But this could change very easily, so we leave the call
+ ** to sqlite3VdbeMemGrow() in. */
+ if( NEVER(sqlite3VdbeMemGrow(pOut, pIter->nKey, 0)) ){
+ return SQLITE_NOMEM;
+ }
+ pOut->n = pIter->nKey;
+ MemSetTypeFlag(pOut, MEM_Blob);
+ memcpy(pOut->z, pIter->aKey, pIter->nKey);
+
+ return SQLITE_OK;
+}
+
+#endif /* #ifndef SQLITE_OMIT_MERGE_SORT */
diff --git a/src/wal.c b/src/wal.c
index b9a03dff2..3bc42ffb1 100644
--- a/src/wal.c
+++ b/src/wal.c
@@ -1804,13 +1804,15 @@ int sqlite3WalClose(
*/
rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
if( rc==SQLITE_OK ){
+ int bPersistWal = -1;
if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
}
rc = sqlite3WalCheckpoint(
pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
);
- if( rc==SQLITE_OK ){
+ sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersistWal);
+ if( rc==SQLITE_OK && bPersistWal!=1 ){
isDelete = 1;
}
}
diff --git a/src/where.c b/src/where.c
index d31223286..21fb7f45f 100644
--- a/src/where.c
+++ b/src/where.c
@@ -2142,6 +2142,7 @@ static sqlite3_index_info *allocateIndexInfo(
testcase( pTerm->eOperator==WO_IN );
testcase( pTerm->eOperator==WO_ISNULL );
if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue;
+ if( pTerm->wtFlags & TERM_VNULL ) continue;
nTerm++;
}
@@ -2192,6 +2193,7 @@ static sqlite3_index_info *allocateIndexInfo(
testcase( pTerm->eOperator==WO_IN );
testcase( pTerm->eOperator==WO_ISNULL );
if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue;
+ if( pTerm->wtFlags & TERM_VNULL ) continue;
pIdxCons[j].iColumn = pTerm->u.leftColumn;
pIdxCons[j].iTermOffset = i;
pIdxCons[j].op = (u8)pTerm->eOperator;