diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backup.c | 180 | ||||
-rw-r--r-- | src/btree.c | 44 | ||||
-rw-r--r-- | src/btree.h | 1 | ||||
-rw-r--r-- | src/build.c | 82 | ||||
-rw-r--r-- | src/ctime.c | 6 | ||||
-rw-r--r-- | src/fkey.c | 17 | ||||
-rw-r--r-- | src/main.c | 10 | ||||
-rw-r--r-- | src/os_unix.c | 266 | ||||
-rw-r--r-- | src/os_win.c | 327 | ||||
-rw-r--r-- | src/pager.c | 30 | ||||
-rw-r--r-- | src/pager.h | 4 | ||||
-rw-r--r-- | src/pcache1.c | 257 | ||||
-rw-r--r-- | src/sqlite.h.in | 27 | ||||
-rw-r--r-- | src/sqliteInt.h | 20 | ||||
-rw-r--r-- | src/tclsqlite.c | 2 | ||||
-rw-r--r-- | src/test1.c | 64 | ||||
-rw-r--r-- | src/test6.c | 10 | ||||
-rw-r--r-- | src/test_config.c | 12 | ||||
-rw-r--r-- | src/test_malloc.c | 4 | ||||
-rw-r--r-- | src/test_multiplex.c | 9 | ||||
-rw-r--r-- | src/test_quota.c | 171 | ||||
-rw-r--r-- | src/test_rtree.c | 2 | ||||
-rw-r--r-- | src/test_syscall.c | 2 | ||||
-rw-r--r-- | src/test_thread.c | 2 | ||||
-rw-r--r-- | src/test_vfs.c | 21 | ||||
-rw-r--r-- | src/vdbe.c | 129 | ||||
-rw-r--r-- | src/vdbe.h | 8 | ||||
-rw-r--r-- | src/vdbeInt.h | 23 | ||||
-rw-r--r-- | src/vdbeaux.c | 26 | ||||
-rw-r--r-- | src/vdbemem.c | 36 | ||||
-rw-r--r-- | src/vdbesort.c | 711 | ||||
-rw-r--r-- | src/wal.c | 4 | ||||
-rw-r--r-- | src/where.c | 2 |
33 files changed, 2134 insertions, 375 deletions
diff --git a/src/backup.c b/src/backup.c index 4d7ae3183..70a782665 100644 --- a/src/backup.c +++ b/src/backup.c @@ -410,102 +410,106 @@ int sqlite3_backup_step(sqlite3_backup *p, int nPage){ ** the case where the source and destination databases have the ** same schema version. */ - if( rc==SQLITE_DONE - && (rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1))==SQLITE_OK - ){ - int nDestTruncate; - - if( p->pDestDb ){ - sqlite3ResetInternalSchema(p->pDestDb, -1); - } - - /* Set nDestTruncate to the final number of pages in the destination - ** database. The complication here is that the destination page - ** size may be different to the source page size. - ** - ** If the source page size is smaller than the destination page size, - ** round up. In this case the call to sqlite3OsTruncate() below will - ** fix the size of the file. However it is important to call - ** sqlite3PagerTruncateImage() here so that any pages in the - ** destination file that lie beyond the nDestTruncate page mark are - ** journalled by PagerCommitPhaseOne() before they are destroyed - ** by the file truncation. - */ - assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) ); - assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) ); - if( pgszSrc<pgszDest ){ - int ratio = pgszDest/pgszSrc; - nDestTruncate = (nSrcPage+ratio-1)/ratio; - if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){ - nDestTruncate--; + if( rc==SQLITE_DONE ){ + rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1); + if( rc==SQLITE_OK ){ + if( p->pDestDb ){ + sqlite3ResetInternalSchema(p->pDestDb, -1); + } + if( destMode==PAGER_JOURNALMODE_WAL ){ + rc = sqlite3BtreeSetVersion(p->pDest, 2); } - }else{ - nDestTruncate = nSrcPage * (pgszSrc/pgszDest); } - sqlite3PagerTruncateImage(pDestPager, nDestTruncate); - - if( pgszSrc<pgszDest ){ - /* If the source page-size is smaller than the destination page-size, - ** two extra things may need to happen: - ** - ** * The destination may need to be truncated, and + if( rc==SQLITE_OK ){ + int nDestTruncate; + /* Set nDestTruncate to the final number of pages in the destination + ** database. The complication here is that the destination page + ** size may be different to the source page size. ** - ** * Data stored on the pages immediately following the - ** pending-byte page in the source database may need to be - ** copied into the destination database. + ** If the source page size is smaller than the destination page size, + ** round up. In this case the call to sqlite3OsTruncate() below will + ** fix the size of the file. However it is important to call + ** sqlite3PagerTruncateImage() here so that any pages in the + ** destination file that lie beyond the nDestTruncate page mark are + ** journalled by PagerCommitPhaseOne() before they are destroyed + ** by the file truncation. */ - const i64 iSize = (i64)pgszSrc * (i64)nSrcPage; - sqlite3_file * const pFile = sqlite3PagerFile(pDestPager); - i64 iOff; - i64 iEnd; - - assert( pFile ); - assert( (i64)nDestTruncate*(i64)pgszDest >= iSize || ( - nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1) - && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest - )); - - /* This call ensures that all data required to recreate the original - ** database has been stored in the journal for pDestPager and the - ** journal synced to disk. So at this point we may safely modify - ** the database file in any way, knowing that if a power failure - ** occurs, the original database will be reconstructed from the - ** journal file. */ - rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1); - - /* Write the extra pages and truncate the database file as required. */ - iEnd = MIN(PENDING_BYTE + pgszDest, iSize); - for( - iOff=PENDING_BYTE+pgszSrc; - rc==SQLITE_OK && iOff<iEnd; - iOff+=pgszSrc - ){ - PgHdr *pSrcPg = 0; - const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1); - rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg); - if( rc==SQLITE_OK ){ - u8 *zData = sqlite3PagerGetData(pSrcPg); - rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff); + assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) ); + assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) ); + if( pgszSrc<pgszDest ){ + int ratio = pgszDest/pgszSrc; + nDestTruncate = (nSrcPage+ratio-1)/ratio; + if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){ + nDestTruncate--; } - sqlite3PagerUnref(pSrcPg); - } - if( rc==SQLITE_OK ){ - rc = backupTruncateFile(pFile, iSize); + }else{ + nDestTruncate = nSrcPage * (pgszSrc/pgszDest); } + sqlite3PagerTruncateImage(pDestPager, nDestTruncate); + + if( pgszSrc<pgszDest ){ + /* If the source page-size is smaller than the destination page-size, + ** two extra things may need to happen: + ** + ** * The destination may need to be truncated, and + ** + ** * Data stored on the pages immediately following the + ** pending-byte page in the source database may need to be + ** copied into the destination database. + */ + const i64 iSize = (i64)pgszSrc * (i64)nSrcPage; + sqlite3_file * const pFile = sqlite3PagerFile(pDestPager); + i64 iOff; + i64 iEnd; + + assert( pFile ); + assert( (i64)nDestTruncate*(i64)pgszDest >= iSize || ( + nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1) + && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest + )); + + /* This call ensures that all data required to recreate the original + ** database has been stored in the journal for pDestPager and the + ** journal synced to disk. So at this point we may safely modify + ** the database file in any way, knowing that if a power failure + ** occurs, the original database will be reconstructed from the + ** journal file. */ + rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1); + + /* Write the extra pages and truncate the database file as required */ + iEnd = MIN(PENDING_BYTE + pgszDest, iSize); + for( + iOff=PENDING_BYTE+pgszSrc; + rc==SQLITE_OK && iOff<iEnd; + iOff+=pgszSrc + ){ + PgHdr *pSrcPg = 0; + const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1); + rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg); + if( rc==SQLITE_OK ){ + u8 *zData = sqlite3PagerGetData(pSrcPg); + rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff); + } + sqlite3PagerUnref(pSrcPg); + } + if( rc==SQLITE_OK ){ + rc = backupTruncateFile(pFile, iSize); + } - /* Sync the database file to disk. */ - if( rc==SQLITE_OK ){ - rc = sqlite3PagerSync(pDestPager); + /* Sync the database file to disk. */ + if( rc==SQLITE_OK ){ + rc = sqlite3PagerSync(pDestPager); + } + }else{ + rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0); + } + + /* Finish committing the transaction to the destination database. */ + if( SQLITE_OK==rc + && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0)) + ){ + rc = SQLITE_DONE; } - }else{ - rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0); - } - - /* Finish committing the transaction to the destination database. */ - if( SQLITE_OK==rc - && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0)) - ){ - rc = SQLITE_DONE; } } diff --git a/src/btree.c b/src/btree.c index 3d7162dbb..d77fce4c8 100644 --- a/src/btree.c +++ b/src/btree.c @@ -1734,11 +1734,22 @@ int sqlite3BtreeOpen( /* A BTREE_SINGLE database is always a temporary and/or ephemeral */ assert( (flags & BTREE_SINGLE)==0 || isTempDb ); + /* The BTREE_SORTER flag is only used if SQLITE_OMIT_MERGE_SORT is undef */ +#ifdef SQLITE_OMIT_MERGE_SORT + assert( (flags & BTREE_SORTER)==0 ); +#endif + + /* BTREE_SORTER is always on a BTREE_SINGLE, BTREE_OMIT_JOURNAL */ + assert( (flags & BTREE_SORTER)==0 || + (flags & (BTREE_SINGLE|BTREE_OMIT_JOURNAL)) + ==(BTREE_SINGLE|BTREE_OMIT_JOURNAL) ); + if( db->flags & SQLITE_NoReadlock ){ flags |= BTREE_NO_READLOCK; } if( isMemdb ){ flags |= BTREE_MEMORY; + flags &= ~BTREE_SORTER; } if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){ vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB; @@ -3468,7 +3479,8 @@ static int btreeCursor( return SQLITE_READONLY; } if( iTable==1 && btreePagecount(pBt)==0 ){ - return SQLITE_EMPTY; + assert( wrFlag==0 ); + iTable = 0; } /* Now that no other errors can occur, finish filling in the BtCursor @@ -4222,6 +4234,9 @@ static int moveToRoot(BtCursor *pCur){ releasePage(pCur->apPage[i]); } pCur->iPage = 0; + }else if( pCur->pgnoRoot==0 ){ + pCur->eState = CURSOR_INVALID; + return SQLITE_OK; }else{ rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]); if( rc!=SQLITE_OK ){ @@ -4331,7 +4346,7 @@ int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ rc = moveToRoot(pCur); if( rc==SQLITE_OK ){ if( pCur->eState==CURSOR_INVALID ){ - assert( pCur->apPage[pCur->iPage]->nCell==0 ); + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); *pRes = 1; }else{ assert( pCur->apPage[pCur->iPage]->nCell>0 ); @@ -4370,7 +4385,7 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ rc = moveToRoot(pCur); if( rc==SQLITE_OK ){ if( CURSOR_INVALID==pCur->eState ){ - assert( pCur->apPage[pCur->iPage]->nCell==0 ); + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); *pRes = 1; }else{ assert( pCur->eState==CURSOR_VALID ); @@ -4443,12 +4458,12 @@ int sqlite3BtreeMovetoUnpacked( if( rc ){ return rc; } - assert( pCur->apPage[pCur->iPage] ); - assert( pCur->apPage[pCur->iPage]->isInit ); - assert( pCur->apPage[pCur->iPage]->nCell>0 || pCur->eState==CURSOR_INVALID ); + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] ); + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit ); + assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 ); if( pCur->eState==CURSOR_INVALID ){ *pRes = -1; - assert( pCur->apPage[pCur->iPage]->nCell==0 ); + assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); return SQLITE_OK; } assert( pCur->apPage[0]->intKey || pIdxKey ); @@ -7277,9 +7292,16 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return rc; } int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ + BtShared *pBt = p->pBt; int rc; sqlite3BtreeEnter(p); - rc = btreeDropTable(p, iTable, piMoved); + if( (pBt->openFlags&BTREE_SINGLE) ){ + pBt->nPage = 0; + sqlite3PagerTruncateImage(pBt->pPager, 1); + rc = newDatabase(pBt); + }else{ + rc = btreeDropTable(p, iTable, piMoved); + } sqlite3BtreeLeave(p); return rc; } @@ -7358,6 +7380,11 @@ int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ int sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){ i64 nEntry = 0; /* Value to return in *pnEntry */ int rc; /* Return code */ + + if( pCur->pgnoRoot==0 ){ + *pnEntry = 0; + return SQLITE_OK; + } rc = moveToRoot(pCur); /* Unless an error occurs, the following loop runs one iteration for each @@ -8142,7 +8169,6 @@ int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ BtShared *pBt = pBtree->pBt; int rc; /* Return code */ - assert( pBtree->inTrans==TRANS_NONE ); assert( iVersion==1 || iVersion==2 ); /* If setting the version fields to 1, do not automatically open the diff --git a/src/btree.h b/src/btree.h index 9e3a73b3b..ce19826ad 100644 --- a/src/btree.h +++ b/src/btree.h @@ -61,6 +61,7 @@ int sqlite3BtreeOpen( #define BTREE_MEMORY 4 /* This is an in-memory DB */ #define BTREE_SINGLE 8 /* The file contains at most 1 b-tree */ #define BTREE_UNORDERED 16 /* Use of a hash implementation is OK */ +#define BTREE_SORTER 32 /* Used as accumulator in external merge sort */ int sqlite3BtreeClose(Btree*); int sqlite3BtreeSetCacheSize(Btree*,int); diff --git a/src/build.c b/src/build.c index 455b35b56..29fbf9271 100644 --- a/src/build.c +++ b/src/build.c @@ -1674,7 +1674,7 @@ void sqlite3CreateView( const char *z; Token sEnd; DbFixer sFix; - Token *pName; + Token *pName = 0; int iDb; sqlite3 *db = pParse->db; @@ -1981,6 +1981,29 @@ static void destroyTable(Parse *pParse, Table *pTab){ } /* +** Remove entries from the sqlite_stat1 and sqlite_stat2 tables +** after a DROP INDEX or DROP TABLE command. +*/ +static void sqlite3ClearStatTables( + Parse *pParse, /* The parsing context */ + int iDb, /* The database number */ + const char *zType, /* "idx" or "tbl" */ + const char *zName /* Name of index or table */ +){ + static const char *azStatTab[] = { "sqlite_stat1", "sqlite_stat2" }; + int i; + const char *zDbName = pParse->db->aDb[iDb].zName; + for(i=0; i<ArraySize(azStatTab); i++){ + if( sqlite3FindTable(pParse->db, azStatTab[i], zDbName) ){ + sqlite3NestedParse(pParse, + "DELETE FROM %Q.%s WHERE %s=%Q", + zDbName, azStatTab[i], zType, zName + ); + } + } +} + +/* ** This routine is called to do the work of a DROP TABLE statement. ** pName is the name of the table to be dropped. */ @@ -2119,14 +2142,7 @@ void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView, int noErr){ sqlite3NestedParse(pParse, "DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'", pDb->zName, SCHEMA_TABLE(iDb), pTab->zName); - - /* Drop any statistics from the sqlite_stat1 table, if it exists */ - if( sqlite3FindTable(db, "sqlite_stat1", db->aDb[iDb].zName) ){ - sqlite3NestedParse(pParse, - "DELETE FROM %Q.sqlite_stat1 WHERE tbl=%Q", pDb->zName, pTab->zName - ); - } - + sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName); if( !isView && !IsVirtual(pTab) ){ destroyTable(pParse, pTab); } @@ -2308,6 +2324,7 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ Table *pTab = pIndex->pTable; /* The table that is indexed */ int iTab = pParse->nTab++; /* Btree cursor used for pTab */ int iIdx = pParse->nTab++; /* Btree cursor used for pIndex */ + int iSorter = iTab; /* Cursor opened by OpenSorter (if in use) */ int addr1; /* Address of top of loop */ int tnum; /* Root page of index */ Vdbe *v; /* Generate code into this virtual machine */ @@ -2317,6 +2334,15 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ sqlite3 *db = pParse->db; /* The database connection */ int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema); + /* Set bUseSorter to use OP_OpenSorter, or clear it to insert directly + ** into the index. The sorter is used unless either OMIT_MERGE_SORT is + ** defined or the system is configured to store temp files in-memory. */ +#ifdef SQLITE_OMIT_MERGE_SORT + static const int bUseSorter = 0; +#else + const int bUseSorter = !sqlite3TempInMemory(pParse->db); +#endif + #ifndef SQLITE_OMIT_AUTHORIZATION if( sqlite3AuthCheck(pParse, SQLITE_REINDEX, pIndex->zName, 0, db->aDb[iDb].zName ) ){ @@ -2341,10 +2367,29 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ if( memRootPage>=0 ){ sqlite3VdbeChangeP5(v, 1); } + + /* Open the sorter cursor if we are to use one. */ + if( bUseSorter ){ + iSorter = pParse->nTab++; + sqlite3VdbeAddOp4(v, OP_OpenSorter, iSorter, 0, 0, (char*)pKey, P4_KEYINFO); + sqlite3VdbeChangeP5(v, BTREE_SORTER); + } + + /* Open the table. Loop through all rows of the table, inserting index + ** records into the sorter. */ sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead); addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); regRecord = sqlite3GetTempReg(pParse); regIdxKey = sqlite3GenerateIndexKey(pParse, pIndex, iTab, regRecord, 1); + + if( bUseSorter ){ + sqlite3VdbeAddOp2(v, OP_IdxInsert, iSorter, regRecord); + sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); + sqlite3VdbeJumpHere(v, addr1); + addr1 = sqlite3VdbeAddOp2(v, OP_Sort, iSorter, 0); + sqlite3VdbeAddOp2(v, OP_RowKey, iSorter, regRecord); + } + if( pIndex->onError!=OE_None ){ const int regRowid = regIdxKey + pIndex->nColumn; const int j2 = sqlite3VdbeCurrentAddr(v) + 2; @@ -2363,13 +2408,15 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ sqlite3HaltConstraint( pParse, OE_Abort, "indexed columns are not unique", P4_STATIC); } - sqlite3VdbeAddOp2(v, OP_IdxInsert, iIdx, regRecord); + sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, bUseSorter); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); sqlite3ReleaseTempReg(pParse, regRecord); - sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); + sqlite3VdbeAddOp2(v, OP_Next, iSorter, addr1+1); sqlite3VdbeJumpHere(v, addr1); + sqlite3VdbeAddOp1(v, OP_Close, iTab); sqlite3VdbeAddOp1(v, OP_Close, iIdx); + sqlite3VdbeAddOp1(v, OP_Close, iSorter); } /* @@ -2949,15 +2996,9 @@ void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){ sqlite3BeginWriteOperation(pParse, 1, iDb); sqlite3NestedParse(pParse, "DELETE FROM %Q.%s WHERE name=%Q AND type='index'", - db->aDb[iDb].zName, SCHEMA_TABLE(iDb), - pIndex->zName + db->aDb[iDb].zName, SCHEMA_TABLE(iDb), pIndex->zName ); - if( sqlite3FindTable(db, "sqlite_stat1", db->aDb[iDb].zName) ){ - sqlite3NestedParse(pParse, - "DELETE FROM %Q.sqlite_stat1 WHERE idx=%Q", - db->aDb[iDb].zName, pIndex->zName - ); - } + sqlite3ClearStatTables(pParse, iDb, "idx", pIndex->zName); sqlite3ChangeCookie(pParse, iDb); destroyRootPage(pParse, pIndex->tnum, iDb); sqlite3VdbeAddOp4(v, OP_DropIndex, iDb, 0, 0, pIndex->zName, 0); @@ -3329,8 +3370,9 @@ void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pIndexedBy){ ** operator with A. This routine shifts that operator over to B. */ void sqlite3SrcListShiftJoinType(SrcList *p){ - if( p && p->a ){ + if( p ){ int i; + assert( p->a || p->nSrc==0 ); for(i=p->nSrc-1; i>0; i--){ p->a[i].jointype = p->a[i-1].jointype; } diff --git a/src/ctime.c b/src/ctime.c index a128f61a6..77174d0da 100644 --- a/src/ctime.c +++ b/src/ctime.c @@ -257,6 +257,9 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_OMIT_MEMORYDB "OMIT_MEMORYDB", #endif +#ifdef SQLITE_OMIT_MERGE_SORT + "OMIT_MERGE_SORT", +#endif #ifdef SQLITE_OMIT_OR_OPTIMIZATION "OMIT_OR_OPTIMIZATION", #endif @@ -323,6 +326,9 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_OMIT_XFER_OPT "OMIT_XFER_OPT", #endif +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + "PAGECACHE_BLOCKALLOC", +#endif #ifdef SQLITE_PERFORMANCE_TRACE "PERFORMANCE_TRACE", #endif diff --git a/src/fkey.c b/src/fkey.c index 37d4744dd..f0a9fb6ba 100644 --- a/src/fkey.c +++ b/src/fkey.c @@ -734,7 +734,24 @@ void sqlite3FkCheck( pTo = sqlite3LocateTable(pParse, 0, pFKey->zTo, zDb); } if( !pTo || locateFkeyIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){ + assert( isIgnoreErrors==0 || (regOld!=0 && regNew==0) ); if( !isIgnoreErrors || db->mallocFailed ) return; + if( pTo==0 ){ + /* If isIgnoreErrors is true, then a table is being dropped. In this + ** case SQLite runs a "DELETE FROM xxx" on the table being dropped + ** before actually dropping it in order to check FK constraints. + ** If the parent table of an FK constraint on the current table is + ** missing, behave as if it is empty. i.e. decrement the relevant + ** FK counter for each row of the current table with non-NULL keys. + */ + Vdbe *v = sqlite3GetVdbe(pParse); + int iJump = sqlite3VdbeCurrentAddr(v) + pFKey->nCol + 1; + for(i=0; i<pFKey->nCol; i++){ + int iReg = pFKey->aCol[i].iFrom + regOld + 1; + sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iJump); + } + sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, -1); + } continue; } assert( pFKey->nCol==1 || (aiFree && pIdx) ); diff --git a/src/main.c b/src/main.c index b23663045..efdb2076b 100644 --- a/src/main.c +++ b/src/main.c @@ -234,6 +234,16 @@ int sqlite3_initialize(void){ #endif #endif + /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT + ** compile-time option. + */ +#ifdef SQLITE_EXTRA_INIT + if( rc==SQLITE_OK && sqlite3GlobalConfig.isInit ){ + int SQLITE_EXTRA_INIT(void); + rc = SQLITE_EXTRA_INIT(); + } +#endif + return rc; } diff --git a/src/os_unix.c b/src/os_unix.c index b2956c164..8abef8de8 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -208,7 +208,6 @@ struct unixFile { sqlite3_io_methods const *pMethod; /* Always the first entry */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ - int dirfd; /* File descriptor for the directory */ unsigned char eFileLock; /* The type of lock held on this fd */ unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ int lastErrno; /* The unix errno from last I/O error */ @@ -250,8 +249,14 @@ struct unixFile { /* ** Allowed values for the unixFile.ctrlFlags bitmask: */ -#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ -#define UNIXFILE_RDONLY 0x02 /* Connection is read only */ +#define UNIXFILE_EXCL 0x01 /* Connections from one process only */ +#define UNIXFILE_RDONLY 0x02 /* Connection is read only */ +#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#ifndef SQLITE_DISABLE_DIRSYNC +# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ +#else +# define UNIXFILE_DIRSYNC 0x00 +#endif /* ** Include code that is common to all os_*.c files @@ -297,6 +302,9 @@ static int posixOpen(const char *zFile, int flags, int mode){ return open(zFile, flags, mode); } +/* Forward reference */ +static int openDirectory(const char*, int*); + /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during @@ -393,6 +401,12 @@ static struct unix_syscall { #endif #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) + { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, +#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) + + { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, +#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) + }; /* End of the overrideable system calls */ /* @@ -1749,10 +1763,6 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){ */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; - if( pFile->dirfd>=0 ){ - robust_close(pFile, pFile->dirfd, __LINE__); - pFile->dirfd=-1; - } if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; @@ -1760,7 +1770,7 @@ static int closeUnixFile(sqlite3_file *id){ #if OS_VXWORKS if( pFile->pId ){ if( pFile->isDelete ){ - unlink(pFile->pId->zCanonicalName); + osUnlink(pFile->pId->zCanonicalName); } vxworksReleaseFileId(pFile->pId); pFile->pId = 0; @@ -2009,7 +2019,7 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) { /* To fully unlock the database, delete the lock file */ assert( eFileLock==NO_LOCK ); - if( unlink(zLockFile) ){ + if( osUnlink(zLockFile) ){ int rc = 0; int tErrno = errno; if( ENOENT != tErrno ){ @@ -2515,11 +2525,12 @@ static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; + afpLockingContext *context; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); - afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; + context = (afpLockingContext *) pFile->lockingContext; if( context->reserved ){ *pResOut = 1; return SQLITE_OK; @@ -2659,7 +2670,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){ ** operating system calls for the specified lock. */ if( eFileLock==SHARED_LOCK ){ - int lrc1, lrc2, lrc1Errno; + int lrc1, lrc2, lrc1Errno = 0; long lk, mask; assert( pInode->nShared==0 ); @@ -3033,17 +3044,19 @@ static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ #elif defined(USE_PREAD64) do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR); #else - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; + do{ + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError( newOffset-- ); + if( newOffset!=offset ){ + if( newOffset == -1 ){ + ((unixFile*)id)->lastErrno = errno; + }else{ + ((unixFile*)id)->lastErrno = 0; + } + return -1; } - return -1; - } - do{ got = osWrite(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); + got = osWrite(id->h, pBuf, cnt); + }while( got<0 && errno==EINTR ); #endif TIMER_END; if( got<0 ){ @@ -3246,6 +3259,50 @@ static int full_fsync(int fd, int fullSync, int dataOnly){ } /* +** Open a file descriptor to the directory containing file zFilename. +** If successful, *pFd is set to the opened file descriptor and +** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM +** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined +** value. +** +** The directory file descriptor is used for only one thing - to +** fsync() a directory to make sure file creation and deletion events +** are flushed to disk. Such fsyncs are not needed on newer +** journaling filesystems, but are required on older filesystems. +** +** This routine can be overridden using the xSetSysCall interface. +** The ability to override this routine was added in support of the +** chromium sandbox. Opening a directory is a security risk (we are +** told) so making it overrideable allows the chromium sandbox to +** replace this routine with a harmless no-op. To make this routine +** a no-op, replace it with a stub that returns SQLITE_OK but leaves +** *pFd set to a negative number. +** +** If SQLITE_OK is returned, the caller is responsible for closing +** the file descriptor *pFd using close(). +*/ +static int openDirectory(const char *zFilename, int *pFd){ + int ii; + int fd = -1; + char zDirname[MAX_PATHNAME+1]; + + sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); + for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); + if( ii>0 ){ + zDirname[ii] = '\0'; + fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); + if( fd>=0 ){ +#ifdef FD_CLOEXEC + osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); +#endif + OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); + } + } + *pFd = fd; + return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); +} + +/* ** Make sure all writes to a particular file are committed to disk. ** ** If dataOnly==0 then both the file itself and its metadata (file @@ -3285,28 +3342,23 @@ static int unixSync(sqlite3_file *id, int flags){ pFile->lastErrno = errno; return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); } - if( pFile->dirfd>=0 ){ - OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd, + + /* Also fsync the directory containing the file if the DIRSYNC flag + ** is set. This is a one-time occurrance. Many systems (examples: AIX) + ** are unable to fsync a directory, so ignore errors on the fsync. + */ + if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ + int dirfd; + OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, HAVE_FULLFSYNC, isFullsync)); -#ifndef SQLITE_DISABLE_DIRSYNC - /* The directory sync is only attempted if full_fsync is - ** turned off or unavailable. If a full_fsync occurred above, - ** then the directory sync is superfluous. - */ - if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){ - /* - ** We have received multiple reports of fsync() returning - ** errors when applied to directories on certain file systems. - ** A failed directory sync is not a big deal. So it seems - ** better to ignore the error. Ticket #1657 - */ - /* pFile->lastErrno = errno; */ - /* return SQLITE_IOERR; */ + rc = osOpenDirectory(pFile->zPath, &dirfd); + if( rc==SQLITE_OK && dirfd>=0 ){ + full_fsync(dirfd, 0, 0); + robust_close(pFile, dirfd, __LINE__); + }else if( rc==SQLITE_CANTOPEN ){ + rc = SQLITE_OK; } -#endif - /* Only need to sync once, so close the directory when we are done */ - robust_close(pFile, pFile->dirfd, __LINE__); - pFile->dirfd = -1; + pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; } return rc; } @@ -3388,11 +3440,9 @@ static int proxyFileControl(sqlite3_file*,int,void*); /* ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT -** file-control operation. -** -** If the user has configured a chunk-size for this file, it could be -** that the file needs to be extended at this point. Otherwise, the -** SQLITE_FCNTL_SIZE_HINT operation is a no-op for Unix. +** file-control operation. Enlarge the database to nBytes in size +** (rounded up to the next chunk-size). If the database is already +** nBytes or larger, this routine is a no-op. */ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ if( pFile->szChunk ){ @@ -3444,21 +3494,37 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ + unixFile *pFile = (unixFile*)id; switch( op ){ case SQLITE_FCNTL_LOCKSTATE: { - *(int*)pArg = ((unixFile*)id)->eFileLock; + *(int*)pArg = pFile->eFileLock; return SQLITE_OK; } case SQLITE_LAST_ERRNO: { - *(int*)pArg = ((unixFile*)id)->lastErrno; + *(int*)pArg = pFile->lastErrno; return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { - ((unixFile*)id)->szChunk = *(int *)pArg; + pFile->szChunk = *(int *)pArg; return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { - return fcntlSizeHint((unixFile *)id, *(i64 *)pArg); + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *)pArg); + SimulateIOErrorBenign(0); + return rc; + } + case SQLITE_FCNTL_PERSIST_WAL: { + int bPersist = *(int*)pArg; + if( bPersist<0 ){ + *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0; + }else if( bPersist==0 ){ + pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL; + }else{ + pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL; + } + return SQLITE_OK; } #ifndef NDEBUG /* The pager calls this method to signal that it has done @@ -4143,7 +4209,7 @@ static int unixShmUnmap( assert( pShmNode->nRef>0 ); pShmNode->nRef--; if( pShmNode->nRef==0 ){ - if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename); + if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename); unixShmPurge(pDbFd); } unixLeaveMutex(); @@ -4456,7 +4522,7 @@ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); static int fillInUnixFile( sqlite3_vfs *pVfs, /* Pointer to vfs object */ int h, /* Open file descriptor of file being opened */ - int dirfd, /* Directory file descriptor */ + int syncDir, /* True to sync directory on first sync */ sqlite3_file *pId, /* Write to the unixFile structure here */ const char *zFilename, /* Name of the file being opened */ int noLock, /* Omit locking if true */ @@ -4487,7 +4553,6 @@ static int fillInUnixFile( OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; - pNew->dirfd = dirfd; pNew->zPath = zFilename; if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ pNew->ctrlFlags = UNIXFILE_EXCL; @@ -4497,6 +4562,9 @@ static int fillInUnixFile( if( isReadOnly ){ pNew->ctrlFlags |= UNIXFILE_RDONLY; } + if( syncDir ){ + pNew->ctrlFlags |= UNIXFILE_DIRSYNC; + } #if OS_VXWORKS pNew->pId = vxworksFindFileId(zFilename); @@ -4623,13 +4691,12 @@ static int fillInUnixFile( if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); h = -1; - unlink(zFilename); + osUnlink(zFilename); isDelete = 0; } pNew->isDelete = isDelete; #endif if( rc!=SQLITE_OK ){ - if( dirfd>=0 ) robust_close(pNew, dirfd, __LINE__); if( h>=0 ) robust_close(pNew, h, __LINE__); }else{ pNew->pMethod = pLockingStyle; @@ -4639,37 +4706,6 @@ static int fillInUnixFile( } /* -** Open a file descriptor to the directory containing file zFilename. -** If successful, *pFd is set to the opened file descriptor and -** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM -** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined -** value. -** -** If SQLITE_OK is returned, the caller is responsible for closing -** the file descriptor *pFd using close(). -*/ -static int openDirectory(const char *zFilename, int *pFd){ - int ii; - int fd = -1; - char zDirname[MAX_PATHNAME+1]; - - sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); - for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); - if( ii>0 ){ - zDirname[ii] = '\0'; - fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); - if( fd>=0 ){ -#ifdef FD_CLOEXEC - osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); -#endif - OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); - } - } - *pFd = fd; - return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); -} - -/* ** Return the name of a directory in which to put temporary files. ** If no suitable temporary file directory can be found, return NULL. */ @@ -4783,7 +4819,7 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ ** ** Even if a subsequent open() call does succeed, the consequences of ** not searching for a resusable file descriptor are not dire. */ - if( 0==stat(zPath, &sStat) ){ + if( 0==osStat(zPath, &sStat) ){ unixInodeInfo *pInode; unixEnterMutex(); @@ -4859,7 +4895,7 @@ static int findCreateFileMode( memcpy(zDb, zPath, nDb); zDb[nDb] = '\0'; - if( 0==stat(zDb, &sStat) ){ + if( 0==osStat(zDb, &sStat) ){ *pMode = sStat.st_mode & 0777; }else{ rc = SQLITE_IOERR_FSTAT; @@ -4901,7 +4937,6 @@ static int unixOpen( ){ unixFile *p = (unixFile *)pFile; int fd = -1; /* File descriptor returned by open() */ - int dirfd = -1; /* Directory file descriptor */ int openFlags = 0; /* Flags to pass to open() */ int eType = flags&0xFFFFFF00; /* Type of file to open */ int noLock; /* True to omit locking primitives */ @@ -4915,12 +4950,15 @@ static int unixOpen( #if SQLITE_ENABLE_LOCKING_STYLE int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); #endif +#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE + struct statfs fsInfo; +#endif /* If creating a master or main-file journal, this function will open ** a file-descriptor on the directory too. The first time unixSync() ** is called the directory file descriptor will be fsync()ed and close()d. */ - int isOpenDirectory = (isCreate && ( + int syncDir = (isCreate && ( eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_WAL @@ -4974,7 +5012,7 @@ static int unixOpen( p->pUnused = pUnused; }else if( !zName ){ /* If zName is NULL, the upper layer is requesting a temp file. */ - assert(isDelete && !isOpenDirectory); + assert(isDelete && !syncDir); rc = unixGetTempname(MAX_PATHNAME+1, zTmpname); if( rc!=SQLITE_OK ){ return rc; @@ -5030,7 +5068,7 @@ static int unixOpen( #if OS_VXWORKS zPath = zName; #else - unlink(zName); + osUnlink(zName); #endif } #if SQLITE_ENABLE_LOCKING_STYLE @@ -5039,19 +5077,6 @@ static int unixOpen( } #endif - if( isOpenDirectory ){ - rc = openDirectory(zPath, &dirfd); - if( rc!=SQLITE_OK ){ - /* It is safe to close fd at this point, because it is guaranteed not - ** to be open on a database file. If it were open on a database file, - ** it would not be safe to close as this would release any locks held - ** on the file by this process. */ - assert( eType!=SQLITE_OPEN_MAIN_DB ); - robust_close(p, fd, __LINE__); - goto open_finished; - } - } - #ifdef FD_CLOEXEC osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); #endif @@ -5060,10 +5085,8 @@ static int unixOpen( #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE - struct statfs fsInfo; if( fstatfs(fd, &fsInfo) == -1 ){ ((unixFile*)pFile)->lastErrno = errno; - if( dirfd>=0 ) robust_close(p, dirfd, __LINE__); robust_close(p, fd, __LINE__); return SQLITE_IOERR_ACCESS; } @@ -5085,7 +5108,6 @@ static int unixOpen( if( envforce!=NULL ){ useProxy = atoi(envforce)>0; }else{ - struct statfs fsInfo; if( statfs(zPath, &fsInfo) == -1 ){ /* In theory, the close(fd) call is sub-optimal. If the file opened ** with fd is a database file, and there are other connections open @@ -5095,9 +5117,6 @@ static int unixOpen( ** not while other file descriptors opened by the same process on ** the same file are working. */ p->lastErrno = errno; - if( dirfd>=0 ){ - robust_close(p, dirfd, __LINE__); - } robust_close(p, fd, __LINE__); rc = SQLITE_IOERR_ACCESS; goto open_finished; @@ -5105,7 +5124,7 @@ static int unixOpen( useProxy = !(fsInfo.f_flags&MNT_LOCAL); } if( useProxy ){ - rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, + rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock, isDelete, isReadonly); if( rc==SQLITE_OK ){ rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); @@ -5123,7 +5142,7 @@ static int unixOpen( } #endif - rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, + rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock, isDelete, isReadonly); open_finished: if( rc!=SQLITE_OK ){ @@ -5145,13 +5164,13 @@ static int unixDelete( int rc = SQLITE_OK; UNUSED_PARAMETER(NotUsed); SimulateIOError(return SQLITE_IOERR_DELETE); - if( unlink(zPath)==(-1) && errno!=ENOENT ){ + if( osUnlink(zPath)==(-1) && errno!=ENOENT ){ return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); } #ifndef SQLITE_DISABLE_DIRSYNC if( dirSync ){ int fd; - rc = openDirectory(zPath, &fd); + rc = osOpenDirectory(zPath, &fd); if( rc==SQLITE_OK ){ #if OS_VXWORKS if( fsync(fd)==-1 ) @@ -5162,6 +5181,8 @@ static int unixDelete( rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); } robust_close(0, fd, __LINE__); + }else if( rc==SQLITE_CANTOPEN ){ + rc = SQLITE_OK; } } #endif @@ -5204,7 +5225,7 @@ static int unixAccess( *pResOut = (osAccess(zPath, amode)==0); if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){ struct stat buf; - if( 0==stat(zPath, &buf) && buf.st_size==0 ){ + if( 0==osStat(zPath, &buf) && buf.st_size==0 ){ *pResOut = 0; } } @@ -5723,7 +5744,6 @@ static int proxyCreateUnixFile( int islockfile /* if non zero missing dirs will be created */ ) { int fd = -1; - int dirfd = -1; unixFile *pNew; int rc = SQLITE_OK; int openFlags = O_RDWR | O_CREAT; @@ -5788,7 +5808,7 @@ static int proxyCreateUnixFile( pUnused->flags = openFlags; pNew->pUnused = pUnused; - rc = fillInUnixFile(&dummyVfs, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0, 0); + rc = fillInUnixFile(&dummyVfs, fd, 0, (sqlite3_file*)pNew, path, 0, 0, 0); if( rc==SQLITE_OK ){ *ppFile = pNew; return SQLITE_OK; @@ -5828,6 +5848,8 @@ static int proxyGetHostID(unsigned char *pHostID, int *pError){ return SQLITE_IOERR; } } +#else + UNUSED_PARAMETER(pError); #endif #ifdef SQLITE_TEST /* simulate multiple hosts by creating unique hostid file paths */ @@ -5902,7 +5924,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ end_breaklock: if( rc ){ if( fd>=0 ){ - unlink(tPath); + osUnlink(tPath); robust_close(pFile, fd, __LINE__); } fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); @@ -5920,6 +5942,7 @@ static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ int nTries = 0; struct timespec conchModTime; + memset(&conchModTime, 0, sizeof(conchModTime)); do { rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); nTries ++; @@ -6151,11 +6174,12 @@ static int proxyTakeConch(unixFile *pFile){ end_takeconch: OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); if( rc==SQLITE_OK && pFile->openFlags ){ + int fd; if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); } pFile->h = -1; - int fd = robust_open(pCtx->dbPath, pFile->openFlags, + fd = robust_open(pCtx->dbPath, pFile->openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); if( fd>=0 ){ @@ -6725,7 +6749,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==16 ); + assert( ArraySize(aSyscall)==18 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ diff --git a/src/os_win.c b/src/os_win.c index 5d9bf5984..02a7a0c62 100644 --- a/src/os_win.c +++ b/src/os_win.c @@ -102,8 +102,9 @@ struct winFile { const sqlite3_io_methods *pMethod; /*** Must be first ***/ sqlite3_vfs *pVfs; /* The VFS used to open this file */ HANDLE h; /* Handle for accessing the file */ - unsigned char locktype; /* Type of lock currently held on this file */ + u8 locktype; /* Type of lock currently held on this file */ short sharedLockByte; /* Randomly chosen byte used as a shared lock */ + u8 bPersistWal; /* True to persist WAL files */ DWORD lastErrno; /* The Windows errno from the last I/O error */ DWORD sectorSize; /* Sector size of the device file is on */ winShm *pShm; /* Instance of shared memory on this file */ @@ -118,6 +119,76 @@ struct winFile { #endif }; +/* + * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the + * various Win32 API heap functions instead of our own. + */ +#ifdef SQLITE_WIN32_MALLOC +/* + * The initial size of the Win32-specific heap. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_INIT_SIZE +# define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_DEFAULT_CACHE_SIZE) * \ + (SQLITE_DEFAULT_PAGE_SIZE) + 4194304) +#endif + +/* + * The maximum size of the Win32-specific heap. This value may be zero. + */ +#ifndef SQLITE_WIN32_HEAP_MAX_SIZE +# define SQLITE_WIN32_HEAP_MAX_SIZE (0) +#endif + +/* + * The extra flags to use in calls to the Win32 heap APIs. This value may be + * zero for the default behavior. + */ +#ifndef SQLITE_WIN32_HEAP_FLAGS +# define SQLITE_WIN32_HEAP_FLAGS (0) +#endif + +/* +** The winMemData structure stores information required by the Win32-specific +** sqlite3_mem_methods implementation. +*/ +typedef struct winMemData winMemData; +struct winMemData { +#ifndef NDEBUG + u32 magic; /* Magic number to detect structure corruption. */ +#endif + HANDLE hHeap; /* The handle to our heap. */ + BOOL bOwned; /* Do we own the heap (i.e. destroy it on shutdown)? */ +}; + +#ifndef NDEBUG +#define WINMEM_MAGIC 0x42b2830b +#endif + +static struct winMemData win_mem_data = { +#ifndef NDEBUG + WINMEM_MAGIC, +#endif + NULL, FALSE +}; + +#ifndef NDEBUG +#define winMemAssertMagic() assert( win_mem_data.magic==WINMEM_MAGIC ) +#else +#define winMemAssertMagic() +#endif + +#define winMemGetHeap() win_mem_data.hHeap + +static void *winMemMalloc(int nBytes); +static void winMemFree(void *pPrior); +static void *winMemRealloc(void *pPrior, int nBytes); +static int winMemSize(void *p); +static int winMemRoundup(int n); +static int winMemInit(void *pAppData); +static void winMemShutdown(void *pAppData); + +const sqlite3_mem_methods *sqlite3MemGetWin32(void); +#endif /* SQLITE_WIN32_MALLOC */ /* ** Forward prototypes. @@ -170,6 +241,188 @@ static int sqlite3_os_type = 0; } #endif /* SQLITE_OS_WINCE */ +#ifdef SQLITE_WIN32_MALLOC +/* +** Allocate nBytes of memory. +*/ +static void *winMemMalloc(int nBytes){ + HANDLE hHeap; + void *p; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#ifdef SQLITE_WIN32_MALLOC_VALIDATE + assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + assert( nBytes>=0 ); + p = HeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); + if( !p ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%d), heap=%p", + nBytes, GetLastError(), (void*)hHeap); + } + return p; +} + +/* +** Free memory. +*/ +static void winMemFree(void *pPrior){ + HANDLE hHeap; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#ifdef SQLITE_WIN32_MALLOC_VALIDATE + assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); +#endif + if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */ + if( !HeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%d), heap=%p", + pPrior, GetLastError(), (void*)hHeap); + } +} + +/* +** Change the size of an existing memory allocation +*/ +static void *winMemRealloc(void *pPrior, int nBytes){ + HANDLE hHeap; + void *p; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#ifdef SQLITE_WIN32_MALLOC_VALIDATE + assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); +#endif + assert( nBytes>=0 ); + if( !pPrior ){ + p = HeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); + }else{ + p = HeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes); + } + if( !p ){ + sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%d), heap=%p", + pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, GetLastError(), + (void*)hHeap); + } + return p; +} + +/* +** Return the size of an outstanding allocation, in bytes. +*/ +static int winMemSize(void *p){ + HANDLE hHeap; + SIZE_T n; + + winMemAssertMagic(); + hHeap = winMemGetHeap(); + assert( hHeap!=0 ); + assert( hHeap!=INVALID_HANDLE_VALUE ); +#ifdef SQLITE_WIN32_MALLOC_VALIDATE + assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + if( !p ) return 0; + n = HeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p); + if( n==(SIZE_T)-1 ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%d), heap=%p", + p, GetLastError(), (void*)hHeap); + return 0; + } + return (int)n; +} + +/* +** Round up a request size to the next valid allocation size. +*/ +static int winMemRoundup(int n){ + return n; +} + +/* +** Initialize this module. +*/ +static int winMemInit(void *pAppData){ + winMemData *pWinMemData = (winMemData *)pAppData; + + if( !pWinMemData ) return SQLITE_ERROR; + assert( pWinMemData->magic==WINMEM_MAGIC ); + if( !pWinMemData->hHeap ){ + pWinMemData->hHeap = HeapCreate(SQLITE_WIN32_HEAP_FLAGS, + SQLITE_WIN32_HEAP_INIT_SIZE, + SQLITE_WIN32_HEAP_MAX_SIZE); + if( !pWinMemData->hHeap ){ + sqlite3_log(SQLITE_NOMEM, + "failed to HeapCreate (%d), flags=%u, initSize=%u, maxSize=%u", + GetLastError(), SQLITE_WIN32_HEAP_FLAGS, SQLITE_WIN32_HEAP_INIT_SIZE, + SQLITE_WIN32_HEAP_MAX_SIZE); + return SQLITE_NOMEM; + } + pWinMemData->bOwned = TRUE; + } + assert( pWinMemData->hHeap!=0 ); + assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); +#ifdef SQLITE_WIN32_MALLOC_VALIDATE + assert( HeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + return SQLITE_OK; +} + +/* +** Deinitialize this module. +*/ +static void winMemShutdown(void *pAppData){ + winMemData *pWinMemData = (winMemData *)pAppData; + + if( !pWinMemData ) return; + if( pWinMemData->hHeap ){ + assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); +#ifdef SQLITE_WIN32_MALLOC_VALIDATE + assert( HeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); +#endif + if( pWinMemData->bOwned ){ + if( !HeapDestroy(pWinMemData->hHeap) ){ + sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%d), heap=%p", + GetLastError(), (void*)pWinMemData->hHeap); + } + pWinMemData->bOwned = FALSE; + } + pWinMemData->hHeap = NULL; + } +} + +/* +** Populate the low-level memory allocation function pointers in +** sqlite3GlobalConfig.m with pointers to the routines in this file. The +** arguments specify the block of memory to manage. +** +** This routine is only called by sqlite3_config(), and therefore +** is not required to be threadsafe (it is not). +*/ +const sqlite3_mem_methods *sqlite3MemGetWin32(void){ + static const sqlite3_mem_methods winMemMethods = { + winMemMalloc, + winMemFree, + winMemRealloc, + winMemSize, + winMemRoundup, + winMemInit, + winMemShutdown, + &win_mem_data + }; + return &winMemMethods; +} + +void sqlite3MemSetDefault(void){ + sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32()); +} +#endif /* SQLITE_WIN32_MALLOC */ + /* ** Convert a UTF-8 string to microsoft unicode (UTF-16?). ** @@ -1335,24 +1588,41 @@ static int winUnlock(sqlite3_file *id, int locktype){ ** Control and query of the open file handle. */ static int winFileControl(sqlite3_file *id, int op, void *pArg){ + winFile *pFile = (winFile*)id; switch( op ){ case SQLITE_FCNTL_LOCKSTATE: { - *(int*)pArg = ((winFile*)id)->locktype; + *(int*)pArg = pFile->locktype; return SQLITE_OK; } case SQLITE_LAST_ERRNO: { - *(int*)pArg = (int)((winFile*)id)->lastErrno; + *(int*)pArg = (int)pFile->lastErrno; return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { - ((winFile*)id)->szChunk = *(int *)pArg; + pFile->szChunk = *(int *)pArg; return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { - sqlite3_int64 sz = *(sqlite3_int64*)pArg; - SimulateIOErrorBenign(1); - winTruncate(id, sz); - SimulateIOErrorBenign(0); + winFile *pFile = (winFile*)id; + sqlite3_int64 oldSz; + int rc = winFileSize(id, &oldSz); + if( rc==SQLITE_OK ){ + sqlite3_int64 newSz = *(sqlite3_int64*)pArg; + if( newSz>oldSz ){ + SimulateIOErrorBenign(1); + rc = winTruncate(id, newSz); + SimulateIOErrorBenign(0); + } + } + return rc; + } + case SQLITE_FCNTL_PERSIST_WAL: { + int bPersist = *(int*)pArg; + if( bPersist<0 ){ + *(int*)pArg = pFile->bPersistWal; + }else{ + pFile->bPersistWal = bPersist!=0; + } return SQLITE_OK; } case SQLITE_FCNTL_SYNC_OMITTED: { @@ -2180,6 +2450,7 @@ static int winOpen( winFile *pFile = (winFile*)id; void *zConverted; /* Filename in OS encoding */ const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */ + int cnt = 0; /* If argument zPath is a NULL pointer, this function is required to open ** a temporary file. Use this buffer to store the file name in. @@ -2299,31 +2570,31 @@ static int winOpen( #endif if( isNT() ){ - h = CreateFileW((WCHAR*)zConverted, - dwDesiredAccess, - dwShareMode, - NULL, - dwCreationDisposition, - dwFlagsAndAttributes, - NULL - ); + while( (h = CreateFileW((WCHAR*)zConverted, + dwDesiredAccess, + dwShareMode, NULL, + dwCreationDisposition, + dwFlagsAndAttributes, + NULL))==INVALID_HANDLE_VALUE && + retryIoerr(&cnt) ){} /* isNT() is 1 if SQLITE_OS_WINCE==1, so this else is never executed. ** Since the ASCII version of these Windows API do not exist for WINCE, ** it's important to not reference them for WINCE builds. */ #if SQLITE_OS_WINCE==0 }else{ - h = CreateFileA((char*)zConverted, - dwDesiredAccess, - dwShareMode, - NULL, - dwCreationDisposition, - dwFlagsAndAttributes, - NULL - ); + while( (h = CreateFileA((char*)zConverted, + dwDesiredAccess, + dwShareMode, NULL, + dwCreationDisposition, + dwFlagsAndAttributes, + NULL))==INVALID_HANDLE_VALUE && + retryIoerr(&cnt) ){} #endif } + logIoerr(cnt); + OSTRACE(("OPEN %d %s 0x%lx %s\n", h, zName, dwDesiredAccess, h==INVALID_HANDLE_VALUE ? "failed" : "ok")); @@ -2455,9 +2726,9 @@ static int winAccess( int cnt = 0; WIN32_FILE_ATTRIBUTE_DATA sAttrData; memset(&sAttrData, 0, sizeof(sAttrData)); - while( (rc = GetFileAttributesExW((WCHAR*)zConverted, + while( !(rc = GetFileAttributesExW((WCHAR*)zConverted, GetFileExInfoStandard, - &sAttrData)) && rc==0 && retryIoerr(&cnt) ){} + &sAttrData)) && retryIoerr(&cnt) ){} if( rc ){ /* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file ** as if it does not exist. @@ -2470,6 +2741,7 @@ static int winAccess( attr = sAttrData.dwFileAttributes; } }else{ + logIoerr(cnt); if( GetLastError()!=ERROR_FILE_NOT_FOUND ){ winLogError(SQLITE_IOERR_ACCESS, "winAccess", zFilename); free(zConverted); @@ -2494,7 +2766,8 @@ static int winAccess( rc = attr!=INVALID_FILE_ATTRIBUTES; break; case SQLITE_ACCESS_READWRITE: - rc = (attr & FILE_ATTRIBUTE_READONLY)==0; + rc = attr!=INVALID_FILE_ATTRIBUTES && + (attr & FILE_ATTRIBUTE_READONLY)==0; break; default: assert(!"Invalid flags argument"); diff --git a/src/pager.c b/src/pager.c index 7ff9a9a00..373d06aec 100644 --- a/src/pager.c +++ b/src/pager.c @@ -620,6 +620,8 @@ struct Pager { u8 tempFile; /* zFilename is a temporary file */ u8 readOnly; /* True for a read-only database */ u8 memDb; /* True to inhibit all file I/O */ + u8 hasSeenStress; /* pagerStress() called one or more times */ + u8 isSorter; /* True for a PAGER_SORTER */ /************************************************************************** ** The following block contains those class members that change during @@ -843,6 +845,15 @@ static int assert_pager_state(Pager *p){ assert( pagerUseWal(p)==0 ); } + /* A sorter is a temp file that never spills to disk and always has + ** the doNotSpill flag set + */ + if( p->isSorter ){ + assert( p->tempFile ); + assert( p->doNotSpill ); + assert( p->fd->pMethods==0 ); + } + /* If changeCountDone is set, a RESERVED lock or greater must be held ** on the file. */ @@ -3739,6 +3750,7 @@ static int pagerSyncHotJournal(Pager *pPager){ int sqlite3PagerClose(Pager *pPager){ u8 *pTmp = (u8 *)pPager->pTmpSpace; + assert( assert_pager_state(pPager) ); disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); /* pPager->errCode = 0; */ @@ -4173,6 +4185,7 @@ static int pagerStress(void *p, PgHdr *pPg){ ** be called in the error state. Nevertheless, we include a NEVER() ** test for the error state as a safeguard against future changes. */ + pPager->hasSeenStress = 1; if( NEVER(pPager->errCode) ) return SQLITE_OK; if( pPager->doNotSpill ) return SQLITE_OK; if( pPager->doNotSyncSpill && (pPg->flags & PGHDR_NEED_SYNC)!=0 ){ @@ -4544,6 +4557,12 @@ int sqlite3PagerOpen( /* pPager->pBusyHandlerArg = 0; */ pPager->xReiniter = xReinit; /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ +#ifndef SQLITE_OMIT_MERGE_SORT + if( flags & PAGER_SORTER ){ + pPager->doNotSpill = 1; + pPager->isSorter = 1; + } +#endif *ppPager = pPager; return SQLITE_OK; @@ -6088,6 +6107,17 @@ int sqlite3PagerIsMemdb(Pager *pPager){ return MEMDB; } +#ifndef SQLITE_OMIT_MERGE_SORT +/* +** Return true if the pager has seen a pagerStress callback. +*/ +int sqlite3PagerUnderStress(Pager *pPager){ + assert( pPager->isSorter ); + assert( pPager->doNotSpill ); + return pPager->hasSeenStress; +} +#endif + /* ** Check that there are at least nSavepoint savepoints open. If there are ** currently less than nSavepoints open, then open one or more savepoints diff --git a/src/pager.h b/src/pager.h index eab7ddaf8..ccd7467d6 100644 --- a/src/pager.h +++ b/src/pager.h @@ -60,6 +60,7 @@ typedef struct PgHdr DbPage; #define PAGER_OMIT_JOURNAL 0x0001 /* Do not use a rollback journal */ #define PAGER_NO_READLOCK 0x0002 /* Omit readlocks on readonly files */ #define PAGER_MEMORY 0x0004 /* In-memory database */ +#define PAGER_SORTER 0x0020 /* Accumulator in external merge sort */ /* ** Valid values for the second argument to sqlite3PagerLockingMode(). @@ -155,6 +156,9 @@ const char *sqlite3PagerJournalname(Pager*); int sqlite3PagerNosync(Pager*); void *sqlite3PagerTempSpace(Pager*); int sqlite3PagerIsMemdb(Pager*); +#ifndef SQLITE_OMIT_MERGE_SORT +int sqlite3PagerUnderStress(Pager*); +#endif /* Functions used to truncate the database file. */ void sqlite3PagerTruncateImage(Pager*,Pgno); diff --git a/src/pcache1.c b/src/pcache1.c index e47265a22..de96e5242 100644 --- a/src/pcache1.c +++ b/src/pcache1.c @@ -24,6 +24,9 @@ typedef struct PgHdr1 PgHdr1; typedef struct PgFreeslot PgFreeslot; typedef struct PGroup PGroup; +typedef struct PGroupBlock PGroupBlock; +typedef struct PGroupBlockList PGroupBlockList; + /* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set ** of one or more PCaches that are able to recycle each others unpinned ** pages when they are under memory pressure. A PGroup is an instance of @@ -53,8 +56,66 @@ struct PGroup { int mxPinned; /* nMaxpage + 10 - nMinPage */ int nCurrentPage; /* Number of purgeable pages allocated */ PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */ +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + int isBusy; /* Do not run ReleaseMemory() if true */ + PGroupBlockList *pBlockList; /* List of block-lists for this group */ +#endif +}; + +/* +** If SQLITE_PAGECACHE_BLOCKALLOC is defined when the library is built, +** each PGroup structure has a linked list of the the following starting +** at PGroup.pBlockList. There is one entry for each distinct page-size +** currently used by members of the PGroup (i.e. 1024 bytes, 4096 bytes +** etc.). Variable PGroupBlockList.nByte is set to the actual allocation +** size requested by each pcache, which is the database page-size plus +** the various header structures used by the pcache, pager and btree layers. +** Usually around (pgsz+200) bytes. +** +** This size (pgsz+200) bytes is not allocated efficiently by some +** implementations of malloc. In particular, some implementations are only +** able to allocate blocks of memory chunks of 2^N bytes, where N is some +** integer value. Since the page-size is a power of 2, this means we +** end up wasting (pgsz-200) bytes in each allocation. +** +** If SQLITE_PAGECACHE_BLOCKALLOC is defined, the (pgsz+200) byte blocks +** are not allocated directly. Instead, blocks of roughly M*(pgsz+200) bytes +** are requested from malloc allocator. After a block is returned, +** sqlite3MallocSize() is used to determine how many (pgsz+200) byte +** allocations can fit in the space returned by malloc(). This value may +** be more than M. +** +** The blocks are stored in a doubly-linked list. Variable PGroupBlock.nEntry +** contains the number of allocations that will fit in the aData[] space. +** nEntry is limited to the number of bits in bitmask mUsed. If a slot +** within aData is in use, the corresponding bit in mUsed is set. Thus +** when (mUsed+1==(1 << nEntry)) the block is completely full. +** +** Each time a slot within a block is freed, the block is moved to the start +** of the linked-list. And if a block becomes completely full, then it is +** moved to the end of the list. As a result, when searching for a free +** slot, only the first block in the list need be examined. If it is full, +** then it is guaranteed that all blocks are full. +*/ +struct PGroupBlockList { + int nByte; /* Size of each allocation in bytes */ + PGroupBlock *pFirst; /* First PGroupBlock in list */ + PGroupBlock *pLast; /* Last PGroupBlock in list */ + PGroupBlockList *pNext; /* Next block-list attached to group */ +}; + +struct PGroupBlock { + Bitmask mUsed; /* Mask of used slots */ + int nEntry; /* Maximum number of allocations in aData[] */ + u8 *aData; /* Pointer to data block */ + PGroupBlock *pNext; /* Next PGroupBlock in list */ + PGroupBlock *pPrev; /* Previous PGroupBlock in list */ + PGroupBlockList *pList; /* Owner list */ }; +/* Minimum value for PGroupBlock.nEntry */ +#define PAGECACHE_BLOCKALLOC_MINENTRY 15 + /* Each page cache is an instance of the following object. Every ** open database file (including each in-memory database and each ** temporary or transient database) has a single page cache which @@ -159,6 +220,17 @@ static SQLITE_WSD struct PCacheGlobal { #define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage) /* +** Blocks used by the SQLITE_PAGECACHE_BLOCKALLOC blocks to store/retrieve +** a PGroupBlock pointer based on a pointer to a page buffer. +*/ +#define PAGE_SET_BLOCKPTR(pCache, pPg, pBlock) \ + ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) = pBlock ) + +#define PAGE_GET_BLOCKPTR(pCache, pPg) \ + ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) ) + + +/* ** Macros to enter and leave the PCache LRU mutex. */ #define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) @@ -283,13 +355,146 @@ static int pcache1MemSize(void *p){ } #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ +#ifdef SQLITE_PAGECACHE_BLOCKALLOC +/* +** The block pBlock belongs to list pList but is not currently linked in. +** Insert it into the start of the list. +*/ +static void addBlockToList(PGroupBlockList *pList, PGroupBlock *pBlock){ + pBlock->pPrev = 0; + pBlock->pNext = pList->pFirst; + pList->pFirst = pBlock; + if( pBlock->pNext ){ + pBlock->pNext->pPrev = pBlock; + }else{ + assert( pList->pLast==0 ); + pList->pLast = pBlock; + } +} + +/* +** If there are no blocks in the list headed by pList, remove pList +** from the pGroup->pBlockList list and free it with sqlite3_free(). +*/ +static void freeListIfEmpty(PGroup *pGroup, PGroupBlockList *pList){ + assert( sqlite3_mutex_held(pGroup->mutex) ); + if( pList->pFirst==0 ){ + PGroupBlockList **pp; + for(pp=&pGroup->pBlockList; *pp!=pList; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + sqlite3_free(pList); + } +} +#endif /* SQLITE_PAGECACHE_BLOCKALLOC */ + /* ** Allocate a new page object initially associated with cache pCache. */ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ int nByte = sizeof(PgHdr1) + pCache->szPage; - void *pPg = pcache1Alloc(nByte); + void *pPg = 0; PgHdr1 *p; + +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + PGroup *pGroup = pCache->pGroup; + PGroupBlockList *pList; + PGroupBlock *pBlock; + int i; + + nByte += sizeof(PGroupBlockList *); + nByte = ROUND8(nByte); + + for(pList=pGroup->pBlockList; pList; pList=pList->pNext){ + if( pList->nByte==nByte ) break; + } + if( pList==0 ){ + PGroupBlockList *pNew; + assert( pGroup->isBusy==0 ); + assert( sqlite3_mutex_held(pGroup->mutex) ); + pGroup->isBusy = 1; /* Disable sqlite3PcacheReleaseMemory() */ + pNew = (PGroupBlockList *)sqlite3MallocZero(sizeof(PGroupBlockList)); + pGroup->isBusy = 0; /* Reenable sqlite3PcacheReleaseMemory() */ + if( pNew==0 ){ + /* malloc() failure. Return early. */ + return 0; + } +#ifdef SQLITE_DEBUG + for(pList=pGroup->pBlockList; pList; pList=pList->pNext){ + assert( pList->nByte!=nByte ); + } +#endif + pNew->nByte = nByte; + pNew->pNext = pGroup->pBlockList; + pGroup->pBlockList = pNew; + pList = pNew; + } + + pBlock = pList->pFirst; + if( pBlock==0 || pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) ){ + int sz; + + /* Allocate a new block. Try to allocate enough space for the PGroupBlock + ** structure and MINENTRY allocations of nByte bytes each. If the + ** allocator returns more memory than requested, then more than MINENTRY + ** allocations may fit in it. */ + assert( sqlite3_mutex_held(pGroup->mutex) ); + pcache1LeaveMutex(pCache->pGroup); + sz = sizeof(PGroupBlock) + PAGECACHE_BLOCKALLOC_MINENTRY * nByte; + pBlock = (PGroupBlock *)sqlite3Malloc(sz); + pcache1EnterMutex(pCache->pGroup); + + if( !pBlock ){ + freeListIfEmpty(pGroup, pList); + return 0; + } + pBlock->nEntry = (sqlite3MallocSize(pBlock) - sizeof(PGroupBlock)) / nByte; + if( pBlock->nEntry>=BMS ){ + pBlock->nEntry = BMS-1; + } + pBlock->pList = pList; + pBlock->mUsed = 0; + pBlock->aData = (u8 *)&pBlock[1]; + addBlockToList(pList, pBlock); + + sz = sqlite3MallocSize(pBlock); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); + sqlite3_mutex_leave(pcache1.mutex); + } + + for(i=0; pPg==0 && ALWAYS(i<pBlock->nEntry); i++){ + if( 0==(pBlock->mUsed & ((Bitmask)1<<i)) ){ + pBlock->mUsed |= ((Bitmask)1<<i); + pPg = (void *)&pBlock->aData[pList->nByte * i]; + } + } + assert( pPg ); + PAGE_SET_BLOCKPTR(pCache, pPg, pBlock); + + /* If the block is now full, shift it to the end of the list */ + if( pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) && pList->pLast!=pBlock ){ + assert( pList->pFirst==pBlock ); + assert( pBlock->pPrev==0 ); + assert( pList->pLast->pNext==0 ); + pList->pFirst = pBlock->pNext; + pList->pFirst->pPrev = 0; + pBlock->pPrev = pList->pLast; + pBlock->pNext = 0; + pList->pLast->pNext = pBlock; + pList->pLast = pBlock; + } + p = PAGE_TO_PGHDR1(pCache, pPg); + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage++; + } +#else + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it may call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + pcache1LeaveMutex(pCache->pGroup); + pPg = pcache1Alloc(nByte); + pcache1EnterMutex(pCache->pGroup); if( pPg ){ p = PAGE_TO_PGHDR1(pCache, pPg); if( pCache->bPurgeable ){ @@ -298,6 +503,7 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ }else{ p = 0; } +#endif return p; } @@ -311,10 +517,52 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ static void pcache1FreePage(PgHdr1 *p){ if( ALWAYS(p) ){ PCache1 *pCache = p->pCache; + void *pPg = PGHDR1_TO_PAGE(p); + +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + PGroupBlock *pBlock = PAGE_GET_BLOCKPTR(pCache, pPg); + PGroupBlockList *pList = pBlock->pList; + int i = ((u8 *)pPg - pBlock->aData) / pList->nByte; + + assert( pPg==(void *)&pBlock->aData[i*pList->nByte] ); + assert( pBlock->mUsed & ((Bitmask)1<<i) ); + pBlock->mUsed &= ~((Bitmask)1<<i); + + /* Remove the block from the list. If it is completely empty, free it. + ** Or if it is not completely empty, re-insert it at the start of the + ** list. */ + if( pList->pFirst==pBlock ){ + pList->pFirst = pBlock->pNext; + if( pList->pFirst ) pList->pFirst->pPrev = 0; + }else{ + pBlock->pPrev->pNext = pBlock->pNext; + } + if( pList->pLast==pBlock ){ + pList->pLast = pBlock->pPrev; + if( pList->pLast ) pList->pLast->pNext = 0; + }else{ + pBlock->pNext->pPrev = pBlock->pPrev; + } + + if( pBlock->mUsed==0 ){ + PGroup *pGroup = p->pCache->pGroup; + + int sz = sqlite3MallocSize(pBlock); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -sz); + sqlite3_mutex_leave(pcache1.mutex); + freeListIfEmpty(pGroup, pList); + sqlite3_free(pBlock); + }else{ + addBlockToList(pList, pBlock); + } +#else + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + pcache1Free(pPg); +#endif if( pCache->bPurgeable ){ pCache->pGroup->nCurrentPage--; } - pcache1Free(PGHDR1_TO_PAGE(p)); } } @@ -752,9 +1000,7 @@ static void *pcache1Fetch(sqlite3_pcache *p, unsigned int iKey, int createFlag){ */ if( !pPage ){ if( createFlag==1 ) sqlite3BeginBenignMalloc(); - pcache1LeaveMutex(pGroup); pPage = pcache1AllocPage(pCache); - pcache1EnterMutex(pGroup); if( createFlag==1 ) sqlite3EndBenignMalloc(); } @@ -924,6 +1170,9 @@ void sqlite3PCacheSetDefault(void){ */ int sqlite3PcacheReleaseMemory(int nReq){ int nFree = 0; +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + if( pcache1.grp.isBusy ) return 0; +#endif assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); assert( sqlite3_mutex_notheld(pcache1.mutex) ); if( pcache1.pStart==0 ){ diff --git a/src/sqlite.h.in b/src/sqlite.h.in index b8e8bfbc4..ba5c20265 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -752,6 +752,20 @@ struct sqlite3_io_methods { ** is not changed but instead the prior value of that setting is written ** into the array entry, allowing the current retry settings to be ** interrogated. The zDbName parameter is ignored. +** +** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the +** persistent [WAL | Write AHead Log] setting. By default, the auxiliary +** write ahead log and shared memory files used for transaction control +** are automatically deleted when the latest connection to the database +** closes. Setting persistent WAL mode causes those files to persist after +** close. Persisting the files is useful when other processes that do not +** have write permission on the directory containing the database file want +** to read the database file, as the WAL and shared memory files must exist +** in order for the database to be readable. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable persistent WAL mode or 1 to enable persistent +** WAL mode. If the integer is -1, then it is overwritten with the current +** WAL persistence setting. ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -763,6 +777,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_FILE_POINTER 7 #define SQLITE_FCNTL_SYNC_OMITTED 8 #define SQLITE_FCNTL_WIN32_AV_RETRY 9 +#define SQLITE_FCNTL_PERSIST_WAL 10 /* ** CAPI3REF: Mutex Handle @@ -1190,16 +1205,10 @@ int sqlite3_db_config(sqlite3*, int op, ...); ** order to verify that SQLite recovers gracefully from such ** conditions. ** -** The xMalloc and xFree methods must work like the -** malloc() and free() functions from the standard C library. -** The xRealloc method must work like realloc() from the standard C library -** with the exception that if the second argument to xRealloc is zero, -** xRealloc must be a no-op - it must not perform any allocation or -** deallocation. ^SQLite guarantees that the second argument to +** The xMalloc, xRealloc, and xFree methods must work like the +** malloc(), realloc() and free() functions from the standard C library. +** ^SQLite guarantees that the second argument to ** xRealloc is always a value returned by a prior call to xRoundup. -** And so in cases where xRoundup always returns a positive number, -** xRealloc can perform exactly as the standard library realloc() and -** still be in compliance with this specification. ** ** xSize should return the allocated size of a memory allocation ** previously obtained from xMalloc or xRealloc. The allocated size diff --git a/src/sqliteInt.h b/src/sqliteInt.h index bcf6a591a..5934c7431 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -147,19 +147,25 @@ ** specify which memory allocation subsystem to use. ** ** SQLITE_SYSTEM_MALLOC // Use normal system malloc() +** SQLITE_WIN32_MALLOC // Use Win32 native heap API ** SQLITE_MEMDEBUG // Debugging version of system malloc() ** +** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the +** assert() macro is enabled, each call into the Win32 native heap subsystem +** will cause HeapValidate to be called. If heap validation should fail, an +** assertion will be triggered. +** ** (Historical note: There used to be several other options, but we've ** pared it down to just these two.) ** ** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as ** the default. */ -#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_MEMDEBUG)>1 +#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_WIN32_MALLOC)+defined(SQLITE_MEMDEBUG)>1 # error "At most one of the following compile-time configuration options\ - is allows: SQLITE_SYSTEM_MALLOC, SQLITE_MEMDEBUG" + is allows: SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG" #endif -#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_MEMDEBUG)==0 +#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_WIN32_MALLOC)+defined(SQLITE_MEMDEBUG)==0 # define SQLITE_SYSTEM_MALLOC 1 #endif @@ -367,6 +373,14 @@ #endif /* +** If all temporary storage is in-memory, then omit the external merge-sort +** logic since it is superfluous. +*/ +#if SQLITE_TEMP_STORE==3 && !defined(SQLITE_OMIT_MERGE_SORT) +# define SQLITE_OMIT_MERGE_SORT +#endif + +/* ** GCC does not define the offsetof() macro so we'll have to do it ** ourselves. */ diff --git a/src/tclsqlite.c b/src/tclsqlite.c index 339b8967d..d2a0582e4 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -2242,6 +2242,8 @@ static int DbObjCmd(void *cd, Tcl_Interp *interp, int objc,Tcl_Obj *const*objv){ if( choice==DB_ONECOLUMN ){ if( rc==TCL_OK ){ Tcl_SetObjResult(interp, dbEvalColumnValue(&sEval, 0)); + }else if( rc==TCL_BREAK ){ + Tcl_ResetResult(interp); } }else if( rc==TCL_BREAK || rc==TCL_OK ){ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(rc==TCL_OK)); diff --git a/src/test1.c b/src/test1.c index 3301ab740..59b570c28 100644 --- a/src/test1.c +++ b/src/test1.c @@ -4991,9 +4991,8 @@ static int file_control_chunksize_test( /* ** tclcmd: file_control_sizehint_test DB DBNAME SIZE ** -** This TCL command runs the sqlite3_file_control interface and -** verifies correct operation of the SQLITE_GET_LOCKPROXYFILE and -** SQLITE_SET_LOCKPROXYFILE verbs. +** This TCL command runs the sqlite3_file_control interface +** with SQLITE_FCNTL_SIZE_HINT */ static int file_control_sizehint_test( ClientData clientData, /* Pointer to sqlite3_enable_XXX function */ @@ -5129,6 +5128,38 @@ static int file_control_win32_av_retry( return TCL_OK; } +/* +** tclcmd: file_control_persist_wal DB PERSIST-FLAG +** +** This TCL command runs the sqlite3_file_control interface with +** the SQLITE_FCNTL_PERSIST_WAL opcode. +*/ +static int file_control_persist_wal( + ClientData clientData, /* Pointer to sqlite3_enable_XXX function */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + sqlite3 *db; + int rc; + int bPersist; + char z[100]; + + if( objc!=3 ){ + Tcl_AppendResult(interp, "wrong # args: should be \"", + Tcl_GetStringFromObj(objv[0], 0), " DB FLAG", 0); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[2], &bPersist) ) return TCL_ERROR; + rc = sqlite3_file_control(db, NULL, SQLITE_FCNTL_PERSIST_WAL, (void*)&bPersist); + sqlite3_snprintf(sizeof(z), z, "%d %d", rc, bPersist); + Tcl_AppendResult(interp, z, (char*)0); + return TCL_OK; +} + /* ** tclcmd: sqlite3_vfs_list @@ -5613,6 +5644,7 @@ static int test_test_control( ** background thread. */ struct win32FileLocker { + char *evName; /* Name of event to signal thread startup */ HANDLE h; /* Handle of the file to be locked */ int delay1; /* Delay before locking */ int delay2; /* Delay before unlocking */ @@ -5628,6 +5660,13 @@ struct win32FileLocker { */ static void win32_file_locker(void *pAppData){ struct win32FileLocker *p = (struct win32FileLocker*)pAppData; + if( p->evName ){ + HANDLE ev = OpenEvent(EVENT_MODIFY_STATE, FALSE, p->evName); + if ( ev ){ + SetEvent(ev); + CloseHandle(ev); + } + } if( p->delay1 ) Sleep(p->delay1); if( LockFile(p->h, 0, 0, 100000000, 0) ){ Sleep(p->delay2); @@ -5656,16 +5695,18 @@ static int win32_file_lock( int objc, Tcl_Obj *CONST objv[] ){ - static struct win32FileLocker x = { 0, 0, 0 }; + static struct win32FileLocker x = { "win32_file_lock", 0, 0, 0, 0, 0 }; const char *zFilename; + char zBuf[200]; int retry = 0; + HANDLE ev; + DWORD wResult; if( objc!=4 && objc!=1 ){ Tcl_WrongNumArgs(interp, 1, objv, "FILENAME DELAY1 DELAY2"); return TCL_ERROR; } if( objc==1 ){ - char zBuf[200]; sqlite3_snprintf(sizeof(zBuf), zBuf, "%d %d %d %d %d", x.ok, x.err, x.delay1, x.delay2, x.h); Tcl_AppendResult(interp, zBuf, (char*)0); @@ -5689,8 +5730,20 @@ static int win32_file_lock( Tcl_AppendResult(interp, "cannot open file: ", zFilename, (char*)0); return TCL_ERROR; } + ev = CreateEvent(NULL, TRUE, FALSE, x.evName); + if ( !ev ){ + Tcl_AppendResult(interp, "cannot create event: ", x.evName, (char*)0); + return TCL_ERROR; + } _beginthread(win32_file_locker, 0, (void*)&x); Sleep(0); + if ( (wResult = WaitForSingleObject(ev, 10000))!=WAIT_OBJECT_0 ){ + sqlite3_snprintf(sizeof(zBuf), zBuf, "0x%x", wResult); + Tcl_AppendResult(interp, "wait failed: ", zBuf, (char*)0); + CloseHandle(ev); + return TCL_ERROR; + } + CloseHandle(ev); return TCL_OK; } #endif @@ -5928,6 +5981,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "file_control_chunksize_test", file_control_chunksize_test, 0 }, { "file_control_sizehint_test", file_control_sizehint_test, 0 }, { "file_control_win32_av_retry", file_control_win32_av_retry, 0 }, + { "file_control_persist_wal", file_control_persist_wal, 0 }, { "sqlite3_vfs_list", vfs_list, 0 }, { "sqlite3_create_function_v2", test_create_function_v2, 0 }, diff --git a/src/test6.c b/src/test6.c index c9c8a4d20..23fb14c5b 100644 --- a/src/test6.c +++ b/src/test6.c @@ -505,6 +505,16 @@ static int cfCheckReservedLock(sqlite3_file *pFile, int *pResOut){ return sqlite3OsCheckReservedLock(((CrashFile *)pFile)->pRealFile, pResOut); } static int cfFileControl(sqlite3_file *pFile, int op, void *pArg){ + if( op==SQLITE_FCNTL_SIZE_HINT ){ + CrashFile *pCrash = (CrashFile *)pFile; + i64 nByte = *(i64 *)pArg; + if( nByte>pCrash->iSize ){ + if( SQLITE_OK==writeListAppend(pFile, nByte, 0, 0) ){ + pCrash->iSize = nByte; + } + } + return SQLITE_OK; + } return sqlite3OsFileControl(((CrashFile *)pFile)->pRealFile, op, pArg); } diff --git a/src/test_config.c b/src/test_config.c index e8d6f88f6..5af60bafd 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -363,6 +363,12 @@ Tcl_SetVar2(interp, "sqlite_options", "long_double", Tcl_SetVar2(interp, "sqlite_options", "memorymanage", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_OMIT_MERGE_SORT + Tcl_SetVar2(interp, "sqlite_options", "mergesort", "0", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "mergesort", "1", TCL_GLOBAL_ONLY); +#endif + #ifdef SQLITE_OMIT_OR_OPTIMIZATION Tcl_SetVar2(interp, "sqlite_options", "or_opt", "0", TCL_GLOBAL_ONLY); #else @@ -549,6 +555,12 @@ Tcl_SetVar2(interp, "sqlite_options", "long_double", Tcl_SetVar2(interp, "sqlite_options", "yytrackmaxstackdepth", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "0", TCL_GLOBAL_ONLY); +#endif + #define LINKVAR(x) { \ static const int cv_ ## x = SQLITE_ ## x; \ Tcl_LinkVar(interp, "SQLITE_" #x, (char *)&(cv_ ## x), \ diff --git a/src/test_malloc.c b/src/test_malloc.c index 5023dca44..46ec94d32 100644 --- a/src/test_malloc.c +++ b/src/test_malloc.c @@ -1222,7 +1222,7 @@ static int test_dump_memsys3( return TCL_ERROR; } - switch( (int)clientData ){ + switch( SQLITE_PTR_TO_INT(clientData) ){ case 3: { #ifdef SQLITE_ENABLE_MEMSYS3 extern void sqlite3Memsys3Dump(const char*); @@ -1460,7 +1460,7 @@ int Sqlitetest_malloc_Init(Tcl_Interp *interp){ }; int i; for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){ - ClientData c = (ClientData)aObjCmd[i].clientData; + ClientData c = (ClientData)SQLITE_INT_TO_PTR(aObjCmd[i].clientData); Tcl_CreateObjCommand(interp, aObjCmd[i].zName, aObjCmd[i].xProc, c, 0); } return TCL_OK; diff --git a/src/test_multiplex.c b/src/test_multiplex.c index d31684794..5d29607ac 100644 --- a/src/test_multiplex.c +++ b/src/test_multiplex.c @@ -39,8 +39,13 @@ ** URI. ** ** The multiplex VFS allows databases up to 32 GiB in size. But it splits -** the files up into 1 GiB pieces, so that they will work even on filesystems -** that do not support large files. +** the files up into smaller pieces, so that they will work even on +** filesystems that do not support large files. The default chunk size +** is 2147418112 bytes (which is 64KiB less than 2GiB) but this can be +** changed at compile-time by defining the SQLITE_MULTIPLEX_CHUNK_SIZE +** macro. Use the "chunksize=NNNN" query parameter with a URI filename +** in order to select an alternative chunk size for individual connections +** at run-time. */ #include "sqlite3.h" #include <string.h> diff --git a/src/test_quota.c b/src/test_quota.c index 9b0e4a9dd..74d1a6d3b 100644 --- a/src/test_quota.c +++ b/src/test_quota.c @@ -95,6 +95,7 @@ struct quotaFile { quotaGroup *pGroup; /* Quota group to which this file belongs */ sqlite3_int64 iSize; /* Current size of this file */ int nRef; /* Number of times this file is open */ + int deleteOnClose; /* True to delete this file when it closes */ quotaFile *pNext, **ppPrev; /* Linked list of files in the same group */ }; @@ -164,12 +165,45 @@ static struct { static void quotaEnter(void){ sqlite3_mutex_enter(gQuota.pMutex); } static void quotaLeave(void){ sqlite3_mutex_leave(gQuota.pMutex); } +/* Count the number of open files in a quotaGroup +*/ +static int quotaGroupOpenFileCount(quotaGroup *pGroup){ + int N = 0; + quotaFile *pFile = pGroup->pFiles; + while( pFile ){ + if( pFile->nRef ) N++; + pFile = pFile->pNext; + } + return N; +} + +/* Remove a file from a quota group. +*/ +static void quotaRemoveFile(quotaFile *pFile){ + quotaGroup *pGroup = pFile->pGroup; + pGroup->iSize -= pFile->iSize; + *pFile->ppPrev = pFile->pNext; + if( pFile->pNext ) pFile->pNext->ppPrev = pFile->ppPrev; + sqlite3_free(pFile); +} + +/* Remove all files from a quota group. It is always the case that +** all files will be closed when this routine is called. +*/ +static void quotaRemoveAllFiles(quotaGroup *pGroup){ + while( pGroup->pFiles ){ + assert( pGroup->pFiles->nRef==0 ); + quotaRemoveFile(pGroup->pFiles); + } +} + /* If the reference count and threshold for a quotaGroup are both ** zero, then destroy the quotaGroup. */ static void quotaGroupDeref(quotaGroup *pGroup){ - if( pGroup->pFiles==0 && pGroup->iLimit==0 ){ + if( pGroup->iLimit==0 && quotaGroupOpenFileCount(pGroup)==0 ){ + quotaRemoveAllFiles(pGroup); *pGroup->ppPrev = pGroup->pNext; if( pGroup->pNext ) pGroup->pNext->ppPrev = pGroup->ppPrev; if( pGroup->xDestroy ) pGroup->xDestroy(pGroup->pArg); @@ -276,6 +310,17 @@ static sqlite3_file *quotaSubOpen(sqlite3_file *pConn){ return (sqlite3_file*)&p[1]; } +/* Find a file in a quota group and return a pointer to that file. +** Return NULL if the file is not in the group. +*/ +static quotaFile *quotaFindFile(quotaGroup *pGroup, const char *zName){ + quotaFile *pFile = pGroup->pFiles; + while( pFile && strcmp(pFile->zFilename, zName)!=0 ){ + pFile = pFile->pNext; + } + return pFile; +} + /************************* VFS Method Wrappers *****************************/ /* ** This is the xOpen method used for the "quota" VFS. @@ -319,8 +364,7 @@ static int quotaOpen( pSubOpen = quotaSubOpen(pConn); rc = pOrigVfs->xOpen(pOrigVfs, zName, pSubOpen, flags, pOutFlags); if( rc==SQLITE_OK ){ - for(pFile=pGroup->pFiles; pFile && strcmp(pFile->zFilename, zName); - pFile=pFile->pNext){} + pFile = quotaFindFile(pGroup, zName); if( pFile==0 ){ int nName = strlen(zName); pFile = (quotaFile *)sqlite3_malloc( sizeof(*pFile) + nName + 1 ); @@ -337,6 +381,7 @@ static int quotaOpen( pFile->ppPrev = &pGroup->pFiles; pGroup->pFiles = pFile; pFile->pGroup = pGroup; + pFile->deleteOnClose = (flags & SQLITE_OPEN_DELETEONCLOSE)!=0; } pFile->nRef++; pQuotaOpen->pFile = pFile; @@ -351,6 +396,49 @@ static int quotaOpen( return rc; } +/* +** This is the xDelete method used for the "quota" VFS. +** +** If the file being deleted is part of the quota group, then reduce +** the size of the quota group accordingly. And remove the file from +** the set of files in the quota group. +*/ +static int quotaDelete( + sqlite3_vfs *pVfs, /* The quota VFS */ + const char *zName, /* Name of file to be deleted */ + int syncDir /* Do a directory sync after deleting */ +){ + int rc; /* Result code */ + quotaFile *pFile; /* Files in the quota */ + quotaGroup *pGroup; /* The group file belongs to */ + sqlite3_vfs *pOrigVfs = gQuota.pOrigVfs; /* Real VFS */ + + /* Do the actual file delete */ + rc = pOrigVfs->xDelete(pOrigVfs, zName, syncDir); + + /* If the file just deleted is a member of a quota group, then remove + ** it from that quota group. + */ + if( rc==SQLITE_OK ){ + quotaEnter(); + pGroup = quotaGroupFind(zName); + if( pGroup ){ + pFile = quotaFindFile(pGroup, zName); + if( pFile ){ + if( pFile->nRef ){ + pFile->deleteOnClose = 1; + }else{ + quotaRemoveFile(pFile); + quotaGroupDeref(pGroup); + } + } + } + quotaLeave(); + } + return rc; +} + + /************************ I/O Method Wrappers *******************************/ /* xClose requests get passed through to the original VFS. But we @@ -367,11 +455,8 @@ static int quotaClose(sqlite3_file *pConn){ pFile->nRef--; if( pFile->nRef==0 ){ quotaGroup *pGroup = pFile->pGroup; - pGroup->iSize -= pFile->iSize; - if( pFile->pNext ) pFile->pNext->ppPrev = pFile->ppPrev; - *pFile->ppPrev = pFile->pNext; + if( pFile->deleteOnClose ) quotaRemoveFile(pFile); quotaGroupDeref(pGroup); - sqlite3_free(pFile); } quotaLeave(); return rc; @@ -586,6 +671,7 @@ int sqlite3_quota_initialize(const char *zOrigVfsName, int makeDefault){ gQuota.pOrigVfs = pOrigVfs; gQuota.sThisVfs = *pOrigVfs; gQuota.sThisVfs.xOpen = quotaOpen; + gQuota.sThisVfs.xDelete = quotaDelete; gQuota.sThisVfs.szOsFile += sizeof(quotaConn); gQuota.sThisVfs.zName = "quota"; gQuota.sIoMethodsV1.iVersion = 1; @@ -617,19 +703,20 @@ int sqlite3_quota_initialize(const char *zOrigVfsName, int makeDefault){ ** All SQLite database connections must be closed before calling this ** routine. ** -** THIS ROUTINE IS NOT THREADSAFE. Call this routine exactly one while +** THIS ROUTINE IS NOT THREADSAFE. Call this routine exactly once while ** shutting down in order to free all remaining quota groups. */ int sqlite3_quota_shutdown(void){ quotaGroup *pGroup; if( gQuota.isInitialized==0 ) return SQLITE_MISUSE; for(pGroup=gQuota.pGroup; pGroup; pGroup=pGroup->pNext){ - if( pGroup->pFiles ) return SQLITE_MISUSE; + if( quotaGroupOpenFileCount(pGroup)>0 ) return SQLITE_MISUSE; } while( gQuota.pGroup ){ pGroup = gQuota.pGroup; gQuota.pGroup = pGroup->pNext; pGroup->iLimit = 0; + assert( quotaGroupOpenFileCount(pGroup)==0 ); quotaGroupDeref(pGroup); } gQuota.isInitialized = 0; @@ -708,6 +795,43 @@ int sqlite3_quota_set( return SQLITE_OK; } +/* +** Bring the named file under quota management. Or if it is already under +** management, update its size. +*/ +int sqlite3_quota_file(const char *zFilename){ + char *zFull; + sqlite3_file *fd; + int rc; + int outFlags = 0; + sqlite3_int64 iSize; + fd = sqlite3_malloc(gQuota.sThisVfs.szOsFile + gQuota.sThisVfs.mxPathname+1); + if( fd==0 ) return SQLITE_NOMEM; + zFull = gQuota.sThisVfs.szOsFile + (char*)fd; + rc = gQuota.pOrigVfs->xFullPathname(gQuota.pOrigVfs, zFilename, + gQuota.sThisVfs.mxPathname+1, zFull); + if( rc==SQLITE_OK ){ + rc = quotaOpen(&gQuota.sThisVfs, zFull, fd, + SQLITE_OPEN_READONLY | SQLITE_OPEN_MAIN_DB, &outFlags); + } + if( rc==SQLITE_OK ){ + fd->pMethods->xFileSize(fd, &iSize); + fd->pMethods->xClose(fd); + }else if( rc==SQLITE_CANTOPEN ){ + quotaGroup *pGroup; + quotaFile *pFile; + quotaEnter(); + pGroup = quotaGroupFind(zFull); + if( pGroup ){ + pFile = quotaFindFile(pGroup, zFull); + if( pFile ) quotaRemoveFile(pFile); + } + quotaLeave(); + } + sqlite3_free(fd); + return rc; +} + /***************************** Test Code ***********************************/ #ifdef SQLITE_TEST @@ -885,6 +1009,32 @@ static int test_quota_set( } /* +** tclcmd: sqlite3_quota_file FILENAME +*/ +static int test_quota_file( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + const char *zFilename; /* File pattern to configure */ + int rc; /* Value returned by quota_file() */ + + /* Process arguments */ + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "FILENAME"); + return TCL_ERROR; + } + zFilename = Tcl_GetString(objv[1]); + + /* Invoke sqlite3_quota_file() */ + rc = sqlite3_quota_file(zFilename); + + Tcl_SetResult(interp, (char *)sqlite3TestErrorName(rc), TCL_STATIC); + return TCL_OK; +} + +/* ** tclcmd: sqlite3_quota_dump */ static int test_quota_dump( @@ -917,6 +1067,8 @@ static int test_quota_dump( Tcl_NewWideIntObj(pFile->iSize)); Tcl_ListObjAppendElement(interp, pFileTerm, Tcl_NewWideIntObj(pFile->nRef)); + Tcl_ListObjAppendElement(interp, pFileTerm, + Tcl_NewWideIntObj(pFile->deleteOnClose)); Tcl_ListObjAppendElement(interp, pGroupTerm, pFileTerm); } Tcl_ListObjAppendElement(interp, pResult, pGroupTerm); @@ -939,6 +1091,7 @@ int Sqlitequota_Init(Tcl_Interp *interp){ { "sqlite3_quota_initialize", test_quota_initialize }, { "sqlite3_quota_shutdown", test_quota_shutdown }, { "sqlite3_quota_set", test_quota_set }, + { "sqlite3_quota_file", test_quota_file }, { "sqlite3_quota_dump", test_quota_dump }, }; int i; diff --git a/src/test_rtree.c b/src/test_rtree.c index 5fc994ddf..9745b0054 100644 --- a/src/test_rtree.c +++ b/src/test_rtree.c @@ -18,6 +18,7 @@ /* Solely for the UNUSED_PARAMETER() macro. */ #include "sqliteInt.h" +#ifdef SQLITE_ENABLE_RTREE /* ** Type used to cache parameter information for the "circle" r-tree geometry ** callback. @@ -230,6 +231,7 @@ static int cube_geom( return SQLITE_OK; } +#endif /* SQLITE_ENABLE_RTREE */ static int register_cube_geom( void * clientData, diff --git a/src/test_syscall.c b/src/test_syscall.c index b8b05c590..d484f22db 100644 --- a/src/test_syscall.c +++ b/src/test_syscall.c @@ -325,6 +325,7 @@ static int ts_pread64(int fd, void *aBuf, size_t nBuf, off_t off){ */ static int ts_write(int fd, const void *aBuf, size_t nBuf){ if( tsIsFailErrno("write") ){ + if( tsErrno("write")==EINTR ) orig_write(fd, aBuf, nBuf/2); return -1; } return orig_write(fd, aBuf, nBuf); @@ -671,4 +672,3 @@ int SqlitetestSyscall_Init(Tcl_Interp *interp){ return TCL_OK; } #endif - diff --git a/src/test_thread.c b/src/test_thread.c index 3a13dd668..08df14c2c 100644 --- a/src/test_thread.c +++ b/src/test_thread.c @@ -305,7 +305,7 @@ static int sqlthread_id( Tcl_Obj *CONST objv[] ){ Tcl_ThreadId id = Tcl_GetCurrentThread(); - Tcl_SetObjResult(interp, Tcl_NewIntObj((int)id)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(SQLITE_PTR_TO_INT(id))); UNUSED_PARAMETER(clientData); UNUSED_PARAMETER(objc); UNUSED_PARAMETER(objv); diff --git a/src/test_vfs.c b/src/test_vfs.c index a8b53526d..546cb7cf4 100644 --- a/src/test_vfs.c +++ b/src/test_vfs.c @@ -123,6 +123,8 @@ struct Testvfs { #define TESTVFS_TRUNCATE_MASK 0x00002000 #define TESTVFS_ACCESS_MASK 0x00004000 #define TESTVFS_FULLPATHNAME_MASK 0x00008000 +#define TESTVFS_READ_MASK 0x00010000 + #define TESTVFS_ALL_MASK 0x0001FFFF @@ -325,8 +327,22 @@ static int tvfsRead( int iAmt, sqlite_int64 iOfst ){ - TestvfsFd *p = tvfsGetFd(pFile); - return sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst); + int rc = SQLITE_OK; + TestvfsFd *pFd = tvfsGetFd(pFile); + Testvfs *p = (Testvfs *)pFd->pVfs->pAppData; + if( p->pScript && p->mask&TESTVFS_READ_MASK ){ + tvfsExecTcl(p, "xRead", + Tcl_NewStringObj(pFd->zFilename, -1), pFd->pShmId, 0 + ); + tvfsResultCode(p, &rc); + } + if( rc==SQLITE_OK && p->mask&TESTVFS_READ_MASK && tvfsInjectIoerr(p) ){ + rc = SQLITE_IOERR; + } + if( rc==SQLITE_OK ){ + rc = sqlite3OsRead(pFd->pReal, zBuf, iAmt, iOfst); + } + return rc; } /* @@ -1030,6 +1046,7 @@ static int testvfs_obj_cmd( { "xSync", TESTVFS_SYNC_MASK }, { "xDelete", TESTVFS_DELETE_MASK }, { "xWrite", TESTVFS_WRITE_MASK }, + { "xRead", TESTVFS_READ_MASK }, { "xTruncate", TESTVFS_TRUNCATE_MASK }, { "xOpen", TESTVFS_OPEN_MASK }, { "xClose", TESTVFS_CLOSE_MASK }, diff --git a/src/vdbe.c b/src/vdbe.c index bec422a98..e7c7ed7fe 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -157,6 +157,13 @@ int sqlite3_found_count = 0; */ #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0) +/* Return true if the cursor was opened using the OP_OpenSorter opcode. */ +#ifdef SQLITE_OMIT_MERGE_SORT +# define isSorter(x) 0 +#else +# define isSorter(x) ((x)->pSorter!=0) +#endif + /* ** Argument pMem points at a register that will be passed to a ** user-defined function or returned to the user as the result of a query. @@ -666,7 +673,7 @@ int sqlite3VdbeExec( assert( pOp->p2<=p->nMem ); pOut = &aMem[pOp->p2]; memAboutToChange(p, pOut); - sqlite3VdbeMemReleaseExternal(pOut); + MemReleaseExt(pOut); pOut->flags = MEM_Int; } @@ -1027,6 +1034,11 @@ case OP_Move: { zMalloc = pOut->zMalloc; pOut->zMalloc = 0; sqlite3VdbeMemMove(pOut, pIn1); +#ifdef SQLITE_DEBUG + if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrom<&aMem[p1+pOp->p3] ){ + pOut->pScopyFrom += p1 - pOp->p2; + } +#endif pIn1->zMalloc = zMalloc; REGISTER_TRACE(p2++, pOut); pIn1++; @@ -2106,6 +2118,7 @@ case OP_Column: { u32 szField; /* Number of bytes in the content of a field */ int szHdr; /* Size of the header size field at start of record */ int avail; /* Number of bytes of available data */ + u32 t; /* A type code from the record header */ Mem *pReg; /* PseudoTable input register */ @@ -2117,7 +2130,6 @@ case OP_Column: { assert( pOp->p3>0 && pOp->p3<=p->nMem ); pDest = &aMem[pOp->p3]; memAboutToChange(p, pDest); - MemSetTypeFlag(pDest, MEM_Null); zRec = 0; /* This block sets the variable payloadSize to be the total number of @@ -2161,7 +2173,7 @@ case OP_Column: { rc = sqlite3BtreeDataSize(pCrsr, &payloadSize); assert( rc==SQLITE_OK ); /* DataSize() cannot fail */ } - }else if( pC->pseudoTableReg>0 ){ + }else if( ALWAYS(pC->pseudoTableReg>0) ){ pReg = &aMem[pC->pseudoTableReg]; assert( pReg->flags & MEM_Blob ); assert( memIsValid(pReg) ); @@ -2174,9 +2186,10 @@ case OP_Column: { payloadSize = 0; } - /* If payloadSize is 0, then just store a NULL */ + /* If payloadSize is 0, then just store a NULL. This can happen because of + ** nullRow or because of a corrupt database. */ if( payloadSize==0 ){ - assert( pDest->flags&MEM_Null ); + MemSetTypeFlag(pDest, MEM_Null); goto op_column_out; } assert( db->aLimit[SQLITE_LIMIT_LENGTH]>=0 ); @@ -2283,8 +2296,14 @@ case OP_Column: { for(i=0; i<nField; i++){ if( zIdx<zEndHdr ){ aOffset[i] = offset; - zIdx += getVarint32(zIdx, aType[i]); - szField = sqlite3VdbeSerialTypeLen(aType[i]); + if( zIdx[0]<0x80 ){ + t = zIdx[0]; + zIdx++; + }else{ + zIdx += sqlite3GetVarint32(zIdx, &t); + } + aType[i] = t; + szField = sqlite3VdbeSerialTypeLen(t); offset += szField; if( offset<szField ){ /* True if offset overflows */ zIdx = &zEndHdr[1]; /* Forces SQLITE_CORRUPT return below */ @@ -2325,7 +2344,7 @@ case OP_Column: { if( aOffset[p2] ){ assert( rc==SQLITE_OK ); if( zRec ){ - sqlite3VdbeMemReleaseExternal(pDest); + MemReleaseExt(pDest); sqlite3VdbeSerialGet((u8 *)&zRec[aOffset[p2]], aType[p2], pDest); }else{ len = sqlite3VdbeSerialTypeLen(aType[p2]); @@ -2342,7 +2361,7 @@ case OP_Column: { if( pOp->p4type==P4_MEM ){ sqlite3VdbeMemShallowCopy(pDest, pOp->p4.pMem, MEM_Static); }else{ - assert( pDest->flags&MEM_Null ); + MemSetTypeFlag(pDest, MEM_Null); } } @@ -2538,7 +2557,7 @@ case OP_Count: { /* out2-prerelease */ BtCursor *pCrsr; pCrsr = p->apCsr[pOp->p1]->pCursor; - if( pCrsr ){ + if( ALWAYS(pCrsr) ){ rc = sqlite3BtreeCount(pCrsr, &nEntry); }else{ nEntry = 0; @@ -3100,15 +3119,9 @@ case OP_OpenWrite: { rc = sqlite3BtreeCursor(pX, p2, wrFlag, pKeyInfo, pCur->pCursor); pCur->pKeyInfo = pKeyInfo; - /* Since it performs no memory allocation or IO, the only values that - ** sqlite3BtreeCursor() may return are SQLITE_EMPTY and SQLITE_OK. - ** SQLITE_EMPTY is only returned when attempting to open the table - ** rooted at page 1 of a zero-byte database. */ - assert( rc==SQLITE_EMPTY || rc==SQLITE_OK ); - if( rc==SQLITE_EMPTY ){ - pCur->pCursor = 0; - rc = SQLITE_OK; - } + /* Since it performs no memory allocation or IO, the only value that + ** sqlite3BtreeCursor() may return is SQLITE_OK. */ + assert( rc==SQLITE_OK ); /* Set the VdbeCursor.isTable and isIndex variables. Previous versions of ** SQLite used to check if the root-page flags were sane at this point @@ -3119,7 +3132,7 @@ case OP_OpenWrite: { break; } -/* Opcode: OpenEphemeral P1 P2 * P4 * +/* Opcode: OpenEphemeral P1 P2 * P4 P5 ** ** Open a new cursor P1 to a transient table. ** The cursor is always opened read/write even if @@ -3136,6 +3149,11 @@ case OP_OpenWrite: { ** to a TEMP table at the SQL level, or to a table opened by ** this opcode. Then this opcode was call OpenVirtual. But ** that created confusion with the whole virtual-table idea. +** +** The P5 parameter can be a mask of the BTREE_* flags defined +** in btree.h. These flags control aspects of the operation of +** the btree. The BTREE_OMIT_JOURNAL and BTREE_SINGLE flags are +** added automatically. */ /* Opcode: OpenAutoindex P1 P2 * P4 * ** @@ -3144,6 +3162,13 @@ case OP_OpenWrite: { ** by this opcode will be used for automatically created transient ** indices in joins. */ +/* Opcode: OpenSorter P1 P2 * P4 * +** +** This opcode works like OP_OpenEphemeral except that it opens +** a transient index that is specifically designed to sort large +** tables using an external merge-sort algorithm. +*/ +case OP_OpenSorter: case OP_OpenAutoindex: case OP_OpenEphemeral: { VdbeCursor *pCx; @@ -3155,6 +3180,7 @@ case OP_OpenEphemeral: { SQLITE_OPEN_TRANSIENT_DB; assert( pOp->p1>=0 ); + assert( (pOp->opcode==OP_OpenSorter)==((pOp->p5 & BTREE_SORTER)!=0) ); pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1); if( pCx==0 ) goto no_mem; pCx->nullRow = 1; @@ -3188,6 +3214,11 @@ case OP_OpenEphemeral: { } pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); pCx->isIndex = !pCx->isTable; +#ifndef SQLITE_OMIT_MERGE_SORT + if( rc==SQLITE_OK && pOp->opcode==OP_OpenSorter ){ + rc = sqlite3VdbeSorterInit(db, pCx); + } +#endif break; } @@ -3303,7 +3334,7 @@ case OP_SeekGt: { /* jump, in3 */ assert( OP_SeekGe == OP_SeekLt+2 ); assert( OP_SeekGt == OP_SeekLt+3 ); assert( pC->isOrdered ); - if( pC->pCursor!=0 ){ + if( ALWAYS(pC->pCursor!=0) ){ oc = pOp->opcode; pC->nullRow = 0; if( pC->isTable ){ @@ -3661,7 +3692,7 @@ case OP_NotExists: { /* jump, in3 */ assert( pC->isTable ); assert( pC->pseudoTableReg==0 ); pCrsr = pC->pCursor; - if( pCrsr!=0 ){ + if( ALWAYS(pCrsr!=0) ){ res = 0; iKey = pIn3->u.i; rc = sqlite3BtreeMovetoUnpacked(pCrsr, 0, iKey, 0, &res); @@ -4077,6 +4108,13 @@ case OP_RowData: { assert( pC!=0 ); assert( pC->nullRow==0 ); assert( pC->pseudoTableReg==0 ); + + if( isSorter(pC) ){ + assert( pOp->opcode==OP_RowKey ); + rc = sqlite3VdbeSorterRowkey(pC, pOut); + break; + } + assert( pC->pCursor!=0 ); pCrsr = pC->pCursor; assert( sqlite3BtreeCursorIsValid(pCrsr) ); @@ -4181,6 +4219,7 @@ case OP_NullRow: { assert( pC!=0 ); pC->nullRow = 1; pC->rowidIsValid = 0; + assert( pC->pCursor || pC->pVtabCursor ); if( pC->pCursor ){ sqlite3BtreeClearCursor(pC->pCursor); } @@ -4204,7 +4243,7 @@ case OP_Last: { /* jump */ pC = p->apCsr[pOp->p1]; assert( pC!=0 ); pCrsr = pC->pCursor; - if( pCrsr==0 ){ + if( NEVER(pCrsr==0) ){ res = 1; }else{ rc = sqlite3BtreeLast(pCrsr, &res); @@ -4257,7 +4296,11 @@ case OP_Rewind: { /* jump */ pC = p->apCsr[pOp->p1]; assert( pC!=0 ); res = 1; - if( (pCrsr = pC->pCursor)!=0 ){ + if( isSorter(pC) ){ + rc = sqlite3VdbeSorterRewind(db, pC, &res); + }else{ + pCrsr = pC->pCursor; + assert( pCrsr ); rc = sqlite3BtreeFirst(pCrsr, &res); pC->atFirst = res==0 ?1:0; pC->deferredMoveto = 0; @@ -4272,7 +4315,7 @@ case OP_Rewind: { /* jump */ break; } -/* Opcode: Next P1 P2 * * P5 +/* Opcode: Next P1 P2 * P4 P5 ** ** Advance cursor P1 so that it points to the next key/data pair in its ** table or index. If there are no more key/value pairs then fall through @@ -4281,6 +4324,9 @@ case OP_Rewind: { /* jump */ ** ** The P1 cursor must be for a real table, not a pseudo-table. ** +** P4 is always of type P4_ADVANCE. The function pointer points to +** sqlite3BtreeNext(). +** ** If P5 is positive and the jump is taken, then event counter ** number P5-1 in the prepared statement is incremented. ** @@ -4295,13 +4341,15 @@ case OP_Rewind: { /* jump */ ** ** The P1 cursor must be for a real table, not a pseudo-table. ** +** P4 is always of type P4_ADVANCE. The function pointer points to +** sqlite3BtreePrevious(). +** ** If P5 is positive and the jump is taken, then event counter ** number P5-1 in the prepared statement is incremented. */ case OP_Prev: /* jump */ case OP_Next: { /* jump */ VdbeCursor *pC; - BtCursor *pCrsr; int res; CHECK_FOR_INTERRUPT; @@ -4311,15 +4359,17 @@ case OP_Next: { /* jump */ if( pC==0 ){ break; /* See ticket #2273 */ } - pCrsr = pC->pCursor; - if( pCrsr==0 ){ - pC->nullRow = 1; - break; + if( isSorter(pC) ){ + assert( pOp->opcode==OP_Next ); + rc = sqlite3VdbeSorterNext(db, pC, &res); + }else{ + res = 1; + assert( pC->deferredMoveto==0 ); + assert( pC->pCursor ); + assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext ); + assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious ); + rc = pOp->p4.xAdvance(pC->pCursor, &res); } - res = 1; - assert( pC->deferredMoveto==0 ); - rc = pOp->opcode==OP_Next ? sqlite3BtreeNext(pCrsr, &res) : - sqlite3BtreePrevious(pCrsr, &res); pC->nullRow = (u8)res; pC->cacheStatus = CACHE_STALE; if( res==0 ){ @@ -4363,10 +4413,13 @@ case OP_IdxInsert: { /* in2 */ if( rc==SQLITE_OK ){ nKey = pIn2->n; zKey = pIn2->z; - rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, - ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) - ); - assert( pC->deferredMoveto==0 ); + rc = sqlite3VdbeSorterWrite(db, pC, nKey); + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, + ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) + ); + assert( pC->deferredMoveto==0 ); + } pC->cacheStatus = CACHE_STALE; } } diff --git a/src/vdbe.h b/src/vdbe.h index e66ee3024..972854851 100644 --- a/src/vdbe.h +++ b/src/vdbe.h @@ -61,6 +61,7 @@ struct VdbeOp { KeyInfo *pKeyInfo; /* Used when p4type is P4_KEYINFO */ int *ai; /* Used when p4type is P4_INTARRAY */ SubProgram *pProgram; /* Used when p4type is P4_SUBPROGRAM */ + int (*xAdvance)(BtCursor *, int *); } p4; #ifdef SQLITE_DEBUG char *zComment; /* Comment to improve readability */ @@ -116,6 +117,7 @@ typedef struct VdbeOpList VdbeOpList; #define P4_INT32 (-14) /* P4 is a 32-bit signed integer */ #define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */ #define P4_SUBPROGRAM (-18) /* P4 is a pointer to a SubProgram structure */ +#define P4_ADVANCE (-19) /* P4 is a pointer to BtreeNext() or BtreePrev() */ /* When adding a P4 argument using P4_KEYINFO, a copy of the KeyInfo structure ** is made. That copy is freed when the Vdbe is finalized. But if the @@ -173,9 +175,9 @@ int sqlite3VdbeAddOp4(Vdbe*,int,int,int,int,const char *zP4,int); int sqlite3VdbeAddOp4Int(Vdbe*,int,int,int,int,int); int sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp); void sqlite3VdbeAddParseSchemaOp(Vdbe*,int,char*); -void sqlite3VdbeChangeP1(Vdbe*, int addr, int P1); -void sqlite3VdbeChangeP2(Vdbe*, int addr, int P2); -void sqlite3VdbeChangeP3(Vdbe*, int addr, int P3); +void sqlite3VdbeChangeP1(Vdbe*, u32 addr, int P1); +void sqlite3VdbeChangeP2(Vdbe*, u32 addr, int P2); +void sqlite3VdbeChangeP3(Vdbe*, u32 addr, int P3); void sqlite3VdbeChangeP5(Vdbe*, u8 P5); void sqlite3VdbeJumpHere(Vdbe*, int addr); void sqlite3VdbeChangeToNoop(Vdbe*, int addr, int N); diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 0aeb3af7a..846d80707 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -30,6 +30,9 @@ typedef struct VdbeOp Op; */ typedef unsigned char Bool; +/* Opaque type used by code in vdbesort.c */ +typedef struct VdbeSorter VdbeSorter; + /* ** A cursor is a pointer into a single BTree within a database file. ** The cursor can seek to a BTree entry with a particular key, or @@ -61,6 +64,7 @@ struct VdbeCursor { i64 seqCount; /* Sequence counter */ i64 movetoTarget; /* Argument to the deferred sqlite3BtreeMoveto() */ i64 lastRowid; /* Last rowid from a Next or NextIdx operation */ + VdbeSorter *pSorter; /* Sorter object for OP_OpenSorter cursors */ /* Result of last sqlite3BtreeMoveto() done by an OP_NotExists or ** OP_IsUnique opcode on this cursor. */ @@ -380,6 +384,9 @@ int sqlite3VdbeMemNumerify(Mem*); int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*); void sqlite3VdbeMemRelease(Mem *p); void sqlite3VdbeMemReleaseExternal(Mem *p); +#define MemReleaseExt(X) \ + if((X)->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame)) \ + sqlite3VdbeMemReleaseExternal(X); int sqlite3VdbeMemFinalize(Mem*, FuncDef*); const char *sqlite3OpcodeName(int); int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve); @@ -388,6 +395,22 @@ void sqlite3VdbeFrameDelete(VdbeFrame*); int sqlite3VdbeFrameRestore(VdbeFrame *); void sqlite3VdbeMemStoreType(Mem *pMem); +#ifdef SQLITE_OMIT_MERGE_SORT +# define sqlite3VdbeSorterInit(Y,Z) SQLITE_OK +# define sqlite3VdbeSorterWrite(X,Y,Z) SQLITE_OK +# define sqlite3VdbeSorterClose(Y,Z) +# define sqlite3VdbeSorterRowkey(Y,Z) SQLITE_OK +# define sqlite3VdbeSorterRewind(X,Y,Z) SQLITE_OK +# define sqlite3VdbeSorterNext(X,Y,Z) SQLITE_OK +#else +int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *); +int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, int); +void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *); +int sqlite3VdbeSorterRowkey(VdbeCursor *, Mem *); +int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *); +int sqlite3VdbeSorterNext(sqlite3 *, VdbeCursor *, int *); +#endif + #if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0 void sqlite3VdbeEnter(Vdbe*); void sqlite3VdbeLeave(Vdbe*); diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 989a8003d..053d89f3b 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -433,6 +433,12 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ n = pOp[-1].p1; if( n>nMaxArgs ) nMaxArgs = n; #endif + }else if( opcode==OP_Next ){ + pOp->p4.xAdvance = sqlite3BtreeNext; + pOp->p4type = P4_ADVANCE; + }else if( opcode==OP_Prev ){ + pOp->p4.xAdvance = sqlite3BtreePrevious; + pOp->p4type = P4_ADVANCE; } if( (pOp->opflags & OPFLG_JUMP)!=0 && pOp->p2<0 ){ @@ -524,10 +530,9 @@ int sqlite3VdbeAddOpList(Vdbe *p, int nOp, VdbeOpList const *aOp){ ** static array using sqlite3VdbeAddOpList but we want to make a ** few minor changes to the program. */ -void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){ +void sqlite3VdbeChangeP1(Vdbe *p, u32 addr, int val){ assert( p!=0 ); - assert( addr>=0 ); - if( p->nOp>addr ){ + if( ((u32)p->nOp)>addr ){ p->aOp[addr].p1 = val; } } @@ -536,10 +541,9 @@ void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){ ** Change the value of the P2 operand for a specific instruction. ** This routine is useful for setting a jump destination. */ -void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){ +void sqlite3VdbeChangeP2(Vdbe *p, u32 addr, int val){ assert( p!=0 ); - assert( addr>=0 ); - if( p->nOp>addr ){ + if( ((u32)p->nOp)>addr ){ p->aOp[addr].p2 = val; } } @@ -547,10 +551,9 @@ void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){ /* ** Change the value of the P3 operand for a specific instruction. */ -void sqlite3VdbeChangeP3(Vdbe *p, int addr, int val){ +void sqlite3VdbeChangeP3(Vdbe *p, u32 addr, int val){ assert( p!=0 ); - assert( addr>=0 ); - if( p->nOp>addr ){ + if( ((u32)p->nOp)>addr ){ p->aOp[addr].p3 = val; } } @@ -942,6 +945,10 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){ sqlite3_snprintf(nTemp, zTemp, "program"); break; } + case P4_ADVANCE: { + zTemp[0] = 0; + break; + } default: { zP4 = pOp->p4.z; if( zP4==0 ){ @@ -1565,6 +1572,7 @@ void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ if( pCx==0 ){ return; } + sqlite3VdbeSorterClose(p->db, pCx); if( pCx->pBt ){ sqlite3BtreeClose(pCx->pBt); /* The pCx->pCursor will be close automatically, if it exists, by diff --git a/src/vdbemem.c b/src/vdbemem.c index 882c68633..d51257282 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -271,24 +271,18 @@ int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){ */ void sqlite3VdbeMemReleaseExternal(Mem *p){ assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) ); - testcase( p->flags & MEM_Agg ); - testcase( p->flags & MEM_Dyn ); - testcase( p->flags & MEM_RowSet ); - testcase( p->flags & MEM_Frame ); - if( p->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame) ){ - if( p->flags&MEM_Agg ){ - sqlite3VdbeMemFinalize(p, p->u.pDef); - assert( (p->flags & MEM_Agg)==0 ); - sqlite3VdbeMemRelease(p); - }else if( p->flags&MEM_Dyn && p->xDel ){ - assert( (p->flags&MEM_RowSet)==0 ); - p->xDel((void *)p->z); - p->xDel = 0; - }else if( p->flags&MEM_RowSet ){ - sqlite3RowSetClear(p->u.pRowSet); - }else if( p->flags&MEM_Frame ){ - sqlite3VdbeMemSetNull(p); - } + if( p->flags&MEM_Agg ){ + sqlite3VdbeMemFinalize(p, p->u.pDef); + assert( (p->flags & MEM_Agg)==0 ); + sqlite3VdbeMemRelease(p); + }else if( p->flags&MEM_Dyn && p->xDel ){ + assert( (p->flags&MEM_RowSet)==0 ); + p->xDel((void *)p->z); + p->xDel = 0; + }else if( p->flags&MEM_RowSet ){ + sqlite3RowSetClear(p->u.pRowSet); + }else if( p->flags&MEM_Frame ){ + sqlite3VdbeMemSetNull(p); } } @@ -298,7 +292,7 @@ void sqlite3VdbeMemReleaseExternal(Mem *p){ ** (Mem.type==SQLITE_TEXT). */ void sqlite3VdbeMemRelease(Mem *p){ - sqlite3VdbeMemReleaseExternal(p); + MemReleaseExt(p); sqlite3DbFree(p->db, p->zMalloc); p->z = 0; p->zMalloc = 0; @@ -620,7 +614,7 @@ void sqlite3VdbeMemPrepareToChange(Vdbe *pVdbe, Mem *pMem){ */ void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){ assert( (pFrom->flags & MEM_RowSet)==0 ); - sqlite3VdbeMemReleaseExternal(pTo); + MemReleaseExt(pTo); memcpy(pTo, pFrom, MEMCELLSIZE); pTo->xDel = 0; if( (pFrom->flags&MEM_Static)==0 ){ @@ -638,7 +632,7 @@ int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){ int rc = SQLITE_OK; assert( (pFrom->flags & MEM_RowSet)==0 ); - sqlite3VdbeMemReleaseExternal(pTo); + MemReleaseExt(pTo); memcpy(pTo, pFrom, MEMCELLSIZE); pTo->flags &= ~MEM_Dyn; diff --git a/src/vdbesort.c b/src/vdbesort.c new file mode 100644 index 000000000..be99d397d --- /dev/null +++ b/src/vdbesort.c @@ -0,0 +1,711 @@ +/* +** 2011 July 9 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code for the VdbeSorter object, used in concert with +** a VdbeCursor to sort large numbers of keys (as may be required, for +** example, by CREATE INDEX statements on tables too large to fit in main +** memory). +*/ + +#include "sqliteInt.h" +#include "vdbeInt.h" + +#ifndef SQLITE_OMIT_MERGE_SORT + +typedef struct VdbeSorterIter VdbeSorterIter; + +/* +** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES: +** +** As keys are added to the sorter, they are written to disk in a series +** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly +** the same as the cache-size allowed for temporary databases. In order +** to allow the caller to extract keys from the sorter in sorted order, +** all PMAs currently stored on disk must be merged together. This comment +** describes the data structure used to do so. The structure supports +** merging any number of arrays in a single pass with no redundant comparison +** operations. +** +** The aIter[] array contains an iterator for each of the PMAs being merged. +** An aIter[] iterator either points to a valid key or else is at EOF. For +** the purposes of the paragraphs below, we assume that the array is actually +** N elements in size, where N is the smallest power of 2 greater to or equal +** to the number of iterators being merged. The extra aIter[] elements are +** treated as if they are empty (always at EOF). +** +** The aTree[] array is also N elements in size. The value of N is stored in +** the VdbeSorter.nTree variable. +** +** The final (N/2) elements of aTree[] contain the results of comparing +** pairs of iterator keys together. Element i contains the result of +** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the +** aTree element is set to the index of it. +** +** For the purposes of this comparison, EOF is considered greater than any +** other key value. If the keys are equal (only possible with two EOF +** values), it doesn't matter which index is stored. +** +** The (N/4) elements of aTree[] that preceed the final (N/2) described +** above contains the index of the smallest of each block of 4 iterators. +** And so on. So that aTree[1] contains the index of the iterator that +** currently points to the smallest key value. aTree[0] is unused. +** +** Example: +** +** aIter[0] -> Banana +** aIter[1] -> Feijoa +** aIter[2] -> Elderberry +** aIter[3] -> Currant +** aIter[4] -> Grapefruit +** aIter[5] -> Apple +** aIter[6] -> Durian +** aIter[7] -> EOF +** +** aTree[] = { X, 5 0, 5 0, 3, 5, 6 } +** +** The current element is "Apple" (the value of the key indicated by +** iterator 5). When the Next() operation is invoked, iterator 5 will +** be advanced to the next key in its segment. Say the next key is +** "Eggplant": +** +** aIter[5] -> Eggplant +** +** The contents of aTree[] are updated first by comparing the new iterator +** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator +** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree. +** The value of iterator 6 - "Durian" - is now smaller than that of iterator +** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian), +** so the value written into element 1 of the array is 0. As follows: +** +** aTree[] = { X, 0 0, 6 0, 3, 5, 6 } +** +** In other words, each time we advance to the next sorter element, log2(N) +** key comparison operations are required, where N is the number of segments +** being merged (rounded up to the next power of 2). +*/ +struct VdbeSorter { + int nWorking; /* Start a new b-tree after this many pages */ + int nBtree; /* Current size of b-tree contents as PMA */ + int nTree; /* Used size of aTree/aIter (power of 2) */ + VdbeSorterIter *aIter; /* Array of iterators to merge */ + int *aTree; /* Current state of incremental merge */ + i64 iWriteOff; /* Current write offset within file pTemp1 */ + i64 iReadOff; /* Current read offset within file pTemp1 */ + sqlite3_file *pTemp1; /* PMA file 1 */ + int nPMA; /* Number of PMAs stored in pTemp1 */ +}; + +/* +** The following type is an iterator for a PMA. It caches the current key in +** variables nKey/aKey. If the iterator is at EOF, pFile==0. +*/ +struct VdbeSorterIter { + i64 iReadOff; /* Current read offset */ + i64 iEof; /* 1 byte past EOF for this iterator */ + sqlite3_file *pFile; /* File iterator is reading from */ + int nAlloc; /* Bytes of space at aAlloc */ + u8 *aAlloc; /* Allocated space */ + int nKey; /* Number of bytes in key */ + u8 *aKey; /* Pointer to current key */ +}; + +/* Minimum allowable value for the VdbeSorter.nWorking variable */ +#define SORTER_MIN_WORKING 10 + +/* Maximum number of segments to merge in a single pass. */ +#define SORTER_MAX_MERGE_COUNT 16 + +/* +** Free all memory belonging to the VdbeSorterIter object passed as the second +** argument. All structure fields are set to zero before returning. +*/ +static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){ + sqlite3DbFree(db, pIter->aAlloc); + memset(pIter, 0, sizeof(VdbeSorterIter)); +} + +/* +** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if +** no error occurs, or an SQLite error code if one does. +*/ +static int vdbeSorterIterNext( + sqlite3 *db, /* Database handle (for sqlite3DbMalloc() ) */ + VdbeSorterIter *pIter /* Iterator to advance */ +){ + int rc; /* Return Code */ + int nRead; /* Number of bytes read */ + int nRec; /* Size of record in bytes */ + int iOff; /* Size of serialized size varint in bytes */ + + nRead = pIter->iEof - pIter->iReadOff; + if( nRead>5 ) nRead = 5; + if( nRead<=0 ){ + /* This is an EOF condition */ + vdbeSorterIterZero(db, pIter); + return SQLITE_OK; + } + + rc = sqlite3OsRead(pIter->pFile, pIter->aAlloc, nRead, pIter->iReadOff); + iOff = getVarint32(pIter->aAlloc, nRec); + + if( rc==SQLITE_OK && (iOff+nRec)>nRead ){ + int nRead2; /* Number of extra bytes to read */ + if( (iOff+nRec)>pIter->nAlloc ){ + int nNew = pIter->nAlloc*2; + while( (iOff+nRec)>nNew ) nNew = nNew*2; + pIter->aAlloc = sqlite3DbReallocOrFree(db, pIter->aAlloc, nNew); + if( !pIter->aAlloc ) return SQLITE_NOMEM; + pIter->nAlloc = nNew; + } + + nRead2 = iOff + nRec - nRead; + rc = sqlite3OsRead( + pIter->pFile, &pIter->aAlloc[nRead], nRead2, pIter->iReadOff+nRead + ); + } + + assert( nRec>0 || rc!=SQLITE_OK ); + pIter->iReadOff += iOff+nRec; + pIter->nKey = nRec; + pIter->aKey = &pIter->aAlloc[iOff]; + return rc; +} + +/* +** Write a single varint, value iVal, to file-descriptor pFile. Return +** SQLITE_OK if successful, or an SQLite error code if some error occurs. +** +** The value of *piOffset when this function is called is used as the byte +** offset in file pFile to write to. Before returning, *piOffset is +** incremented by the number of bytes written. +*/ +static int vdbeSorterWriteVarint( + sqlite3_file *pFile, /* File to write to */ + i64 iVal, /* Value to write as a varint */ + i64 *piOffset /* IN/OUT: Write offset in file pFile */ +){ + u8 aVarint[9]; /* Buffer large enough for a varint */ + int nVarint; /* Number of used bytes in varint */ + int rc; /* Result of write() call */ + + nVarint = sqlite3PutVarint(aVarint, iVal); + rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset); + *piOffset += nVarint; + + return rc; +} + +/* +** Read a single varint from file-descriptor pFile. Return SQLITE_OK if +** successful, or an SQLite error code if some error occurs. +** +** The value of *piOffset when this function is called is used as the +** byte offset in file pFile from whence to read the varint. If successful +** (i.e. if no IO error occurs), then *piOffset is set to the offset of +** the first byte past the end of the varint before returning. *piVal is +** set to the integer value read. If an error occurs, the final values of +** both *piOffset and *piVal are undefined. +*/ +static int vdbeSorterReadVarint( + sqlite3_file *pFile, /* File to read from */ + i64 iEof, /* Total number of bytes in file */ + i64 *piOffset, /* IN/OUT: Read offset in pFile */ + i64 *piVal /* OUT: Value read from file */ +){ + u8 aVarint[9]; /* Buffer large enough for a varint */ + i64 iOff = *piOffset; /* Offset in file to read from */ + int nRead = 9; /* Number of bytes to read from file */ + int rc; /* Return code */ + + assert( iEof>iOff ); + if( (iEof-iOff)<nRead ){ + nRead = iEof-iOff; + } + + rc = sqlite3OsRead(pFile, aVarint, nRead, iOff); + if( rc==SQLITE_OK ){ + *piOffset += getVarint(aVarint, (u64 *)piVal); + } + + return rc; +} + +/* +** Initialize iterator pIter to scan through the PMA stored in file pFile +** starting at offset iStart and ending at offset iEof-1. This function +** leaves the iterator pointing to the first key in the PMA (or EOF if the +** PMA is empty). +*/ +static int vdbeSorterIterInit( + sqlite3 *db, /* Database handle */ + VdbeSorter *pSorter, /* Sorter object */ + i64 iStart, /* Start offset in pFile */ + VdbeSorterIter *pIter, /* Iterator to populate */ + i64 *pnByte /* IN/OUT: Increment this value by PMA size */ +){ + int rc; + + assert( pSorter->iWriteOff>iStart ); + assert( pIter->aAlloc==0 ); + pIter->pFile = pSorter->pTemp1; + pIter->iReadOff = iStart; + pIter->nAlloc = 128; + pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc); + if( !pIter->aAlloc ){ + rc = SQLITE_NOMEM; + }else{ + i64 iEof = pSorter->iWriteOff; /* EOF of file pSorter->pTemp1 */ + i64 nByte; /* Total size of PMA in bytes */ + rc = vdbeSorterReadVarint(pSorter->pTemp1, iEof, &pIter->iReadOff, &nByte); + *pnByte += nByte; + pIter->iEof = pIter->iReadOff + nByte; + } + if( rc==SQLITE_OK ){ + rc = vdbeSorterIterNext(db, pIter); + } + return rc; +} + +/* +** This function is called to compare two iterator keys when merging +** multiple b-tree segments. Parameter iOut is the index of the aTree[] +** value to recalculate. +*/ +static int vdbeSorterDoCompare(VdbeCursor *pCsr, int iOut){ + VdbeSorter *pSorter = pCsr->pSorter; + int i1; + int i2; + int iRes; + VdbeSorterIter *p1; + VdbeSorterIter *p2; + + assert( iOut<pSorter->nTree && iOut>0 ); + + if( iOut>=(pSorter->nTree/2) ){ + i1 = (iOut - pSorter->nTree/2) * 2; + i2 = i1 + 1; + }else{ + i1 = pSorter->aTree[iOut*2]; + i2 = pSorter->aTree[iOut*2+1]; + } + + p1 = &pSorter->aIter[i1]; + p2 = &pSorter->aIter[i2]; + + if( p1->pFile==0 ){ + iRes = i2; + }else if( p2->pFile==0 ){ + iRes = i1; + }else{ + char aSpace[150]; + UnpackedRecord *r1; + + r1 = sqlite3VdbeRecordUnpack( + pCsr->pKeyInfo, p1->nKey, p1->aKey, aSpace, sizeof(aSpace) + ); + if( r1==0 ) return SQLITE_NOMEM; + + if( sqlite3VdbeRecordCompare(p2->nKey, p2->aKey, r1)>=0 ){ + iRes = i1; + }else{ + iRes = i2; + } + sqlite3VdbeDeleteUnpackedRecord(r1); + } + + pSorter->aTree[iOut] = iRes; + return SQLITE_OK; +} + +/* +** Initialize the temporary index cursor just opened as a sorter cursor. +*/ +int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){ + assert( pCsr->pKeyInfo && pCsr->pBt ); + pCsr->pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter)); + return (pCsr->pSorter ? SQLITE_OK : SQLITE_NOMEM); +} + +/* +** Free any cursor components allocated by sqlite3VdbeSorterXXX routines. +*/ +void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){ + VdbeSorter *pSorter = pCsr->pSorter; + if( pSorter ){ + if( pSorter->aIter ){ + int i; + for(i=0; i<pSorter->nTree; i++){ + vdbeSorterIterZero(db, &pSorter->aIter[i]); + } + sqlite3DbFree(db, pSorter->aIter); + } + if( pSorter->pTemp1 ){ + sqlite3OsCloseFree(pSorter->pTemp1); + } + sqlite3DbFree(db, pSorter); + pCsr->pSorter = 0; + } +} + +/* +** Allocate space for a file-handle and open a temporary file. If successful, +** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK. +** Otherwise, set *ppFile to 0 and return an SQLite error code. +*/ +static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){ + int dummy; + return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile, + SQLITE_OPEN_TEMP_JOURNAL | + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | + SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &dummy + ); +} + + +/* +** Write the current contents of the b-tree to a PMA. Return SQLITE_OK +** if successful, or an SQLite error code otherwise. +** +** The format of a PMA is: +** +** * A varint. This varint contains the total number of bytes of content +** in the PMA (not including the varint itself). +** +** * One or more records packed end-to-end in order of ascending keys. +** Each record consists of a varint followed by a blob of data (the +** key). The varint is the number of bytes in the blob of data. +*/ +static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){ + int rc = SQLITE_OK; /* Return code */ + VdbeSorter *pSorter = pCsr->pSorter; + int res = 0; + + /* sqlite3BtreeFirst() cannot fail because sorter btrees are always held + ** in memory and so an I/O error is not possible. */ + rc = sqlite3BtreeFirst(pCsr->pCursor, &res); + if( NEVER(rc!=SQLITE_OK) || res ) return rc; + assert( pSorter->nBtree>0 ); + + /* If the first temporary PMA file has not been opened, open it now. */ + if( pSorter->pTemp1==0 ){ + rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1); + assert( rc!=SQLITE_OK || pSorter->pTemp1 ); + assert( pSorter->iWriteOff==0 ); + assert( pSorter->nPMA==0 ); + } + + if( rc==SQLITE_OK ){ + i64 iWriteOff = pSorter->iWriteOff; + void *aMalloc = 0; /* Array used to hold a single record */ + int nMalloc = 0; /* Allocated size of aMalloc[] in bytes */ + + pSorter->nPMA++; + for( + rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff); + rc==SQLITE_OK && res==0; + rc = sqlite3BtreeNext(pCsr->pCursor, &res) + ){ + i64 nKey; /* Size of this key in bytes */ + + /* Write the size of the record in bytes to the output file */ + (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey); + rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff); + + /* Make sure the aMalloc[] buffer is large enough for the record */ + if( rc==SQLITE_OK && nKey>nMalloc ){ + aMalloc = sqlite3DbReallocOrFree(db, aMalloc, nKey); + if( !aMalloc ){ + rc = SQLITE_NOMEM; + }else{ + nMalloc = nKey; + } + } + + /* Write the record itself to the output file */ + if( rc==SQLITE_OK ){ + /* sqlite3BtreeKey() cannot fail because sorter btrees held in memory */ + rc = sqlite3BtreeKey(pCsr->pCursor, 0, nKey, aMalloc); + if( ALWAYS(rc==SQLITE_OK) ){ + rc = sqlite3OsWrite(pSorter->pTemp1, aMalloc, nKey, iWriteOff); + iWriteOff += nKey; + } + } + + if( rc!=SQLITE_OK ) break; + } + + /* This assert verifies that unless an error has occurred, the size of + ** the PMA on disk is the same as the expected size stored in + ** pSorter->nBtree. */ + assert( rc!=SQLITE_OK || pSorter->nBtree==( + iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree) + )); + + pSorter->iWriteOff = iWriteOff; + sqlite3DbFree(db, aMalloc); + } + + pSorter->nBtree = 0; + return rc; +} + +/* +** This function is called on a sorter cursor by the VDBE before each row +** is inserted into VdbeCursor.pCsr. Argument nKey is the size of the key, in +** bytes, about to be inserted. +** +** If it is determined that the temporary b-tree accessed via VdbeCursor.pCsr +** is large enough, its contents are written to a sorted PMA on disk and the +** tree emptied. This prevents the b-tree (which must be small enough to +** fit entirely in the cache in order to support efficient inserts) from +** growing too large. +** +** An SQLite error code is returned if an error occurs. Otherwise, SQLITE_OK. +*/ +int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){ + int rc = SQLITE_OK; /* Return code */ + VdbeSorter *pSorter = pCsr->pSorter; + if( pSorter ){ + Pager *pPager = sqlite3BtreePager(pCsr->pBt); + int nPage; /* Current size of temporary file in pages */ + + /* Sorters never spill to disk */ + assert( sqlite3PagerFile(pPager)->pMethods==0 ); + + /* Determine how many pages the temporary b-tree has grown to */ + sqlite3PagerPagecount(pPager, &nPage); + + /* If pSorter->nWorking is still zero, but the temporary file has been + ** created in the file-system, then the most recent insert into the + ** current b-tree segment probably caused the cache to overflow (it is + ** also possible that sqlite3_release_memory() was called). So set the + ** size of the working set to a little less than the current size of the + ** file in pages. */ + if( pSorter->nWorking==0 && sqlite3PagerUnderStress(pPager) ){ + pSorter->nWorking = nPage-5; + if( pSorter->nWorking<SORTER_MIN_WORKING ){ + pSorter->nWorking = SORTER_MIN_WORKING; + } + } + + /* If the number of pages used by the current b-tree segment is greater + ** than the size of the working set (VdbeSorter.nWorking), start a new + ** segment b-tree. */ + if( pSorter->nWorking && nPage>=pSorter->nWorking ){ + BtCursor *p = pCsr->pCursor;/* Cursor structure to close and reopen */ + int iRoot; /* Root page of new tree */ + + /* Copy the current contents of the b-tree into a PMA in sorted order. + ** Close the currently open b-tree cursor. */ + rc = vdbeSorterBtreeToPMA(db, pCsr); + sqlite3BtreeCloseCursor(p); + + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeDropTable(pCsr->pBt, 2, 0); +#ifdef SQLITE_DEBUG + sqlite3PagerPagecount(pPager, &nPage); + assert( rc!=SQLITE_OK || nPage==1 ); +#endif + } + if( rc==SQLITE_OK ){ + rc = sqlite3BtreeCreateTable(pCsr->pBt, &iRoot, BTREE_BLOBKEY); + } + if( rc==SQLITE_OK ){ + assert( iRoot==2 ); + rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p); + } + } + + pSorter->nBtree += sqlite3VarintLen(nKey) + nKey; + } + return rc; +} + +/* +** Helper function for sqlite3VdbeSorterRewind(). +*/ +static int vdbeSorterInitMerge( + sqlite3 *db, /* Database handle */ + VdbeCursor *pCsr, /* Cursor handle for this sorter */ + i64 *pnByte /* Sum of bytes in all opened PMAs */ +){ + VdbeSorter *pSorter = pCsr->pSorter; + int rc = SQLITE_OK; /* Return code */ + int i; /* Used to iterator through aIter[] */ + i64 nByte = 0; /* Total bytes in all opened PMAs */ + + /* Initialize the iterators. */ + for(i=0; rc==SQLITE_OK && i<SORTER_MAX_MERGE_COUNT; i++){ + VdbeSorterIter *pIter = &pSorter->aIter[i]; + rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte); + pSorter->iReadOff = pIter->iEof; + assert( pSorter->iReadOff<=pSorter->iWriteOff || rc!=SQLITE_OK ); + if( pSorter->iReadOff>=pSorter->iWriteOff ) break; + } + + /* Initialize the aTree[] array. */ + for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){ + rc = vdbeSorterDoCompare(pCsr, i); + } + + *pnByte = nByte; + return rc; +} + +/* +** Once the sorter has been populated, this function is called to prepare +** for iterating through its contents in sorted order. +*/ +int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){ + VdbeSorter *pSorter = pCsr->pSorter; + int rc; /* Return code */ + sqlite3_file *pTemp2 = 0; /* Second temp file to use */ + i64 iWrite2 = 0; /* Write offset for pTemp2 */ + int nIter; /* Number of iterators used */ + int nByte; /* Bytes of space required for aIter/aTree */ + int N = 2; /* Power of 2 >= nIter */ + + assert( pSorter ); + + /* Write the current b-tree to a PMA. Close the b-tree cursor. */ + rc = vdbeSorterBtreeToPMA(db, pCsr); + sqlite3BtreeCloseCursor(pCsr->pCursor); + if( rc!=SQLITE_OK ) return rc; + if( pSorter->nPMA==0 ){ + *pbEof = 1; + return SQLITE_OK; + } + + /* Allocate space for aIter[] and aTree[]. */ + nIter = pSorter->nPMA; + if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT; + assert( nIter>0 ); + while( N<nIter ) N += N; + nByte = N * (sizeof(int) + sizeof(VdbeSorterIter)); + pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte); + if( !pSorter->aIter ) return SQLITE_NOMEM; + pSorter->aTree = (int *)&pSorter->aIter[N]; + pSorter->nTree = N; + + do { + int iNew; /* Index of new, merged, PMA */ + + for(iNew=0; + rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNT<pSorter->nPMA; + iNew++ + ){ + i64 nWrite; /* Number of bytes in new PMA */ + + /* If there are SORTER_MAX_MERGE_COUNT or less PMAs in file pTemp1, + ** initialize an iterator for each of them and break out of the loop. + ** These iterators will be incrementally merged as the VDBE layer calls + ** sqlite3VdbeSorterNext(). + ** + ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs, + ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs + ** are merged into a single PMA that is written to file pTemp2. + */ + rc = vdbeSorterInitMerge(db, pCsr, &nWrite); + assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile ); + if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){ + break; + } + + /* Open the second temp file, if it is not already open. */ + if( pTemp2==0 ){ + assert( iWrite2==0 ); + rc = vdbeSorterOpenTempFile(db, &pTemp2); + } + + if( rc==SQLITE_OK ){ + rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2); + } + + if( rc==SQLITE_OK ){ + int bEof = 0; + while( rc==SQLITE_OK && bEof==0 ){ + int nToWrite; + VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ]; + assert( pIter->pFile ); + nToWrite = pIter->nKey + sqlite3VarintLen(pIter->nKey); + rc = sqlite3OsWrite(pTemp2, pIter->aAlloc, nToWrite, iWrite2); + iWrite2 += nToWrite; + if( rc==SQLITE_OK ){ + rc = sqlite3VdbeSorterNext(db, pCsr, &bEof); + } + } + } + } + + if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){ + break; + }else{ + sqlite3_file *pTmp = pSorter->pTemp1; + pSorter->nPMA = iNew; + pSorter->pTemp1 = pTemp2; + pTemp2 = pTmp; + pSorter->iWriteOff = iWrite2; + pSorter->iReadOff = 0; + iWrite2 = 0; + } + }while( rc==SQLITE_OK ); + + if( pTemp2 ){ + sqlite3OsCloseFree(pTemp2); + } + *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); + return rc; +} + +/* +** Advance to the next element in the sorter. +*/ +int sqlite3VdbeSorterNext(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){ + VdbeSorter *pSorter = pCsr->pSorter; + int iPrev = pSorter->aTree[1]; /* Index of iterator to advance */ + int i; /* Index of aTree[] to recalculate */ + int rc; /* Return code */ + + rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]); + for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){ + rc = vdbeSorterDoCompare(pCsr, i); + } + + *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); + return rc; +} + +/* +** Copy the current sorter key into the memory cell pOut. +*/ +int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){ + VdbeSorter *pSorter = pCsr->pSorter; + VdbeSorterIter *pIter; + + pIter = &pSorter->aIter[ pSorter->aTree[1] ]; + + /* Coverage testing note: As things are currently, this call will always + ** succeed. This is because the memory cell passed by the VDBE layer + ** happens to be the same one as was used to assemble the keys before they + ** were passed to the sorter - meaning it is always large enough for the + ** largest key. But this could change very easily, so we leave the call + ** to sqlite3VdbeMemGrow() in. */ + if( NEVER(sqlite3VdbeMemGrow(pOut, pIter->nKey, 0)) ){ + return SQLITE_NOMEM; + } + pOut->n = pIter->nKey; + MemSetTypeFlag(pOut, MEM_Blob); + memcpy(pOut->z, pIter->aKey, pIter->nKey); + + return SQLITE_OK; +} + +#endif /* #ifndef SQLITE_OMIT_MERGE_SORT */ @@ -1804,13 +1804,15 @@ int sqlite3WalClose( */ rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ + int bPersistWal = -1; if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; } rc = sqlite3WalCheckpoint( pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 ); - if( rc==SQLITE_OK ){ + sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersistWal); + if( rc==SQLITE_OK && bPersistWal!=1 ){ isDelete = 1; } } diff --git a/src/where.c b/src/where.c index d31223286..21fb7f45f 100644 --- a/src/where.c +++ b/src/where.c @@ -2142,6 +2142,7 @@ static sqlite3_index_info *allocateIndexInfo( testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; nTerm++; } @@ -2192,6 +2193,7 @@ static sqlite3_index_info *allocateIndexInfo( testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; pIdxCons[j].iColumn = pTerm->u.leftColumn; pIdxCons[j].iTermOffset = i; pIdxCons[j].op = (u8)pTerm->eOperator; |