path: root/src/btree.c
Diffstat (limited to 'src/btree.c')
-rwxr-xr-x [-rw-r--r--]   src/btree.c   3025
1 files changed, 1937 insertions, 1088 deletions
diff --git a/src/btree.c b/src/btree.c
index 56e99dcb6..5be30d562 100644..100755
--- a/src/btree.c
+++ b/src/btree.c
@@ -51,7 +51,7 @@ int sqlite3BtreeTrace=1; /* True to enable tracing */
#define BTALLOC_LE 2 /* Allocate any page <= the parameter */
/*
-** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not
+** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not
** defined, or 0 if it is. For example:
**
** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum);
@@ -66,7 +66,7 @@ int sqlite3BtreeTrace=1; /* True to enable tracing */
/*
** A list of BtShared objects that are eligible for participation
** in shared cache. This variable has file scope during normal builds,
-** but the test harness needs to access it so we make it global for
+** but the test harness needs to access it so we make it global for
** test builds.
**
** Access to this variable is protected by SQLITE_MUTEX_STATIC_MAIN.
@@ -101,7 +101,7 @@ int sqlite3_enable_shared_cache(int enable){
** manipulate entries in the BtShared.pLock linked list used to store
** shared-cache table level locks. If the library is compiled with the
** shared-cache feature disabled, then there is only ever one user
- ** of each BtShared structure and so this locking is not necessary.
+ ** of each BtShared structure and so this locking is not necessary.
** So define the lock related functions as no-ops.
*/
#define querySharedCacheTableLock(a,b,c) SQLITE_OK
@@ -112,6 +112,17 @@ int sqlite3_enable_shared_cache(int enable){
#define hasReadConflicts(a, b) 0
#endif
+#ifdef SQLITE_DEBUG
+/*
+** Return and reset the seek counter for a Btree object.
+*/
+sqlite3_uint64 sqlite3BtreeSeekCount(Btree *pBt){
+ u64 n = pBt->nSeek;
+ pBt->nSeek = 0;
+ return n;
+}
+#endif
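
The added sqlite3BtreeSeekCount() interface is debug-only instrumentation: it returns the number of seeks recorded against the Btree since the counter was last sampled and zeroes the counter as a side effect. A minimal sketch of how a debug build might sample it (the open Btree* handle pBt and the surrounding harness are assumed for illustration, not part of this change):

  #ifdef SQLITE_DEBUG
    sqlite3_uint64 nSeek;
    (void)sqlite3BtreeSeekCount(pBt);    /* discard the old count, reset to 0 */
    /* ... run the cursor operations being measured ... */
    nSeek = sqlite3BtreeSeekCount(pBt);  /* number of seeks since the reset */
  #endif
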
+
/*
** Implementation of the SQLITE_CORRUPT_PAGE() macro. Takes a single
** (MemPage*) as an argument. The (MemPage*) must not be NULL.
@@ -125,8 +136,8 @@ int sqlite3_enable_shared_cache(int enable){
int corruptPageError(int lineno, MemPage *p){
char *zMsg;
sqlite3BeginBenignMalloc();
- zMsg = sqlite3_mprintf("database corruption page %d of %s",
- (int)p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0)
+ zMsg = sqlite3_mprintf("database corruption page %u of %s",
+ p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0)
);
sqlite3EndBenignMalloc();
if( zMsg ){
@@ -146,15 +157,15 @@ int corruptPageError(int lineno, MemPage *p){
/*
**** This function is only used as part of an assert() statement. ***
**
-** Check to see if pBtree holds the required locks to read or write to the
+** Check to see if pBtree holds the required locks to read or write to the
** table with root page iRoot. Return 1 if it does and 0 if not.
**
-** For example, when writing to a table with root-page iRoot via
+** For example, when writing to a table with root-page iRoot via
** Btree connection pBtree:
**
** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) );
**
-** When writing to an index that resides in a sharable database, the
+** When writing to an index that resides in a sharable database, the
** caller should have first obtained a lock specifying the root page of
** the corresponding table. This makes things a bit more complicated,
** as this module treats each table as a separate structure. To determine
@@ -176,7 +187,7 @@ static int hasSharedCacheTableLock(
BtLock *pLock;
/* If this database is not shareable, or if the client is reading
- ** and has the read-uncommitted flag set, then no lock is required.
+ ** and has the read-uncommitted flag set, then no lock is required.
** Return true immediately.
*/
if( (pBtree->sharable==0)
@@ -203,7 +214,7 @@ static int hasSharedCacheTableLock(
int bSeen = 0;
for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){
Index *pIdx = (Index *)sqliteHashData(p);
- if( pIdx->tnum==(int)iRoot ){
+ if( pIdx->tnum==iRoot ){
if( bSeen ){
/* Two or more indexes share the same root page. There must
** be imposter tables. So just return true. The assert is not
@@ -218,13 +229,13 @@ static int hasSharedCacheTableLock(
iTab = iRoot;
}
- /* Search for the required lock. Either a write-lock on root-page iTab, a
+ /* Search for the required lock. Either a write-lock on root-page iTab, a
** write-lock on the schema table, or (if the client is reading) a
** read-lock on iTab will suffice. Return 1 if any of these are found. */
for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){
- if( pLock->pBtree==pBtree
+ if( pLock->pBtree==pBtree
&& (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1))
- && pLock->eLock>=eLockType
+ && pLock->eLock>=eLockType
){
return 1;
}
@@ -257,7 +268,7 @@ static int hasSharedCacheTableLock(
static int hasReadConflicts(Btree *pBtree, Pgno iRoot){
BtCursor *p;
for(p=pBtree->pBt->pCursor; p; p=p->pNext){
- if( p->pgnoRoot==iRoot
+ if( p->pgnoRoot==iRoot
&& p->pBtree!=pBtree
&& 0==(p->pBtree->db->flags & SQLITE_ReadUncommit)
){
@@ -269,7 +280,7 @@ static int hasReadConflicts(Btree *pBtree, Pgno iRoot){
#endif /* #ifdef SQLITE_DEBUG */
/*
-** Query to see if Btree handle p may obtain a lock of type eLock
+** Query to see if Btree handle p may obtain a lock of type eLock
** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
** SQLITE_OK if the lock may be obtained (by calling
** setSharedCacheTableLock()), or SQLITE_LOCKED if not.
@@ -282,14 +293,14 @@ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
assert( p->db!=0 );
assert( !(p->db->flags&SQLITE_ReadUncommit)||eLock==WRITE_LOCK||iTab==1 );
-
+
/* If requesting a write-lock, then the Btree must have an open write
- ** transaction on this file. And, obviously, for this to be so there
+ ** transaction on this file. And, obviously, for this to be so there
** must be an open write transaction on the file itself.
*/
assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) );
assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE );
-
+
/* This routine is a no-op if the shared-cache is not enabled */
if( !p->sharable ){
return SQLITE_OK;
@@ -304,7 +315,7 @@ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
}
for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
- /* The condition (pIter->eLock!=eLock) in the following if(...)
+ /* The condition (pIter->eLock!=eLock) in the following if(...)
** statement is a simplification of:
**
** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK)
@@ -331,7 +342,7 @@ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
#ifndef SQLITE_OMIT_SHARED_CACHE
/*
** Add a lock on the table with root-page iTable to the shared-btree used
-** by Btree handle p. Parameter eLock must be either READ_LOCK or
+** by Btree handle p. Parameter eLock must be either READ_LOCK or
** WRITE_LOCK.
**
** This function assumes the following:
@@ -343,7 +354,7 @@ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
** with the requested lock (i.e. querySharedCacheTableLock() has
** already been called and returned SQLITE_OK).
**
-** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM
+** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM
** is returned if a malloc attempt fails.
*/
static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){
@@ -357,11 +368,11 @@ static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){
/* A connection with the read-uncommitted flag set will never try to
** obtain a read-lock using this function. The only read-lock obtained
- ** by a connection in read-uncommitted mode is on the sqlite_schema
+ ** by a connection in read-uncommitted mode is on the sqlite_schema
** table, and that lock is obtained in BtreeBeginTrans(). */
assert( 0==(p->db->flags&SQLITE_ReadUncommit) || eLock==WRITE_LOCK );
- /* This function should only be called on a sharable b-tree after it
+ /* This function should only be called on a sharable b-tree after it
** has been determined that no other b-tree holds a conflicting lock. */
assert( p->sharable );
assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) );
@@ -406,7 +417,7 @@ static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){
** Release all the table locks (locks obtained via calls to
** the setSharedCacheTableLock() procedure) held by Btree object p.
**
-** This function assumes that Btree p has an open read or write
+** This function assumes that Btree p has an open read or write
** transaction. If it does not, then the BTS_PENDING flag
** may be incorrectly cleared.
*/
@@ -438,7 +449,7 @@ static void clearAllSharedCacheTableLocks(Btree *p){
pBt->pWriter = 0;
pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
}else if( pBt->nTransaction==2 ){
- /* This function is called when Btree p is concluding its
+ /* This function is called when Btree p is concluding its
** transaction. If there currently exists a writer, and p is not
** that writer, then the number of locks held by connections other
** than the writer must be about to drop to zero. In this case
@@ -484,7 +495,7 @@ static int cursorHoldsMutex(BtCursor *p){
}
/* Verify that the cursor and the BtShared agree about what is the current
-** database connection. This is important in shared-cache mode. If the database
+** database connection. This is important in shared-cache mode. If the database
** connection pointers get out-of-sync, it is possible for routines like
** btreeInitPage() to reference a stale connection pointer that references a
** connection that has already closed. This routine is used inside assert()
@@ -536,7 +547,7 @@ static void invalidateIncrblobCursors(
int isClearTable /* True if all rows are being deleted */
){
BtCursor *p;
- if( pBtree->hasIncrblobCur==0 ) return;
+ assert( pBtree->hasIncrblobCur );
assert( sqlite3BtreeHoldsMutex(pBtree) );
pBtree->hasIncrblobCur = 0;
for(p=pBtree->pBt->pCursor; p; p=p->pNext){
@@ -555,8 +566,8 @@ static void invalidateIncrblobCursors(
#endif /* SQLITE_OMIT_INCRBLOB */
/*
-** Set bit pgno of the BtShared.pHasContent bitvec. This is called
-** when a page that previously contained data becomes a free-list leaf
+** Set bit pgno of the BtShared.pHasContent bitvec. This is called
+** when a page that previously contained data becomes a free-list leaf
** page.
**
** The BtShared.pHasContent bitvec exists to work around an obscure
@@ -582,7 +593,7 @@ static void invalidateIncrblobCursors(
** may be lost. In the event of a rollback, it may not be possible
** to restore the database to its original configuration.
**
-** The solution is the BtShared.pHasContent bitvec. Whenever a page is
+** The solution is the BtShared.pHasContent bitvec. Whenever a page is
** moved to become a free-list leaf page, the corresponding bit is
** set in the bitvec. Whenever a leaf page is extracted from the free-list,
** optimization 2 above is omitted if the corresponding bit is already
@@ -643,13 +654,13 @@ static void btreeReleaseAllCursorPages(BtCursor *pCur){
** The cursor passed as the only argument must point to a valid entry
** when this function is called (i.e. have eState==CURSOR_VALID). This
** function saves the current cursor key in variables pCur->nKey and
-** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error
+** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error
** code otherwise.
**
** If the cursor is open on an intkey table, then the integer key
** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to
-** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is
-** set to point to a malloced buffer pCur->nKey bytes in size containing
+** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is
+** set to point to a malloced buffer pCur->nKey bytes in size containing
** the key.
*/
static int saveCursorKey(BtCursor *pCur){
@@ -665,8 +676,8 @@ static int saveCursorKey(BtCursor *pCur){
/* For an index btree, save the complete key content. It is possible
** that the current key is corrupt. In that case, it is possible that
** the sqlite3VdbeRecordUnpack() function may overread the buffer by
- ** up to the size of 1 varint plus 1 8-byte value when the cursor
- ** position is restored. Hence the 17 bytes of padding allocated
+ ** up to the size of 1 varint plus 1 8-byte value when the cursor
+ ** position is restored. Hence the 17 bytes of padding allocated
** below. */
void *pKey;
pCur->nKey = sqlite3BtreePayloadSize(pCur);
@@ -688,11 +699,11 @@ static int saveCursorKey(BtCursor *pCur){
}
/*
-** Save the current cursor position in the variables BtCursor.nKey
+** Save the current cursor position in the variables BtCursor.nKey
** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
**
** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID)
-** prior to calling this routine.
+** prior to calling this routine.
*/
static int saveCursorPosition(BtCursor *pCur){
int rc;
@@ -731,7 +742,7 @@ static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*);
** routine is called just before cursor pExcept is used to modify the
** table, for example in BtreeDelete() or BtreeInsert().
**
-** If there are two or more cursors on the same btree, then all such
+** If there are two or more cursors on the same btree, then all such
** cursors should have their BTCF_Multiple flag set. The btreeCursor()
** routine enforces that rule. This routine only needs to be called in
** the uncommon case when pExpect has the BTCF_Multiple flag set.
@@ -796,7 +807,7 @@ void sqlite3BtreeClearCursor(BtCursor *pCur){
/*
** In this version of BtreeMoveto, pKey is a packed index record
** such as is generated by the OP_MakeRecord opcode. Unpack the
-** record and then call BtreeMovetoUnpacked() to do the work.
+** record and then call sqlite3BtreeIndexMoveto() to do the work.
*/
static int btreeMoveto(
BtCursor *pCur, /* Cursor open on the btree to be searched */
@@ -816,24 +827,22 @@ static int btreeMoveto(
sqlite3VdbeRecordUnpack(pKeyInfo, (int)nKey, pKey, pIdxKey);
if( pIdxKey->nField==0 || pIdxKey->nField>pKeyInfo->nAllField ){
rc = SQLITE_CORRUPT_BKPT;
- goto moveto_done;
+ }else{
+ rc = sqlite3BtreeIndexMoveto(pCur, pIdxKey, pRes);
}
+ sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey);
}else{
pIdxKey = 0;
- }
- rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes);
-moveto_done:
- if( pIdxKey ){
- sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey);
+ rc = sqlite3BtreeTableMoveto(pCur, nKey, bias, pRes);
}
return rc;
}
/*
** Restore the cursor to the position it was in (or as close to as possible)
-** when saveCursorPosition() was called. Note that this call deletes the
+** when saveCursorPosition() was called. Note that this call deletes the
** saved position info stored by saveCursorPosition(), so there can be
-** at most one effective restoreCursorPosition() call after each
+** at most one effective restoreCursorPosition() call after each
** saveCursorPosition().
*/
static int btreeRestoreCursorPosition(BtCursor *pCur){
@@ -901,7 +910,7 @@ BtCursor *sqlite3BtreeFakeValidCursor(void){
/*
** This routine restores a cursor back to its original position after it
** has been moved by some outside activity (such as a btree rebalance or
-** a row having been deleted out from under the cursor).
+** a row having been deleted out from under the cursor).
**
** On success, the *pDifferentRow parameter is false if the cursor is left
** pointing at exactly the same row. *pDifferentRow is the row the cursor
@@ -937,8 +946,25 @@ int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){
*/
void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){
/* Used only by systems that substitute their own storage engine */
+#ifdef SQLITE_DEBUG
+ if( ALWAYS(eHintType==BTREE_HINT_RANGE) ){
+ va_list ap;
+ Expr *pExpr;
+ Walker w;
+ memset(&w, 0, sizeof(w));
+ w.xExprCallback = sqlite3CursorRangeHintExprCheck;
+ va_start(ap, eHintType);
+ pExpr = va_arg(ap, Expr*);
+ w.u.aMem = va_arg(ap, Mem*);
+ va_end(ap);
+ assert( pExpr!=0 );
+ assert( w.u.aMem!=0 );
+ sqlite3WalkExpr(&w, pExpr);
+ }
+#endif /* SQLITE_DEBUG */
}
-#endif
+#endif /* SQLITE_ENABLE_CURSOR_HINTS */
+
/*
** Provide flag hints to the cursor.
@@ -966,7 +992,7 @@ static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
if( pgno<2 ) return 0;
nPagesPerMapPage = (pBt->usableSize/5)+1;
iPtrMap = (pgno-2)/nPagesPerMapPage;
- ret = (iPtrMap*nPagesPerMapPage) + 2;
+ ret = (iPtrMap*nPagesPerMapPage) + 2;
if( ret==PENDING_BYTE_PAGE(pBt) ){
ret++;
}
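
For concreteness, a worked example of the mapping computed above, assuming an illustrative usable page size of 4096 bytes:

  /* nPagesPerMapPage = 4096/5 + 1 = 820   (each ptrmap entry is 5 bytes, and
  ** the +1 accounts for the ptrmap page itself within each stride)
  **
  ** for pgno = 1000:   iPtrMap = (1000-2)/820 = 1
  **                    ret     = 1*820 + 2   = 822
  **
  ** so the pointer-map entry for page 1000 is found on page 822; had that
  ** value landed on PENDING_BYTE_PAGE(pBt), the following page would be used.
  */
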
@@ -1023,7 +1049,7 @@ static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){
pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
- TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
+ TRACE(("PTRMAP_UPDATE: %u->(%u,%u)\n", key, eType, parent));
*pRC= rc = sqlite3PagerWrite(pDbPage);
if( rc==SQLITE_OK ){
pPtrmap[offset] = eType;
@@ -1133,6 +1159,24 @@ static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow(
}
/*
+** Given a record with nPayload bytes of payload stored within btree
+** page pPage, return the number of bytes of payload stored locally.
+*/
+static int btreePayloadToLocal(MemPage *pPage, i64 nPayload){
+ int maxLocal; /* Maximum amount of payload held locally */
+ maxLocal = pPage->maxLocal;
+ if( nPayload<=maxLocal ){
+ return nPayload;
+ }else{
+ int minLocal; /* Minimum amount of payload held locally */
+ int surplus; /* Overflow payload available for local storage */
+ minLocal = pPage->minLocal;
+ surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize-4);
+ return ( surplus <= maxLocal ) ? surplus : minLocal;
+ }
+}
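
To make the surplus computation concrete, a worked example using assumed values that roughly correspond to a 4096-byte table-leaf page (maxLocal = 4061, minLocal = 489, usableSize = 4096; illustrative only):

  /* nPayload = 10000 (too large to fit locally, since 10000 > maxLocal):
  **   surplus = 489 + (10000 - 489) % (4096 - 4)
  **           = 489 + 9511 % 4092
  **           = 489 + 1327  = 1816
  ** 1816 <= maxLocal, so 1816 bytes stay on the leaf page and the remaining
  ** 8184 bytes (exactly two full overflow pages) spill to overflow pages.
  ** Had surplus exceeded maxLocal, only minLocal (489) bytes would be kept
  ** locally.
  */
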
+
+/*
** The following routines are implementations of the MemPage.xParseCell()
** method.
**
@@ -1198,19 +1242,37 @@ static void btreeParseCellPtr(
**
** pIter += getVarint(pIter, (u64*)&pInfo->nKey);
**
- ** The code is inlined to avoid a function call.
+ ** The code is inlined and the loop is unrolled for performance.
+ ** This routine is a high-runner.
*/
iKey = *pIter;
if( iKey>=0x80 ){
- u8 *pEnd = &pIter[7];
- iKey &= 0x7f;
- while(1){
- iKey = (iKey<<7) | (*++pIter & 0x7f);
- if( (*pIter)<0x80 ) break;
- if( pIter>=pEnd ){
- iKey = (iKey<<8) | *++pIter;
- break;
+ u8 x;
+ iKey = (iKey<<7) ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<7) ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<7) ^ 0x10204000 ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter);
+ if( x>=0x80 ){
+ iKey = (iKey<<8) ^ 0x8000 ^ (*++pIter);
+ }
+ }
+ }
+ }
+ }
+ }else{
+ iKey ^= 0x204000;
}
+ }else{
+ iKey ^= 0x4000;
}
}
pIter++;
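
The XOR constants in the unrolled decoder exist because the bytes are no longer masked with 0x7f before being shifted in, so each shift carries along a stray continuation bit that must be cancelled afterwards. A worked two-byte example (bytes 0x81 0x05, encoding the value 133):

  /* iKey = 0x81                        first byte, high bit set
  ** iKey = (0x81<<7) ^ 0x05 = 0x4085   stray continuation bit left at 0x4000
  ** second byte 0x05 < 0x80, so the "else" branch runs:
  ** iKey ^= 0x4000          = 0x0085 = 133, the correct decoded value
  **
  ** The deeper nestings cancel their accumulated stray bits the same way,
  ** which is where the 0x204000 and 0x10204000 constants come from.
  */
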
@@ -1219,7 +1281,7 @@ static void btreeParseCellPtr(
pInfo->nPayload = nPayload;
pInfo->pPayload = pIter;
testcase( nPayload==pPage->maxLocal );
- testcase( nPayload==pPage->maxLocal+1 );
+ testcase( nPayload==(u32)pPage->maxLocal+1 );
if( nPayload<=pPage->maxLocal ){
/* This is the (easy) common case where the entire payload fits
** on the local page. No overflow is required.
@@ -1256,7 +1318,7 @@ static void btreeParseCellPtrIndex(
pInfo->nPayload = nPayload;
pInfo->pPayload = pIter;
testcase( nPayload==pPage->maxLocal );
- testcase( nPayload==pPage->maxLocal+1 );
+ testcase( nPayload==(u32)pPage->maxLocal+1 );
if( nPayload<=pPage->maxLocal ){
/* This is the (easy) common case where the entire payload fits
** on the local page. No overflow is required.
@@ -1286,10 +1348,12 @@ static void btreeParseCell(
** the space used by the cell pointer.
**
** cellSizePtrNoPayload() => table internal nodes
-** cellSizePtr() => all index nodes & table leaf nodes
+** cellSizePtrTableLeaf() => table leaf nodes
+** cellSizePtr() => index internal nodes
+** cellSizeIdxLeaf() => index leaf nodes
*/
static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
- u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */
+ u8 *pIter = pCell + 4; /* For looping over bytes of pCell */
u8 *pEnd; /* End mark for a varint */
u32 nSize; /* Size value to return */
@@ -1302,6 +1366,7 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
pPage->xParseCell(pPage, pCell, &debuginfo);
#endif
+ assert( pPage->childPtrSize==4 );
nSize = *pIter;
if( nSize>=0x80 ){
pEnd = &pIter[8];
@@ -1311,15 +1376,50 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
}while( *(pIter)>=0x80 && pIter<pEnd );
}
pIter++;
- if( pPage->intKey ){
- /* pIter now points at the 64-bit integer key value, a variable length
- ** integer. The following block moves pIter to point at the first byte
- ** past the end of the key value. */
- pEnd = &pIter[9];
- while( (*pIter++)&0x80 && pIter<pEnd );
+ testcase( nSize==pPage->maxLocal );
+ testcase( nSize==(u32)pPage->maxLocal+1 );
+ if( nSize<=pPage->maxLocal ){
+ nSize += (u32)(pIter - pCell);
+ assert( nSize>4 );
+ }else{
+ int minLocal = pPage->minLocal;
+ nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
+ testcase( nSize==pPage->maxLocal );
+ testcase( nSize==(u32)pPage->maxLocal+1 );
+ if( nSize>pPage->maxLocal ){
+ nSize = minLocal;
+ }
+ nSize += 4 + (u16)(pIter - pCell);
}
+ assert( nSize==debuginfo.nSize || CORRUPT_DB );
+ return (u16)nSize;
+}
+static u16 cellSizePtrIdxLeaf(MemPage *pPage, u8 *pCell){
+ u8 *pIter = pCell; /* For looping over bytes of pCell */
+ u8 *pEnd; /* End mark for a varint */
+ u32 nSize; /* Size value to return */
+
+#ifdef SQLITE_DEBUG
+ /* The value returned by this function should always be the same as
+ ** the (CellInfo.nSize) value found by doing a full parse of the
+ ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
+ ** this function verifies that this invariant is not violated. */
+ CellInfo debuginfo;
+ pPage->xParseCell(pPage, pCell, &debuginfo);
+#endif
+
+ assert( pPage->childPtrSize==0 );
+ nSize = *pIter;
+ if( nSize>=0x80 ){
+ pEnd = &pIter[8];
+ nSize &= 0x7f;
+ do{
+ nSize = (nSize<<7) | (*++pIter & 0x7f);
+ }while( *(pIter)>=0x80 && pIter<pEnd );
+ }
+ pIter++;
testcase( nSize==pPage->maxLocal );
- testcase( nSize==pPage->maxLocal+1 );
+ testcase( nSize==(u32)pPage->maxLocal+1 );
if( nSize<=pPage->maxLocal ){
nSize += (u32)(pIter - pCell);
if( nSize<4 ) nSize = 4;
@@ -1327,7 +1427,7 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
int minLocal = pPage->minLocal;
nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
testcase( nSize==pPage->maxLocal );
- testcase( nSize==pPage->maxLocal+1 );
+ testcase( nSize==(u32)pPage->maxLocal+1 );
if( nSize>pPage->maxLocal ){
nSize = minLocal;
}
@@ -1357,6 +1457,58 @@ static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){
assert( debuginfo.nSize==(u16)(pIter - pCell) || CORRUPT_DB );
return (u16)(pIter - pCell);
}
+static u16 cellSizePtrTableLeaf(MemPage *pPage, u8 *pCell){
+ u8 *pIter = pCell; /* For looping over bytes of pCell */
+ u8 *pEnd; /* End mark for a varint */
+ u32 nSize; /* Size value to return */
+
+#ifdef SQLITE_DEBUG
+ /* The value returned by this function should always be the same as
+ ** the (CellInfo.nSize) value found by doing a full parse of the
+ ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
+ ** this function verifies that this invariant is not violated. */
+ CellInfo debuginfo;
+ pPage->xParseCell(pPage, pCell, &debuginfo);
+#endif
+
+ nSize = *pIter;
+ if( nSize>=0x80 ){
+ pEnd = &pIter[8];
+ nSize &= 0x7f;
+ do{
+ nSize = (nSize<<7) | (*++pIter & 0x7f);
+ }while( *(pIter)>=0x80 && pIter<pEnd );
+ }
+ pIter++;
+ /* pIter now points at the 64-bit integer key value, a variable length
+ ** integer. The following block moves pIter to point at the first byte
+ ** past the end of the key value. */
+ if( (*pIter++)&0x80
+ && (*pIter++)&0x80
+ && (*pIter++)&0x80
+ && (*pIter++)&0x80
+ && (*pIter++)&0x80
+ && (*pIter++)&0x80
+ && (*pIter++)&0x80
+ && (*pIter++)&0x80 ){ pIter++; }
+ testcase( nSize==pPage->maxLocal );
+ testcase( nSize==(u32)pPage->maxLocal+1 );
+ if( nSize<=pPage->maxLocal ){
+ nSize += (u32)(pIter - pCell);
+ if( nSize<4 ) nSize = 4;
+ }else{
+ int minLocal = pPage->minLocal;
+ nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
+ testcase( nSize==pPage->maxLocal );
+ testcase( nSize==(u32)pPage->maxLocal+1 );
+ if( nSize>pPage->maxLocal ){
+ nSize = minLocal;
+ }
+ nSize += 4 + (u16)(pIter - pCell);
+ }
+ assert( nSize==debuginfo.nSize || CORRUPT_DB );
+ return (u16)nSize;
+}
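
The chained && tests above replace the old while-loop for stepping over the rowid varint: each term consumes one byte via its post-increment whether or not the high bit is set, the chain short-circuits at the first byte below 0x80, and the trailing pIter++ inside the braces consumes the ninth byte of a maximum-length varint. For example, with a two-byte rowid varint 0x82 0x2C:

  /* term 1: (*pIter++) reads 0x82, high bit set   -> pIter now past byte 1
  ** term 2: (*pIter++) reads 0x2C, high bit clear -> pIter now past byte 2,
  **         the && chain stops, and the extra pIter++ body is skipped.
  ** pIter is left pointing at the first byte after the key, as required.
  */
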
#ifdef SQLITE_DEBUG
@@ -1370,7 +1522,7 @@ static u16 cellSize(MemPage *pPage, int iCell){
#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** The cell pCell is currently part of page pSrc but will ultimately be part
-** of pPage. (pSrc and pPager are often the same.) If pCell contains a
+** of pPage. (pSrc and pPage are often the same.) If pCell contains a
** pointer to an overflow page, insert an entry into the pointer-map for
** the overflow page that will be valid after pCell has been moved to pPage.
*/
@@ -1381,7 +1533,7 @@ static void ptrmapPutOvflPtr(MemPage *pPage, MemPage *pSrc, u8 *pCell,int *pRC){
pPage->xParseCell(pPage, pCell, &info);
if( info.nLocal<info.nPayload ){
Pgno ovfl;
- if( SQLITE_WITHIN(pSrc->aDataEnd, pCell, pCell+info.nLocal) ){
+ if( SQLITE_OVERFLOW(pSrc->aDataEnd, pCell, pCell+info.nLocal) ){
testcase( pSrc!=pPage );
*pRC = SQLITE_CORRUPT_BKPT;
return;
@@ -1419,14 +1571,14 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){
unsigned char *src; /* Source of content */
int iCellFirst; /* First allowable cell index */
int iCellLast; /* Last possible cell index */
+ int iCellStart; /* First cell offset in input */
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( pPage->pBt!=0 );
assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
assert( pPage->nOverflow==0 );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- temp = 0;
- src = data = pPage->aData;
+ data = pPage->aData;
hdr = pPage->hdrOffset;
cellOffset = pPage->cellOffset;
nCell = pPage->nCell;
@@ -1437,7 +1589,7 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){
/* This block handles pages with two or fewer free blocks and nMaxFrag
** or fewer fragmented bytes. In this case it is faster to move the
** two (or one) blocks of cells using memmove() and add the required
- ** offsets to each pointer in the cell-pointer array than it is to
+ ** offsets to each pointer in the cell-pointer array than it is to
** reconstruct the entire page. */
if( (int)data[hdr+7]<=nMaxFrag ){
int iFree = get2byte(&data[hdr+1]);
@@ -1460,7 +1612,7 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){
if( iFree2+sz2 > usableSize ) return SQLITE_CORRUPT_PAGE(pPage);
memmove(&data[iFree+sz+sz2], &data[iFree+sz], iFree2-(iFree+sz));
sz += sz2;
- }else if( NEVER(iFree+sz>usableSize) ){
+ }else if( iFree+sz>usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
@@ -1479,41 +1631,39 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){
cbrk = usableSize;
iCellLast = usableSize - 4;
- for(i=0; i<nCell; i++){
- u8 *pAddr; /* The i-th cell pointer */
- pAddr = &data[cellOffset + i*2];
- pc = get2byte(pAddr);
- testcase( pc==iCellFirst );
- testcase( pc==iCellLast );
- /* These conditions have already been verified in btreeInitPage()
- ** if PRAGMA cell_size_check=ON.
- */
- if( pc<iCellFirst || pc>iCellLast ){
- return SQLITE_CORRUPT_PAGE(pPage);
- }
- assert( pc>=iCellFirst && pc<=iCellLast );
- size = pPage->xCellSize(pPage, &src[pc]);
- cbrk -= size;
- if( cbrk<iCellFirst || pc+size>usableSize ){
- return SQLITE_CORRUPT_PAGE(pPage);
- }
- assert( cbrk+size<=usableSize && cbrk>=iCellFirst );
- testcase( cbrk+size==usableSize );
- testcase( pc+size==usableSize );
- put2byte(pAddr, cbrk);
- if( temp==0 ){
- int x;
- if( cbrk==pc ) continue;
- temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
- x = get2byte(&data[hdr+5]);
- memcpy(&temp[x], &data[x], (cbrk+size) - x);
- src = temp;
+ iCellStart = get2byte(&data[hdr+5]);
+ if( nCell>0 ){
+ temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
+ memcpy(temp, data, usableSize);
+ src = temp;
+ for(i=0; i<nCell; i++){
+ u8 *pAddr; /* The i-th cell pointer */
+ pAddr = &data[cellOffset + i*2];
+ pc = get2byte(pAddr);
+ testcase( pc==iCellFirst );
+ testcase( pc==iCellLast );
+ /* These conditions have already been verified in btreeInitPage()
+ ** if PRAGMA cell_size_check=ON.
+ */
+ if( pc>iCellLast ){
+ return SQLITE_CORRUPT_PAGE(pPage);
+ }
+ assert( pc>=0 && pc<=iCellLast );
+ size = pPage->xCellSize(pPage, &src[pc]);
+ cbrk -= size;
+ if( cbrk<iCellStart || pc+size>usableSize ){
+ return SQLITE_CORRUPT_PAGE(pPage);
+ }
+ assert( cbrk+size<=usableSize && cbrk>=iCellStart );
+ testcase( cbrk+size==usableSize );
+ testcase( pc+size==usableSize );
+ put2byte(pAddr, cbrk);
+ memcpy(&data[cbrk], &src[pc], size);
}
- memcpy(&data[cbrk], &src[pc], size);
}
data[hdr+7] = 0;
- defragment_out:
+defragment_out:
assert( pPage->nFree>=0 );
if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){
return SQLITE_CORRUPT_PAGE(pPage);
@@ -1545,7 +1695,8 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
const int hdr = pPg->hdrOffset; /* Offset to page header */
u8 * const aData = pPg->aData; /* Page data */
int iAddr = hdr + 1; /* Address of ptr to pc */
- int pc = get2byte(&aData[iAddr]); /* Address of a free slot */
+ u8 *pTmp = &aData[iAddr]; /* Temporary ptr into aData[] */
+ int pc = get2byte(pTmp); /* Address of a free slot */
int x; /* Excess size of the slot */
int maxPC = pPg->pBt->usableSize - nByte; /* Max address for a usable slot */
int size; /* Size of the free slot */
@@ -1555,7 +1706,8 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
/* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each
** freeblock form a big-endian integer which is the size of the freeblock
** in bytes, including the 4-byte header. */
- size = get2byte(&aData[pc+2]);
+ pTmp = &aData[pc+2];
+ size = get2byte(pTmp);
if( (x = size - nByte)>=0 ){
testcase( x==4 );
testcase( x==3 );
@@ -1568,6 +1720,7 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
** fragmented bytes within the page. */
memcpy(&aData[iAddr], &aData[pc], 2);
aData[hdr+7] += (u8)x;
+ return &aData[pc];
}else if( x+pc > maxPC ){
/* This slot extends off the end of the usable part of the page */
*pRc = SQLITE_CORRUPT_PAGE(pPg);
@@ -1580,10 +1733,11 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
return &aData[pc + x];
}
iAddr = pc;
- pc = get2byte(&aData[pc]);
- if( pc<=iAddr+size ){
+ pTmp = &aData[pc];
+ pc = get2byte(pTmp);
+ if( pc<=iAddr ){
if( pc ){
- /* The next slot in the chain is not past the end of the current slot */
+ /* The next slot in the chain comes before the current slot */
*pRc = SQLITE_CORRUPT_PAGE(pPg);
}
return 0;
@@ -1609,13 +1763,14 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
** allocation is being made in order to insert a new cell, so we will
** also end up needing a new cell pointer.
*/
-static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
+static SQLITE_INLINE int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */
u8 * const data = pPage->aData; /* Local cache of pPage->aData */
int top; /* First byte of cell content area */
int rc = SQLITE_OK; /* Integer return code */
+ u8 *pTmp; /* Temp ptr into data[] */
int gap; /* First byte of gap between cell pointers and cell content */
-
+
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( pPage->pBt );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
@@ -1632,14 +1787,16 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
** then the cell content offset of an empty page wants to be 65536.
** However, that integer is too large to be stored in a 2-byte unsigned
** integer, so a value of 0 is used in its place. */
- top = get2byte(&data[hdr+5]);
- assert( top<=(int)pPage->pBt->usableSize ); /* by btreeComputeFreeSpace() */
+ pTmp = &data[hdr+5];
+ top = get2byte(pTmp);
if( gap>top ){
if( top==0 && pPage->pBt->usableSize==65536 ){
top = 65536;
}else{
return SQLITE_CORRUPT_PAGE(pPage);
}
+ }else if( top>(int)pPage->pBt->usableSize ){
+ return SQLITE_CORRUPT_PAGE(pPage);
}
/* If there is enough space between gap and top for one more cell pointer,
@@ -1655,7 +1812,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
int g2;
assert( pSpace+nByte<=data+pPage->pBt->usableSize );
*pIdx = g2 = (int)(pSpace-data);
- if( NEVER(g2<=gap) ){
+ if( g2<=gap ){
return SQLITE_CORRUPT_PAGE(pPage);
}else{
return SQLITE_OK;
@@ -1701,7 +1858,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
**
** Even though the freeblock list was checked by btreeComputeFreeSpace(),
** that routine will not detect overlap between cells or freeblocks. Nor
-** does it detect cells or freeblocks that encrouch into the reserved bytes
+** does it detect cells or freeblocks that encroach into the reserved bytes
** at the end of the page. So do additional corruption checks inside this
** routine and return SQLITE_CORRUPT if any problems are found.
*/
@@ -1714,6 +1871,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
u16 x; /* Offset to cell content area */
u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */
unsigned char *data = pPage->aData; /* Page content */
+ u8 *pTmp; /* Temporary ptr into data[] */
assert( pPage->pBt!=0 );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
@@ -1721,9 +1879,9 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( iSize>=4 ); /* Minimum cell size is 4 */
- assert( iStart<=pPage->pBt->usableSize-4 );
+ assert( CORRUPT_DB || iStart<=pPage->pBt->usableSize-4 );
- /* The list of freeblocks must be in ascending order. Find the
+ /* The list of freeblocks must be in ascending order. Find the
** spot on the list where iStart should be inserted.
*/
hdr = pPage->hdrOffset;
@@ -1732,7 +1890,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */
}else{
while( (iFreeBlk = get2byte(&data[iPtr]))<iStart ){
- if( iFreeBlk<iPtr+4 ){
+ if( iFreeBlk<=iPtr ){
if( iFreeBlk==0 ) break; /* TH3: corrupt082.100 */
return SQLITE_CORRUPT_PAGE(pPage);
}
@@ -1741,8 +1899,8 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
if( iFreeBlk>pPage->pBt->usableSize-4 ){ /* TH3: corrupt081.100 */
return SQLITE_CORRUPT_PAGE(pPage);
}
- assert( iFreeBlk>iPtr || iFreeBlk==0 );
-
+ assert( iFreeBlk>iPtr || iFreeBlk==0 || CORRUPT_DB );
+
/* At this point:
** iFreeBlk: First freeblock after iStart, or zero if none
** iPtr: The address of a pointer to iFreeBlk
@@ -1759,7 +1917,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
iSize = iEnd - iStart;
iFreeBlk = get2byte(&data[iFreeBlk]);
}
-
+
/* If iPtr is another freeblock (that is, if iPtr is not the freelist
** pointer in the page header) then check to see if iStart should be
** coalesced onto the end of iPtr.
@@ -1776,7 +1934,13 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_PAGE(pPage);
data[hdr+7] -= nFrag;
}
- x = get2byte(&data[hdr+5]);
+ pTmp = &data[hdr+5];
+ x = get2byte(pTmp);
+ if( pPage->pBt->btsFlags & BTS_FAST_SECURE ){
+ /* Overwrite deleted information with zeros when the secure_delete
+ ** option is enabled */
+ memset(&data[iStart], 0, iSize);
+ }
if( iStart<=x ){
/* The new freeblock is at the beginning of the cell content area,
** so just extend the cell content area rather than create another
@@ -1788,14 +1952,9 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
}else{
/* Insert the new freeblock into the freelist */
put2byte(&data[iPtr], iStart);
+ put2byte(&data[iStart], iFreeBlk);
+ put2byte(&data[iStart+2], iSize);
}
- if( pPage->pBt->btsFlags & BTS_FAST_SECURE ){
- /* Overwrite deleted information with zeros when the secure_delete
- ** option is enabled */
- memset(&data[iStart], 0, iSize);
- }
- put2byte(&data[iStart], iFreeBlk);
- put2byte(&data[iStart+2], iSize);
pPage->nFree += iOrigSize;
return SQLITE_OK;
}
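
For reference while reading freeSpace(): the freelist it maintains is a singly-linked list of freeblocks threaded through the cell-content area and anchored in the page header. Schematically (per the b-tree page format):

  /* data[hdr+1..hdr+2]   offset of the first freeblock, or 0 if none
  ** data[hdr+7]          count of fragmented (unusable, <4 byte) free bytes
  **
  ** each freeblock:
  **   bytes 0-1          offset of the next freeblock, or 0 if this is last
  **   bytes 2-3          total size of the freeblock, including this header
  **
  ** which is why the final "insert into the freelist" branch writes the
  ** next-pointer at data[iStart] and the size at data[iStart+2].
  */
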
@@ -1807,57 +1966,67 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
** Only the following combinations are supported. Anything different
** indicates a corrupt database file:
**
-** PTF_ZERODATA
-** PTF_ZERODATA | PTF_LEAF
-** PTF_LEAFDATA | PTF_INTKEY
-** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
+** PTF_ZERODATA (0x02, 2)
+** PTF_LEAFDATA | PTF_INTKEY (0x05, 5)
+** PTF_ZERODATA | PTF_LEAF (0x0a, 10)
+** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF (0x0d, 13)
*/
static int decodeFlags(MemPage *pPage, int flagByte){
BtShared *pBt; /* A copy of pPage->pBt */
assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 );
- flagByte &= ~PTF_LEAF;
- pPage->childPtrSize = 4-4*pPage->leaf;
- pPage->xCellSize = cellSizePtr;
pBt = pPage->pBt;
- if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
- /* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an
- ** interior table b-tree page. */
- assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
- /* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a
- ** leaf table b-tree page. */
- assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
- pPage->intKey = 1;
- if( pPage->leaf ){
+ pPage->max1bytePayload = pBt->max1bytePayload;
+ if( flagByte>=(PTF_ZERODATA | PTF_LEAF) ){
+ pPage->childPtrSize = 0;
+ pPage->leaf = 1;
+ if( flagByte==(PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF) ){
pPage->intKeyLeaf = 1;
+ pPage->xCellSize = cellSizePtrTableLeaf;
pPage->xParseCell = btreeParseCellPtr;
+ pPage->intKey = 1;
+ pPage->maxLocal = pBt->maxLeaf;
+ pPage->minLocal = pBt->minLeaf;
+ }else if( flagByte==(PTF_ZERODATA | PTF_LEAF) ){
+ pPage->intKey = 0;
+ pPage->intKeyLeaf = 0;
+ pPage->xCellSize = cellSizePtrIdxLeaf;
+ pPage->xParseCell = btreeParseCellPtrIndex;
+ pPage->maxLocal = pBt->maxLocal;
+ pPage->minLocal = pBt->minLocal;
}else{
+ pPage->intKey = 0;
+ pPage->intKeyLeaf = 0;
+ pPage->xCellSize = cellSizePtrIdxLeaf;
+ pPage->xParseCell = btreeParseCellPtrIndex;
+ return SQLITE_CORRUPT_PAGE(pPage);
+ }
+ }else{
+ pPage->childPtrSize = 4;
+ pPage->leaf = 0;
+ if( flagByte==(PTF_ZERODATA) ){
+ pPage->intKey = 0;
+ pPage->intKeyLeaf = 0;
+ pPage->xCellSize = cellSizePtr;
+ pPage->xParseCell = btreeParseCellPtrIndex;
+ pPage->maxLocal = pBt->maxLocal;
+ pPage->minLocal = pBt->minLocal;
+ }else if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
pPage->intKeyLeaf = 0;
pPage->xCellSize = cellSizePtrNoPayload;
pPage->xParseCell = btreeParseCellPtrNoPayload;
+ pPage->intKey = 1;
+ pPage->maxLocal = pBt->maxLeaf;
+ pPage->minLocal = pBt->minLeaf;
+ }else{
+ pPage->intKey = 0;
+ pPage->intKeyLeaf = 0;
+ pPage->xCellSize = cellSizePtr;
+ pPage->xParseCell = btreeParseCellPtrIndex;
+ return SQLITE_CORRUPT_PAGE(pPage);
}
- pPage->maxLocal = pBt->maxLeaf;
- pPage->minLocal = pBt->minLeaf;
- }else if( flagByte==PTF_ZERODATA ){
- /* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an
- ** interior index b-tree page. */
- assert( (PTF_ZERODATA)==2 );
- /* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a
- ** leaf index b-tree page. */
- assert( (PTF_ZERODATA|PTF_LEAF)==10 );
- pPage->intKey = 0;
- pPage->intKeyLeaf = 0;
- pPage->xParseCell = btreeParseCellPtrIndex;
- pPage->maxLocal = pBt->maxLocal;
- pPage->minLocal = pBt->minLocal;
- }else{
- /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
- ** an error. */
- return SQLITE_CORRUPT_PAGE(pPage);
}
- pPage->max1bytePayload = pBt->max1bytePayload;
return SQLITE_OK;
}
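
A worked pass through the restructured decodeFlags() logic, for flagByte==13 (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF, a table leaf page): the initial test flagByte>=(PTF_ZERODATA|PTF_LEAF), i.e. >=10, selects the leaf branch, and the exact match then configures the page as an intkey leaf:

  /* childPtrSize = 0,  leaf = 1,  intKey = 1,  intKeyLeaf = 1
  ** xCellSize    = cellSizePtrTableLeaf
  ** xParseCell   = btreeParseCellPtr
  ** maxLocal     = pBt->maxLeaf,   minLocal = pBt->minLeaf
  **
  ** An unrecognized value such as 7 falls into the interior branch (7 < 10),
  ** matches neither 2 nor 5, and is reported with SQLITE_CORRUPT_PAGE().
  */
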
@@ -1906,7 +2075,7 @@ static int btreeComputeFreeSpace(MemPage *pPage){
/* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
** always be at least one cell before the first freeblock.
*/
- return SQLITE_CORRUPT_PAGE(pPage);
+ return SQLITE_CORRUPT_PAGE(pPage);
}
while( 1 ){
if( pc>iCellLast ){
@@ -1945,7 +2114,7 @@ static int btreeComputeFreeSpace(MemPage *pPage){
/*
** Do additional sanity check after btreeInitPage() if
-** PRAGMA cell_size_check=ON
+** PRAGMA cell_size_check=ON
*/
static SQLITE_NOINLINE int btreeCellSizeCheck(MemPage *pPage){
int iCellFirst; /* First allowable cell or freeblock offset */
@@ -1983,7 +2152,7 @@ static SQLITE_NOINLINE int btreeCellSizeCheck(MemPage *pPage){
** Initialize the auxiliary information for a disk block.
**
** Return SQLITE_OK on success. If we see that the page does
-** not contain a well-formed database page, then return
+** not contain a well-formed database page, then return
** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed. It only shows that
** we failed to detect any corruption.
@@ -2012,7 +2181,7 @@ static int btreeInitPage(MemPage *pPage){
pPage->nOverflow = 0;
pPage->cellOffset = pPage->hdrOffset + 8 + pPage->childPtrSize;
pPage->aCellIdx = data + pPage->childPtrSize + 8;
- pPage->aDataEnd = pPage->aData + pBt->usableSize;
+ pPage->aDataEnd = pPage->aData + pBt->pageSize;
pPage->aDataOfst = pPage->aData + pPage->childPtrSize;
/* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
** number of cells on the page. */
@@ -2047,7 +2216,7 @@ static void zeroPage(MemPage *pPage, int flags){
u8 hdr = pPage->hdrOffset;
u16 first;
- assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
+ assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno || CORRUPT_DB );
assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
assert( sqlite3PagerGetData(pPage->pDbPage) == data );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
@@ -2063,7 +2232,7 @@ static void zeroPage(MemPage *pPage, int flags){
pPage->nFree = (u16)(pBt->usableSize - first);
decodeFlags(pPage, flags);
pPage->cellOffset = first;
- pPage->aDataEnd = &data[pBt->usableSize];
+ pPage->aDataEnd = &data[pBt->pageSize];
pPage->aCellIdx = &data[first];
pPage->aDataOfst = &data[pPage->childPtrSize];
pPage->nOverflow = 0;
@@ -2088,7 +2257,7 @@ static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
pPage->hdrOffset = pgno==1 ? 100 : 0;
}
assert( pPage->aData==sqlite3PagerGetData(pDbPage) );
- return pPage;
+ return pPage;
}
/*
@@ -2139,78 +2308,50 @@ static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){
** error, return ((unsigned int)-1).
*/
static Pgno btreePagecount(BtShared *pBt){
- assert( (pBt->nPage & 0x80000000)==0 || CORRUPT_DB );
return pBt->nPage;
}
-u32 sqlite3BtreeLastPage(Btree *p){
+Pgno sqlite3BtreeLastPage(Btree *p){
assert( sqlite3BtreeHoldsMutex(p) );
- return btreePagecount(p->pBt) & 0x7fffffff;
+ return btreePagecount(p->pBt);
}
/*
** Get a page from the pager and initialize it.
-**
-** If pCur!=0 then the page is being fetched as part of a moveToChild()
-** call. Do additional sanity checking on the page in this case.
-** And if the fetch fails, this routine must decrement pCur->iPage.
-**
-** The page is fetched as read-write unless pCur is not NULL and is
-** a read-only cursor.
-**
-** If an error occurs, then *ppPage is undefined. It
-** may remain unchanged, or it may be set to an invalid value.
*/
static int getAndInitPage(
BtShared *pBt, /* The database file */
Pgno pgno, /* Number of the page to get */
MemPage **ppPage, /* Write the page pointer here */
- BtCursor *pCur, /* Cursor to receive the page, or NULL */
int bReadOnly /* True for a read-only page */
){
int rc;
DbPage *pDbPage;
+ MemPage *pPage;
assert( sqlite3_mutex_held(pBt->mutex) );
- assert( pCur==0 || ppPage==&pCur->pPage );
- assert( pCur==0 || bReadOnly==pCur->curPagerFlags );
- assert( pCur==0 || pCur->iPage>0 );
if( pgno>btreePagecount(pBt) ){
- rc = SQLITE_CORRUPT_BKPT;
- goto getAndInitPage_error1;
+ *ppPage = 0;
+ return SQLITE_CORRUPT_BKPT;
}
rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly);
if( rc ){
- goto getAndInitPage_error1;
+ *ppPage = 0;
+ return rc;
}
- *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
- if( (*ppPage)->isInit==0 ){
+ pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
+ if( pPage->isInit==0 ){
btreePageFromDbPage(pDbPage, pgno, pBt);
- rc = btreeInitPage(*ppPage);
+ rc = btreeInitPage(pPage);
if( rc!=SQLITE_OK ){
- goto getAndInitPage_error2;
+ releasePage(pPage);
+ *ppPage = 0;
+ return rc;
}
}
- assert( (*ppPage)->pgno==pgno );
- assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) );
-
- /* If obtaining a child page for a cursor, we must verify that the page is
- ** compatible with the root page. */
- if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){
- rc = SQLITE_CORRUPT_PGNO(pgno);
- goto getAndInitPage_error2;
- }
+ assert( pPage->pgno==pgno || CORRUPT_DB );
+ assert( pPage->aData==sqlite3PagerGetData(pDbPage) );
+ *ppPage = pPage;
return SQLITE_OK;
-
-getAndInitPage_error2:
- releasePage(*ppPage);
-getAndInitPage_error1:
- if( pCur ){
- pCur->iPage--;
- pCur->pPage = pCur->apPage[pCur->iPage];
- }
- testcase( pgno==0 );
- assert( pgno!=0 || rc==SQLITE_CORRUPT );
- return rc;
}
/*
@@ -2293,7 +2434,7 @@ static void pageReinit(DbPage *pData){
** call to btreeInitPage() will likely return SQLITE_CORRUPT.
** But no harm is done by this. And it is very important that
** btreeInitPage() be called on every btree page so we make
- ** the call for every page that comes in for re-initing. */
+ ** the call for every page that comes in for re-initializing. */
btreeInitPage(pPage);
}
}
@@ -2311,11 +2452,11 @@ static int btreeInvokeBusyHandler(void *pArg){
/*
** Open a database file.
-**
+**
** zFilename is the name of the database file. If zFilename is NULL
** then an ephemeral database is created. The ephemeral database might
** be exclusively in memory, or it might use a disk-based memory cache.
-** Either way, the ephemeral database will be automatically deleted
+** Either way, the ephemeral database will be automatically deleted
** when sqlite3BtreeClose() is called.
**
** If zFilename is ":memory:" then an in-memory database is created
@@ -2348,7 +2489,7 @@ int sqlite3BtreeOpen(
/* True if opening an ephemeral, temporary database */
const int isTempDb = zFilename==0 || zFilename[0]==0;
- /* Set the variable isMemdb to true for an in-memory database, or
+ /* Set the variable isMemdb to true for an in-memory database, or
** false for a file-based database.
*/
#ifdef SQLITE_OMIT_MEMORYDB
@@ -2471,7 +2612,10 @@ int sqlite3BtreeOpen(
assert( sizeof(u32)==4 );
assert( sizeof(u16)==2 );
assert( sizeof(Pgno)==4 );
-
+
+ /* Suppress false-positive compiler warning from PVS-Studio */
+ memset(&zDbHeader[16], 0, 8);
+
pBt = sqlite3MallocZero( sizeof(*pBt) );
if( pBt==0 ){
rc = SQLITE_NOMEM_BKPT;
@@ -2490,7 +2634,7 @@ int sqlite3BtreeOpen(
pBt->db = db;
sqlite3PagerSetBusyHandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
p->pBt = pBt;
-
+
pBt->pCursor = 0;
pBt->pPage1 = 0;
if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY;
@@ -2534,7 +2678,7 @@ int sqlite3BtreeOpen(
if( rc ) goto btree_open_out;
pBt->usableSize = pBt->pageSize - nReserve;
assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
-
+
#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
/* Add the new BtShared object to the linked list sharable BtShareds.
*/
@@ -2606,7 +2750,7 @@ btree_open_out:
** do not change the pager-cache size.
*/
if( sqlite3BtreeSchema(p, 0, 0)==0 ){
- sqlite3PagerSetCachesize(p->pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE);
+ sqlite3BtreeSetCacheSize(p, SQLITE_DEFAULT_CACHE_SIZE);
}
pFile = sqlite3PagerFile(pBt->pPager);
@@ -2663,34 +2807,42 @@ static int removeFromSharingList(BtShared *pBt){
}
/*
-** Make sure pBt->pTmpSpace points to an allocation of
+** Make sure pBt->pTmpSpace points to an allocation of
** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child
** pointer.
*/
-static void allocateTempSpace(BtShared *pBt){
- if( !pBt->pTmpSpace ){
- pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize );
-
- /* One of the uses of pBt->pTmpSpace is to format cells before
- ** inserting them into a leaf page (function fillInCell()). If
- ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes
- ** by the various routines that manipulate binary cells. Which
- ** can mean that fillInCell() only initializes the first 2 or 3
- ** bytes of pTmpSpace, but that the first 4 bytes are copied from
- ** it into a database page. This is not actually a problem, but it
- ** does cause a valgrind error when the 1 or 2 bytes of unitialized
- ** data is passed to system call write(). So to avoid this error,
- ** zero the first 4 bytes of temp space here.
- **
- ** Also: Provide four bytes of initialized space before the
- ** beginning of pTmpSpace as an area available to prepend the
- ** left-child pointer to the beginning of a cell.
- */
- if( pBt->pTmpSpace ){
- memset(pBt->pTmpSpace, 0, 8);
- pBt->pTmpSpace += 4;
- }
+static SQLITE_NOINLINE int allocateTempSpace(BtShared *pBt){
+ assert( pBt!=0 );
+ assert( pBt->pTmpSpace==0 );
+ /* This routine is called only by btreeCursor() when allocating the
+ ** first write cursor for the BtShared object */
+ assert( pBt->pCursor!=0 && (pBt->pCursor->curFlags & BTCF_WriteFlag)!=0 );
+ pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize );
+ if( pBt->pTmpSpace==0 ){
+ BtCursor *pCur = pBt->pCursor;
+ pBt->pCursor = pCur->pNext; /* Unlink the cursor */
+ memset(pCur, 0, sizeof(*pCur));
+ return SQLITE_NOMEM_BKPT;
}
+
+ /* One of the uses of pBt->pTmpSpace is to format cells before
+ ** inserting them into a leaf page (function fillInCell()). If
+ ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes
+ ** by the various routines that manipulate binary cells. Which
+ ** can mean that fillInCell() only initializes the first 2 or 3
+ ** bytes of pTmpSpace, but that the first 4 bytes are copied from
+ ** it into a database page. This is not actually a problem, but it
+ ** does cause a valgrind error when the 1 or 2 bytes of uninitialized
+ ** data is passed to system call write(). So to avoid this error,
+ ** zero the first 4 bytes of temp space here.
+ **
+ ** Also: Provide four bytes of initialized space before the
+ ** beginning of pTmpSpace as an area available to prepend the
+ ** left-child pointer to the beginning of a cell.
+ */
+ memset(pBt->pTmpSpace, 0, 8);
+ pBt->pTmpSpace += 4;
+ return SQLITE_OK;
}
/*
@@ -2709,19 +2861,23 @@ static void freeTempSpace(BtShared *pBt){
*/
int sqlite3BtreeClose(Btree *p){
BtShared *pBt = p->pBt;
- BtCursor *pCur;
/* Close all cursors opened via this handle. */
assert( sqlite3_mutex_held(p->db->mutex) );
sqlite3BtreeEnter(p);
- pCur = pBt->pCursor;
- while( pCur ){
- BtCursor *pTmp = pCur;
- pCur = pCur->pNext;
- if( pTmp->pBtree==p ){
- sqlite3BtreeCloseCursor(pTmp);
+
+ /* Verify that no other cursors have this Btree open */
+#ifdef SQLITE_DEBUG
+ {
+ BtCursor *pCur = pBt->pCursor;
+ while( pCur ){
+ BtCursor *pTmp = pCur;
+ pCur = pCur->pNext;
+ assert( pTmp->pBtree!=p );
+
}
}
+#endif
/* Rollback any active transaction and free the handle structure.
** The call to sqlite3BtreeRollback() drops any table-locks held by
@@ -2731,7 +2887,7 @@ int sqlite3BtreeClose(Btree *p){
sqlite3BtreeLeave(p);
/* If there are still other outstanding references to the shared-btree
- ** structure, return now. The remainder of this procedure cleans
+ ** structure, return now. The remainder of this procedure cleans
** up the shared-btree.
*/
assert( p->wantToLock==0 && p->locked==0 );
@@ -2837,7 +2993,7 @@ int sqlite3BtreeSetPagerFlags(
/*
** Change the default pages size and the number of reserved bytes per page.
-** Or, if the page size has already been fixed, return SQLITE_READONLY
+** Or, if the page size has already been fixed, return SQLITE_READONLY
** without changing anything.
**
** The page size must be a power of 2 between 512 and 65536. If the page
@@ -2873,6 +3029,7 @@ int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){
((pageSize-1)&pageSize)==0 ){
assert( (pageSize & 7)==0 );
assert( !pBt->pCursor );
+ if( nReserve>32 && pageSize==512 ) pageSize = 1024;
pBt->pageSize = (u32)pageSize;
freeTempSpace(pBt);
}
@@ -2896,7 +3053,7 @@ int sqlite3BtreeGetPageSize(Btree *p){
** held.
**
** This is useful in one special case in the backup API code where it is
-** known that the shared b-tree mutex is held, but the mutex on the
+** known that the shared b-tree mutex is held, but the mutex on the
** database handle that owns *p is not. In this case if sqlite3BtreeEnter()
** were to be called, it might collide with some other operation on the
** database handle that owns *p, causing undefined behavior.
@@ -2910,7 +3067,7 @@ int sqlite3BtreeGetReserveNoMutex(Btree *p){
/*
** Return the number of bytes of space at the end of every page that
-** are intentually left unused. This is the "reserved" space that is
+** are intentionally left unused. This is the "reserved" space that is
** sometimes used by extensions.
**
** The value returned is the larger of the current reserve size and
@@ -2932,8 +3089,8 @@ int sqlite3BtreeGetRequestedReserve(Btree *p){
** No changes are made if mxPage is 0 or negative.
** Regardless of the value of mxPage, return the maximum page count.
*/
-int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
- int n;
+Pgno sqlite3BtreeMaxPageCount(Btree *p, Pgno mxPage){
+ Pgno n;
sqlite3BtreeEnter(p);
n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
sqlite3BtreeLeave(p);
@@ -2976,7 +3133,7 @@ int sqlite3BtreeSecureDelete(Btree *p, int newFlag){
/*
** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
-** is disabled. The default value for the auto-vacuum property is
+** is disabled. The default value for the auto-vacuum property is
** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
*/
int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
@@ -3000,7 +3157,7 @@ int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
}
/*
-** Return the value of the 'auto-vacuum' property. If auto-vacuum is
+** Return the value of the 'auto-vacuum' property. If auto-vacuum is
** enabled 1 is returned. Otherwise 0.
*/
int sqlite3BtreeGetAutoVacuum(Btree *p){
@@ -3032,9 +3189,9 @@ static void setDefaultSyncFlag(BtShared *pBt, u8 safety_level){
Db *pDb;
if( (db=pBt->db)!=0 && (pDb=db->aDb)!=0 ){
while( pDb->pBt==0 || pDb->pBt->pBt!=pBt ){ pDb++; }
- if( pDb->bSyncSet==0
- && pDb->safety_level!=safety_level
- && pDb!=&db->aDb[1]
+ if( pDb->bSyncSet==0
+ && pDb->safety_level!=safety_level
+ && pDb!=&db->aDb[1]
){
pDb->safety_level = safety_level;
sqlite3PagerSetFlags(pBt->pPager,
@@ -3057,14 +3214,13 @@ static int newDatabase(BtShared*);
** SQLITE_OK is returned on success. If the file is not a
** well-formed database file, then SQLITE_CORRUPT is returned.
** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
-** is returned if we run out of memory.
+** is returned if we run out of memory.
*/
static int lockBtree(BtShared *pBt){
int rc; /* Result code from subfunctions */
MemPage *pPage1; /* Page 1 of the database file */
u32 nPage; /* Number of pages in the database */
u32 nPageFile = 0; /* Number of pages in the database file */
- u32 nPageHeader; /* Number of pages in the database according to hdr */
assert( sqlite3_mutex_held(pBt->mutex) );
assert( pBt->pPage1==0 );
@@ -3074,9 +3230,9 @@ static int lockBtree(BtShared *pBt){
if( rc!=SQLITE_OK ) return rc;
/* Do some checking to help insure the file we opened really is
- ** a valid database file.
+ ** a valid database file.
*/
- nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData);
+ nPage = get4byte(28+(u8*)pPage1->aData);
sqlite3PagerPagecount(pBt->pPager, (int*)&nPageFile);
if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){
nPage = nPageFile;
@@ -3111,8 +3267,8 @@ static int lockBtree(BtShared *pBt){
goto page1_init_failed;
}
- /* If the write version is set to 2, this database should be accessed
- ** in WAL mode. If the log is not already open, open it now. Then
+ /* If the read version is set to 2, this database should be accessed
+ ** in WAL mode. If the log is not already open, open it now. Then
** return SQLITE_OK and return without populating BtShared.pPage1.
** The caller detects this and calls this function again. This is
** required as the version of page 1 currently in the page1 buffer
@@ -3153,16 +3309,15 @@ static int lockBtree(BtShared *pBt){
/* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two
** between 512 and 65536 inclusive. */
if( ((pageSize-1)&pageSize)!=0
- || pageSize>SQLITE_MAX_PAGE_SIZE
- || pageSize<=256
+ || pageSize>SQLITE_MAX_PAGE_SIZE
+ || pageSize<=256
){
goto page1_init_failed;
}
- pBt->btsFlags |= BTS_PAGESIZE_FIXED;
assert( (pageSize & 7)==0 );
/* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte
** integer at offset 20 is the number of bytes of space at the end of
- ** each page to reserve for extensions.
+ ** each page to reserve for extensions.
**
** EVIDENCE-OF: R-37497-42412 The size of the reserved region is
** determined by the one-byte unsigned integer found at an offset of 20
@@ -3178,14 +3333,19 @@ static int lockBtree(BtShared *pBt){
releasePageOne(pPage1);
pBt->usableSize = usableSize;
pBt->pageSize = pageSize;
+ pBt->btsFlags |= BTS_PAGESIZE_FIXED;
freeTempSpace(pBt);
rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize,
pageSize-usableSize);
return rc;
}
- if( sqlite3WritableSchema(pBt->db)==0 && nPage>nPageFile ){
- rc = SQLITE_CORRUPT_BKPT;
- goto page1_init_failed;
+ if( nPage>nPageFile ){
+ if( sqlite3WritableSchema(pBt->db)==0 ){
+ rc = SQLITE_CORRUPT_BKPT;
+ goto page1_init_failed;
+ }else{
+ nPage = nPageFile;
+ }
}
/* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to
** be less than 480. In other words, if the page size is 512, then the
@@ -3193,6 +3353,7 @@ static int lockBtree(BtShared *pBt){
if( usableSize<480 ){
goto page1_init_failed;
}
+ pBt->btsFlags |= BTS_PAGESIZE_FIXED;
pBt->pageSize = pageSize;
pBt->usableSize = usableSize;
#ifndef SQLITE_OMIT_AUTOVACUUM
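The page-size validation in lockBtree() above relies on a standard bit trick: a value shares no bits with its predecessor exactly when it is a power of two. A minimal stand-alone sketch of the same constraints (isValidPageSize is a hypothetical helper, not part of btree.c; 65536 is the default SQLITE_MAX_PAGE_SIZE):

static int isValidPageSize(unsigned int pageSize){
  return pageSize>=512                    /* smallest legal page size */
      && pageSize<=65536                  /* SQLITE_MAX_PAGE_SIZE in default builds */
      && (pageSize & (pageSize-1))==0;    /* power of two: no bits shared with pageSize-1 */
}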
@@ -3252,7 +3413,7 @@ static int countValidCursors(BtShared *pBt, int wrOnly){
int r = 0;
for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0)
- && pCur->eState!=CURSOR_FAULT ) r++;
+ && pCur->eState!=CURSOR_FAULT ) r++;
}
return r;
}
@@ -3261,7 +3422,7 @@ static int countValidCursors(BtShared *pBt, int wrOnly){
/*
** If there are no outstanding cursors and we are not in the middle
** of a transaction but there is a read lock on the database, then
-** this routine unrefs the first page of the database file which
+** this routine unrefs the first page of the database file which
** has the effect of releasing the read lock.
**
** If there is a transaction in progress, this routine is a no-op.
@@ -3345,8 +3506,8 @@ int sqlite3BtreeNewDb(Btree *p){
** upgraded to exclusive by calling this routine a second time - the
** exclusivity flag only works for a new transaction.
**
-** A write-transaction must be started before attempting any
-** changes to the database. None of the following routines
+** A write-transaction must be started before attempting any
+** changes to the database. None of the following routines
** will work unless a transaction is started first:
**
** sqlite3BtreeCreateTable()
@@ -3360,7 +3521,7 @@ int sqlite3BtreeNewDb(Btree *p){
** If an initial attempt to acquire the lock fails because of lock contention
** and the database was previously unlocked, then invoke the busy handler
** if there is one. But if there was previously a read-lock, do not
-** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
+** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
** returned when there is already a read-lock in order to avoid a deadlock.
**
** Suppose there are two processes A and B. A has a read lock and B has
@@ -3371,7 +3532,11 @@ int sqlite3BtreeNewDb(Btree *p){
** when A already has a read lock, we encourage A to give up and let B
** proceed.
*/
-int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
+static SQLITE_NOINLINE int btreeBeginTrans(
+ Btree *p, /* The btree in which to start the transaction */
+ int wrflag, /* True to start a write transaction */
+ int *pSchemaVersion /* Put schema version number here, if not NULL */
+){
BtShared *pBt = p->pBt;
Pager *pPager = pBt->pPager;
int rc = SQLITE_OK;
@@ -3388,8 +3553,8 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
}
assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 );
- if( (p->db->flags & SQLITE_ResetDatabase)
- && sqlite3PagerIsreadonly(pPager)==0
+ if( (p->db->flags & SQLITE_ResetDatabase)
+ && sqlite3PagerIsreadonly(pPager)==0
){
pBt->btsFlags &= ~BTS_READ_ONLY;
}
@@ -3403,7 +3568,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
#ifndef SQLITE_OMIT_SHARED_CACHE
{
sqlite3 *pBlock = 0;
- /* If another database handle has already opened a write transaction
+ /* If another database handle has already opened a write transaction
** on this shared-btree structure and a second write transaction is
** requested, return SQLITE_LOCKED.
*/
@@ -3428,8 +3593,8 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
}
#endif
- /* Any read-only or read-write transaction implies a read-lock on
- ** page 1. So if some other shared-cache client already has a write-lock
+ /* Any read-only or read-write transaction implies a read-lock on
+ ** page 1. So if some other shared-cache client already has a write-lock
** on page 1, the transaction cannot be opened. */
rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK);
if( SQLITE_OK!=rc ) goto trans_begun;
@@ -3452,7 +3617,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
/* Call lockBtree() until either pBt->pPage1 is populated or
** lockBtree() returns something other than SQLITE_OK. lockBtree()
** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after
- ** reading page 1 it discovers that the page-size of the database
+ ** reading page 1 it discovers that the page-size of the database
** file is not pBt->pageSize. In this case lockBtree() will update
** pBt->pageSize to the page-size of the file on disk.
*/
@@ -3473,7 +3638,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
}
}
}
-
+
if( rc!=SQLITE_OK ){
(void)sqlite3PagerWalWriteLock(pPager, 0);
unlockBtreeIfUnused(pBt);
@@ -3512,7 +3677,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
/* If the db-size header field is incorrect (as it may be if an old
** client has been writing the database file), update it now. Doing
- ** this sooner rather than later means the database size can safely
+ ** this sooner rather than later means the database size can safely
** re-read the database size from page 1 if a savepoint or transaction
** rollback occurs within the transaction.
*/
@@ -3543,6 +3708,28 @@ trans_begun:
sqlite3BtreeLeave(p);
return rc;
}
+int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
+ BtShared *pBt;
+ if( p->sharable
+ || p->inTrans==TRANS_NONE
+ || (p->inTrans==TRANS_READ && wrflag!=0)
+ ){
+ return btreeBeginTrans(p,wrflag,pSchemaVersion);
+ }
+ pBt = p->pBt;
+ if( pSchemaVersion ){
+ *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]);
+ }
+ if( wrflag ){
+ /* This call makes sure that the pager has the correct number of
+ ** open savepoints. If the second parameter is greater than 0 and
+ ** the sub-journal is not already open, then it will be opened here.
+ */
+ return sqlite3PagerOpenSavepoint(pBt->pPager, p->db->nSavepoint);
+ }else{
+ return SQLITE_OK;
+ }
+}
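The new sqlite3BtreeBeginTrans() wrapper above only falls through to the full btreeBeginTrans() when the handle is sharable, has no open transaction, or is upgrading a read transaction to a write transaction; otherwise it merely reports the schema cookie and tops up the savepoint count. A hedged sketch of the call pattern (openAndUpgrade is illustrative, not part of SQLite):

static int openAndUpgrade(Btree *p){
  int iSchemaVersion;
  int rc;
  /* First call: p->inTrans==TRANS_NONE, so the full btreeBeginTrans()
  ** path runs and opens a read transaction. */
  rc = sqlite3BtreeBeginTrans(p, 0, &iSchemaVersion);
  if( rc==SQLITE_OK ){
    /* Upgrading TRANS_READ to a write transaction also takes the full
    ** path.  A further read-only request on a non-sharable handle with
    ** a transaction already open would take the new fast path instead. */
    rc = sqlite3BtreeBeginTrans(p, 1, 0);
  }
  return rc;
}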
#ifndef SQLITE_OMIT_AUTOVACUUM
@@ -3587,7 +3774,7 @@ static int setChildPtrmaps(MemPage *pPage){
** that it points to iTo. Parameter eType describes the type of pointer to
** be modified, as follows:
**
-** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
+** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
** page of pPage.
**
** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
@@ -3629,15 +3816,18 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
}
}
}else{
+ if( pCell+4 > pPage->aData+pPage->pBt->usableSize ){
+ return SQLITE_CORRUPT_PAGE(pPage);
+ }
if( get4byte(pCell)==iFrom ){
put4byte(pCell, iTo);
break;
}
}
}
-
+
if( i==nCell ){
- if( eType!=PTRMAP_BTREE ||
+ if( eType!=PTRMAP_BTREE ||
get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
return SQLITE_CORRUPT_PAGE(pPage);
}
@@ -3649,11 +3839,11 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
/*
-** Move the open database page pDbPage to location iFreePage in the
+** Move the open database page pDbPage to location iFreePage in the
** database. The pDbPage reference remains valid.
**
** The isCommit flag indicates that there is no need to remember that
-** the journal needs to be sync()ed before database page pDbPage->pgno
+** the journal needs to be sync()ed before database page pDbPage->pgno
** can be written to. The caller has already promised not to write to that
** page.
*/
@@ -3670,14 +3860,14 @@ static int relocatePage(
Pager *pPager = pBt->pPager;
int rc;
- assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
+ assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
assert( sqlite3_mutex_held(pBt->mutex) );
assert( pDbPage->pBt==pBt );
if( iDbPage<3 ) return SQLITE_CORRUPT_BKPT;
/* Move page iDbPage from its current location to page number iFreePage */
- TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n",
+ TRACE(("AUTOVACUUM: Moving %u to free page %u (ptr page %u type %u)\n",
iDbPage, iFreePage, iPtrPage, eType));
rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit);
if( rc!=SQLITE_OK ){
@@ -3736,19 +3926,19 @@ static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
/*
** Perform a single step of an incremental-vacuum. If successful, return
-** SQLITE_OK. If there is no work to do (and therefore no point in
-** calling this function again), return SQLITE_DONE. Or, if an error
+** SQLITE_OK. If there is no work to do (and therefore no point in
+** calling this function again), return SQLITE_DONE. Or, if an error
** occurs, return some other error code.
**
-** More specifically, this function attempts to re-organize the database so
+** More specifically, this function attempts to re-organize the database so
** that the last page of the file currently in use is no longer in use.
**
** Parameter nFin is the number of pages that this database would contain
** were this function called until it returns SQLITE_DONE.
**
-** If the bCommit parameter is non-zero, this function assumes that the
-** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE
-** or an error. bCommit is passed true for an auto-vacuum-on-commit
+** If the bCommit parameter is non-zero, this function assumes that the
+** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE
+** or an error. bCommit is passed true for an auto-vacuum-on-commit
** operation, or false for an incremental vacuum.
*/
static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
@@ -3779,7 +3969,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
if( bCommit==0 ){
/* Remove the page from the files free-list. This is not required
** if bCommit is non-zero. In that case, the free-list will be
- ** truncated to zero after this function returns, so it doesn't
+ ** truncated to zero after this function returns, so it doesn't
** matter if it still contains some garbage entries.
*/
Pgno iFreePg;
@@ -3815,15 +4005,20 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
}
do {
MemPage *pFreePg;
+ Pgno dbSize = btreePagecount(pBt);
rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode);
if( rc!=SQLITE_OK ){
releasePage(pLastPg);
return rc;
}
releasePage(pFreePg);
+ if( iFreePg>dbSize ){
+ releasePage(pLastPg);
+ return SQLITE_CORRUPT_BKPT;
+ }
}while( bCommit && iFreePg>nFin );
assert( iFreePg<iLastPg );
-
+
rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, bCommit);
releasePage(pLastPg);
if( rc!=SQLITE_OK ){
@@ -3844,7 +4039,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
/*
** The database opened by the first argument is an auto-vacuum database
-** nOrig pages in size containing nFree free pages. Return the expected
+** nOrig pages in size containing nFree free pages. Return the expected
** size of the database in pages following an auto-vacuum operation.
*/
static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){
@@ -3871,7 +4066,7 @@ static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){
**
** If the incremental vacuum is finished after this function has run,
** SQLITE_DONE is returned. If it is not finished, but no error occurred,
-** SQLITE_OK is returned. Otherwise an SQLite error code.
+** SQLITE_OK is returned. Otherwise an SQLite error code.
*/
int sqlite3BtreeIncrVacuum(Btree *p){
int rc;
@@ -3886,7 +4081,7 @@ int sqlite3BtreeIncrVacuum(Btree *p){
Pgno nFree = get4byte(&pBt->pPage1->aData[36]);
Pgno nFin = finalDbSize(pBt, nOrig, nFree);
- if( nOrig<nFin ){
+ if( nOrig<nFin || nFree>=nOrig ){
rc = SQLITE_CORRUPT_BKPT;
}else if( nFree>0 ){
rc = saveAllCursors(pBt, 0, 0);
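Given the SQLITE_OK/SQLITE_DONE protocol described above, a caller drives the incremental vacuum by invoking sqlite3BtreeIncrVacuum() repeatedly inside a write transaction until it reports SQLITE_DONE. A minimal sketch (stepIncrementalVacuum is a hypothetical helper; PRAGMA incremental_vacuum behaves along these lines):

static int stepIncrementalVacuum(Btree *p, int nMaxStep){
  int rc = SQLITE_OK;
  while( nMaxStep-->0 && rc==SQLITE_OK ){
    rc = sqlite3BtreeIncrVacuum(p);        /* relocate one page per call */
  }
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;    /* nothing left to vacuum */
  return rc;
}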
@@ -3909,16 +4104,18 @@ int sqlite3BtreeIncrVacuum(Btree *p){
/*
** This routine is called prior to sqlite3PagerCommit when a transaction
** is committed for an auto-vacuum database.
-**
-** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
-** the database file should be truncated to during the commit process.
-** i.e. the database has been reorganized so that only the first *pnTrunc
-** pages are in use.
*/
-static int autoVacuumCommit(BtShared *pBt){
+static int autoVacuumCommit(Btree *p){
int rc = SQLITE_OK;
- Pager *pPager = pBt->pPager;
- VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); )
+ Pager *pPager;
+ BtShared *pBt;
+ sqlite3 *db;
+ VVA_ONLY( int nRef );
+
+ assert( p!=0 );
+ pBt = p->pBt;
+ pPager = pBt->pPager;
+ VVA_ONLY( nRef = sqlite3PagerRefcount(pPager); )
assert( sqlite3_mutex_held(pBt->mutex) );
invalidateAllOverflowCache(pBt);
@@ -3926,6 +4123,7 @@ static int autoVacuumCommit(BtShared *pBt){
if( !pBt->incrVacuum ){
Pgno nFin; /* Number of pages in database after autovacuuming */
Pgno nFree; /* Number of pages on the freelist initially */
+ Pgno nVac; /* Number of pages to vacuum */
Pgno iFree; /* The next page to be freed */
Pgno nOrig; /* Database size before freeing */
@@ -3939,18 +4137,42 @@ static int autoVacuumCommit(BtShared *pBt){
}
nFree = get4byte(&pBt->pPage1->aData[36]);
- nFin = finalDbSize(pBt, nOrig, nFree);
+ db = p->db;
+ if( db->xAutovacPages ){
+ int iDb;
+ for(iDb=0; ALWAYS(iDb<db->nDb); iDb++){
+ if( db->aDb[iDb].pBt==p ) break;
+ }
+ nVac = db->xAutovacPages(
+ db->pAutovacPagesArg,
+ db->aDb[iDb].zDbSName,
+ nOrig,
+ nFree,
+ pBt->pageSize
+ );
+ if( nVac>nFree ){
+ nVac = nFree;
+ }
+ if( nVac==0 ){
+ return SQLITE_OK;
+ }
+ }else{
+ nVac = nFree;
+ }
+ nFin = finalDbSize(pBt, nOrig, nVac);
if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
if( nFin<nOrig ){
rc = saveAllCursors(pBt, 0, 0);
}
for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
- rc = incrVacuumStep(pBt, nFin, iFree, 1);
+ rc = incrVacuumStep(pBt, nFin, iFree, nVac==nFree);
}
if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){
rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
- put4byte(&pBt->pPage1->aData[32], 0);
- put4byte(&pBt->pPage1->aData[36], 0);
+ if( nVac==nFree ){
+ put4byte(&pBt->pPage1->aData[32], 0);
+ put4byte(&pBt->pPage1->aData[36], 0);
+ }
put4byte(&pBt->pPage1->aData[28], nFin);
pBt->bDoTruncate = 1;
pBt->nPage = nFin;
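The db->xAutovacPages hook consulted above lets the application decide how many of the nFree freelist pages should be relocated during this commit; autoVacuumCommit() clamps the answer to nFree and skips the vacuum entirely when the callback returns 0. A sketch of a compatible callback, assuming it is registered through the public sqlite3_autovacuum_pages() interface (halfVacuum is illustrative only):

static unsigned int halfVacuum(
  void *pArg,                 /* Registration argument (unused here) */
  const char *zSchema,        /* Name of the database being committed */
  unsigned int nDbPage,       /* Total pages in the database file */
  unsigned int nFreePage,     /* Pages currently on the freelist */
  unsigned int nBytePerPage   /* Page size in bytes */
){
  (void)pArg; (void)zSchema; (void)nDbPage; (void)nBytePerPage;
  return nFreePage/2;         /* relocate only half of the free pages */
}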
@@ -3985,7 +4207,7 @@ static int autoVacuumCommit(BtShared *pBt){
**
** Otherwise, sync the database file for the btree pBt. zSuperJrnl points to
** the name of a super-journal file that should be written into the
-** individual journal file, or is NULL, indicating no super-journal file
+** individual journal file, or is NULL, indicating no super-journal file
** (single database transaction).
**
** When this is called, the super-journal should already have been
@@ -4001,7 +4223,7 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){
sqlite3BtreeEnter(p);
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pBt->autoVacuum ){
- rc = autoVacuumCommit(pBt);
+ rc = autoVacuumCommit(p);
if( rc!=SQLITE_OK ){
sqlite3BtreeLeave(p);
return rc;
@@ -4036,8 +4258,8 @@ static void btreeEndTransaction(Btree *p){
downgradeAllSharedCacheTableLocks(p);
p->inTrans = TRANS_READ;
}else{
- /* If the handle had any kind of transaction open, decrement the
- ** transaction count of the shared btree. If the transaction count
+ /* If the handle had any kind of transaction open, decrement the
+ ** transaction count of the shared btree. If the transaction count
** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused()
** call below will unlock the pager. */
if( p->inTrans!=TRANS_NONE ){
@@ -4048,7 +4270,7 @@ static void btreeEndTransaction(Btree *p){
}
}
- /* Set the current transaction state to TRANS_NONE and unlock the
+ /* Set the current transaction state to TRANS_NONE and unlock the
** pager if this call closed the only read or write transaction. */
p->inTrans = TRANS_NONE;
unlockBtreeIfUnused(pBt);
@@ -4069,12 +4291,12 @@ static void btreeEndTransaction(Btree *p){
** the rollback journal (which causes the transaction to commit) and
** drop locks.
**
-** Normally, if an error occurs while the pager layer is attempting to
+** Normally, if an error occurs while the pager layer is attempting to
** finalize the underlying journal file, this function returns an error and
** the upper layer will attempt a rollback. However, if the second argument
-** is non-zero then this b-tree transaction is part of a multi-file
-** transaction. In this case, the transaction has already been committed
-** (by deleting a super-journal file) and the caller will ignore this
+** is non-zero then this b-tree transaction is part of a multi-file
+** transaction. In this case, the transaction has already been committed
+** (by deleting a super-journal file) and the caller will ignore this
** functions return code. So, even if an error occurs in the pager layer,
** reset the b-tree objects internal state to indicate that the write
** transaction has been closed. This is quite safe, as the pager will have
@@ -4089,7 +4311,7 @@ int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
sqlite3BtreeEnter(p);
btreeIntegrity(p);
- /* If the handle has a write-transaction open, commit the shared-btrees
+ /* If the handle has a write-transaction open, commit the shared-btrees
** transaction and set the shared state to TRANS_READ.
*/
if( p->inTrans==TRANS_WRITE ){
@@ -4102,7 +4324,7 @@ int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
sqlite3BtreeLeave(p);
return rc;
}
- p->iDataVersion--; /* Compensate for pPager->iDataVersion++; */
+ p->iBDataVersion--; /* Compensate for pPager->iDataVersion++; */
pBt->inTransaction = TRANS_READ;
btreeClearHasContent(pBt);
}
@@ -4138,15 +4360,15 @@ int sqlite3BtreeCommit(Btree *p){
**
** This routine gets called when a rollback occurs. If the writeOnly
** flag is true, then only write-cursors need be tripped - read-only
-** cursors save their current positions so that they may continue
-** following the rollback. Or, if writeOnly is false, all cursors are
+** cursors save their current positions so that they may continue
+** following the rollback. Or, if writeOnly is false, all cursors are
** tripped. In general, writeOnly is false if the transaction being
** rolled back modified the database schema. In this case b-tree root
** pages may be moved or deleted from the database altogether, making
** it unsafe for read cursors to continue.
**
-** If the writeOnly flag is true and an error is encountered while
-** saving the current position of a read-only cursor, all cursors,
+** If the writeOnly flag is true and an error is encountered while
+** saving the current position of a read-only cursor, all cursors,
** including all read-cursors are tripped.
**
** SQLITE_OK is returned if successful, or if an error occurs while
@@ -4188,7 +4410,7 @@ static void btreeSetNPage(BtShared *pBt, MemPage *pPage1){
int nPage = get4byte(&pPage1->aData[28]);
testcase( nPage==0 );
if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage);
- testcase( pBt->nPage!=nPage );
+ testcase( pBt->nPage!=(u32)nPage );
pBt->nPage = nPage;
}
@@ -4252,8 +4474,8 @@ int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){
/*
** Start a statement subtransaction. The subtransaction can be rolled
-** back independently of the main transaction. You must start a transaction
-** before starting a subtransaction. The subtransaction is ended automatically
+** back independently of the main transaction. You must start a transaction
+** before starting a subtransaction. The subtransaction is ended automatically
** if the main transaction commits or rolls back.
**
** Statement subtransactions are used around individual SQL statements
@@ -4290,11 +4512,11 @@ int sqlite3BtreeBeginStmt(Btree *p, int iStatement){
/*
** The second argument to this function, op, is always SAVEPOINT_ROLLBACK
** or SAVEPOINT_RELEASE. This function either releases or rolls back the
-** savepoint identified by parameter iSavepoint, depending on the value
+** savepoint identified by parameter iSavepoint, depending on the value
** of op.
**
** Normally, iSavepoint is greater than or equal to zero. However, if op is
-** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the
+** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the
** contents of the entire transaction are rolled back. This is different
** from a normal transaction rollback, as no locks are released and the
** transaction remains open.
@@ -4319,7 +4541,7 @@ int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){
rc = newDatabase(pBt);
btreeSetNPage(pBt, pBt->pPage1);
- /* pBt->nPage might be zero if the database was corrupt when
+ /* pBt->nPage might be zero if the database was corrupt when
** the transaction was started. Otherwise, it must be at least 1. */
assert( CORRUPT_DB || pBt->nPage>0 );
}
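As the sqlite3BtreeSavepoint() comment above notes, passing iSavepoint==-1 with SAVEPOINT_ROLLBACK rewinds the entire transaction without releasing locks or closing it. A minimal sketch (rollbackToStart is a hypothetical wrapper, not part of btree.c):

static int rollbackToStart(Btree *p){
  /* Undo every change made since the write transaction was opened,
  ** but keep the transaction and its locks in place. */
  return sqlite3BtreeSavepoint(p, SAVEPOINT_ROLLBACK, -1);
}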
@@ -4357,10 +4579,10 @@ int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){
** is set. If FORDELETE is set, that is a hint to the implementation that
** this cursor will only be used to seek to and delete entries of an index
** as part of a larger DELETE statement. The FORDELETE hint is not used by
-** this implementation. But in a hypothetical alternative storage engine
+** this implementation. But in a hypothetical alternative storage engine
** in which index entries are automatically deleted when corresponding table
** rows are deleted, the FORDELETE flag is a hint that all SEEK and DELETE
-** operations on this cursor can be no-ops and all READ operations can
+** operations on this cursor can be no-ops and all READ operations can
** return a null row (2-bytes: 0x01 0x00).
**
** No checking is done to make sure that page iTable really is the
@@ -4372,7 +4594,7 @@ int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){
*/
static int btreeCursor(
Btree *p, /* The btree */
- int iTable, /* Root page of table to open */
+ Pgno iTable, /* Root page of table to open */
int wrFlag, /* 1 to write. 0 read-only */
struct KeyInfo *pKeyInfo, /* First arg to comparison function */
BtCursor *pCur /* Space for new cursor */
@@ -4381,14 +4603,14 @@ static int btreeCursor(
BtCursor *pX; /* Looping over other all cursors */
assert( sqlite3BtreeHoldsMutex(p) );
- assert( wrFlag==0
- || wrFlag==BTREE_WRCSR
- || wrFlag==(BTREE_WRCSR|BTREE_FORDELETE)
+ assert( wrFlag==0
+ || wrFlag==BTREE_WRCSR
+ || wrFlag==(BTREE_WRCSR|BTREE_FORDELETE)
);
- /* The following assert statements verify that if this is a sharable
- ** b-tree database, the connection is holding the required table locks,
- ** and that no other connection has any open cursor that conflicts with
+ /* The following assert statements verify that if this is a sharable
+ ** b-tree database, the connection is holding the required table locks,
+ ** and that no other connection has any open cursor that conflicts with
** this lock. The iTable<1 term disables the check for corrupt schemas. */
assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, (wrFlag?2:1))
|| iTable<1 );
@@ -4400,10 +4622,6 @@ static int btreeCursor(
assert( pBt->pPage1 && pBt->pPage1->aData );
assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 );
- if( wrFlag ){
- allocateTempSpace(pBt);
- if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM_BKPT;
- }
if( iTable<=1 ){
if( iTable<1 ){
return SQLITE_CORRUPT_BKPT;
@@ -4415,29 +4633,35 @@ static int btreeCursor(
/* Now that no other errors can occur, finish filling in the BtCursor
** variables and link the cursor into the BtShared list. */
- pCur->pgnoRoot = (Pgno)iTable;
+ pCur->pgnoRoot = iTable;
pCur->iPage = -1;
pCur->pKeyInfo = pKeyInfo;
pCur->pBtree = p;
pCur->pBt = pBt;
- pCur->curFlags = wrFlag ? BTCF_WriteFlag : 0;
- pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY;
+ pCur->curFlags = 0;
/* If there are two or more cursors on the same btree, then all such
** cursors *must* have the BTCF_Multiple flag set. */
for(pX=pBt->pCursor; pX; pX=pX->pNext){
- if( pX->pgnoRoot==(Pgno)iTable ){
+ if( pX->pgnoRoot==iTable ){
pX->curFlags |= BTCF_Multiple;
- pCur->curFlags |= BTCF_Multiple;
+ pCur->curFlags = BTCF_Multiple;
}
}
+ pCur->eState = CURSOR_INVALID;
pCur->pNext = pBt->pCursor;
pBt->pCursor = pCur;
- pCur->eState = CURSOR_INVALID;
+ if( wrFlag ){
+ pCur->curFlags |= BTCF_WriteFlag;
+ pCur->curPagerFlags = 0;
+ if( pBt->pTmpSpace==0 ) return allocateTempSpace(pBt);
+ }else{
+ pCur->curPagerFlags = PAGER_GET_READONLY;
+ }
return SQLITE_OK;
}
static int btreeCursorWithLock(
Btree *p, /* The btree */
- int iTable, /* Root page of table to open */
+ Pgno iTable, /* Root page of table to open */
int wrFlag, /* 1 to write. 0 read-only */
struct KeyInfo *pKeyInfo, /* First arg to comparison function */
BtCursor *pCur /* Space for new cursor */
@@ -4450,7 +4674,7 @@ static int btreeCursorWithLock(
}
int sqlite3BtreeCursor(
Btree *p, /* The btree */
- int iTable, /* Root page of table to open */
+ Pgno iTable, /* Root page of table to open */
int wrFlag, /* 1 to write. 0 read-only */
struct KeyInfo *pKeyInfo, /* First arg to xCompare() */
BtCursor *pCur /* Write new cursor here */
@@ -4512,7 +4736,14 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){
unlockBtreeIfUnused(pBt);
sqlite3_free(pCur->aOverflow);
sqlite3_free(pCur->pKey);
- sqlite3BtreeLeave(pBtree);
+ if( (pBt->openFlags & BTREE_SINGLE) && pBt->pCursor==0 ){
+ /* Since the BtShared is not sharable, there is no need to
+ ** worry about the missing sqlite3BtreeLeave() call here. */
+ assert( pBtree->sharable==0 );
+ sqlite3BtreeClose(pBtree);
+ }else{
+ sqlite3BtreeLeave(pBtree);
+ }
pCur->pBtree = 0;
}
return SQLITE_OK;
@@ -4594,7 +4825,6 @@ void sqlite3BtreeCursorUnpin(BtCursor *pCur){
pCur->curFlags &= ~BTCF_Pinned;
}
-#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC
/*
** Return the offset into the database file for the start of the
** payload to which the cursor is pointing.
@@ -4606,7 +4836,6 @@ i64 sqlite3BtreeOffset(BtCursor *pCur){
return (i64)pCur->pBt->pageSize*((i64)pCur->pPage->pgno - 1) +
(i64)(pCur->info.pPayload - pCur->pPage->aData);
}
-#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */
/*
** Return the number of bytes of payload for the entry that pCur is
@@ -4632,7 +4861,7 @@ u32 sqlite3BtreePayloadSize(BtCursor *pCur){
** routine always returns 2147483647 (which is the largest record
** that SQLite can handle) or more. But returning a smaller value might
** prevent large memory allocations when trying to interpret a
-** corrupt datrabase.
+** corrupt database.
**
** The current implementation merely returns the size of the underlying
** database file.
@@ -4645,15 +4874,15 @@ sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor *pCur){
/*
** Given the page number of an overflow page in the database (parameter
-** ovfl), this function finds the page number of the next page in the
+** ovfl), this function finds the page number of the next page in the
** linked list of overflow pages. If possible, it uses the auto-vacuum
-** pointer-map data instead of reading the content of page ovfl to do so.
+** pointer-map data instead of reading the content of page ovfl to do so.
**
** If an error occurs an SQLite error code is returned. Otherwise:
**
-** The page number of the next overflow page in the linked list is
-** written to *pPgnoNext. If page ovfl is the last page in its linked
-** list, *pPgnoNext is set to zero.
+** The page number of the next overflow page in the linked list is
+** written to *pPgnoNext. If page ovfl is the last page in its linked
+** list, *pPgnoNext is set to zero.
**
** If ppPage is not NULL, and a reference to the MemPage object corresponding
** to page number pOvfl was obtained, then *ppPage is set to point to that
@@ -4677,9 +4906,9 @@ static int getOverflowPage(
#ifndef SQLITE_OMIT_AUTOVACUUM
/* Try to find the next page in the overflow list using the
- ** autovacuum pointer-map pages. Guess that the next page in
- ** the overflow list is page number (ovfl+1). If that guess turns
- ** out to be wrong, fall back to loading the data of page
+ ** autovacuum pointer-map pages. Guess that the next page in
+ ** the overflow list is page number (ovfl+1). If that guess turns
+ ** out to be wrong, fall back to loading the data of page
** number ovfl to determine the next page number.
*/
if( pBt->autoVacuum ){
@@ -4767,8 +4996,8 @@ static int copyPayload(
**
** If the current cursor entry uses one or more overflow pages
** this function may allocate space for and lazily populate
-** the overflow page-list cache array (BtCursor.aOverflow).
-** Subsequent calls use this cache to make seeking to the supplied offset
+** the overflow page-list cache array (BtCursor.aOverflow).
+** Subsequent calls use this cache to make seeking to the supplied offset
** more efficient.
**
** Once an overflow page-list cache has been allocated, it must be
@@ -4784,7 +5013,7 @@ static int accessPayload(
BtCursor *pCur, /* Cursor pointing to entry to read from */
u32 offset, /* Begin reading this far into payload */
u32 amt, /* Read this many bytes */
- unsigned char *pBuf, /* Write the bytes into this buffer */
+ unsigned char *pBuf, /* Write the bytes into this buffer */
int eOp /* zero to read. non-zero to write. */
){
unsigned char *aPayload;
@@ -4799,7 +5028,9 @@ static int accessPayload(
assert( pPage );
assert( eOp==0 || eOp==1 );
assert( pCur->eState==CURSOR_VALID );
- assert( pCur->ix<pPage->nCell );
+ if( pCur->ix>=pPage->nCell ){
+ return SQLITE_CORRUPT_PAGE(pPage);
+ }
assert( cursorHoldsMutex(pCur) );
getCellInfo(pCur);
@@ -4875,6 +5106,7 @@ static int accessPayload(
assert( rc==SQLITE_OK && amt>0 );
while( nextPage ){
/* If required, populate the overflow page-list cache. */
+ if( nextPage > pBt->nPage ) return SQLITE_CORRUPT_BKPT;
assert( pCur->aOverflow[iIdx]==0
|| pCur->aOverflow[iIdx]==nextPage
|| CORRUPT_DB );
@@ -4907,12 +5139,12 @@ static int accessPayload(
#ifdef SQLITE_DIRECT_OVERFLOW_READ
/* If all the following are true:
**
- ** 1) this is a read operation, and
+ ** 1) this is a read operation, and
** 2) data is required from the start of this overflow page, and
** 3) there are no dirty pages in the page-cache
** 4) the database is file-backed, and
** 5) the page is not in the WAL file
- ** 6) at least 4 bytes have already been read into the output buffer
+ ** 6) at least 4 bytes have already been read into the output buffer
**
** then data can be read directly from the database file into the
** output buffer, bypassing the page-cache altogether. This speeds
@@ -4985,7 +5217,6 @@ int sqlite3BtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
assert( cursorHoldsMutex(pCur) );
assert( pCur->eState==CURSOR_VALID );
assert( pCur->iPage>=0 && pCur->pPage );
- assert( pCur->ix<pCur->pPage->nCell );
return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0);
}
@@ -5020,7 +5251,7 @@ int sqlite3BtreePayloadChecked(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
#endif /* SQLITE_OMIT_INCRBLOB */
/*
-** Return a pointer to payload information from the entry that the
+** Return a pointer to payload information from the entry that the
** pCur cursor is pointing to. The pointer is to the beginning of
** the key for index btrees (pPage->intKey==0) and is the data for
** table btrees (pPage->intKey==1). The number of bytes of available
@@ -5047,7 +5278,7 @@ static const void *fetchPayload(
assert( pCur->eState==CURSOR_VALID );
assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
assert( cursorOwnsBtShared(pCur) );
- assert( pCur->ix<pCur->pPage->nCell );
+ assert( pCur->ix<pCur->pPage->nCell || CORRUPT_DB );
assert( pCur->info.nSize>0 );
assert( pCur->info.pPayload>pCur->pPage->aData || CORRUPT_DB );
assert( pCur->info.pPayload<pCur->pPage->aDataEnd ||CORRUPT_DB);
@@ -5092,8 +5323,7 @@ const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){
** vice-versa).
*/
static int moveToChild(BtCursor *pCur, u32 newPgno){
- BtShared *pBt = pCur->pBt;
-
+ int rc;
assert( cursorOwnsBtShared(pCur) );
assert( pCur->eState==CURSOR_VALID );
assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
@@ -5107,12 +5337,23 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){
pCur->apPage[pCur->iPage] = pCur->pPage;
pCur->ix = 0;
pCur->iPage++;
- return getAndInitPage(pBt, newPgno, &pCur->pPage, pCur, pCur->curPagerFlags);
+ rc = getAndInitPage(pCur->pBt, newPgno, &pCur->pPage, pCur->curPagerFlags);
+ assert( pCur->pPage!=0 || rc!=SQLITE_OK );
+ if( rc==SQLITE_OK
+ && (pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey)
+ ){
+ releasePage(pCur->pPage);
+ rc = SQLITE_CORRUPT_PGNO(newPgno);
+ }
+ if( rc ){
+ pCur->pPage = pCur->apPage[--pCur->iPage];
+ }
+ return rc;
}
#ifdef SQLITE_DEBUG
/*
-** Page pParent is an internal (non-leaf) tree page. This function
+** Page pParent is an internal (non-leaf) tree page. This function
** asserts that page number iChild is the left-child of the iIdx'th
** cell in page pParent. Or, if iIdx is equal to the total number of
** cells in pParent, that page number iChild is the right-child of
@@ -5129,7 +5370,7 @@ static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){
}
}
#else
-# define assertParentIndex(x,y,z)
+# define assertParentIndex(x,y,z)
#endif
/*
@@ -5147,8 +5388,8 @@ static void moveToParent(BtCursor *pCur){
assert( pCur->iPage>0 );
assert( pCur->pPage );
assertParentIndex(
- pCur->apPage[pCur->iPage-1],
- pCur->aiIdx[pCur->iPage-1],
+ pCur->apPage[pCur->iPage-1],
+ pCur->aiIdx[pCur->iPage-1],
pCur->pPage->pgno
);
testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
@@ -5165,19 +5406,19 @@ static void moveToParent(BtCursor *pCur){
**
** If the table has a virtual root page, then the cursor is moved to point
** to the virtual root page instead of the actual root page. A table has a
-** virtual root page when the actual root page contains no cells and a
+** virtual root page when the actual root page contains no cells and a
** single child page. This can only happen with the table rooted at page 1.
**
-** If the b-tree structure is empty, the cursor state is set to
+** If the b-tree structure is empty, the cursor state is set to
** CURSOR_INVALID and this routine returns SQLITE_EMPTY. Otherwise,
** the cursor is set to point to the first cell located on the root
** (or virtual root) page and the cursor state is set to CURSOR_VALID.
**
** If this function returns successfully, it may be assumed that the
-** page-header flags indicate that the [virtual] root-page is the expected
+** page-header flags indicate that the [virtual] root-page is the expected
** kind of b-tree page (i.e. if when opening the cursor the caller did not
** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D,
-** indicating a table b-tree, or if the caller did specify a KeyInfo
+** indicating a table b-tree, or if the caller did specify a KeyInfo
** structure the flags byte is set to 0x02 or 0x0A, indicating an index
** b-tree).
*/
@@ -5198,7 +5439,7 @@ static int moveToRoot(BtCursor *pCur){
while( --pCur->iPage ){
releasePageNotNull(pCur->apPage[pCur->iPage]);
}
- pCur->pPage = pCur->apPage[0];
+ pRoot = pCur->pPage = pCur->apPage[0];
goto skip_init;
}
}else if( pCur->pgnoRoot==0 ){
@@ -5213,8 +5454,8 @@ static int moveToRoot(BtCursor *pCur){
}
sqlite3BtreeClearCursor(pCur);
}
- rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->pPage,
- 0, pCur->curPagerFlags);
+ rc = getAndInitPage(pCur->pBt, pCur->pgnoRoot, &pCur->pPage,
+ pCur->curPagerFlags);
if( rc!=SQLITE_OK ){
pCur->eState = CURSOR_INVALID;
return rc;
@@ -5223,29 +5464,28 @@ static int moveToRoot(BtCursor *pCur){
pCur->curIntKey = pCur->pPage->intKey;
}
pRoot = pCur->pPage;
- assert( pRoot->pgno==pCur->pgnoRoot );
+ assert( pRoot->pgno==pCur->pgnoRoot || CORRUPT_DB );
/* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
** expected to open it on an index b-tree. Otherwise, if pKeyInfo is
** NULL, the caller expects a table b-tree. If this is not the case,
- ** return an SQLITE_CORRUPT error.
+ ** return an SQLITE_CORRUPT error.
**
** Earlier versions of SQLite assumed that this test could not fail
** if the root page was already loaded when this function was called (i.e.
- ** if pCur->iPage>=0). But this is not so if the database is corrupted
- ** in such a way that page pRoot is linked into a second b-tree table
+ ** if pCur->iPage>=0). But this is not so if the database is corrupted
+ ** in such a way that page pRoot is linked into a second b-tree table
** (or the freelist). */
assert( pRoot->intKey==1 || pRoot->intKey==0 );
if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){
return SQLITE_CORRUPT_PAGE(pCur->pPage);
}
-skip_init:
+skip_init:
pCur->ix = 0;
pCur->info.nSize = 0;
pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl);
- pRoot = pCur->pPage;
if( pRoot->nCell>0 ){
pCur->eState = CURSOR_VALID;
}else if( !pRoot->leaf ){
@@ -5327,7 +5567,7 @@ int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
*pRes = 0;
rc = moveToLeftmost(pCur);
}else if( rc==SQLITE_EMPTY ){
- assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
+ assert( pCur->pgnoRoot==0 || (pCur->pPage!=0 && pCur->pPage->nCell==0) );
*pRes = 1;
rc = SQLITE_OK;
}
@@ -5338,52 +5578,50 @@ int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
** on success. Set *pRes to 0 if the cursor actually points to something
** or set *pRes to 1 if the table is empty.
*/
+static SQLITE_NOINLINE int btreeLast(BtCursor *pCur, int *pRes){
+ int rc = moveToRoot(pCur);
+ if( rc==SQLITE_OK ){
+ assert( pCur->eState==CURSOR_VALID );
+ *pRes = 0;
+ rc = moveToRightmost(pCur);
+ if( rc==SQLITE_OK ){
+ pCur->curFlags |= BTCF_AtLast;
+ }else{
+ pCur->curFlags &= ~BTCF_AtLast;
+ }
+ }else if( rc==SQLITE_EMPTY ){
+ assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
+ *pRes = 1;
+ rc = SQLITE_OK;
+ }
+ return rc;
+}
int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
- int rc;
-
assert( cursorOwnsBtShared(pCur) );
assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
/* If the cursor already points to the last entry, this is a no-op. */
if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){
#ifdef SQLITE_DEBUG
- /* This block serves to assert() that the cursor really does point
+ /* This block serves to assert() that the cursor really does point
** to the last entry in the b-tree. */
int ii;
for(ii=0; ii<pCur->iPage; ii++){
assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell );
}
- assert( pCur->ix==pCur->pPage->nCell-1 );
+ assert( pCur->ix==pCur->pPage->nCell-1 || CORRUPT_DB );
+ testcase( pCur->ix!=pCur->pPage->nCell-1 );
+ /* ^-- dbsqlfuzz b92b72e4de80b5140c30ab71372ca719b8feb618 */
assert( pCur->pPage->leaf );
#endif
*pRes = 0;
return SQLITE_OK;
}
-
- rc = moveToRoot(pCur);
- if( rc==SQLITE_OK ){
- assert( pCur->eState==CURSOR_VALID );
- *pRes = 0;
- rc = moveToRightmost(pCur);
- if( rc==SQLITE_OK ){
- pCur->curFlags |= BTCF_AtLast;
- }else{
- pCur->curFlags &= ~BTCF_AtLast;
- }
- }else if( rc==SQLITE_EMPTY ){
- assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
- *pRes = 1;
- rc = SQLITE_OK;
- }
- return rc;
+ return btreeLast(pCur, pRes);
}
-/* Move the cursor so that it points to an entry near the key
-** specified by pIdxKey or intKey. Return a success code.
-**
-** For INTKEY tables, the intKey parameter is used. pIdxKey
-** must be NULL. For index tables, pIdxKey is used and intKey
-** is ignored.
+/* Move the cursor so that it points to an entry in a table b-tree (a.k.a.
+** an INTKEY table) near the key intKey. Return a success code.
**
** If an exact match is not found, then the cursor is always
** left pointing at a leaf page which would hold the entry if it
@@ -5391,44 +5629,37 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
** before or after the key.
**
** An integer is written into *pRes which is the result of
-** comparing the key with the entry to which the cursor is
+** comparing the key with the entry to which the cursor is
** pointing. The meaning of the integer written into
** *pRes is as follows:
**
** *pRes<0 The cursor is left pointing at an entry that
-** is smaller than intKey/pIdxKey or if the table is empty
+** is smaller than intKey or if the table is empty
** and the cursor is therefore left pointing to nothing.
**
** *pRes==0 The cursor is left pointing at an entry that
-** exactly matches intKey/pIdxKey.
+** exactly matches intKey.
**
** *pRes>0 The cursor is left pointing at an entry that
-** is larger than intKey/pIdxKey.
-**
-** For index tables, the pIdxKey->eqSeen field is set to 1 if there
-** exists an entry in the table that exactly matches pIdxKey.
+** is larger than intKey.
*/
-int sqlite3BtreeMovetoUnpacked(
+int sqlite3BtreeTableMoveto(
BtCursor *pCur, /* The cursor to be moved */
- UnpackedRecord *pIdxKey, /* Unpacked index key */
i64 intKey, /* The table key */
int biasRight, /* If true, bias the search to the high end */
int *pRes /* Write search results here */
){
int rc;
- RecordCompare xRecordCompare;
assert( cursorOwnsBtShared(pCur) );
assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
assert( pRes );
- assert( (pIdxKey==0)==(pCur->pKeyInfo==0) );
- assert( pCur->eState!=CURSOR_VALID || (pIdxKey==0)==(pCur->curIntKey!=0) );
+ assert( pCur->pKeyInfo==0 );
+ assert( pCur->eState!=CURSOR_VALID || pCur->curIntKey!=0 );
/* If the cursor is already positioned at the point we are trying
** to move to, then just return without doing any work */
- if( pIdxKey==0
- && pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
- ){
+ if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 ){
if( pCur->info.nKey==intKey ){
*pRes = 0;
return SQLITE_OK;
@@ -5441,7 +5672,7 @@ int sqlite3BtreeMovetoUnpacked(
/* If the requested key is one more than the previous key, then
** try to get there using sqlite3BtreeNext() rather than a full
** binary search. This is an optimization only. The correct answer
- ** is still obtained without this case, only a little more slowely */
+ ** is still obtained without this case, only a little more slowly. */
if( pCur->info.nKey+1==intKey ){
*pRes = 0;
rc = sqlite3BtreeNext(pCur, 0);
@@ -5450,25 +5681,16 @@ int sqlite3BtreeMovetoUnpacked(
if( pCur->info.nKey==intKey ){
return SQLITE_OK;
}
- }else if( rc==SQLITE_DONE ){
- rc = SQLITE_OK;
- }else{
+ }else if( rc!=SQLITE_DONE ){
return rc;
}
}
}
}
- if( pIdxKey ){
- xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
- pIdxKey->errCode = 0;
- assert( pIdxKey->default_rc==1
- || pIdxKey->default_rc==0
- || pIdxKey->default_rc==-1
- );
- }else{
- xRecordCompare = 0; /* All keys are integers */
- }
+#ifdef SQLITE_DEBUG
+ pCur->pBtree->nSeek++; /* Performance measurement during testing */
+#endif
rc = moveToRoot(pCur);
if( rc ){
@@ -5484,7 +5706,8 @@ int sqlite3BtreeMovetoUnpacked(
assert( pCur->eState==CURSOR_VALID );
assert( pCur->pPage->nCell > 0 );
assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey );
- assert( pCur->curIntKey || pIdxKey );
+ assert( pCur->curIntKey );
+
for(;;){
int lwr, upr, idx, c;
Pgno chldPg;
@@ -5498,144 +5721,55 @@ int sqlite3BtreeMovetoUnpacked(
** be the right kind (index or table) of b-tree page. Otherwise
** a moveToChild() or moveToRoot() call would have detected corruption. */
assert( pPage->nCell>0 );
- assert( pPage->intKey==(pIdxKey==0) );
+ assert( pPage->intKey );
lwr = 0;
upr = pPage->nCell-1;
assert( biasRight==0 || biasRight==1 );
idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */
- pCur->ix = (u16)idx;
- if( xRecordCompare==0 ){
- for(;;){
- i64 nCellKey;
- pCell = findCellPastPtr(pPage, idx);
- if( pPage->intKeyLeaf ){
- while( 0x80 <= *(pCell++) ){
- if( pCell>=pPage->aDataEnd ){
- return SQLITE_CORRUPT_PAGE(pPage);
- }
- }
- }
- getVarint(pCell, (u64*)&nCellKey);
- if( nCellKey<intKey ){
- lwr = idx+1;
- if( lwr>upr ){ c = -1; break; }
- }else if( nCellKey>intKey ){
- upr = idx-1;
- if( lwr>upr ){ c = +1; break; }
- }else{
- assert( nCellKey==intKey );
- pCur->ix = (u16)idx;
- if( !pPage->leaf ){
- lwr = idx;
- goto moveto_next_layer;
- }else{
- pCur->curFlags |= BTCF_ValidNKey;
- pCur->info.nKey = nCellKey;
- pCur->info.nSize = 0;
- *pRes = 0;
- return SQLITE_OK;
+ for(;;){
+ i64 nCellKey;
+ pCell = findCellPastPtr(pPage, idx);
+ if( pPage->intKeyLeaf ){
+ while( 0x80 <= *(pCell++) ){
+ if( pCell>=pPage->aDataEnd ){
+ return SQLITE_CORRUPT_PAGE(pPage);
}
}
- assert( lwr+upr>=0 );
- idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */
}
- }else{
- for(;;){
- int nCell; /* Size of the pCell cell in bytes */
- pCell = findCellPastPtr(pPage, idx);
-
- /* The maximum supported page-size is 65536 bytes. This means that
- ** the maximum number of record bytes stored on an index B-Tree
- ** page is less than 16384 bytes and may be stored as a 2-byte
- ** varint. This information is used to attempt to avoid parsing
- ** the entire cell by checking for the cases where the record is
- ** stored entirely within the b-tree page by inspecting the first
- ** 2 bytes of the cell.
- */
- nCell = pCell[0];
- if( nCell<=pPage->max1bytePayload ){
- /* This branch runs if the record-size field of the cell is a
- ** single byte varint and the record fits entirely on the main
- ** b-tree page. */
- testcase( pCell+nCell+1==pPage->aDataEnd );
- c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
- }else if( !(pCell[1] & 0x80)
- && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
- ){
- /* The record-size field is a 2 byte varint and the record
- ** fits entirely on the main b-tree page. */
- testcase( pCell+nCell+2==pPage->aDataEnd );
- c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
- }else{
- /* The record flows over onto one or more overflow pages. In
- ** this case the whole cell needs to be parsed, a buffer allocated
- ** and accessPayload() used to retrieve the record into the
- ** buffer before VdbeRecordCompare() can be called.
- **
- ** If the record is corrupt, the xRecordCompare routine may read
- ** up to two varints past the end of the buffer. An extra 18
- ** bytes of padding is allocated at the end of the buffer in
- ** case this happens. */
- void *pCellKey;
- u8 * const pCellBody = pCell - pPage->childPtrSize;
- const int nOverrun = 18; /* Size of the overrun padding */
- pPage->xParseCell(pPage, pCellBody, &pCur->info);
- nCell = (int)pCur->info.nKey;
- testcase( nCell<0 ); /* True if key size is 2^32 or more */
- testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */
- testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */
- testcase( nCell==2 ); /* Minimum legal index key size */
- if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){
- rc = SQLITE_CORRUPT_PAGE(pPage);
- goto moveto_finish;
- }
- pCellKey = sqlite3Malloc( nCell+nOverrun );
- if( pCellKey==0 ){
- rc = SQLITE_NOMEM_BKPT;
- goto moveto_finish;
- }
- pCur->ix = (u16)idx;
- rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
- memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */
- pCur->curFlags &= ~BTCF_ValidOvfl;
- if( rc ){
- sqlite3_free(pCellKey);
- goto moveto_finish;
- }
- c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey);
- sqlite3_free(pCellKey);
- }
- assert(
- (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
- && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
- );
- if( c<0 ){
- lwr = idx+1;
- }else if( c>0 ){
- upr = idx-1;
+ getVarint(pCell, (u64*)&nCellKey);
+ if( nCellKey<intKey ){
+ lwr = idx+1;
+ if( lwr>upr ){ c = -1; break; }
+ }else if( nCellKey>intKey ){
+ upr = idx-1;
+ if( lwr>upr ){ c = +1; break; }
+ }else{
+ assert( nCellKey==intKey );
+ pCur->ix = (u16)idx;
+ if( !pPage->leaf ){
+ lwr = idx;
+ goto moveto_table_next_layer;
}else{
- assert( c==0 );
+ pCur->curFlags |= BTCF_ValidNKey;
+ pCur->info.nKey = nCellKey;
+ pCur->info.nSize = 0;
*pRes = 0;
- rc = SQLITE_OK;
- pCur->ix = (u16)idx;
- if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT;
- goto moveto_finish;
+ return SQLITE_OK;
}
- if( lwr>upr ) break;
- assert( lwr+upr>=0 );
- idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */
}
+ assert( lwr+upr>=0 );
+ idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */
}
- assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
+ assert( lwr==upr+1 || !pPage->leaf );
assert( pPage->isInit );
if( pPage->leaf ){
assert( pCur->ix<pCur->pPage->nCell );
pCur->ix = (u16)idx;
*pRes = c;
rc = SQLITE_OK;
- goto moveto_finish;
+ goto moveto_table_finish;
}
-moveto_next_layer:
+moveto_table_next_layer:
if( lwr>=pPage->nCell ){
chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
}else{
@@ -5645,7 +5779,326 @@ moveto_next_layer:
rc = moveToChild(pCur, chldPg);
if( rc ) break;
}
-moveto_finish:
+moveto_table_finish:
+ pCur->info.nSize = 0;
+ assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
+ return rc;
+}
+
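The *pRes contract documented for sqlite3BtreeTableMoveto() above can be exercised with a small seek helper. A minimal sketch, assuming the cursor is already open on a table (INTKEY) b-tree (seekRowid is hypothetical, not part of btree.c):

static int seekRowid(BtCursor *pCur, i64 iRow, int *pFound){
  int res = 0;
  int rc = sqlite3BtreeTableMoveto(pCur, iRow, 0, &res);
  *pFound = 0;
  if( rc==SQLITE_OK ){
    if( res==0 ){
      *pFound = 1;                           /* exact match: cursor is on iRow */
    }else if( res<0 ){
      rc = sqlite3BtreeNext(pCur, 0);        /* cursor is on a smaller key */
      if( rc==SQLITE_DONE ) rc = SQLITE_OK;  /* no key larger than iRow exists */
    }
    /* res>0: the cursor already sits on the smallest key larger than iRow */
  }
  return rc;
}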
+/*
+** Compare the "idx"-th cell on the page the cursor pCur is currently
+** pointing to against pIdxKey using xRecordCompare. Return negative or
+** zero if the cell is less than or equal to pIdxKey. Return positive
+** if unknown.
+**
+** Return value negative: Cell at pCur[idx] less than pIdxKey
+**
+** Return value is zero: Cell at pCur[idx] equals pIdxKey
+**
+** Return value positive: Nothing is known about the relationship
+** of the cell at pCur[idx] and pIdxKey.
+**
+** This routine is part of an optimization. It is always safe to return
+** a positive value as that will cause the optimization to be skipped.
+*/
+static int indexCellCompare(
+ BtCursor *pCur,
+ int idx,
+ UnpackedRecord *pIdxKey,
+ RecordCompare xRecordCompare
+){
+ MemPage *pPage = pCur->pPage;
+ int c;
+ int nCell; /* Size of the pCell cell in bytes */
+ u8 *pCell = findCellPastPtr(pPage, idx);
+
+ nCell = pCell[0];
+ if( nCell<=pPage->max1bytePayload ){
+ /* This branch runs if the record-size field of the cell is a
+ ** single byte varint and the record fits entirely on the main
+ ** b-tree page. */
+ testcase( pCell+nCell+1==pPage->aDataEnd );
+ c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
+ }else if( !(pCell[1] & 0x80)
+ && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
+ ){
+ /* The record-size field is a 2 byte varint and the record
+ ** fits entirely on the main b-tree page. */
+ testcase( pCell+nCell+2==pPage->aDataEnd );
+ c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
+ }else{
+ /* If the record extends into overflow pages, do not attempt
+ ** the optimization. */
+ c = 99;
+ }
+ return c;
+}
+
+/*
+** Return true (non-zero) if pCur is currently pointing to the last
+** page of a table.
+*/
+static int cursorOnLastPage(BtCursor *pCur){
+ int i;
+ assert( pCur->eState==CURSOR_VALID );
+ for(i=0; i<pCur->iPage; i++){
+ MemPage *pPage = pCur->apPage[i];
+ if( pCur->aiIdx[i]<pPage->nCell ) return 0;
+ }
+ return 1;
+}
+
+/* Move the cursor so that it points to an entry in an index table
+** near the key pIdxKey. Return a success code.
+**
+** If an exact match is not found, then the cursor is always
+** left pointing at a leaf page which would hold the entry if it
+** were present. The cursor might point to an entry that comes
+** before or after the key.
+**
+** An integer is written into *pRes which is the result of
+** comparing the key with the entry to which the cursor is
+** pointing. The meaning of the integer written into
+** *pRes is as follows:
+**
+** *pRes<0 The cursor is left pointing at an entry that
+** is smaller than pIdxKey or if the table is empty
+** and the cursor is therefore left pointing to nothing.
+**
+** *pRes==0 The cursor is left pointing at an entry that
+** exactly matches pIdxKey.
+**
+** *pRes>0 The cursor is left pointing at an entry that
+** is larger than pIdxKey.
+**
+** The pIdxKey->eqSeen field is set to 1 if there
+** exists an entry in the table that exactly matches pIdxKey.
+*/
+int sqlite3BtreeIndexMoveto(
+ BtCursor *pCur, /* The cursor to be moved */
+ UnpackedRecord *pIdxKey, /* Unpacked index key */
+ int *pRes /* Write search results here */
+){
+ int rc;
+ RecordCompare xRecordCompare;
+
+ assert( cursorOwnsBtShared(pCur) );
+ assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
+ assert( pRes );
+ assert( pCur->pKeyInfo!=0 );
+
+#ifdef SQLITE_DEBUG
+ pCur->pBtree->nSeek++; /* Performance measurement during testing */
+#endif
+
+ xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
+ pIdxKey->errCode = 0;
+ assert( pIdxKey->default_rc==1
+ || pIdxKey->default_rc==0
+ || pIdxKey->default_rc==-1
+ );
+
+
+ /* Check to see if we can skip a lot of work. Two cases:
+ **
+ ** (1) If the cursor is already pointing to the very last cell
+ ** in the table and the pIdxKey search key is greater than or
+ ** equal to that last cell, then no movement is required.
+ **
+ ** (2) If the cursor is on the last page of the table and the first
+ ** cell on that last page is less than or equal to the pIdxKey
+ ** search key, then we can start the search on the current page
+ ** without needing to go back to root.
+ */
+ if( pCur->eState==CURSOR_VALID
+ && pCur->pPage->leaf
+ && cursorOnLastPage(pCur)
+ ){
+ int c;
+ if( pCur->ix==pCur->pPage->nCell-1
+ && (c = indexCellCompare(pCur, pCur->ix, pIdxKey, xRecordCompare))<=0
+ && pIdxKey->errCode==SQLITE_OK
+ ){
+ *pRes = c;
+ return SQLITE_OK; /* Cursor already pointing at the correct spot */
+ }
+ if( pCur->iPage>0
+ && indexCellCompare(pCur, 0, pIdxKey, xRecordCompare)<=0
+ && pIdxKey->errCode==SQLITE_OK
+ ){
+ pCur->curFlags &= ~BTCF_ValidOvfl;
+ if( !pCur->pPage->isInit ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ goto bypass_moveto_root; /* Start search on the current page */
+ }
+ pIdxKey->errCode = SQLITE_OK;
+ }
+
+ rc = moveToRoot(pCur);
+ if( rc ){
+ if( rc==SQLITE_EMPTY ){
+ assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
+ *pRes = -1;
+ return SQLITE_OK;
+ }
+ return rc;
+ }
+
+bypass_moveto_root:
+ assert( pCur->pPage );
+ assert( pCur->pPage->isInit );
+ assert( pCur->eState==CURSOR_VALID );
+ assert( pCur->pPage->nCell > 0 );
+ assert( pCur->curIntKey==0 );
+ assert( pIdxKey!=0 );
+ for(;;){
+ int lwr, upr, idx, c;
+ Pgno chldPg;
+ MemPage *pPage = pCur->pPage;
+ u8 *pCell; /* Pointer to current cell in pPage */
+
+ /* pPage->nCell must be greater than zero. If this is the root-page
+ ** the cursor would have been INVALID above and this for(;;) loop
+ ** not run. If this is not the root-page, then the moveToChild() routine
+ ** would have already detected db corruption. Similarly, pPage must
+ ** be the right kind (index or table) of b-tree page. Otherwise
+ ** a moveToChild() or moveToRoot() call would have detected corruption. */
+ assert( pPage->nCell>0 );
+ assert( pPage->intKey==0 );
+ lwr = 0;
+ upr = pPage->nCell-1;
+ idx = upr>>1; /* idx = (lwr+upr)/2; */
+ for(;;){
+ int nCell; /* Size of the pCell cell in bytes */
+ pCell = findCellPastPtr(pPage, idx);
+
+ /* The maximum supported page-size is 65536 bytes. This means that
+ ** the maximum number of record bytes stored on an index B-Tree
+ ** page is less than 16384 bytes and may be stored as a 2-byte
+ ** varint. This information is used to attempt to avoid parsing
+ ** the entire cell by checking for the cases where the record is
+ ** stored entirely within the b-tree page by inspecting the first
+ ** 2 bytes of the cell.
+ */
+ nCell = pCell[0];
+ if( nCell<=pPage->max1bytePayload ){
+ /* This branch runs if the record-size field of the cell is a
+ ** single byte varint and the record fits entirely on the main
+ ** b-tree page. */
+ testcase( pCell+nCell+1==pPage->aDataEnd );
+ c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
+ }else if( !(pCell[1] & 0x80)
+ && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
+ ){
+ /* The record-size field is a 2 byte varint and the record
+ ** fits entirely on the main b-tree page. */
+ testcase( pCell+nCell+2==pPage->aDataEnd );
+ c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
+ }else{
+ /* The record flows over onto one or more overflow pages. In
+ ** this case the whole cell needs to be parsed, a buffer allocated
+ ** and accessPayload() used to retrieve the record into the
+ ** buffer before VdbeRecordCompare() can be called.
+ **
+ ** If the record is corrupt, the xRecordCompare routine may read
+ ** up to two varints past the end of the buffer. An extra 18
+ ** bytes of padding is allocated at the end of the buffer in
+ ** case this happens. */
+ void *pCellKey;
+ u8 * const pCellBody = pCell - pPage->childPtrSize;
+ const int nOverrun = 18; /* Size of the overrun padding */
+ pPage->xParseCell(pPage, pCellBody, &pCur->info);
+ nCell = (int)pCur->info.nKey;
+ testcase( nCell<0 ); /* True if key size is 2^32 or more */
+ testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */
+ testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */
+ testcase( nCell==2 ); /* Minimum legal index key size */
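+          /* A key of fewer than 2 bytes, or one larger than the entire
+          ** database file, can only occur in a corrupt database. */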
+ if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){
+ rc = SQLITE_CORRUPT_PAGE(pPage);
+ goto moveto_index_finish;
+ }
+ pCellKey = sqlite3Malloc( nCell+nOverrun );
+ if( pCellKey==0 ){
+ rc = SQLITE_NOMEM_BKPT;
+ goto moveto_index_finish;
+ }
+ pCur->ix = (u16)idx;
+ rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
+ memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */
+ pCur->curFlags &= ~BTCF_ValidOvfl;
+ if( rc ){
+ sqlite3_free(pCellKey);
+ goto moveto_index_finish;
+ }
+ c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey);
+ sqlite3_free(pCellKey);
+ }
+ assert(
+ (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
+ && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
+ );
+ if( c<0 ){
+ lwr = idx+1;
+ }else if( c>0 ){
+ upr = idx-1;
+ }else{
+ assert( c==0 );
+ *pRes = 0;
+ rc = SQLITE_OK;
+ pCur->ix = (u16)idx;
+ if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT;
+ goto moveto_index_finish;
+ }
+ if( lwr>upr ) break;
+ assert( lwr+upr>=0 );
+ idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */
+ }
+ assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
+ assert( pPage->isInit );
+ if( pPage->leaf ){
+ assert( pCur->ix<pCur->pPage->nCell || CORRUPT_DB );
+ pCur->ix = (u16)idx;
+ *pRes = c;
+ rc = SQLITE_OK;
+ goto moveto_index_finish;
+ }
+ if( lwr>=pPage->nCell ){
+ chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+ }else{
+ chldPg = get4byte(findCell(pPage, lwr));
+ }
+
+ /* This block is similar to an in-lined version of:
+ **
+ ** pCur->ix = (u16)lwr;
+ ** rc = moveToChild(pCur, chldPg);
+ ** if( rc ) break;
+ */
+ pCur->info.nSize = 0;
+ pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
+ if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ pCur->aiIdx[pCur->iPage] = (u16)lwr;
+ pCur->apPage[pCur->iPage] = pCur->pPage;
+ pCur->ix = 0;
+ pCur->iPage++;
+ rc = getAndInitPage(pCur->pBt, chldPg, &pCur->pPage, pCur->curPagerFlags);
+ if( rc==SQLITE_OK
+ && (pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey)
+ ){
+ releasePage(pCur->pPage);
+ rc = SQLITE_CORRUPT_PGNO(chldPg);
+ }
+ if( rc ){
+ pCur->pPage = pCur->apPage[--pCur->iPage];
+ break;
+ }
+ /*
+ ***** End of in-lined moveToChild() call */
+ }
+moveto_index_finish:
pCur->info.nSize = 0;
assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
return rc;
@@ -5669,7 +6122,7 @@ int sqlite3BtreeEof(BtCursor *pCur){
/*
** Return an estimate for the number of rows in the table that pCur is
-** pointing to. Return a negative number if no estimate is currently
+** pointing to. Return a negative number if no estimate is currently
** available.
*/
i64 sqlite3BtreeRowCountEst(BtCursor *pCur){
@@ -5693,7 +6146,7 @@ i64 sqlite3BtreeRowCountEst(BtCursor *pCur){
}
/*
-** Advance the cursor to the next entry in the database.
+** Advance the cursor to the next entry in the database.
** Return value:
**
** SQLITE_OK success
@@ -5735,27 +6188,11 @@ static SQLITE_NOINLINE int btreeNext(BtCursor *pCur){
pPage = pCur->pPage;
idx = ++pCur->ix;
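+  /* Test-only fault injection: pretend the page was never initialized so
+  ** that the corruption check below can be exercised. */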
+ if( sqlite3FaultSim(412) ) pPage->isInit = 0;
if( !pPage->isInit ){
- /* The only known way for this to happen is for there to be a
- ** recursive SQL function that does a DELETE operation as part of a
- ** SELECT which deletes content out from under an active cursor
- ** in a corrupt database file where the table being DELETE-ed from
- ** has pages in common with the table being queried. See TH3
- ** module cov1/btree78.test testcase 220 (2018-06-08) for an
- ** example. */
return SQLITE_CORRUPT_BKPT;
}
- /* If the database file is corrupt, it is possible for the value of idx
- ** to be invalid here. This can only occur if a second cursor modifies
- ** the page while cursor pCur is holding a reference to it. Which can
- ** only happen if the database is corrupt in such a way as to link the
- ** page into more than one b-tree structure.
- **
- ** Update 2019-12-23: appears to long longer be possible after the
- ** addition of anotherValidCursor() condition on balance_deeper(). */
- harmless( idx>pPage->nCell );
-
if( idx>=pPage->nCell ){
if( !pPage->leaf ){
rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
@@ -5898,7 +6335,7 @@ int sqlite3BtreePrevious(BtCursor *pCur, int flags){
** SQLITE_OK is returned on success. Any other return value indicates
** an error. *ppPage is set to NULL in the event of an error.
**
-** If the "nearby" parameter is not 0, then an effort is made to
+** If the "nearby" parameter is not 0, then an effort is made to
** locate a page close to the page number "nearby". This can be used in an
** attempt to keep related pages close to each other in the database file,
** which in turn can make database access faster.
@@ -5928,8 +6365,8 @@ static int allocateBtreePage(
assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
pPage1 = pBt->pPage1;
mxPage = btreePagecount(pBt);
- /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36
- ** stores stores the total number of pages on the freelist. */
+ /* EVIDENCE-OF: R-21003-45125 The 4-byte big-endian integer at offset 36
+ ** stores the total number of pages on the freelist. */
n = get4byte(&pPage1->aData[36]);
testcase( n==mxPage-1 );
if( n>=mxPage ){
@@ -5940,7 +6377,7 @@ static int allocateBtreePage(
Pgno iTrunk;
u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
u32 nSearch = 0; /* Count of the number of search attempts */
-
+
/* If eMode==BTALLOC_EXACT and a query of the pointer-map
** shows that the page 'nearby' is somewhere on the free-list, then
** the entire-list will be searched for that page.
@@ -6003,8 +6440,8 @@ static int allocateBtreePage(
** is the number of leaf page pointers to follow. */
k = get4byte(&pTrunk->aData[4]);
if( k==0 && !searchList ){
- /* The trunk has no leaves and the list is not being searched.
- ** So extract the trunk page itself and use it as the newly
+ /* The trunk has no leaves and the list is not being searched.
+ ** So extract the trunk page itself and use it as the newly
** allocated page */
assert( pPrevTrunk==0 );
rc = sqlite3PagerWrite(pTrunk->pDbPage);
@@ -6015,14 +6452,14 @@ static int allocateBtreePage(
memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
*ppPage = pTrunk;
pTrunk = 0;
- TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
+ TRACE(("ALLOCATE: %u trunk - %u free pages left\n", *pPgno, n-1));
}else if( k>(u32)(pBt->usableSize/4 - 2) ){
/* Value of k is out of range. Database corruption */
rc = SQLITE_CORRUPT_PGNO(iTrunk);
goto end_allocate_page;
#ifndef SQLITE_OMIT_AUTOVACUUM
- }else if( searchList
- && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE))
+ }else if( searchList
+ && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE))
){
/* The list is being searched and this trunk page is the page
** to allocate, regardless of whether it has leaves.
@@ -6045,13 +6482,13 @@ static int allocateBtreePage(
memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
}
}else{
- /* The trunk page is required by the caller but it contains
+ /* The trunk page is required by the caller but it contains
** pointers to free-list leaves. The first leaf becomes a trunk
** page in this case.
*/
MemPage *pNewTrunk;
Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
- if( iNewTrunk>mxPage ){
+ if( iNewTrunk>mxPage ){
rc = SQLITE_CORRUPT_PGNO(iTrunk);
goto end_allocate_page;
}
@@ -6081,7 +6518,7 @@ static int allocateBtreePage(
}
}
pTrunk = 0;
- TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
+ TRACE(("ALLOCATE: %u trunk - %u free pages left\n", *pPgno, n-1));
#endif
}else if( k>0 ){
/* Extract a leaf from the trunk */
@@ -6116,18 +6553,18 @@ static int allocateBtreePage(
iPage = get4byte(&aData[8+closest*4]);
testcase( iPage==mxPage );
- if( iPage>mxPage ){
+ if( iPage>mxPage || iPage<2 ){
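+        /* Page 0 is never a valid page number and page 1 is the database
+        ** header page, so neither can be a free-list leaf. */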
rc = SQLITE_CORRUPT_PGNO(iTrunk);
goto end_allocate_page;
}
testcase( iPage==mxPage );
- if( !searchList
- || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE))
+ if( !searchList
+ || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE))
){
int noContent;
*pPgno = iPage;
- TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
- ": %d more free pages\n",
+ TRACE(("ALLOCATE: %u was leaf %u of %u on trunk %u"
+ ": %u more free pages\n",
*pPgno, closest+1, k, pTrunk->pgno, n-1));
rc = sqlite3PagerWrite(pTrunk->pDbPage);
if( rc ) goto end_allocate_page;
@@ -6163,7 +6600,7 @@ static int allocateBtreePage(
** not set the no-content flag. This causes the pager to load and journal
** the current page content before overwriting it.
**
- ** Note that the pager will not actually attempt to load or journal
+ ** Note that the pager will not actually attempt to load or journal
** content for any page that really does lie past the end of the database
** file on disk. So the effects of disabling the no-content optimization
** here are confined to those pages that lie between the end of the
@@ -6183,7 +6620,7 @@ static int allocateBtreePage(
** becomes a new pointer-map page, the second is used by the caller.
*/
MemPage *pPg = 0;
- TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
+ TRACE(("ALLOCATE: %u from end of file (pointer-map page)\n", pBt->nPage));
assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent);
if( rc==SQLITE_OK ){
@@ -6206,7 +6643,7 @@ static int allocateBtreePage(
releasePage(*ppPage);
*ppPage = 0;
}
- TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
+ TRACE(("ALLOCATE: %u from end of file\n", *pPgno));
}
assert( CORRUPT_DB || *pPgno!=PENDING_BYTE_PAGE(pBt) );
@@ -6220,12 +6657,12 @@ end_allocate_page:
}
/*
-** This function is used to add page iPage to the database file free-list.
+** This function is used to add page iPage to the database file free-list.
** It is assumed that the page is not already a part of the free-list.
**
** The value passed as the second argument to this function is optional.
-** If the caller happens to have a pointer to the MemPage object
-** corresponding to page iPage handy, it may pass it as the second value.
+** If the caller happens to have a pointer to the MemPage object
+** corresponding to page iPage handy, it may pass it as the second value.
** Otherwise, it may pass NULL.
**
** If a pointer to a MemPage object is passed as the second argument,
@@ -6233,7 +6670,7 @@ end_allocate_page:
*/
static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
MemPage *pTrunk = 0; /* Free-list trunk page */
- Pgno iTrunk = 0; /* Page number of free-list trunk page */
+ Pgno iTrunk = 0; /* Page number of free-list trunk page */
MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */
MemPage *pPage; /* Page being freed. May be NULL. */
int rc; /* Return Code */
@@ -6274,7 +6711,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
/* If the database supports auto-vacuum, write an entry in the pointer-map
** to indicate that the page is free.
*/
- if( ISAUTOVACUUM ){
+ if( ISAUTOVACUUM(pBt) ){
ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc);
if( rc ) goto freepage_out;
}
@@ -6290,6 +6727,10 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
u32 nLeaf; /* Initial number of leaf cells on trunk page */
iTrunk = get4byte(&pPage1->aData[32]);
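+    /* The trunk page number read from the database header must lie within
+    ** the database file; otherwise the free-list is corrupt. */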
+ if( iTrunk>btreePagecount(pBt) ){
+ rc = SQLITE_CORRUPT_BKPT;
+ goto freepage_out;
+ }
rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
if( rc!=SQLITE_OK ){
goto freepage_out;
@@ -6330,14 +6771,14 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
}
rc = btreeSetHasContent(pBt, iPage);
}
- TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
+ TRACE(("FREE-PAGE: %u leaf on trunk page %u\n",pPage->pgno,pTrunk->pgno));
goto freepage_out;
}
}
  /* If control flows to this point, then it was not possible to add
** the page being freed as a leaf page of the first trunk in the free-list.
- ** Possibly because the free-list is empty, or possibly because the
+ ** Possibly because the free-list is empty, or possibly because the
** first trunk in the free-list is full. Either way, the page being freed
** will become the new first trunk page in the free-list.
*/
@@ -6351,7 +6792,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
put4byte(pPage->aData, iTrunk);
put4byte(&pPage->aData[4], 0);
put4byte(&pPage1->aData[32], iPage);
- TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk));
+ TRACE(("FREE-PAGE: %u new trunk page replacing %u\n", pPage->pgno, iTrunk));
freepage_out:
if( pPage ){
@@ -6368,10 +6809,9 @@ static void freePage(MemPage *pPage, int *pRC){
}
/*
-** Free any overflow pages associated with the given Cell. Store
-** size information about the cell in pInfo.
+** Free the overflow pages associated with the given Cell.
*/
-static int clearCell(
+static SQLITE_NOINLINE int clearCellOverflow(
MemPage *pPage, /* The page that contains the Cell */
unsigned char *pCell, /* First byte of the Cell */
CellInfo *pInfo /* Size information about the cell */
@@ -6383,10 +6823,7 @@ static int clearCell(
u32 ovflPageSize;
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pPage->xParseCell(pPage, pCell, pInfo);
- if( pInfo->nLocal==pInfo->nPayload ){
- return SQLITE_OK; /* No overflow pages. Return without doing anything */
- }
+ assert( pInfo->nLocal!=pInfo->nPayload );
testcase( pCell + pInfo->nSize == pPage->aDataEnd );
testcase( pCell + (pInfo->nSize-1) == pPage->aDataEnd );
if( pCell + pInfo->nSize > pPage->aDataEnd ){
@@ -6398,15 +6835,15 @@ static int clearCell(
assert( pBt->usableSize > 4 );
ovflPageSize = pBt->usableSize - 4;
nOvfl = (pInfo->nPayload - pInfo->nLocal + ovflPageSize - 1)/ovflPageSize;
- assert( nOvfl>0 ||
+ assert( nOvfl>0 ||
(CORRUPT_DB && (pInfo->nPayload + ovflPageSize)<ovflPageSize)
);
while( nOvfl-- ){
Pgno iNext = 0;
MemPage *pOvfl = 0;
if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){
- /* 0 is not a legal page number and page 1 cannot be an
- ** overflow page. Therefore if ovflPgno<2 or past the end of the
+ /* 0 is not a legal page number and page 1 cannot be an
+ ** overflow page. Therefore if ovflPgno<2 or past the end of the
** file the database must be corrupt. */
return SQLITE_CORRUPT_BKPT;
}
@@ -6418,11 +6855,11 @@ static int clearCell(
if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) )
&& sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1
){
- /* There is no reason any cursor should have an outstanding reference
+ /* There is no reason any cursor should have an outstanding reference
** to an overflow page belonging to a cell that is being deleted/updated.
- ** So if there exists more than one reference to this page, then it
- ** must not really be an overflow page and the database must be corrupt.
- ** It is helpful to detect this before calling freePage2(), as
+ ** So if there exists more than one reference to this page, then it
+ ** must not really be an overflow page and the database must be corrupt.
+ ** It is helpful to detect this before calling freePage2(), as
** freePage2() may zero the page contents if secure-delete mode is
** enabled. If this 'overflow' page happens to be a page that the
** caller is iterating through or using in some other way, this
@@ -6442,6 +6879,21 @@ static int clearCell(
return SQLITE_OK;
}
+/* Call xParseCell to compute the size of a cell.  If the cell contains
+** overflow, then invoke clearCellOverflow() to clear out that overflow.
+** Store the result code (SQLITE_OK or some error code) in rc.
+**
+** Implemented as macro to force inlining for performance.
+*/
+#define BTREE_CLEAR_CELL(rc, pPage, pCell, sInfo) \
+ pPage->xParseCell(pPage, pCell, &sInfo); \
+ if( sInfo.nLocal!=sInfo.nPayload ){ \
+ rc = clearCellOverflow(pPage, pCell, &sInfo); \
+ }else{ \
+ rc = SQLITE_OK; \
+ }
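+/* A typical invocation of the macro above (illustrative sketch only;
+** the variable names are hypothetical):
+**
+**     int rc;
+**     CellInfo info;
+**     BTREE_CLEAR_CELL(rc, pPage, pCell, info);
+**     if( rc ) return rc;
+*/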
+
+
/*
** Create the byte sequence used to represent a cell on page pPage
** and write that byte sequence into pCell[]. Overflow pages are
@@ -6493,7 +6945,7 @@ static int fillInCell(
pSrc = pX->pKey;
nHeader += putVarint32(&pCell[nHeader], nPayload);
}
-
+
/* Fill in the payload */
pPayload = &pCell[nHeader];
if( nPayload<=pPage->maxLocal ){
@@ -6584,8 +7036,8 @@ static int fillInCell(
if( pBt->autoVacuum ){
do{
pgnoOvfl++;
- } while(
- PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
+ } while(
+ PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
);
}
#endif
@@ -6593,9 +7045,9 @@ static int fillInCell(
#ifndef SQLITE_OMIT_AUTOVACUUM
/* If the database supports auto-vacuum, and the second or subsequent
** overflow page is being allocated, add an entry to the pointer-map
- ** for that page now.
+ ** for that page now.
**
- ** If this is the first overflow page, then write a partial entry
+ ** If this is the first overflow page, then write a partial entry
** to the pointer-map. If we write nothing to this pointer-map slot,
** then the optimistic overflow chain processing in clearCell()
** may misinterpret the uninitialized values and delete the
@@ -6652,16 +7104,18 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */
if( *pRC ) return;
- assert( idx>=0 && idx<pPage->nCell );
+ assert( idx>=0 );
+ assert( idx<pPage->nCell );
assert( CORRUPT_DB || sz==cellSize(pPage, idx) );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->nFree>=0 );
data = pPage->aData;
ptr = &pPage->aCellIdx[2*idx];
+ assert( pPage->pBt->usableSize > (u32)(ptr-data) );
pc = get2byte(ptr);
hdr = pPage->hdrOffset;
- testcase( pc==get2byte(&data[hdr+5]) );
+ testcase( pc==(u32)get2byte(&data[hdr+5]) );
testcase( pc+sz==pPage->pBt->usableSize );
if( pc+sz > pPage->pBt->usableSize ){
*pRC = SQLITE_CORRUPT_BKPT;
@@ -6694,27 +7148,31 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
** will not fit, then make a copy of the cell content into pTemp if
** pTemp is not null. Regardless of pTemp, allocate a new entry
** in pPage->apOvfl[] and make it point to the cell content (either
-** in pTemp or the original pCell) and also record its index.
-** Allocating a new entry in pPage->aCell[] implies that
+** in pTemp or the original pCell) and also record its index.
+** Allocating a new entry in pPage->aCell[] implies that
** pPage->nOverflow is incremented.
**
-** *pRC must be SQLITE_OK when this routine is called.
+** The insertCellFast() routine below works exactly the same as
+** insertCell() except that it lacks the pTemp and iChild parameters
+** which are assumed zero. Other than that, the two routines are the
+** same.
+**
+** Fixes or enhancements to this routine should be reflected in
+** insertCellFast()!
*/
-static void insertCell(
+static int insertCell(
MemPage *pPage, /* Page into which we are copying */
int i, /* New cell becomes the i-th cell of the page */
u8 *pCell, /* Content of the new cell */
int sz, /* Bytes of content in pCell */
u8 *pTemp, /* Temp storage space for pCell, if needed */
- Pgno iChild, /* If non-zero, replace first 4 bytes with this value */
- int *pRC /* Read and write return code from here */
+ Pgno iChild /* If non-zero, replace first 4 bytes with this value */
){
int idx = 0; /* Where to write new cell content in data[] */
int j; /* Loop counter */
u8 *data; /* The content of the whole page */
u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */
- assert( *pRC==SQLITE_OK );
assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
assert( MX_CELL(pPage->pBt)<=10921 );
assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
@@ -6723,14 +7181,103 @@ static void insertCell(
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB );
assert( pPage->nFree>=0 );
+ assert( iChild>0 );
if( pPage->nOverflow || sz+2>pPage->nFree ){
if( pTemp ){
memcpy(pTemp, pCell, sz);
pCell = pTemp;
}
- if( iChild ){
- put4byte(pCell, iChild);
+ put4byte(pCell, iChild);
+ j = pPage->nOverflow++;
+ /* Comparison against ArraySize-1 since we hold back one extra slot
+ ** as a contingency. In other words, never need more than 3 overflow
+ ** slots but 4 are allocated, just to be safe. */
+ assert( j < ArraySize(pPage->apOvfl)-1 );
+ pPage->apOvfl[j] = pCell;
+ pPage->aiOvfl[j] = (u16)i;
+
+ /* When multiple overflows occur, they are always sequential and in
+    ** sorted order.  These invariants arise because multiple overflows can
+ ** only occur when inserting divider cells into the parent page during
+ ** balancing, and the dividers are adjacent and sorted.
+ */
+ assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */
+ assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */
+ }else{
+ int rc = sqlite3PagerWrite(pPage->pDbPage);
+ if( NEVER(rc!=SQLITE_OK) ){
+ return rc;
+ }
+ assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+ data = pPage->aData;
+ assert( &data[pPage->cellOffset]==pPage->aCellIdx );
+ rc = allocateSpace(pPage, sz, &idx);
+ if( rc ){ return rc; }
+ /* The allocateSpace() routine guarantees the following properties
+ ** if it returns successfully */
+ assert( idx >= 0 );
+ assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB );
+ assert( idx+sz <= (int)pPage->pBt->usableSize );
+ pPage->nFree -= (u16)(2 + sz);
+ /* In a corrupt database where an entry in the cell index section of
+ ** a btree page has a value of 3 or less, the pCell value might point
+ ** as many as 4 bytes in front of the start of the aData buffer for
+ ** the source page. Make sure this does not cause problems by not
+ ** reading the first 4 bytes */
+ memcpy(&data[idx+4], pCell+4, sz-4);
+ put4byte(&data[idx], iChild);
+ pIns = pPage->aCellIdx + i*2;
+ memmove(pIns+2, pIns, 2*(pPage->nCell - i));
+ put2byte(pIns, idx);
+ pPage->nCell++;
+ /* increment the cell count */
+ if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++;
+ assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB );
+#ifndef SQLITE_OMIT_AUTOVACUUM
+ if( pPage->pBt->autoVacuum ){
+ int rc2 = SQLITE_OK;
+ /* The cell may contain a pointer to an overflow page. If so, write
+ ** the entry for the overflow page into the pointer map.
+ */
+ ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2);
+ if( rc2 ) return rc2;
}
+#endif
+ }
+ return SQLITE_OK;
+}
+
+/*
+** This variant of insertCell() assumes that the pTemp and iChild
+** parameters are both zero. Use this variant in sqlite3BtreeInsert()
+** for performance improvement, and also so that this variant is only
+** called from that one place, and is thus inlined, and thus runs much
+** faster.
+**
+** Fixes or enhancements to this routine should be reflected in
+** the insertCell() routine.
+*/
+static int insertCellFast(
+ MemPage *pPage, /* Page into which we are copying */
+ int i, /* New cell becomes the i-th cell of the page */
+ u8 *pCell, /* Content of the new cell */
+ int sz /* Bytes of content in pCell */
+){
+ int idx = 0; /* Where to write new cell content in data[] */
+ int j; /* Loop counter */
+ u8 *data; /* The content of the whole page */
+ u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */
+
+ assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
+ assert( MX_CELL(pPage->pBt)<=10921 );
+ assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
+ assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) );
+ assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) );
+ assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+ assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB );
+ assert( pPage->nFree>=0 );
+ assert( pPage->nOverflow==0 );
+ if( sz+2>pPage->nFree ){
j = pPage->nOverflow++;
/* Comparison against ArraySize-1 since we hold back one extra slot
** as a contingency. In other words, never need more than 3 overflow
@@ -6749,31 +7296,20 @@ static void insertCell(
}else{
int rc = sqlite3PagerWrite(pPage->pDbPage);
if( rc!=SQLITE_OK ){
- *pRC = rc;
- return;
+ return rc;
}
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
data = pPage->aData;
assert( &data[pPage->cellOffset]==pPage->aCellIdx );
rc = allocateSpace(pPage, sz, &idx);
- if( rc ){ *pRC = rc; return; }
+ if( rc ){ return rc; }
/* The allocateSpace() routine guarantees the following properties
** if it returns successfully */
assert( idx >= 0 );
assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB );
assert( idx+sz <= (int)pPage->pBt->usableSize );
pPage->nFree -= (u16)(2 + sz);
- if( iChild ){
- /* In a corrupt database where an entry in the cell index section of
- ** a btree page has a value of 3 or less, the pCell value might point
- ** as many as 4 bytes in front of the start of the aData buffer for
- ** the source page. Make sure this does not cause problems by not
- ** reading the first 4 bytes */
- memcpy(&data[idx+4], pCell+4, sz-4);
- put4byte(&data[idx], iChild);
- }else{
- memcpy(&data[idx], pCell, sz);
- }
+ memcpy(&data[idx], pCell, sz);
pIns = pPage->aCellIdx + i*2;
memmove(pIns+2, pIns, 2*(pPage->nCell - i));
put2byte(pIns, idx);
@@ -6783,13 +7319,16 @@ static void insertCell(
assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB );
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pPage->pBt->autoVacuum ){
+ int rc2 = SQLITE_OK;
/* The cell may contain a pointer to an overflow page. If so, write
** the entry for the overflow page into the pointer map.
*/
- ptrmapPutOvflPtr(pPage, pPage, pCell, pRC);
+ ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2);
+ if( rc2 ) return rc2;
}
#endif
}
+ return SQLITE_OK;
}
/*
@@ -6890,14 +7429,16 @@ struct CellArray {
** computed.
*/
static void populateCellCache(CellArray *p, int idx, int N){
+ MemPage *pRef = p->pRef;
+ u16 *szCell = p->szCell;
assert( idx>=0 && idx+N<=p->nCell );
while( N>0 ){
assert( p->apCell[idx]!=0 );
- if( p->szCell[idx]==0 ){
- p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]);
+ if( szCell[idx]==0 ){
+ szCell[idx] = pRef->xCellSize(pRef, p->apCell[idx]);
}else{
assert( CORRUPT_DB ||
- p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) );
+ szCell[idx]==pRef->xCellSize(pRef, p->apCell[idx]) );
}
idx++;
N--;
@@ -6920,16 +7461,16 @@ static u16 cachedCellSize(CellArray *p, int N){
}
/*
-** Array apCell[] contains pointers to nCell b-tree page cells. The
+** Array apCell[] contains pointers to nCell b-tree page cells. The
** szCell[] array contains the size in bytes of each cell. This function
** replaces the current contents of page pPg with the contents of the cell
** array.
**
** Some of the cells in apCell[] may currently be stored in pPg. This
-** function works around problems caused by this by making a copy of any
+** function works around problems caused by this by making a copy of any
** such cells before overwriting the page data.
**
-** The MemPage.nFree field is invalidated by this function. It is the
+** The MemPage.nFree field is invalidated by this function. It is the
** responsibility of the caller to set it correctly.
*/
static int rebuildPage(
@@ -6951,12 +7492,13 @@ static int rebuildPage(
int k; /* Current slot in pCArray->apEnd[] */
u8 *pSrcEnd; /* Current pCArray->apEnd[k] value */
+ assert( nCell>0 );
assert( i<iEnd );
j = get2byte(&aData[hdr+5]);
- if( NEVER(j>(u32)usableSize) ){ j = 0; }
+ if( j>(u32)usableSize ){ j = 0; }
memcpy(&pTmp[j], &aData[j], usableSize - j);
- for(k=0; pCArray->ixNx[k]<=i && ALWAYS(k<NB*2); k++){}
+ for(k=0; ALWAYS(k<NB*2) && pCArray->ixNx[k]<=i; k++){}
pSrcEnd = pCArray->apEnd[k];
pData = pEnd;
@@ -6964,7 +7506,7 @@ static int rebuildPage(
u8 *pCell = pCArray->apCell[i];
u16 sz = pCArray->szCell[i];
assert( sz>0 );
- if( SQLITE_WITHIN(pCell,aData,pEnd) ){
+ if( SQLITE_WITHIN(pCell,aData+j,pEnd) ){
if( ((uptr)(pCell+sz))>(uptr)pEnd ) return SQLITE_CORRUPT_BKPT;
pCell = &pTmp[pCell - aData];
}else if( (uptr)(pCell+sz)>(uptr)pSrcEnd
@@ -6977,9 +7519,8 @@ static int rebuildPage(
put2byte(pCellptr, (pData - aData));
pCellptr += 2;
if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT;
- memcpy(pData, pCell, sz);
+ memmove(pData, pCell, sz);
assert( sz==pPg->xCellSize(pPg, pCell) || CORRUPT_DB );
- testcase( sz!=pPg->xCellSize(pPg,pCell) )
i++;
if( i>=iEnd ) break;
if( pCArray->ixNx[k]<=i ){
@@ -7012,7 +7553,7 @@ static int rebuildPage(
** cell in the array. It is the responsibility of the caller to ensure
** that it is safe to overwrite this part of the cell-pointer array.
**
-** When this function is called, *ppData points to the start of the
+** When this function is called, *ppData points to the start of the
** content area on page pPg. If the size of the content area is extended,
** *ppData is updated to point to the new start of the content area
** before returning.
@@ -7020,7 +7561,7 @@ static int rebuildPage(
** Finally, argument pBegin points to the byte immediately following the
** end of the space required by this page for the cell-pointer area (for
** all cells - not just those inserted by the current call). If the content
-** area must be extended to before this point in order to accomodate all
+** area must be extended to before this point in order to accommodate all
** cells in apCell[], then the cells do not fit and non-zero is returned.
*/
static int pageInsertArray(
@@ -7040,7 +7581,7 @@ static int pageInsertArray(
u8 *pEnd; /* Maximum extent of cell data */
assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
if( iEnd<=iFirst ) return 0;
- for(k=0; pCArray->ixNx[k]<=i && ALWAYS(k<NB*2); k++){}
+ for(k=0; ALWAYS(k<NB*2) && pCArray->ixNx[k]<=i ; k++){}
pEnd = pCArray->apEnd[k];
while( 1 /*Exit by break*/ ){
int sz, rc;
@@ -7098,37 +7639,50 @@ static int pageFreeArray(
u8 * const pEnd = &aData[pPg->pBt->usableSize];
u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize];
int nRet = 0;
- int i;
+ int i, j;
int iEnd = iFirst + nCell;
- u8 *pFree = 0;
- int szFree = 0;
+ int nFree = 0;
+ int aOfst[10];
+ int aAfter[10];
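+  /* aOfst[j] and aAfter[j] hold the start and one-past-the-end offsets of
+  ** regions waiting to be freed; adjacent regions are merged before the
+  ** deferred freeSpace() calls below are made. */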
for(i=iFirst; i<iEnd; i++){
u8 *pCell = pCArray->apCell[i];
if( SQLITE_WITHIN(pCell, pStart, pEnd) ){
int sz;
+ int iAfter;
+ int iOfst;
/* No need to use cachedCellSize() here. The sizes of all cells that
** are to be freed have already been computing while deciding which
** cells need freeing */
sz = pCArray->szCell[i]; assert( sz>0 );
- if( pFree!=(pCell + sz) ){
- if( pFree ){
- assert( pFree>aData && (pFree - aData)<65536 );
- freeSpace(pPg, (u16)(pFree - aData), szFree);
+ iOfst = (u16)(pCell - aData);
+ iAfter = iOfst+sz;
+ for(j=0; j<nFree; j++){
+ if( aOfst[j]==iAfter ){
+ aOfst[j] = iOfst;
+ break;
+ }else if( aAfter[j]==iOfst ){
+ aAfter[j] = iAfter;
+ break;
}
- pFree = pCell;
- szFree = sz;
- if( pFree+sz>pEnd ) return 0;
- }else{
- pFree = pCell;
- szFree += sz;
+ }
+ if( j>=nFree ){
+ if( nFree>=(int)(sizeof(aOfst)/sizeof(aOfst[0])) ){
+ for(j=0; j<nFree; j++){
+ freeSpace(pPg, aOfst[j], aAfter[j]-aOfst[j]);
+ }
+ nFree = 0;
+ }
+ aOfst[nFree] = iOfst;
+ aAfter[nFree] = iAfter;
+ if( &aData[iAfter]>pEnd ) return 0;
+ nFree++;
}
nRet++;
}
}
- if( pFree ){
- assert( pFree>aData && (pFree - aData)<65536 );
- freeSpace(pPg, (u16)(pFree - aData), szFree);
+ for(j=0; j<nFree; j++){
+ freeSpace(pPg, aOfst[j], aAfter[j]-aOfst[j]);
}
return nRet;
}
@@ -7181,8 +7735,9 @@ static int editPage(
nCell -= nTail;
}
- pData = &aData[get2byteNotZero(&aData[hdr+5])];
+ pData = &aData[get2byte(&aData[hdr+5])];
if( pData<pBegin ) goto editpage_fail;
+ if( NEVER(pData>pPg->aDataEnd) ) goto editpage_fail;
/* Add cells to the start of the page */
if( iNew<iOld ){
@@ -7244,6 +7799,7 @@ static int editPage(
return SQLITE_OK;
editpage_fail:
/* Unable to edit this page. Rebuild it from scratch instead. */
+ if( nNew<1 ) return SQLITE_CORRUPT_BKPT;
populateCellCache(pCArray, iNew, nNew);
return rebuildPage(pCArray, iNew, nNew, pPg);
}
@@ -7282,12 +7838,12 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
assert( pPage->nOverflow==1 );
-
+
if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */
assert( pPage->nFree>=0 );
assert( pParent->nFree>=0 );
- /* Allocate a new page. This page will become the right-sibling of
+ /* Allocate a new page. This page will become the right-sibling of
** pPage. Make the parent page writable, so that the new divider cell
** may be inserted. If both these operations are successful, proceed.
*/
@@ -7318,28 +7874,28 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
/* If this is an auto-vacuum database, update the pointer map
- ** with entries for the new page, and any pointer from the
+ ** with entries for the new page, and any pointer from the
** cell on the page to an overflow page. If either of these
** operations fails, the return code is set, but the contents
- ** of the parent page are still manipulated by thh code below.
+ ** of the parent page are still manipulated by the code below.
** That is Ok, at this point the parent page is guaranteed to
** be marked as dirty. Returning an error code will cause a
** rollback, undoing any changes made to the parent page.
*/
- if( ISAUTOVACUUM ){
+ if( ISAUTOVACUUM(pBt) ){
ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc);
if( szCell>pNew->minLocal ){
ptrmapPutOvflPtr(pNew, pNew, pCell, &rc);
}
}
-
+
/* Create a divider cell to insert into pParent. The divider cell
** consists of a 4-byte page number (the page number of pPage) and
** a variable length key value (which must be the same value as the
** largest key on pPage).
**
- ** To find the largest key value on pPage, first find the right-most
- ** cell on pPage. The first two fields of this cell are the
+ ** To find the largest key value on pPage, first find the right-most
+ ** cell on pPage. The first two fields of this cell are the
** record-length (a variable length integer at most 32-bits in size)
** and the key value (a variable length integer, may have any value).
** The first of the while(...) loops below skips over the record-length
@@ -7354,13 +7910,13 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
/* Insert the new divider cell into pParent. */
if( rc==SQLITE_OK ){
- insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace),
- 0, pPage->pgno, &rc);
+ rc = insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace),
+ 0, pPage->pgno);
}
/* Set the right-child pointer of pParent to point to the new page. */
put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
-
+
/* Release the reference to the new page. */
releasePage(pNew);
}
@@ -7372,7 +7928,7 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
#if 0
/*
** This function does not contribute anything to the operation of SQLite.
-** it is sometimes activated temporarily while debugging code responsible
+** it is sometimes activated temporarily while debugging code responsible
** for setting pointer-map entries.
*/
static int ptrmapCheckPages(MemPage **apPage, int nPage){
@@ -7387,7 +7943,7 @@ static int ptrmapCheckPages(MemPage **apPage, int nPage){
for(j=0; j<pPage->nCell; j++){
CellInfo info;
u8 *z;
-
+
z = findCell(pPage, j);
pPage->xParseCell(pPage, z, &info);
if( info.nLocal<info.nPayload ){
@@ -7412,7 +7968,7 @@ static int ptrmapCheckPages(MemPage **apPage, int nPage){
#endif
/*
-** This function is used to copy the contents of the b-tree node stored
+** This function is used to copy the contents of the b-tree node stored
** on page pFrom to page pTo. If page pFrom was not a leaf page, then
** the pointer-map entries for each child page are updated so that the
** parent page stored in the pointer map is page pTo. If pFrom contained
@@ -7420,11 +7976,11 @@ static int ptrmapCheckPages(MemPage **apPage, int nPage){
** map entries are also updated so that the parent page is page pTo.
**
** If pFrom is currently carrying any overflow cells (entries in the
-** MemPage.apOvfl[] array), they are not copied to pTo.
+** MemPage.apOvfl[] array), they are not copied to pTo.
**
** Before returning, page pTo is reinitialized using btreeInitPage().
**
-** The performance of this function is not critical. It is only used by
+** The performance of this function is not critical. It is only used by
** the balance_shallower() and balance_deeper() procedures, neither of
** which are called often under normal circumstances.
*/
@@ -7437,20 +7993,20 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
int const iToHdr = ((pTo->pgno==1) ? 100 : 0);
int rc;
int iData;
-
-
+
+
assert( pFrom->isInit );
assert( pFrom->nFree>=iToHdr );
assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize );
-
+
/* Copy the b-tree node content from page pFrom to page pTo. */
iData = get2byte(&aFrom[iFromHdr+5]);
memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData);
memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell);
-
+
/* Reinitialize page pTo so that the contents of the MemPage structure
** match the new data. The initialization of pTo can actually fail under
- ** fairly obscure circumstances, even though it is a copy of initialized
+ ** fairly obscure circumstances, even though it is a copy of initialized
** page pFrom.
*/
pTo->isInit = 0;
@@ -7460,11 +8016,11 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
*pRC = rc;
return;
}
-
+
/* If this is an auto-vacuum database, update the pointer-map entries
** for any b-tree or overflow pages that pTo now contains the pointers to.
*/
- if( ISAUTOVACUUM ){
+ if( ISAUTOVACUUM(pBt) ){
*pRC = setChildPtrmaps(pTo);
}
}
@@ -7475,13 +8031,13 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
** (hereafter "the page") and up to 2 siblings so that all pages have about the
** same amount of free space. Usually a single sibling on either side of the
** page are used in the balancing, though both siblings might come from one
-** side if the page is the first or last child of its parent. If the page
+** side if the page is the first or last child of its parent. If the page
** has fewer than 2 siblings (something which can only happen if the page
** is a root page or a child of a root page) then all available siblings
** participate in the balancing.
**
-** The number of siblings of the page might be increased or decreased by
-** one or two in an effort to keep pages nearly full but not over full.
+** The number of siblings of the page might be increased or decreased by
+** one or two in an effort to keep pages nearly full but not over full.
**
** Note that when this routine is called, some of the cells on the page
** might not actually be stored in MemPage.aData[]. This can happen
@@ -7492,7 +8048,7 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
** inserted into or removed from the parent page (pParent). Doing so
** may cause the parent page to become overfull or underfull. If this
** happens, it is the responsibility of the caller to invoke the correct
-** balancing routine to fix this problem (see the balance() routine).
+** balancing routine to fix this problem (see the balance() routine).
**
** If this routine fails for any reason, it might leave the database
** in a corrupted state. So if this routine fails, the database should
@@ -7507,7 +8063,7 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
** of the page-size, the aOvflSpace[] buffer is guaranteed to be large
** enough for all overflow cells.
**
-** If aOvflSpace is set to a null pointer, this function returns
+** If aOvflSpace is set to a null pointer, this function returns
** SQLITE_NOMEM.
*/
static int balance_nonroot(
@@ -7542,19 +8098,16 @@ static int balance_nonroot(
Pgno pgno; /* Temp var to store a page number in */
u8 abDone[NB+2]; /* True after i'th new page is populated */
Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */
- Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */
- u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */
- CellArray b; /* Parsed information on cells being balanced */
+ CellArray b; /* Parsed information on cells being balanced */
memset(abDone, 0, sizeof(abDone));
- b.nCell = 0;
- b.apCell = 0;
+ memset(&b, 0, sizeof(b));
pBt = pParent->pBt;
assert( sqlite3_mutex_held(pBt->mutex) );
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
/* At this point pParent may have at most one overflow cell. And if
- ** this overflow cell is present, it must be the cell with
+ ** this overflow cell is present, it must be the cell with
** index iParentIdx. This scenario comes about when this function
** is called (indirectly) from sqlite3BtreeDelete().
*/
@@ -7566,11 +8119,11 @@ static int balance_nonroot(
}
assert( pParent->nFree>=0 );
- /* Find the sibling pages to balance. Also locate the cells in pParent
- ** that divide the siblings. An attempt is made to find NN siblings on
- ** either side of pPage. More siblings are taken from one side, however,
+ /* Find the sibling pages to balance. Also locate the cells in pParent
+ ** that divide the siblings. An attempt is made to find NN siblings on
+ ** either side of pPage. More siblings are taken from one side, however,
** if there are fewer than NN siblings on the other side. If pParent
- ** has NB or fewer children then all children of pParent are taken.
+ ** has NB or fewer children then all children of pParent are taken.
**
** This loop also drops the divider cells from the parent page. This
** way, the remainder of the function does not have to deal with any
@@ -7582,7 +8135,7 @@ static int balance_nonroot(
nxDiv = 0;
}else{
assert( bBulk==0 || bBulk==1 );
- if( iParentIdx==0 ){
+ if( iParentIdx==0 ){
nxDiv = 0;
}else if( iParentIdx==i ){
nxDiv = i-2+bBulk;
@@ -7599,7 +8152,9 @@ static int balance_nonroot(
}
pgno = get4byte(pRight);
while( 1 ){
- rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0);
+ if( rc==SQLITE_OK ){
+ rc = getAndInitPage(pBt, pgno, &apOld[i], 0);
+ }
if( rc ){
memset(apOld, 0, (i+1)*sizeof(MemPage*));
goto balance_cleanup;
@@ -7611,6 +8166,7 @@ static int balance_nonroot(
goto balance_cleanup;
}
}
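+    /* Keep a running upper bound on the number of cells that this balance
+    ** operation may need to track. */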
+ nMaxCells += apOld[i]->nCell + ArraySize(pParent->apOvfl);
if( (i--)==0 ) break;
if( pParent->nOverflow && i+nxDiv==pParent->aiOvfl[0] ){
@@ -7628,7 +8184,7 @@ static int balance_nonroot(
** This is safe because dropping a cell only overwrites the first
** four bytes of it, and this function does not need the first
** four bytes of the divider cell. So the pointer is safe to use
- ** later on.
+ ** later on.
**
** But not if we are in secure-delete mode. In secure-delete mode,
** the dropCell() routine will overwrite the entire cell with zeroes.
@@ -7638,12 +8194,10 @@ static int balance_nonroot(
if( pBt->btsFlags & BTS_FAST_SECURE ){
int iOff;
+ /* If the following if() condition is not true, the db is corrupted.
+ ** The call to dropCell() below will detect this. */
iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData);
- if( (iOff+szNew[i])>(int)pBt->usableSize ){
- rc = SQLITE_CORRUPT_BKPT;
- memset(apOld, 0, (i+1)*sizeof(MemPage*));
- goto balance_cleanup;
- }else{
+ if( (iOff+szNew[i])<=(int)pBt->usableSize ){
memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]);
apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData];
}
@@ -7654,7 +8208,6 @@ static int balance_nonroot(
/* Make nMaxCells a multiple of 4 in order to preserve 8-byte
** alignment */
- nMaxCells = nOld*(MX_CELL(pBt) + ArraySize(pParent->apOvfl));
nMaxCells = (nMaxCells + 3)&~3;
/*
@@ -7771,7 +8324,7 @@ static int balance_nonroot(
b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection;
if( !pOld->leaf ){
assert( leafCorrection==0 );
- assert( pOld->hdrOffset==0 );
+ assert( pOld->hdrOffset==0 || CORRUPT_DB );
/* The right pointer of the child page pOld becomes the left
** pointer of the divider cell */
memcpy(b.apCell[b.nCell], &pOld->aData[8], 4);
@@ -7794,7 +8347,7 @@ static int balance_nonroot(
** Figure out the number of pages needed to hold all b.nCell cells.
** Store this number in "k". Also compute szNew[] which is the total
** size of all cells on the i-th page and cntNew[] which is the index
- ** in b.apCell[] of the cell that divides page i from page i+1.
+ ** in b.apCell[] of the cell that divides page i from page i+1.
** cntNew[k] should equal b.nCell.
**
** Values computed by this block:
@@ -7804,7 +8357,7 @@ static int balance_nonroot(
** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to
** the right of the i-th sibling page.
** usableSpace: Number of bytes of space available on each sibling.
- **
+ **
*/
usableSpace = pBt->usableSize - 12 + leafCorrection;
for(i=k=0; i<nOld; i++, k++){
@@ -7891,15 +8444,17 @@ static int balance_nonroot(
d = r + 1 - leafData;
(void)cachedCellSize(&b, d);
do{
+ int szR, szD;
assert( d<nMaxCells );
assert( r<nMaxCells );
- (void)cachedCellSize(&b, r);
+ szR = cachedCellSize(&b, r);
+ szD = b.szCell[d];
if( szRight!=0
- && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+(i==k-1?0:2)))){
+ && (bBulk || szRight+szD+2 > szLeft-(szR+(i==k-1?0:2)))){
break;
}
- szRight += b.szCell[d] + 2;
- szLeft -= b.szCell[r] + 2;
+ szRight += szD + 2;
+ szLeft -= szR + 2;
cntNew[i-1] = r;
r--;
d--;
@@ -7912,7 +8467,7 @@ static int balance_nonroot(
}
}
- /* Sanity check: For a non-corrupt database file one of the follwing
+ /* Sanity check: For a non-corrupt database file one of the following
** must be true:
** (1) We found one or more cells (cntNew[0])>0), or
** (2) pPage is a virtual root page. A virtual root page is when
@@ -7920,7 +8475,7 @@ static int balance_nonroot(
** that page.
*/
assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB);
- TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n",
+ TRACE(("BALANCE: old: %u(nc=%u) %u(nc=%u) %u(nc=%u)\n",
apOld[0]->pgno, apOld[0]->nCell,
nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0,
nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0
@@ -7937,6 +8492,11 @@ static int balance_nonroot(
apOld[i] = 0;
rc = sqlite3PagerWrite(pNew->pDbPage);
nNew++;
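+      /* The only expected references to the new sibling page are the one
+      ** held by this function, plus one more when this is the sibling that
+      ** the cursor itself is pointing at. Any other reference count suggests
+      ** the page is linked into the tree more than once, i.e. corruption. */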
+ if( sqlite3PagerPageRefcount(pNew->pDbPage)!=1+(i==(iParentIdx-nxDiv))
+ && rc==SQLITE_OK
+ ){
+ rc = SQLITE_CORRUPT_BKPT;
+ }
if( rc ) goto balance_cleanup;
}else{
assert( i>0 );
@@ -7948,7 +8508,7 @@ static int balance_nonroot(
cntOld[i] = b.nCell;
/* Set the pointer-map entry for the new sibling page. */
- if( ISAUTOVACUUM ){
+ if( ISAUTOVACUUM(pBt) ){
ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc);
if( rc!=SQLITE_OK ){
goto balance_cleanup;
@@ -7958,52 +8518,49 @@ static int balance_nonroot(
}
/*
- ** Reassign page numbers so that the new pages are in ascending order.
+ ** Reassign page numbers so that the new pages are in ascending order.
** This helps to keep entries in the disk file in order so that a scan
- ** of the table is closer to a linear scan through the file. That in turn
+ ** of the table is closer to a linear scan through the file. That in turn
** helps the operating system to deliver pages from the disk more rapidly.
**
- ** An O(n^2) insertion sort algorithm is used, but since n is never more
- ** than (NB+2) (a small constant), that should not be a problem.
+ ** An O(N*N) sort algorithm is used, but since N is never more than NB+2
+ ** (5), that is not a performance concern.
**
- ** When NB==3, this one optimization makes the database about 25% faster
+ ** When NB==3, this one optimization makes the database about 25% faster
** for large insertions and deletions.
*/
for(i=0; i<nNew; i++){
- aPgOrder[i] = aPgno[i] = apNew[i]->pgno;
- aPgFlags[i] = apNew[i]->pDbPage->flags;
- for(j=0; j<i; j++){
- if( aPgno[j]==aPgno[i] ){
- /* This branch is taken if the set of sibling pages somehow contains
- ** duplicate entries. This can happen if the database is corrupt.
- ** It would be simpler to detect this as part of the loop below, but
- ** we do the detection here in order to avoid populating the pager
- ** cache with two separate objects associated with the same
- ** page number. */
- assert( CORRUPT_DB );
- rc = SQLITE_CORRUPT_BKPT;
- goto balance_cleanup;
- }
- }
+ aPgno[i] = apNew[i]->pgno;
+ assert( apNew[i]->pDbPage->flags & PGHDR_WRITEABLE );
+ assert( apNew[i]->pDbPage->flags & PGHDR_DIRTY );
}
- for(i=0; i<nNew; i++){
- int iBest = 0; /* aPgno[] index of page number to use */
- for(j=1; j<nNew; j++){
- if( aPgOrder[j]<aPgOrder[iBest] ) iBest = j;
- }
- pgno = aPgOrder[iBest];
- aPgOrder[iBest] = 0xffffffff;
- if( iBest!=i ){
- if( iBest>i ){
- sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0);
- }
- sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]);
- apNew[i]->pgno = pgno;
+ for(i=0; i<nNew-1; i++){
+ int iB = i;
+ for(j=i+1; j<nNew; j++){
+ if( apNew[j]->pgno < apNew[iB]->pgno ) iB = j;
}
- }
- TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) "
- "%d(%d nc=%d) %d(%d nc=%d)\n",
+ /* If apNew[i] has a page number that is bigger than any of the
+    ** subsequent apNew[] entries, then swap apNew[i] with the subsequent
+ ** entry that has the smallest page number (which we know to be
+ ** entry apNew[iB]).
+ */
+ if( iB!=i ){
+ Pgno pgnoA = apNew[i]->pgno;
+ Pgno pgnoB = apNew[iB]->pgno;
+ Pgno pgnoTemp = (PENDING_BYTE/pBt->pageSize)+1;
+ u16 fgA = apNew[i]->pDbPage->flags;
+ u16 fgB = apNew[iB]->pDbPage->flags;
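+      /* Swap the two page numbers by way of a temporary (the pending-byte
+      ** page number, which never holds b-tree content) so that no two
+      ** pager entries ever carry the same page number at once. */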
+ sqlite3PagerRekey(apNew[i]->pDbPage, pgnoTemp, fgB);
+ sqlite3PagerRekey(apNew[iB]->pDbPage, pgnoA, fgA);
+ sqlite3PagerRekey(apNew[i]->pDbPage, pgnoB, fgB);
+ apNew[i]->pgno = pgnoB;
+ apNew[iB]->pgno = pgnoA;
+ }
+ }
+
+ TRACE(("BALANCE: new: %u(%u nc=%u) %u(%u nc=%u) %u(%u nc=%u) "
+ "%u(%u nc=%u) %u(%u nc=%u)\n",
apNew[0]->pgno, szNew[0], cntNew[0],
nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0,
nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0,
@@ -8021,14 +8578,14 @@ static int balance_nonroot(
put4byte(pRight, apNew[nNew-1]->pgno);
/* If the sibling pages are not leaves, ensure that the right-child pointer
- ** of the right-most new sibling page is set to the value that was
+ ** of the right-most new sibling page is set to the value that was
** originally in the same field of the right-most old sibling page. */
if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){
MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1];
memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4);
}
- /* Make any required updates to pointer map entries associated with
+ /* Make any required updates to pointer map entries associated with
** cells stored on sibling pages following the balance operation. Pointer
** map entries associated with divider cells are set by the insertCell()
** routine. The associated pointer map entries are:
@@ -8039,12 +8596,12 @@ static int balance_nonroot(
** b) if the sibling pages are not leaves, the child page associated
** with the cell.
**
- ** If the sibling pages are not leaves, then the pointer map entry
- ** associated with the right-child of each sibling may also need to be
- ** updated. This happens below, after the sibling pages have been
+ ** If the sibling pages are not leaves, then the pointer map entry
+ ** associated with the right-child of each sibling may also need to be
+ ** updated. This happens below, after the sibling pages have been
** populated, not here.
*/
- if( ISAUTOVACUUM ){
+ if( ISAUTOVACUUM(pBt) ){
MemPage *pOld;
MemPage *pNew = pOld = apNew[0];
int cntOldNext = pNew->nCell + pNew->nOverflow;
@@ -8066,7 +8623,7 @@ static int balance_nonroot(
}
/* Cell pCell is destined for new sibling page pNew. Originally, it
- ** was either part of sibling page iOld (possibly an overflow cell),
+ ** was either part of sibling page iOld (possibly an overflow cell),
** or else the divider cell to the left of sibling page iOld. So,
** if sibling page iOld had the same page number as pNew, and if
** pCell really was a part of sibling page iOld (not a divider or
@@ -8091,6 +8648,7 @@ static int balance_nonroot(
u8 *pCell;
u8 *pTemp;
int sz;
+ u8 *pSrcEnd;
MemPage *pNew = apNew[i];
j = cntNew[i];
@@ -8102,9 +8660,9 @@ static int balance_nonroot(
if( !pNew->leaf ){
memcpy(&pNew->aData[8], pCell, 4);
}else if( leafData ){
- /* If the tree is a leaf-data tree, and the siblings are leaves,
- ** then there is no divider cell in b.apCell[]. Instead, the divider
- ** cell consists of the integer key for the right-most cell of
+ /* If the tree is a leaf-data tree, and the siblings are leaves,
+ ** then there is no divider cell in b.apCell[]. Instead, the divider
+ ** cell consists of the integer key for the right-most cell of
** the sibling-page assembled above only.
*/
CellInfo info;
@@ -8117,9 +8675,9 @@ static int balance_nonroot(
pCell -= 4;
/* Obscure case for non-leaf-data trees: If the cell at pCell was
** previously stored on a leaf node, and its reported size was 4
- ** bytes, then it may actually be smaller than this
+ ** bytes, then it may actually be smaller than this
** (see btreeParseCellPtr(), 4 bytes is the minimum size of
- ** any cell). But it is important to pass the correct size to
+ ** any cell). But it is important to pass the correct size to
** insertCell(), so reparse the cell now.
**
** This can only happen for b-trees used to evaluate "IN (SELECT ...)"
@@ -8134,7 +8692,13 @@ static int balance_nonroot(
iOvflSpace += sz;
assert( sz<=pBt->maxLocal+23 );
assert( iOvflSpace <= (int)pBt->pageSize );
- insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
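+      /* Locate the end of the region that pCell was taken from and check
+      ** that the divider cell does not extend past that end before it is
+      ** copied into the parent page. */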
+ for(k=0; ALWAYS(k<NB*2) && b.ixNx[k]<=j; k++){}
+ pSrcEnd = b.apEnd[k];
+ if( SQLITE_OVERFLOW(pSrcEnd, pCell, pCell+sz) ){
+ rc = SQLITE_CORRUPT_BKPT;
+ goto balance_cleanup;
+ }
+ rc = insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno);
if( rc!=SQLITE_OK ) goto balance_cleanup;
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
}
@@ -8164,6 +8728,8 @@ static int balance_nonroot(
for(i=1-nNew; i<nNew; i++){
int iPg = i<0 ? -i : i;
assert( iPg>=0 && iPg<nNew );
+ assert( iPg>=1 || i>=0 );
+ assert( iPg<ArraySize(cntOld) );
if( abDone[iPg] ) continue; /* Skip pages already processed */
if( i>=0 /* On the upwards pass, or... */
|| cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */
@@ -8211,8 +8777,8 @@ static int balance_nonroot(
** b-tree structure by one. This is described as the "balance-shallower"
** sub-algorithm in some documentation.
**
- ** If this is an auto-vacuum database, the call to copyNodeContent()
- ** sets all pointer-map entries corresponding to database image pages
+ ** If this is an auto-vacuum database, the call to copyNodeContent()
+ ** sets all pointer-map entries corresponding to database image pages
** for which the pointer is stored within the content being copied.
**
** It is critical that the child page be defragmented before being
@@ -8223,14 +8789,14 @@ static int balance_nonroot(
assert( nNew==1 || CORRUPT_DB );
rc = defragmentPage(apNew[0], -1);
testcase( rc!=SQLITE_OK );
- assert( apNew[0]->nFree ==
+ assert( apNew[0]->nFree ==
(get2byteNotZero(&apNew[0]->aData[5]) - apNew[0]->cellOffset
- apNew[0]->nCell*2)
|| rc!=SQLITE_OK
);
copyNodeContent(apNew[0], pParent, &rc);
freePage(apNew[0], &rc);
- }else if( ISAUTOVACUUM && !leafCorrection ){
+ }else if( ISAUTOVACUUM(pBt) && !leafCorrection ){
/* Fix the pointer map entries associated with the right-child of each
** sibling page. All other pointer map entries have already been taken
** care of. */
@@ -8241,7 +8807,7 @@ static int balance_nonroot(
}
assert( pParent->isInit );
- TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
+ TRACE(("BALANCE: finished: old=%u new=%u cells=%u\n",
nOld, nNew, b.nCell));
/* Free any old pages that were not reused as new pages.
@@ -8251,9 +8817,9 @@ static int balance_nonroot(
}
#if 0
- if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){
+ if( ISAUTOVACUUM(pBt) && rc==SQLITE_OK && apNew[0]->isInit ){
/* The ptrmapCheckPages() contains assert() statements that verify that
- ** all pointer map pages are set correctly. This is helpful while
+ ** all pointer map pages are set correctly. This is helpful while
** debugging. This is usually disabled because a corrupt database may
** cause an assert() statement to fail. */
ptrmapCheckPages(apNew, nNew);
@@ -8283,15 +8849,15 @@ balance_cleanup:
**
** A new child page is allocated and the contents of the current root
** page, including overflow cells, are copied into the child. The root
-** page is then overwritten to make it an empty page with the right-child
+** page is then overwritten to make it an empty page with the right-child
** pointer pointing to the new page.
**
-** Before returning, all pointer-map entries corresponding to pages
+** Before returning, all pointer-map entries corresponding to pages
** that the new child-page now contains pointers to are updated. The
** entry corresponding to the new right-child pointer of the root
** page is also updated.
**
-** If successful, *ppChild is set to contain a reference to the child
+** If successful, *ppChild is set to contain a reference to the child
** page and SQLITE_OK is returned. In this case the caller is required
** to call releasePage() on *ppChild exactly once. If an error occurs,
** an error code is returned and *ppChild is set to 0.
@@ -8305,7 +8871,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){
assert( pRoot->nOverflow>0 );
assert( sqlite3_mutex_held(pBt->mutex) );
- /* Make pRoot, the root page of the b-tree, writable. Allocate a new
+ /* Make pRoot, the root page of the b-tree, writable. Allocate a new
** page that will become the new right-child of pPage. Copy the contents
** of the node stored on pRoot into the new child page.
*/
@@ -8313,7 +8879,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){
if( rc==SQLITE_OK ){
rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0);
copyNodeContent(pRoot, pChild, &rc);
- if( ISAUTOVACUUM ){
+ if( ISAUTOVACUUM(pBt) ){
ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc);
}
}
@@ -8326,7 +8892,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){
assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
assert( pChild->nCell==pRoot->nCell || CORRUPT_DB );
- TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno));
+ TRACE(("BALANCE: copy root %u into %u\n", pRoot->pgno, pChild->pgno));
/* Copy the overflow cells from pRoot to pChild */
memcpy(pChild->aiOvfl, pRoot->aiOvfl,
@@ -8347,7 +8913,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){
** Return SQLITE_CORRUPT if any cursor other than pCur is currently valid
** on the same B-tree as pCur.
**
-** This can if a database is corrupt with two or more SQL tables
+** This can occur if a database is corrupt with two or more SQL tables
** pointing to the same b-tree: an insert on one SQL table can cause a
** BEFORE TRIGGER to do a secondary insert on the other SQL table linked
** to the same b-tree. If the secondary insert causes a
@@ -8370,7 +8936,7 @@ static int anotherValidCursor(BtCursor *pCur){
/*
** The page that pCur currently points to has just been modified in
** some way. This function figures out if this modification means the
-** tree needs to be balanced, and if so calls the appropriate balancing
+** tree needs to be balanced, and if so calls the appropriate balancing
** routine. Balancing routines are:
**
** balance_quick()
@@ -8379,7 +8945,6 @@ static int anotherValidCursor(BtCursor *pCur){
*/
static int balance(BtCursor *pCur){
int rc = SQLITE_OK;
- const int nMin = pCur->pBt->usableSize * 2 / 3;
u8 aBalanceQuickSpace[13];
u8 *pFree = 0;
@@ -8391,7 +8956,11 @@ static int balance(BtCursor *pCur){
MemPage *pPage = pCur->pPage;
if( NEVER(pPage->nFree<0) && btreeComputeFreeSpace(pPage) ) break;
- if( pPage->nOverflow==0 && pPage->nFree<=nMin ){
+ if( pPage->nOverflow==0 && pPage->nFree*3<=(int)pCur->pBt->usableSize*2 ){
+ /* No rebalance required as long as:
+ ** (1) There are no overflow cells
+ ** (2) The amount of free space on the page is less than 2/3rds of
+ ** the total usable space on the page. */
break;
}else if( (iPage = pCur->iPage)==0 ){
if( pPage->nOverflow && (rc = anotherValidCursor(pCur))==SQLITE_OK ){
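The rewritten threshold above drops the precomputed nMin and tests nFree*3 <= usableSize*2 directly; for non-negative integers this holds exactly when the free space does not exceed two thirds of the usable page size, with no division step. A small illustrative helper (the function name is invented for this sketch):

    /* Nonzero if a page needs no rebalancing because its free space does
    ** not exceed two thirds of the usable page size.  Comparing nFree*3
    ** against usableSize*2 is equivalent to nFree <= (usableSize*2)/3 for
    ** non-negative inputs, but avoids the division. */
    static int pageSufficientlyFull(int nFree, int usableSize){
      return nFree*3 <= usableSize*2;
    }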
@@ -8399,7 +8968,7 @@ static int balance(BtCursor *pCur){
** balance_deeper() function to create a new child for the root-page
** and copy the current contents of the root-page to it. The
** next iteration of the do-loop will balance the child page.
- */
+ */
assert( balance_deeper_called==0 );
VVA_ONLY( balance_deeper_called++ );
rc = balance_deeper(pPage, &pCur->apPage[1]);
@@ -8414,6 +8983,11 @@ static int balance(BtCursor *pCur){
}else{
break;
}
+ }else if( sqlite3PagerPageRefcount(pPage->pDbPage)>1 ){
+ /* The page being written is not a root page, and there is currently
+ ** more than one reference to it. This only happens if the page is one
+ ** of its own ancestor pages. Corruption. */
+ rc = SQLITE_CORRUPT_BKPT;
}else{
MemPage * const pParent = pCur->apPage[iPage-1];
int const iIdx = pCur->aiIdx[iPage-1];
@@ -8433,17 +9007,17 @@ static int balance(BtCursor *pCur){
/* Call balance_quick() to create a new sibling of pPage on which
** to store the overflow cell. balance_quick() inserts a new cell
** into pParent, which may cause pParent overflow. If this
- ** happens, the next iteration of the do-loop will balance pParent
+ ** happens, the next iteration of the do-loop will balance pParent
 ** using either balance_nonroot() or balance_deeper(). Until this
** happens, the overflow cell is stored in the aBalanceQuickSpace[]
- ** buffer.
+ ** buffer.
**
** The purpose of the following assert() is to check that only a
** single call to balance_quick() is made for each call to this
** function. If this were not verified, a subtle bug involving reuse
** of the aBalanceQuickSpace[] might sneak in.
*/
- assert( balance_quick_called==0 );
+ assert( balance_quick_called==0 );
VVA_ONLY( balance_quick_called++ );
rc = balance_quick(pParent, pPage, aBalanceQuickSpace);
}else
@@ -8454,15 +9028,15 @@ static int balance(BtCursor *pCur){
** modifying the contents of pParent, which may cause pParent to
** become overfull or underfull. The next iteration of the do-loop
** will balance the parent page to correct this.
- **
+ **
** If the parent page becomes overfull, the overflow cell or cells
- ** are stored in the pSpace buffer allocated immediately below.
+ ** are stored in the pSpace buffer allocated immediately below.
** A subsequent iteration of the do-loop will deal with this by
** calling balance_nonroot() (balance_deeper() may be called first,
** but it doesn't deal with overflow cells - just moves them to a
- ** different page). Once this subsequent call to balance_nonroot()
+ ** different page). Once this subsequent call to balance_nonroot()
** has completed, it is safe to release the pSpace buffer used by
- ** the previous call, as the overflow cell data will have been
+ ** the previous call, as the overflow cell data will have been
** copied either into the body of a database page or into the new
** pSpace buffer passed to the latter call to balance_nonroot().
*/
@@ -8470,9 +9044,9 @@ static int balance(BtCursor *pCur){
rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1,
pCur->hints&BTREE_BULKLOAD);
if( pFree ){
- /* If pFree is not NULL, it points to the pSpace buffer used
+ /* If pFree is not NULL, it points to the pSpace buffer used
** by a previous call to balance_nonroot(). Its contents are
- ** now stored either on real database pages or within the
+ ** now stored either on real database pages or within the
** new pSpace buffer, so it may be safely freed here. */
sqlite3PageFree(pFree);
}
@@ -8512,7 +9086,7 @@ static int btreeOverwriteContent(
){
int nData = pX->nData - iOffset;
if( nData<=0 ){
- /* Overwritting with zeros */
+ /* Overwriting with zeros */
int i;
for(i=0; i<iAmt && pDest[i]==0; i++){}
if( i<iAmt ){
@@ -8544,9 +9118,13 @@ static int btreeOverwriteContent(
/*
** Overwrite the cell that cursor pCur is pointing to with fresh content
-** contained in pX.
+** contained in pX. In this variant, pCur is pointing to an overflow
+** cell.
*/
-static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
+static SQLITE_NOINLINE int btreeOverwriteOverflowCell(
+ BtCursor *pCur, /* Cursor pointing to cell to overwrite */
+ const BtreePayload *pX /* Content to write into the cell */
+){
int iOffset; /* Next byte of pX->pData to write */
 int nTotal = pX->nData + pX->nZero; /* Total bytes to write */
int rc; /* Return code */
@@ -8555,16 +9133,12 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
Pgno ovflPgno; /* Next overflow page to write */
u32 ovflPageSize; /* Size to write on overflow page */
- if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd
- || pCur->info.pPayload < pPage->aData + pPage->cellOffset
- ){
- return SQLITE_CORRUPT_BKPT;
- }
+ assert( pCur->info.nLocal<nTotal ); /* pCur is an overflow cell */
+
/* Overwrite the local portion first */
rc = btreeOverwriteContent(pPage, pCur->info.pPayload, pX,
0, pCur->info.nLocal);
if( rc ) return rc;
- if( pCur->info.nLocal==nTotal ) return SQLITE_OK;
/* Now overwrite the overflow pages */
iOffset = pCur->info.nLocal;
@@ -8576,7 +9150,7 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
do{
rc = btreeGetPage(pBt, ovflPgno, &pPage, 0);
if( rc ) return rc;
- if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 ){
+ if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 || pPage->isInit ){
rc = SQLITE_CORRUPT_BKPT;
}else{
if( iOffset+ovflPageSize<(u32)nTotal ){
@@ -8591,7 +9165,30 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
if( rc ) return rc;
iOffset += ovflPageSize;
}while( iOffset<nTotal );
- return SQLITE_OK;
+ return SQLITE_OK;
+}
+
+/*
+** Overwrite the cell that cursor pCur is pointing to with fresh content
+** contained in pX.
+*/
+static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
+ int nTotal = pX->nData + pX->nZero; /* Total bytes to write */
+ MemPage *pPage = pCur->pPage; /* Page being written */
+
+ if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd
+ || pCur->info.pPayload < pPage->aData + pPage->cellOffset
+ ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ if( pCur->info.nLocal==nTotal ){
+ /* The entire cell is local */
+ return btreeOverwriteContent(pPage, pCur->info.pPayload, pX,
+ 0, pCur->info.nLocal);
+ }else{
+ /* The cell contains overflow content */
+ return btreeOverwriteOverflowCell(pCur, pX);
+ }
}
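The split above between btreeOverwriteCell() and btreeOverwriteOverflowCell() turns on whether the whole payload (nData+nZero bytes) fits in the cell's local area; when it does not, the remainder lives on an overflow chain in which each page spends its first four bytes on the next page number and the rest on content. Two tiny helpers restating those facts (names invented for illustration):

    #include <stdint.h>

    /* Nonzero if an overwrite must also touch overflow pages: the local
    ** portion nLocal is smaller than the total payload nData+nZero. */
    static int overwriteNeedsOverflowPages(int64_t nData, int64_t nZero,
                                           int64_t nLocal){
      return nLocal != nData + nZero;
    }

    /* Content bytes carried by one overflow page: the first 4 bytes hold
    ** the page number of the next overflow page in the chain. */
    static uint32_t overflowContentPerPage(uint32_t usableSize){
      return usableSize - 4;
    }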
@@ -8607,11 +9204,11 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
** hold the content of the row.
**
** For an index btree (used for indexes and WITHOUT ROWID tables), the
-** key is an arbitrary byte sequence stored in pX.pKey,nKey. The
+** key is an arbitrary byte sequence stored in pX.pKey,nKey. The
** pX.pData,nData,nZero fields must be zero.
**
** If the seekResult parameter is non-zero, then a successful call to
-** MovetoUnpacked() to seek cursor pCur to (pKey,nKey) has already
+** sqlite3BtreeIndexMoveto() to seek cursor pCur to (pKey,nKey) has already
** been performed. In other words, if seekResult!=0 then the cursor
** is currently pointing to a cell that will be adjacent to the cell
** to be inserted. If seekResult<0 then pCur points to a cell that is
@@ -8629,7 +9226,7 @@ int sqlite3BtreeInsert(
BtCursor *pCur, /* Insert data into the table of this cursor */
const BtreePayload *pX, /* Content of the row to be inserted */
int flags, /* True if this is likely an append */
- int seekResult /* Result of prior MovetoUnpacked() call */
+ int seekResult /* Result of prior IndexMoveto() call */
){
int rc;
int loc = seekResult; /* -1: before desired location +1: after */
@@ -8637,53 +9234,68 @@ int sqlite3BtreeInsert(
int idx;
MemPage *pPage;
Btree *p = pCur->pBtree;
- BtShared *pBt = p->pBt;
unsigned char *oldCell;
unsigned char *newCell = 0;
- assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND))==flags );
-
- if( pCur->eState==CURSOR_FAULT ){
- assert( pCur->skipNext!=SQLITE_OK );
- return pCur->skipNext;
- }
-
- assert( cursorOwnsBtShared(pCur) );
- assert( (pCur->curFlags & BTCF_WriteFlag)!=0
- && pBt->inTransaction==TRANS_WRITE
- && (pBt->btsFlags & BTS_READ_ONLY)==0 );
- assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
-
- /* Assert that the caller has been consistent. If this cursor was opened
- ** expecting an index b-tree, then the caller should be inserting blob
- ** keys with no associated data. If the cursor was opened expecting an
- ** intkey table, the caller should be inserting integer keys with a
- ** blob of associated data. */
- assert( (pX->pKey==0)==(pCur->pKeyInfo==0) );
+ assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND|BTREE_PREFORMAT))==flags );
+ assert( (flags & BTREE_PREFORMAT)==0 || seekResult || pCur->pKeyInfo==0 );
/* Save the positions of any other cursors open on this table.
**
** In some cases, the call to btreeMoveto() below is a no-op. For
** example, when inserting data into a table with auto-generated integer
- ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the
- ** integer key to use. It then calls this function to actually insert the
+ ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the
+ ** integer key to use. It then calls this function to actually insert the
** data into the intkey B-Tree. In this case btreeMoveto() recognizes
** that the cursor is already where it needs to be and returns without
** doing any work. To avoid thwarting these optimizations, it is important
** not to clear the cursor here.
*/
if( pCur->curFlags & BTCF_Multiple ){
- rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
+ rc = saveAllCursors(p->pBt, pCur->pgnoRoot, pCur);
if( rc ) return rc;
+ if( loc && pCur->iPage<0 ){
+ /* This can only happen if the schema is corrupt such that there is more
+ ** than one table or index with the same root page as used by the cursor,
+ ** which can only happen if the SQLITE_NoSchemaError flag was set when
+ ** the schema was loaded. This cannot be asserted though, as a user might
+ ** set the flag, load the schema, and then unset the flag. */
+ return SQLITE_CORRUPT_BKPT;
+ }
}
+ /* Ensure that the cursor is not in the CURSOR_FAULT state and that it
+ ** points to a valid cell.
+ */
+ if( pCur->eState>=CURSOR_REQUIRESEEK ){
+ testcase( pCur->eState==CURSOR_REQUIRESEEK );
+ testcase( pCur->eState==CURSOR_FAULT );
+ rc = moveToRoot(pCur);
+ if( rc && rc!=SQLITE_EMPTY ) return rc;
+ }
+
+ assert( cursorOwnsBtShared(pCur) );
+ assert( (pCur->curFlags & BTCF_WriteFlag)!=0
+ && p->pBt->inTransaction==TRANS_WRITE
+ && (p->pBt->btsFlags & BTS_READ_ONLY)==0 );
+ assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
+
+ /* Assert that the caller has been consistent. If this cursor was opened
+ ** expecting an index b-tree, then the caller should be inserting blob
+ ** keys with no associated data. If the cursor was opened expecting an
+ ** intkey table, the caller should be inserting integer keys with a
+ ** blob of associated data. */
+ assert( (flags & BTREE_PREFORMAT) || (pX->pKey==0)==(pCur->pKeyInfo==0) );
+
if( pCur->pKeyInfo==0 ){
assert( pX->pKey==0 );
- /* If this is an insert into a table b-tree, invalidate any incrblob
+ /* If this is an insert into a table b-tree, invalidate any incrblob
** cursors open on the row being replaced */
- invalidateIncrblobCursors(p, pCur->pgnoRoot, pX->nKey, 0);
+ if( p->hasIncrblobCur ){
+ invalidateIncrblobCursors(p, pCur->pgnoRoot, pX->nKey, 0);
+ }
- /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
+ /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
** to a row with the same key as the new entry being inserted.
*/
#ifdef SQLITE_DEBUG
@@ -8714,13 +9326,14 @@ int sqlite3BtreeInsert(
** to an adjacent cell. Move the cursor so that it is pointing either
** to the cell to be overwritten or an adjacent cell.
*/
- rc = sqlite3BtreeMovetoUnpacked(pCur, 0, pX->nKey, flags!=0, &loc);
+ rc = sqlite3BtreeTableMoveto(pCur, pX->nKey,
+ (flags & BTREE_APPEND)!=0, &loc);
if( rc ) return rc;
}
}else{
/* This is an index or a WITHOUT ROWID table */
- /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
+ /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
** to a row with the same key as the new entry being inserted.
*/
assert( (flags & BTREE_SAVEPOSITION)==0 || loc==0 );
@@ -8737,13 +9350,11 @@ int sqlite3BtreeInsert(
r.aMem = pX->aMem;
r.nField = pX->nMem;
r.default_rc = 0;
- r.errCode = 0;
- r.r1 = 0;
- r.r2 = 0;
r.eqSeen = 0;
- rc = sqlite3BtreeMovetoUnpacked(pCur, &r, 0, flags!=0, &loc);
+ rc = sqlite3BtreeIndexMoveto(pCur, &r, &loc);
}else{
- rc = btreeMoveto(pCur, pX->pKey, pX->nKey, flags!=0, &loc);
+ rc = btreeMoveto(pCur, pX->pKey, pX->nKey,
+ (flags & BTREE_APPEND)!=0, &loc);
}
if( rc ) return rc;
}
@@ -8762,34 +9373,57 @@ int sqlite3BtreeInsert(
return btreeOverwriteCell(pCur, &x2);
}
}
-
}
- assert( pCur->eState==CURSOR_VALID
- || (pCur->eState==CURSOR_INVALID && loc)
- || CORRUPT_DB );
+ assert( pCur->eState==CURSOR_VALID
+ || (pCur->eState==CURSOR_INVALID && loc) || CORRUPT_DB );
pPage = pCur->pPage;
- assert( pPage->intKey || pX->nKey>=0 );
+ assert( pPage->intKey || pX->nKey>=0 || (flags & BTREE_PREFORMAT) );
assert( pPage->leaf || !pPage->intKey );
if( pPage->nFree<0 ){
- rc = btreeComputeFreeSpace(pPage);
+ if( NEVER(pCur->eState>CURSOR_INVALID) ){
+ /* ^^^^^--- due to the moveToRoot() call above */
+ rc = SQLITE_CORRUPT_BKPT;
+ }else{
+ rc = btreeComputeFreeSpace(pPage);
+ }
if( rc ) return rc;
}
- TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
+ TRACE(("INSERT: table=%u nkey=%lld ndata=%u page=%u %s\n",
pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno,
loc==0 ? "overwrite" : "new entry"));
- assert( pPage->isInit );
- newCell = pBt->pTmpSpace;
+ assert( pPage->isInit || CORRUPT_DB );
+ newCell = p->pBt->pTmpSpace;
assert( newCell!=0 );
- rc = fillInCell(pPage, newCell, pX, &szNew);
- if( rc ) goto end_insert;
+ assert( BTREE_PREFORMAT==OPFLAG_PREFORMAT );
+ if( flags & BTREE_PREFORMAT ){
+ rc = SQLITE_OK;
+ szNew = p->pBt->nPreformatSize;
+ if( szNew<4 ) szNew = 4;
+ if( ISAUTOVACUUM(p->pBt) && szNew>pPage->maxLocal ){
+ CellInfo info;
+ pPage->xParseCell(pPage, newCell, &info);
+ if( info.nPayload!=info.nLocal ){
+ Pgno ovfl = get4byte(&newCell[szNew-4]);
+ ptrmapPut(p->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc);
+ if( NEVER(rc) ) goto end_insert;
+ }
+ }
+ }else{
+ rc = fillInCell(pPage, newCell, pX, &szNew);
+ if( rc ) goto end_insert;
+ }
assert( szNew==pPage->xCellSize(pPage, newCell) );
- assert( szNew <= MX_CELL_SIZE(pBt) );
+ assert( szNew <= MX_CELL_SIZE(p->pBt) );
idx = pCur->ix;
+ pCur->info.nSize = 0;
if( loc==0 ){
CellInfo info;
- assert( idx<pPage->nCell );
+ assert( idx>=0 );
+ if( idx>=pPage->nCell ){
+ return SQLITE_CORRUPT_BKPT;
+ }
rc = sqlite3PagerWrite(pPage->pDbPage);
if( rc ){
goto end_insert;
@@ -8798,17 +9432,17 @@ int sqlite3BtreeInsert(
if( !pPage->leaf ){
memcpy(newCell, oldCell, 4);
}
- rc = clearCell(pPage, oldCell, &info);
+ BTREE_CLEAR_CELL(rc, pPage, oldCell, info);
testcase( pCur->curFlags & BTCF_ValidOvfl );
invalidateOverflowCache(pCur);
- if( info.nSize==szNew && info.nLocal==info.nPayload
- && (!ISAUTOVACUUM || szNew<pPage->minLocal)
+ if( info.nSize==szNew && info.nLocal==info.nPayload
+ && (!ISAUTOVACUUM(p->pBt) || szNew<pPage->minLocal)
){
/* Overwrite the old cell with the new if they are the same size.
** We could also try to do this if the old cell is smaller, then add
** the leftover space to the free list. But experiments show that
 ** doing that is no faster than skipping this optimization and just
- ** calling dropCell() and insertCell().
+ ** calling dropCell() and insertCell().
**
** This optimization cannot be used on an autovacuum database if the
** new entry uses overflow pages, as the insertCell() call below is
@@ -8832,11 +9466,11 @@ int sqlite3BtreeInsert(
}else{
assert( pPage->leaf );
}
- insertCell(pPage, idx, newCell, szNew, 0, 0, &rc);
+ rc = insertCellFast(pPage, idx, newCell, szNew);
assert( pPage->nOverflow==0 || rc==SQLITE_OK );
assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 );
- /* If no error has occurred and pPage has an overflow cell, call balance()
+ /* If no error has occurred and pPage has an overflow cell, call balance()
** to redistribute the cells within the tree. Since balance() may move
** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey
** variables.
@@ -8856,14 +9490,13 @@ int sqlite3BtreeInsert(
** larger than the largest existing key, it is possible to insert the
** row without seeking the cursor. This can be a big performance boost.
*/
- pCur->info.nSize = 0;
if( pPage->nOverflow ){
assert( rc==SQLITE_OK );
pCur->curFlags &= ~(BTCF_ValidNKey);
rc = balance(pCur);
/* Must make sure nOverflow is reset to zero even if the balance()
- ** fails. Internal data structure corruption will result otherwise.
+ ** fails. Internal data structure corruption will result otherwise.
** Also, set the cursor state to invalid. This stops saveCursorPosition()
** from trying to save the current position of the cursor. */
pCur->pPage->nOverflow = 0;
@@ -8890,7 +9523,119 @@ end_insert:
}
/*
-** Delete the entry that the cursor is pointing to.
+** This function is used as part of copying the current row from cursor
+** pSrc into cursor pDest. If the cursors are open on intkey tables, then
+** parameter iKey is used as the rowid value when the record is copied
+** into pDest. Otherwise, the record is copied verbatim.
+**
+** This function does not actually write the new value to cursor pDest.
+** Instead, it creates and populates any required overflow pages and
+** writes the data for the new cell into the BtShared.pTmpSpace buffer
+** for the destination database. The size of the cell, in bytes, is left
+** in BtShared.nPreformatSize. The caller completes the insertion by
+** calling sqlite3BtreeInsert() with the BTREE_PREFORMAT flag specified.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+*/
+int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 iKey){
+ BtShared *pBt = pDest->pBt;
+ u8 *aOut = pBt->pTmpSpace; /* Pointer to next output buffer */
+ const u8 *aIn; /* Pointer to next input buffer */
+ u32 nIn; /* Size of input buffer aIn[] */
+ u32 nRem; /* Bytes of data still to copy */
+
+ getCellInfo(pSrc);
+ if( pSrc->info.nPayload<0x80 ){
+ *(aOut++) = pSrc->info.nPayload;
+ }else{
+ aOut += sqlite3PutVarint(aOut, pSrc->info.nPayload);
+ }
+ if( pDest->pKeyInfo==0 ) aOut += putVarint(aOut, iKey);
+ nIn = pSrc->info.nLocal;
+ aIn = pSrc->info.pPayload;
+ if( aIn+nIn>pSrc->pPage->aDataEnd ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ nRem = pSrc->info.nPayload;
+ if( nIn==nRem && nIn<pDest->pPage->maxLocal ){
+ memcpy(aOut, aIn, nIn);
+ pBt->nPreformatSize = nIn + (aOut - pBt->pTmpSpace);
+ return SQLITE_OK;
+ }else{
+ int rc = SQLITE_OK;
+ Pager *pSrcPager = pSrc->pBt->pPager;
+ u8 *pPgnoOut = 0;
+ Pgno ovflIn = 0;
+ DbPage *pPageIn = 0;
+ MemPage *pPageOut = 0;
+ u32 nOut; /* Size of output buffer aOut[] */
+
+ nOut = btreePayloadToLocal(pDest->pPage, pSrc->info.nPayload);
+ pBt->nPreformatSize = nOut + (aOut - pBt->pTmpSpace);
+ if( nOut<pSrc->info.nPayload ){
+ pPgnoOut = &aOut[nOut];
+ pBt->nPreformatSize += 4;
+ }
+
+ if( nRem>nIn ){
+ if( aIn+nIn+4>pSrc->pPage->aDataEnd ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ ovflIn = get4byte(&pSrc->info.pPayload[nIn]);
+ }
+
+ do {
+ nRem -= nOut;
+ do{
+ assert( nOut>0 );
+ if( nIn>0 ){
+ int nCopy = MIN(nOut, nIn);
+ memcpy(aOut, aIn, nCopy);
+ nOut -= nCopy;
+ nIn -= nCopy;
+ aOut += nCopy;
+ aIn += nCopy;
+ }
+ if( nOut>0 ){
+ sqlite3PagerUnref(pPageIn);
+ pPageIn = 0;
+ rc = sqlite3PagerGet(pSrcPager, ovflIn, &pPageIn, PAGER_GET_READONLY);
+ if( rc==SQLITE_OK ){
+ aIn = (const u8*)sqlite3PagerGetData(pPageIn);
+ ovflIn = get4byte(aIn);
+ aIn += 4;
+ nIn = pSrc->pBt->usableSize - 4;
+ }
+ }
+ }while( rc==SQLITE_OK && nOut>0 );
+
+ if( rc==SQLITE_OK && nRem>0 && ALWAYS(pPgnoOut) ){
+ Pgno pgnoNew;
+ MemPage *pNew = 0;
+ rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
+ put4byte(pPgnoOut, pgnoNew);
+ if( ISAUTOVACUUM(pBt) && pPageOut ){
+ ptrmapPut(pBt, pgnoNew, PTRMAP_OVERFLOW2, pPageOut->pgno, &rc);
+ }
+ releasePage(pPageOut);
+ pPageOut = pNew;
+ if( pPageOut ){
+ pPgnoOut = pPageOut->aData;
+ put4byte(pPgnoOut, 0);
+ aOut = &pPgnoOut[4];
+ nOut = MIN(pBt->usableSize - 4, nRem);
+ }
+ }
+ }while( nRem>0 && rc==SQLITE_OK );
+
+ releasePage(pPageOut);
+ sqlite3PagerUnref(pPageIn);
+ return rc;
+ }
+}
+
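As described above, sqlite3BtreeTransferRow() only stages the new cell in BtShared.pTmpSpace; a follow-up sqlite3BtreeInsert() carrying BTREE_PREFORMAT completes the insert. A hedged sketch of how a caller might pair the two calls for an intkey destination cursor; transferOneRow() is invented for illustration and the error handling is abbreviated:

    static int transferOneRow(BtCursor *pSrc, BtCursor *pDest, i64 iKey){
      BtreePayload x;
      int rc = sqlite3BtreeTransferRow(pDest, pSrc, iKey); /* stage in pTmpSpace */
      if( rc==SQLITE_OK ){
        memset(&x, 0, sizeof(x));  /* cell image already staged; only the
                                   ** key is needed to position the cursor */
        x.nKey = iKey;
        rc = sqlite3BtreeInsert(pDest, &x, BTREE_PREFORMAT, 0);
      }
      return rc;
    }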
+/*
+** Delete the entry that the cursor is pointing to.
**
** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then
** the cursor is left pointing at an arbitrary location after the delete.
@@ -8908,15 +9653,14 @@ end_insert:
*/
int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
Btree *p = pCur->pBtree;
- BtShared *pBt = p->pBt;
- int rc; /* Return code */
- MemPage *pPage; /* Page to delete cell from */
- unsigned char *pCell; /* Pointer to cell to delete */
- int iCellIdx; /* Index of cell to delete */
- int iCellDepth; /* Depth of node containing pCell */
- CellInfo info; /* Size of the cell being deleted */
- int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */
- u8 bPreserve = flags & BTREE_SAVEPOSITION; /* Keep cursor valid */
+ BtShared *pBt = p->pBt;
+ int rc; /* Return code */
+ MemPage *pPage; /* Page to delete cell from */
+ unsigned char *pCell; /* Pointer to cell to delete */
+ int iCellIdx; /* Index of cell to delete */
+ int iCellDepth; /* Depth of node containing pCell */
+ CellInfo info; /* Size of the cell being deleted */
+ u8 bPreserve; /* Keep cursor valid. 2 for CURSOR_SKIPNEXT */
assert( cursorOwnsBtShared(pCur) );
assert( pBt->inTransaction==TRANS_WRITE );
@@ -8925,30 +9669,52 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
assert( !hasReadConflicts(p, pCur->pgnoRoot) );
assert( (flags & ~(BTREE_SAVEPOSITION | BTREE_AUXDELETE))==0 );
- if( pCur->eState==CURSOR_REQUIRESEEK ){
- rc = btreeRestoreCursorPosition(pCur);
- if( rc ) return rc;
+ if( pCur->eState!=CURSOR_VALID ){
+ if( pCur->eState>=CURSOR_REQUIRESEEK ){
+ rc = btreeRestoreCursorPosition(pCur);
+ assert( rc!=SQLITE_OK || CORRUPT_DB || pCur->eState==CURSOR_VALID );
+ if( rc || pCur->eState!=CURSOR_VALID ) return rc;
+ }else{
+ return SQLITE_CORRUPT_BKPT;
+ }
}
assert( pCur->eState==CURSOR_VALID );
iCellDepth = pCur->iPage;
iCellIdx = pCur->ix;
pPage = pCur->pPage;
+ if( pPage->nCell<=iCellIdx ){
+ return SQLITE_CORRUPT_BKPT;
+ }
pCell = findCell(pPage, iCellIdx);
- if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ) return SQLITE_CORRUPT;
+ if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ){
+ return SQLITE_CORRUPT_BKPT;
+ }
+ if( pCell<&pPage->aCellIdx[pPage->nCell] ){
+ return SQLITE_CORRUPT_BKPT;
+ }
- /* If the bPreserve flag is set to true, then the cursor position must
+ /* If the BTREE_SAVEPOSITION bit is on, then the cursor position must
** be preserved following this delete operation. If the current delete
** will cause a b-tree rebalance, then this is done by saving the cursor
- ** key and leaving the cursor in CURSOR_REQUIRESEEK state before
- ** returning.
+ ** key and leaving the cursor in CURSOR_REQUIRESEEK state before
+ ** returning.
**
- ** Or, if the current delete will not cause a rebalance, then the cursor
+ ** If the current delete will not cause a rebalance, then the cursor
** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately
- ** before or after the deleted entry. In this case set bSkipnext to true. */
+ ** before or after the deleted entry.
+ **
+ ** The bPreserve value records which path is required:
+ **
+ ** bPreserve==0 Not necessary to save the cursor position
+ ** bPreserve==1 Use CURSOR_REQUIRESEEK to save the cursor position
+ ** bPreserve==2 Cursor won't move. Set CURSOR_SKIPNEXT.
+ */
+ bPreserve = (flags & BTREE_SAVEPOSITION)!=0;
if( bPreserve ){
- if( !pPage->leaf
- || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3)
+ if( !pPage->leaf
+ || (pPage->nFree+pPage->xCellSize(pPage,pCell)+2) >
+ (int)(pBt->usableSize*2/3)
|| pPage->nCell==1 /* See dbfuzz001.test for a test case */
){
/* A b-tree rebalance will be required after deleting this entry.
@@ -8956,7 +9722,7 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
rc = saveCursorKey(pCur);
if( rc ) return rc;
}else{
- bSkipnext = 1;
+ bPreserve = 2;
}
}
@@ -8982,7 +9748,7 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
/* If this is a delete operation to remove a row from a table b-tree,
** invalidate any incrblob cursors open on the row being deleted. */
- if( pCur->pKeyInfo==0 ){
+ if( pCur->pKeyInfo==0 && p->hasIncrblobCur ){
invalidateIncrblobCursors(p, pCur->pgnoRoot, pCur->info.nKey, 0);
}
@@ -8991,7 +9757,7 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
** itself from within the page. */
rc = sqlite3PagerWrite(pPage->pDbPage);
if( rc ) return rc;
- rc = clearCell(pPage, pCell, &info);
+ BTREE_CLEAR_CELL(rc, pPage, pCell, info);
dropCell(pPage, iCellIdx, info.nSize, &rc);
if( rc ) return rc;
@@ -9023,7 +9789,7 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
assert( pTmp!=0 );
rc = sqlite3PagerWrite(pLeaf->pDbPage);
if( rc==SQLITE_OK ){
- insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
+ rc = insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n);
}
dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
if( rc ) return rc;
@@ -9042,9 +9808,17 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
** on the leaf node first. If the balance proceeds far enough up the
** tree that we can be sure that any problem in the internal node has
** been corrected, so be it. Otherwise, after balancing the leaf node,
- ** walk the cursor up the tree to the internal node and balance it as
+ ** walk the cursor up the tree to the internal node and balance it as
** well. */
- rc = balance(pCur);
+ assert( pCur->pPage->nOverflow==0 );
+ assert( pCur->pPage->nFree>=0 );
+ if( pCur->pPage->nFree*3<=(int)pCur->pBt->usableSize*2 ){
+ /* Optimization: If the free space is less than 2/3rds of the page,
+ ** then balance() will always be a no-op. No need to invoke it. */
+ rc = SQLITE_OK;
+ }else{
+ rc = balance(pCur);
+ }
if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){
releasePageNotNull(pCur->pPage);
pCur->iPage--;
@@ -9056,8 +9830,8 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
}
if( rc==SQLITE_OK ){
- if( bSkipnext ){
- assert( bPreserve && (pCur->iPage==iCellDepth || CORRUPT_DB) );
+ if( bPreserve>1 ){
+ assert( (pCur->iPage==iCellDepth || CORRUPT_DB) );
assert( pPage==pCur->pPage || CORRUPT_DB );
assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell );
pCur->eState = CURSOR_SKIPNEXT;
@@ -9090,12 +9864,12 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys
** BTREE_ZERODATA Used for SQL indices
*/
-static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
+static int btreeCreateTable(Btree *p, Pgno *piTable, int createTabFlags){
BtShared *pBt = p->pBt;
MemPage *pRoot;
Pgno pgnoRoot;
int rc;
- int ptfFlags; /* Page-type flage for the root page of new table */
+ int ptfFlags; /* Page-type flags for the root page of new table */
assert( sqlite3BtreeHoldsMutex(p) );
assert( pBt->inTransaction==TRANS_WRITE );
@@ -9123,6 +9897,9 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
** created so far, so the new root-page is (meta[3]+1).
*/
sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot);
+ if( pgnoRoot>btreePagecount(pBt) ){
+ return SQLITE_CORRUPT_BKPT;
+ }
pgnoRoot++;
/* The new root-page may not be allocated on a pointer-map page, or the
@@ -9132,8 +9909,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
pgnoRoot++;
}
- assert( pgnoRoot>=3 || CORRUPT_DB );
- testcase( pgnoRoot<3 );
+ assert( pgnoRoot>=3 );
/* Allocate a page. The page that currently resides at pgnoRoot will
** be moved to the allocated page (unless the allocated page happens
@@ -9196,7 +9972,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
}
}else{
pRoot = pPageMove;
- }
+ }
/* Update the pointer-map and meta-data with the new root-page number. */
ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc);
@@ -9230,10 +10006,10 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
zeroPage(pRoot, ptfFlags);
sqlite3PagerUnref(pRoot->pDbPage);
assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 );
- *piTable = (int)pgnoRoot;
+ *piTable = pgnoRoot;
return SQLITE_OK;
}
-int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
+int sqlite3BtreeCreateTable(Btree *p, Pgno *piTable, int flags){
int rc;
sqlite3BtreeEnter(p);
rc = btreeCreateTable(p, piTable, flags);
@@ -9249,7 +10025,7 @@ static int clearDatabasePage(
BtShared *pBt, /* The BTree that contains the table */
Pgno pgno, /* Page number to clear */
int freePageFlag, /* Deallocate page if true */
- int *pnChange /* Add number of Cells freed to this counter */
+ i64 *pnChange /* Add number of Cells freed to this counter */
){
MemPage *pPage;
int rc;
@@ -9262,13 +10038,14 @@ static int clearDatabasePage(
if( pgno>btreePagecount(pBt) ){
return SQLITE_CORRUPT_BKPT;
}
- rc = getAndInitPage(pBt, pgno, &pPage, 0, 0);
+ rc = getAndInitPage(pBt, pgno, &pPage, 0);
if( rc ) return rc;
- if( pPage->bBusy ){
+ if( (pBt->openFlags & BTREE_SINGLE)==0
+ && sqlite3PagerPageRefcount(pPage->pDbPage) != (1 + (pgno==1))
+ ){
rc = SQLITE_CORRUPT_BKPT;
goto cleardatabasepage_out;
}
- pPage->bBusy = 1;
hdr = pPage->hdrOffset;
for(i=0; i<pPage->nCell; i++){
pCell = findCell(pPage, i);
@@ -9276,14 +10053,15 @@ static int clearDatabasePage(
rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
if( rc ) goto cleardatabasepage_out;
}
- rc = clearCell(pPage, pCell, &info);
+ BTREE_CLEAR_CELL(rc, pPage, pCell, info);
if( rc ) goto cleardatabasepage_out;
}
if( !pPage->leaf ){
rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange);
if( rc ) goto cleardatabasepage_out;
- }else if( pnChange ){
- assert( pPage->intKey || CORRUPT_DB );
+ if( pPage->intKey ) pnChange = 0;
+ }
+ if( pnChange ){
testcase( !pPage->intKey );
*pnChange += pPage->nCell;
}
@@ -9294,7 +10072,6 @@ static int clearDatabasePage(
}
cleardatabasepage_out:
- pPage->bBusy = 0;
releasePage(pPage);
return rc;
}
@@ -9308,11 +10085,10 @@ cleardatabasepage_out:
** read cursors on the table. Open write cursors are moved to the
** root of the table.
**
-** If pnChange is not NULL, then table iTable must be an intkey table. The
-** integer value pointed to by pnChange is incremented by the number of
-** entries in the table.
+** If pnChange is not NULL, then the integer value pointed to by pnChange
+** is incremented by the number of entries in the table.
*/
-int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){
+int sqlite3BtreeClearTable(Btree *p, int iTable, i64 *pnChange){
int rc;
BtShared *pBt = p->pBt;
sqlite3BtreeEnter(p);
@@ -9324,7 +10100,9 @@ int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){
/* Invalidate all incrblob cursors open on table iTable (assuming iTable
** is the root of a table b-tree - if it is not, the following call is
** a no-op). */
- invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1);
+ if( p->hasIncrblobCur ){
+ invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1);
+ }
rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange);
}
sqlite3BtreeLeave(p);
@@ -9349,12 +10127,12 @@ int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){
** cursors on the table.
**
** If AUTOVACUUM is enabled and the page at iTable is not the last
-** root page in the database file, then the last root page
+** root page in the database file, then the last root page
** in the database file is moved into the slot formerly occupied by
** iTable and that last slot formerly occupied by the last root page
** is added to the freelist instead of iTable. In this way, all
** root pages are kept at the beginning of the database file, which
-** is necessary for AUTOVACUUM to work right. *piMoved is set to the
+** is necessary for AUTOVACUUM to work right. *piMoved is set to the
** page number that used to be the last root page in the file before
** the move. If no page gets moved, *piMoved is set to 0.
** The last root page is recorded in meta[3] and the value of
@@ -9372,10 +10150,10 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
return SQLITE_CORRUPT_BKPT;
}
- rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
- if( rc ) return rc;
rc = sqlite3BtreeClearTable(p, iTable, 0);
- if( rc ){
+ if( rc ) return rc;
+ rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
+ if( NEVER(rc) ){
releasePage(pPage);
return rc;
}
@@ -9392,7 +10170,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
if( iTable==maxRootPgno ){
/* If the table being dropped is the table with the largest root-page
- ** number in the database, put the root page on the free list.
+ ** number in the database, put the root page on the free list.
*/
freePage(pPage, &rc);
releasePage(pPage);
@@ -9401,7 +10179,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
}
}else{
/* The table being dropped does not have the largest root-page
- ** number in the database. So move the page that does into the
+ ** number in the database. So move the page that does into the
** gap left by the deleted root-page.
*/
MemPage *pMove;
@@ -9443,7 +10221,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
releasePage(pPage);
}
#endif
- return rc;
+ return rc;
}
int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
int rc;
@@ -9462,7 +10240,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
** is the number of free pages currently in the database. Meta[1]
** through meta[15] are available for use by higher layers. Meta[0]
** is read-only, the others are read/write.
-**
+**
** The schema layer numbers meta values differently. At the schema
** layer (and the SetCookie and ReadCookie opcodes) the number of
** free pages is not visible. So Cookie[0] is the same as Meta[1].
@@ -9484,7 +10262,7 @@ void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
assert( idx>=0 && idx<=15 );
if( idx==BTREE_DATA_VERSION ){
- *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion;
+ *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iBDataVersion;
}else{
*pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]);
}
@@ -9532,7 +10310,7 @@ int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
** The first argument, pCur, is a cursor opened on some b-tree. Count the
** number of entries in the b-tree and write the result to *pnEntry.
**
-** SQLITE_OK is returned if the operation is successfully executed.
+** SQLITE_OK is returned if the operation is successfully executed.
** Otherwise, if an error is encountered (i.e. an IO error or database
** corruption) an SQLite error code is returned.
*/
@@ -9547,13 +10325,13 @@ int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){
}
/* Unless an error occurs, the following loop runs one iteration for each
- ** page in the B-Tree structure (not including overflow pages).
+ ** page in the B-Tree structure (not including overflow pages).
*/
while( rc==SQLITE_OK && !AtomicLoad(&db->u1.isInterrupted) ){
int iIdx; /* Index of child node in parent */
MemPage *pPage; /* Current page of the b-tree */
- /* If this is a leaf page or the tree is not an int-key tree, then
+ /* If this is a leaf page or the tree is not an int-key tree, then
** this page contains countable entries. Increment the entry counter
** accordingly.
*/
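The comment above encodes the counting rule used here and in clearDatabasePage(): in an intkey (rowid-table) b-tree only leaf cells are rows, while in an index b-tree every cell, interior or leaf, is an entry. Restated as a one-line predicate (illustrative only, not part of btree.c):

    /* Nonzero if the cells on a page count as entries: every page of an
    ** index b-tree, but only the leaves of an intkey b-tree. */
    static int pageHasCountableEntries(int isLeaf, int isIntKey){
      return isLeaf || !isIntKey;
    }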
@@ -9562,7 +10340,7 @@ int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){
nEntry += pPage->nCell;
}
- /* pPage is a leaf node. This loop navigates the cursor so that it
+ /* pPage is a leaf node. This loop navigates the cursor so that it
 ** points to the first interior cell that points to the parent of
** the next page in the tree that has not yet been visited. The
** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell
@@ -9586,7 +10364,7 @@ int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){
pPage = pCur->pPage;
}
- /* Descend to the child node of the cell that the cursor currently
+ /* Descend to the child node of the cell that the cursor currently
** points at. This is the right-child if (iIdx==pPage->nCell).
*/
iIdx = pCur->ix;
@@ -9611,6 +10389,41 @@ Pager *sqlite3BtreePager(Btree *p){
#ifndef SQLITE_OMIT_INTEGRITY_CHECK
/*
+** Record an OOM error during integrity_check
+*/
+static void checkOom(IntegrityCk *pCheck){
+ pCheck->rc = SQLITE_NOMEM;
+ pCheck->mxErr = 0; /* Causes integrity_check processing to stop */
+ if( pCheck->nErr==0 ) pCheck->nErr++;
+}
+
+/*
+** Invoke the progress handler, if appropriate. Also check for an
+** interrupt.
+*/
+static void checkProgress(IntegrityCk *pCheck){
+ sqlite3 *db = pCheck->db;
+ if( AtomicLoad(&db->u1.isInterrupted) ){
+ pCheck->rc = SQLITE_INTERRUPT;
+ pCheck->nErr++;
+ pCheck->mxErr = 0;
+ }
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+ if( db->xProgress ){
+ assert( db->nProgressOps>0 );
+ pCheck->nStep++;
+ if( (pCheck->nStep % db->nProgressOps)==0
+ && db->xProgress(db->pProgressArg)
+ ){
+ pCheck->rc = SQLITE_INTERRUPT;
+ pCheck->nErr++;
+ pCheck->mxErr = 0;
+ }
+ }
+#endif
+}
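checkProgress() lets a long integrity check honor the connection's progress handler and interrupt flag. A hedged application-side sketch using the public sqlite3_progress_handler() interface; the callback and its cancel flag are invented for the example:

    #include "sqlite3.h"

    static volatile int gCancelCheck = 0;

    static int checkProgressCallback(void *pNotUsed){
      (void)pNotUsed;
      return gCancelCheck;  /* nonzero stops the work with SQLITE_INTERRUPT */
    }

    static void armCancellableCheck(sqlite3 *db){
      /* Invoke the callback roughly every 1000 steps of work */
      sqlite3_progress_handler(db, 1000, checkProgressCallback, 0);
    }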
+
+/*
** Append a message to the error message string.
*/
static void checkAppendMsg(
@@ -9619,6 +10432,7 @@ static void checkAppendMsg(
...
){
va_list ap;
+ checkProgress(pCheck);
if( !pCheck->mxErr ) return;
pCheck->mxErr--;
pCheck->nErr++;
@@ -9627,12 +10441,13 @@ static void checkAppendMsg(
sqlite3_str_append(&pCheck->errMsg, "\n", 1);
}
if( pCheck->zPfx ){
- sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx, pCheck->v1, pCheck->v2);
+ sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx,
+ pCheck->v0, pCheck->v1, pCheck->v2);
}
sqlite3_str_vappendf(&pCheck->errMsg, zFormat, ap);
va_end(ap);
if( pCheck->errMsg.accError==SQLITE_NOMEM ){
- pCheck->mallocFailed = 1;
+ checkOom(pCheck);
}
}
#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
@@ -9644,7 +10459,8 @@ static void checkAppendMsg(
** corresponds to page iPg is already set.
*/
static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){
- assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 );
+ assert( pCheck->aPgRef!=0 );
+ assert( iPg<=pCheck->nCkPage && sizeof(pCheck->aPgRef[0])==1 );
return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07)));
}
@@ -9652,7 +10468,8 @@ static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){
** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg.
*/
static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){
- assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 );
+ assert( pCheck->aPgRef!=0 );
+ assert( iPg<=pCheck->nCkPage && sizeof(pCheck->aPgRef[0])==1 );
pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07));
}
@@ -9666,22 +10483,21 @@ static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){
** Also check that the page number is in bounds.
*/
static int checkRef(IntegrityCk *pCheck, Pgno iPage){
- if( iPage>pCheck->nPage || iPage==0 ){
- checkAppendMsg(pCheck, "invalid page number %d", iPage);
+ if( iPage>pCheck->nCkPage || iPage==0 ){
+ checkAppendMsg(pCheck, "invalid page number %u", iPage);
return 1;
}
if( getPageReferenced(pCheck, iPage) ){
- checkAppendMsg(pCheck, "2nd reference to page %d", iPage);
+ checkAppendMsg(pCheck, "2nd reference to page %u", iPage);
return 1;
}
- if( AtomicLoad(&pCheck->db->u1.isInterrupted) ) return 1;
setPageReferenced(pCheck, iPage);
return 0;
}
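getPageReferenced() and setPageReferenced() treat aPgRef[] as a bitmap with one bit per page, addressed as byte iPg/8, bit iPg&7, and sized nPage/8+1 bytes. A stand-alone sketch of that representation; the PageBitmap type and helpers are invented:

    #include <stdlib.h>
    #include <string.h>

    typedef struct PageBitmap {
      unsigned char *a;      /* one bit per page; page numbers start at 1 */
      unsigned int nPage;
    } PageBitmap;

    static int bitmapInit(PageBitmap *p, unsigned int nPage){
      p->nPage = nPage;
      p->a = malloc(nPage/8 + 1);
      if( p->a==0 ) return 1;
      memset(p->a, 0, nPage/8 + 1);
      return 0;
    }
    static int bitmapTest(const PageBitmap *p, unsigned int iPg){
      return (p->a[iPg/8] >> (iPg & 7)) & 1;
    }
    static void bitmapSet(PageBitmap *p, unsigned int iPg){
      p->a[iPg/8] |= (unsigned char)(1 << (iPg & 7));
    }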
#ifndef SQLITE_OMIT_AUTOVACUUM
/*
-** Check that the entry in the pointer-map for page iChild maps to
+** Check that the entry in the pointer-map for page iChild maps to
** page iParent, pointer type ptrType. If not, append an error message
** to pCheck.
*/
@@ -9697,14 +10513,14 @@ static void checkPtrmap(
rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
if( rc!=SQLITE_OK ){
- if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->mallocFailed = 1;
- checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild);
+ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) checkOom(pCheck);
+ checkAppendMsg(pCheck, "Failed to read ptrmap key=%u", iChild);
return;
}
if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
checkAppendMsg(pCheck,
- "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)",
+ "Bad ptr map entry key=%u expected=(%u,%u) got=(%u,%u)",
iChild, eType, iParent, ePtrmapType, iPtrmapParent);
}
}
@@ -9717,7 +10533,7 @@ static void checkPtrmap(
static void checkList(
IntegrityCk *pCheck, /* Integrity checking context */
int isFreeList, /* True for a freelist. False for overflow page list */
- int iPage, /* Page number for first page in the list */
+ Pgno iPage, /* Page number for first page in the list */
u32 N /* Expected number of pages in the list */
){
int i;
@@ -9729,7 +10545,7 @@ static void checkList(
if( checkRef(pCheck, iPage) ) break;
N--;
if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage, 0) ){
- checkAppendMsg(pCheck, "failed to get page %d", iPage);
+ checkAppendMsg(pCheck, "failed to get page %u", iPage);
break;
}
pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
@@ -9742,7 +10558,7 @@ static void checkList(
#endif
if( n>pCheck->pBt->usableSize/4-2 ){
checkAppendMsg(pCheck,
- "freelist leaf count too big on page %d", iPage);
+ "freelist leaf count too big on page %u", iPage);
N--;
}else{
for(i=0; i<(int)n; i++){
@@ -9774,7 +10590,7 @@ static void checkList(
}
if( N && nErrAtStart==pCheck->nErr ){
checkAppendMsg(pCheck,
- "%s is %d but should be %d",
+ "%s is %u but should be %u",
isFreeList ? "size" : "overflow list length",
expected-N, expected);
}
@@ -9799,12 +10615,14 @@ static void checkList(
** property.
**
** This heap is used for cell overlap and coverage testing. Each u32
-** entry represents the span of a cell or freeblock on a btree page.
+** entry represents the span of a cell or freeblock on a btree page.
** The upper 16 bits are the index of the first byte of a range and the
** lower 16 bits are the index of the last byte of that range.
*/
static void btreeHeapInsert(u32 *aHeap, u32 x){
- u32 j, i = ++aHeap[0];
+ u32 j, i;
+ assert( aHeap!=0 );
+ i = ++aHeap[0];
aHeap[i] = x;
while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){
x = aHeap[j];
@@ -9829,7 +10647,7 @@ static int btreeHeapPull(u32 *aHeap, u32 *pOut){
aHeap[j] = x;
i = j;
}
- return 1;
+ return 1;
}
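Each heap entry packs a byte range into one u32, upper 16 bits the first byte and lower 16 bits the last, so pulling entries in ascending order lets the coverage loop detect overlap by comparing an entry's start against the previous entry's end, exactly the (prev&0xffff)>=(x>>16) test used further down. A brief sketch of the encoding; the helper names are invented:

    /* Pack a [start,end] byte range, both less than 65536, into one value. */
    static unsigned int spanEncode(unsigned int start, unsigned int end){
      return (start<<16) | end;
    }
    static unsigned int spanStart(unsigned int x){ return x>>16; }
    static unsigned int spanEnd(unsigned int x){ return x & 0xffff; }

    /* With prev pulled before x in ascending order, nonzero means overlap. */
    static int spansOverlap(unsigned int prev, unsigned int x){
      return spanEnd(prev) >= spanStart(x);
    }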
#ifndef SQLITE_OMIT_INTEGRITY_CHECK
@@ -9837,7 +10655,7 @@ static int btreeHeapPull(u32 *aHeap, u32 *pOut){
** Do various sanity checks on a single page of a tree. Return
** the tree depth. Root pages return 0. Parents of root pages
** return 1, and so forth.
-**
+**
** These checks are done:
**
** 1. Make sure that cells and freeblocks do not overlap
@@ -9849,7 +10667,7 @@ static int btreeHeapPull(u32 *aHeap, u32 *pOut){
*/
static int checkTreePage(
IntegrityCk *pCheck, /* Context for the sanity check */
- int iPage, /* Page number of the page to check */
+ Pgno iPage, /* Page number of the page to check */
i64 *piMinKey, /* Write minimum integer primary key here */
i64 maxKey /* Error if integer primary key greater than this */
){
@@ -9881,15 +10699,18 @@ static int checkTreePage(
/* Check that the page exists
*/
+ checkProgress(pCheck);
+ if( pCheck->mxErr==0 ) goto end_of_check;
pBt = pCheck->pBt;
usableSize = pBt->usableSize;
if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage) ) return 0;
- pCheck->zPfx = "Page %d: ";
+ pCheck->zPfx = "Tree %u page %u: ";
pCheck->v1 = iPage;
- if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
+ if( (rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0 ){
checkAppendMsg(pCheck,
"unable to get the page. error code=%d", rc);
+ if( rc==SQLITE_IOERR_NOMEM ) pCheck->rc = SQLITE_NOMEM;
goto end_of_check;
}
@@ -9912,7 +10733,7 @@ static int checkTreePage(
hdr = pPage->hdrOffset;
/* Set up for cell analysis */
- pCheck->zPfx = "On tree page %d cell %d: ";
+ pCheck->zPfx = "Tree %u page %u cell %u: ";
contentOffset = get2byteNotZero(&data[hdr+5]);
assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */
@@ -9932,7 +10753,7 @@ static int checkTreePage(
pgno = get4byte(&data[hdr+8]);
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pBt->autoVacuum ){
- pCheck->zPfx = "On page %d at right child: ";
+ pCheck->zPfx = "Tree %u page %u right child: ";
checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
}
#endif
@@ -9956,7 +10777,7 @@ static int checkTreePage(
pc = get2byteAligned(pCellIdx);
pCellIdx -= 2;
if( pc<contentOffset || pc>usableSize-4 ){
- checkAppendMsg(pCheck, "Offset %d out of range %d..%d",
+ checkAppendMsg(pCheck, "Offset %u out of range %u..%u",
pc, contentOffset, usableSize-4);
doCoverageCheck = 0;
continue;
@@ -10035,7 +10856,7 @@ static int checkTreePage(
**
** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header
** is the offset of the first freeblock, or zero if there are no
- ** freeblocks on the page.
+ ** freeblocks on the page.
*/
i = get2byte(&data[hdr+1]);
while( i>0 ){
@@ -10055,13 +10876,13 @@ static int checkTreePage(
assert( (u32)j<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */
i = j;
}
- /* Analyze the min-heap looking for overlap between cells and/or
+ /* Analyze the min-heap looking for overlap between cells and/or
** freeblocks, and counting the number of untracked bytes in nFrag.
- **
+ **
** Each min-heap entry is of the form: (start_address<<16)|end_address.
 ** There is an implied first entry that covers the page header, the cell
** pointer index, and the gap between the cell pointer index and the start
- ** of cell content.
+ ** of cell content.
**
** The loop below pulls entries from the min-heap in order and compares
** the start_address against the previous end_address. If there is an
@@ -10073,7 +10894,7 @@ static int checkTreePage(
while( btreeHeapPull(heap,&x) ){
if( (prev&0xffff)>=(x>>16) ){
checkAppendMsg(pCheck,
- "Multiple uses for byte %u of page %d", x>>16, iPage);
+ "Multiple uses for byte %u of page %u", x>>16, iPage);
break;
}else{
nFrag += (x>>16) - (prev&0xffff) - 1;
@@ -10088,7 +10909,7 @@ static int checkTreePage(
*/
if( heap[0]==0 && nFrag!=data[hdr+7] ){
checkAppendMsg(pCheck,
- "Fragmentation of %d bytes reported as %d on page %d",
+ "Fragmentation of %u bytes reported as %u on page %u",
nFrag, data[hdr+7], iPage);
}
}
@@ -10116,83 +10937,101 @@ end_of_check:
** allocation errors, an error message held in memory obtained from
** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is
** returned. If a memory allocation error occurs, NULL is returned.
+**
+** If the first entry in aRoot[] is 0, that indicates that the list of
+** root pages is incomplete. This is a "partial integrity-check". This
+** happens when performing an integrity check on a single table. The
+** zero is skipped, of course. But in addition, the freelist checks
+** and the checks to make sure every page is referenced are also skipped,
+** since obviously it is not possible to know which pages are covered by
+** the unverified btrees. Except, if aRoot[1] is 1, then the freelist
+** checks are still performed.
*/
-char *sqlite3BtreeIntegrityCheck(
+int sqlite3BtreeIntegrityCheck(
sqlite3 *db, /* Database connection that is running the check */
Btree *p, /* The btree to be checked */
- int *aRoot, /* An array of root pages numbers for individual trees */
+ Pgno *aRoot, /* An array of root pages numbers for individual trees */
int nRoot, /* Number of entries in aRoot[] */
int mxErr, /* Stop reporting errors after this many */
- int *pnErr /* Write number of errors seen to this variable */
+ int *pnErr, /* OUT: Write number of errors seen to this variable */
+ char **pzOut /* OUT: Write the error message string here */
){
Pgno i;
IntegrityCk sCheck;
BtShared *pBt = p->pBt;
u64 savedDbFlags = pBt->db->flags;
char zErr[100];
+ int bPartial = 0; /* True if not checking all btrees */
+ int bCkFreelist = 1; /* True to scan the freelist */
VVA_ONLY( int nRef );
+ assert( nRoot>0 );
+
+ /* aRoot[0]==0 means this is a partial check */
+ if( aRoot[0]==0 ){
+ assert( nRoot>1 );
+ bPartial = 1;
+ if( aRoot[1]!=1 ) bCkFreelist = 0;
+ }
sqlite3BtreeEnter(p);
assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE );
VVA_ONLY( nRef = sqlite3PagerRefcount(pBt->pPager) );
assert( nRef>=0 );
+ memset(&sCheck, 0, sizeof(sCheck));
sCheck.db = db;
sCheck.pBt = pBt;
sCheck.pPager = pBt->pPager;
- sCheck.nPage = btreePagecount(sCheck.pBt);
+ sCheck.nCkPage = btreePagecount(sCheck.pBt);
sCheck.mxErr = mxErr;
- sCheck.nErr = 0;
- sCheck.mallocFailed = 0;
- sCheck.zPfx = 0;
- sCheck.v1 = 0;
- sCheck.v2 = 0;
- sCheck.aPgRef = 0;
- sCheck.heap = 0;
sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH);
sCheck.errMsg.printfFlags = SQLITE_PRINTF_INTERNAL;
- if( sCheck.nPage==0 ){
+ if( sCheck.nCkPage==0 ){
goto integrity_ck_cleanup;
}
- sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1);
+ sCheck.aPgRef = sqlite3MallocZero((sCheck.nCkPage / 8)+ 1);
if( !sCheck.aPgRef ){
- sCheck.mallocFailed = 1;
+ checkOom(&sCheck);
goto integrity_ck_cleanup;
}
sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize );
if( sCheck.heap==0 ){
- sCheck.mallocFailed = 1;
+ checkOom(&sCheck);
goto integrity_ck_cleanup;
}
i = PENDING_BYTE_PAGE(pBt);
- if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i);
+ if( i<=sCheck.nCkPage ) setPageReferenced(&sCheck, i);
/* Check the integrity of the freelist
*/
- sCheck.zPfx = "Main freelist: ";
- checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
- get4byte(&pBt->pPage1->aData[36]));
- sCheck.zPfx = 0;
+ if( bCkFreelist ){
+ sCheck.zPfx = "Freelist: ";
+ checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
+ get4byte(&pBt->pPage1->aData[36]));
+ sCheck.zPfx = 0;
+ }
/* Check all the tables.
*/
#ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- int mx = 0;
- int mxInHdr;
- for(i=0; (int)i<nRoot; i++) if( mx<aRoot[i] ) mx = aRoot[i];
- mxInHdr = get4byte(&pBt->pPage1->aData[52]);
- if( mx!=mxInHdr ){
+ if( !bPartial ){
+ if( pBt->autoVacuum ){
+ Pgno mx = 0;
+ Pgno mxInHdr;
+ for(i=0; (int)i<nRoot; i++) if( mx<aRoot[i] ) mx = aRoot[i];
+ mxInHdr = get4byte(&pBt->pPage1->aData[52]);
+ if( mx!=mxInHdr ){
+ checkAppendMsg(&sCheck,
+ "max rootpage (%u) disagrees with header (%u)",
+ mx, mxInHdr
+ );
+ }
+ }else if( get4byte(&pBt->pPage1->aData[64])!=0 ){
checkAppendMsg(&sCheck,
- "max rootpage (%d) disagrees with header (%d)",
- mx, mxInHdr
+ "incremental_vacuum enabled with a max rootpage of zero"
);
}
- }else if( get4byte(&pBt->pPage1->aData[64])!=0 ){
- checkAppendMsg(&sCheck,
- "incremental_vacuum enabled with a max rootpage of zero"
- );
}
#endif
testcase( pBt->db->flags & SQLITE_CellSizeCk );
@@ -10201,34 +11040,37 @@ char *sqlite3BtreeIntegrityCheck(
i64 notUsed;
if( aRoot[i]==0 ) continue;
#ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum && aRoot[i]>1 ){
+ if( pBt->autoVacuum && aRoot[i]>1 && !bPartial ){
checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0);
}
#endif
+ sCheck.v0 = aRoot[i];
checkTreePage(&sCheck, aRoot[i], &notUsed, LARGEST_INT64);
}
pBt->db->flags = savedDbFlags;
/* Make sure every page in the file is referenced
*/
- for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
+ if( !bPartial ){
+ for(i=1; i<=sCheck.nCkPage && sCheck.mxErr; i++){
#ifdef SQLITE_OMIT_AUTOVACUUM
- if( getPageReferenced(&sCheck, i)==0 ){
- checkAppendMsg(&sCheck, "Page %d is never used", i);
- }
+ if( getPageReferenced(&sCheck, i)==0 ){
+ checkAppendMsg(&sCheck, "Page %u: never used", i);
+ }
#else
- /* If the database supports auto-vacuum, make sure no tables contain
- ** references to pointer-map pages.
- */
- if( getPageReferenced(&sCheck, i)==0 &&
- (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
- checkAppendMsg(&sCheck, "Page %d is never used", i);
- }
- if( getPageReferenced(&sCheck, i)!=0 &&
- (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
- checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i);
- }
+ /* If the database supports auto-vacuum, make sure no tables contain
+ ** references to pointer-map pages.
+ */
+ if( getPageReferenced(&sCheck, i)==0 &&
+ (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
+ checkAppendMsg(&sCheck, "Page %u: never used", i);
+ }
+ if( getPageReferenced(&sCheck, i)!=0 &&
+ (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
+ checkAppendMsg(&sCheck, "Page %u: pointer map referenced", i);
+ }
#endif
+ }
}
/* Clean up and report errors.
@@ -10236,16 +11078,17 @@ char *sqlite3BtreeIntegrityCheck(
integrity_ck_cleanup:
sqlite3PageFree(sCheck.heap);
sqlite3_free(sCheck.aPgRef);
- if( sCheck.mallocFailed ){
+ *pnErr = sCheck.nErr;
+ if( sCheck.nErr==0 ){
sqlite3_str_reset(&sCheck.errMsg);
- sCheck.nErr++;
+ *pzOut = 0;
+ }else{
+ *pzOut = sqlite3StrAccumFinish(&sCheck.errMsg);
}
- *pnErr = sCheck.nErr;
- if( sCheck.nErr==0 ) sqlite3_str_reset(&sCheck.errMsg);
/* Make sure this analysis did not leave any unref() pages. */
assert( nRef==sqlite3PagerRefcount(pBt->pPager) );
sqlite3BtreeLeave(p);
- return sqlite3StrAccumFinish(&sCheck.errMsg);
+ return sCheck.rc;
}
#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
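Under the aRoot[] convention documented above, a caller can request a partial check of selected btrees. A hedged sketch of such a call, assuming an open connection db, a Btree handle p with a transaction already open, and a hypothetical table rooted at page 7; because aRoot[1]!=1 the freelist and page-coverage checks are skipped:

    static int checkOneTable(sqlite3 *db, Btree *p){
      Pgno aRoot[] = { 0, 7 };  /* leading 0 => partial check; 7 => root to verify */
      int nErr = 0;
      char *zErrMsg = 0;
      int rc = sqlite3BtreeIntegrityCheck(db, p, aRoot, 2, 100, &nErr, &zErrMsg);
      if( zErrMsg ){
        /* report and then release the error message text */
        sqlite3_free(zErrMsg);
      }
      return rc!=SQLITE_OK || nErr>0;
    }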
@@ -10275,18 +11118,19 @@ const char *sqlite3BtreeGetJournalname(Btree *p){
}
/*
-** Return non-zero if a transaction is active.
+** Return one of SQLITE_TXN_NONE, SQLITE_TXN_READ, or SQLITE_TXN_WRITE
+** to describe the current transaction state of Btree p.
*/
-int sqlite3BtreeIsInTrans(Btree *p){
+int sqlite3BtreeTxnState(Btree *p){
assert( p==0 || sqlite3_mutex_held(p->db->mutex) );
- return (p && (p->inTrans==TRANS_WRITE));
+ return p ? p->inTrans : 0;
}
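sqlite3BtreeTxnState() now distinguishes no transaction, a read transaction, and a write transaction, where the routine it replaces reported only whether a write transaction was open. The same three states are exposed to applications by sqlite3_txn_state() in recent SQLite releases (3.34.0 and later); a minimal sketch:

  /* Sketch: report the transaction state of the "main" schema. */
  #include <stdio.h>
  #include <sqlite3.h>

  static void reportTxnState(sqlite3 *db){
    switch( sqlite3_txn_state(db, "main") ){
      case SQLITE_TXN_NONE:  printf("no transaction\n");    break;
      case SQLITE_TXN_READ:  printf("read transaction\n");  break;
      case SQLITE_TXN_WRITE: printf("write transaction\n"); break;
      default:               printf("unknown schema\n");    break;
    }
  }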
#ifndef SQLITE_OMIT_WAL
/*
** Run a checkpoint on the Btree passed as the first argument.
**
-** Return SQLITE_LOCKED if this or any other connection has an open
+** Return SQLITE_LOCKED if this or any other connection has an open
** transaction on the shared-cache the argument Btree is connected to.
**
** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
@@ -10308,14 +11152,8 @@ int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){
#endif
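The checkpoint comment above lists the eMode values accepted at this layer. Applications normally request a checkpoint through sqlite3_wal_checkpoint_v2(); a minimal sketch, assuming a connection already operating in WAL mode:

  /* Sketch: request a FULL checkpoint on the "main" database. */
  #include <stdio.h>
  #include <sqlite3.h>

  static int checkpointMain(sqlite3 *db){
    int nLog = 0, nCkpt = 0;
    int rc = sqlite3_wal_checkpoint_v2(db, "main", SQLITE_CHECKPOINT_FULL,
                                       &nLog, &nCkpt);
    if( rc==SQLITE_OK ){
      printf("wal frames: %d  checkpointed: %d\n", nLog, nCkpt);
    }else if( rc==SQLITE_BUSY ){
      printf("checkpoint blocked by another connection\n");
    }
    return rc;
  }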
/*
-** Return non-zero if a read (or write) transaction is active.
+** Return true if there is currently a backup running on Btree p.
*/
-int sqlite3BtreeIsInReadTrans(Btree *p){
- assert( p );
- assert( sqlite3_mutex_held(p->db->mutex) );
- return p->inTrans!=TRANS_NONE;
-}
-
int sqlite3BtreeIsInBackup(Btree *p){
assert( p );
assert( sqlite3_mutex_held(p->db->mutex) );
@@ -10325,20 +11163,20 @@ int sqlite3BtreeIsInBackup(Btree *p){
/*
** This function returns a pointer to a blob of memory associated with
** a single shared-btree. The memory is used by client code for its own
-** purposes (for example, to store a high-level schema associated with
+** purposes (for example, to store a high-level schema associated with
** the shared-btree). The btree layer manages reference counting issues.
**
** The first time this is called on a shared-btree, nBytes bytes of memory
-** are allocated, zeroed, and returned to the caller. For each subsequent
+** are allocated, zeroed, and returned to the caller. For each subsequent
** call the nBytes parameter is ignored and a pointer to the same blob
-** of memory returned.
+** of memory is returned.
**
** If the nBytes parameter is 0 and the blob of memory has not yet been
** allocated, a null pointer is returned. If the blob has already been
** allocated, it is returned as normal.
**
-** Just before the shared-btree is closed, the function passed as the
-** xFree argument when the memory allocation was made is invoked on the
+** Just before the shared-btree is closed, the function passed as the
+** xFree argument when the memory allocation was made is invoked on the
** blob of allocated memory. The xFree function should not call sqlite3_free()
** on the memory, the btree layer does that.
*/
@@ -10354,8 +11192,8 @@ void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
}
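The comment above describes an allocate-once blob tied to the shared btree: the first call sizes and zeroes it, later calls return the same pointer, and the xFree callback runs just before the btree is closed. A minimal sketch of that calling pattern, assuming the internal btree.h declarations are visible; ClientSchema and clientSchemaFree() are hypothetical names, not part of this diff:

  /* Hypothetical client data attached to a shared btree (illustrative only). */
  typedef struct ClientSchema {
    int iGeneration;                /* example field */
  } ClientSchema;

  /* xFree callback: release anything the blob points at, but not the blob
  ** itself -- the btree layer frees the allocation when the btree closes. */
  static void clientSchemaFree(void *p){
    ((ClientSchema*)p)->iGeneration = 0;
  }

  static ClientSchema *getClientSchema(Btree *pBtree){
    /* First call allocates and zeroes sizeof(ClientSchema) bytes; every
    ** later call ignores the size and returns the same pointer. */
    return (ClientSchema*)sqlite3BtreeSchema(pBtree, sizeof(ClientSchema),
                                             clientSchemaFree);
  }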
/*
-** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared
-** btree as the argument handle holds an exclusive lock on the
+** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared
+** btree as the argument handle holds an exclusive lock on the
** sqlite_schema table. Otherwise SQLITE_OK.
*/
int sqlite3BtreeSchemaLocked(Btree *p){
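At the application level this condition surfaces as SQLITE_LOCKED from sqlite3_prepare_v2(), and a common response is to back off and retry once the other connection finishes. A minimal retry sketch against the public API; the retry count and delay are arbitrary illustrative choices:

  /* Sketch: retry statement preparation while another shared-cache
  ** connection holds the schema lock.  Count and delay are arbitrary. */
  #include <sqlite3.h>

  static int prepareWithRetry(sqlite3 *db, const char *zSql, sqlite3_stmt **ppStmt){
    int rc, i;
    for(i=0; i<5; i++){
      rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0);
      if( rc!=SQLITE_LOCKED ) break;
      sqlite3_sleep(10);            /* 10ms back-off before retrying */
    }
    return rc;
  }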
@@ -10396,11 +11234,11 @@ int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
#ifndef SQLITE_OMIT_INCRBLOB
/*
-** Argument pCsr must be a cursor opened for writing on an
-** INTKEY table currently pointing at a valid table entry.
+** Argument pCsr must be a cursor opened for writing on an
+** INTKEY table currently pointing at a valid table entry.
** This function modifies the data stored as part of that entry.
**
-** Only the data content may only be modified, it is not possible to
+** Only the data content may be modified; it is not possible to
** change the length of the data stored. If this function is called with
** parameters that attempt to write past the end of the existing data,
** no modifications are made and SQLITE_CORRUPT is returned.
@@ -10431,7 +11269,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr);
assert( rc==SQLITE_OK );
- /* Check some assumptions:
+ /* Check some assumptions:
** (a) the cursor is open for writing,
** (b) there is a read/write transaction open,
** (c) the connection holds a write-lock on the table (if required),
@@ -10450,7 +11288,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1);
}
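sqlite3BtreePutData() is the btree-level half of incremental blob writes, which applications reach through sqlite3_blob_open() and sqlite3_blob_write(). A minimal sketch; the table, column, and rowid are placeholders, not taken from this diff:

  /* Sketch: overwrite part of an existing blob in place.  Incremental
  ** writes cannot change the stored length of the value. */
  #include <string.h>
  #include <sqlite3.h>

  static int patchBlob(sqlite3 *db){
    sqlite3_blob *pBlob = 0;
    /* "t1", "content" and rowid 1 are placeholders, not from this diff. */
    int rc = sqlite3_blob_open(db, "main", "t1", "content", 1, 1, &pBlob);
    if( rc==SQLITE_OK ){
      const char *zPatch = "hello";
      rc = sqlite3_blob_write(pBlob, zPatch, (int)strlen(zPatch), 0);
    }
    sqlite3_blob_close(pBlob);      /* harmless no-op if pBlob is NULL */
    return rc;
  }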
-/*
+/*
** Mark this cursor as an incremental blob cursor.
*/
void sqlite3BtreeIncrblobCursor(BtCursor *pCur){
@@ -10460,14 +11298,14 @@ void sqlite3BtreeIncrblobCursor(BtCursor *pCur){
#endif
/*
-** Set both the "read version" (single byte at byte offset 18) and
+** Set both the "read version" (single byte at byte offset 18) and
** "write version" (single byte at byte offset 19) fields in the database
** header to iVersion.
*/
int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
BtShared *pBt = pBtree->pBt;
int rc; /* Return code */
-
+
assert( iVersion==1 || iVersion==2 );
/* If setting the version fields to 1, do not automatically open the
@@ -10515,6 +11353,17 @@ int sqlite3BtreeIsReadonly(Btree *p){
*/
int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); }
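The sqlite3BtreeSetVersion() comment above concerns the two format-version bytes at offsets 18 and 19 of the database header, both of which it sets to 1 for legacy journaling or 2 for WAL. A minimal standalone sketch, not part of this diff, that inspects those two bytes in a database file (the path is illustrative):

  /* Sketch: print the two format-version bytes from a database header. */
  #include <stdio.h>

  int main(int argc, char **argv){
    unsigned char hdr[20];
    FILE *f = fopen(argc>1 ? argv[1] : "test.db", "rb");   /* example path */
    if( f==0 || fread(hdr, 1, sizeof(hdr), f)!=sizeof(hdr) ) return 1;
    printf("header byte 18: %d (1=legacy, 2=WAL)\n", hdr[18]);
    printf("header byte 19: %d (1=legacy, 2=WAL)\n", hdr[19]);
    fclose(f);
    return 0;
  }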
+/*
+** If no transaction is active and the database is not a temp-db, clear
+** the in-memory pager cache.
+*/
+void sqlite3BtreeClearCache(Btree *p){
+ BtShared *pBt = p->pBt;
+ if( pBt->inTransaction==TRANS_NONE ){
+ sqlite3PagerClearCache(pBt->pPager);
+ }
+}
+
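The new sqlite3BtreeClearCache() drops cached pages only when no transaction is open on the shared btree. No public wrapper appears in this diff; a loosely related application-level knob is sqlite3_db_release_memory(), which asks one connection to free as much cache memory as it can. A minimal sketch:

  /* Sketch: ask one connection to release as much cache memory as it can,
  ** e.g. when the application goes idle.  This is a public-API analogue,
  ** not a wrapper around the new internal helper. */
  #include <sqlite3.h>

  static void trimConnectionMemory(sqlite3 *db){
    (void)sqlite3_db_release_memory(db);
  }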
#if !defined(SQLITE_OMIT_SHARED_CACHE)
/*
** Return true if the Btree passed as the only argument is sharable.
@@ -10525,7 +11374,7 @@ int sqlite3BtreeSharable(Btree *p){
/*
** Return the number of connections to the BtShared object accessed by
-** the Btree handle passed as the only argument. For private caches
+** the Btree handle passed as the only argument. For private caches
** this is always 1. For shared caches it may be 1 or greater.
*/
int sqlite3BtreeConnectionCount(Btree *p){