diff options
Diffstat (limited to 'src/btree.c')
-rw-r--r-- | src/btree.c | 145 |
1 files changed, 109 insertions, 36 deletions
diff --git a/src/btree.c b/src/btree.c index 1d82a2b62..c23cdb947 100644 --- a/src/btree.c +++ b/src/btree.c @@ -600,10 +600,15 @@ static void btreeReleaseAllCursorPages(BtCursor *pCur){ static int saveCursorPosition(BtCursor *pCur){ int rc; - assert( CURSOR_VALID==pCur->eState ); + assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState ); assert( 0==pCur->pKey ); assert( cursorHoldsMutex(pCur) ); + if( pCur->eState==CURSOR_SKIPNEXT ){ + pCur->eState = CURSOR_VALID; + }else{ + pCur->skipNext = 0; + } rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ @@ -674,7 +679,7 @@ static int SQLITE_NOINLINE saveCursorsOnList( ){ do{ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ - if( p->eState==CURSOR_VALID ){ + if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ int rc = saveCursorPosition(p); if( SQLITE_OK!=rc ){ return rc; @@ -746,17 +751,19 @@ static int btreeMoveto( */ static int btreeRestoreCursorPosition(BtCursor *pCur){ int rc; + int skipNext; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState>=CURSOR_REQUIRESEEK ); if( pCur->eState==CURSOR_FAULT ){ return pCur->skipNext; } pCur->eState = CURSOR_INVALID; - rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skipNext); + rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext); if( rc==SQLITE_OK ){ sqlite3_free(pCur->pKey); pCur->pKey = 0; assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); + pCur->skipNext |= skipNext; if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ pCur->eState = CURSOR_SKIPNEXT; } @@ -808,9 +815,10 @@ int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ *pDifferentRow = 1; return rc; } - if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){ + if( pCur->eState!=CURSOR_VALID ){ *pDifferentRow = 1; }else{ + assert( pCur->skipNext==0 ); *pDifferentRow = 0; } return SQLITE_OK; @@ -1951,16 +1959,18 @@ int sqlite3BtreeOpen( */ if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){ if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ + int nFilename = sqlite3Strlen30(zFilename)+1; int nFullPathname = pVfs->mxPathname+1; - char *zFullPathname = sqlite3Malloc(nFullPathname); + char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename)); MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) + p->sharable = 1; if( !zFullPathname ){ sqlite3_free(p); return SQLITE_NOMEM; } if( isMemdb ){ - memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1); + memcpy(zFullPathname, zFilename, nFilename); }else{ rc = sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname); @@ -2017,8 +2027,8 @@ int sqlite3BtreeOpen( ** the right size. This is to guard against size changes that result ** when compiling on a different architecture. */ - assert( sizeof(i64)==8 || sizeof(i64)==4 ); - assert( sizeof(u64)==8 || sizeof(u64)==4 ); + assert( sizeof(i64)==8 ); + assert( sizeof(u64)==8 ); assert( sizeof(u32)==4 ); assert( sizeof(u16)==2 ); assert( sizeof(Pgno)==4 ); @@ -3625,7 +3635,7 @@ int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ for(p=pBtree->pBt->pCursor; p; p=p->pNext){ int i; if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ - if( p->eState==CURSOR_VALID ){ + if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ rc = saveCursorPosition(p); if( rc!=SQLITE_OK ){ (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); @@ -4031,6 +4041,8 @@ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); + assert( pCur->iPage>=0 ); + assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 ); getCellInfo(pCur); *pSize = pCur->info.nPayload; @@ -4439,13 +4451,18 @@ static const void *fetchPayload( BtCursor *pCur, /* Cursor pointing to entry to read from */ u32 *pAmt /* Write the number of available bytes here */ ){ + u32 amt; assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); assert( pCur->eState==CURSOR_VALID ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); assert( cursorHoldsMutex(pCur) ); assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); assert( pCur->info.nSize>0 ); - *pAmt = pCur->info.nLocal; + assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB ); + assert( pCur->info.pPayload<pCur->apPage[pCur->iPage]->aDataEnd ||CORRUPT_DB); + amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload); + if( pCur->info.nLocal<amt ) amt = pCur->info.nLocal; + *pAmt = amt; return (void*)pCur->info.pPayload; } @@ -4509,7 +4526,7 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ return SQLITE_OK; } -#if 0 +#if SQLITE_DEBUG /* ** Page pParent is an internal (non-leaf) tree page. This function ** asserts that page number iChild is the left-child if the iIdx'th @@ -4518,6 +4535,8 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ ** the page. */ static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ + if( CORRUPT_DB ) return; /* The conditions tested below might not be true + ** in a corrupt database */ assert( iIdx<=pParent->nCell ); if( iIdx==pParent->nCell ){ assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); @@ -4542,19 +4561,11 @@ static void moveToParent(BtCursor *pCur){ assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPage>0 ); assert( pCur->apPage[pCur->iPage] ); - - /* UPDATE: It is actually possible for the condition tested by the assert - ** below to be untrue if the database file is corrupt. This can occur if - ** one cursor has modified page pParent while a reference to it is held - ** by a second cursor. Which can only happen if a single page is linked - ** into more than one b-tree structure in a corrupt database. */ -#if 0 assertParentIndex( pCur->apPage[pCur->iPage-1], pCur->aiIdx[pCur->iPage-1], pCur->apPage[pCur->iPage]->pgno ); -#endif testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); releasePage(pCur->apPage[pCur->iPage]); @@ -6729,7 +6740,6 @@ static int balance_nonroot( }else if( iParentIdx==i ){ nxDiv = i-2+bBulk; }else{ - assert( bBulk==0 ); nxDiv = iParentIdx-1; } i = 2-bBulk; @@ -7502,6 +7512,7 @@ static int balance(BtCursor *pCur){ /* The next iteration of the do-loop balances the parent page. */ releasePage(pPage); pCur->iPage--; + assert( pCur->iPage>=0 ); } }while( rc==SQLITE_OK ); @@ -7978,9 +7989,13 @@ static int clearDatabasePage( if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; + if( pPage->bBusy ){ + rc = SQLITE_CORRUPT_BKPT; + goto cleardatabasepage_out; + } + pPage->bBusy = 1; hdr = pPage->hdrOffset; for(i=0; i<pPage->nCell; i++){ pCell = findCell(pPage, i); @@ -8005,6 +8020,7 @@ static int clearDatabasePage( } cleardatabasepage_out: + pPage->bBusy = 0; releasePage(pPage); return rc; } @@ -8512,6 +8528,57 @@ static void checkList( } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ +/* +** An implementation of a min-heap. +** +** aHeap[0] is the number of elements on the heap. aHeap[1] is the +** root element. The daughter nodes of aHeap[N] are aHeap[N*2] +** and aHeap[N*2+1]. +** +** The heap property is this: Every node is less than or equal to both +** of its daughter nodes. A consequence of the heap property is that the +** root node aHeap[1] is always the minimum value current in the heap. +** +** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto +** the heap, preserving the heap property. The btreeHeapPull() routine +** removes the root element from the heap (the minimum value in the heap) +** and then move other nodes around as necessary to preserve the heap +** property. +** +** This heap is used for cell overlap and coverage testing. Each u32 +** entry represents the span of a cell or freeblock on a btree page. +** The upper 16 bits are the index of the first byte of a range and the +** lower 16 bits are the index of the last byte of that range. +*/ +static void btreeHeapInsert(u32 *aHeap, u32 x){ + u32 j, i = ++aHeap[0]; + aHeap[i] = x; + while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ + x = aHeap[j]; + aHeap[j] = aHeap[i]; + aHeap[i] = x; + i = j; + } +} +static int btreeHeapPull(u32 *aHeap, u32 *pOut){ + u32 j, i, x; + if( (x = aHeap[0])==0 ) return 0; + *pOut = aHeap[1]; + aHeap[1] = aHeap[x]; + aHeap[x] = 0xffffffff; + aHeap[0]--; + i = 1; + while( (j = i*2)<=aHeap[0] ){ + if( aHeap[j]>aHeap[j+1] ) j++; + if( aHeap[i]<aHeap[j] ) break; + x = aHeap[i]; + aHeap[i] = aHeap[j]; + aHeap[j] = x; + i = j; + } + return 1; +} + #ifndef SQLITE_OMIT_INTEGRITY_CHECK /* ** Do various sanity checks on a single page of a tree. Return @@ -8544,7 +8611,8 @@ static int checkTreePage( u8 *data; BtShared *pBt; int usableSize; - char *hit = 0; + u32 *heap = 0; + u32 x, prev = 0; i64 nMinKey = 0; i64 nMaxKey = 0; const char *saved_zPfx = pCheck->zPfx; @@ -8689,15 +8757,15 @@ static int checkTreePage( */ data = pPage->aData; hdr = pPage->hdrOffset; - hit = sqlite3PageMalloc( pBt->pageSize ); + heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); pCheck->zPfx = 0; - if( hit==0 ){ + if( heap==0 ){ pCheck->mallocFailed = 1; }else{ int contentOffset = get2byteNotZero(&data[hdr+5]); assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ - memset(hit+contentOffset, 0, usableSize-contentOffset); - memset(hit, 1, contentOffset); + heap[0] = 0; + btreeHeapInsert(heap, contentOffset-1); /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the ** number of cells on the page. */ nCell = get2byte(&data[hdr+3]); @@ -8709,7 +8777,6 @@ static int checkTreePage( for(i=0; i<nCell; i++){ int pc = get2byte(&data[cellStart+i*2]); u32 size = 65536; - int j; if( pc<=usableSize-4 ){ size = cellSizePtr(pPage, &data[pc]); } @@ -8718,7 +8785,7 @@ static int checkTreePage( checkAppendMsg(pCheck, "Corruption detected in cell %d on page %d",i,iPage); }else{ - for(j=pc+size-1; j>=pc; j--) hit[j]++; + btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); } } /* EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header @@ -8730,7 +8797,7 @@ static int checkTreePage( assert( i<=usableSize-4 ); /* Enforced by btreeInitPage() */ size = get2byte(&data[i+2]); assert( i+size<=usableSize ); /* Enforced by btreeInitPage() */ - for(j=i+size-1; j>=i; j--) hit[j]++; + btreeHeapInsert(heap, (i<<16)|(i+size-1)); /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a ** big-endian integer which is the offset in the b-tree page of the next ** freeblock in the chain, or zero if the freeblock is the last on the @@ -8742,27 +8809,33 @@ static int checkTreePage( assert( j<=usableSize-4 ); /* Enforced by btreeInitPage() */ i = j; } - for(i=cnt=0; i<usableSize; i++){ - if( hit[i]==0 ){ - cnt++; - }else if( hit[i]>1 ){ + cnt = 0; + assert( heap[0]>0 ); + assert( (heap[1]>>16)==0 ); + btreeHeapPull(heap,&prev); + while( btreeHeapPull(heap,&x) ){ + if( (prev&0xffff)+1>(x>>16) ){ checkAppendMsg(pCheck, - "Multiple uses for byte %d of page %d", i, iPage); + "Multiple uses for byte %u of page %d", x>>16, iPage); break; + }else{ + cnt += (x>>16) - (prev&0xffff) - 1; + prev = x; } } + cnt += usableSize - (prev&0xffff) - 1; /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments ** is stored in the fifth field of the b-tree page header. ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the ** number of fragmented free bytes within the cell content area. */ - if( cnt!=data[hdr+7] ){ + if( heap[0]==0 && cnt!=data[hdr+7] ){ checkAppendMsg(pCheck, "Fragmentation of %d bytes reported as %d on page %d", cnt, data[hdr+7], iPage); } } - sqlite3PageFree(hit); + sqlite3PageFree(heap); releasePage(pPage); end_of_check: |