aboutsummaryrefslogtreecommitdiff
path: root/src/btree.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/btree.c')
-rw-r--r--src/btree.c145
1 files changed, 109 insertions, 36 deletions
diff --git a/src/btree.c b/src/btree.c
index 1d82a2b62..c23cdb947 100644
--- a/src/btree.c
+++ b/src/btree.c
@@ -600,10 +600,15 @@ static void btreeReleaseAllCursorPages(BtCursor *pCur){
static int saveCursorPosition(BtCursor *pCur){
int rc;
- assert( CURSOR_VALID==pCur->eState );
+ assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState );
assert( 0==pCur->pKey );
assert( cursorHoldsMutex(pCur) );
+ if( pCur->eState==CURSOR_SKIPNEXT ){
+ pCur->eState = CURSOR_VALID;
+ }else{
+ pCur->skipNext = 0;
+ }
rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
assert( rc==SQLITE_OK ); /* KeySize() cannot fail */
@@ -674,7 +679,7 @@ static int SQLITE_NOINLINE saveCursorsOnList(
){
do{
if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){
- if( p->eState==CURSOR_VALID ){
+ if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
int rc = saveCursorPosition(p);
if( SQLITE_OK!=rc ){
return rc;
@@ -746,17 +751,19 @@ static int btreeMoveto(
*/
static int btreeRestoreCursorPosition(BtCursor *pCur){
int rc;
+ int skipNext;
assert( cursorHoldsMutex(pCur) );
assert( pCur->eState>=CURSOR_REQUIRESEEK );
if( pCur->eState==CURSOR_FAULT ){
return pCur->skipNext;
}
pCur->eState = CURSOR_INVALID;
- rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skipNext);
+ rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext);
if( rc==SQLITE_OK ){
sqlite3_free(pCur->pKey);
pCur->pKey = 0;
assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
+ pCur->skipNext |= skipNext;
if( pCur->skipNext && pCur->eState==CURSOR_VALID ){
pCur->eState = CURSOR_SKIPNEXT;
}
@@ -808,9 +815,10 @@ int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){
*pDifferentRow = 1;
return rc;
}
- if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){
+ if( pCur->eState!=CURSOR_VALID ){
*pDifferentRow = 1;
}else{
+ assert( pCur->skipNext==0 );
*pDifferentRow = 0;
}
return SQLITE_OK;
@@ -1951,16 +1959,18 @@ int sqlite3BtreeOpen(
*/
if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
+ int nFilename = sqlite3Strlen30(zFilename)+1;
int nFullPathname = pVfs->mxPathname+1;
- char *zFullPathname = sqlite3Malloc(nFullPathname);
+ char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename));
MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
+
p->sharable = 1;
if( !zFullPathname ){
sqlite3_free(p);
return SQLITE_NOMEM;
}
if( isMemdb ){
- memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1);
+ memcpy(zFullPathname, zFilename, nFilename);
}else{
rc = sqlite3OsFullPathname(pVfs, zFilename,
nFullPathname, zFullPathname);
@@ -2017,8 +2027,8 @@ int sqlite3BtreeOpen(
** the right size. This is to guard against size changes that result
** when compiling on a different architecture.
*/
- assert( sizeof(i64)==8 || sizeof(i64)==4 );
- assert( sizeof(u64)==8 || sizeof(u64)==4 );
+ assert( sizeof(i64)==8 );
+ assert( sizeof(u64)==8 );
assert( sizeof(u32)==4 );
assert( sizeof(u16)==2 );
assert( sizeof(Pgno)==4 );
@@ -3625,7 +3635,7 @@ int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){
for(p=pBtree->pBt->pCursor; p; p=p->pNext){
int i;
if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){
- if( p->eState==CURSOR_VALID ){
+ if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
rc = saveCursorPosition(p);
if( rc!=SQLITE_OK ){
(void)sqlite3BtreeTripAllCursors(pBtree, rc, 0);
@@ -4031,6 +4041,8 @@ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
assert( cursorHoldsMutex(pCur) );
assert( pCur->eState==CURSOR_VALID );
+ assert( pCur->iPage>=0 );
+ assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 );
getCellInfo(pCur);
*pSize = pCur->info.nPayload;
@@ -4439,13 +4451,18 @@ static const void *fetchPayload(
BtCursor *pCur, /* Cursor pointing to entry to read from */
u32 *pAmt /* Write the number of available bytes here */
){
+ u32 amt;
assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]);
assert( pCur->eState==CURSOR_VALID );
assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
assert( cursorHoldsMutex(pCur) );
assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
assert( pCur->info.nSize>0 );
- *pAmt = pCur->info.nLocal;
+ assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB );
+ assert( pCur->info.pPayload<pCur->apPage[pCur->iPage]->aDataEnd ||CORRUPT_DB);
+ amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload);
+ if( pCur->info.nLocal<amt ) amt = pCur->info.nLocal;
+ *pAmt = amt;
return (void*)pCur->info.pPayload;
}
@@ -4509,7 +4526,7 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){
return SQLITE_OK;
}
-#if 0
+#if SQLITE_DEBUG
/*
** Page pParent is an internal (non-leaf) tree page. This function
** asserts that page number iChild is the left-child if the iIdx'th
@@ -4518,6 +4535,8 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){
** the page.
*/
static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){
+ if( CORRUPT_DB ) return; /* The conditions tested below might not be true
+ ** in a corrupt database */
assert( iIdx<=pParent->nCell );
if( iIdx==pParent->nCell ){
assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild );
@@ -4542,19 +4561,11 @@ static void moveToParent(BtCursor *pCur){
assert( pCur->eState==CURSOR_VALID );
assert( pCur->iPage>0 );
assert( pCur->apPage[pCur->iPage] );
-
- /* UPDATE: It is actually possible for the condition tested by the assert
- ** below to be untrue if the database file is corrupt. This can occur if
- ** one cursor has modified page pParent while a reference to it is held
- ** by a second cursor. Which can only happen if a single page is linked
- ** into more than one b-tree structure in a corrupt database. */
-#if 0
assertParentIndex(
pCur->apPage[pCur->iPage-1],
pCur->aiIdx[pCur->iPage-1],
pCur->apPage[pCur->iPage]->pgno
);
-#endif
testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
releasePage(pCur->apPage[pCur->iPage]);
@@ -6729,7 +6740,6 @@ static int balance_nonroot(
}else if( iParentIdx==i ){
nxDiv = i-2+bBulk;
}else{
- assert( bBulk==0 );
nxDiv = iParentIdx-1;
}
i = 2-bBulk;
@@ -7502,6 +7512,7 @@ static int balance(BtCursor *pCur){
/* The next iteration of the do-loop balances the parent page. */
releasePage(pPage);
pCur->iPage--;
+ assert( pCur->iPage>=0 );
}
}while( rc==SQLITE_OK );
@@ -7978,9 +7989,13 @@ static int clearDatabasePage(
if( pgno>btreePagecount(pBt) ){
return SQLITE_CORRUPT_BKPT;
}
-
rc = getAndInitPage(pBt, pgno, &pPage, 0);
if( rc ) return rc;
+ if( pPage->bBusy ){
+ rc = SQLITE_CORRUPT_BKPT;
+ goto cleardatabasepage_out;
+ }
+ pPage->bBusy = 1;
hdr = pPage->hdrOffset;
for(i=0; i<pPage->nCell; i++){
pCell = findCell(pPage, i);
@@ -8005,6 +8020,7 @@ static int clearDatabasePage(
}
cleardatabasepage_out:
+ pPage->bBusy = 0;
releasePage(pPage);
return rc;
}
@@ -8512,6 +8528,57 @@ static void checkList(
}
#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+/*
+** An implementation of a min-heap.
+**
+** aHeap[0] is the number of elements on the heap. aHeap[1] is the
+** root element. The daughter nodes of aHeap[N] are aHeap[N*2]
+** and aHeap[N*2+1].
+**
+** The heap property is this: Every node is less than or equal to both
+** of its daughter nodes. A consequence of the heap property is that the
+** root node aHeap[1] is always the minimum value current in the heap.
+**
+** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto
+** the heap, preserving the heap property. The btreeHeapPull() routine
+** removes the root element from the heap (the minimum value in the heap)
+** and then move other nodes around as necessary to preserve the heap
+** property.
+**
+** This heap is used for cell overlap and coverage testing. Each u32
+** entry represents the span of a cell or freeblock on a btree page.
+** The upper 16 bits are the index of the first byte of a range and the
+** lower 16 bits are the index of the last byte of that range.
+*/
+static void btreeHeapInsert(u32 *aHeap, u32 x){
+ u32 j, i = ++aHeap[0];
+ aHeap[i] = x;
+ while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){
+ x = aHeap[j];
+ aHeap[j] = aHeap[i];
+ aHeap[i] = x;
+ i = j;
+ }
+}
+static int btreeHeapPull(u32 *aHeap, u32 *pOut){
+ u32 j, i, x;
+ if( (x = aHeap[0])==0 ) return 0;
+ *pOut = aHeap[1];
+ aHeap[1] = aHeap[x];
+ aHeap[x] = 0xffffffff;
+ aHeap[0]--;
+ i = 1;
+ while( (j = i*2)<=aHeap[0] ){
+ if( aHeap[j]>aHeap[j+1] ) j++;
+ if( aHeap[i]<aHeap[j] ) break;
+ x = aHeap[i];
+ aHeap[i] = aHeap[j];
+ aHeap[j] = x;
+ i = j;
+ }
+ return 1;
+}
+
#ifndef SQLITE_OMIT_INTEGRITY_CHECK
/*
** Do various sanity checks on a single page of a tree. Return
@@ -8544,7 +8611,8 @@ static int checkTreePage(
u8 *data;
BtShared *pBt;
int usableSize;
- char *hit = 0;
+ u32 *heap = 0;
+ u32 x, prev = 0;
i64 nMinKey = 0;
i64 nMaxKey = 0;
const char *saved_zPfx = pCheck->zPfx;
@@ -8689,15 +8757,15 @@ static int checkTreePage(
*/
data = pPage->aData;
hdr = pPage->hdrOffset;
- hit = sqlite3PageMalloc( pBt->pageSize );
+ heap = (u32*)sqlite3PageMalloc( pBt->pageSize );
pCheck->zPfx = 0;
- if( hit==0 ){
+ if( heap==0 ){
pCheck->mallocFailed = 1;
}else{
int contentOffset = get2byteNotZero(&data[hdr+5]);
assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */
- memset(hit+contentOffset, 0, usableSize-contentOffset);
- memset(hit, 1, contentOffset);
+ heap[0] = 0;
+ btreeHeapInsert(heap, contentOffset-1);
/* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
** number of cells on the page. */
nCell = get2byte(&data[hdr+3]);
@@ -8709,7 +8777,6 @@ static int checkTreePage(
for(i=0; i<nCell; i++){
int pc = get2byte(&data[cellStart+i*2]);
u32 size = 65536;
- int j;
if( pc<=usableSize-4 ){
size = cellSizePtr(pPage, &data[pc]);
}
@@ -8718,7 +8785,7 @@ static int checkTreePage(
checkAppendMsg(pCheck,
"Corruption detected in cell %d on page %d",i,iPage);
}else{
- for(j=pc+size-1; j>=pc; j--) hit[j]++;
+ btreeHeapInsert(heap, (pc<<16)|(pc+size-1));
}
}
/* EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header
@@ -8730,7 +8797,7 @@ static int checkTreePage(
assert( i<=usableSize-4 ); /* Enforced by btreeInitPage() */
size = get2byte(&data[i+2]);
assert( i+size<=usableSize ); /* Enforced by btreeInitPage() */
- for(j=i+size-1; j>=i; j--) hit[j]++;
+ btreeHeapInsert(heap, (i<<16)|(i+size-1));
/* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a
** big-endian integer which is the offset in the b-tree page of the next
** freeblock in the chain, or zero if the freeblock is the last on the
@@ -8742,27 +8809,33 @@ static int checkTreePage(
assert( j<=usableSize-4 ); /* Enforced by btreeInitPage() */
i = j;
}
- for(i=cnt=0; i<usableSize; i++){
- if( hit[i]==0 ){
- cnt++;
- }else if( hit[i]>1 ){
+ cnt = 0;
+ assert( heap[0]>0 );
+ assert( (heap[1]>>16)==0 );
+ btreeHeapPull(heap,&prev);
+ while( btreeHeapPull(heap,&x) ){
+ if( (prev&0xffff)+1>(x>>16) ){
checkAppendMsg(pCheck,
- "Multiple uses for byte %d of page %d", i, iPage);
+ "Multiple uses for byte %u of page %d", x>>16, iPage);
break;
+ }else{
+ cnt += (x>>16) - (prev&0xffff) - 1;
+ prev = x;
}
}
+ cnt += usableSize - (prev&0xffff) - 1;
/* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments
** is stored in the fifth field of the b-tree page header.
** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the
** number of fragmented free bytes within the cell content area.
*/
- if( cnt!=data[hdr+7] ){
+ if( heap[0]==0 && cnt!=data[hdr+7] ){
checkAppendMsg(pCheck,
"Fragmentation of %d bytes reported as %d on page %d",
cnt, data[hdr+7], iPage);
}
}
- sqlite3PageFree(hit);
+ sqlite3PageFree(heap);
releasePage(pPage);
end_of_check: