aboutsummaryrefslogtreecommitdiff
path: root/src/btree.c
diff options
context:
space:
mode:
authordrh <drh@noemail.net>2014-10-31 14:53:32 +0000
committerdrh <drh@noemail.net>2014-10-31 14:53:32 +0000
commitca3e10ea37c4808fa84063f06b02229801b28cc0 (patch)
tree25f7d9b2ab4ff71311882bcee951b6e9ab1cb62f /src/btree.c
parentbc2866ca7abc2ad7cbb6878d65cd18e584cff8f5 (diff)
parent0fb5daed34ba6f16d761b8ad02b944ec59cf5c03 (diff)
downloadsqlite-ca3e10ea37c4808fa84063f06b02229801b28cc0.tar.gz
sqlite-ca3e10ea37c4808fa84063f06b02229801b28cc0.zip
Merge recent trunk enhancements, and in particular the improvements to
the b-tree balancing logic, into the sessions branch. FossilOrigin-Name: 28b044a51215a3f64dafb2cf3b6cb7d2029580ef
Diffstat (limited to 'src/btree.c')
-rw-r--r--src/btree.c909
1 files changed, 596 insertions, 313 deletions
diff --git a/src/btree.c b/src/btree.c
index 758dfe633..9300a6a54 100644
--- a/src/btree.c
+++ b/src/btree.c
@@ -1151,6 +1151,7 @@ static int defragmentPage(MemPage *pPage){
int nCell; /* Number of cells on the page */
unsigned char *data; /* The page data */
unsigned char *temp; /* Temp area for cell content */
+ unsigned char *src; /* Source of content */
int iCellFirst; /* First allowable cell index */
int iCellLast; /* Last possible cell index */
@@ -1160,15 +1161,13 @@ static int defragmentPage(MemPage *pPage){
assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
assert( pPage->nOverflow==0 );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
- data = pPage->aData;
+ temp = 0;
+ src = data = pPage->aData;
hdr = pPage->hdrOffset;
cellOffset = pPage->cellOffset;
nCell = pPage->nCell;
assert( nCell==get2byte(&data[hdr+3]) );
usableSize = pPage->pBt->usableSize;
- cbrk = get2byte(&data[hdr+5]);
- memcpy(&temp[cbrk], &data[cbrk], usableSize - cbrk);
cbrk = usableSize;
iCellFirst = cellOffset + 2*nCell;
iCellLast = usableSize - 4;
@@ -1187,7 +1186,7 @@ static int defragmentPage(MemPage *pPage){
}
#endif
assert( pc>=iCellFirst && pc<=iCellLast );
- size = cellSizePtr(pPage, &temp[pc]);
+ size = cellSizePtr(pPage, &src[pc]);
cbrk -= size;
#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK)
if( cbrk<iCellFirst ){
@@ -1201,8 +1200,16 @@ static int defragmentPage(MemPage *pPage){
assert( cbrk+size<=usableSize && cbrk>=iCellFirst );
testcase( cbrk+size==usableSize );
testcase( pc+size==usableSize );
- memcpy(&data[cbrk], &temp[pc], size);
put2byte(pAddr, cbrk);
+ if( temp==0 ){
+ int x;
+ if( cbrk==pc ) continue;
+ temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
+ x = get2byte(&data[hdr+5]);
+ memcpy(&temp[x], &data[x], (cbrk+size) - x);
+ src = temp;
+ }
+ memcpy(&data[cbrk], &src[pc], size);
}
assert( cbrk>=iCellFirst );
put2byte(&data[hdr+5], cbrk);
@@ -1218,6 +1225,62 @@ static int defragmentPage(MemPage *pPage){
}
/*
+** Search the free-list on page pPg for space to store a cell nByte bytes in
+** size. If one can be found, return a pointer to the space and remove it
+** from the free-list.
+**
+** If no suitable space can be found on the free-list, return NULL.
+**
+** This function may detect corruption within pPg. If corruption is
+** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned.
+**
+** If a slot of at least nByte bytes is found but cannot be used because
+** there are already at least 60 fragmented bytes on the page, return NULL.
+** In this case, if pbDefrag parameter is not NULL, set *pbDefrag to true.
+*/
+static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){
+ const int hdr = pPg->hdrOffset;
+ u8 * const aData = pPg->aData;
+ int iAddr;
+ int pc;
+ int usableSize = pPg->pBt->usableSize;
+
+ for(iAddr=hdr+1; (pc = get2byte(&aData[iAddr]))>0; iAddr=pc){
+ int size; /* Size of the free slot */
+ if( pc>usableSize-4 || pc<iAddr+4 ){
+ *pRc = SQLITE_CORRUPT_BKPT;
+ return 0;
+ }
+ size = get2byte(&aData[pc+2]);
+ if( size>=nByte ){
+ int x = size - nByte;
+ testcase( x==4 );
+ testcase( x==3 );
+ if( x<4 ){
+ if( aData[hdr+7]>=60 ){
+ if( pbDefrag ) *pbDefrag = 1;
+ return 0;
+ }
+ /* Remove the slot from the free-list. Update the number of
+ ** fragmented bytes within the page. */
+ memcpy(&aData[iAddr], &aData[pc], 2);
+ aData[hdr+7] += (u8)x;
+ }else if( size+pc > usableSize ){
+ *pRc = SQLITE_CORRUPT_BKPT;
+ return 0;
+ }else{
+ /* The slot remains on the free-list. Reduce its size to account
+ ** for the portion used by the new allocation. */
+ put2byte(&aData[pc+2], x);
+ }
+ return &aData[pc + x];
+ }
+ }
+
+ return 0;
+}
+
+/*
** Allocate nByte bytes of space from within the B-Tree page passed
** as the first argument. Write into *pIdx the index into pPage->aData[]
** of the first byte of allocated space. Return either SQLITE_OK or
@@ -1236,7 +1299,6 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
int top; /* First byte of cell content area */
int gap; /* First byte of gap between cell pointers and cell content */
int rc; /* Integer return code */
- int usableSize; /* Usable size of the page */
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( pPage->pBt );
@@ -1244,8 +1306,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
assert( nByte>=0 ); /* Minimum cell size is 4 */
assert( pPage->nFree>=nByte );
assert( pPage->nOverflow==0 );
- usableSize = pPage->pBt->usableSize;
- assert( nByte < usableSize-8 );
+ assert( nByte < (int)(pPage->pBt->usableSize-8) );
assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
gap = pPage->cellOffset + 2*pPage->nCell;
@@ -1267,33 +1328,14 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
testcase( gap+1==top );
testcase( gap==top );
if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){
- int pc, addr;
- for(addr=hdr+1; (pc = get2byte(&data[addr]))>0; addr=pc){
- int size; /* Size of the free slot */
- if( pc>usableSize-4 || pc<addr+4 ){
- return SQLITE_CORRUPT_BKPT;
- }
- size = get2byte(&data[pc+2]);
- if( size>=nByte ){
- int x = size - nByte;
- testcase( x==4 );
- testcase( x==3 );
- if( x<4 ){
- if( data[hdr+7]>=60 ) goto defragment_page;
- /* Remove the slot from the free-list. Update the number of
- ** fragmented bytes within the page. */
- memcpy(&data[addr], &data[pc], 2);
- data[hdr+7] += (u8)x;
- }else if( size+pc > usableSize ){
- return SQLITE_CORRUPT_BKPT;
- }else{
- /* The slot remains on the free-list. Reduce its size to account
- ** for the portion used by the new allocation. */
- put2byte(&data[pc+2], x);
- }
- *pIdx = pc + x;
- return SQLITE_OK;
- }
+ int rc = SQLITE_OK;
+ int bDefrag = 0;
+ u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag);
+ if( rc ) return rc;
+ if( bDefrag ) goto defragment_page;
+ if( pSpace ){
+ *pIdx = pSpace - data;
+ return SQLITE_OK;
}
}
@@ -1302,7 +1344,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
*/
testcase( gap+2+nByte==top );
if( gap+2+nByte>top ){
-defragment_page:
+ defragment_page:
testcase( pPage->nCell==0 );
rc = defragmentPage(pPage);
if( rc ) return rc;
@@ -1350,7 +1392,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
assert( pPage->pBt!=0 );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize );
- assert( iEnd <= pPage->pBt->usableSize );
+ assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( iSize>=4 ); /* Minimum cell size is 4 */
assert( iStart<=iLast );
@@ -5938,45 +5980,256 @@ static void insertCell(
}
/*
-** Add a list of cells to a page. The page should be initially empty.
-** The cells are guaranteed to fit on the page.
+** Array apCell[] contains pointers to nCell b-tree page cells. The
+** szCell[] array contains the size in bytes of each cell. This function
+** replaces the current contents of page pPg with the contents of the cell
+** array.
+**
+** Some of the cells in apCell[] may currently be stored in pPg. This
+** function works around problems caused by this by making a copy of any
+** such cells before overwriting the page data.
+**
+** The MemPage.nFree field is invalidated by this function. It is the
+** responsibility of the caller to set it correctly.
*/
-static void assemblePage(
- MemPage *pPage, /* The page to be assembled */
- int nCell, /* The number of cells to add to this page */
- u8 **apCell, /* Pointers to cell bodies */
- u16 *aSize /* Sizes of the cells */
+static void rebuildPage(
+ MemPage *pPg, /* Edit this page */
+ int nCell, /* Final number of cells on page */
+ u8 **apCell, /* Array of cells */
+ u16 *szCell /* Array of cell sizes */
){
- int i; /* Loop counter */
- u8 *pCellptr; /* Address of next cell pointer */
- int cellbody; /* Address of next cell body */
- u8 * const data = pPage->aData; /* Pointer to data for pPage */
- const int hdr = pPage->hdrOffset; /* Offset of header on pPage */
- const int nUsable = pPage->pBt->usableSize; /* Usable size of page */
+ const int hdr = pPg->hdrOffset; /* Offset of header on pPg */
+ u8 * const aData = pPg->aData; /* Pointer to data for pPg */
+ const int usableSize = pPg->pBt->usableSize;
+ u8 * const pEnd = &aData[usableSize];
+ int i;
+ u8 *pCellptr = pPg->aCellIdx;
+ u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
+ u8 *pData;
- assert( pPage->nOverflow==0 );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- assert( nCell>=0 && nCell<=(int)MX_CELL(pPage->pBt)
- && (int)MX_CELL(pPage->pBt)<=10921);
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+ i = get2byte(&aData[hdr+5]);
+ memcpy(&pTmp[i], &aData[i], usableSize - i);
- /* Check that the page has just been zeroed by zeroPage() */
- assert( pPage->nCell==0 );
- assert( get2byteNotZero(&data[hdr+5])==nUsable );
+ pData = pEnd;
+ for(i=0; i<nCell; i++){
+ u8 *pCell = apCell[i];
+ if( pCell>aData && pCell<pEnd ){
+ pCell = &pTmp[pCell - aData];
+ }
+ pData -= szCell[i];
+ memcpy(pData, pCell, szCell[i]);
+ put2byte(pCellptr, (pData - aData));
+ pCellptr += 2;
+ assert( szCell[i]==cellSizePtr(pPg, pCell) );
+ }
+
+ /* The pPg->nFree field is now set incorrectly. The caller will fix it. */
+ pPg->nCell = nCell;
+ pPg->nOverflow = 0;
+
+ put2byte(&aData[hdr+1], 0);
+ put2byte(&aData[hdr+3], pPg->nCell);
+ put2byte(&aData[hdr+5], pData - aData);
+ aData[hdr+7] = 0x00;
+}
+
+/*
+** Array apCell[] contains nCell pointers to b-tree cells. Array szCell
+** contains the size in bytes of each such cell. This function attempts to
+** add the cells stored in the array to page pPg. If it cannot (because
+** the page needs to be defragmented before the cells will fit), non-zero
+** is returned. Otherwise, if the cells are added successfully, zero is
+** returned.
+**
+** Argument pCellptr points to the first entry in the cell-pointer array
+** (part of page pPg) to populate. After cell apCell[0] is written to the
+** page body, a 16-bit offset is written to pCellptr. And so on, for each
+** cell in the array. It is the responsibility of the caller to ensure
+** that it is safe to overwrite this part of the cell-pointer array.
+**
+** When this function is called, *ppData points to the start of the
+** content area on page pPg. If the size of the content area is extended,
+** *ppData is updated to point to the new start of the content area
+** before returning.
+**
+** Finally, argument pBegin points to the byte immediately following the
+** end of the space required by this page for the cell-pointer area (for
+** all cells - not just those inserted by the current call). If the content
+** area must be extended to before this point in order to accomodate all
+** cells in apCell[], then the cells do not fit and non-zero is returned.
+*/
+static int pageInsertArray(
+ MemPage *pPg, /* Page to add cells to */
+ u8 *pBegin, /* End of cell-pointer array */
+ u8 **ppData, /* IN/OUT: Page content -area pointer */
+ u8 *pCellptr, /* Pointer to cell-pointer area */
+ int nCell, /* Number of cells to add to pPg */
+ u8 **apCell, /* Array of cells */
+ u16 *szCell /* Array of cell sizes */
+){
+ int i;
+ u8 *aData = pPg->aData;
+ u8 *pData = *ppData;
+ const int bFreelist = aData[1] || aData[2];
+ assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
+ for(i=0; i<nCell; i++){
+ int sz = szCell[i];
+ int rc;
+ u8 *pSlot;
+ if( bFreelist==0 || (pSlot = pageFindSlot(pPg, sz, &rc, 0))==0 ){
+ pData -= sz;
+ if( pData<pBegin ) return 1;
+ pSlot = pData;
+ }
+ memcpy(pSlot, apCell[i], sz);
+ put2byte(pCellptr, (pSlot - aData));
+ pCellptr += 2;
+ }
+ *ppData = pData;
+ return 0;
+}
- pCellptr = &pPage->aCellIdx[nCell*2];
- cellbody = nUsable;
- for(i=nCell-1; i>=0; i--){
- u16 sz = aSize[i];
- pCellptr -= 2;
- cellbody -= sz;
- put2byte(pCellptr, cellbody);
- memcpy(&data[cellbody], apCell[i], sz);
+/*
+** Array apCell[] contains nCell pointers to b-tree cells. Array szCell
+** contains the size in bytes of each such cell. This function adds the
+** space associated with each cell in the array that is currently stored
+** within the body of pPg to the pPg free-list. The cell-pointers and other
+** fields of the page are not updated.
+**
+** This function returns the total number of cells added to the free-list.
+*/
+static int pageFreeArray(
+ MemPage *pPg, /* Page to edit */
+ int nCell, /* Cells to delete */
+ u8 **apCell, /* Array of cells */
+ u16 *szCell /* Array of cell sizes */
+){
+ u8 * const aData = pPg->aData;
+ u8 * const pEnd = &aData[pPg->pBt->usableSize];
+ u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize];
+ int nRet = 0;
+ int i;
+ u8 *pFree = 0;
+ int szFree = 0;
+
+ for(i=0; i<nCell; i++){
+ u8 *pCell = apCell[i];
+ if( pCell>=pStart && pCell<pEnd ){
+ int sz = szCell[i];
+ if( pFree!=(pCell + sz) ){
+ if( pFree ) freeSpace(pPg, pFree - aData, szFree);
+ pFree = pCell;
+ szFree = sz;
+ if( pFree+sz>pEnd ) return 0;
+ }else{
+ pFree = pCell;
+ szFree += sz;
+ }
+ nRet++;
+ }
}
- put2byte(&data[hdr+3], nCell);
- put2byte(&data[hdr+5], cellbody);
- pPage->nFree -= (nCell*2 + nUsable - cellbody);
- pPage->nCell = (u16)nCell;
+ if( pFree ) freeSpace(pPg, pFree - aData, szFree);
+ return nRet;
+}
+
+/*
+** The pPg->nFree field is invalid when this function returns. It is the
+** responsibility of the caller to set it correctly.
+*/
+static void editPage(
+ MemPage *pPg, /* Edit this page */
+ int iOld, /* Index of first cell currently on page */
+ int iNew, /* Index of new first cell on page */
+ int nNew, /* Final number of cells on page */
+ u8 **apCell, /* Array of cells */
+ u16 *szCell /* Array of cell sizes */
+){
+ u8 * const aData = pPg->aData;
+ const int hdr = pPg->hdrOffset;
+ u8 *pBegin = &pPg->aCellIdx[nNew * 2];
+ int nCell = pPg->nCell; /* Cells stored on pPg */
+ u8 *pData;
+ u8 *pCellptr;
+ int i;
+ int iOldEnd = iOld + pPg->nCell + pPg->nOverflow;
+ int iNewEnd = iNew + nNew;
+
+#ifdef SQLITE_DEBUG
+ u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
+ memcpy(pTmp, aData, pPg->pBt->usableSize);
+#endif
+
+ /* Remove cells from the start and end of the page */
+ if( iOld<iNew ){
+ int nShift = pageFreeArray(
+ pPg, iNew-iOld, &apCell[iOld], &szCell[iOld]
+ );
+ memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2);
+ nCell -= nShift;
+ }
+ if( iNewEnd < iOldEnd ){
+ nCell -= pageFreeArray(
+ pPg, iOldEnd-iNewEnd, &apCell[iNewEnd], &szCell[iNewEnd]
+ );
+ }
+
+ pData = &aData[get2byte(&aData[hdr+5])];
+ if( pData<pBegin ) goto editpage_fail;
+
+ /* Add cells to the start of the page */
+ if( iNew<iOld ){
+ int nAdd = iOld-iNew;
+ pCellptr = pPg->aCellIdx;
+ memmove(&pCellptr[nAdd*2], pCellptr, nCell*2);
+ if( pageInsertArray(
+ pPg, pBegin, &pData, pCellptr,
+ nAdd, &apCell[iNew], &szCell[iNew]
+ ) ) goto editpage_fail;
+ nCell += nAdd;
+ }
+
+ /* Add any overflow cells */
+ for(i=0; i<pPg->nOverflow; i++){
+ int iCell = (iOld + pPg->aiOvfl[i]) - iNew;
+ if( iCell>=0 && iCell<nNew ){
+ u8 *pCellptr = &pPg->aCellIdx[iCell * 2];
+ memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2);
+ nCell++;
+ if( pageInsertArray(
+ pPg, pBegin, &pData, pCellptr,
+ 1, &apCell[iCell + iNew], &szCell[iCell + iNew]
+ ) ) goto editpage_fail;
+ }
+ }
+
+ /* Append cells to the end of the page */
+ pCellptr = &pPg->aCellIdx[nCell*2];
+ if( pageInsertArray(
+ pPg, pBegin, &pData, pCellptr,
+ nNew-nCell, &apCell[iNew+nCell], &szCell[iNew+nCell]
+ ) ) goto editpage_fail;
+
+ pPg->nCell = nNew;
+ pPg->nOverflow = 0;
+
+ put2byte(&aData[hdr+3], pPg->nCell);
+ put2byte(&aData[hdr+5], pData - aData);
+
+#ifdef SQLITE_DEBUG
+ for(i=0; i<nNew && !CORRUPT_DB; i++){
+ u8 *pCell = apCell[i+iNew];
+ int iOff = get2byte(&pPg->aCellIdx[i*2]);
+ if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
+ pCell = &pTmp[pCell - aData];
+ }
+ assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) );
+ }
+#endif
+
+ return;
+ editpage_fail:
+ /* Unable to edit this page. Rebuild it from scratch instead. */
+ rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]);
}
/*
@@ -6048,7 +6301,8 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
assert( sqlite3PagerIswriteable(pNew->pDbPage) );
assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) );
zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF);
- assemblePage(pNew, 1, &pCell, &szCell);
+ rebuildPage(pNew, 1, &pCell, &szCell);
+ pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
/* If this is an auto-vacuum database, update the pointer map
** with entries for the new page, and any pointer from the
@@ -6267,17 +6521,22 @@ static int balance_nonroot(
int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */
int szScratch; /* Size of scratch memory requested */
MemPage *apOld[NB]; /* pPage and up to two siblings */
- MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
u8 *pRight; /* Location in parent of right-sibling pointer */
u8 *apDiv[NB-1]; /* Divider cells in pParent */
int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
- int szNew[NB+2]; /* Combined size of cells place on i-th page */
+ int cntOld[NB+2]; /* Old index in aCell[] after i-th page */
+ int szNew[NB+2]; /* Combined size of cells placed on i-th page */
u8 **apCell = 0; /* All cells begin balanced */
u16 *szCell; /* Local size of all cells in apCell[] */
u8 *aSpace1; /* Space for copies of dividers cells */
Pgno pgno; /* Temp var to store a page number in */
+ u8 abDone[NB+2]; /* True after i'th new page is populated */
+ Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */
+ Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */
+ u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */
+ memset(abDone, 0, sizeof(abDone));
pBt = pParent->pBt;
assert( sqlite3_mutex_held(pBt->mutex) );
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
@@ -6386,12 +6645,11 @@ static int balance_nonroot(
/*
** Allocate space for memory structures
*/
- k = pBt->pageSize + ROUND8(sizeof(MemPage));
szScratch =
nMaxCells*sizeof(u8*) /* apCell */
+ nMaxCells*sizeof(u16) /* szCell */
- + pBt->pageSize /* aSpace1 */
- + k*nOld; /* Page copies (apCopy) */
+ + pBt->pageSize; /* aSpace1 */
+ assert( szScratch<=16896 || szScratch<=6*pBt->pageSize );
apCell = sqlite3ScratchMalloc( szScratch );
if( apCell==0 ){
rc = SQLITE_NOMEM;
@@ -6404,8 +6662,8 @@ static int balance_nonroot(
/*
** Load pointers to all cells on sibling pages and the divider cells
** into the local apCell[] array. Make copies of the divider cells
- ** into space obtained from aSpace1[] and remove the divider cells
- ** from pParent.
+ ** into space obtained from aSpace1[]. The divider cells have already
+ ** been removed from pParent.
**
** If the siblings are on leaf pages, then the child pointers of the
** divider cells are stripped from the cells before they are copied
@@ -6421,15 +6679,7 @@ static int balance_nonroot(
leafData = apOld[0]->intKeyLeaf;
for(i=0; i<nOld; i++){
int limit;
-
- /* Before doing anything else, take a copy of the i'th original sibling
- ** The rest of this function will use data from the copies rather
- ** that the original pages since the original pages will be in the
- ** process of being overwritten. */
- MemPage *pOld = apCopy[i] = (MemPage*)&aSpace1[pBt->pageSize + k*i];
- memcpy(pOld, apOld[i], sizeof(MemPage));
- pOld->aData = (void*)&pOld[1];
- memcpy(pOld->aData, apOld[i]->aData, pBt->pageSize);
+ MemPage *pOld = apOld[i];
limit = pOld->nCell+pOld->nOverflow;
if( pOld->nOverflow>0 ){
@@ -6450,6 +6700,7 @@ static int balance_nonroot(
nCell++;
}
}
+ cntOld[i] = nCell;
if( i<nOld-1 && !leafData){
u16 sz = (u16)szNew[i];
u8 *pTemp;
@@ -6501,7 +6752,7 @@ static int balance_nonroot(
assert( i<nMaxCells );
subtotal += szCell[i] + 2;
if( subtotal > usableSpace ){
- szNew[k] = subtotal - szCell[i];
+ szNew[k] = subtotal - szCell[i] - 2;
cntNew[k] = i;
if( leafData ){ i--; }
subtotal = 0;
@@ -6515,9 +6766,10 @@ static int balance_nonroot(
/*
** The packing computed by the previous block is biased toward the siblings
- ** on the left side. The left siblings are always nearly full, while the
- ** right-most sibling might be nearly empty. This block of code attempts
- ** to adjust the packing of siblings to get a better balance.
+ ** on the left side (siblings with smaller keys). The left siblings are
+ ** always nearly full, while the right-most sibling might be nearly empty.
+ ** The next block of code attempts to adjust the packing of siblings to
+ ** get a better balance.
**
** This adjustment is more than an optimization. The packing above might
** be so out of balance as to be illegal. For example, the right-most
@@ -6546,22 +6798,18 @@ static int balance_nonroot(
szNew[i-1] = szLeft;
}
- /* Either we found one or more cells (cntnew[0])>0) or pPage is
- ** a virtual root page. A virtual root page is when the real root
- ** page is page 1 and we are the only child of that page.
- **
- ** UPDATE: The assert() below is not necessarily true if the database
- ** file is corrupt. The corruption will be detected and reported later
- ** in this procedure so there is no need to act upon it now.
+ /* Sanity check: For a non-corrupt database file one of the follwing
+ ** must be true:
+ ** (1) We found one or more cells (cntNew[0])>0), or
+ ** (2) pPage is a virtual root page. A virtual root page is when
+ ** the real root page is page 1 and we are the only child of
+ ** that page.
*/
-#if 0
- assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
-#endif
-
- TRACE(("BALANCE: old: %d %d %d ",
- apOld[0]->pgno,
- nOld>=2 ? apOld[1]->pgno : 0,
- nOld>=3 ? apOld[2]->pgno : 0
+ assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB);
+ TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n",
+ apOld[0]->pgno, apOld[0]->nCell,
+ nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0,
+ nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0
));
/*
@@ -6584,8 +6832,10 @@ static int balance_nonroot(
assert( i>0 );
rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
if( rc ) goto balance_cleanup;
+ zeroPage(pNew, pageFlags);
apNew[i] = pNew;
nNew++;
+ cntOld[i] = nCell;
/* Set the pointer-map entry for the new sibling page. */
if( ISAUTOVACUUM ){
@@ -6597,135 +6847,244 @@ static int balance_nonroot(
}
}
- /* Free any old pages that were not reused as new pages.
- */
- while( i<nOld ){
- freePage(apOld[i], &rc);
- if( rc ) goto balance_cleanup;
- releasePage(apOld[i]);
- apOld[i] = 0;
- i++;
- }
-
/*
- ** Put the new pages in ascending order. This helps to
- ** keep entries in the disk file in order so that a scan
- ** of the table is a linear scan through the file. That
- ** in turn helps the operating system to deliver pages
- ** from the disk more rapidly.
+ ** Reassign page numbers so that the new pages are in ascending order.
+ ** This helps to keep entries in the disk file in order so that a scan
+ ** of the table is closer to a linear scan through the file. That in turn
+ ** helps the operating system to deliver pages from the disk more rapidly.
**
- ** An O(n^2) insertion sort algorithm is used, but since
- ** n is never more than NB (a small constant), that should
- ** not be a problem.
+ ** An O(n^2) insertion sort algorithm is used, but since n is never more
+ ** than (NB+2) (a small constant), that should not be a problem.
**
- ** When NB==3, this one optimization makes the database
- ** about 25% faster for large insertions and deletions.
+ ** When NB==3, this one optimization makes the database about 25% faster
+ ** for large insertions and deletions.
*/
- for(i=0; i<k-1; i++){
- int minV = apNew[i]->pgno;
- int minI = i;
- for(j=i+1; j<k; j++){
- if( apNew[j]->pgno<(unsigned)minV ){
- minI = j;
- minV = apNew[j]->pgno;
+ for(i=0; i<nNew; i++){
+ aPgOrder[i] = aPgno[i] = apNew[i]->pgno;
+ aPgFlags[i] = apNew[i]->pDbPage->flags;
+ for(j=0; j<i; j++){
+ if( aPgno[j]==aPgno[i] ){
+ /* This branch is taken if the set of sibling pages somehow contains
+ ** duplicate entries. This can happen if the database is corrupt.
+ ** It would be simpler to detect this as part of the loop below, but
+ ** we do the detection here in order to avoid populating the pager
+ ** cache with two separate objects associated with the same
+ ** page number. */
+ assert( CORRUPT_DB );
+ rc = SQLITE_CORRUPT_BKPT;
+ goto balance_cleanup;
}
}
- if( minI>i ){
- MemPage *pT;
- pT = apNew[i];
- apNew[i] = apNew[minI];
- apNew[minI] = pT;
+ }
+ for(i=0; i<nNew; i++){
+ int iBest = 0; /* aPgno[] index of page number to use */
+ Pgno pgno; /* Page number to use */
+ for(j=1; j<nNew; j++){
+ if( aPgOrder[j]<aPgOrder[iBest] ) iBest = j;
+ }
+ pgno = aPgOrder[iBest];
+ aPgOrder[iBest] = 0xffffffff;
+ if( iBest!=i ){
+ if( iBest>i ){
+ sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0);
+ }
+ sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]);
+ apNew[i]->pgno = pgno;
}
}
- TRACE(("new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
- apNew[0]->pgno, szNew[0],
+
+ TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) "
+ "%d(%d nc=%d) %d(%d nc=%d)\n",
+ apNew[0]->pgno, szNew[0], cntNew[0],
nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0,
+ nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0,
nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0,
+ nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0,
nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0,
- nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0));
+ nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0,
+ nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0,
+ nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0
+ ));
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
put4byte(pRight, apNew[nNew-1]->pgno);
- /*
- ** Evenly distribute the data in apCell[] across the new pages.
- ** Insert divider cells into pParent as necessary.
+ /* If the sibling pages are not leaves, ensure that the right-child pointer
+ ** of the right-most new sibling page is set to the value that was
+ ** originally in the same field of the right-most old sibling page. */
+ if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){
+ MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1];
+ memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4);
+ }
+
+ /* Make any required updates to pointer map entries associated with
+ ** cells stored on sibling pages following the balance operation. Pointer
+ ** map entries associated with divider cells are set by the insertCell()
+ ** routine. The associated pointer map entries are:
+ **
+ ** a) if the cell contains a reference to an overflow chain, the
+ ** entry associated with the first page in the overflow chain, and
+ **
+ ** b) if the sibling pages are not leaves, the child page associated
+ ** with the cell.
+ **
+ ** If the sibling pages are not leaves, then the pointer map entry
+ ** associated with the right-child of each sibling may also need to be
+ ** updated. This happens below, after the sibling pages have been
+ ** populated, not here.
*/
- j = 0;
- for(i=0; i<nNew; i++){
- /* Assemble the new sibling page. */
+ if( ISAUTOVACUUM ){
+ MemPage *pNew = apNew[0];
+ u8 *aOld = pNew->aData;
+ int cntOldNext = pNew->nCell + pNew->nOverflow;
+ int usableSize = pBt->usableSize;
+ int iNew = 0;
+ int iOld = 0;
+
+ for(i=0; i<nCell; i++){
+ u8 *pCell = apCell[i];
+ if( i==cntOldNext ){
+ MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld];
+ cntOldNext += pOld->nCell + pOld->nOverflow + !leafData;
+ aOld = pOld->aData;
+ }
+ if( i==cntNew[iNew] ){
+ pNew = apNew[++iNew];
+ if( !leafData ) continue;
+ }
+
+ /* Cell pCell is destined for new sibling page pNew. Originally, it
+ ** was either part of sibling page iOld (possibly an overflow cell),
+ ** or else the divider cell to the left of sibling page iOld. So,
+ ** if sibling page iOld had the same page number as pNew, and if
+ ** pCell really was a part of sibling page iOld (not a divider or
+ ** overflow cell), we can skip updating the pointer map entries. */
+ if( pNew->pgno!=aPgno[iOld] || pCell<aOld || pCell>=&aOld[usableSize] ){
+ if( !leafCorrection ){
+ ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc);
+ }
+ if( szCell[i]>pNew->minLocal ){
+ ptrmapPutOvflPtr(pNew, pCell, &rc);
+ }
+ }
+ }
+ }
+
+ /* Insert new divider cells into pParent. */
+ for(i=0; i<nNew-1; i++){
+ u8 *pCell;
+ u8 *pTemp;
+ int sz;
MemPage *pNew = apNew[i];
+ j = cntNew[i];
+
assert( j<nMaxCells );
- zeroPage(pNew, pageFlags);
- assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
- assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
- assert( pNew->nOverflow==0 );
+ pCell = apCell[j];
+ sz = szCell[j] + leafCorrection;
+ pTemp = &aOvflSpace[iOvflSpace];
+ if( !pNew->leaf ){
+ memcpy(&pNew->aData[8], pCell, 4);
+ }else if( leafData ){
+ /* If the tree is a leaf-data tree, and the siblings are leaves,
+ ** then there is no divider cell in apCell[]. Instead, the divider
+ ** cell consists of the integer key for the right-most cell of
+ ** the sibling-page assembled above only.
+ */
+ CellInfo info;
+ j--;
+ btreeParseCellPtr(pNew, apCell[j], &info);
+ pCell = pTemp;
+ sz = 4 + putVarint(&pCell[4], info.nKey);
+ pTemp = 0;
+ }else{
+ pCell -= 4;
+ /* Obscure case for non-leaf-data trees: If the cell at pCell was
+ ** previously stored on a leaf node, and its reported size was 4
+ ** bytes, then it may actually be smaller than this
+ ** (see btreeParseCellPtr(), 4 bytes is the minimum size of
+ ** any cell). But it is important to pass the correct size to
+ ** insertCell(), so reparse the cell now.
+ **
+ ** Note that this can never happen in an SQLite data file, as all
+ ** cells are at least 4 bytes. It only happens in b-trees used
+ ** to evaluate "IN (SELECT ...)" and similar clauses.
+ */
+ if( szCell[j]==4 ){
+ assert(leafCorrection==4);
+ sz = cellSizePtr(pParent, pCell);
+ }
+ }
+ iOvflSpace += sz;
+ assert( sz<=pBt->maxLocal+23 );
+ assert( iOvflSpace <= (int)pBt->pageSize );
+ insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
+ if( rc!=SQLITE_OK ) goto balance_cleanup;
+ assert( sqlite3PagerIswriteable(pParent->pDbPage) );
+ }
- j = cntNew[i];
+ /* Now update the actual sibling pages. The order in which they are updated
+ ** is important, as this code needs to avoid disrupting any page from which
+ ** cells may still to be read. In practice, this means:
+ **
+ ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1])
+ ** then it is not safe to update page apNew[iPg] until after
+ ** the left-hand sibling apNew[iPg-1] has been updated.
+ **
+ ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1])
+ ** then it is not safe to update page apNew[iPg] until after
+ ** the right-hand sibling apNew[iPg+1] has been updated.
+ **
+ ** If neither of the above apply, the page is safe to update.
+ **
+ ** The iPg value in the following loop starts at nNew-1 goes down
+ ** to 0, then back up to nNew-1 again, thus making two passes over
+ ** the pages. On the initial downward pass, only condition (1) above
+ ** needs to be tested because (2) will always be true from the previous
+ ** step. On the upward pass, both conditions are always true, so the
+ ** upwards pass simply processes pages that were missed on the downward
+ ** pass.
+ */
+ for(i=1-nNew; i<nNew; i++){
+ int iPg = i<0 ? -i : i;
+ assert( iPg>=0 && iPg<nNew );
+ if( abDone[iPg] ) continue; /* Skip pages already processed */
+ if( i>=0 /* On the upwards pass, or... */
+ || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */
+ ){
+ int iNew;
+ int iOld;
+ int nNewCell;
- /* If the sibling page assembled above was not the right-most sibling,
- ** insert a divider cell into the parent page.
- */
- assert( i<nNew-1 || j==nCell );
- if( j<nCell ){
- u8 *pCell;
- u8 *pTemp;
- int sz;
-
- assert( j<nMaxCells );
- pCell = apCell[j];
- sz = szCell[j] + leafCorrection;
- pTemp = &aOvflSpace[iOvflSpace];
- if( !pNew->leaf ){
- memcpy(&pNew->aData[8], pCell, 4);
- }else if( leafData ){
- /* If the tree is a leaf-data tree, and the siblings are leaves,
- ** then there is no divider cell in apCell[]. Instead, the divider
- ** cell consists of the integer key for the right-most cell of
- ** the sibling-page assembled above only.
- */
- CellInfo info;
- j--;
- btreeParseCellPtr(pNew, apCell[j], &info);
- pCell = pTemp;
- sz = 4 + putVarint(&pCell[4], info.nKey);
- pTemp = 0;
+ /* Verify condition (1): If cells are moving left, update iPg
+ ** only after iPg-1 has already been updated. */
+ assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] );
+
+ /* Verify condition (2): If cells are moving right, update iPg
+ ** only after iPg+1 has already been updated. */
+ assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] );
+
+ if( iPg==0 ){
+ iNew = iOld = 0;
+ nNewCell = cntNew[0];
}else{
- pCell -= 4;
- /* Obscure case for non-leaf-data trees: If the cell at pCell was
- ** previously stored on a leaf node, and its reported size was 4
- ** bytes, then it may actually be smaller than this
- ** (see btreeParseCellPtr(), 4 bytes is the minimum size of
- ** any cell). But it is important to pass the correct size to
- ** insertCell(), so reparse the cell now.
- **
- ** Note that this can never happen in an SQLite data file, as all
- ** cells are at least 4 bytes. It only happens in b-trees used
- ** to evaluate "IN (SELECT ...)" and similar clauses.
- */
- if( szCell[j]==4 ){
- assert(leafCorrection==4);
- sz = cellSizePtr(pParent, pCell);
- }
+ iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell;
+ iNew = cntNew[iPg-1] + !leafData;
+ nNewCell = cntNew[iPg] - iNew;
}
- iOvflSpace += sz;
- assert( sz<=pBt->maxLocal+23 );
- assert( iOvflSpace <= (int)pBt->pageSize );
- insertCell(pParent, nxDiv, pCell, sz, pTemp, pNew->pgno, &rc);
- if( rc!=SQLITE_OK ) goto balance_cleanup;
- assert( sqlite3PagerIswriteable(pParent->pDbPage) );
- j++;
- nxDiv++;
+ editPage(apNew[iPg], iOld, iNew, nNewCell, apCell, szCell);
+ abDone[iPg]++;
+ apNew[iPg]->nFree = usableSpace-szNew[iPg];
+ assert( apNew[iPg]->nOverflow==0 );
+ assert( apNew[iPg]->nCell==nNewCell );
}
}
- assert( j==nCell );
+
+ /* All pages have been processed exactly once */
+ assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );
+
assert( nOld>0 );
assert( nNew>0 );
- if( (pageFlags & PTF_LEAF)==0 ){
- u8 *zChild = &apCopy[nOld-1]->aData[8];
- memcpy(&apNew[nNew-1]->aData[8], zChild, 4);
- }
if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){
/* The root page of the b-tree now contains no cells. The only sibling
@@ -6738,126 +7097,50 @@ static int balance_nonroot(
** sets all pointer-map entries corresponding to database image pages
** for which the pointer is stored within the content being copied.
**
- ** The second assert below verifies that the child page is defragmented
- ** (it must be, as it was just reconstructed using assemblePage()). This
- ** is important if the parent page happens to be page 1 of the database
- ** image. */
+ ** It is critical that the child page be defragmented before being
+ ** copied into the parent, because if the parent is page 1 then it will
+ ** by smaller than the child due to the database header, and so all the
+ ** free space needs to be up front.
+ */
assert( nNew==1 );
+ rc = defragmentPage(apNew[0]);
+ testcase( rc!=SQLITE_OK );
assert( apNew[0]->nFree ==
- (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2)
+ (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2)
+ || rc!=SQLITE_OK
);
copyNodeContent(apNew[0], pParent, &rc);
freePage(apNew[0], &rc);
- }else if( ISAUTOVACUUM ){
- /* Fix the pointer-map entries for all the cells that were shifted around.
- ** There are several different types of pointer-map entries that need to
- ** be dealt with by this routine. Some of these have been set already, but
- ** many have not. The following is a summary:
- **
- ** 1) The entries associated with new sibling pages that were not
- ** siblings when this function was called. These have already
- ** been set. We don't need to worry about old siblings that were
- ** moved to the free-list - the freePage() code has taken care
- ** of those.
- **
- ** 2) The pointer-map entries associated with the first overflow
- ** page in any overflow chains used by new divider cells. These
- ** have also already been taken care of by the insertCell() code.
- **
- ** 3) If the sibling pages are not leaves, then the child pages of
- ** cells stored on the sibling pages may need to be updated.
- **
- ** 4) If the sibling pages are not internal intkey nodes, then any
- ** overflow pages used by these cells may need to be updated
- ** (internal intkey nodes never contain pointers to overflow pages).
- **
- ** 5) If the sibling pages are not leaves, then the pointer-map
- ** entries for the right-child pages of each sibling may need
- ** to be updated.
- **
- ** Cases 1 and 2 are dealt with above by other code. The next
- ** block deals with cases 3 and 4 and the one after that, case 5. Since
- ** setting a pointer map entry is a relatively expensive operation, this
- ** code only sets pointer map entries for child or overflow pages that have
- ** actually moved between pages. */
- MemPage *pNew = apNew[0];
- MemPage *pOld = apCopy[0];
- int nOverflow = pOld->nOverflow;
- int iNextOld = pOld->nCell + nOverflow;
- int iOverflow = (nOverflow ? pOld->aiOvfl[0] : -1);
- j = 0; /* Current 'old' sibling page */
- k = 0; /* Current 'new' sibling page */
- for(i=0; i<nCell; i++){
- int isDivider = 0;
- while( i==iNextOld ){
- /* Cell i is the cell immediately following the last cell on old
- ** sibling page j. If the siblings are not leaf pages of an
- ** intkey b-tree, then cell i was a divider cell. */
- assert( j+1 < ArraySize(apCopy) );
- assert( j+1 < nOld );
- pOld = apCopy[++j];
- iNextOld = i + !leafData + pOld->nCell + pOld->nOverflow;
- if( pOld->nOverflow ){
- nOverflow = pOld->nOverflow;
- iOverflow = i + !leafData + pOld->aiOvfl[0];
- }
- isDivider = !leafData;
- }
-
- assert(nOverflow>0 || iOverflow<i );
- assert(nOverflow<2 || pOld->aiOvfl[0]==pOld->aiOvfl[1]-1);
- assert(nOverflow<3 || pOld->aiOvfl[1]==pOld->aiOvfl[2]-1);
- if( i==iOverflow ){
- isDivider = 1;
- if( (--nOverflow)>0 ){
- iOverflow++;
- }
- }
-
- if( i==cntNew[k] ){
- /* Cell i is the cell immediately following the last cell on new
- ** sibling page k. If the siblings are not leaf pages of an
- ** intkey b-tree, then cell i is a divider cell. */
- pNew = apNew[++k];
- if( !leafData ) continue;
- }
- assert( j<nOld );
- assert( k<nNew );
-
- /* If the cell was originally divider cell (and is not now) or
- ** an overflow cell, or if the cell was located on a different sibling
- ** page before the balancing, then the pointer map entries associated
- ** with any child or overflow pages need to be updated. */
- if( isDivider || pOld->pgno!=pNew->pgno ){
- if( !leafCorrection ){
- ptrmapPut(pBt, get4byte(apCell[i]), PTRMAP_BTREE, pNew->pgno, &rc);
- }
- if( szCell[i]>pNew->minLocal ){
- ptrmapPutOvflPtr(pNew, apCell[i], &rc);
- }
- }
+ }else if( ISAUTOVACUUM && !leafCorrection ){
+ /* Fix the pointer map entries associated with the right-child of each
+ ** sibling page. All other pointer map entries have already been taken
+ ** care of. */
+ for(i=0; i<nNew; i++){
+ u32 key = get4byte(&apNew[i]->aData[8]);
+ ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
}
+ }
- if( !leafCorrection ){
- for(i=0; i<nNew; i++){
- u32 key = get4byte(&apNew[i]->aData[8]);
- ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
- }
- }
+ assert( pParent->isInit );
+ TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
+ nOld, nNew, nCell));
+
+ /* Free any old pages that were not reused as new pages.
+ */
+ for(i=nNew; i<nOld; i++){
+ freePage(apOld[i], &rc);
+ }
#if 0
+ if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){
/* The ptrmapCheckPages() contains assert() statements that verify that
** all pointer map pages are set correctly. This is helpful while
** debugging. This is usually disabled because a corrupt database may
** cause an assert() statement to fail. */
ptrmapCheckPages(apNew, nNew);
ptrmapCheckPages(&pParent, 1);
-#endif
}
-
- assert( pParent->isInit );
- TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
- nOld, nNew, nCell));
+#endif
/*
** Cleanup before returning.