aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <drh@noemail.net>2004-05-02 21:12:19 +0000
committerdrh <drh@noemail.net>2004-05-02 21:12:19 +0000
commit4b70f11aea979654bddebbd361a233a90e761eb1 (patch)
tree233f57755f5270888db1439b26c2a962375ccbc3 /src
parent8856d6aad283c0fa91cd316af6a48e51eb292c3b (diff)
downloadsqlite-4b70f11aea979654bddebbd361a233a90e761eb1.tar.gz
sqlite-4b70f11aea979654bddebbd361a233a90e761eb1.zip
Changes to btree for the new file format are mostly complete. Still need
to test and debug. (CVS 1311) FossilOrigin-Name: 0eee3b5cd400e9548437632ec1dfe625a3fca9cf
Diffstat (limited to 'src')
-rw-r--r--src/btree.c866
1 files changed, 426 insertions, 440 deletions
diff --git a/src/btree.c b/src/btree.c
index bd846f95e..252a433bd 100644
--- a/src/btree.c
+++ b/src/btree.c
@@ -9,7 +9,7 @@
** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: btree.c,v 1.106 2004/04/29 14:42:46 drh Exp $
+** $Id: btree.c,v 1.107 2004/05/02 21:12:19 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
@@ -152,6 +152,35 @@
#include "btree.h"
#include <assert.h>
+
+/* Maximum page size. The upper bound on this value is 65536 (a limit
+** imposed by the 2-byte offset at the beginning of each cell.) The
+** maximum page size determines the amount of stack space allocated
+** by many of the routines in this module. On embedded architectures
+** or any machine where memory and especially stack memory is limited,
+** one may wish to chose a smaller value for the maximum page size.
+*/
+#ifndef MX_PAGE_SIZE
+# define MX_PAGE_SIZE 1024
+#endif
+
+/* Individual entries or "cells" are limited in size so that at least
+** this many cells will fit on one page. Changing this value will result
+** in an incompatible database.
+*/
+#define MN_CELLS_PER_PAGE 4
+
+/* The following value is the maximum cell size assuming a maximum page
+** size give above.
+*/
+#define MX_CELL_SIZE ((MX_PAGE_SIZE-10)/MN_CELLS_PER_PAGE)
+
+/* The maximum number of cells on a single page of the database. This
+** assumes a minimum cell size of 3 bytes. Such small cells will be
+** exceedingly rare, but they are possible.
+*/
+#define MX_CELL ((MX_PAGE_SIZE-10)/3)
+
/* Forward declarations */
typedef struct MemPage MemPage;
@@ -162,17 +191,18 @@ typedef struct MemPage MemPage;
static const char zMagicHeader[] = "SQLite version 3";
/*
-** Page type flags
+** Page type flags. An ORed combination of these flags appear as the
+** first byte of every BTree page.
*/
#define PTF_LEAF 0x01
#define PTF_ZERODATA 0x02
#define PTF_INTKEY 0x04
+/* Idea for the future: PTF_LEAFDATA */
/*
** As each page of the file is loaded into memory, an instance of the following
** structure is appended and initialized to zero. This structure stores
** information about the page that is decoded from the raw file page.
-** The extra two entries in apCell[] are an allowance for this situation.
**
** The pParent field points back to the parent page. This allows us to
** walk up the BTree from any leaf to the root. Care must be taken to
@@ -194,6 +224,7 @@ struct MemPage {
int idxParent; /* Index in pParent->aCell[] of this node */
int nFree; /* Number of free bytes on the page */
int nCell; /* Number of entries on this page */
+ int nCellAlloc; /* Number of slots allocated in aCell[] */
unsigned char **aCell; /* Pointer to start of each cell */
};
@@ -431,7 +462,7 @@ static int allocateSpace(MemPage *pPage, int nByte){
#endif
data = pPage->aData;
- assert( sqlitepager_iswriteable(data) );
+ assert( sqlitepager_iswriteable(data->aData) );
assert( pPage->pBt );
if( nByte<4 ) nByte = 4;
if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
@@ -488,7 +519,7 @@ static void freeSpace(MemPage *pPage, int start, int size){
unsigned char *data = pPage->aData;
assert( pPage->pBt!=0 );
- assert( sqlitepager_iswriteable(data) );
+ assert( sqlitepager_iswriteable(data->aData) );
assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
assert( end<=pPage->pBt->pageSize );
if( size<4 ) size = 4;
@@ -536,6 +567,26 @@ static void freeSpace(MemPage *pPage, int start, int size){
** positive if the first key is less than, equal to, or greater than
** the second.
**
+** The key consists of multiple fields. Each field begins with a variable
+** length integer which determines the field type and the number of bytes
+** of key data to follow for that field.
+**
+** initial varint bytes to follow type
+** -------------- --------------- ---------------
+** 0 0 NULL
+** 1 1 signed integer
+** 2 2 signed integer
+** 3 4 signed integer
+** 4 8 signed integer
+** 5 8 IEEE float
+** 6..12 reserved for expansion
+** N>=12 and even (N-12)/2 BLOB
+** N>=13 and odd (N-13)/2 text
+**
+** For a particular database, text is always either UTF-8, UTF-16BE, or
+** UTF-16LE. Which of these three formats to use is determined by one
+** of the meta values in the file header.
+**
*/
static int keyComp(
void *userData,
@@ -602,6 +653,21 @@ bad_key:
#endif
/*
+** Resize the aCell[] array of the given page so that it is able to
+** hold at least nNewSz entries.
+**
+** Return SQLITE_OK or SQLITE_NOMEM.
+*/
+static int resizeCellArray(MemPage *pPage, int nNewSz){
+ if( pPage->nCellAlloc<nNewSize ){
+ pPage->aCell = sqliteRealloc(pPage->aCell, nNewSz*sizeof(pPage->aCell[0]) );
+ if( sqlite_malloc_failed ) return SQLITE_NOMEM;
+ pPage->nCellAlloc = nNewSize;
+ }
+ return SQLITE_OK;
+}
+
+/*
** Initialize the auxiliary information for a disk block.
**
** The pParent parameter must be a pointer to the MemPage which
@@ -622,23 +688,22 @@ static int initPage(
int sumCell = 0; /* Total size of all cells */
assert( pPage->pBt!=0 );
+ assert( pParent==0 || pParent->pBt==pPage->pBt );
+ assert( pPage->pgno==sqlitepager_pagenumber(pPage->aData) );
assert( pPage->aData == &((unsigned char*)pPage)[pPage->pBt->pageSize] );
- if( pPage->pParent ){
- assert( pPage->pParent==pParent );
- return SQLITE_OK;
- }
+ assert( pPage->isInit==0 || pPage->pParent==pParent );
+ if( pPage->isInit ) return SQLITE_OK;
+ assert( pPage->pParent==0 );
+ pPage->pParent = pParent;
if( pParent ){
- pPage->pParent = pParent;
sqlitepager_ref(pParent->aData);
}
- if( pPage->isInit ) return SQLITE_OK;
- pPage->nCell = 0;
- assert( sqlitepager_pagenumber(pPage->aData)==pPage->pgno );
- pPage->hdrOffset = hdr = pgnoThis==1 ? 100 : 0;
- c = pPage->aData[pPage->hdrOffset];
+ pPage->nCell = pPage->nCellAlloc = 0;
+ pPage->hdrOffset = hdr = pPage->pgno==1 ? 100 : 0;
+ c = pPage->aData[hdr];
pPage->intKey = (c & PTF_INTKEY)!=0;
pPage->zeroData = (c & PTF_ZERODATA)!=0;
- pPage->leaf = (c & PTF_INTKEY)!=0;
+ pPage->leaf = (c & PTF_LEAF)!=0;
/* Initialize the cell count and cell pointers */
pc = get2byte(&data[hdr+3]);
@@ -648,8 +713,7 @@ static int initPage(
pPage->nCell++;
pc = get2byte(&data[pc]);
}
- pPage->aCell = sqlite_malloc( sizeof(pPage->aCell[0])*pPage->nCell );
- if( pPage->aCell==0 ){
+ if( resizeCellArray(pPage, pPage->nCell) ){
return SQLITE_NOMEM;
}
pc = get2byte(&data[hdr+3]);
@@ -692,7 +756,7 @@ static void zeroPage(MemPage *pPage, int flags){
int hdr = pPage->hdrOffset;
int first;
- assert( sqlitepager_iswriteable(data) );
+ assert( sqlitepager_iswriteable(data->aData) );
memset(&data[hdr], 0, pBt->pageSize - hdr);
data[hdr] = flags;
first = hdr + 6 + 4*((flags&0x01)!=0);
@@ -730,7 +794,7 @@ static int getPage(Btree *pBt, Pgno pgno, MemPage **ppPage){
** Release a MemPage. This should be called once for each prior
** call to getPage.
*/
-static int releasePage(MemPage *pPage){
+static void releasePage(MemPage *pPage){
if( pPage ){
assert( pPage->aData );
assert( pPage->pBt );
@@ -808,8 +872,8 @@ int sqliteBtreeOpen(
pBt->pCursor = 0;
pBt->page1 = 0;
pBt->readOnly = sqlitepager_isreadonly(pBt->pPager);
- pBt->pageSize = SQLITE_PAGE_SIZE;
- pBt->maxLocal = (SQLITE_PAGE_SIZE-10)/4-12;
+ pBt->pageSize = SQLITE_PAGE_SIZE; /* FIX ME - read from header */
+ pBt->maxLocal = (pBt->pageSize-10)/4-12;
*ppBtree = pBt;
return SQLITE_OK;
}
@@ -1153,7 +1217,7 @@ int sqlite3BtreeCursor(
*ppCur = 0;
return SQLITE_READONLY;
}
- if( pBt->page1==0 ){
+ if( pBt->pPage1==0 ){
rc = lockBtree(pBt);
if( rc!=SQLITE_OK ){
*ppCur = 0;
@@ -1166,7 +1230,7 @@ int sqlite3BtreeCursor(
goto create_cursor_exception;
}
pCur->pgnoRoot = (Pgno)iTable;
- rc = getPage(pBt, pCur->pgnoRoot, (void**)&pCur->pPage);
+ rc = getPage(pBt, pCur->pgnoRoot, &pCur->pPage);
if( rc!=SQLITE_OK ){
goto create_cursor_exception;
}
@@ -1260,7 +1324,7 @@ static void releaseTempCursor(BtCursor *pCur){
** the key for the current entry. If the cursor is not pointing
** to a valid entry, *pSize is set to 0.
**
-** For a table with the intKey flag set, this routine returns the key
+** For a table with the INTKEY flag set, this routine returns the key
** itself, not the number of bytes in the key.
*/
int sqlite3BtreeKeySize(BtCursor *pCur, u64 *pSize){
@@ -1412,7 +1476,13 @@ int sqlite3BtreeKey(BtCursor *pCur, int offset, int amt, void *pBuf){
**
** This routine is used to do quick key comparisons in the
** common case where the entire key fits in the payload area
-** of a cell and does not overflow onto secondary pages.
+** of a cell and does not overflow onto secondary pages. If
+** this routine returns 0 for a valid cursor, the caller will
+** need to allocate a buffer big enough to hold the whole key
+** then use sqlite3BtreeKey() to copy the key value into the
+** buffer. That is substantially slower. But fortunately,
+** most keys are small enough to fit in the payload area so
+** the slower method is rarely needed.
*/
void *sqlite3BtreeKeyFetch(BtCursor *pCur){
unsigned char *aPayload;
@@ -1609,7 +1679,7 @@ static int moveToRoot(BtCursor *pCur){
assert( pRoot->pgno==1 );
subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+6]);
assert( subpage>0 );
- rc = movetoChild(pCur, subpage);
+ rc = moveToChild(pCur, subpage);
}
return rc;
}
@@ -2063,8 +2133,8 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){
}
/*
-** Compute the number of bytes required by a cell header. If the *pHeader
-** argument is not NULL, fill it in with the bytes of the header.
+** Compute the number of bytes required by a cell header. Fill in
+** the nData and nKey values of the header that pHeader points to.
*/
static int makeCellHeader(
MemPage *pPage, /* The page that will contain the cell */
@@ -2088,10 +2158,10 @@ static int makeCellHeader(
*/
static int fillInCell(
MemPage *pPage, /* The page that contains the cell */
- unsigned char *pCell, /* Pointer to start of payload area */
- int nHeader, /* Number of bytes in the cell header */
+ unsigned char *pCell, /* Complete text of the cell */
const void *pKey, u64 nKey, /* The key */
- const void *pData,int nData /* The data */
+ const void *pData,int nData, /* The data */
+ int *pnSize /* Write cell size here */
){
int nPayload;
const void *pSrc;
@@ -2102,7 +2172,9 @@ static int fillInCell(
unsigned char *pPayload;
Btree *pBt = pPage->pBt;
Pgno pgnoOvfl = 0;
+ int nHeader;
+ nHeader = makeCellHeader(pPage, pCell, nKey, nData);
nPayload = nData;
if( pPage->intKey ){
pSrc = pData;
@@ -2113,6 +2185,11 @@ static int fillInCell(
pSrc = pKey;
nSrc = nKey;
}
+ if( nPayload>pBt->maxLocal ){
+ *pnSize = nHeader + pBt->maxLocal + 4;
+ }else{
+ *pnSize = nHeader + nPayload;
+ }
spaceLeft = pBt->maxLocal;
pPayload = &pCell[nHeader];
pPrior = &pPayload[pBt->maxLocal];
@@ -2153,38 +2230,45 @@ static int fillInCell(
** given in the second argument so that MemPage.pParent holds the
** pointer in the third argument.
*/
-static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent,int idx){
+static void reparentPage(Btree *pBt, Pgno pgno, MemPage *pNewParent, int idx){
MemPage *pThis;
+ unsigned char *aData;
if( pgno==0 ) return;
- assert( pPager!=0 );
- pThis = sqlitepager_lookup(pPager, pgno);
+ assert( pBt->pPager!=0 );
+ aData = sqlitepager_lookup(pBt->pPager, pgno);
+ pThis = (MemPage)&aData[pBt->pageSize];
if( pThis && pThis->isInit ){
if( pThis->pParent!=pNewParent ){
- if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
+ if( pThis->pParent ) sqlitepager_unref(pThis->pParent->aData);
pThis->pParent = pNewParent;
- if( pNewParent ) sqlitepager_ref(pNewParent);
+ if( pNewParent ) sqlitepager_ref(pNewParent->aData);
}
pThis->idxParent = idx;
- sqlitepager_unref(pThis);
+ sqlitepager_unref(aData);
}
}
/*
-** Reparent all children of the given page to be the given page.
+** Change the pParent pointer of all children of pPage to point back
+** to pPage.
+**
** In other words, for every child of pPage, invoke reparentPage()
** to make sure that each child knows that pPage is its parent.
**
** This routine gets called after you memcpy() one page into
** another.
*/
-static void reparentChildPages(Btree *pBt, MemPage *pPage){
+static void reparentChildPages(MemPage *pPage){
int i;
- Pager *pPager = pBt->pPager;
+ Btree *pBt;
+
+ if( pPage->left ) return;
+ pBt = pPage->pBt;
for(i=0; i<pPage->nCell; i++){
- reparentPage(pPager, SWAB32(pBt, pPage->apCell[i]->h.leftChild), pPage, i);
+ reparentPage(pBt, get4byte(&pPage->aCell[i][2]), pPage, i);
}
- reparentPage(pPager, SWAB32(pBt, pPage->u.hdr.rightChild), pPage, i);
+ reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+6]), pPage, i);
pPage->idxShift = 0;
}
@@ -2201,14 +2285,16 @@ static void reparentChildPages(Btree *pBt, MemPage *pPage){
** routine will be called soon after this routine in order to rebuild
** the linked list.
*/
-static void dropCell(Btree *pBt, MemPage *pPage, int idx, int sz){
+static void dropCell(MemPage *pPage, int idx, int sz){
int j;
assert( idx>=0 && idx<pPage->nCell );
- assert( sz==cellSize(pBt, pPage->apCell[idx]) );
- assert( sqlitepager_iswriteable(pPage) );
- freeSpace(pBt, pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
+ assert( sz==cellSize(pPage, pPage->aCell[idx]) );
+ assert( sqlitepager_iswriteable(pPage->aData) );
+ assert( pPage->aCell[idx]>=pPage->aData );
+ assert( pPage->aCell[idx]<&pPage->aData[pPage->pBt->pageSize-sz] );
+ freeSpace(pPage, idx, sz);
for(j=idx; j<pPage->nCell-1; j++){
- pPage->apCell[j] = pPage->apCell[j+1];
+ pPage->aCell[j] = pPage->aCell[j+1];
}
pPage->nCell--;
pPage->idxShift = 1;
@@ -2227,69 +2313,78 @@ static void dropCell(Btree *pBt, MemPage *pPage, int idx, int sz){
** routine will be called soon after this routine in order to rebuild
** the linked list.
*/
-static void insertCell(Btree *pBt, MemPage *pPage, int i, Cell *pCell, int sz){
+static void insertCell(MemPage *pPage, int i, unsigned char *pCell, int sz){
int idx, j;
assert( i>=0 && i<=pPage->nCell );
assert( sz==cellSize(pBt, pCell) );
- assert( sqlitepager_iswriteable(pPage) );
+ assert( sqlitepager_iswriteable(pPage->aData) );
idx = allocateSpace(pBt, pPage, sz);
+ resizeCellArray(pPage, pPage->nCell+1);
for(j=pPage->nCell; j>i; j--){
- pPage->apCell[j] = pPage->apCell[j-1];
+ pPage->aCell[j] = pPage->aCell[j-1];
}
pPage->nCell++;
if( idx<=0 ){
pPage->isOverfull = 1;
- pPage->apCell[i] = pCell;
+ pPage->aCell[i] = pCell;
}else{
- memcpy(&pPage->u.aDisk[idx], pCell, sz);
- pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx];
+ memcpy(&pPage->aData[idx], pCell, sz);
+ pPage->aCell[i] = &pPage->aData[idx];
}
pPage->idxShift = 1;
}
/*
** Rebuild the linked list of cells on a page so that the cells
-** occur in the order specified by the pPage->apCell[] array.
+** occur in the order specified by the pPage->aCell[] array.
** Invoke this routine once to repair damage after one or more
** invocations of either insertCell() or dropCell().
*/
-static void relinkCellList(Btree *pBt, MemPage *pPage){
- int i;
- u16 *pIdx;
- assert( sqlitepager_iswriteable(pPage) );
- pIdx = &pPage->u.hdr.firstCell;
+static void relinkCellList(MemPage *pPage){
+ int i, idxFrom;
+ assert( sqlitepager_iswriteable(pPage->aData) );
+ idxFrom = pPage->hdrOffset+3;
for(i=0; i<pPage->nCell; i++){
- int idx = Addr(pPage->apCell[i]) - Addr(pPage);
- assert( idx>0 && idx<SQLITE_USABLE_SIZE );
- *pIdx = SWAB16(pBt, idx);
- pIdx = &pPage->apCell[i]->h.iNext;
+ int idx = Addr(pPage->aCell[i]) - Addr(pPage);
+ assert( idx>pPage->hdrOffset && idx<pPage->pBt->pageSize );
+ put2byte(&pPage->aData[idxFrom], idx);
+ idxFrom = idx;
}
- *pIdx = 0;
+ put2byte(&pPage->aData[idxFrom], 0);
}
/*
-** Make a copy of the contents of pFrom into pTo. The pFrom->apCell[]
-** pointers that point into pFrom->u.aDisk[] must be adjusted to point
-** into pTo->u.aDisk[] instead. But some pFrom->apCell[] entries might
-** not point to pFrom->u.aDisk[]. Those are unchanged.
+** Make a copy of the contents of pFrom into pTo. The pFrom->aCell[]
+** pointers that point into pFrom->aData[] must be adjusted to point
+** into pTo->aData[] instead. But some pFrom->aCell[] entries might
+** not point to pFrom->aData[]. Those are unchanged.
*/
static void copyPage(MemPage *pTo, MemPage *pFrom){
uptr from, to;
int i;
- memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_USABLE_SIZE);
+ int pageSize;
+ int ofst;
+
+ assert( pTo->hdrOffset==0 );
+ ofst = pFrom->hdrOffset;
+ pageSize = pTo->pBt->pageSize;
+ memcpy(pTo->aData, &pFrom->aData[ofst], pageSize - ofst);
pTo->pParent = 0;
pTo->isInit = 1;
+ resizeCellArray(pTo, pFrom->nCell);
pTo->nCell = pFrom->nCell;
- pTo->nFree = pFrom->nFree;
+ pTo->nFree = pFrom->nFree + ofst;
+ assert( pTo->aData[5]<155 );
+ pTo->aData[5] += ofst;
pTo->isOverfull = pFrom->isOverfull;
- to = Addr(pTo);
- from = Addr(pFrom);
+ to = Addr(pTo->aData);
+ from = Addr(pFrom->aData);
for(i=0; i<pTo->nCell; i++){
- uptr x = Addr(pFrom->apCell[i]);
- if( x>from && x<from+SQLITE_USABLE_SIZE ){
- *((uptr*)&pTo->apCell[i]) = x + to - from;
+ uptr x = Addr(pFrom->aCell[i]);
+ if( x>from && x<from+pageSize ){
+ *((uptr*)&pTo->aCell[i]) = x + to - from;
}else{
- pTo->apCell[i] = pFrom->apCell[i];
+ pTo->aCell[i] = pFrom->aCell[i];
}
}
}
@@ -2325,21 +2420,16 @@ static void copyPage(MemPage *pTo, MemPage *pFrom){
** or decreased by one, as necessary, to keep the root page from being
** overfull or empty.
**
-** This routine calls relinkCellList() on its input page regardless of
+** This routine alwyas calls relinkCellList() on its input page regardless of
** whether or not it does any real balancing. Client routines will typically
** invoke insertCell() or dropCell() before calling this routine, so we
** need to call relinkCellList() to clean up the mess that those other
** routines left behind.
**
-** pCur is left pointing to the same cell as when this routine was called
-** even if that cell gets moved to a different page. pCur may be NULL.
-** Set the pCur parameter to NULL if you do not care about keeping track
-** of a cell as that will save this routine the work of keeping track of it.
-**
** Note that when this routine is called, some of the Cells on pPage
-** might not actually be stored in pPage->u.aDisk[]. This can happen
+** might not actually be stored in pPage->aData[]. This can happen
** if the page is overfull. Part of the job of this routine is to
-** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
+** make sure all Cells for pPage once again fit in pPage->aData[].
**
** In the course of balancing the siblings of pPage, the parent of pPage
** might become overfull or underfull. If that happens, then this routine
@@ -2349,8 +2439,9 @@ static void copyPage(MemPage *pTo, MemPage *pFrom){
** in a corrupted state. So if this routine fails, the database should
** be rolled back.
*/
-static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
+static int balance(MemPage *pPage){
MemPage *pParent; /* The parent of pPage */
+ Btree *pBt; /* The whole database */
int nCell; /* Number of cells in apCell[] */
int nOld; /* Number of pages in apOld[] */
int nNew; /* Number of pages in apNew[] */
@@ -2362,35 +2453,34 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
int iCur; /* apCell[iCur] is the cell of the cursor */
MemPage *pOldCurPage; /* The cursor originally points to this page */
int subtotal; /* Subtotal of bytes in cells on one page */
- MemPage *extraUnref = 0; /* A page that needs to be unref-ed */
MemPage *apOld[NB]; /* pPage and up to two siblings */
Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
+ MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
MemPage *apNew[NB+1]; /* pPage and up to NB siblings after balancing */
Pgno pgnoNew[NB+1]; /* Page numbers for each page in apNew[] */
int idxDiv[NB]; /* Indices of divider cells in pParent */
- Cell *apDiv[NB]; /* Divider cells in pParent */
- Cell aTemp[NB]; /* Temporary holding area for apDiv[] */
+ u8 *apDiv[NB]; /* Divider cells in pParent */
+ u8 aTemp[NB][MX_CELL_SIZE]; /* Temporary holding area for apDiv[] */
int cntNew[NB+1]; /* Index in apCell[] of cell after i-th page */
int szNew[NB+1]; /* Combined size of cells place on i-th page */
- MemPage aOld[NB]; /* Temporary copies of pPage and its siblings */
- Cell *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
+ u8 *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
int szCell[(MX_CELL+2)*NB]; /* Local size of all cells */
+ u8 aCopy[NB][MX_PAGE_SIZE+sizeof(MemPage)]; /* Space for apCopy[] */
/*
** Return without doing any work if pPage is neither overfull nor
** underfull.
*/
- assert( sqlitepager_iswriteable(pPage) );
- if( !pPage->isOverfull && pPage->nFree<SQLITE_USABLE_SIZE/2
- && pPage->nCell>=2){
- relinkCellList(pBt, pPage);
+ assert( sqlitepager_iswriteable(pPage->aData) );
+ pBt = pPage->pBt;
+ if( !pPage->isOverfull && pPage->nFree<pBt->pageSize/2 && pPage->nCell>=2){
+ relinkCellList(pPage);
return SQLITE_OK;
}
/*
- ** Find the parent of the page to be balanceed.
- ** If there is no parent, it means this page is the root page and
- ** special rules apply.
+ ** Find the parent of the page to be balanced. If there is no parent,
+ ** it means this page is the root page and special rules apply.
*/
pParent = pPage->pParent;
if( pParent==0 ){
@@ -2407,35 +2497,46 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
** will fit. This reduces the depth of the BTree by one.
**
** If the root page is page 1, it has less space available than
- ** the child, so it might not be able to hold all of the information
- ** in the child. If this is the case, then do not do the transfer.
- ** Leave page 1 empty except for the right-pointer to the child page.
- ** The child page becomes the virtual root of the tree.
+ ** its child (due to the 100 byte header that occurs at the beginning
+ ** of the database fle), so it might not be able to hold all of the
+ ** information currently contained in the child. If this is the
+ ** case, then do not do the transfer. Leave page 1 empty except
+ ** for the right-pointer to the child page. The child page becomes
+ ** the virtual root of the tree.
*/
pgnoChild = get4byte(pPage->aData[pPage->hdrOffset+6]);
assert( pgnoChild>0 && pgnoChild<=sqlit3pager_pagecount(pBt->pPager) );
rc = getPage(pBt, pgnoChild, &pChild);
if( rc ) return rc;
if( pPage->pgno==1 ){
- rc = initPage(pChild);
+ rc = initPage(pChild, pPage);
if( rc ) return rc;
if( pChild->nFree>=100 ){
+ /* The child information will fit on the root page, so do the
+ ** copy */
+ zeroPage(pPage, pChild->aData[0]);
+ resizeCellArray(pPage, pChild->nCell);
+ for(i=0; i<pChild->nCell; i++){
+ insertCell(pPage, i, pChild->aCell[i],
+ cellSize(pChild, pChild->aCell[i]));
+ }
+ freePage(pChild);
+ }else{
+ /* The child has more information that will fit on the root.
+ ** The tree is already balanced. Do nothing. */
}
}else{
- memcpy(pPage, pChild, SQLITE_USABLE_SIZE);
+ memcpy(pPage, pChild, pBt->pageSize);
pPage->isInit = 0;
- rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0);
+ pPage->pParent = 0;
+ rc = initPage(pPage, 0);
assert( rc==SQLITE_OK );
- reparentChildPages(pBt, pPage);
- if( pCur && pCur->pPage==pChild ){
- sqlitepager_unref(pChild);
- pCur->pPage = pPage;
- sqlitepager_ref(pPage);
+ freePage(pChild);
}
- freePage(pBt, pChild, pgnoChild);
- sqlitepager_unref(pChild);
+ reparentChildPages(pPage);
+ releasePage(pChild);
}else{
- relinkCellList(pBt, pPage);
+ relinkCellList(pPage);
}
return SQLITE_OK;
}
@@ -2453,46 +2554,39 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
** child. Then fall thru to the code below which will cause
** the overfull child page to be split.
*/
- rc = sqlitepager_write(pPage);
- if( rc ) return rc;
- rc = allocatePage(pBt, &pChild, &pgnoChild, sqlitepager_pagenumber(pPage));
+ rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno);
if( rc ) return rc;
- assert( sqlitepager_iswriteable(pChild) );
+ assert( sqlitepager_iswriteable(pChild->aData) );
copyPage(pChild, pPage);
pChild->pParent = pPage;
pChild->idxParent = 0;
- sqlitepager_ref(pPage);
+ sqlitepager_ref(pPage->aData);
pChild->isOverfull = 1;
- if( pCur && pCur->pPage==pPage ){
- sqlitepager_unref(pPage);
- pCur->pPage = pChild;
- }else{
- extraUnref = pChild;
- }
- zeroPage(pBt, pPage);
- pPage->u.hdr.rightChild = SWAB32(pBt, pgnoChild);
+ zeroPage(pPage, pPage->aData[pPage->hdrOffset] & ~PTF_LEAF);
+ put4byte(&pPage->aData[pPage->hdrOffset+6], pChild->pgno);
pParent = pPage;
pPage = pChild;
}
- rc = sqlitepager_write(pParent);
+ rc = sqlitepager_write(pParent->aData);
if( rc ) return rc;
assert( pParent->isInit );
/*
- ** Find the Cell in the parent page whose h.leftChild points back
+ ** Find the cell in the parent page whose left child points back
** to pPage. The "idx" variable is the index of that cell. If pPage
** is the rightmost child of pParent then set idx to pParent->nCell
*/
if( pParent->idxShift ){
Pgno pgno, swabPgno;
- pgno = sqlitepager_pagenumber(pPage);
- swabPgno = SWAB32(pBt, pgno);
+ pgno = pPage->pgno;
+ assert( pgno==sqlitepager_pagenumber(pPage->aData) );
for(idx=0; idx<pParent->nCell; idx++){
- if( pParent->apCell[idx]->h.leftChild==swabPgno ){
+ if( get4byte(pParent->aCell[idx][2])==pgno ){
break;
}
}
- assert( idx<pParent->nCell || pParent->u.hdr.rightChild==swabPgno );
+ assert( idx<pParent->nCell
+ || get4byte(&pParent->aData[pParent->hdrOffset+6])==pgno );
}else{
idx = pPage->idxParent;
}
@@ -2502,10 +2596,10 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
** directly to balance_cleanup at any moment.
*/
nOld = nNew = 0;
- sqlitepager_ref(pParent);
+ sqlitepager_ref(pParent->aData);
/*
- ** Find sibling pages to pPage and the Cells in pParent that divide
+ ** Find sibling pages to pPage and the cells in pParent that divide
** the siblings. An attempt is made to find NN siblings on either
** side of pPage. More siblings are taken from one side, however, if
** pPage there are fewer than NN siblings on the other side. If pParent
@@ -2522,76 +2616,71 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
for(i=0, k=nxDiv; i<NB; i++, k++){
if( k<pParent->nCell ){
idxDiv[i] = k;
- apDiv[i] = pParent->apCell[k];
+ apDiv[i] = pParent->aCell[k];
nDiv++;
- pgnoOld[i] = SWAB32(pBt, apDiv[i]->h.leftChild);
+ assert( !pParent->left );
+ pgnoOld[i] = get4byte(&apDev[i][2]);
}else if( k==pParent->nCell ){
- pgnoOld[i] = SWAB32(pBt, pParent->u.hdr.rightChild);
+ pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+6]);
}else{
break;
}
- rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
+ rc = getPage(pBt, pgnoOld[i], &apOld[i]);
if( rc ) goto balance_cleanup;
- rc = initPage(pBt, apOld[i], pgnoOld[i], pParent);
+ rc = initPage(apOld[i], pParent);
if( rc ) goto balance_cleanup;
apOld[i]->idxParent = k;
nOld++;
}
/*
- ** Set iCur to be the index in apCell[] of the cell that the cursor
- ** is pointing to. We will need this later on in order to keep the
- ** cursor pointing at the same cell. If pCur points to a page that
- ** has no involvement with this rebalancing, then set iCur to a large
- ** number so that the iCur==j tests always fail in the main cell
- ** distribution loop below.
- */
- if( pCur ){
- iCur = 0;
- for(i=0; i<nOld; i++){
- if( pCur->pPage==apOld[i] ){
- iCur += pCur->idx;
- break;
- }
- iCur += apOld[i]->nCell;
- if( i<nOld-1 && pCur->pPage==pParent && pCur->idx==idxDiv[i] ){
- break;
- }
- iCur++;
- }
- pOldCurPage = pCur->pPage;
- }
-
- /*
** Make copies of the content of pPage and its siblings into aOld[].
** The rest of this function will use data from the copies rather
** that the original pages since the original pages will be in the
** process of being overwritten.
*/
for(i=0; i<nOld; i++){
- copyPage(&aOld[i], apOld[i]);
+ apCopy[i] = (MemPage*)&aCopy[i+1][-sizeof(MemPage)];
+ memset(apCopy[i], 0, sizeof(MemPage));
+ apCopy[i]->aData = &((u8*)apCopy)[-pBt->pageSize];
+ copyPage(apCopy[i], apOld[i]);
}
/*
** Load pointers to all cells on sibling pages and the divider cells
** into the local apCell[] array. Make copies of the divider cells
** into aTemp[] and remove the the divider Cells from pParent.
+ **
+ ** If the siblings are on leaf pages, then the child pointers of the
+ ** divider cells are stripped from the cells before they are copied
+ ** into aTemp[]. In this wall, all cells in apCell[] are without
+ ** child pointers. If siblings are not leaves, then all cell in
+ ** apCell[] include child pointers. Either way, all cells in apCell[]
+ ** are alike.
*/
nCell = 0;
+ leafCorrection = pPage->leaf*4;
for(i=0; i<nOld; i++){
- MemPage *pOld = &aOld[i];
+ MemPage *pOld = apCopy[i];
for(j=0; j<pOld->nCell; j++){
- apCell[nCell] = pOld->apCell[j];
- szCell[nCell] = cellSize(pBt, apCell[nCell]);
+ apCell[nCell] = pOld->aCell[j];
+ szCell[nCell] = cellSize(pOld, apCell[nCell]);
nCell++;
}
if( i<nOld-1 ){
- szCell[nCell] = cellSize(pBt, apDiv[i]);
- memcpy(&aTemp[i], apDiv[i], szCell[nCell]);
- apCell[nCell] = &aTemp[i];
- dropCell(pBt, pParent, nxDiv, szCell[nCell]);
- assert( SWAB32(pBt, apCell[nCell]->h.leftChild)==pgnoOld[i] );
- apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
+ szCell[nCell] = cellSize(pParent, apDiv[i]) - leafCorrection;
+ memcpy(aTemp[i], apDiv[i], szCell[nCell] + leafCorrection);
+ apCell[nCell] = &aTemp[i][leafCorrection];
+ dropCell(pParent, nxDiv, szCell[nCell]);
+ assert( get4byte(&apCell[nCell][2])==pgnoOld[i] );
+ if( !pOld->leaf ){
+ assert( leafCorrection==0 );
+ /* The right pointer of the child page pOld becomes the left
+ ** pointer of the divider cell */
+ memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4);
+ }else{
+ assert( leafCorrection==4 );
+ }
nCell++;
}
}
@@ -2600,15 +2689,16 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
** Figure out the number of pages needed to hold all nCell cells.
** Store this number in "k". Also compute szNew[] which is the total
** size of all cells on the i-th page and cntNew[] which is the index
- ** in apCell[] of the cell that divides path i from path i+1.
+ ** in apCell[] of the cell that divides page i from page i+1.
** cntNew[k] should equal nCell.
**
** This little patch of code is critical for keeping the tree
** balanced.
*/
+ usableSpace = pBt->pageSize - 10 + leafCorrection;
for(subtotal=k=i=0; i<nCell; i++){
subtotal += szCell[i];
- if( subtotal > USABLE_SPACE ){
+ if( subtotal > usableSpace ){
szNew[k] = subtotal - szCell[i];
cntNew[k] = i;
subtotal = 0;
@@ -2619,7 +2709,7 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
cntNew[k] = nCell;
k++;
for(i=k-1; i>0; i--){
- while( szNew[i]<USABLE_SPACE/2 ){
+ while( szNew[i]<usableSpace/2 ){
cntNew[i-1]--;
assert( cntNew[i-1]>0 );
szNew[i] += szCell[cntNew[i-1]];
@@ -2631,6 +2721,8 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
/*
** Allocate k new pages. Reuse old pages where possible.
*/
+ assert( pPage->pgno>1 );
+ pageFlags = pPage->aData[0];
for(i=0; i<k; i++){
if( i<nOld ){
apNew[i] = apOld[i];
@@ -2642,16 +2734,16 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
if( rc ) goto balance_cleanup;
}
nNew++;
- zeroPage(pBt, apNew[i]);
+ zeroPage(apNew[i], pageFlags);
apNew[i]->isInit = 1;
}
/* Free any old pages that were not reused as new pages.
*/
while( i<nOld ){
- rc = freePage(pBt, apOld[i], pgnoOld[i]);
+ rc = freePage(apOld[i]);
if( rc ) goto balance_cleanup;
- sqlitepager_unref(apOld[i]);
+ sqlitepager_unref(apOld[i]->aData);
apOld[i] = 0;
i++;
}
@@ -2698,74 +2790,66 @@ static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
j = 0;
for(i=0; i<nNew; i++){
MemPage *pNew = apNew[i];
+ assert( pNew->pgno==pgnoNew[i] );
+ resizeCellArray(pNew, cntNew[i] - j);
while( j<cntNew[i] ){
assert( pNew->nFree>=szCell[j] );
- if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
insertCell(pBt, pNew, pNew->nCell, apCell[j], szCell[j]);
j++;
}
assert( pNew->nCell>0 );
assert( !pNew->isOverfull );
- relinkCellList(pBt, pNew);
+ relinkCellList(pNew);
if( i<nNew-1 && j<nCell ){
- pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
- apCell[j]->h.leftChild = SWAB32(pBt, pgnoNew[i]);
- if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
- insertCell(pBt, pParent, nxDiv, apCell[j], szCell[j]);
+ u8 *pCell = apCell[j];
+ if( !pNew->leaf ){
+ memcpy(&pNew->aData[6], &apCell[j][2], 4);
+ }else{
+ pCell -= 4;
+ }
+ insertCell(pParent, nxDiv, pCell, szCell[j]+leafCorrection);
+ put4byte(&pParent->aCell[nxDiv][2], pNew->pgno);
j++;
nxDiv++;
}
}
assert( j==nCell );
- apNew[nNew-1]->u.hdr.rightChild = aOld[nOld-1].u.hdr.rightChild;
+ if( (pageFlags & PTF_LEAF)==0 ){
+ memcpy(&apNew[nNew-1]->aData[6], &apCopy[nOld-1]->aData[6], 4);
+ }
if( nxDiv==pParent->nCell ){
- pParent->u.hdr.rightChild = SWAB32(pBt, pgnoNew[nNew-1]);
+ /* Right-most sibling is the right-most child of pParent */
+ put4byte(&pParent->aData[pParent->hdrOffset+6], pgnoNew[nNew-1]);
}else{
- pParent->apCell[nxDiv]->h.leftChild = SWAB32(pBt, pgnoNew[nNew-1]);
- }
- if( pCur ){
- if( j<=iCur && pCur->pPage==pParent && pCur->idx>idxDiv[nOld-1] ){
- assert( pCur->pPage==pOldCurPage );
- pCur->idx += nNew - nOld;
- }else{
- assert( pOldCurPage!=0 );
- sqlitepager_ref(pCur->pPage);
- sqlitepager_unref(pOldCurPage);
- }
+ /* Right-most sibling is the left child of the first entry in pParent
+ ** past the right-most divider entry */
+ put4byte(&pParent->apCell[nxDiv][2], pgnoNew[nNew-1]);
}
/*
** Reparent children of all cells.
*/
for(i=0; i<nNew; i++){
- reparentChildPages(pBt, apNew[i]);
+ reparentChildPages(apNew[i]);
}
- reparentChildPages(pBt, pParent);
+ reparentChildPages(pParent);
/*
** balance the parent page.
*/
- rc = balance(pBt, pParent, pCur);
+ rc = balance(pParent);
/*
** Cleanup before returning.
*/
balance_cleanup:
- if( extraUnref ){
- sqlitepager_unref(extraUnref);
- }
for(i=0; i<nOld; i++){
- if( apOld[i]!=0 && apOld[i]!=&aOld[i] ) sqlitepager_unref(apOld[i]);
+ if( apOld[i]!=0 ) sqlitepager_unref(apOld[i]->aData);
}
for(i=0; i<nNew; i++){
- sqlitepager_unref(apNew[i]);
- }
- if( pCur && pCur->pPage==0 ){
- pCur->pPage = pParent;
- pCur->idx = 0;
- }else{
- sqlitepager_unref(pParent);
+ sqlitepager_unref(apNew[i]->aData);
}
+ sqlitepager_unref(pParent->aData);
return rc;
}
@@ -2802,19 +2886,22 @@ static int checkReadLocks(BtCursor *pCur){
** Insert a new record into the BTree. The key is given by (pKey,nKey)
** and the data is given by (pData,nData). The cursor is used only to
** define what database the record should be inserted into. The cursor
-** is left pointing at the new record.
+** is left pointing at a random location.
+**
+** For an INTKEY table, only the nKey value of the key is used. pKey is
+** ignored. For a ZERODATA table, the pData and nData are both ignored.
*/
int sqlite3BtreeInsert(
BtCursor *pCur, /* Insert data into the table of this cursor */
- const void *pKey, int nKey, /* The key of the new record */
+ const void *pKey, u64 nKey, /* The key of the new record */
const void *pData, int nData /* The data of the new record */
){
- Cell newCell;
int rc;
int loc;
int szNew;
MemPage *pPage;
Btree *pBt = pCur->pBt;
+ unsigned char newCell[MX_CELL_SIZE], *oldCell;
if( pCur->pPage==0 ){
return SQLITE_ABORT; /* A rollback destroyed this cursor */
@@ -2833,48 +2920,46 @@ int sqlite3BtreeInsert(
rc = sqlite3BtreeMoveto(pCur, pKey, nKey, &loc);
if( rc ) return rc;
pPage = pCur->pPage;
+ assert( nData==0 || pPage->zeroData!=0 );
assert( pPage->isInit );
rc = sqlitepager_write(pPage);
if( rc ) return rc;
- rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
+ rc = fillInCell(pPage, &newCell, pKey, nKey, pData, nData, &szNew);
if( rc ) return rc;
- szNew = cellSize(pBt, &newCell);
+ assert( szNew==cellSize(pPage, newCell) );
if( loc==0 ){
- newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
- rc = clearCell(pBt, pPage->apCell[pCur->idx]);
+ int szOld
+ assert( pCur->idx>=0 && pCur->idx<pPage->nPage );
+ oldCell = pPage->aCell[pCur->idx];
+ if( !pPage->leaf ){
+ memcpy(&newCell[2], &oldCell[2], 4);
+ }
+ szOld = cellSize(pPage, oldCell);
+ rc = clearCell(pPage, oldCell);
if( rc ) return rc;
- dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pPage->apCell[pCur->idx]));
+ dropCell(pPage, pCur->idx, szOld);
}else if( loc<0 && pPage->nCell>0 ){
- assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
+ assert( pPage->leaf );
pCur->idx++;
}else{
- assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
+ assert( pPage->leaf );
}
- insertCell(pBt, pPage, pCur->idx, &newCell, szNew);
- rc = balance(pCur->pBt, pPage, pCur);
+ insertCell(pPage, pCur->idx, &newCell, szNew);
+ rc = balance(pPage);
/* sqliteBtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
/* fflush(stdout); */
+ moveToRoot(pCur);
pCur->eSkip = SKIP_INVALID;
return rc;
}
/*
-** Delete the entry that the cursor is pointing to.
-**
-** The cursor is left pointing at either the next or the previous
-** entry. If the cursor is left pointing to the next entry, then
-** the pCur->eSkip flag is set to SKIP_NEXT which forces the next call to
-** sqliteBtreeNext() to be a no-op. That way, you can always call
-** sqliteBtreeNext() after a delete and the cursor will be left
-** pointing to the first entry after the deleted entry. Similarly,
-** pCur->eSkip is set to SKIP_PREV is the cursor is left pointing to
-** the entry prior to the deleted entry so that a subsequent call to
-** sqliteBtreePrevious() will always leave the cursor pointing at the
-** entry immediately before the one that was deleted.
+** Delete the entry that the cursor is pointing to. The cursor
+** is left pointing at a random location.
*/
int sqlite3BtreeDelete(BtCursor *pCur){
MemPage *pPage = pCur->pPage;
- Cell *pCell;
+ unsigned char *pCell;
int rc;
Pgno pgnoChild;
Btree *pBt = pCur->pBt;
@@ -2899,10 +2984,12 @@ int sqlite3BtreeDelete(BtCursor *pCur){
}
rc = sqlitepager_write(pPage);
if( rc ) return rc;
- pCell = pPage->apCell[pCur->idx];
- pgnoChild = SWAB32(pBt, pCell->h.leftChild);
- clearCell(pBt, pCell);
- if( pgnoChild ){
+ pCell = pPage->aCell[pCur->idx];
+ if( !pPage->leaf ){
+ pgnoChild = get4byte(&pCell[2]);
+ }
+ clearCell(pPage, pCell);
+ if( !pPage->leaf ){
/*
** The entry we are about to delete is not a leaf so if we do not
** do something we will leave a hole on an internal page.
@@ -2911,7 +2998,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){
** to be a leaf so we can use it.
*/
BtCursor leafCur;
- Cell *pNext;
+ unsigned char *pNext;
int szNext;
int notUsed;
getTempCursor(pCur, &leafCur);
@@ -2922,32 +3009,21 @@ int sqlite3BtreeDelete(BtCursor *pCur){
}
rc = sqlitepager_write(leafCur.pPage);
if( rc ) return rc;
- dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
- pNext = leafCur.pPage->apCell[leafCur.idx];
- szNext = cellSize(pBt, pNext);
- pNext->h.leftChild = SWAB32(pBt, pgnoChild);
- insertCell(pBt, pPage, pCur->idx, pNext, szNext);
- rc = balance(pBt, pPage, pCur);
+ dropCell(pPage, pCur->idx, cellSize(pPage, pCell));
+ pNext = leafCur.pPage->aCell[leafCur.idx];
+ szNext = cellSize(leafCur.pPage, pNext);
+ insertCell(pPage, pCur->idx, &pNext[-4], szNext+4);
+ put4byte(&pNext[-2], pgnoChild);
+ rc = balance(pPage);
if( rc ) return rc;
- pCur->eSkip = SKIP_NEXT;
- dropCell(pBt, leafCur.pPage, leafCur.idx, szNext);
- rc = balance(pBt, leafCur.pPage, pCur);
+ dropCell(leafCur.pPage, leafCur.idx, szNext);
+ rc = balance(leafCur.pPage);
releaseTempCursor(&leafCur);
}else{
- dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
- if( pCur->idx>=pPage->nCell ){
- pCur->idx = pPage->nCell-1;
- if( pCur->idx<0 ){
- pCur->idx = 0;
- pCur->eSkip = SKIP_NEXT;
- }else{
- pCur->eSkip = SKIP_PREV;
- }
- }else{
- pCur->eSkip = SKIP_NEXT;
- }
- rc = balance(pBt, pPage, pCur);
+ dropCell(pPage, pCur->idx, cellSize(pPage, pCell));
+ rc = balance(pPage);
}
+ moveToRoot(pCur);
return rc;
}
@@ -2974,7 +3050,7 @@ int sqlite3BtreeCreateTable(Btree *pBt, int *piTable, int flags){
}
rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0);
if( rc ) return rc;
- assert( sqlitepager_iswriteable(pRoot) );
+ assert( sqlitepager_iswriteable(pRoot->aData) );
zeroPage(pBt, pRoot);
sqlitepager_unref(pRoot);
*piTable = (int)pgnoRoot;
@@ -2985,39 +3061,42 @@ int sqlite3BtreeCreateTable(Btree *pBt, int *piTable, int flags){
** Erase the given database page and all its children. Return
** the page to the freelist.
*/
-static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
+static int clearDatabasePage(
+ Btree *pBt, /* The BTree that contains the table */
+ Pgno pgno, /* Page number to clear */
+ MemPage *pParent, /* Parent page. NULL for the root */
+ int freePageFlag /* Deallocate page if true */
+){
MemPage *pPage;
int rc;
- Cell *pCell;
- int idx;
+ unsigned char *pCell;
+ int i;
- rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
+ rc = getPage(pBt, pgno, &pPage);
if( rc ) return rc;
- rc = sqlitepager_write(pPage);
+ rc = sqlitepager_write(pPage->aData);
if( rc ) return rc;
- rc = initPage(pBt, pPage, pgno, 0);
+ rc = initPage(pPage, pParent);
if( rc ) return rc;
- idx = SWAB16(pBt, pPage->u.hdr.firstCell);
- while( idx>0 ){
- pCell = (Cell*)&pPage->u.aDisk[idx];
- idx = SWAB16(pBt, pCell->h.iNext);
- if( pCell->h.leftChild ){
- rc = clearDatabasePage(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
+ for(i=0; i<pPage->nCell; i++){
+ pCell = pPage->aCell[i];
+ if( !pPage->leaf ){
+ rc = clearDatabasePage(pBt, get4byte(&pCell[2]), 1);
if( rc ) return rc;
}
- rc = clearCell(pBt, pCell);
+ rc = clearCell(pPage, pCell);
if( rc ) return rc;
}
- if( pPage->u.hdr.rightChild ){
- rc = clearDatabasePage(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
+ if( !pPage->left ){
+ rc = clearDatabasePage(pBt, get4byte(&pPage->aData[6]), 1);
if( rc ) return rc;
}
if( freePageFlag ){
- rc = freePage(pBt, pPage, pgno);
+ rc = freePage(pPage);
}else{
- zeroPage(pBt, pPage);
+ zeroPage(pPage, pPage->aData[0]);
}
- sqlitepager_unref(pPage);
+ releasePage(pPage);
return rc;
}
@@ -3060,117 +3139,19 @@ int sqlite3BtreeDropTable(Btree *pBt, int iTable){
return SQLITE_LOCKED; /* Cannot drop a table that has a cursor */
}
}
- rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage);
+ rc = getPage(pBt, (Pgno)iTable, pPage);
if( rc ) return rc;
rc = sqlite3BtreeClearTable(pBt, iTable);
if( rc ) return rc;
- if( iTable>2 ){
+ if( iTable>1 ){
rc = freePage(pBt, pPage, iTable);
}else{
zeroPage(pBt, pPage);
}
- sqlitepager_unref(pPage);
+ releasePage(pPage);
return rc;
}
-#if 0 /* UNTESTED */
-/*
-** Copy all cell data from one database file into another.
-** pages back the freelist.
-*/
-static int copyCell(Btree *pBtFrom, BTree *pBtTo, Cell *pCell){
- Pager *pFromPager = pBtFrom->pPager;
- OverflowPage *pOvfl;
- Pgno ovfl, nextOvfl;
- Pgno *pPrev;
- int rc = SQLITE_OK;
- MemPage *pNew, *pPrevPg;
- Pgno new;
-
- if( NKEY(pBtTo, pCell->h) + NDATA(pBtTo, pCell->h) <= MX_LOCAL_PAYLOAD ){
- return SQLITE_OK;
- }
- pPrev = &pCell->ovfl;
- pPrevPg = 0;
- ovfl = SWAB32(pBtTo, pCell->ovfl);
- while( ovfl && rc==SQLITE_OK ){
- rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl);
- if( rc ) return rc;
- nextOvfl = SWAB32(pBtFrom, pOvfl->iNext);
- rc = allocatePage(pBtTo, &pNew, &new, 0);
- if( rc==SQLITE_OK ){
- rc = sqlitepager_write(pNew);
- if( rc==SQLITE_OK ){
- memcpy(pNew, pOvfl, SQLITE_USABLE_SIZE);
- *pPrev = SWAB32(pBtTo, new);
- if( pPrevPg ){
- sqlitepager_unref(pPrevPg);
- }
- pPrev = &pOvfl->iNext;
- pPrevPg = pNew;
- }
- }
- sqlitepager_unref(pOvfl);
- ovfl = nextOvfl;
- }
- if( pPrevPg ){
- sqlitepager_unref(pPrevPg);
- }
- return rc;
-}
-#endif
-
-
-#if 0 /* UNTESTED */
-/*
-** Copy a page of data from one database over to another.
-*/
-static int copyDatabasePage(
- Btree *pBtFrom,
- Pgno pgnoFrom,
- Btree *pBtTo,
- Pgno *pTo
-){
- MemPage *pPageFrom, *pPage;
- Pgno to;
- int rc;
- Cell *pCell;
- int idx;
-
- rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom);
- if( rc ) return rc;
- rc = allocatePage(pBt, &pPage, pTo, 0);
- if( rc==SQLITE_OK ){
- rc = sqlitepager_write(pPage);
- }
- if( rc==SQLITE_OK ){
- memcpy(pPage, pPageFrom, SQLITE_USABLE_SIZE);
- idx = SWAB16(pBt, pPage->u.hdr.firstCell);
- while( idx>0 ){
- pCell = (Cell*)&pPage->u.aDisk[idx];
- idx = SWAB16(pBt, pCell->h.iNext);
- if( pCell->h.leftChild ){
- Pgno newChld;
- rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild),
- pBtTo, &newChld);
- if( rc ) return rc;
- pCell->h.leftChild = SWAB32(pBtFrom, newChld);
- }
- rc = copyCell(pBtFrom, pBtTo, pCell);
- if( rc ) return rc;
- }
- if( pPage->u.hdr.rightChild ){
- Pgno newChld;
- rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pPage->u.hdr.rightChild),
- pBtTo, &newChld);
- if( rc ) return rc;
- pPage->u.hdr.rightChild = SWAB32(pBtTo, newChild);
- }
- }
- sqlitepager_unref(pPage);
- return rc;
-}
-#endif
/*
** Read the meta-information out of a database file.
@@ -3178,13 +3159,12 @@ static int copyDatabasePage(
int sqlite3BtreeGetMeta(Btree *pBt, int idx, u32 *pMeta){
int rc;
int i;
+ unsigned char *pP1;
+ assert( idx>=0 && idx<15 );
rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
if( rc ) return rc;
- aMeta[0] = SWAB32(pBt, pP1->nFree);
- for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){
- aMeta[i+1] = SWAB32(pBt, pP1->aMeta[i]);
- }
+ *pMeta = get4byte(&pP1[40 + idx*4]);
sqlitepager_unref(pP1);
return SQLITE_OK;
}
@@ -3193,17 +3173,17 @@ int sqlite3BtreeGetMeta(Btree *pBt, int idx, u32 *pMeta){
** Write meta-information back into the database.
*/
int sqlite3BtreeUpdateMeta(Btree *pBt, int idx, u32 iMeta){
- PageOne *pP1;
+ unsigned char *pP1;
int rc, i;
+ assert( idx>=0 && idx<15 );
if( !pBt->inTrans ){
return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
}
- pP1 = pBt->page1;
+ rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
+ if( rc ) return rc;
rc = sqlitepager_write(pP1);
- if( rc ) return rc;
- for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){
- pP1->aMeta[i] = SWAB32(pBt, aMeta[i+1]);
- }
+ if( rc ) return rc;
+ put4byte(&pP1[40 + idx*4], iMeta);
return SQLITE_OK;
}
@@ -3224,19 +3204,31 @@ static int fileBtreePageDump(Btree *pBt, int pgno, int recursive){
int i, j;
int nFree;
u16 idx;
+ int hdrOffset;
char range[20];
unsigned char payload[20];
- rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
+ rc = getPage(pBt, (Pgno)pgno, &pPage);
if( rc ){
return rc;
}
- if( recursive ) printf("PAGE %d:\n", pgno);
+ printf("PAGE %d: flags=0x%02x frag=%d\n", pgno, pPage->aData[0],
+ pPage->aData[5]);
i = 0;
- idx = SWAB16(pBt, pPage->u.hdr.firstCell);
- while( idx>0 && idx<=SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
- Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
- int sz = cellSize(pBt, pCell);
+ hdrOffset = pgno==1 ? 100 : 0;
+ idx = get2byte(&pPage->aData[hdrOffset+3]);
+ while( idx>0 && idx<=pBt->pageSize ){
+ u64 nData, nKey;
+ int nHeader;
+ Pgno child;
+ unsigned char *pCell = &pPage->aData[idx];
+ int sz = cellSize(pPage, pCell);
sprintf(range,"%d..%d", idx, idx+sz-1);
+ parseCellHeader(pPage, pCell, &nData, &nKey, &nHeader);
+ if( pPage->leaf ){
+ child = 0;
+ }else{
+ child = get4byte(&pCell[2]);
+ }
sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
memcpy(payload, pCell->aPayload, sz);
@@ -3246,43 +3238,43 @@ static int fileBtreePageDump(Btree *pBt, int pgno, int recursive){
payload[sz] = 0;
printf(
"cell %2d: i=%-10s chld=%-4d nk=%-4d nd=%-4d payload=%s\n",
- i, range, (int)pCell->h.leftChild,
- NKEY(pBt, pCell->h), NDATA(pBt, pCell->h),
- payload
+ i, range, child, (int)nKey, (int)nData, payload
);
- if( pPage->isInit && pPage->apCell[i]!=pCell ){
- printf("**** apCell[%d] does not match on prior entry ****\n", i);
+ if( pPage->isInit && pPage->aCell[i]!=pCell ){
+ printf("**** aCell[%d] does not match on prior entry ****\n", i);
}
i++;
- idx = SWAB16(pBt, pCell->h.iNext);
+ idx = get2byte(pCell);
}
if( idx!=0 ){
printf("ERROR: next cell index out of range: %d\n", idx);
}
- printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild));
+ if( !pPage->leaf ){
+ printf("right_child: %d\n", get4byte(&pPage->aData[6]));
+ }
nFree = 0;
i = 0;
- idx = SWAB16(pBt, pPage->u.hdr.firstFree);
+ idx = get2byte(&pPage->aData[hdrOffset+1]);
while( idx>0 && idx<SQLITE_USABLE_SIZE ){
- FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
- sprintf(range,"%d..%d", idx, idx+p->iSize-1);
- nFree += SWAB16(pBt, p->iSize);
+ int sz = get2byte(&pPage->aData[idx+2]);
+ sprintf(range,"%d..%d", idx, idx+sz-1);
+ nFree += sz;
printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
- i, range, SWAB16(pBt, p->iSize), nFree);
- idx = SWAB16(pBt, p->iNext);
+ i, range, sz, nFree);
+ idx = get2byte(&pPage->aData[idx]);
i++;
}
if( idx!=0 ){
printf("ERROR: next freeblock index out of range: %d\n", idx);
}
- if( recursive && pPage->u.hdr.rightChild!=0 ){
- idx = SWAB16(pBt, pPage->u.hdr.firstCell);
+ if( recursive && !pPage->left ){
+ idx = get2byte(&pPage->aData[hdrOffset+3]);
while( idx>0 && idx<SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
- Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
- fileBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
- idx = SWAB16(pBt, pCell->h.iNext);
+ unsigned char *pCell = &pPage->aData[idx];
+ fileBtreePageDump(pBt, get4byte(&pPage->aData[idx+2]), 1);
+ idx = get2byte(&pPage->aData[idx]);
}
- fileBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
+ fileBtreePageDump(pBt, get4byte(&pPage->aData[hdrOffset+6]), 1);
}
sqlitepager_unref(pPage);
return SQLITE_OK;
@@ -3309,25 +3301,26 @@ static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){
int cnt, idx;
MemPage *pPage = pCur->pPage;
Btree *pBt = pCur->pBt;
+ assert( pPage->isInit );
aResult[0] = sqlitepager_pagenumber(pPage);
aResult[1] = pCur->idx;
aResult[2] = pPage->nCell;
if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
- aResult[3] = cellSize(pBt, pPage->apCell[pCur->idx]);
- aResult[6] = SWAB32(pBt, pPage->apCell[pCur->idx]->h.leftChild);
+ aResult[3] = cellSize(pPage, pPage->aCell[pCur->idx]);
+ aResult[6] = pPage->leaf ? 0 : get4byte(&pPage->aCell[pCur->idx][2]);
}else{
aResult[3] = 0;
aResult[6] = 0;
}
aResult[4] = pPage->nFree;
cnt = 0;
- idx = SWAB16(pBt, pPage->u.hdr.firstFree);
+ idx = get2byte(&pPage->aData[pPage->hdrOffset+1]);
while( idx>0 && idx<SQLITE_USABLE_SIZE ){
cnt++;
- idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext);
+ idx = get2byte(&pPage->aData[idx]);
}
aResult[5] = cnt;
- aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild);
+ aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+6]);
return SQLITE_OK;
}
#endif
@@ -3406,7 +3399,7 @@ static void checkList(
int i;
char zMsg[100];
while( N-- > 0 ){
- OverflowPage *pOvfl;
+ unsigned char *pOvfl;
if( iPage<1 ){
sprintf(zMsg, "%d pages missing from overflow list", N+1);
checkAppendMsg(pCheck, zContext, zMsg);
@@ -3419,14 +3412,13 @@ static void checkList(
break;
}
if( isFreeList ){
- FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload;
- int n = SWAB32(pCheck->pBt, pInfo->nFree);
+ int n = get4byte(&pOvfl[4]);
for(i=0; i<n; i++){
- checkRef(pCheck, SWAB32(pCheck->pBt, pInfo->aFree[i]), zContext);
+ checkRef(pCheck, get4byte(&pOvfl[8+i*4]), zContext);
}
N -= n;
}
- iPage = SWAB32(pCheck->pBt, pOvfl->iNext);
+ iPage = get4byte(pOvfl);
sqlitepager_unref(pOvfl);
}
}
@@ -3492,18 +3484,20 @@ static int checkTreePage(
if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage, zParentContext) ) return 0;
sprintf(zContext, "On tree page %d: ", iPage);
- if( (rc = sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pPage))!=0 ){
+ if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
sprintf(zMsg, "unable to get the page. error code=%d", rc);
checkAppendMsg(pCheck, zContext, zMsg);
return 0;
}
- if( (rc = initPage(pBt, pPage, (Pgno)iPage, pParent))!=0 ){
+ if( (rc = initPage(pPage, pParent))!=0 ){
sprintf(zMsg, "initPage() returns error code %d", rc);
checkAppendMsg(pCheck, zContext, zMsg);
sqlitepager_unref(pPage);
return 0;
}
+#if 0
+
/* Check out all the cells.
*/
depth = 0;
@@ -3584,17 +3578,9 @@ static int checkTreePage(
}
}
- /* Check that free space is kept to a minimum
- */
-#if 0
- if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_USABLE_SIZE/4 ){
- sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
- SQLITE_USABLE_SIZE/3);
- checkAppendMsg(pCheck, zContext, zMsg);
- }
#endif
- sqlitepager_unref(pPage);
+ releasePage(pPage);
return depth;
}