aboutsummaryrefslogtreecommitdiff
path: root/ext/fts5/fts5_index.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_index.c')
-rw-r--r--ext/fts5/fts5_index.c847
1 files changed, 491 insertions, 356 deletions
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c
index 5bf4feba9..cd3402418 100644
--- a/ext/fts5/fts5_index.c
+++ b/ext/fts5/fts5_index.c
@@ -87,7 +87,6 @@
** + total number of segments in level.
** + for each segment from oldest to newest:
** + segment id (always > 0)
-** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
** + first leaf page number (often 1, always greater than 0)
** + final leaf page number
**
@@ -95,15 +94,16 @@
**
** A single record within the %_data table. The data is a list of varints.
** The first value is the number of rows in the index. Then, for each column
-** from left to right, the total number of tokens in the column for all
+** from left to right, the total number of tokens in the column for all
** rows of the table.
**
** 3. Segment leaves:
**
-** TERM DOCLIST FORMAT:
+** TERM/DOCLIST FORMAT:
**
** Most of each segment leaf is taken up by term/doclist data. The
-** general format of the term/doclist data is:
+** general format of term/doclist, starting with the first term
+** on the leaf page, is:
**
** varint : size of first term
** blob: first term data
@@ -123,7 +123,6 @@
** varint: rowid delta (always > 0)
** poslist: next poslist
** }
-** 0x00 byte
**
** poslist format:
**
@@ -143,11 +142,28 @@
** varint: offset delta + 2
** }
**
-** PAGINATION
+** PAGE FORMAT
**
-** The format described above is only accurate if the entire term/doclist
-** data fits on a single leaf page. If this is not the case, the format
-** is changed in two ways:
+** Each leaf page begins with a 4-byte header containing 2 16-bit
+** unsigned integer fields in big-endian format. They are:
+**
+** * The byte offset of the first rowid on the page, if it exists
+** and occurs before the first term (otherwise 0).
+**
+** * The byte offset of the start of the page footer. If the page
+** footer is 0 bytes in size, then this field is the same as the
+** size of the leaf page in bytes.
+**
+** The page footer consists of a single varint for each term located
+** on the page. Each varint is the byte offset of the current term
+** within the page, delta-compressed against the previous value. In
+** other words, the first varint in the footer is the byte offset of
+** the first term, the second is the byte offset of the second less that
+** of the first, and so on.
+**
+** The term/doclist format described above is accurate if the entire
+** term/doclist data fits on a single leaf page. If this is not the case,
+** the format is changed in two ways:
**
** + if the first rowid on a page occurs before the first term, it
** is stored as a literal value:
@@ -160,45 +176,6 @@
** varint : size of first term
** blob: first term data
**
-** Each leaf page begins with:
-**
-** + 2-byte unsigned containing offset to first rowid (or 0).
-** + 2-byte unsigned containing offset to first term (or 0).
-**
-** Followed by term/doclist data.
-**
-** 4. Segment interior nodes:
-**
-** The interior nodes turn the list of leaves into a b+tree.
-**
-** Each interior node begins with a varint - the page number of the left
-** most child node. Following this, for each leaf page except the first,
-** the interior nodes contain:
-**
-** a) If the leaf page contains at least one term, then a term-prefix that
-** is greater than all previous terms, and less than or equal to the
-** first term on the leaf page.
-**
-** b) If the leaf page no terms, a record indicating how many consecutive
-** leaves contain no terms, and whether or not there is an associated
-** by-rowid index record.
-**
-** By definition, there is never more than one type (b) record in a row.
-** Type (b) records only ever appear on height=1 pages - immediate parents
-** of leaves. Only type (a) records are pushed to higher levels.
-**
-** Term format:
-**
-** * Number of bytes in common with previous term plus 2, as a varint.
-** * Number of bytes of new term data, as a varint.
-** * new term data.
-**
-** No-term format:
-**
-** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there
-** is an associated index-by-rowid record.
-** * the number of zero-term leaves as a varint.
-**
** 5. Segment doclist indexes:
**
** Doclist indexes are themselves b-trees, however they usually consist of
@@ -237,28 +214,19 @@
#define FTS5_STRUCTURE_ROWID 10 /* The structure record */
/*
-** Macros determining the rowids used by segment nodes. All nodes in all
-** segments for all indexes (the regular FTS index and any prefix indexes)
-** are stored in the %_data table with large positive rowids.
-**
-** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS)
-** indexes - one regular term index and zero or more prefix indexes.
+** Macros determining the rowids used by segment leaves and dlidx leaves
+** and nodes. All nodes and leaves are stored in the %_data table with large
+** positive rowids.
**
-** Each segment in an index has a unique id greater than zero.
+** Each segment has a unique non-zero 16-bit id.
**
-** Each node in a segment b-tree is assigned a "page number" that is unique
-** within nodes of its height within the segment (leaf nodes have a height
-** of 0, parents 1, etc.). Page numbers are allocated sequentially so that
-** a nodes page number is always one more than its left sibling.
-**
-** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
-** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
-** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
-** SQLITE_FULL and fails the current operation if they ever prove too small.
+** The rowid for each segment leaf is found by passing the segment id and
+** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
+** sequentially starting from 1.
*/
#define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
-#define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */
+#define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
#define fts5_dri(segid, dlidx, height, pgno) ( \
@@ -268,8 +236,8 @@
((i64)(pgno)) \
)
-#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
-#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
+#define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
+#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
/*
** Maximum segments permitted in a single index
@@ -303,7 +271,8 @@ typedef struct Fts5StructureSegment Fts5StructureSegment;
struct Fts5Data {
u8 *p; /* Pointer to buffer containing record */
- int n; /* Size of record in bytes */
+ int nn; /* Size of record in bytes */
+ int szLeaf; /* Size of leaf without page-index */
};
/*
@@ -355,7 +324,6 @@ struct Fts5DoclistIter {
*/
struct Fts5StructureSegment {
int iSegid; /* Segment id */
- int nHeight; /* Height of segment b-tree */
int pgnoFirst; /* First leaf page number in segment */
int pgnoLast; /* Last leaf page number in segment */
};
@@ -377,7 +345,9 @@ struct Fts5Structure {
*/
struct Fts5PageWriter {
int pgno; /* Page number for this page */
- Fts5Buffer buf; /* Buffer containing page data */
+ int iPrevPgidx; /* Previous value written into pgidx */
+ Fts5Buffer buf; /* Buffer containing leaf data */
+ Fts5Buffer pgidx; /* Buffer containing page-index */
Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
@@ -392,6 +362,7 @@ struct Fts5SegWriter {
i64 iPrevRowid; /* Previous rowid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
+ /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
@@ -472,6 +443,9 @@ struct Fts5CResult {
** For each rowid on the page corresponding to the current term, the
** corresponding aRowidOffset[] entry is set to the byte offset of the
** start of the "position-list-size" field within the page.
+**
+** iTermIdx:
+** Index of current term on iTermLeafPgno.
*/
struct Fts5SegIter {
Fts5StructureSegment *pSeg; /* Segment to iterate through */
@@ -486,6 +460,9 @@ struct Fts5SegIter {
int iTermLeafPgno;
int iTermLeafOffset;
+ int iPgidxOff; /* Next offset in pgidx */
+ int iEndofDoclist;
+
/* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
int iRowidOffset; /* Current entry in aRowidOffset[] */
int nRowidOffset; /* Allocated size of aRowidOffset[] array */
@@ -500,10 +477,29 @@ struct Fts5SegIter {
int bDel; /* True if the delete flag is set */
};
+/*
+** Argument is a pointer to an Fts5Data structure that contains a
+** leaf page.
+*/
+#define ASSERT_SZLEAF_OK(x) assert( \
+ (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
+)
+
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
+/*
+** Argument is a pointer to an Fts5Data structure that contains a leaf
+** page. This macro evaluates to true if the leaf contains no terms, or
+** false if it contains at least one term.
+*/
+#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
+
+#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
+
+#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
+
/*
** poslist:
** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
@@ -618,6 +614,11 @@ static int fts5BlobCompare(
}
#endif
+static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
+ int ret;
+ fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
+ return ret;
+}
/*
** Close the read-only blob handle, if it is open.
@@ -679,7 +680,7 @@ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
if( pRet ){
- pRet->n = nByte;
+ pRet->nn = nByte;
aOut = pRet->p = (u8*)&pRet[1];
}else{
rc = SQLITE_NOMEM;
@@ -691,6 +692,9 @@ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
if( rc!=SQLITE_OK ){
sqlite3_free(pRet);
pRet = 0;
+ }else{
+ /* TODO1: Fix this */
+ pRet->szLeaf = fts5GetU16(&pRet->p[2]);
}
}
p->rc = rc;
@@ -785,8 +789,8 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
- i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0);
- i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1;
+ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
+ i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
fts5DataDelete(p, iFirst, iLast);
if( p->pIdxDeleter==0 ){
Fts5Config *pConfig = p->pConfig;
@@ -883,7 +887,6 @@ static int fts5StructureDecode(
pLvl->nSeg = nTotal;
for(iSeg=0; iSeg<nTotal; iSeg++){
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
- i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
}
@@ -974,8 +977,9 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p){
pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
if( p->rc ) return 0;
- memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
- p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
+ /* TODO: Do we need this if the leaf-index is appended? Probably... */
+ memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
+ p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
}
@@ -1039,7 +1043,6 @@ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
- fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
}
@@ -1128,8 +1131,9 @@ static void fts5StructurePromote(
int szPromote = 0; /* Promote anything this size or smaller */
Fts5StructureSegment *pSeg; /* Segment just written */
int szSeg; /* Size of segment just written */
+ int nSeg = pStruct->aLevel[iLvl].nSeg;
-
+ if( nSeg==0 ) return;
pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
@@ -1178,11 +1182,11 @@ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
pLvl->iFirstOff = pLvl->iOff;
}else{
int iOff;
- for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
+ for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
if( pData->p[iOff] ) break;
}
- if( iOff<pData->n ){
+ if( iOff<pData->nn ){
i64 iVal;
pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
@@ -1425,6 +1429,7 @@ static void fts5SegIterNextPage(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter /* Iterator to advance to next page */
){
+ Fts5Data *pLeaf;
Fts5StructureSegment *pSeg = pIter->pSeg;
fts5DataRelease(pIter->pLeaf);
pIter->iLeafPgno++;
@@ -1434,11 +1439,23 @@ static void fts5SegIterNextPage(
pIter->pNextLeaf = 0;
}else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
pIter->pLeaf = fts5DataRead(p,
- FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
+ FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
);
}else{
pIter->pLeaf = 0;
}
+ pLeaf = pIter->pLeaf;
+
+ if( pLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf;
+ if( fts5LeafIsTermless(pLeaf) ){
+ pIter->iEndofDoclist = pLeaf->nn+1;
+ }else{
+ pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
+ pIter->iEndofDoclist
+ );
+ }
+ }
}
/*
@@ -1470,7 +1487,8 @@ static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
if( p->rc==SQLITE_OK ){
int iOff = pIter->iLeafOffset; /* Offset to read at */
- if( iOff>=pIter->pLeaf->n ){
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ if( iOff>=pIter->pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
const u8 *a = &pIter->pLeaf->p[iOff];
@@ -1483,7 +1501,8 @@ static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
int iOff = pIter->iLeafOffset;
- if( iOff>=pIter->pLeaf->n ){
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ if( iOff>=pIter->pLeaf->szLeaf ){
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ){
if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
@@ -1524,6 +1543,14 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
pIter->iTermLeafPgno = pIter->iLeafPgno;
pIter->iLeafOffset = iOff;
+ if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+ }else{
+ int nExtra;
+ pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
+ pIter->iEndofDoclist += nExtra;
+ }
+
fts5SegIterLoadRowid(p, pIter);
}
@@ -1558,8 +1585,10 @@ static void fts5SegIterInit(
}
if( p->rc==SQLITE_OK ){
- u8 *a = pIter->pLeaf->p;
- pIter->iLeafOffset = fts5GetU16(&a[2]);
+ pIter->iLeafOffset = 4;
+ assert_nc( pIter->pLeaf->nn>4 );
+ assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
+ pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
fts5SegIterLoadTerm(p, pIter, 0);
fts5SegIterLoadNPos(p, pIter);
}
@@ -1581,11 +1610,16 @@ static void fts5SegIterInit(
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
- int n = pIter->pLeaf->n;
+ int n = pIter->pLeaf->szLeaf;
int i = pIter->iLeafOffset;
u8 *a = pIter->pLeaf->p;
int iRowidOffset = 0;
+ if( n>pIter->iEndofDoclist ){
+ n = pIter->iEndofDoclist;
+ }
+
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
while( 1 ){
i64 iDelta = 0;
int nPos;
@@ -1595,7 +1629,6 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
i += nPos;
if( i>=n ) break;
i += fts5GetVarint(&a[i], (u64*)&iDelta);
- if( iDelta==0 ) break;
pIter->iRowid += iDelta;
if( iRowidOffset>=pIter->nRowidOffset ){
@@ -1629,17 +1662,17 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
Fts5Data *pNew;
pIter->iLeafPgno--;
pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
- pIter->pSeg->iSegid, 0, pIter->iLeafPgno
+ pIter->pSeg->iSegid, pIter->iLeafPgno
));
if( pNew ){
if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
- if( pIter->iTermLeafOffset<pNew->n ){
+ if( pIter->iTermLeafOffset<pNew->szLeaf ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = pIter->iTermLeafOffset;
}
}else{
- int iRowidOff, dummy;
- fts5LeafHeader(pNew, &iRowidOff, &dummy);
+ int iRowidOff;
+ iRowidOff = fts5LeafFirstRowidOff(pNew);
if( iRowidOff ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = iRowidOff;
@@ -1657,6 +1690,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
}
if( pIter->pLeaf ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
fts5SegIterReverseInitPage(p, pIter);
}
}
@@ -1712,26 +1746,27 @@ static void fts5SegIterNext(
/* Search for the end of the position list within the current page. */
u8 *a = pLeaf->p;
- int n = pLeaf->n;
+ int n = pLeaf->szLeaf;
+ ASSERT_SZLEAF_OK(pLeaf);
iOff = pIter->iLeafOffset + pIter->nPos;
if( iOff<n ){
- /* The next entry is on the current page */
- u64 iDelta;
- iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
- pIter->iLeafOffset = iOff;
- if( iDelta==0 ){
+ /* The next entry is on the current page. */
+ assert_nc( iOff<=pIter->iEndofDoclist );
+ if( iOff>=pIter->iEndofDoclist ){
bNewTerm = 1;
- if( iOff>=n ){
- fts5SegIterNextPage(p, pIter);
- pIter->iLeafOffset = 4;
- }else if( iOff!=fts5GetU16(&a[2]) ){
- pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep);
+ if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
+ iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}else{
+ u64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
pIter->iRowid += iDelta;
+ assert_nc( iDelta>0 );
}
+ pIter->iLeafOffset = iOff;
+
}else if( pIter->pSeg==0 ){
const u8 *pList = 0;
const char *zTerm = 0;
@@ -1745,7 +1780,9 @@ static void fts5SegIterNext(
pIter->pLeaf = 0;
}else{
pIter->pLeaf->p = (u8*)pList;
- pIter->pLeaf->n = nList;
+ pIter->pLeaf->nn = nList;
+ pIter->pLeaf->szLeaf = nList;
+ pIter->iEndofDoclist = nList+1;
sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
}
@@ -1756,15 +1793,27 @@ static void fts5SegIterNext(
fts5SegIterNextPage(p, pIter);
pLeaf = pIter->pLeaf;
if( pLeaf==0 ) break;
- if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOff<pLeaf->n ){
+ ASSERT_SZLEAF_OK(pLeaf);
+ if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
+
+ if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
+ );
+ }
+
}
- else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){
+ else if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], iOff
+ );
pIter->iLeafOffset = iOff;
+ pIter->iEndofDoclist = iOff;
bNewTerm = 1;
}
- if( iOff>=pLeaf->n ){
+ if( iOff>=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
return;
}
@@ -1778,6 +1827,7 @@ static void fts5SegIterNext(
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
}else{
+ int nExtra;
fts5SegIterLoadTerm(p, pIter, nKeep);
fts5SegIterLoadNPos(p, pIter);
if( pbNewTerm ) *pbNewTerm = 1;
@@ -1805,7 +1855,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
if( pDlidx ){
int iSegid = pIter->pSeg->iSegid;
pgnoLast = fts5DlidxIterPgno(pDlidx);
- pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
+ pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
}else{
int iOff; /* Byte offset within pLeaf */
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
@@ -1814,48 +1864,29 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
** byte of position-list content for the current rowid. Back it up
** so that it points to the start of the position-list size field. */
pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
- iOff = pIter->iLeafOffset;
- assert( iOff>=4 );
-
- /* Search for a new term within the current leaf. If one can be found,
- ** then this page contains the largest rowid for the current term. */
- while( iOff<pLeaf->n ){
- int nPos;
- i64 iDelta;
- int bDummy;
-
- /* Read the position-list size field */
- iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
- iOff += nPos;
- if( iOff>=pLeaf->n ) break;
-
- /* Rowid delta. Or, if 0x00, the end of doclist marker. */
- nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
- if( iDelta==0 ) break;
- iOff += nPos;
- }
/* If this condition is true then the largest rowid for the current
** term may not be stored on the current page. So search forward to
** see where said rowid really is. */
- if( iOff>=pLeaf->n ){
+ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
int pgno;
Fts5StructureSegment *pSeg = pIter->pSeg;
/* The last rowid in the doclist may not be on the current page. Search
** forward to find the page containing the last rowid. */
for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
- i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno);
+ i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
Fts5Data *pNew = fts5DataRead(p, iAbs);
if( pNew ){
- int iRowid, iTerm;
- fts5LeafHeader(pNew, &iRowid, &iTerm);
+ int iRowid, bTermless;
+ iRowid = fts5LeafFirstRowidOff(pNew);
+ bTermless = fts5LeafIsTermless(pNew);
if( iRowid ){
SWAPVAL(Fts5Data*, pNew, pLast);
pgnoLast = pgno;
}
fts5DataRelease(pNew);
- if( iTerm ) break;
+ if( bTermless==0 ) break;
}
}
}
@@ -1871,14 +1902,20 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
** first rowid on this page.
*/
if( pLast ){
- int dummy;
int iOff;
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = pLast;
pIter->iLeafPgno = pgnoLast;
- fts5LeafHeader(pLast, &iOff, &dummy);
+ iOff = fts5LeafFirstRowidOff(pLast);
iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
+
+ if( fts5LeafIsTermless(pLast) ){
+ pIter->iEndofDoclist = pLast->nn+1;
+ }else{
+ pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
+ }
+
}
fts5SegIterReverseInitPage(p, pIter);
@@ -1901,30 +1938,20 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
/* Check if the current doclist ends on this page. If it does, return
** early without loading the doclist-index (as it belongs to a different
** term. */
- if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
- int iOff = pIter->iLeafOffset + pIter->nPos;
- while( iOff<pLeaf->n ){
- int bDummy;
- int nPos;
- i64 iDelta;
-
- /* iOff is currently the offset of the start of position list data */
- iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
- if( iDelta==0 ) return;
- assert_nc( iOff<pLeaf->n );
- iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
- iOff += nPos;
- }
+ if( pIter->iTermLeafPgno==pIter->iLeafPgno
+ && pIter->iEndofDoclist<pLeaf->szLeaf
+ ){
+ return;
}
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
#define fts5IndexGetVarint32(a, iOff, nVal) { \
- nVal = a[iOff++]; \
+ nVal = (a)[iOff++]; \
if( nVal & 0x80 ){ \
iOff--; \
- iOff += fts5GetVarint32(&a[iOff], nVal); \
+ iOff += fts5GetVarint32(&(a)[iOff], nVal); \
} \
}
@@ -1955,34 +1982,35 @@ static void fts5LeafSeek(
){
int iOff;
const u8 *a = pIter->pLeaf->p;
- int n = pIter->pLeaf->n;
+ int szLeaf = pIter->pLeaf->szLeaf;
+ int n = pIter->pLeaf->nn;
int nMatch = 0;
int nKeep = 0;
int nNew = 0;
+ int iTerm = 0;
+ int iTermOff;
+ int iPgidx; /* Current offset in pgidx */
+ int bEndOfPage = 0;
assert( p->rc==SQLITE_OK );
- assert( pIter->pLeaf );
- iOff = fts5GetU16(&a[2]);
- if( iOff<4 || iOff>=n ){
- p->rc = FTS5_CORRUPT;
- return;
- }
+ iPgidx = szLeaf;
+ iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
+ iOff = iTermOff;
while( 1 ){
- int i;
- int nCmp;
/* Figure out how many new bytes are in this term */
fts5IndexGetVarint32(a, iOff, nNew);
-
if( nKeep<nMatch ){
goto search_failed;
}
assert( nKeep>=nMatch );
if( nKeep==nMatch ){
+ int nCmp;
+ int i;
nCmp = MIN(nNew, nTerm-nMatch);
for(i=0; i<nCmp; i++){
if( a[iOff+i]!=pTerm[nMatch+i] ) break;
@@ -1999,29 +2027,15 @@ static void fts5LeafSeek(
goto search_failed;
}
}
- iOff += nNew;
-
- /* Skip past the doclist. If the end of the page is reached, bail out. */
- while( 1 ){
- int nPos;
- /* Skip past rowid delta */
- fts5IndexSkipVarint(a, iOff);
-
- /* Skip past position list */
- fts5IndexGetVarint32(a, iOff, nPos);
- iOff += (nPos >> 1);
- if( iOff>=(n-1) ){
- iOff = n;
- goto search_failed;
- }
+ if( iPgidx>=n ){
+ bEndOfPage = 1;
+ break;
+ }
- /* If this is the end of the doclist, break out of the loop */
- if( a[iOff]==0x00 ){
- iOff++;
- break;
- }
- };
+ iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
+ iTermOff += nKeep;
+ iOff = iTermOff;
/* Read the nKeep field of the next term. */
fts5IndexGetVarint32(a, iOff, nKeep);
@@ -2032,14 +2046,15 @@ static void fts5LeafSeek(
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
return;
- }else if( iOff>=n ){
+ }else if( bEndOfPage ){
do {
+ iTerm = 0;
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ) return;
a = pIter->pLeaf->p;
- iOff = fts5GetU16(&a[2]);
- if( iOff ){
- if( iOff<4 || iOff>=n ){
+ if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
+ fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
+ if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
nKeep = 0;
@@ -2051,6 +2066,7 @@ static void fts5LeafSeek(
}
search_success:
+
pIter->iLeafOffset = iOff + nNew;
pIter->iTermLeafOffset = pIter->iLeafOffset;
pIter->iTermLeafPgno = pIter->iLeafPgno;
@@ -2058,6 +2074,15 @@ static void fts5LeafSeek(
fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
+ if( iPgidx>=n ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+ }else{
+ int nExtra;
+ iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
+ pIter->iEndofDoclist = iTermOff + nExtra;
+ }
+ pIter->iPgidxOff = iPgidx;
+
fts5SegIterLoadRowid(p, pIter);
fts5SegIterLoadNPos(p, pIter);
}
@@ -2190,9 +2215,10 @@ static void fts5SegIterHashInit(
pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
if( pLeaf==0 ) return;
pLeaf->p = (u8*)pList;
- pLeaf->n = nList;
+ pLeaf->nn = pLeaf->szLeaf = nList;
pIter->pLeaf = pLeaf;
pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
+ pIter->iEndofDoclist = pLeaf->nn+1;
if( flags & FTS5INDEX_QUERY_DESC ){
pIter->flags |= FTS5_SEGITER_REVERSE;
@@ -2383,9 +2409,9 @@ static void fts5SegIterGotoPage(
if( p->rc==SQLITE_OK ){
int iOff;
u8 *a = pIter->pLeaf->p;
- int n = pIter->pLeaf->n;
+ int n = pIter->pLeaf->szLeaf;
- iOff = fts5GetU16(&a[0]);
+ iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
if( iOff<4 || iOff>=n ){
p->rc = FTS5_CORRUPT;
}else{
@@ -2717,9 +2743,10 @@ static void fts5MultiIterNew2(
Fts5SegIter *pIter = &pNew->aSeg[1];
pIter->flags = FTS5_SEGITER_ONETERM;
- if( pData->n>0 ){
+ if( pData->szLeaf>0 ){
pIter->pLeaf = pData;
pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
+ pIter->iEndofDoclist = pData->nn;
pNew->aFirst[1].iFirst = 1;
if( bDesc ){
pNew->bRev = 1;
@@ -2797,7 +2824,7 @@ static void fts5ChunkIterate(
int nRem = pSeg->nPos; /* Number of bytes still to come */
Fts5Data *pData = 0;
u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
- int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset);
+ int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
int pgno = pSeg->iLeafPgno;
int pgnoSave = 0;
@@ -2813,10 +2840,10 @@ static void fts5ChunkIterate(
break;
}else{
pgno++;
- pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno));
+ pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
if( pData==0 ) break;
pChunk = &pData->p[4];
- nChunk = MIN(nRem, pData->n - 4);
+ nChunk = MIN(nRem, pData->szLeaf - 4);
if( pgno==pgnoSave ){
assert( pSeg->pNextLeaf==0 );
pSeg->pNextLeaf = pData;
@@ -3102,19 +3129,30 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
Fts5PageWriter *pPage = &pWriter->writer;
i64 iRowid;
+ assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
+
+ /* Set the szLeaf header field. */
+ assert( 0==fts5GetU16(&pPage->buf.p[2]) );
+ fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
+
if( pWriter->bFirstTermInPage ){
/* No term was written to this page. */
- assert( 0==fts5GetU16(&pPage->buf.p[2]) );
+ assert( pPage->pgidx.n==0 );
fts5WriteBtreeNoTerm(p, pWriter);
+ }else{
+ /* Append the pgidx to the page buffer. Set the szLeaf header field. */
+ fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
}
- /* Write the current page to the db. */
- iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
+ /* Write the page out to disk */
+ iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
/* Initialize the next page. */
fts5BufferZero(&pPage->buf);
+ fts5BufferZero(&pPage->pgidx);
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
+ pPage->iPrevPgidx = 0;
pPage->pgno++;
/* Increase the leaves written counter */
@@ -3139,20 +3177,31 @@ static void fts5WriteAppendTerm(
){
int nPrefix; /* Bytes of prefix compression for term */
Fts5PageWriter *pPage = &pWriter->writer;
+ Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
- assert( pPage->buf.n==0 || pPage->buf.n>4 );
- if( pPage->buf.n==0 ){
- /* Zero the first term and first rowid fields */
- static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
- fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
- assert( pWriter->bFirstTermInPage );
- }
if( p->rc ) return;
+ assert( pPage->buf.n>=4 );
+ assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
+
+ /* If the current leaf page is full, flush it to disk. */
+ if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
+ if( pPage->buf.n>4 ){
+ fts5WriteFlushLeaf(p, pWriter);
+ }
+ fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
+ }
+ /* TODO1: Updating pgidx here. */
+ pPgidx->n += sqlite3Fts5PutVarint(
+ &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
+ );
+ pPage->iPrevPgidx = pPage->buf.n;
+#if 0
+ fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
+ pPgidx->n += 2;
+#endif
+
if( pWriter->bFirstTermInPage ){
- /* Update the "first term" field of the page header. */
- assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
- fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
nPrefix = 0;
if( pPage->pgno!=1 ){
/* This is the first term on a leaf that is not the leftmost leaf in
@@ -3194,11 +3243,6 @@ static void fts5WriteAppendTerm(
assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
pWriter->aDlidx[0].pgno = pPage->pgno;
-
- /* If the current leaf page is full, flush it to disk. */
- if( pPage->buf.n>=p->pConfig->pgsz ){
- fts5WriteFlushLeaf(p, pWriter);
- }
}
/*
@@ -3213,6 +3257,10 @@ static void fts5WriteAppendRowid(
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pPage = &pWriter->writer;
+ if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
+ fts5WriteFlushLeaf(p, pWriter);
+ }
+
/* If this is to be the first rowid written to the page, set the
** rowid-pointer in the page-header. Also append a value to the dlidx
** buffer, in case a doclist-index is required. */
@@ -3233,10 +3281,6 @@ static void fts5WriteAppendRowid(
pWriter->bFirstRowidInPage = 0;
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
-
- if( pPage->buf.n>=p->pConfig->pgsz ){
- fts5WriteFlushLeaf(p, pWriter);
- }
}
}
@@ -3251,8 +3295,10 @@ static void fts5WriteAppendPoslistData(
int n = nData;
assert( p->pConfig->pgsz>0 );
- while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){
- int nReq = p->pConfig->pgsz - pPage->buf.n;
+ while( p->rc==SQLITE_OK
+ && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
+ ){
+ int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
int nCopy = 0;
while( nCopy<nReq ){
i64 dummy;
@@ -3279,7 +3325,6 @@ static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
static void fts5WriteFinish(
Fts5Index *p,
Fts5SegWriter *pWriter, /* Writer object */
- int *pnHeight, /* OUT: Height of the b-tree */
int *pnLeaf /* OUT: Number of leaf pages in b-tree */
){
int i;
@@ -3287,7 +3332,6 @@ static void fts5WriteFinish(
if( p->rc==SQLITE_OK ){
if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
*pnLeaf = 0;
- *pnHeight = 0;
}else{
if( pLeaf->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
@@ -3295,11 +3339,11 @@ static void fts5WriteFinish(
*pnLeaf = pLeaf->pgno-1;
fts5WriteFlushBtree(p, pWriter);
- *pnHeight = 0;
}
}
fts5BufferFree(&pLeaf->term);
fts5BufferFree(&pLeaf->buf);
+ fts5BufferFree(&pLeaf->pgidx);
fts5BufferFree(&pWriter->btterm);
for(i=0; i<pWriter->nDlidx; i++){
@@ -3313,6 +3357,8 @@ static void fts5WriteInit(
Fts5SegWriter *pWriter,
int iSegid
){
+ const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
+
memset(pWriter, 0, sizeof(Fts5SegWriter));
pWriter->iSegid = iSegid;
@@ -3321,6 +3367,10 @@ static void fts5WriteInit(
pWriter->bFirstTermInPage = 1;
pWriter->iBtPage = 1;
+ /* Grow the two buffers to pgsz + padding bytes in size. */
+ fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer);
+ fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer);
+
if( p->pIdxWriter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
@@ -3330,6 +3380,13 @@ static void fts5WriteInit(
}
if( p->rc==SQLITE_OK ){
+ /* Initialize the 4-byte leaf-page header to 0x00. */
+ memset(pWriter->writer.buf.p, 0, 4);
+ pWriter->writer.buf.n = 4;
+
+ /* Bind the current output segment id to the index-writer. This is an
+ ** optimization over binding the same value over and over as rows are
+ ** inserted into %_idx by the current writer. */
sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
}
@@ -3358,19 +3415,37 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
i64 iLeafRowid;
Fts5Data *pData;
int iId = pSeg->pSeg->iSegid;
- u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};
+ u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
- iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno);
+ iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
pData = fts5DataRead(p, iLeafRowid);
if( pData ){
fts5BufferZero(&buf);
+ fts5BufferGrow(&p->rc, &buf, pData->nn);
fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
- fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
+ fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
+ if( p->rc==SQLITE_OK ){
+ /* Set the szLeaf field */
+ fts5PutU16(&buf.p[2], buf.n);
+ }
+
+ /* Set up the new page-index array */
+ fts5BufferAppendVarint(&p->rc, &buf, 4);
+ if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
+ && pSeg->iEndofDoclist<pData->szLeaf
+ ){
+ int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
+ fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
+ fts5BufferAppendBlob(&p->rc, &buf,
+ pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
+ );
+ }
+
fts5DataRelease(pData);
pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
- fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
+ fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
}
}
@@ -3470,8 +3545,9 @@ static void fts5IndexMergeLevel(
}
/* This is a new term. Append a term to the output segment. */
+ /* TODO2: Doclist 0x00 term */
if( bRequireDoclistTerm ){
- fts5WriteAppendZerobyte(p, &writer);
+ /* fts5WriteAppendZerobyte(p, &writer); */
}
fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
fts5BufferSet(&p->rc, &term, nTerm, pTerm);
@@ -3489,7 +3565,7 @@ static void fts5IndexMergeLevel(
/* Flush the last leaf page to disk. Set the output segment b-tree height
** and last leaf page number at the same time. */
- fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);
+ fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
if( fts5MultiIterEof(p, pIter) ){
int i;
@@ -3614,6 +3690,7 @@ static void fts5IndexCrisismerge(
assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
+ assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
fts5StructurePromote(p, iLvl+1, pStruct);
iLvl++;
}
@@ -3641,10 +3718,12 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
int ret;
u32 dummy;
ret = fts5GetVarint32(aBuf, dummy);
- while( 1 ){
- int i = fts5GetVarint32(&aBuf[ret], dummy);
- if( (ret + i) > nMax ) break;
- ret += i;
+ if( ret<nMax ){
+ while( 1 ){
+ int i = fts5GetVarint32(&aBuf[ret], dummy);
+ if( (ret + i) > nMax ) break;
+ ret += i;
+ }
}
return ret;
}
@@ -3677,75 +3756,39 @@ static void fts5FlushOneHash(Fts5Index *p){
const int pgsz = p->pConfig->pgsz;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
- int nHeight; /* Height of new segment b-tree */
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
+ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
const u8 *zPrev = 0;
Fts5SegWriter writer;
fts5WriteInit(p, &writer, iSegid);
- /* Pre-allocate the buffer used to assemble leaf pages to the target
- ** page size. */
- assert( pgsz>0 );
pBuf = &writer.writer.buf;
- fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
+ pPgidx = &writer.writer.pgidx;
+
+ /* fts5WriteInit() should have initialized the buffers to (most likely)
+ ** the maximum space required. */
+ assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
+ assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
/* Begin scanning through hash table entries. This loop runs once for each
** term/doclist currently stored within the hash table. */
if( p->rc==SQLITE_OK ){
- memset(pBuf->p, 0, 4);
- pBuf->n = 4;
p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
}
while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
const char *zTerm; /* Buffer containing term */
- int nTerm; /* Size of zTerm in bytes */
const u8 *pDoclist; /* Pointer to doclist for this term */
int nDoclist; /* Size of doclist in bytes */
int nSuffix; /* Size of term suffix */
+ /* Write the term for this entry to disk. */
sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
- nTerm = strlen(zTerm);
-
- /* Decide if the term will fit on the current leaf. If it will not,
- ** flush the leaf to disk here. */
- if( pBuf->n>4 && (pBuf->n + nTerm + 2) > pgsz ){
- fts5WriteFlushLeaf(p, &writer);
- pBuf = &writer.writer.buf;
- if( (nTerm + 32) > pBuf->nSpace ){
- fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
- if( p->rc ) break;
- }
- }
-
- /* Write the term to the leaf. And if it is the first on the leaf, and
- ** the leaf is not page number 1, push it up into the b-tree hierarchy
- ** as well. */
- if( writer.bFirstTermInPage==0 ){
- int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre);
- nSuffix = nTerm - nPre;
- }else{
- fts5PutU16(&pBuf->p[2], pBuf->n);
- writer.bFirstTermInPage = 0;
- if( writer.writer.pgno!=1 ){
- int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
- fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
- pBuf = &writer.writer.buf;
- assert( nPre<nTerm );
- }
- nSuffix = nTerm;
- }
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix);
- fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);
+ fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm);
- /* We just wrote a term into page writer.aWriter[0].pgno. If a
- ** doclist-index is to be generated for this doclist, it will be
- ** associated with this page. */
- assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
- writer.aDlidx[0].pgno = writer.writer.pgno;
-
- if( pgsz>=(pBuf->n + nDoclist + 1) ){
+ if( writer.bFirstRowidInPage==0
+ && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1)
+ ){
/* The entire doclist will fit on the current leaf. */
fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
}else{
@@ -3753,7 +3796,7 @@ static void fts5FlushOneHash(Fts5Index *p){
i64 iDelta = 0;
int iOff = 0;
- writer.bFirstRowidInPage = 0;
+ /* writer.bFirstRowidInPage = 0; */
/* The entire doclist will not fit on this leaf. The following
** loop iterates through the poslists that make up the current
@@ -3777,7 +3820,7 @@ static void fts5FlushOneHash(Fts5Index *p){
}
assert( pBuf->n<=pBuf->nSpace );
- if( (pBuf->n + nCopy) <= pgsz ){
+ if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
/* The entire poslist will fit on the current leaf. So copy
** it in one go. */
fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
@@ -3788,7 +3831,7 @@ static void fts5FlushOneHash(Fts5Index *p){
const u8 *pPoslist = &pDoclist[iOff];
int iPos = 0;
while( p->rc==SQLITE_OK ){
- int nSpace = pgsz - pBuf->n;
+ int nSpace = pgsz - pBuf->n - pPgidx->n;
int n = 0;
if( (nCopy - iPos)<=nSpace ){
n = nCopy - iPos;
@@ -3798,9 +3841,8 @@ static void fts5FlushOneHash(Fts5Index *p){
assert( n>0 );
fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
iPos += n;
- if( pBuf->n>=pgsz ){
+ if( (pBuf->n + pPgidx->n)>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
- pBuf = &writer.writer.buf;
}
if( iPos>=nCopy ) break;
}
@@ -3809,13 +3851,14 @@ static void fts5FlushOneHash(Fts5Index *p){
}
}
- pBuf->p[pBuf->n++] = '\0';
+ /* TODO2: Doclist terminator written here. */
+ /* pBuf->p[pBuf->n++] = '\0'; */
assert( pBuf->n<=pBuf->nSpace );
zPrev = (const u8*)zTerm;
sqlite3Fts5HashScanNext(pHash);
}
sqlite3Fts5HashClear(pHash);
- fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
+ fts5WriteFinish(p, &writer, &pgnoLast);
/* Update the Fts5Structure. It is written back to the database by the
** fts5StructureRelease() call below. */
@@ -3826,7 +3869,6 @@ static void fts5FlushOneHash(Fts5Index *p){
if( p->rc==SQLITE_OK ){
pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
pSeg->iSegid = iSegid;
- pSeg->nHeight = nHeight;
pSeg->pgnoFirst = 1;
pSeg->pgnoLast = pgnoLast;
pStruct->nSegment++;
@@ -3928,7 +3970,10 @@ static void fts5PoslistCallback(
void *pCtx,
const u8 *pChunk, int nChunk
){
- fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
+ assert_nc( nChunk>=0 );
+ if( nChunk>0 ){
+ fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
+ }
}
/*
@@ -4163,7 +4208,7 @@ static void fts5SetupPrefixIter(
pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
if( pData ){
pData->p = (u8*)&pData[1];
- pData->n = doclist.n;
+ pData->nn = pData->szLeaf = doclist.n;
memcpy(pData->p, doclist.p, doclist.n);
fts5MultiIterNew2(p, pData, bDesc, ppIter);
}
@@ -4393,7 +4438,12 @@ int sqlite3Fts5IndexQuery(
memcpy(&buf.p[1], pToken, nToken);
#ifdef SQLITE_DEBUG
- if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){
+ /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
+ ** prefix-query. Instead of using a prefix-index (if one exists),
+ ** evaluate the prefix query using the main FTS index. This is used
+ ** for internal sanity checking by the integrity-check in debug
+ ** mode only. */
+ if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
assert( flags & FTS5INDEX_QUERY_PREFIX );
iIdx = 1+pConfig->nPrefix;
}else
@@ -4513,7 +4563,7 @@ int sqlite3Fts5IterPoslist(
assert( pIter->pIndex->rc==SQLITE_OK );
*piRowid = pSeg->iRowid;
*pn = pSeg->nPos;
- if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
+ if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
*pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
}else{
fts5BufferZero(&pIter->poslist);
@@ -4561,11 +4611,11 @@ int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
*pnRow = 0;
memset(anSize, 0, sizeof(i64) * nCol);
pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
- if( p->rc==SQLITE_OK && pData->n ){
+ if( p->rc==SQLITE_OK && pData->nn ){
int i = 0;
int iCol;
i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
- for(iCol=0; i<pData->n && iCol<nCol; iCol++){
+ for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
}
}
@@ -4770,18 +4820,25 @@ static void fts5TestTerm(
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
/* If this is a prefix query, check that the results returned if the
- ** the index is disabled are the same. In both ASC and DESC order. */
- if( iIdx>0 && rc==SQLITE_OK ){
- int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
- ck2 = 0;
- rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
- if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
- }
- if( iIdx>0 && rc==SQLITE_OK ){
- int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
- ck2 = 0;
- rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
- if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ ** the index is disabled are the same. In both ASC and DESC order.
+ **
+ ** This check may only be performed if the hash table is empty. This
+ ** is because the hash table only supports a single scan query at
+ ** a time, and the multi-iter loop from which this function is called
+ ** is already performing such a scan. */
+ if( p->nPendingData==0 ){
+ if( iIdx>0 && rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
+ ck2 = 0;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ }
+ if( iIdx>0 && rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
+ ck2 = 0;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ }
}
cksum3 ^= ck1;
@@ -4820,16 +4877,67 @@ static void fts5IndexIntegrityCheckEmpty(
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
- Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i));
+ Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
if( pLeaf ){
- if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT;
- if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT;
+ if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
+ if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
}
fts5DataRelease(pLeaf);
if( p->rc ) break;
}
}
+static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
+ int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;
+ int iTermOff = 0;
+ int ii;
+
+ Fts5Buffer buf1 = {0,0,0};
+ Fts5Buffer buf2 = {0,0,0};
+
+ ii = pLeaf->szLeaf;
+ while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
+ int res;
+ int iOff;
+ int nIncr;
+
+ ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
+ iTermOff += nIncr;
+ iOff = iTermOff;
+
+ if( iOff>=pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else if( iTermOff==nIncr ){
+ int nByte;
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
+ if( (iOff+nByte)>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
+ }
+ }else{
+ int nKeep, nByte;
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
+ if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ buf1.n = nKeep;
+ fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
+ }
+
+ if( p->rc==SQLITE_OK ){
+ res = fts5BufferCompare(&buf1, &buf2);
+ if( res<=0 ) p->rc = FTS5_CORRUPT;
+ }
+ }
+ fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
+ }
+
+ fts5BufferFree(&buf1);
+ fts5BufferFree(&buf2);
+}
+
static void fts5IndexIntegrityCheckSegment(
Fts5Index *p, /* FTS5 backend object */
Fts5StructureSegment *pSeg /* Segment to check internal consistency */
@@ -4851,7 +4959,6 @@ static void fts5IndexIntegrityCheckSegment(
while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iRow; /* Rowid for this leaf */
Fts5Data *pLeaf; /* Data for this leaf */
- int iOff; /* Offset of first term on leaf */
int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
@@ -4861,7 +4968,7 @@ static void fts5IndexIntegrityCheckSegment(
/* If the leaf in question has already been trimmed from the segment,
** ignore this b-tree entry. Otherwise, load it into memory. */
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
- iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf);
+ iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
pLeaf = fts5DataRead(p, iRow);
if( pLeaf==0 ) break;
@@ -4869,15 +4976,16 @@ static void fts5IndexIntegrityCheckSegment(
** to or larger than the split-key in zIdxTerm. Also check that if there
** is also a rowid pointer within the leaf page header, it points to a
** location before the term. */
- iOff = fts5GetU16(&pLeaf->p[2]);
- if( iOff==0 ){
+ if( pLeaf->nn<=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
- int iRowidOff;
+ int iOff; /* Offset of first term on leaf */
+ int iRowidOff; /* Offset of first rowid on leaf */
int nTerm; /* Size of term on leaf in bytes */
int res; /* Comparison of term and split-key */
- iRowidOff = fts5GetU16(&pLeaf->p[0]);
+ iOff = fts5LeafFirstTermOff(pLeaf);
+ iRowidOff = fts5LeafFirstRowidOff(pLeaf);
if( iRowidOff>=iOff ){
p->rc = FTS5_CORRUPT;
}else{
@@ -4886,6 +4994,8 @@ static void fts5IndexIntegrityCheckSegment(
if( res==0 ) res = nTerm - nIdxTerm;
if( res<0 ) p->rc = FTS5_CORRUPT;
}
+
+ fts5IntegrityCheckPgidx(p, pLeaf);
}
fts5DataRelease(pLeaf);
if( p->rc ) break;
@@ -4913,10 +5023,10 @@ static void fts5IndexIntegrityCheckSegment(
/* Check any rowid-less pages that occur before the current leaf. */
for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
- iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
+ iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
- if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
+ if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
fts5DataRelease(pLeaf);
}
}
@@ -4924,12 +5034,13 @@ static void fts5IndexIntegrityCheckSegment(
/* Check that the leaf page indicated by the iterator really does
** contain the rowid suggested by the same. */
- iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
+ iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
i64 iRowid;
- int iRowidOff = fts5GetU16(&pLeaf->p[0]);
- if( iRowidOff>=pLeaf->n ){
+ int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
+ ASSERT_SZLEAF_OK(pLeaf);
+ if( iRowidOff>=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
@@ -5130,9 +5241,8 @@ static void fts5DebugStructure(
);
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
- sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
- " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight,
- pSeg->pgnoFirst, pSeg->pgnoLast
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
+ pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
);
}
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
@@ -5193,8 +5303,10 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
i64 iDocid;
int iOff = 0;
- iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid);
- sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
+ if( n>0 ){
+ iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
+ }
while( iOff<n ){
int nPos;
int bDummy;
@@ -5205,7 +5317,7 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
if( iDelta==0 ) return iOff;
iDocid += iDelta;
- sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
}
}
@@ -5231,13 +5343,18 @@ static void fts5DecodeFunction(
assert( nArg==2 );
memset(&s, 0, sizeof(Fts5Buffer));
iRowid = sqlite3_value_int64(apVal[0]);
+
+ /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
+ ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
+ ** buffer overreads even if the record is corrupt. */
n = sqlite3_value_bytes(apVal[1]);
aBlob = sqlite3_value_blob(apVal[1]);
-
nSpace = n + FTS5_DATA_ZERO_PADDING;
a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
if( a==0 ) goto decode_out;
memcpy(a, aBlob, n);
+
+
fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
fts5DebugRowid(&rc, &s, iRowid);
@@ -5246,7 +5363,7 @@ static void fts5DecodeFunction(
Fts5DlidxLvl lvl;
dlidx.p = a;
- dlidx.n = n;
+ dlidx.nn = n;
memset(&lvl, 0, sizeof(Fts5DlidxLvl));
lvl.pData = &dlidx;
@@ -5264,52 +5381,73 @@ static void fts5DecodeFunction(
fts5DecodeStructure(&rc, &s, a, n);
}
}else{
- Fts5Buffer term;
+ Fts5Buffer term; /* Current term read from page */
+ int szLeaf; /* Offset of pgidx in a[] */
+ int iPgidxOff;
+ int iPgidxPrev = 0; /* Previous value read from pgidx */
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
- int nKeep = 0;
+ int nDoclist;
memset(&term, 0, sizeof(Fts5Buffer));
- if( n>=4 ){
- iRowidOff = fts5GetU16(&a[0]);
- iTermOff = fts5GetU16(&a[2]);
- }else{
+ if( n<4 ){
sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
goto decode_out;
+ }else{
+ iRowidOff = fts5GetU16(&a[0]);
+ iPgidxOff = szLeaf = fts5GetU16(&a[2]);
+ if( iPgidxOff<n ){
+ fts5GetVarint32(&a[iPgidxOff], iTermOff);
+ }
}
- if( iRowidOff ){
+ /* Decode the position list tail at the start of the page */
+ if( iRowidOff!=0 ){
iOff = iRowidOff;
- }else if( iTermOff ){
+ }else if( iTermOff!=0 ){
iOff = iTermOff;
}else{
- iOff = n;
+ iOff = szLeaf;
}
fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
- assert( iRowidOff==0 || iOff==iRowidOff );
- if( iRowidOff ){
- iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
- }
+ /* Decode any more doclist data that appears on the page before the
+ ** first term. */
+ nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
+ fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
+
+ while( iPgidxOff<n ){
+ int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
+ int nByte; /* Bytes of data */
+ int iEnd;
+
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
+ iPgidxPrev += nByte;
+ iOff = iPgidxPrev;
+
+ if( iPgidxOff<n ){
+ fts5GetVarint32(&a[iPgidxOff], nByte);
+ iEnd = iPgidxPrev + nByte;
+ }else{
+ iEnd = szLeaf;
+ }
- assert( iTermOff==0 || iOff==iTermOff );
- while( iOff<n ){
- int nByte;
+ if( bFirst==0 ){
+ iOff += fts5GetVarint32(&a[iOff], nByte);
+ term.n = nByte;
+ }
iOff += fts5GetVarint32(&a[iOff], nByte);
- term.n= nKeep;
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
iOff += nByte;
sqlite3Fts5BufferAppendPrintf(
&rc, &s, " term=%.*s", term.n, (const char*)term.p
- );
- iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
- if( iOff<n ){
- iOff += fts5GetVarint32(&a[iOff], nKeep);
- }
+ );
+ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
}
+
fts5BufferFree(&term);
}
@@ -5339,22 +5477,19 @@ static void fts5RowidFunction(
if( 0==sqlite3_stricmp(zArg, "segment") ){
i64 iRowid;
int segid, height, pgno;
- if( nArg!=4 ){
+ if( nArg!=3 ){
sqlite3_result_error(pCtx,
- "should be: fts5_rowid('segment', segid, height, pgno))", -1
+ "should be: fts5_rowid('segment', segid, pgno))", -1
);
}else{
segid = sqlite3_value_int(apVal[1]);
- height = sqlite3_value_int(apVal[2]);
- pgno = sqlite3_value_int(apVal[3]);
- iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno);
+ pgno = sqlite3_value_int(apVal[2]);
+ iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
sqlite3_result_int64(pCtx, iRowid);
}
- }else {
+ }else{
sqlite3_result_error(pCtx,
- "first arg to fts5_rowid() must be 'segment' "
- "or 'start-of-index'"
- , -1
+ "first arg to fts5_rowid() must be 'segment'" , -1
);
}
}