aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordan <dan@noemail.net>2015-09-10 17:23:37 +0000
committerdan <dan@noemail.net>2015-09-10 17:23:37 +0000
commit33c47f565cd80b9c0a94b47bbdd794393cc158fe (patch)
tree10c9667a5762f7fa608e538ebf9450f3493d98aa
parent383de6918c42b6a8d927234be6a3c81689acd5cd (diff)
parent204debf3a3f1b084b4182b10e3c2427c41b92bb6 (diff)
downloadsqlite-33c47f565cd80b9c0a94b47bbdd794393cc158fe.tar.gz
sqlite-33c47f565cd80b9c0a94b47bbdd794393cc158fe.zip
Modify the fts5 leaf page format to permit faster seek operations. This is a file-format change. Any existing databases can be upgraded by running the fts5 'rebuild' command.
FossilOrigin-Name: 0c0c4ae971e54efc526eed7bd071c90dfadb95ff
-rw-r--r--ext/fts5/fts5Int.h12
-rw-r--r--ext/fts5/fts5_buffer.c6
-rw-r--r--ext/fts5/fts5_config.c3
-rw-r--r--ext/fts5/fts5_index.c847
-rw-r--r--ext/fts5/fts5_main.c4
-rw-r--r--ext/fts5/test/fts5aa.test4
-rw-r--r--ext/fts5/test/fts5ad.test3
-rw-r--r--ext/fts5/test/fts5al.test6
-rw-r--r--ext/fts5/test/fts5corrupt.test4
-rw-r--r--ext/fts5/test/fts5corrupt2.test10
-rw-r--r--ext/fts5/test/fts5rowid.test8
-rw-r--r--ext/fts5/test/fts5simple.test173
-rw-r--r--ext/fts5/test/fts5version.test10
-rw-r--r--ext/fts5/tool/loadfts5.tcl18
-rw-r--r--manifest39
-rw-r--r--manifest.uuid2
16 files changed, 747 insertions, 402 deletions
diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h
index 837ecb1cc..c7f724eab 100644
--- a/ext/fts5/fts5Int.h
+++ b/ext/fts5/fts5Int.h
@@ -117,6 +117,12 @@ typedef struct Fts5Config Fts5Config;
** bColumnsize:
** True if the %_docsize table is created.
**
+** bPrefixIndex:
+** This is only used for debugging. If set to false, any prefix indexes
+** are ignored. This value is configured using:
+**
+** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
+**
*/
struct Fts5Config {
sqlite3 *db; /* Database handle */
@@ -145,10 +151,14 @@ struct Fts5Config {
/* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
char **pzErrmsg;
+
+#ifdef SQLITE_DEBUG
+ int bPrefixIndex; /* True to use prefix-indexes */
+#endif
};
/* Current expected value of %_config table 'version' field */
-#define FTS5_CURRENT_VERSION 3
+#define FTS5_CURRENT_VERSION 4
#define FTS5_CONTENT_NORMAL 0
#define FTS5_CONTENT_NONE 1
diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c
index 07e1243c3..1a7c0d0f8 100644
--- a/ext/fts5/fts5_buffer.c
+++ b/ext/fts5/fts5_buffer.c
@@ -16,12 +16,14 @@
#include "fts5Int.h"
int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
- /* A no-op if an error has already occurred */
- if( *pRc ) return 1;
if( (pBuf->n + nByte) > pBuf->nSpace ){
u8 *pNew;
int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64;
+
+ /* A no-op if an error has already occurred */
+ if( *pRc ) return 1;
+
while( nNew<(pBuf->n + nByte) ){
nNew = nNew * 2;
}
diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c
index 74faf6dd3..19e3d3ab1 100644
--- a/ext/fts5/fts5_config.c
+++ b/ext/fts5/fts5_config.c
@@ -480,6 +480,9 @@ int sqlite3Fts5ConfigParse(
pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
pRet->bColumnsize = 1;
+#ifdef SQLITE_DEBUG
+ pRet->bPrefixIndex = 1;
+#endif
if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
*pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
rc = SQLITE_ERROR;
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c
index 5bf4feba9..cd3402418 100644
--- a/ext/fts5/fts5_index.c
+++ b/ext/fts5/fts5_index.c
@@ -87,7 +87,6 @@
** + total number of segments in level.
** + for each segment from oldest to newest:
** + segment id (always > 0)
-** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
** + first leaf page number (often 1, always greater than 0)
** + final leaf page number
**
@@ -95,15 +94,16 @@
**
** A single record within the %_data table. The data is a list of varints.
** The first value is the number of rows in the index. Then, for each column
-** from left to right, the total number of tokens in the column for all
+** from left to right, the total number of tokens in the column for all
** rows of the table.
**
** 3. Segment leaves:
**
-** TERM DOCLIST FORMAT:
+** TERM/DOCLIST FORMAT:
**
** Most of each segment leaf is taken up by term/doclist data. The
-** general format of the term/doclist data is:
+** general format of term/doclist, starting with the first term
+** on the leaf page, is:
**
** varint : size of first term
** blob: first term data
@@ -123,7 +123,6 @@
** varint: rowid delta (always > 0)
** poslist: next poslist
** }
-** 0x00 byte
**
** poslist format:
**
@@ -143,11 +142,28 @@
** varint: offset delta + 2
** }
**
-** PAGINATION
+** PAGE FORMAT
**
-** The format described above is only accurate if the entire term/doclist
-** data fits on a single leaf page. If this is not the case, the format
-** is changed in two ways:
+** Each leaf page begins with a 4-byte header containing 2 16-bit
+** unsigned integer fields in big-endian format. They are:
+**
+** * The byte offset of the first rowid on the page, if it exists
+** and occurs before the first term (otherwise 0).
+**
+** * The byte offset of the start of the page footer. If the page
+** footer is 0 bytes in size, then this field is the same as the
+** size of the leaf page in bytes.
+**
+** The page footer consists of a single varint for each term located
+** on the page. Each varint is the byte offset of the current term
+** within the page, delta-compressed against the previous value. In
+** other words, the first varint in the footer is the byte offset of
+** the first term, the second is the byte offset of the second less that
+** of the first, and so on.
+**
+** The term/doclist format described above is accurate if the entire
+** term/doclist data fits on a single leaf page. If this is not the case,
+** the format is changed in two ways:
**
** + if the first rowid on a page occurs before the first term, it
** is stored as a literal value:
@@ -160,45 +176,6 @@
** varint : size of first term
** blob: first term data
**
-** Each leaf page begins with:
-**
-** + 2-byte unsigned containing offset to first rowid (or 0).
-** + 2-byte unsigned containing offset to first term (or 0).
-**
-** Followed by term/doclist data.
-**
-** 4. Segment interior nodes:
-**
-** The interior nodes turn the list of leaves into a b+tree.
-**
-** Each interior node begins with a varint - the page number of the left
-** most child node. Following this, for each leaf page except the first,
-** the interior nodes contain:
-**
-** a) If the leaf page contains at least one term, then a term-prefix that
-** is greater than all previous terms, and less than or equal to the
-** first term on the leaf page.
-**
-** b) If the leaf page no terms, a record indicating how many consecutive
-** leaves contain no terms, and whether or not there is an associated
-** by-rowid index record.
-**
-** By definition, there is never more than one type (b) record in a row.
-** Type (b) records only ever appear on height=1 pages - immediate parents
-** of leaves. Only type (a) records are pushed to higher levels.
-**
-** Term format:
-**
-** * Number of bytes in common with previous term plus 2, as a varint.
-** * Number of bytes of new term data, as a varint.
-** * new term data.
-**
-** No-term format:
-**
-** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there
-** is an associated index-by-rowid record.
-** * the number of zero-term leaves as a varint.
-**
** 5. Segment doclist indexes:
**
** Doclist indexes are themselves b-trees, however they usually consist of
@@ -237,28 +214,19 @@
#define FTS5_STRUCTURE_ROWID 10 /* The structure record */
/*
-** Macros determining the rowids used by segment nodes. All nodes in all
-** segments for all indexes (the regular FTS index and any prefix indexes)
-** are stored in the %_data table with large positive rowids.
-**
-** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS)
-** indexes - one regular term index and zero or more prefix indexes.
+** Macros determining the rowids used by segment leaves and dlidx leaves
+** and nodes. All nodes and leaves are stored in the %_data table with large
+** positive rowids.
**
-** Each segment in an index has a unique id greater than zero.
+** Each segment has a unique non-zero 16-bit id.
**
-** Each node in a segment b-tree is assigned a "page number" that is unique
-** within nodes of its height within the segment (leaf nodes have a height
-** of 0, parents 1, etc.). Page numbers are allocated sequentially so that
-** a nodes page number is always one more than its left sibling.
-**
-** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
-** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
-** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
-** SQLITE_FULL and fails the current operation if they ever prove too small.
+** The rowid for each segment leaf is found by passing the segment id and
+** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
+** sequentially starting from 1.
*/
#define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
-#define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */
+#define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
#define fts5_dri(segid, dlidx, height, pgno) ( \
@@ -268,8 +236,8 @@
((i64)(pgno)) \
)
-#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
-#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
+#define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
+#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
/*
** Maximum segments permitted in a single index
@@ -303,7 +271,8 @@ typedef struct Fts5StructureSegment Fts5StructureSegment;
struct Fts5Data {
u8 *p; /* Pointer to buffer containing record */
- int n; /* Size of record in bytes */
+ int nn; /* Size of record in bytes */
+ int szLeaf; /* Size of leaf without page-index */
};
/*
@@ -355,7 +324,6 @@ struct Fts5DoclistIter {
*/
struct Fts5StructureSegment {
int iSegid; /* Segment id */
- int nHeight; /* Height of segment b-tree */
int pgnoFirst; /* First leaf page number in segment */
int pgnoLast; /* Last leaf page number in segment */
};
@@ -377,7 +345,9 @@ struct Fts5Structure {
*/
struct Fts5PageWriter {
int pgno; /* Page number for this page */
- Fts5Buffer buf; /* Buffer containing page data */
+ int iPrevPgidx; /* Previous value written into pgidx */
+ Fts5Buffer buf; /* Buffer containing leaf data */
+ Fts5Buffer pgidx; /* Buffer containing page-index */
Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
@@ -392,6 +362,7 @@ struct Fts5SegWriter {
i64 iPrevRowid; /* Previous rowid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
+ /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
@@ -472,6 +443,9 @@ struct Fts5CResult {
** For each rowid on the page corresponding to the current term, the
** corresponding aRowidOffset[] entry is set to the byte offset of the
** start of the "position-list-size" field within the page.
+**
+** iTermIdx:
+** Index of current term on iTermLeafPgno.
*/
struct Fts5SegIter {
Fts5StructureSegment *pSeg; /* Segment to iterate through */
@@ -486,6 +460,9 @@ struct Fts5SegIter {
int iTermLeafPgno;
int iTermLeafOffset;
+ int iPgidxOff; /* Next offset in pgidx */
+ int iEndofDoclist;
+
/* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
int iRowidOffset; /* Current entry in aRowidOffset[] */
int nRowidOffset; /* Allocated size of aRowidOffset[] array */
@@ -500,10 +477,29 @@ struct Fts5SegIter {
int bDel; /* True if the delete flag is set */
};
+/*
+** Argument is a pointer to an Fts5Data structure that contains a
+** leaf page.
+*/
+#define ASSERT_SZLEAF_OK(x) assert( \
+ (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
+)
+
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
+/*
+** Argument is a pointer to an Fts5Data structure that contains a leaf
+** page. This macro evaluates to true if the leaf contains no terms, or
+** false if it contains at least one term.
+*/
+#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
+
+#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
+
+#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
+
/*
** poslist:
** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
@@ -618,6 +614,11 @@ static int fts5BlobCompare(
}
#endif
+static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
+ int ret;
+ fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
+ return ret;
+}
/*
** Close the read-only blob handle, if it is open.
@@ -679,7 +680,7 @@ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
if( pRet ){
- pRet->n = nByte;
+ pRet->nn = nByte;
aOut = pRet->p = (u8*)&pRet[1];
}else{
rc = SQLITE_NOMEM;
@@ -691,6 +692,9 @@ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
if( rc!=SQLITE_OK ){
sqlite3_free(pRet);
pRet = 0;
+ }else{
+ /* TODO1: Fix this */
+ pRet->szLeaf = fts5GetU16(&pRet->p[2]);
}
}
p->rc = rc;
@@ -785,8 +789,8 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
- i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0);
- i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1;
+ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
+ i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
fts5DataDelete(p, iFirst, iLast);
if( p->pIdxDeleter==0 ){
Fts5Config *pConfig = p->pConfig;
@@ -883,7 +887,6 @@ static int fts5StructureDecode(
pLvl->nSeg = nTotal;
for(iSeg=0; iSeg<nTotal; iSeg++){
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
- i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
}
@@ -974,8 +977,9 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p){
pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
if( p->rc ) return 0;
- memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
- p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
+ /* TODO: Do we need this if the leaf-index is appended? Probably... */
+ memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
+ p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
}
@@ -1039,7 +1043,6 @@ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
- fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
}
@@ -1128,8 +1131,9 @@ static void fts5StructurePromote(
int szPromote = 0; /* Promote anything this size or smaller */
Fts5StructureSegment *pSeg; /* Segment just written */
int szSeg; /* Size of segment just written */
+ int nSeg = pStruct->aLevel[iLvl].nSeg;
-
+ if( nSeg==0 ) return;
pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
@@ -1178,11 +1182,11 @@ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
pLvl->iFirstOff = pLvl->iOff;
}else{
int iOff;
- for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
+ for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
if( pData->p[iOff] ) break;
}
- if( iOff<pData->n ){
+ if( iOff<pData->nn ){
i64 iVal;
pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
@@ -1425,6 +1429,7 @@ static void fts5SegIterNextPage(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter /* Iterator to advance to next page */
){
+ Fts5Data *pLeaf;
Fts5StructureSegment *pSeg = pIter->pSeg;
fts5DataRelease(pIter->pLeaf);
pIter->iLeafPgno++;
@@ -1434,11 +1439,23 @@ static void fts5SegIterNextPage(
pIter->pNextLeaf = 0;
}else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
pIter->pLeaf = fts5DataRead(p,
- FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
+ FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
);
}else{
pIter->pLeaf = 0;
}
+ pLeaf = pIter->pLeaf;
+
+ if( pLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf;
+ if( fts5LeafIsTermless(pLeaf) ){
+ pIter->iEndofDoclist = pLeaf->nn+1;
+ }else{
+ pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
+ pIter->iEndofDoclist
+ );
+ }
+ }
}
/*
@@ -1470,7 +1487,8 @@ static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
if( p->rc==SQLITE_OK ){
int iOff = pIter->iLeafOffset; /* Offset to read at */
- if( iOff>=pIter->pLeaf->n ){
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ if( iOff>=pIter->pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
const u8 *a = &pIter->pLeaf->p[iOff];
@@ -1483,7 +1501,8 @@ static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
int iOff = pIter->iLeafOffset;
- if( iOff>=pIter->pLeaf->n ){
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ if( iOff>=pIter->pLeaf->szLeaf ){
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ){
if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
@@ -1524,6 +1543,14 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
pIter->iTermLeafPgno = pIter->iLeafPgno;
pIter->iLeafOffset = iOff;
+ if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+ }else{
+ int nExtra;
+ pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
+ pIter->iEndofDoclist += nExtra;
+ }
+
fts5SegIterLoadRowid(p, pIter);
}
@@ -1558,8 +1585,10 @@ static void fts5SegIterInit(
}
if( p->rc==SQLITE_OK ){
- u8 *a = pIter->pLeaf->p;
- pIter->iLeafOffset = fts5GetU16(&a[2]);
+ pIter->iLeafOffset = 4;
+ assert_nc( pIter->pLeaf->nn>4 );
+ assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
+ pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
fts5SegIterLoadTerm(p, pIter, 0);
fts5SegIterLoadNPos(p, pIter);
}
@@ -1581,11 +1610,16 @@ static void fts5SegIterInit(
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
- int n = pIter->pLeaf->n;
+ int n = pIter->pLeaf->szLeaf;
int i = pIter->iLeafOffset;
u8 *a = pIter->pLeaf->p;
int iRowidOffset = 0;
+ if( n>pIter->iEndofDoclist ){
+ n = pIter->iEndofDoclist;
+ }
+
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
while( 1 ){
i64 iDelta = 0;
int nPos;
@@ -1595,7 +1629,6 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
i += nPos;
if( i>=n ) break;
i += fts5GetVarint(&a[i], (u64*)&iDelta);
- if( iDelta==0 ) break;
pIter->iRowid += iDelta;
if( iRowidOffset>=pIter->nRowidOffset ){
@@ -1629,17 +1662,17 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
Fts5Data *pNew;
pIter->iLeafPgno--;
pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
- pIter->pSeg->iSegid, 0, pIter->iLeafPgno
+ pIter->pSeg->iSegid, pIter->iLeafPgno
));
if( pNew ){
if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
- if( pIter->iTermLeafOffset<pNew->n ){
+ if( pIter->iTermLeafOffset<pNew->szLeaf ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = pIter->iTermLeafOffset;
}
}else{
- int iRowidOff, dummy;
- fts5LeafHeader(pNew, &iRowidOff, &dummy);
+ int iRowidOff;
+ iRowidOff = fts5LeafFirstRowidOff(pNew);
if( iRowidOff ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = iRowidOff;
@@ -1657,6 +1690,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
}
if( pIter->pLeaf ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
fts5SegIterReverseInitPage(p, pIter);
}
}
@@ -1712,26 +1746,27 @@ static void fts5SegIterNext(
/* Search for the end of the position list within the current page. */
u8 *a = pLeaf->p;
- int n = pLeaf->n;
+ int n = pLeaf->szLeaf;
+ ASSERT_SZLEAF_OK(pLeaf);
iOff = pIter->iLeafOffset + pIter->nPos;
if( iOff<n ){
- /* The next entry is on the current page */
- u64 iDelta;
- iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
- pIter->iLeafOffset = iOff;
- if( iDelta==0 ){
+ /* The next entry is on the current page. */
+ assert_nc( iOff<=pIter->iEndofDoclist );
+ if( iOff>=pIter->iEndofDoclist ){
bNewTerm = 1;
- if( iOff>=n ){
- fts5SegIterNextPage(p, pIter);
- pIter->iLeafOffset = 4;
- }else if( iOff!=fts5GetU16(&a[2]) ){
- pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep);
+ if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
+ iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}else{
+ u64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
pIter->iRowid += iDelta;
+ assert_nc( iDelta>0 );
}
+ pIter->iLeafOffset = iOff;
+
}else if( pIter->pSeg==0 ){
const u8 *pList = 0;
const char *zTerm = 0;
@@ -1745,7 +1780,9 @@ static void fts5SegIterNext(
pIter->pLeaf = 0;
}else{
pIter->pLeaf->p = (u8*)pList;
- pIter->pLeaf->n = nList;
+ pIter->pLeaf->nn = nList;
+ pIter->pLeaf->szLeaf = nList;
+ pIter->iEndofDoclist = nList+1;
sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
}
@@ -1756,15 +1793,27 @@ static void fts5SegIterNext(
fts5SegIterNextPage(p, pIter);
pLeaf = pIter->pLeaf;
if( pLeaf==0 ) break;
- if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOff<pLeaf->n ){
+ ASSERT_SZLEAF_OK(pLeaf);
+ if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
+
+ if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
+ );
+ }
+
}
- else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){
+ else if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], iOff
+ );
pIter->iLeafOffset = iOff;
+ pIter->iEndofDoclist = iOff;
bNewTerm = 1;
}
- if( iOff>=pLeaf->n ){
+ if( iOff>=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
return;
}
@@ -1778,6 +1827,7 @@ static void fts5SegIterNext(
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
}else{
+ int nExtra;
fts5SegIterLoadTerm(p, pIter, nKeep);
fts5SegIterLoadNPos(p, pIter);
if( pbNewTerm ) *pbNewTerm = 1;
@@ -1805,7 +1855,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
if( pDlidx ){
int iSegid = pIter->pSeg->iSegid;
pgnoLast = fts5DlidxIterPgno(pDlidx);
- pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
+ pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
}else{
int iOff; /* Byte offset within pLeaf */
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
@@ -1814,48 +1864,29 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
** byte of position-list content for the current rowid. Back it up
** so that it points to the start of the position-list size field. */
pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
- iOff = pIter->iLeafOffset;
- assert( iOff>=4 );
-
- /* Search for a new term within the current leaf. If one can be found,
- ** then this page contains the largest rowid for the current term. */
- while( iOff<pLeaf->n ){
- int nPos;
- i64 iDelta;
- int bDummy;
-
- /* Read the position-list size field */
- iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
- iOff += nPos;
- if( iOff>=pLeaf->n ) break;
-
- /* Rowid delta. Or, if 0x00, the end of doclist marker. */
- nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
- if( iDelta==0 ) break;
- iOff += nPos;
- }
/* If this condition is true then the largest rowid for the current
** term may not be stored on the current page. So search forward to
** see where said rowid really is. */
- if( iOff>=pLeaf->n ){
+ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
int pgno;
Fts5StructureSegment *pSeg = pIter->pSeg;
/* The last rowid in the doclist may not be on the current page. Search
** forward to find the page containing the last rowid. */
for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
- i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno);
+ i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
Fts5Data *pNew = fts5DataRead(p, iAbs);
if( pNew ){
- int iRowid, iTerm;
- fts5LeafHeader(pNew, &iRowid, &iTerm);
+ int iRowid, bTermless;
+ iRowid = fts5LeafFirstRowidOff(pNew);
+ bTermless = fts5LeafIsTermless(pNew);
if( iRowid ){
SWAPVAL(Fts5Data*, pNew, pLast);
pgnoLast = pgno;
}
fts5DataRelease(pNew);
- if( iTerm ) break;
+ if( bTermless==0 ) break;
}
}
}
@@ -1871,14 +1902,20 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
** first rowid on this page.
*/
if( pLast ){
- int dummy;
int iOff;
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = pLast;
pIter->iLeafPgno = pgnoLast;
- fts5LeafHeader(pLast, &iOff, &dummy);
+ iOff = fts5LeafFirstRowidOff(pLast);
iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
+
+ if( fts5LeafIsTermless(pLast) ){
+ pIter->iEndofDoclist = pLast->nn+1;
+ }else{
+ pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
+ }
+
}
fts5SegIterReverseInitPage(p, pIter);
@@ -1901,30 +1938,20 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
/* Check if the current doclist ends on this page. If it does, return
** early without loading the doclist-index (as it belongs to a different
** term. */
- if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
- int iOff = pIter->iLeafOffset + pIter->nPos;
- while( iOff<pLeaf->n ){
- int bDummy;
- int nPos;
- i64 iDelta;
-
- /* iOff is currently the offset of the start of position list data */
- iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
- if( iDelta==0 ) return;
- assert_nc( iOff<pLeaf->n );
- iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
- iOff += nPos;
- }
+ if( pIter->iTermLeafPgno==pIter->iLeafPgno
+ && pIter->iEndofDoclist<pLeaf->szLeaf
+ ){
+ return;
}
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
#define fts5IndexGetVarint32(a, iOff, nVal) { \
- nVal = a[iOff++]; \
+ nVal = (a)[iOff++]; \
if( nVal & 0x80 ){ \
iOff--; \
- iOff += fts5GetVarint32(&a[iOff], nVal); \
+ iOff += fts5GetVarint32(&(a)[iOff], nVal); \
} \
}
@@ -1955,34 +1982,35 @@ static void fts5LeafSeek(
){
int iOff;
const u8 *a = pIter->pLeaf->p;
- int n = pIter->pLeaf->n;
+ int szLeaf = pIter->pLeaf->szLeaf;
+ int n = pIter->pLeaf->nn;
int nMatch = 0;
int nKeep = 0;
int nNew = 0;
+ int iTerm = 0;
+ int iTermOff;
+ int iPgidx; /* Current offset in pgidx */
+ int bEndOfPage = 0;
assert( p->rc==SQLITE_OK );
- assert( pIter->pLeaf );
- iOff = fts5GetU16(&a[2]);
- if( iOff<4 || iOff>=n ){
- p->rc = FTS5_CORRUPT;
- return;
- }
+ iPgidx = szLeaf;
+ iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
+ iOff = iTermOff;
while( 1 ){
- int i;
- int nCmp;
/* Figure out how many new bytes are in this term */
fts5IndexGetVarint32(a, iOff, nNew);
-
if( nKeep<nMatch ){
goto search_failed;
}
assert( nKeep>=nMatch );
if( nKeep==nMatch ){
+ int nCmp;
+ int i;
nCmp = MIN(nNew, nTerm-nMatch);
for(i=0; i<nCmp; i++){
if( a[iOff+i]!=pTerm[nMatch+i] ) break;
@@ -1999,29 +2027,15 @@ static void fts5LeafSeek(
goto search_failed;
}
}
- iOff += nNew;
-
- /* Skip past the doclist. If the end of the page is reached, bail out. */
- while( 1 ){
- int nPos;
- /* Skip past rowid delta */
- fts5IndexSkipVarint(a, iOff);
-
- /* Skip past position list */
- fts5IndexGetVarint32(a, iOff, nPos);
- iOff += (nPos >> 1);
- if( iOff>=(n-1) ){
- iOff = n;
- goto search_failed;
- }
+ if( iPgidx>=n ){
+ bEndOfPage = 1;
+ break;
+ }
- /* If this is the end of the doclist, break out of the loop */
- if( a[iOff]==0x00 ){
- iOff++;
- break;
- }
- };
+ iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
+ iTermOff += nKeep;
+ iOff = iTermOff;
/* Read the nKeep field of the next term. */
fts5IndexGetVarint32(a, iOff, nKeep);
@@ -2032,14 +2046,15 @@ static void fts5LeafSeek(
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
return;
- }else if( iOff>=n ){
+ }else if( bEndOfPage ){
do {
+ iTerm = 0;
fts5SegIterNextPage(p, pIter);
if( pIter->pLeaf==0 ) return;
a = pIter->pLeaf->p;
- iOff = fts5GetU16(&a[2]);
- if( iOff ){
- if( iOff<4 || iOff>=n ){
+ if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
+ fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
+ if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
nKeep = 0;
@@ -2051,6 +2066,7 @@ static void fts5LeafSeek(
}
search_success:
+
pIter->iLeafOffset = iOff + nNew;
pIter->iTermLeafOffset = pIter->iLeafOffset;
pIter->iTermLeafPgno = pIter->iLeafPgno;
@@ -2058,6 +2074,15 @@ static void fts5LeafSeek(
fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
+ if( iPgidx>=n ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+ }else{
+ int nExtra;
+ iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
+ pIter->iEndofDoclist = iTermOff + nExtra;
+ }
+ pIter->iPgidxOff = iPgidx;
+
fts5SegIterLoadRowid(p, pIter);
fts5SegIterLoadNPos(p, pIter);
}
@@ -2190,9 +2215,10 @@ static void fts5SegIterHashInit(
pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
if( pLeaf==0 ) return;
pLeaf->p = (u8*)pList;
- pLeaf->n = nList;
+ pLeaf->nn = pLeaf->szLeaf = nList;
pIter->pLeaf = pLeaf;
pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
+ pIter->iEndofDoclist = pLeaf->nn+1;
if( flags & FTS5INDEX_QUERY_DESC ){
pIter->flags |= FTS5_SEGITER_REVERSE;
@@ -2383,9 +2409,9 @@ static void fts5SegIterGotoPage(
if( p->rc==SQLITE_OK ){
int iOff;
u8 *a = pIter->pLeaf->p;
- int n = pIter->pLeaf->n;
+ int n = pIter->pLeaf->szLeaf;
- iOff = fts5GetU16(&a[0]);
+ iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
if( iOff<4 || iOff>=n ){
p->rc = FTS5_CORRUPT;
}else{
@@ -2717,9 +2743,10 @@ static void fts5MultiIterNew2(
Fts5SegIter *pIter = &pNew->aSeg[1];
pIter->flags = FTS5_SEGITER_ONETERM;
- if( pData->n>0 ){
+ if( pData->szLeaf>0 ){
pIter->pLeaf = pData;
pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
+ pIter->iEndofDoclist = pData->nn;
pNew->aFirst[1].iFirst = 1;
if( bDesc ){
pNew->bRev = 1;
@@ -2797,7 +2824,7 @@ static void fts5ChunkIterate(
int nRem = pSeg->nPos; /* Number of bytes still to come */
Fts5Data *pData = 0;
u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
- int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset);
+ int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
int pgno = pSeg->iLeafPgno;
int pgnoSave = 0;
@@ -2813,10 +2840,10 @@ static void fts5ChunkIterate(
break;
}else{
pgno++;
- pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno));
+ pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
if( pData==0 ) break;
pChunk = &pData->p[4];
- nChunk = MIN(nRem, pData->n - 4);
+ nChunk = MIN(nRem, pData->szLeaf - 4);
if( pgno==pgnoSave ){
assert( pSeg->pNextLeaf==0 );
pSeg->pNextLeaf = pData;
@@ -3102,19 +3129,30 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
Fts5PageWriter *pPage = &pWriter->writer;
i64 iRowid;
+ assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
+
+ /* Set the szLeaf header field. */
+ assert( 0==fts5GetU16(&pPage->buf.p[2]) );
+ fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
+
if( pWriter->bFirstTermInPage ){
/* No term was written to this page. */
- assert( 0==fts5GetU16(&pPage->buf.p[2]) );
+ assert( pPage->pgidx.n==0 );
fts5WriteBtreeNoTerm(p, pWriter);
+ }else{
+ /* Append the pgidx to the page buffer. Set the szLeaf header field. */
+ fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
}
- /* Write the current page to the db. */
- iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
+ /* Write the page out to disk */
+ iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
/* Initialize the next page. */
fts5BufferZero(&pPage->buf);
+ fts5BufferZero(&pPage->pgidx);
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
+ pPage->iPrevPgidx = 0;
pPage->pgno++;
/* Increase the leaves written counter */
@@ -3139,20 +3177,31 @@ static void fts5WriteAppendTerm(
){
int nPrefix; /* Bytes of prefix compression for term */
Fts5PageWriter *pPage = &pWriter->writer;
+ Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
- assert( pPage->buf.n==0 || pPage->buf.n>4 );
- if( pPage->buf.n==0 ){
- /* Zero the first term and first rowid fields */
- static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
- fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
- assert( pWriter->bFirstTermInPage );
- }
if( p->rc ) return;
+ assert( pPage->buf.n>=4 );
+ assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
+
+ /* If the current leaf page is full, flush it to disk. */
+ if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
+ if( pPage->buf.n>4 ){
+ fts5WriteFlushLeaf(p, pWriter);
+ }
+ fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
+ }
+ /* TODO1: Updating pgidx here. */
+ pPgidx->n += sqlite3Fts5PutVarint(
+ &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
+ );
+ pPage->iPrevPgidx = pPage->buf.n;
+#if 0
+ fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
+ pPgidx->n += 2;
+#endif
+
if( pWriter->bFirstTermInPage ){
- /* Update the "first term" field of the page header. */
- assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
- fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
nPrefix = 0;
if( pPage->pgno!=1 ){
/* This is the first term on a leaf that is not the leftmost leaf in
@@ -3194,11 +3243,6 @@ static void fts5WriteAppendTerm(
assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
pWriter->aDlidx[0].pgno = pPage->pgno;
-
- /* If the current leaf page is full, flush it to disk. */
- if( pPage->buf.n>=p->pConfig->pgsz ){
- fts5WriteFlushLeaf(p, pWriter);
- }
}
/*
@@ -3213,6 +3257,10 @@ static void fts5WriteAppendRowid(
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pPage = &pWriter->writer;
+ if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
+ fts5WriteFlushLeaf(p, pWriter);
+ }
+
/* If this is to be the first rowid written to the page, set the
** rowid-pointer in the page-header. Also append a value to the dlidx
** buffer, in case a doclist-index is required. */
@@ -3233,10 +3281,6 @@ static void fts5WriteAppendRowid(
pWriter->bFirstRowidInPage = 0;
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
-
- if( pPage->buf.n>=p->pConfig->pgsz ){
- fts5WriteFlushLeaf(p, pWriter);
- }
}
}
@@ -3251,8 +3295,10 @@ static void fts5WriteAppendPoslistData(
int n = nData;
assert( p->pConfig->pgsz>0 );
- while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){
- int nReq = p->pConfig->pgsz - pPage->buf.n;
+ while( p->rc==SQLITE_OK
+ && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
+ ){
+ int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
int nCopy = 0;
while( nCopy<nReq ){
i64 dummy;
@@ -3279,7 +3325,6 @@ static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
static void fts5WriteFinish(
Fts5Index *p,
Fts5SegWriter *pWriter, /* Writer object */
- int *pnHeight, /* OUT: Height of the b-tree */
int *pnLeaf /* OUT: Number of leaf pages in b-tree */
){
int i;
@@ -3287,7 +3332,6 @@ static void fts5WriteFinish(
if( p->rc==SQLITE_OK ){
if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
*pnLeaf = 0;
- *pnHeight = 0;
}else{
if( pLeaf->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
@@ -3295,11 +3339,11 @@ static void fts5WriteFinish(
*pnLeaf = pLeaf->pgno-1;
fts5WriteFlushBtree(p, pWriter);
- *pnHeight = 0;
}
}
fts5BufferFree(&pLeaf->term);
fts5BufferFree(&pLeaf->buf);
+ fts5BufferFree(&pLeaf->pgidx);
fts5BufferFree(&pWriter->btterm);
for(i=0; i<pWriter->nDlidx; i++){
@@ -3313,6 +3357,8 @@ static void fts5WriteInit(
Fts5SegWriter *pWriter,
int iSegid
){
+ const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
+
memset(pWriter, 0, sizeof(Fts5SegWriter));
pWriter->iSegid = iSegid;
@@ -3321,6 +3367,10 @@ static void fts5WriteInit(
pWriter->bFirstTermInPage = 1;
pWriter->iBtPage = 1;
+ /* Grow the two buffers to pgsz + padding bytes in size. */
+ fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer);
+ fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer);
+
if( p->pIdxWriter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
@@ -3330,6 +3380,13 @@ static void fts5WriteInit(
}
if( p->rc==SQLITE_OK ){
+ /* Initialize the 4-byte leaf-page header to 0x00. */
+ memset(pWriter->writer.buf.p, 0, 4);
+ pWriter->writer.buf.n = 4;
+
+ /* Bind the current output segment id to the index-writer. This is an
+ ** optimization over binding the same value over and over as rows are
+ ** inserted into %_idx by the current writer. */
sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
}
@@ -3358,19 +3415,37 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
i64 iLeafRowid;
Fts5Data *pData;
int iId = pSeg->pSeg->iSegid;
- u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};
+ u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
- iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno);
+ iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
pData = fts5DataRead(p, iLeafRowid);
if( pData ){
fts5BufferZero(&buf);
+ fts5BufferGrow(&p->rc, &buf, pData->nn);
fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
- fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
+ fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
+ if( p->rc==SQLITE_OK ){
+ /* Set the szLeaf field */
+ fts5PutU16(&buf.p[2], buf.n);
+ }
+
+ /* Set up the new page-index array */
+ fts5BufferAppendVarint(&p->rc, &buf, 4);
+ if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
+ && pSeg->iEndofDoclist<pData->szLeaf
+ ){
+ int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
+ fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
+ fts5BufferAppendBlob(&p->rc, &buf,
+ pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
+ );
+ }
+
fts5DataRelease(pData);
pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
- fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
+ fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
}
}
@@ -3470,8 +3545,9 @@ static void fts5IndexMergeLevel(
}
/* This is a new term. Append a term to the output segment. */
+ /* TODO2: Doclist 0x00 term */
if( bRequireDoclistTerm ){
- fts5WriteAppendZerobyte(p, &writer);
+ /* fts5WriteAppendZerobyte(p, &writer); */
}
fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
fts5BufferSet(&p->rc, &term, nTerm, pTerm);
@@ -3489,7 +3565,7 @@ static void fts5IndexMergeLevel(
/* Flush the last leaf page to disk. Set the output segment b-tree height
** and last leaf page number at the same time. */
- fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);
+ fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
if( fts5MultiIterEof(p, pIter) ){
int i;
@@ -3614,6 +3690,7 @@ static void fts5IndexCrisismerge(
assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
+ assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
fts5StructurePromote(p, iLvl+1, pStruct);
iLvl++;
}
@@ -3641,10 +3718,12 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
int ret;
u32 dummy;
ret = fts5GetVarint32(aBuf, dummy);
- while( 1 ){
- int i = fts5GetVarint32(&aBuf[ret], dummy);
- if( (ret + i) > nMax ) break;
- ret += i;
+ if( ret<nMax ){
+ while( 1 ){
+ int i = fts5GetVarint32(&aBuf[ret], dummy);
+ if( (ret + i) > nMax ) break;
+ ret += i;
+ }
}
return ret;
}
@@ -3677,75 +3756,39 @@ static void fts5FlushOneHash(Fts5Index *p){
const int pgsz = p->pConfig->pgsz;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
- int nHeight; /* Height of new segment b-tree */
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
+ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
const u8 *zPrev = 0;
Fts5SegWriter writer;
fts5WriteInit(p, &writer, iSegid);
- /* Pre-allocate the buffer used to assemble leaf pages to the target
- ** page size. */
- assert( pgsz>0 );
pBuf = &writer.writer.buf;
- fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
+ pPgidx = &writer.writer.pgidx;
+
+ /* fts5WriteInit() should have initialized the buffers to (most likely)
+ ** the maximum space required. */
+ assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
+ assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
/* Begin scanning through hash table entries. This loop runs once for each
** term/doclist currently stored within the hash table. */
if( p->rc==SQLITE_OK ){
- memset(pBuf->p, 0, 4);
- pBuf->n = 4;
p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
}
while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
const char *zTerm; /* Buffer containing term */
- int nTerm; /* Size of zTerm in bytes */
const u8 *pDoclist; /* Pointer to doclist for this term */
int nDoclist; /* Size of doclist in bytes */
int nSuffix; /* Size of term suffix */
+ /* Write the term for this entry to disk. */
sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
- nTerm = strlen(zTerm);
-
- /* Decide if the term will fit on the current leaf. If it will not,
- ** flush the leaf to disk here. */
- if( pBuf->n>4 && (pBuf->n + nTerm + 2) > pgsz ){
- fts5WriteFlushLeaf(p, &writer);
- pBuf = &writer.writer.buf;
- if( (nTerm + 32) > pBuf->nSpace ){
- fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
- if( p->rc ) break;
- }
- }
-
- /* Write the term to the leaf. And if it is the first on the leaf, and
- ** the leaf is not page number 1, push it up into the b-tree hierarchy
- ** as well. */
- if( writer.bFirstTermInPage==0 ){
- int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre);
- nSuffix = nTerm - nPre;
- }else{
- fts5PutU16(&pBuf->p[2], pBuf->n);
- writer.bFirstTermInPage = 0;
- if( writer.writer.pgno!=1 ){
- int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
- fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
- pBuf = &writer.writer.buf;
- assert( nPre<nTerm );
- }
- nSuffix = nTerm;
- }
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix);
- fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);
+ fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm);
- /* We just wrote a term into page writer.aWriter[0].pgno. If a
- ** doclist-index is to be generated for this doclist, it will be
- ** associated with this page. */
- assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
- writer.aDlidx[0].pgno = writer.writer.pgno;
-
- if( pgsz>=(pBuf->n + nDoclist + 1) ){
+ if( writer.bFirstRowidInPage==0
+ && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1)
+ ){
/* The entire doclist will fit on the current leaf. */
fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
}else{
@@ -3753,7 +3796,7 @@ static void fts5FlushOneHash(Fts5Index *p){
i64 iDelta = 0;
int iOff = 0;
- writer.bFirstRowidInPage = 0;
+ /* writer.bFirstRowidInPage = 0; */
/* The entire doclist will not fit on this leaf. The following
** loop iterates through the poslists that make up the current
@@ -3777,7 +3820,7 @@ static void fts5FlushOneHash(Fts5Index *p){
}
assert( pBuf->n<=pBuf->nSpace );
- if( (pBuf->n + nCopy) <= pgsz ){
+ if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
/* The entire poslist will fit on the current leaf. So copy
** it in one go. */
fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
@@ -3788,7 +3831,7 @@ static void fts5FlushOneHash(Fts5Index *p){
const u8 *pPoslist = &pDoclist[iOff];
int iPos = 0;
while( p->rc==SQLITE_OK ){
- int nSpace = pgsz - pBuf->n;
+ int nSpace = pgsz - pBuf->n - pPgidx->n;
int n = 0;
if( (nCopy - iPos)<=nSpace ){
n = nCopy - iPos;
@@ -3798,9 +3841,8 @@ static void fts5FlushOneHash(Fts5Index *p){
assert( n>0 );
fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
iPos += n;
- if( pBuf->n>=pgsz ){
+ if( (pBuf->n + pPgidx->n)>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
- pBuf = &writer.writer.buf;
}
if( iPos>=nCopy ) break;
}
@@ -3809,13 +3851,14 @@ static void fts5FlushOneHash(Fts5Index *p){
}
}
- pBuf->p[pBuf->n++] = '\0';
+ /* TODO2: Doclist terminator written here. */
+ /* pBuf->p[pBuf->n++] = '\0'; */
assert( pBuf->n<=pBuf->nSpace );
zPrev = (const u8*)zTerm;
sqlite3Fts5HashScanNext(pHash);
}
sqlite3Fts5HashClear(pHash);
- fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
+ fts5WriteFinish(p, &writer, &pgnoLast);
/* Update the Fts5Structure. It is written back to the database by the
** fts5StructureRelease() call below. */
@@ -3826,7 +3869,6 @@ static void fts5FlushOneHash(Fts5Index *p){
if( p->rc==SQLITE_OK ){
pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
pSeg->iSegid = iSegid;
- pSeg->nHeight = nHeight;
pSeg->pgnoFirst = 1;
pSeg->pgnoLast = pgnoLast;
pStruct->nSegment++;
@@ -3928,7 +3970,10 @@ static void fts5PoslistCallback(
void *pCtx,
const u8 *pChunk, int nChunk
){
- fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
+ assert_nc( nChunk>=0 );
+ if( nChunk>0 ){
+ fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
+ }
}
/*
@@ -4163,7 +4208,7 @@ static void fts5SetupPrefixIter(
pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
if( pData ){
pData->p = (u8*)&pData[1];
- pData->n = doclist.n;
+ pData->nn = pData->szLeaf = doclist.n;
memcpy(pData->p, doclist.p, doclist.n);
fts5MultiIterNew2(p, pData, bDesc, ppIter);
}
@@ -4393,7 +4438,12 @@ int sqlite3Fts5IndexQuery(
memcpy(&buf.p[1], pToken, nToken);
#ifdef SQLITE_DEBUG
- if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){
+ /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
+ ** prefix-query. Instead of using a prefix-index (if one exists),
+ ** evaluate the prefix query using the main FTS index. This is used
+ ** for internal sanity checking by the integrity-check in debug
+ ** mode only. */
+ if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
assert( flags & FTS5INDEX_QUERY_PREFIX );
iIdx = 1+pConfig->nPrefix;
}else
@@ -4513,7 +4563,7 @@ int sqlite3Fts5IterPoslist(
assert( pIter->pIndex->rc==SQLITE_OK );
*piRowid = pSeg->iRowid;
*pn = pSeg->nPos;
- if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
+ if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
*pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
}else{
fts5BufferZero(&pIter->poslist);
@@ -4561,11 +4611,11 @@ int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
*pnRow = 0;
memset(anSize, 0, sizeof(i64) * nCol);
pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
- if( p->rc==SQLITE_OK && pData->n ){
+ if( p->rc==SQLITE_OK && pData->nn ){
int i = 0;
int iCol;
i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
- for(iCol=0; i<pData->n && iCol<nCol; iCol++){
+ for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
}
}
@@ -4770,18 +4820,25 @@ static void fts5TestTerm(
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
/* If this is a prefix query, check that the results returned if the
- ** the index is disabled are the same. In both ASC and DESC order. */
- if( iIdx>0 && rc==SQLITE_OK ){
- int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
- ck2 = 0;
- rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
- if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
- }
- if( iIdx>0 && rc==SQLITE_OK ){
- int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
- ck2 = 0;
- rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
- if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ ** the index is disabled are the same. In both ASC and DESC order.
+ **
+ ** This check may only be performed if the hash table is empty. This
+ ** is because the hash table only supports a single scan query at
+ ** a time, and the multi-iter loop from which this function is called
+ ** is already performing such a scan. */
+ if( p->nPendingData==0 ){
+ if( iIdx>0 && rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
+ ck2 = 0;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ }
+ if( iIdx>0 && rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
+ ck2 = 0;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ }
}
cksum3 ^= ck1;
@@ -4820,16 +4877,67 @@ static void fts5IndexIntegrityCheckEmpty(
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
- Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i));
+ Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
if( pLeaf ){
- if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT;
- if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT;
+ if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
+ if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
}
fts5DataRelease(pLeaf);
if( p->rc ) break;
}
}
+static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
+ int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2;
+ int iTermOff = 0;
+ int ii;
+
+ Fts5Buffer buf1 = {0,0,0};
+ Fts5Buffer buf2 = {0,0,0};
+
+ ii = pLeaf->szLeaf;
+ while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
+ int res;
+ int iOff;
+ int nIncr;
+
+ ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
+ iTermOff += nIncr;
+ iOff = iTermOff;
+
+ if( iOff>=pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else if( iTermOff==nIncr ){
+ int nByte;
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
+ if( (iOff+nByte)>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
+ }
+ }else{
+ int nKeep, nByte;
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
+ if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ buf1.n = nKeep;
+ fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
+ }
+
+ if( p->rc==SQLITE_OK ){
+ res = fts5BufferCompare(&buf1, &buf2);
+ if( res<=0 ) p->rc = FTS5_CORRUPT;
+ }
+ }
+ fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
+ }
+
+ fts5BufferFree(&buf1);
+ fts5BufferFree(&buf2);
+}
+
static void fts5IndexIntegrityCheckSegment(
Fts5Index *p, /* FTS5 backend object */
Fts5StructureSegment *pSeg /* Segment to check internal consistency */
@@ -4851,7 +4959,6 @@ static void fts5IndexIntegrityCheckSegment(
while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iRow; /* Rowid for this leaf */
Fts5Data *pLeaf; /* Data for this leaf */
- int iOff; /* Offset of first term on leaf */
int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
@@ -4861,7 +4968,7 @@ static void fts5IndexIntegrityCheckSegment(
/* If the leaf in question has already been trimmed from the segment,
** ignore this b-tree entry. Otherwise, load it into memory. */
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
- iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf);
+ iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
pLeaf = fts5DataRead(p, iRow);
if( pLeaf==0 ) break;
@@ -4869,15 +4976,16 @@ static void fts5IndexIntegrityCheckSegment(
** to or larger than the split-key in zIdxTerm. Also check that if there
** is also a rowid pointer within the leaf page header, it points to a
** location before the term. */
- iOff = fts5GetU16(&pLeaf->p[2]);
- if( iOff==0 ){
+ if( pLeaf->nn<=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
- int iRowidOff;
+ int iOff; /* Offset of first term on leaf */
+ int iRowidOff; /* Offset of first rowid on leaf */
int nTerm; /* Size of term on leaf in bytes */
int res; /* Comparison of term and split-key */
- iRowidOff = fts5GetU16(&pLeaf->p[0]);
+ iOff = fts5LeafFirstTermOff(pLeaf);
+ iRowidOff = fts5LeafFirstRowidOff(pLeaf);
if( iRowidOff>=iOff ){
p->rc = FTS5_CORRUPT;
}else{
@@ -4886,6 +4994,8 @@ static void fts5IndexIntegrityCheckSegment(
if( res==0 ) res = nTerm - nIdxTerm;
if( res<0 ) p->rc = FTS5_CORRUPT;
}
+
+ fts5IntegrityCheckPgidx(p, pLeaf);
}
fts5DataRelease(pLeaf);
if( p->rc ) break;
@@ -4913,10 +5023,10 @@ static void fts5IndexIntegrityCheckSegment(
/* Check any rowid-less pages that occur before the current leaf. */
for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
- iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
+ iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
- if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
+ if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
fts5DataRelease(pLeaf);
}
}
@@ -4924,12 +5034,13 @@ static void fts5IndexIntegrityCheckSegment(
/* Check that the leaf page indicated by the iterator really does
** contain the rowid suggested by the same. */
- iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
+ iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
i64 iRowid;
- int iRowidOff = fts5GetU16(&pLeaf->p[0]);
- if( iRowidOff>=pLeaf->n ){
+ int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
+ ASSERT_SZLEAF_OK(pLeaf);
+ if( iRowidOff>=pLeaf->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
@@ -5130,9 +5241,8 @@ static void fts5DebugStructure(
);
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
- sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
- " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight,
- pSeg->pgnoFirst, pSeg->pgnoLast
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
+ pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
);
}
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
@@ -5193,8 +5303,10 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
i64 iDocid;
int iOff = 0;
- iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid);
- sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
+ if( n>0 ){
+ iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
+ }
while( iOff<n ){
int nPos;
int bDummy;
@@ -5205,7 +5317,7 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
if( iDelta==0 ) return iOff;
iDocid += iDelta;
- sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
}
}
@@ -5231,13 +5343,18 @@ static void fts5DecodeFunction(
assert( nArg==2 );
memset(&s, 0, sizeof(Fts5Buffer));
iRowid = sqlite3_value_int64(apVal[0]);
+
+ /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
+ ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
+ ** buffer overreads even if the record is corrupt. */
n = sqlite3_value_bytes(apVal[1]);
aBlob = sqlite3_value_blob(apVal[1]);
-
nSpace = n + FTS5_DATA_ZERO_PADDING;
a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
if( a==0 ) goto decode_out;
memcpy(a, aBlob, n);
+
+
fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
fts5DebugRowid(&rc, &s, iRowid);
@@ -5246,7 +5363,7 @@ static void fts5DecodeFunction(
Fts5DlidxLvl lvl;
dlidx.p = a;
- dlidx.n = n;
+ dlidx.nn = n;
memset(&lvl, 0, sizeof(Fts5DlidxLvl));
lvl.pData = &dlidx;
@@ -5264,52 +5381,73 @@ static void fts5DecodeFunction(
fts5DecodeStructure(&rc, &s, a, n);
}
}else{
- Fts5Buffer term;
+ Fts5Buffer term; /* Current term read from page */
+ int szLeaf; /* Offset of pgidx in a[] */
+ int iPgidxOff;
+ int iPgidxPrev = 0; /* Previous value read from pgidx */
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
- int nKeep = 0;
+ int nDoclist;
memset(&term, 0, sizeof(Fts5Buffer));
- if( n>=4 ){
- iRowidOff = fts5GetU16(&a[0]);
- iTermOff = fts5GetU16(&a[2]);
- }else{
+ if( n<4 ){
sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
goto decode_out;
+ }else{
+ iRowidOff = fts5GetU16(&a[0]);
+ iPgidxOff = szLeaf = fts5GetU16(&a[2]);
+ if( iPgidxOff<n ){
+ fts5GetVarint32(&a[iPgidxOff], iTermOff);
+ }
}
- if( iRowidOff ){
+ /* Decode the position list tail at the start of the page */
+ if( iRowidOff!=0 ){
iOff = iRowidOff;
- }else if( iTermOff ){
+ }else if( iTermOff!=0 ){
iOff = iTermOff;
}else{
- iOff = n;
+ iOff = szLeaf;
}
fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
- assert( iRowidOff==0 || iOff==iRowidOff );
- if( iRowidOff ){
- iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
- }
+ /* Decode any more doclist data that appears on the page before the
+ ** first term. */
+ nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
+ fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
+
+ while( iPgidxOff<n ){
+ int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
+ int nByte; /* Bytes of data */
+ int iEnd;
+
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
+ iPgidxPrev += nByte;
+ iOff = iPgidxPrev;
+
+ if( iPgidxOff<n ){
+ fts5GetVarint32(&a[iPgidxOff], nByte);
+ iEnd = iPgidxPrev + nByte;
+ }else{
+ iEnd = szLeaf;
+ }
- assert( iTermOff==0 || iOff==iTermOff );
- while( iOff<n ){
- int nByte;
+ if( bFirst==0 ){
+ iOff += fts5GetVarint32(&a[iOff], nByte);
+ term.n = nByte;
+ }
iOff += fts5GetVarint32(&a[iOff], nByte);
- term.n= nKeep;
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
iOff += nByte;
sqlite3Fts5BufferAppendPrintf(
&rc, &s, " term=%.*s", term.n, (const char*)term.p
- );
- iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
- if( iOff<n ){
- iOff += fts5GetVarint32(&a[iOff], nKeep);
- }
+ );
+ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
}
+
fts5BufferFree(&term);
}
@@ -5339,22 +5477,19 @@ static void fts5RowidFunction(
if( 0==sqlite3_stricmp(zArg, "segment") ){
i64 iRowid;
int segid, height, pgno;
- if( nArg!=4 ){
+ if( nArg!=3 ){
sqlite3_result_error(pCtx,
- "should be: fts5_rowid('segment', segid, height, pgno))", -1
+ "should be: fts5_rowid('segment', segid, pgno))", -1
);
}else{
segid = sqlite3_value_int(apVal[1]);
- height = sqlite3_value_int(apVal[2]);
- pgno = sqlite3_value_int(apVal[3]);
- iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno);
+ pgno = sqlite3_value_int(apVal[2]);
+ iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
sqlite3_result_int64(pCtx, iRowid);
}
- }else {
+ }else{
sqlite3_result_error(pCtx,
- "first arg to fts5_rowid() must be 'segment' "
- "or 'start-of-index'"
- , -1
+ "first arg to fts5_rowid() must be 'segment'" , -1
);
}
}
diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c
index f337005d0..2fd633bd2 100644
--- a/ext/fts5/fts5_main.c
+++ b/ext/fts5/fts5_main.c
@@ -1317,6 +1317,10 @@ static int fts5SpecialInsert(
rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
}else if( 0==sqlite3_stricmp("integrity-check", z) ){
rc = sqlite3Fts5StorageIntegrity(pTab->pStorage);
+#ifdef SQLITE_DEBUG
+ }else if( 0==sqlite3_stricmp("prefix-index", z) ){
+ pConfig->bPrefixIndex = sqlite3_value_int(pVal);
+#endif
}else{
rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
if( rc==SQLITE_OK ){
diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test
index e0ff6a613..1d48e4f7d 100644
--- a/ext/fts5/test/fts5aa.test
+++ b/ext/fts5/test/fts5aa.test
@@ -51,7 +51,7 @@ do_execsql_test 2.1 {
do_test 2.2 {
execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
-} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
+} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* leaves=1..1}}}/}
foreach w {a b c d e f} {
do_execsql_test 2.3.$w.asc {
@@ -139,7 +139,6 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
-breakpoint
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
@@ -201,6 +200,7 @@ for {set i 1} {$i <= 10} {incr i} {
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
} {}
+ if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test
index b998db05a..3881c7e16 100644
--- a/ext/fts5/test/fts5ad.test
+++ b/ext/fts5/test/fts5ad.test
@@ -205,6 +205,9 @@ foreach {T create} {
return $ret
}
+ do_execsql_test $T.integrity {
+ INSERT INTO t1(t1) VALUES('integrity-check');
+ }
foreach {bAsc sql} {
1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test
index 99dfeb357..efad1b206 100644
--- a/ext/fts5/test/fts5al.test
+++ b/ext/fts5/test/fts5al.test
@@ -26,17 +26,17 @@ ifcapable !fts5 {
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE ft1 USING fts5(x);
SELECT * FROM ft1_config;
-} {version 3}
+} {version 4}
do_execsql_test 1.2 {
INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
SELECT * FROM ft1_config;
-} {pgsz 32 version 3}
+} {pgsz 32 version 4}
do_execsql_test 1.3 {
INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
SELECT * FROM ft1_config;
-} {pgsz 64 version 3}
+} {pgsz 64 version 4}
#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test
index 3f57eb515..edaafb237 100644
--- a/ext/fts5/test/fts5corrupt.test
+++ b/ext/fts5/test/fts5corrupt.test
@@ -43,7 +43,7 @@ set segid [lindex [fts5_level_segids t1] 0]
do_test 1.3 {
execsql {
- DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4);
+ DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
@@ -52,7 +52,7 @@ do_test 1.4 {
db_restore_and_reopen
execsql {
UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
- rowid = fts5_rowid('segment', $segid, 0, 4);
+ rowid = fts5_rowid('segment', $segid, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test
index 3e8323b98..3a4fcfaae 100644
--- a/ext/fts5/test/fts5corrupt2.test
+++ b/ext/fts5/test/fts5corrupt2.test
@@ -209,13 +209,13 @@ foreach {tn nCut} {
execsql ROLLBACK
}
- do_test 4.$tn.x { expr $nCorrupt>0 } 1
+ # do_test 4.$tn.x { expr $nCorrupt>0 } 1
}
}
set doc [string repeat "A B C " 1000]
-do_execsql_test 4.0 {
+do_execsql_test 5.0 {
CREATE VIRTUAL TABLE x5 USING fts5(tt);
INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10)
@@ -230,7 +230,7 @@ foreach {tn hdr} {
foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
if {$rowid & $mask} continue
incr tn2
- do_test 4.$tn.$tn2 {
+ do_test 5.$tn.$tn2 {
execsql BEGIN
set fd [db incrblob main x5_data block $rowid]
@@ -248,7 +248,7 @@ foreach {tn hdr} {
#--------------------------------------------------------------------
reset_db
-do_execsql_test 5.1 {
+do_execsql_test 6.1 {
CREATE VIRTUAL TABLE x5 USING fts5(tt);
INSERT INTO x5 VALUES('a');
INSERT INTO x5 VALUES('a a');
@@ -262,7 +262,7 @@ proc colsize {cmd i} {
}
sqlite3_fts5_create_function db colsize colsize
-do_catchsql_test 5.2 {
+do_catchsql_test 6.2 {
SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
} {1 SQLITE_CORRUPT_VTAB}
diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test
index 453d79867..e9dffa579 100644
--- a/ext/fts5/test/fts5rowid.test
+++ b/ext/fts5/test/fts5rowid.test
@@ -27,15 +27,15 @@ do_catchsql_test 1.1 {
do_catchsql_test 1.2 {
SELECT fts5_rowid('segment')
-} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}
+} {1 {should be: fts5_rowid('segment', segid, pgno))}}
do_execsql_test 1.3 {
- SELECT fts5_rowid('segment', 1, 1, 1)
-} {139586437121}
+ SELECT fts5_rowid('segment', 1, 1)
+} {137438953473}
do_catchsql_test 1.4 {
SELECT fts5_rowid('nosucharg');
-} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}}
+} {1 {first arg to fts5_rowid() must be 'segment'}}
#-------------------------------------------------------------------------
diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test
new file mode 100644
index 000000000..c93519e6b
--- /dev/null
+++ b/ext/fts5/test/fts5simple.test
@@ -0,0 +1,173 @@
+# 2015 September 05
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5simple
+
+# If SQLITE_ENABLE_FTS5 is defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+if 1 {
+#-------------------------------------------------------------------------
+#
+set doc "x x [string repeat {y } 50]z z"
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(x);
+ INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
+ BEGIN;
+ INSERT INTO t1 VALUES($doc);
+ COMMIT;
+}
+
+do_execsql_test 1.1 {
+ INSERT INTO t1(t1) VALUES('integrity-check');
+}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(x);
+ INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
+ INSERT INTO t1 VALUES('a b c');
+ INSERT INTO t1 VALUES('d e f');
+ INSERT INTO t1(t1) VALUES('optimize');
+}
+
+do_execsql_test 2.1 {
+ INSERT INTO t1(t1) VALUES('integrity-check');
+} {}
+
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 3.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(x, prefix='1,2');
+ INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
+ BEGIN;
+ INSERT INTO t1 VALUES('one');
+ SELECT * FROM t1 WHERE t1 MATCH 'o*';
+} {one}
+
+do_execsql_test 3.1 {
+ INSERT INTO t1(t1) VALUES('integrity-check');
+} {}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 4.1 {
+ CREATE VIRTUAL TABLE t11 USING fts5(content);
+ INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
+ INSERT INTO t11 VALUES('another');
+ INSERT INTO t11 VALUES('string');
+ INSERT INTO t11 VALUES('of');
+ INSERT INTO t11 VALUES('text');
+}
+do_test 4.2 {
+ execsql { INSERT INTO t11(t11) VALUES('optimize') }
+} {}
+do_execsql_test 4.3 {
+ INSERT INTO t11(t11) VALUES('integrity-check');
+} {}
+
+#db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x }
+
+#-------------------------------------------------------------------------
+reset_db
+set doc [string repeat "x y " 5]
+do_execsql_test 5.1 {
+ CREATE VIRTUAL TABLE yy USING fts5(content);
+ INSERT INTO yy(yy, rank) VALUES('pgsz', 32);
+ BEGIN;
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ INSERT INTO yy VALUES($doc);
+ COMMIT;
+}
+
+do_execsql_test 5.2 {
+ SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC
+} {1 2 3 4 5 6 7 8}
+
+do_execsql_test 5.3 {
+ SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
+} {8 7 6 5 4 3 2 1}
+
+#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 5.1 {
+ CREATE VIRTUAL TABLE tt USING fts5(content);
+ INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
+ INSERT INTO tt VALUES('aa');
+}
+
+do_execsql_test 5.2 {
+ SELECT rowid FROM tt WHERE tt MATCH 'a*';
+} {1}
+
+do_execsql_test 5.3 {
+ DELETE FROM tt;
+ BEGIN;
+ INSERT INTO tt VALUES('aa');
+ INSERT INTO tt VALUES('ab');
+ COMMIT;
+} {}
+
+do_execsql_test 5.4 {
+ SELECT rowid FROM tt WHERE tt MATCH 'a*';
+} {1 2}
+
+}
+
+do_execsql_test 5.5 {
+ DELETE FROM tt;
+ BEGIN;
+ INSERT INTO tt VALUES('aa');
+ INSERT INTO tt VALUES('ab');
+ INSERT INTO tt VALUES('aa');
+ INSERT INTO tt VALUES('ab');
+ INSERT INTO tt VALUES('aa');
+ INSERT INTO tt VALUES('ab');
+ INSERT INTO tt VALUES('aa');
+ INSERT INTO tt VALUES('ab');
+ COMMIT;
+ SELECT rowid FROM tt WHERE tt MATCH 'a*';
+} {1 2 3 4 5 6 7 8}
+
+do_execsql_test 5.6 {
+ INSERT INTO tt(tt) VALUES('integrity-check');
+}
+
+reset_db
+do_execsql_test 5.7 {
+ CREATE VIRTUAL TABLE tt USING fts5(content);
+ INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
+ INSERT INTO tt VALUES('aa ab ac ad ae af');
+}
+
+do_execsql_test 5.8 {
+ SELECT rowid FROM tt WHERE tt MATCH 'a*';
+} {1}
+
+finish_test
+
diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test
index 8c5a77214..7e4d74d11 100644
--- a/ext/fts5/test/fts5version.test
+++ b/ext/fts5/test/fts5version.test
@@ -30,34 +30,34 @@ do_execsql_test 1.1 {
do_execsql_test 1.2 {
SELECT * FROM t1_config WHERE k='version'
-} {version 3}
+} {version 4}
do_execsql_test 1.3 {
SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}
do_execsql_test 1.4 {
- UPDATE t1_config set v=4 WHERE k='version';
+ UPDATE t1_config set v=5 WHERE k='version';
}
do_test 1.5 {
db close
sqlite3 db test.db
catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
-} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}}
+} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}}
do_test 1.6 {
db close
sqlite3 db test.db
catchsql { INSERT INTO t1 VALUES('x y z') }
-} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}}
+} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}}
do_test 1.7 {
execsql { DELETE FROM t1_config WHERE k='version' }
db close
sqlite3 db test.db
catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
-} {1 {invalid fts5 file format (found 0, expected 3) - run 'rebuild'}}
+} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}}
finish_test
diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl
index 048de3ccd..4bf89d781 100644
--- a/ext/fts5/tool/loadfts5.tcl
+++ b/ext/fts5/tool/loadfts5.tcl
@@ -19,6 +19,12 @@ proc load_hierachy {dir} {
db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
incr ::nRow
+ if {$::O(trans) && ($::nRow % $::O(trans))==0} {
+ db eval { COMMIT }
+ db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
+ db eval { BEGIN }
+ }
+
if {($::nRow % $::nRowPerDot)==0} {
puts -nonewline .
if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
@@ -41,6 +47,7 @@ proc usage {} {
puts stderr " -automerge N (set the automerge parameter to N)"
puts stderr " -crisismerge N (set the crisismerge parameter to N)"
puts stderr " -prefix PREFIX (comma separated prefix= argument)"
+ puts stderr " -trans N (commit after N inserts - 0 == never)"
exit 1
}
@@ -51,6 +58,7 @@ set O(delete) 0
set O(automerge) -1
set O(crisismerge) -1
set O(prefix) ""
+set O(trans) 0
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
@@ -77,6 +85,11 @@ for {set i 0} {$i < $nOpt} {incr i} {
if { [incr i]>=$nOpt } usage
set O(limit) [lindex $argv $i]
}
+
+ -trans {
+ if { [incr i]>=$nOpt } usage
+ set O(trans) [lindex $argv $i]
+ }
-automerge {
if { [incr i]>=$nOpt } usage
@@ -104,8 +117,9 @@ if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile
+db eval "PRAGMA page_size=4096"
-db transaction {
+db eval BEGIN
set pref ""
if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
catch {
@@ -126,7 +140,7 @@ db transaction {
}
}
load_hierachy [lindex $argv end]
-}
+db eval COMMIT
diff --git a/manifest b/manifest
index 18da2c9e5..6d894da74 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Create\sseparate\s"path"\sand\s"root"\scolumns\sin\sthe\sjson_each()\sand\sjson_tree()\nvirtual\stables.\s\s"Root"\sis\sthe\s2nd\sparameter\sand\sis\sfixed.\s\s\n"Path"\svaries\sas\sjson_tree()\swalks\sthe\shierarchy.
-D 2015-09-10T17:20:57.334
+C Modify\sthe\sfts5\sleaf\spage\sformat\sto\spermit\sfaster\sseek\soperations.\sThis\sis\sa\sfile-format\schange.\sAny\sexisting\sdatabases\scan\sbe\supgraded\sby\srunning\sthe\sfts5\s'rebuild'\scommand.
+D 2015-09-10T17:23:37.872
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h f04659e0df5af83731b102189a32280f74f4a6bc
-F ext/fts5/fts5Int.h f65d41f66accad0a289d6bd66b13c07d2932f9be
+F ext/fts5/fts5Int.h 81ba5e474979b166a52a8be306aa3b09d43a10e9
F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e
-F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
-F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384
+F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4
+F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695
F ext/fts5/fts5_expr.c a7726fe7045eec7caca8a074af747c8ea3545b83
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
-F ext/fts5/fts5_index.c 950e37028cc81ae21534819e79c73aea7efa6c8e
-F ext/fts5/fts5_main.c e9d0892424bb7f0a8b58613d4ff75cb650cf286e
+F ext/fts5/fts5_index.c 093e2e5936dab536cbe3e321bf4b53dda2b40547
+F ext/fts5/fts5_main.c 4b04c934084ea24a858438a04b5be8af3a9e0311
F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059
F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37
F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf
@@ -124,10 +124,10 @@ F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc
F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc
-F ext/fts5/test/fts5aa.test f558e1e5ccffa75d69e9a4814245d468ec6b6608
+F ext/fts5/test/fts5aa.test 4804f237005bb4ba8ea4a76120d8011ebcb5d611
F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad
F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c
-F ext/fts5/test/fts5ad.test b2edee8b7de0c21d2c88f8a18c195034aad6952d
+F ext/fts5/test/fts5ad.test e3dfb150fce971b4fd832498c29f56924d451b63
F ext/fts5/test/fts5ae.test 0a9984fc3479f89f8c63d9848d6ed0c465dfcebe
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
@@ -135,7 +135,7 @@ F ext/fts5/test/fts5ah.test e592c4978622dbc4de552cd0f9395df60ac5d54c
F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37
F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
-F ext/fts5/test/fts5al.test 440d77c0b39ba73bad2ceb8986c2bb1093570735
+F ext/fts5/test/fts5al.test 5c79525671862861906fa0a848da462a8473eafb
F ext/fts5/test/fts5alter.test 6022c61467a82aa11c70822ccad22b328dcf0d04
F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca
F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f
@@ -144,8 +144,8 @@ F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb
F ext/fts5/test/fts5columnsize.test a8cfef21ffa1c264b9f670a7d94eeaccb5341c07
F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b
F ext/fts5/test/fts5content.test 9a952c95518a14182dc3b59e3c8fa71cda82a4e1
-F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6
-F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b
+F ext/fts5/test/fts5corrupt.test c2ad090192708150d50d961278df10ae7a4b8b62
+F ext/fts5/test/fts5corrupt2.test 26c0a39dd9ff73207e6229f83b50b21d37c7658c
F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3
F ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1
F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b
@@ -172,16 +172,17 @@ F ext/fts5/test/fts5prefix.test 552a462f0e8595676611f41643de217fb4ac2808
F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
-F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460
+F ext/fts5/test/fts5rowid.test 3e3b66670ca65540fa321250ac12f890b17f9312
+F ext/fts5/test/fts5simple.test f629e24a35a9f31cfb16c9920e8c2316e3d93e94
F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59
F ext/fts5/test/fts5unicode3.test 35c3d02aa7acf7d43d8de3bfe32c15ba96e8928e
F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680
-F ext/fts5/test/fts5version.test 205beb2a67d9496af64df959e6a19238f69b83e8
+F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e
F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c
-F ext/fts5/tool/loadfts5.tcl 95edf0b6b92a09f9ed85595038b1108127987556
+F ext/fts5/tool/loadfts5.tcl 58e90407cc5c2b1770460119488fd7c0090d4dd3
F ext/fts5/tool/mkfts5c.tcl 5745072c7de346e18c7f491e4c3281fe8a1cfe51
F ext/fts5/tool/showfts5.tcl 9eaf6c3df352f98a2ab5ce1921dd94128ab1381d
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@@ -1384,7 +1385,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 47a46a9fa4a96cdb96a20b6aec802661b1ee4598
-R 8aee48c10c3e97ebfdd79a73a44181a5
-U drh
-Z 9f31b9d4db81b45c399857b6b93c0174
+P 127cce3eb96b819005832997e0a082df9fb96f0b 99de5e3613d557728dd196353516bc7cf64a0e6c
+R 94d00b9df8244e9367669b9ccbab9a5c
+U dan
+Z a63610a6f3469795714a7c77d99e36a6
diff --git a/manifest.uuid b/manifest.uuid
index 9d15c460f..a1569dabf 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-127cce3eb96b819005832997e0a082df9fb96f0b \ No newline at end of file
+0c0c4ae971e54efc526eed7bd071c90dfadb95ff \ No newline at end of file