diff options
Diffstat (limited to 'ext/fts5/fts5_index.c')
-rw-r--r-- | ext/fts5/fts5_index.c | 5337 |
1 files changed, 5337 insertions, 0 deletions
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c new file mode 100644 index 000000000..05c8d6831 --- /dev/null +++ b/ext/fts5/fts5_index.c @@ -0,0 +1,5337 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Low level access to the FTS index stored in the database file. The +** routines in this file file implement all read and write access to the +** %_data table. Other parts of the system access this functionality via +** the interface defined in fts5Int.h. +*/ + +#ifdef SQLITE_ENABLE_FTS5 + +#include "fts5Int.h" + +/* +** Overview: +** +** The %_data table contains all the FTS indexes for an FTS5 virtual table. +** As well as the main term index, there may be up to 31 prefix indexes. +** The format is similar to FTS3/4, except that: +** +** * all segment b-tree leaf data is stored in fixed size page records +** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is +** taken to ensure it is possible to iterate in either direction through +** the entries in a doclist, or to seek to a specific entry within a +** doclist, without loading it into memory. +** +** * large doclists that span many pages have associated "doclist index" +** records that contain a copy of the first docid on each page spanned by +** the doclist. This is used to speed up seek operations, and merges of +** large doclists with very small doclists. +** +** * extra fields in the "structure record" record the state of ongoing +** incremental merge operations. +** +*/ + + +#define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ +#define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ + +#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ + +/* +** Details: +** +** The %_data table managed by this module, +** +** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); +** +** , contains the following 5 types of records. See the comments surrounding +** the FTS5_*_ROWID macros below for a description of how %_data rowids are +** assigned to each fo them. +** +** 1. Structure Records: +** +** The set of segments that make up an index - the index structure - are +** recorded in a single record within the %_data table. The record consists +** of a single 32-bit configuration cookie value followed by a list of +** SQLite varints. If the FTS table features more than one index (because +** there are one or more prefix indexes), it is guaranteed that all share +** the same cookie value. +** +** Immediately following the configuration cookie, the record begins with +** three varints: +** +** + number of levels, +** + total number of segments on all levels, +** + value of write counter. +** +** Then, for each level from 0 to nMax: +** +** + number of input segments in ongoing merge. +** + total number of segments in level. +** + for each segment from oldest to newest: +** + segment id (always > 0) +** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) +** + first leaf page number (often 1, always greater than 0) +** + final leaf page number +** +** 2. The Averages Record: +** +** A single record within the %_data table. The data is a list of varints. +** The first value is the number of rows in the index. Then, for each column +** from left to right, the total number of tokens in the column for all +** rows of the table. +** +** 3. Segment leaves: +** +** TERM DOCLIST FORMAT: +** +** Most of each segment leaf is taken up by term/doclist data. The +** general format of the term/doclist data is: +** +** varint : size of first term +** blob: first term data +** doclist: first doclist +** zero-or-more { +** varint: number of bytes in common with previous term +** varint: number of bytes of new term data (nNew) +** blob: nNew bytes of new term data +** doclist: next doclist +** } +** +** doclist format: +** +** varint: first rowid +** poslist: first poslist +** zero-or-more { +** varint: rowid delta (always > 0) +** poslist: next poslist +** } +** 0x00 byte +** +** poslist format: +** +** varint: size of poslist in bytes multiplied by 2, not including +** this field. Plus 1 if this entry carries the "delete" flag. +** collist: collist for column 0 +** zero-or-more { +** 0x01 byte +** varint: column number (I) +** collist: collist for column I +** } +** +** collist format: +** +** varint: first offset + 2 +** zero-or-more { +** varint: offset delta + 2 +** } +** +** PAGINATION +** +** The format described above is only accurate if the entire term/doclist +** data fits on a single leaf page. If this is not the case, the format +** is changed in two ways: +** +** + if the first rowid on a page occurs before the first term, it +** is stored as a literal value: +** +** varint: first rowid +** +** + the first term on each page is stored in the same way as the +** very first term of the segment: +** +** varint : size of first term +** blob: first term data +** +** Each leaf page begins with: +** +** + 2-byte unsigned containing offset to first rowid (or 0). +** + 2-byte unsigned containing offset to first term (or 0). +** +** Followed by term/doclist data. +** +** 4. Segment interior nodes: +** +** The interior nodes turn the list of leaves into a b+tree. +** +** Each interior node begins with a varint - the page number of the left +** most child node. Following this, for each leaf page except the first, +** the interior nodes contain: +** +** a) If the leaf page contains at least one term, then a term-prefix that +** is greater than all previous terms, and less than or equal to the +** first term on the leaf page. +** +** b) If the leaf page no terms, a record indicating how many consecutive +** leaves contain no terms, and whether or not there is an associated +** by-rowid index record. +** +** By definition, there is never more than one type (b) record in a row. +** Type (b) records only ever appear on height=1 pages - immediate parents +** of leaves. Only type (a) records are pushed to higher levels. +** +** Term format: +** +** * Number of bytes in common with previous term plus 2, as a varint. +** * Number of bytes of new term data, as a varint. +** * new term data. +** +** No-term format: +** +** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there +** is an associated index-by-rowid record. +** * the number of zero-term leaves as a varint. +** +** 5. Segment doclist indexes: +** +** A list of varints. If the first termless page contains at least one +** docid, the list begins with that docid as a varint followed by the +** value 1 (0x01). Or, if the first termless page contains no docids, +** a varint containing the last docid stored on the term page followed +** by a 0 (0x00) value. +** +** For each subsequent page in the doclist, either a 0x00 byte if the +** page contains no terms, or a delta-encoded docid (always +ve) +** representing the first docid on the page otherwise. +*/ + +/* +** Rowids for the averages and structure records in the %_data table. +*/ +#define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ +#define FTS5_STRUCTURE_ROWID(iIdx) (10 + (iIdx)) /* For structure records */ + +/* +** Macros determining the rowids used by segment nodes. All nodes in all +** segments for all indexes (the regular FTS index and any prefix indexes) +** are stored in the %_data table with large positive rowids. +** +** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS) +** indexes - one regular term index and zero or more prefix indexes. +** +** Each segment in an index has a unique id greater than zero. +** +** Each node in a segment b-tree is assigned a "page number" that is unique +** within nodes of its height within the segment (leaf nodes have a height +** of 0, parents 1, etc.). Page numbers are allocated sequentially so that +** a nodes page number is always one more than its left sibling. +** +** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro +** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used +** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns +** SQLITE_FULL and fails the current operation if they ever prove too small. +*/ +#define FTS5_DATA_IDX_B 5 /* Max of 31 prefix indexes */ +#define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ +#define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ +#define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ + +#define FTS5_SEGMENT_ROWID(idx, segid, height, pgno) ( \ + ((i64)(idx) << (FTS5_DATA_ID_B + FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ + ((i64)(segid) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ + ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ + ((i64)(pgno)) \ +) + +#if FTS5_MAX_PREFIX_INDEXES > ((1<<FTS5_DATA_IDX_B)-1) +# error "FTS5_MAX_PREFIX_INDEXES is too large" +#endif + +/* +** The height of segment b-trees is actually limited to one less than +** (1<<HEIGHT_BITS). This is because the rowid address space for nodes +** with such a height is used by doclist indexes. +*/ +#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1) + +/* +** The rowid for the doclist index associated with leaf page pgno of segment +** segid in index idx. +*/ +#define FTS5_DOCLIST_IDX_ROWID(idx, segid, pgno) \ + FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno) + +#ifdef SQLITE_DEBUG +int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } +#endif + + +/* +** Each time a blob is read from the %_data table, it is padded with this +** many zero bytes. This makes it easier to decode the various record formats +** without overreading if the records are corrupt. +*/ +#define FTS5_DATA_ZERO_PADDING 8 + +typedef struct Fts5BtreeIter Fts5BtreeIter; +typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; +typedef struct Fts5ChunkIter Fts5ChunkIter; +typedef struct Fts5Data Fts5Data; +typedef struct Fts5DlidxIter Fts5DlidxIter; +typedef struct Fts5MultiSegIter Fts5MultiSegIter; +typedef struct Fts5NodeIter Fts5NodeIter; +typedef struct Fts5PageWriter Fts5PageWriter; +typedef struct Fts5PosIter Fts5PosIter; +typedef struct Fts5SegIter Fts5SegIter; +typedef struct Fts5DoclistIter Fts5DoclistIter; +typedef struct Fts5SegWriter Fts5SegWriter; +typedef struct Fts5Structure Fts5Structure; +typedef struct Fts5StructureLevel Fts5StructureLevel; +typedef struct Fts5StructureSegment Fts5StructureSegment; + +struct Fts5Data { + u8 *p; /* Pointer to buffer containing record */ + int n; /* Size of record in bytes */ + int nRef; /* Ref count */ +}; + +/* +** One object per %_data table. +*/ +struct Fts5Index { + Fts5Config *pConfig; /* Virtual table configuration */ + char *zDataTbl; /* Name of %_data table */ + int nWorkUnit; /* Leaf pages in a "unit" of work */ + + /* + ** Variables related to the accumulation of tokens and doclists within the + ** in-memory hash tables before they are flushed to disk. + */ + Fts5Hash **apHash; /* Array of hash tables */ + int nMaxPendingData; /* Max pending data before flush to disk */ + int nPendingData; /* Current bytes of pending data */ + i64 iWriteRowid; /* Rowid for current doc being written */ + + /* Error state. */ + int rc; /* Current error code */ + + /* State used by the fts5DataXXX() functions. */ + sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ + sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ + sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ + int nRead; /* Total number of blocks read */ +}; + +struct Fts5DoclistIter { + int bDesc; /* True for DESC order, false for ASC */ + u8 *a; + int n; + int i; + + /* Output variables. aPoslist==0 at EOF */ + i64 iRowid; + u8 *aPoslist; + int nPoslist; +}; + +/* +** Each iterator used by external modules is an instance of this type. +*/ +struct Fts5IndexIter { + Fts5Index *pIndex; + Fts5Structure *pStruct; + Fts5MultiSegIter *pMulti; + Fts5DoclistIter *pDoclist; + Fts5Buffer poslist; /* Buffer containing current poslist */ +}; + +/* +** The contents of the "structure" record for each index are represented +** using an Fts5Structure record in memory. Which uses instances of the +** other Fts5StructureXXX types as components. +*/ +struct Fts5StructureSegment { + int iSegid; /* Segment id */ + int nHeight; /* Height of segment b-tree */ + int pgnoFirst; /* First leaf page number in segment */ + int pgnoLast; /* Last leaf page number in segment */ +}; +struct Fts5StructureLevel { + int nMerge; /* Number of segments in incr-merge */ + int nSeg; /* Total number of segments on level */ + Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ +}; +struct Fts5Structure { + u64 nWriteCounter; /* Total leaves written to level 0 */ + int nLevel; /* Number of levels in this index */ + Fts5StructureLevel aLevel[0]; /* Array of nLevel level objects */ +}; + +/* +** An object of type Fts5SegWriter is used to write to segments. +*/ +struct Fts5PageWriter { + int pgno; /* Page number for this page */ + Fts5Buffer buf; /* Buffer containing page data */ + Fts5Buffer term; /* Buffer containing previous term on page */ +}; +struct Fts5SegWriter { + int iIdx; /* Index to write to */ + int iSegid; /* Segid to write to */ + int nWriter; /* Number of entries in aWriter */ + Fts5PageWriter *aWriter; /* Array of PageWriter objects */ + i64 iPrevRowid; /* Previous docid written to current leaf */ + u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ + u8 bFirstRowidInPage; /* True if next rowid is first in page */ + u8 bFirstTermInPage; /* True if next term will be first in leaf */ + int nLeafWritten; /* Number of leaf pages written */ + int nEmpty; /* Number of contiguous term-less nodes */ + Fts5Buffer cdlidx; /* Doclist index */ + i64 iDlidxPrev; /* Previous rowid appended to dlidx */ + int bDlidxPrevValid; /* True if iDlidxPrev is valid */ +}; + +/* +** Object for iterating through the merged results of one or more segments, +** visiting each term/docid pair in the merged data. +** +** nSeg is always a power of two greater than or equal to the number of +** segments that this object is merging data from. Both the aSeg[] and +** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded +** with zeroed objects - these are handled as if they were iterators opened +** on empty segments. +** +** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an +** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the +** comparison in this context is the index of the iterator that currently +** points to the smaller term/rowid combination. Iterators at EOF are +** considered to be greater than all other iterators. +** +** aFirst[1] contains the index in aSeg[] of the iterator that points to +** the smallest key overall. aFirst[0] is unused. +*/ + +typedef struct Fts5CResult Fts5CResult; +struct Fts5CResult { + u16 iFirst; /* aSeg[] index of firstest iterator */ + u8 bTermEq; /* True if the terms are equal */ +}; + +struct Fts5MultiSegIter { + int nSeg; /* Size of aSeg[] array */ + int bRev; /* True to iterate in reverse order */ + int bSkipEmpty; /* True to skip deleted entries */ + Fts5SegIter *aSeg; /* Array of segment iterators */ + Fts5CResult *aFirst; /* Current merge state (see above) */ +}; + +/* +** Object for iterating through a single segment, visiting each term/docid +** pair in the segment. +** +** pSeg: +** The segment to iterate through. +** +** iLeafPgno: +** Current leaf page number within segment. +** +** iLeafOffset: +** Byte offset within the current leaf that is the first byte of the +** position list data (one byte passed the position-list size field). +** rowid field of the current entry. Usually this is the size field of the +** position list data. The exception is if the rowid for the current entry +** is the last thing on the leaf page. +** +** pLeaf: +** Buffer containing current leaf page data. Set to NULL at EOF. +** +** iTermLeafPgno, iTermLeafOffset: +** Leaf page number containing the last term read from the segment. And +** the offset immediately following the term data. +** +** flags: +** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: +** +** FTS5_SEGITER_ONETERM: +** If set, set the iterator to point to EOF after the current doclist +** has been exhausted. Do not proceed to the next term in the segment. +** +** FTS5_SEGITER_REVERSE: +** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If +** it is set, iterate through docids in descending order instead of the +** default ascending order. +** +** iRowidOffset/nRowidOffset/aRowidOffset: +** These are used if the FTS5_SEGITER_REVERSE flag is set. +** +** For each rowid on the page corresponding to the current term, the +** corresponding aRowidOffset[] entry is set to the byte offset of the +** start of the "position-list-size" field within the page. +*/ +struct Fts5SegIter { + Fts5StructureSegment *pSeg; /* Segment to iterate through */ + int iIdx; /* Byte offset within current leaf */ + int flags; /* Mask of configuration flags */ + int iLeafPgno; /* Current leaf page number */ + Fts5Data *pLeaf; /* Current leaf data */ + int iLeafOffset; /* Byte offset within current leaf */ + + /* The page and offset from which the current term was read. The offset + ** is the offset of the first rowid in the current doclist. */ + int iTermLeafPgno; + int iTermLeafOffset; + + /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ + int iRowidOffset; /* Current entry in aRowidOffset[] */ + int nRowidOffset; /* Allocated size of aRowidOffset[] array */ + int *aRowidOffset; /* Array of offset to rowid fields */ + + Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ + + /* Variables populated based on current entry. */ + Fts5Buffer term; /* Current term */ + i64 iRowid; /* Current rowid */ + int nPos; /* Number of bytes in current position list */ + int bDel; /* True if the delete flag is set */ +}; + +#define FTS5_SEGITER_ONETERM 0x01 +#define FTS5_SEGITER_REVERSE 0x02 + + +/* +** Object for iterating through paginated data. +*/ +struct Fts5ChunkIter { + Fts5Data *pLeaf; /* Current leaf data. NULL -> EOF. */ + i64 iLeafRowid; /* Absolute rowid of current leaf */ + int nRem; /* Remaining bytes of data to read */ + + /* Output parameters */ + u8 *p; /* Pointer to chunk of data */ + int n; /* Size of buffer p in bytes */ +}; + +/* +** Object for iterating through a single position list on disk. +*/ +struct Fts5PosIter { + Fts5ChunkIter chunk; /* Current chunk of data */ + int iOff; /* Offset within chunk data */ + + int iCol; + int iPos; +}; + +/* +** Object for iterating through the conents of a single internal node in +** memory. +*/ +struct Fts5NodeIter { + /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, + ** the EOF test for the iterator is (Fts5NodeIter.aData==0). */ + const u8 *aData; + int nData; + int iOff; + + /* Output variables */ + Fts5Buffer term; + int nEmpty; + int iChild; + int bDlidx; +}; + +/* +** An instance of the following type is used to iterate through the contents +** of a doclist-index record. +** +** pData: +** Record containing the doclist-index data. +** +** bEof: +** Set to true once iterator has reached EOF. +** +** iOff: +** Set to the current offset within record pData. +*/ +struct Fts5DlidxIter { + Fts5Data *pData; /* Data for doclist index, if any */ + int iOff; /* Current offset into pDlidx */ + int bEof; /* At EOF already */ + int iFirstOff; /* Used by reverse iterators only */ + + /* Output variables */ + int iLeafPgno; /* Page number of current leaf page */ + i64 iRowid; /* First rowid on leaf iLeafPgno */ +}; + + +/* +** An Fts5BtreeIter object is used to iterate through all entries in the +** b-tree hierarchy belonging to a single fts5 segment. In this case the +** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the +** b-tree hierarchy consists of the following: +** +** iLeaf: The page number of the leaf page the entry points to. +** +** term: A split-key that all terms on leaf page $iLeaf must be greater +** than or equal to. The "term" associated with the first b-tree +** hierarchy entry (the one that points to leaf page 1) is always +** an empty string. +** +** nEmpty: The number of empty (termless) leaf pages that immediately +** following iLeaf. +** +** The Fts5BtreeIter object is only used as part of the integrity-check code. +*/ +struct Fts5BtreeIterLevel { + Fts5NodeIter s; /* Iterator for the current node */ + Fts5Data *pData; /* Data for the current node */ +}; +struct Fts5BtreeIter { + Fts5Index *p; /* FTS5 backend object */ + Fts5StructureSegment *pSeg; /* Iterate through this segment's b-tree */ + int iIdx; /* Index pSeg belongs to */ + int nLvl; /* Size of aLvl[] array */ + Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */ + + /* Output variables */ + Fts5Buffer term; /* Current term */ + int iLeaf; /* Leaf containing terms >= current term */ + int nEmpty; /* Number of "empty" leaves following iLeaf */ + int bEof; /* Set to true at EOF */ + int bDlidx; /* True if there exists a dlidx */ +}; + + +static void fts5PutU16(u8 *aOut, u16 iVal){ + aOut[0] = (iVal>>8); + aOut[1] = (iVal&0xFF); +} + +static u16 fts5GetU16(const u8 *aIn){ + return ((u16)aIn[0] << 8) + aIn[1]; +} + +/* +** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. +** Except, this version does handle the single byte case that the core +** version depends on being handled before its function is called. +*/ +int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ + u32 a,b; + + /* The 1-byte case. Overwhelmingly the most common. */ + a = *p; + /* a: p0 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 0 and 127 */ + *v = a; + return 1; + } + + /* The 2-byte case */ + p++; + b = *p; + /* b: p1 (unmasked) */ + if (!(b&0x80)) + { + /* Values between 128 and 16383 */ + a &= 0x7f; + a = a<<7; + *v = a | b; + return 2; + } + + /* The 3-byte case */ + p++; + a = a<<14; + a |= *p; + /* a: p0<<14 | p2 (unmasked) */ + if (!(a&0x80)) + { + /* Values between 16384 and 2097151 */ + a &= (0x7f<<14)|(0x7f); + b &= 0x7f; + b = b<<7; + *v = a | b; + return 3; + } + + /* A 32-bit varint is used to store size information in btrees. + ** Objects are rarely larger than 2MiB limit of a 3-byte varint. + ** A 3-byte varint is sufficient, for example, to record the size + ** of a 1048569-byte BLOB or string. + ** + ** We only unroll the first 1-, 2-, and 3- byte cases. The very + ** rare larger cases can be handled by the slower 64-bit varint + ** routine. + */ + { + u64 v64; + u8 n; + p -= 2; + n = sqlite3GetVarint(p, &v64); + *v = (u32)v64; + assert( n>3 && n<=9 ); + return n; + } +} + +int sqlite3Fts5GetVarintLen(u32 iVal){ + if( iVal<(1 << 7 ) ) return 1; + if( iVal<(1 << 14) ) return 2; + if( iVal<(1 << 21) ) return 3; + if( iVal<(1 << 28) ) return 4; + return 5; +} + +/* +** Allocate and return a buffer at least nByte bytes in size. +** +** If an OOM error is encountered, return NULL and set the error code in +** the Fts5Index handle passed as the first argument. +*/ +static void *fts5IdxMalloc(Fts5Index *p, int nByte){ + void *pRet = 0; + if( p->rc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + } + } + return pRet; +} + +/* +** Compare the contents of the pLeft buffer with the pRight/nRight blob. +** +** Return -ve if pLeft is smaller than pRight, 0 if they are equal or +** +ve if pRight is smaller than pLeft. In other words: +** +** res = *pLeft - *pRight +*/ +static int fts5BufferCompareBlob( + Fts5Buffer *pLeft, /* Left hand side of comparison */ + const u8 *pRight, int nRight /* Right hand side of comparison */ +){ + int nCmp = MIN(pLeft->n, nRight); + int res = memcmp(pLeft->p, pRight, nCmp); + return (res==0 ? (pLeft->n - nRight) : res); +} + +/* +** Compare the contents of the two buffers using memcmp(). If one buffer +** is a prefix of the other, it is considered the lesser. +** +** Return -ve if pLeft is smaller than pRight, 0 if they are equal or +** +ve if pRight is smaller than pLeft. In other words: +** +** res = *pLeft - *pRight +*/ +static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ + int nCmp = MIN(pLeft->n, pRight->n); + int res = memcmp(pLeft->p, pRight->p, nCmp); + return (res==0 ? (pLeft->n - pRight->n) : res); +} + + +/* +** Close the read-only blob handle, if it is open. +*/ +static void fts5CloseReader(Fts5Index *p){ + if( p->pReader ){ + sqlite3_blob *pReader = p->pReader; + p->pReader = 0; + sqlite3_blob_close(pReader); + } +} + +static Fts5Data *fts5DataReadOrBuffer( + Fts5Index *p, + Fts5Buffer *pBuf, + i64 iRowid +){ + Fts5Data *pRet = 0; + if( p->rc==SQLITE_OK ){ + int rc = SQLITE_OK; + +#if 0 +Fts5Buffer buf = {0,0,0}; +fts5DebugRowid(&rc, &buf, iRowid); +fprintf(stdout, "read: %s\n", buf.p); +fflush(stdout); +sqlite3_free(buf.p); +#endif + if( p->pReader ){ + /* This call may return SQLITE_ABORT if there has been a savepoint + ** rollback since it was last used. In this case a new blob handle + ** is required. */ + rc = sqlite3_blob_reopen(p->pReader, iRowid); + if( rc==SQLITE_ABORT ){ + fts5CloseReader(p); + rc = SQLITE_OK; + } + } + + /* If the blob handle is not yet open, open and seek it. Otherwise, use + ** the blob_reopen() API to reseek the existing blob handle. */ + if( p->pReader==0 ){ + Fts5Config *pConfig = p->pConfig; + rc = sqlite3_blob_open(pConfig->db, + pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader + ); + } + + if( rc==SQLITE_OK ){ + u8 *aOut; /* Read blob data into this buffer */ + int nByte = sqlite3_blob_bytes(p->pReader); + if( pBuf ){ + fts5BufferZero(pBuf); + fts5BufferGrow(&rc, pBuf, nByte); + aOut = pBuf->p; + pBuf->n = nByte; + }else{ + int nSpace = nByte + FTS5_DATA_ZERO_PADDING; + pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nSpace+sizeof(Fts5Data)); + if( pRet ){ + pRet->n = nByte; + aOut = pRet->p = (u8*)&pRet[1]; + pRet->nRef = 1; + } + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); + } + if( rc!=SQLITE_OK ){ + sqlite3_free(pRet); + pRet = 0; + } + } + p->rc = rc; + p->nRead++; + } + + return pRet; +} + +/* +** Retrieve a record from the %_data table. +** +** If an error occurs, NULL is returned and an error left in the +** Fts5Index object. +*/ +static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ + Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); + assert( (pRet==0)==(p->rc!=SQLITE_OK) ); + return pRet; +} + +/* +** Read a record from the %_data table into the buffer supplied as the +** second argument. +** +** If an error occurs, an error is left in the Fts5Index object. If an +** error has already occurred when this function is called, it is a +** no-op. +*/ +static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ + (void)fts5DataReadOrBuffer(p, pBuf, iRowid); +} + +/* +** Release a reference to data record returned by an earlier call to +** fts5DataRead(). +*/ +static void fts5DataRelease(Fts5Data *pData){ + if( pData ){ + assert( pData->nRef>0 ); + pData->nRef--; + if( pData->nRef==0 ) sqlite3_free(pData); + } +} + +static void fts5DataReference(Fts5Data *pData){ + pData->nRef++; +} + +/* +** INSERT OR REPLACE a record into the %_data table. +*/ +static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ + if( p->rc!=SQLITE_OK ) return; + + if( p->pWriter==0 ){ + int rc; + Fts5Config *pConfig = p->pConfig; + char *zSql = sqlite3_mprintf( + "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0); + sqlite3_free(zSql); + } + if( rc!=SQLITE_OK ){ + p->rc = rc; + return; + } + } + + sqlite3_bind_int64(p->pWriter, 1, iRowid); + sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); + sqlite3_step(p->pWriter); + p->rc = sqlite3_reset(p->pWriter); +} + +/* +** Execute the following SQL: +** +** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast +*/ +static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ + if( p->rc!=SQLITE_OK ) return; + + if( p->pDeleter==0 ){ + int rc; + Fts5Config *pConfig = p->pConfig; + char *zSql = sqlite3_mprintf( + "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0); + sqlite3_free(zSql); + } + if( rc!=SQLITE_OK ){ + p->rc = rc; + return; + } + } + + sqlite3_bind_int64(p->pDeleter, 1, iFirst); + sqlite3_bind_int64(p->pDeleter, 2, iLast); + sqlite3_step(p->pDeleter); + p->rc = sqlite3_reset(p->pDeleter); +} + +/* +** Close the sqlite3_blob handle used to read records from the %_data table. +** And discard any cached reads. This function is called at the end of +** a read transaction or when any sub-transaction is rolled back. +*/ +#if 0 +static void fts5DataReset(Fts5Index *p){ + if( p->pReader ){ + sqlite3_blob_close(p->pReader); + p->pReader = 0; + } +} +#endif + +/* +** Remove all records associated with segment iSegid in index iIdx. +*/ +static void fts5DataRemoveSegment(Fts5Index *p, int iIdx, int iSegid){ + i64 iFirst = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, 0); + i64 iLast = FTS5_SEGMENT_ROWID(iIdx, iSegid+1, 0, 0)-1; + fts5DataDelete(p, iFirst, iLast); +} + +/* +** Release a reference to an Fts5Structure object returned by an earlier +** call to fts5StructureRead() or fts5StructureDecode(). +*/ +static void fts5StructureRelease(Fts5Structure *pStruct){ + if( pStruct ){ + int i; + for(i=0; i<pStruct->nLevel; i++){ + sqlite3_free(pStruct->aLevel[i].aSeg); + } + sqlite3_free(pStruct); + } +} + +/* +** Deserialize and return the structure record currently stored in serialized +** form within buffer pData/nData. +** +** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array +** are over-allocated by one slot. This allows the structure contents +** to be more easily edited. +** +** If an error occurs, *ppOut is set to NULL and an SQLite error code +** returned. Otherwise, *ppOut is set to point to the new object and +** SQLITE_OK returned. +*/ +static int fts5StructureDecode( + const u8 *pData, /* Buffer containing serialized structure */ + int nData, /* Size of buffer pData in bytes */ + int *piCookie, /* Configuration cookie value */ + Fts5Structure **ppOut /* OUT: Deserialized object */ +){ + int rc = SQLITE_OK; + int i = 0; + int iLvl; + int nLevel = 0; + int nSegment = 0; + int nByte; /* Bytes of space to allocate at pRet */ + Fts5Structure *pRet = 0; /* Structure object to return */ + + /* Grab the cookie value */ + if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); + i = 4; + + /* Read the total number of levels and segments from the start of the + ** structure record. */ + i += fts5GetVarint32(&pData[i], nLevel); + i += fts5GetVarint32(&pData[i], nSegment); + nByte = ( + sizeof(Fts5Structure) + /* Main structure */ + sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */ + ); + pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); + + if( pRet ){ + pRet->nLevel = nLevel; + i += sqlite3GetVarint(&pData[i], &pRet->nWriteCounter); + + for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ + Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; + int nTotal; + int iSeg; + + i += fts5GetVarint32(&pData[i], pLvl->nMerge); + i += fts5GetVarint32(&pData[i], nTotal); + assert( nTotal>=pLvl->nMerge ); + pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, + nTotal * sizeof(Fts5StructureSegment) + ); + + if( rc==SQLITE_OK ){ + pLvl->nSeg = nTotal; + for(iSeg=0; iSeg<nTotal; iSeg++){ + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); + i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); + } + }else{ + fts5StructureRelease(pRet); + pRet = 0; + } + } + } + + *ppOut = pRet; + return rc; +} + +/* +** +*/ +static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ + if( *pRc==SQLITE_OK ){ + Fts5Structure *pStruct = *ppStruct; + int nLevel = pStruct->nLevel; + int nByte = ( + sizeof(Fts5Structure) + /* Main structure */ + sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ + ); + + pStruct = sqlite3_realloc(pStruct, nByte); + if( pStruct ){ + memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); + pStruct->nLevel++; + *ppStruct = pStruct; + }else{ + *pRc = SQLITE_NOMEM; + } + } +} + +/* +** Extend level iLvl so that there is room for at least nExtra more +** segments. +*/ +static void fts5StructureExtendLevel( + int *pRc, + Fts5Structure *pStruct, + int iLvl, + int nExtra, + int bInsert +){ + if( *pRc==SQLITE_OK ){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + Fts5StructureSegment *aNew; + int nByte; + + nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); + aNew = sqlite3_realloc(pLvl->aSeg, nByte); + if( aNew ){ + if( bInsert==0 ){ + memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); + }else{ + int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); + memmove(&aNew[nExtra], aNew, nMove); + memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); + } + pLvl->aSeg = aNew; + }else{ + *pRc = SQLITE_NOMEM; + } + } +} + +/* +** Read, deserialize and return the structure record for index iIdx. +** +** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array +** are over-allocated as described for function fts5StructureDecode() +** above. +** +** If an error occurs, NULL is returned and an error code left in the +** Fts5Index handle. If an error has already occurred when this function +** is called, it is a no-op. +*/ +static Fts5Structure *fts5StructureRead(Fts5Index *p, int iIdx){ + Fts5Config *pConfig = p->pConfig; + Fts5Structure *pRet = 0; /* Object to return */ + Fts5Data *pData; /* %_data entry containing structure record */ + int iCookie; /* Configuration cookie */ + + assert( iIdx<=pConfig->nPrefix ); + pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx)); + if( !pData ) return 0; + p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); + + if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ + p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); + } + + fts5DataRelease(pData); + if( p->rc!=SQLITE_OK ){ + fts5StructureRelease(pRet); + pRet = 0; + } + return pRet; +} + +/* +** Return the total number of segments in index structure pStruct. +*/ +static int fts5StructureCountSegments(Fts5Structure *pStruct){ + int nSegment = 0; /* Total number of segments */ + if( pStruct ){ + int iLvl; /* Used to iterate through levels */ + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + nSegment += pStruct->aLevel[iLvl].nSeg; + } + } + + return nSegment; +} + +/* +** Serialize and store the "structure" record for index iIdx. +** +** If an error occurs, leave an error code in the Fts5Index object. If an +** error has already occurred, this function is a no-op. +*/ +static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){ + if( p->rc==SQLITE_OK ){ + int nSegment; /* Total number of segments */ + Fts5Buffer buf; /* Buffer to serialize record into */ + int iLvl; /* Used to iterate through levels */ + int iCookie; /* Cookie value to store */ + + nSegment = fts5StructureCountSegments(pStruct); + memset(&buf, 0, sizeof(Fts5Buffer)); + + /* Append the current configuration cookie */ + iCookie = p->pConfig->iCookie; + if( iCookie<0 ) iCookie = 0; + fts5BufferAppend32(&p->rc, &buf, iCookie); + + fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); + fts5BufferAppendVarint(&p->rc, &buf, nSegment); + fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); + + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + int iSeg; /* Used to iterate through segments */ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); + assert( pLvl->nMerge<=pLvl->nSeg ); + + for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); + fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); + } + } + + fts5DataWrite(p, FTS5_STRUCTURE_ROWID(iIdx), buf.p, buf.n); + fts5BufferFree(&buf); + } +} + +#if 0 +static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); +static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ + int rc = SQLITE_OK; + Fts5Buffer buf; + memset(&buf, 0, sizeof(buf)); + fts5DebugStructure(&rc, &buf, pStruct); + fprintf(stdout, "%s: %s\n", zCaption, buf.p); + fflush(stdout); + fts5BufferFree(&buf); +} +#else +# define fts5PrintStructure(x,y) +#endif + +static int fts5SegmentSize(Fts5StructureSegment *pSeg){ + return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; +} + +/* +** Return a copy of index structure pStruct. Except, promote as many +** segments as possible to level iPromote. If an OOM occurs, NULL is +** returned. +*/ +static void fts5StructurePromoteTo( + Fts5Index *p, + int iPromote, + int szPromote, + Fts5Structure *pStruct +){ + int il, is; + Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; + + if( pOut->nMerge==0 ){ + for(il=iPromote+1; il<pStruct->nLevel; il++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; + if( pLvl->nMerge ) return; + for(is=pLvl->nSeg-1; is>=0; is--){ + int sz = fts5SegmentSize(&pLvl->aSeg[is]); + if( sz>szPromote ) return; + fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); + if( p->rc ) return; + memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); + pOut->nSeg++; + pLvl->nSeg--; + } + } + } +} + +/* +** A new segment has just been written to level iLvl of index structure +** pStruct. This function determines if any segments should be promoted +** as a result. Segments are promoted in two scenarios: +** +** a) If the segment just written is smaller than one or more segments +** within the previous populated level, it is promoted to the previous +** populated level. +** +** b) If the segment just written is larger than the newest segment on +** the next populated level, then that segment, and any other adjacent +** segments that are also smaller than the one just written, are +** promoted. +** +** If one or more segments are promoted, the structure object is updated +** to reflect this. +*/ +static void fts5StructurePromote( + Fts5Index *p, /* FTS5 backend object */ + int iLvl, /* Index level just updated */ + Fts5Structure *pStruct /* Index structure */ +){ + if( p->rc==SQLITE_OK ){ + int iTst; + int iPromote = -1; + int szPromote; /* Promote anything this size or smaller */ + Fts5StructureSegment *pSeg; /* Segment just written */ + int szSeg; /* Size of segment just written */ + + + pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; + szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); + + /* Check for condition (a) */ + for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); + if( iTst>=0 ){ + int i; + int szMax = 0; + Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; + assert( pTst->nMerge==0 ); + for(i=0; i<pTst->nSeg; i++){ + int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; + if( sz>szMax ) szMax = sz; + } + if( szMax>=szSeg ){ + /* Condition (a) is true. Promote the newest segment on level + ** iLvl to level iTst. */ + iPromote = iTst; + szPromote = szMax; + } + } + + /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() + ** is a no-op if it is not. */ + if( iPromote<0 ){ + iPromote = iLvl; + szPromote = szSeg; + } + fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); + } +} + + +/* +** If the pIter->iOff offset currently points to an entry indicating one +** or more term-less nodes, advance past it and set pIter->nEmpty to +** the number of empty child nodes. +*/ +static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ + if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ + pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; + pIter->iOff++; + pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); + }else{ + pIter->nEmpty = 0; + pIter->bDlidx = 0; + } +} + +/* +** Advance to the next entry within the node. +*/ +static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ + if( pIter->iOff>=pIter->nData ){ + pIter->aData = 0; + pIter->iChild += pIter->nEmpty; + }else{ + int nPre, nNew; + pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre); + pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew); + pIter->term.n = nPre-2; + fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); + pIter->iOff += nNew; + pIter->iChild += (1 + pIter->nEmpty); + fts5NodeIterGobbleNEmpty(pIter); + if( *pRc ) pIter->aData = 0; + } +} + + +/* +** Initialize the iterator object pIter to iterate through the internal +** segment node in pData. +*/ +static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ + memset(pIter, 0, sizeof(*pIter)); + pIter->aData = aData; + pIter->nData = nData; + pIter->iOff = fts5GetVarint32(aData, pIter->iChild); + fts5NodeIterGobbleNEmpty(pIter); +} + +/* +** Free any memory allocated by the iterator object. +*/ +static void fts5NodeIterFree(Fts5NodeIter *pIter){ + fts5BufferFree(&pIter->term); +} + +/* +** The iterator passed as the first argument has the following fields set +** as follows. This function sets up the rest of the iterator so that it +** points to the first rowid in the doclist-index. +** +** pData: pointer to doclist-index record, +** iLeafPgno: page number that this doclist-index is associated with. +** +** When this function is called pIter->iLeafPgno is the page number the +** doclist is associated with (the one featuring the term). +*/ +static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ + Fts5Data *pData = pIter->pData; + int i; + int bPresent; + + assert( pIter->pData ); + assert( pIter->iLeafPgno>0 ); + + /* Read the first rowid value. And the "present" flag that follows it. */ + pIter->iOff += getVarint(&pData->p[0], (u64*)&pIter->iRowid); + bPresent = pData->p[pIter->iOff++]; + if( bPresent ){ + i = 0; + }else{ + /* Count the number of leading 0x00 bytes. */ + for(i=1; pIter->iOff<pData->n; i++){ + if( pData->p[pIter->iOff] ) break; + pIter->iOff++; + } + + /* Unless we are already at the end of the doclist-index, load the first + ** rowid value. */ + if( pIter->iOff<pData->n ){ + i64 iVal; + pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iVal); + pIter->iRowid += iVal; + }else{ + pIter->bEof = 1; + } + } + pIter->iLeafPgno += (i+1); + + pIter->iFirstOff = pIter->iOff; + return pIter->bEof; +} + +/* +** Advance the iterator passed as the only argument. +*/ +static int fts5DlidxIterNext(Fts5DlidxIter *pIter){ + Fts5Data *pData = pIter->pData; + int iOff; + + for(iOff=pIter->iOff; iOff<pData->n; iOff++){ + if( pData->p[iOff] ) break; + } + + if( iOff<pData->n ){ + i64 iVal; + pIter->iLeafPgno += (iOff - pIter->iOff) + 1; + iOff += getVarint(&pData->p[iOff], (u64*)&iVal); + pIter->iRowid += iVal; + pIter->iOff = iOff; + }else{ + pIter->bEof = 1; + } + + return pIter->bEof; +} + +static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ + return pIter->bEof; +} + +static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ + if( fts5DlidxIterFirst(pIter)==0 ){ + while( 0==fts5DlidxIterNext(pIter) ); + pIter->bEof = 0; + } +} + +static int fts5DlidxIterPrev(Fts5DlidxIter *pIter){ + int iOff = pIter->iOff; + + assert( pIter->bEof==0 ); + if( iOff<=pIter->iFirstOff ){ + pIter->bEof = 1; + }else{ + u8 *a = pIter->pData->p; + i64 iVal; + int iLimit; + + /* Currently iOff points to the first byte of a varint. This block + ** decrements iOff until it points to the first byte of the previous + ** varint. Taking care not to read any memory locations that occur + ** before the buffer in memory. */ + iLimit = (iOff>9 ? iOff-9 : 0); + for(iOff--; iOff>iLimit; iOff--){ + if( (a[iOff-1] & 0x80)==0 ) break; + } + + getVarint(&a[iOff], (u64*)&iVal); + pIter->iRowid -= iVal; + pIter->iLeafPgno--; + + /* Skip backwards passed any 0x00 bytes. */ + while( iOff>pIter->iFirstOff + && a[iOff-1]==0x00 && (a[iOff-2] & 0x80)==0 + ){ + iOff--; + pIter->iLeafPgno--; + } + pIter->iOff = iOff; + } + + return pIter->bEof; +} + +static Fts5DlidxIter *fts5DlidxIterInit( + Fts5Index *p, /* Fts5 Backend to iterate within */ + int bRev, /* True for ORDER BY ASC */ + int iIdx, int iSegid, /* Segment iSegid within index iIdx */ + int iLeafPg /* Leaf page number to load dlidx for */ +){ + Fts5DlidxIter *pIter; + + pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); + if( pIter==0 ) return 0; + + pIter->pData = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPg)); + if( pIter->pData==0 ){ + sqlite3_free(pIter); + pIter = 0; + }else{ + pIter->iLeafPgno = iLeafPg; + if( bRev==0 ){ + fts5DlidxIterFirst(pIter); + }else{ + fts5DlidxIterLast(pIter); + } + } + + return pIter; +} + +/* +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). +*/ +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ + if( pIter ){ + fts5DataRelease(pIter->pData); + sqlite3_free(pIter); + } +} + +static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ + *piRowid = (int)fts5GetU16(&pLeaf->p[0]); + *piTerm = (int)fts5GetU16(&pLeaf->p[2]); +} + +/* +** Load the next leaf page into the segment iterator. +*/ +static void fts5SegIterNextPage( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter /* Iterator to advance to next page */ +){ + Fts5StructureSegment *pSeg = pIter->pSeg; + fts5DataRelease(pIter->pLeaf); + pIter->iLeafPgno++; + if( pIter->iLeafPgno<=pSeg->pgnoLast ){ + pIter->pLeaf = fts5DataRead(p, + FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno) + ); + }else{ + pIter->pLeaf = 0; + } +} + +/* +** Argument p points to a buffer containing a varint to be interpreted as a +** position list size field. Read the varint and return the number of bytes +** read. Before returning, set *pnSz to the number of bytes in the position +** list, and *pbDel to true if the delete flag is set, or false otherwise. +*/ +static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ + int nSz; + int n = fts5GetVarint32(p, nSz); + *pnSz = nSz/2; + *pbDel = nSz & 0x0001; + return n; +} + +/* +** Fts5SegIter.iLeafOffset currently points to the first byte of a +** position-list size field. Read the value of the field and store it +** in the following variables: +** +** Fts5SegIter.nPos +** Fts5SegIter.bDel +** +** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the +** position list content (if any). +*/ +static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ + if( p->rc==SQLITE_OK ){ + int iOff = pIter->iLeafOffset; /* Offset to read at */ + if( iOff>=pIter->pLeaf->n ){ + assert( 0 ); + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ){ + if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; + return; + } + iOff = 4; + } + iOff += fts5GetPoslistSize(pIter->pLeaf->p+iOff, &pIter->nPos,&pIter->bDel); + pIter->iLeafOffset = iOff; + } +} + +/* +** Fts5SegIter.iLeafOffset currently points to the first byte of the +** "nSuffix" field of a term. Function parameter nKeep contains the value +** of the "nPrefix" field (if there was one - it is passed 0 if this is +** the first term in the segment). +** +** This function populates: +** +** Fts5SegIter.term +** Fts5SegIter.rowid +** Fts5SegIter.nPos +** Fts5SegIter.bDel +** +** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of +** the first position list. The position list belonging to document +** (Fts5SegIter.iRowid). +*/ +static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ + u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ + int iOff = pIter->iLeafOffset; /* Offset to read at */ + int nNew; /* Bytes of new data */ + + iOff += fts5GetVarint32(&a[iOff], nNew); + pIter->term.n = nKeep; + fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); + iOff += nNew; + pIter->iTermLeafOffset = iOff; + pIter->iTermLeafPgno = pIter->iLeafPgno; + if( iOff>=pIter->pLeaf->n ){ + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ){ + if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; + return; + } + iOff = 4; + a = pIter->pLeaf->p; + } + iOff += sqlite3GetVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; +} + +/* +** Initialize the iterator object pIter to iterate through the entries in +** segment pSeg within index iIdx. The iterator is left pointing to the +** first entry when this function returns. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterInit( + Fts5Index *p, + int iIdx, /* Config.aHash[] index of FTS index */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + if( pSeg->pgnoFirst==0 ){ + /* This happens if the segment is being used as an input to an incremental + ** merge and all data has already been "trimmed". See function + ** fts5TrimSegments() for details. In this case leave the iterator empty. + ** The caller will see the (pIter->pLeaf==0) and assume the iterator is + ** at EOF already. */ + assert( pIter->pLeaf==0 ); + return; + } + + if( p->rc==SQLITE_OK ){ + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + pIter->iLeafPgno = pSeg->pgnoFirst-1; + fts5SegIterNextPage(p, pIter); + } + + if( p->rc==SQLITE_OK ){ + u8 *a = pIter->pLeaf->p; + pIter->iLeafOffset = fts5GetU16(&a[2]); + fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); + } +} + +/* +** This function is only ever called on iterators created by calls to +** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. +** +** The iterator is in an unusual state when this function is called: the +** Fts5SegIter.iLeafOffset variable is set to the offset of the start of +** the position-list size field for the first relevant rowid on the page. +** Fts5SegIter.rowid is set, but nPos and bDel are not. +** +** This function advances the iterator so that it points to the last +** relevant rowid on the page and, if necessary, initializes the +** aRowidOffset[] and iRowidOffset variables. At this point the iterator +** is in its regular state - Fts5SegIter.iLeafOffset points to the first +** byte of the position list content associated with said rowid. +*/ +static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ + int n = pIter->pLeaf->n; + int i = pIter->iLeafOffset; + u8 *a = pIter->pLeaf->p; + int iRowidOffset = 0; + + while( p->rc==SQLITE_OK && i<n ){ + i64 iDelta = 0; + int nPos; + int bDummy; + + i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); + i += nPos; + if( i>=n ) break; + i += getVarint(&a[i], (u64*)&iDelta); + if( iDelta==0 ) break; + pIter->iRowid += iDelta; + + if( iRowidOffset>=pIter->nRowidOffset ){ + int nNew = pIter->nRowidOffset + 8; + int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + break; + } + pIter->aRowidOffset = aNew; + pIter->nRowidOffset = nNew; + } + + pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; + pIter->iLeafOffset = i; + } + pIter->iRowidOffset = iRowidOffset; + fts5SegIterLoadNPos(p, pIter); +} + +/* +** +*/ +static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ + assert( pIter->flags & FTS5_SEGITER_REVERSE ); + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ + Fts5Data *pNew; + pIter->iLeafPgno--; + pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( + pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + )); + if( pNew ){ + if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ + if( pIter->iTermLeafOffset<pNew->n ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = pIter->iTermLeafOffset; + } + }else{ + int iRowidOff, dummy; + fts5LeafHeader(pNew, &iRowidOff, &dummy); + if( iRowidOff ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = iRowidOff; + } + } + + if( pIter->pLeaf ){ + u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; + pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid); + break; + }else{ + fts5DataRelease(pNew); + } + } + } + + if( pIter->pLeaf ){ + fts5SegIterReverseInitPage(p, pIter); + } +} + +/* +** Return true if the iterator passed as the second argument currently +** points to a delete marker. A delete marker is an entry with a 0 byte +** position-list. +*/ +static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); +} + +/* +** Advance iterator pIter to the next entry. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. It +** is not considered an error if the iterator reaches EOF. If an error has +** already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterNext( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ +){ + assert( pbNewTerm==0 || *pbNewTerm==0 ); + if( p->rc==SQLITE_OK ){ + if( pIter->flags & FTS5_SEGITER_REVERSE ){ + + if( pIter->iRowidOffset>0 ){ + u8 *a = pIter->pLeaf->p; + int iOff; + int nPos; + int bDummy; + i64 iDelta; + + if( p->rc==SQLITE_OK ){ + pIter->iRowidOffset--; + pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); + iOff += nPos; + getVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid -= iDelta; + fts5SegIterLoadNPos(p, pIter); + } + }else{ + fts5SegIterReverseNewPage(p, pIter); + } + }else{ + Fts5Data *pLeaf = pIter->pLeaf; + int iOff; + int bNewTerm = 0; + int nKeep = 0; + + /* Search for the end of the position list within the current page. */ + u8 *a = pLeaf->p; + int n = pLeaf->n; + + iOff = pIter->iLeafOffset + pIter->nPos; + + if( iOff<n ){ + /* The next entry is on the current page */ + u64 iDelta; + iOff += sqlite3GetVarint(&a[iOff], &iDelta); + pIter->iLeafOffset = iOff; + if( iDelta==0 ){ + bNewTerm = 1; + if( iOff>=n ){ + fts5SegIterNextPage(p, pIter); + pIter->iLeafOffset = 4; + }else if( iOff!=fts5GetU16(&a[2]) ){ + pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); + } + }else{ + pIter->iRowid += iDelta; + } + }else if( pIter->pSeg==0 ){ + const u8 *pList = 0; + const char *zTerm; + int nList; + if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ + sqlite3Fts5HashScanNext(p->apHash[0]); + sqlite3Fts5HashScanEntry(p->apHash[0], &zTerm, &pList, &nList); + } + if( pList==0 ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + pIter->pLeaf->p = (u8*)pList; + pIter->pLeaf->n = nList; + sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); + pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid); + } + }else{ + iOff = 0; + /* Next entry is not on the current page */ + while( iOff==0 ){ + fts5SegIterNextPage(p, pIter); + pLeaf = pIter->pLeaf; + if( pLeaf==0 ) break; + if( (iOff = fts5GetU16(&pLeaf->p[0])) ){ + iOff += sqlite3GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ + pIter->iLeafOffset = iOff; + bNewTerm = 1; + } + } + } + + /* Check if the iterator is now at EOF. If so, return early. */ + if( pIter->pLeaf ){ + if( bNewTerm ){ + if( pIter->flags & FTS5_SEGITER_ONETERM ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + fts5SegIterLoadNPos(p, pIter); + if( pbNewTerm ) *pbNewTerm = 1; + } + }else{ + fts5SegIterLoadNPos(p, pIter); + } + } + } + } +} + +#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } + +/* +** Iterator pIter currently points to the first rowid in a doclist. This +** function sets the iterator up so that iterates in reverse order through +** the doclist. +*/ +static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ + Fts5DlidxIter *pDlidx = pIter->pDlidx; + Fts5Data *pLast = 0; + int pgnoLast = 0; + + if( pDlidx ){ + /* If the doclist-iterator is already at EOF, then the current doclist + ** contains no entries except those on the current page. */ + if( fts5DlidxIterEof(p, pDlidx)==0 ){ + int iSegid = pIter->pSeg->iSegid; + pgnoLast = pDlidx->iLeafPgno; + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); + }else{ + pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); + } + }else{ + int iOff; /* Byte offset within pLeaf */ + Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ + + /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first + ** byte of position-list content for the current rowid. Back it up + ** so that it points to the start of the position-list size field. */ + pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); + iOff = pIter->iLeafOffset; + assert( iOff>=4 ); + + /* Search for a new term within the current leaf. If one can be found, + ** then this page contains the largest rowid for the current term. */ + while( iOff<pLeaf->n ){ + int nPos; + i64 iDelta; + int bDummy; + + /* Read the position-list size field */ + iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); + iOff += nPos; + if( iOff>=pLeaf->n ) break; + + /* Rowid delta. Or, if 0x00, the end of doclist marker. */ + nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) break; + iOff += nPos; + } + + /* If this condition is true then the largest rowid for the current + ** term may not be stored on the current page. So search forward to + ** see where said rowid really is. */ + if( iOff>=pLeaf->n ){ + int pgno; + Fts5StructureSegment *pSeg = pIter->pSeg; + + /* The last rowid in the doclist may not be on the current page. Search + ** forward to find the page containing the last rowid. */ + for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ + i64 iAbs = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, pgno); + Fts5Data *pNew = fts5DataRead(p, iAbs); + if( pNew ){ + int iRowid, iTerm; + fts5LeafHeader(pNew, &iRowid, &iTerm); + if( iRowid ){ + SWAPVAL(Fts5Data*, pNew, pLast); + pgnoLast = pgno; + } + fts5DataRelease(pNew); + if( iTerm ) break; + } + } + } + } + + /* If pLast is NULL at this point, then the last rowid for this doclist + ** lies on the page currently indicated by the iterator. In this case + ** pIter->iLeafOffset is already set to point to the position-list size + ** field associated with the first relevant rowid on the page. + ** + ** Or, if pLast is non-NULL, then it is the page that contains the last + ** rowid. In this case configure the iterator so that it points to the + ** first rowid on this page. + */ + if( pLast ){ + int dummy; + int iOff; + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = pLast; + pIter->iLeafPgno = pgnoLast; + fts5LeafHeader(pLast, &iOff, &dummy); + iOff += getVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + + fts5SegIterReverseInitPage(p, pIter); +} + +/* +** Iterator pIter currently points to the first rowid of a doclist within +** index iIdx. There is a doclist-index associated with the final term on +** the current page. If the current term is the last term on the page, +** load the doclist-index from disk and initialize an iterator at +** (pIter->pDlidx). +*/ +static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ + int iSeg = pIter->pSeg->iSegid; + int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); + Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ + + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + assert( pIter->pDlidx==0 ); + + /* Check if the current doclist ends on this page. If it does, return + ** early without loading the doclist-index (as it belongs to a different + ** term. */ + if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ + int iOff = pIter->iLeafOffset + pIter->nPos; + while( iOff<pLeaf->n ){ + i64 iDelta; + + /* iOff is currently the offset of the start of position list data */ + iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) return; + + if( iOff<pLeaf->n ){ + int bDummy; + int nPos; + iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); + iOff += nPos; + } + } + } + + pIter->pDlidx = fts5DlidxIterInit(p, bRev, iIdx, iSeg, pIter->iTermLeafPgno); +} + +/* +** Initialize the object pIter to point to term pTerm/nTerm within segment +** pSeg, index iIdx. If there is no such term in the index, the iterator +** is set to EOF. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterSeekInit( + Fts5Index *p, /* FTS5 backend */ + int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to */ + int flags, /* Mask of FTS5INDEX_XXX flags */ + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + int iPg = 1; + int h; + int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0); + int bDlidx = 0; /* True if there is a doclist-index */ + + assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); + assert( pTerm && nTerm ); + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->iIdx = iIdx; + + /* This block sets stack variable iPg to the leaf page number that may + ** contain term (pTerm/nTerm), if it is present in the segment. */ + for(h=pSeg->nHeight-1; h>0; h--){ + Fts5NodeIter node; /* For iterating through internal nodes */ + i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, h, iPg); + Fts5Data *pNode = fts5DataRead(p, iRowid); + if( pNode==0 ) break; + + fts5NodeIterInit(pNode->p, pNode->n, &node); + assert( node.term.n==0 ); + + iPg = node.iChild; + bDlidx = node.bDlidx; + for(fts5NodeIterNext(&p->rc, &node); + node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; + fts5NodeIterNext(&p->rc, &node) + ){ + iPg = node.iChild; + bDlidx = node.bDlidx; + } + fts5NodeIterFree(&node); + fts5DataRelease(pNode); + } + + if( iPg<pSeg->pgnoFirst ){ + iPg = pSeg->pgnoFirst; + bDlidx = 0; + } + + pIter->iLeafPgno = iPg - 1; + fts5SegIterNextPage(p, pIter); + + if( pIter->pLeaf ){ + int res; + pIter->iLeafOffset = fts5GetU16(&pIter->pLeaf->p[2]); + fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); + do { + res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); + if( res>=0 ) break; + fts5SegIterNext(p, pIter, 0); + }while( pIter->pLeaf && p->rc==SQLITE_OK ); + + if( bGe==0 && res ){ + /* Set iterator to point to EOF */ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + } + } + + if( p->rc==SQLITE_OK && bGe==0 ){ + pIter->flags |= FTS5_SEGITER_ONETERM; + if( pIter->pLeaf ){ + if( flags & FTS5INDEX_QUERY_DESC ){ + pIter->flags |= FTS5_SEGITER_REVERSE; + } + if( bDlidx ){ + fts5SegIterLoadDlidx(p, iIdx, pIter); + } + if( flags & FTS5INDEX_QUERY_DESC ){ + fts5SegIterReverse(p, iIdx, pIter); + } + } + } +} + +/* +** Initialize the object pIter to point to term pTerm/nTerm within the +** in-memory hash table iIdx. If there is no such term in the table, the +** iterator is set to EOF. +** +** If an error occurs, Fts5Index.rc is set to an appropriate error code. If +** an error has already occurred when this function is called, it is a no-op. +*/ +static void fts5SegIterHashInit( + Fts5Index *p, /* FTS5 backend */ + int iIdx, /* Config.aHash[] index of FTS index */ + const u8 *pTerm, int nTerm, /* Term to seek to */ + int flags, /* Mask of FTS5INDEX_XXX flags */ + Fts5SegIter *pIter /* Object to populate */ +){ + Fts5Hash *pHash = p->apHash[iIdx]; + const u8 *pList = 0; + int nList = 0; + const u8 *z = 0; + int n = 0; + + assert( pHash ); + assert( p->rc==SQLITE_OK ); + + if( pTerm==0 || (iIdx==0 && (flags & FTS5INDEX_QUERY_PREFIX)) ){ + p->rc = sqlite3Fts5HashScanInit(pHash, (const char*)pTerm, nTerm); + sqlite3Fts5HashScanEntry(pHash, (const char**)&z, &pList, &nList); + n = (z ? strlen((const char*)z) : 0); + }else{ + pIter->flags |= FTS5_SEGITER_ONETERM; + sqlite3Fts5HashQuery(pHash, (const char*)pTerm, nTerm, &pList, &nList); + z = pTerm; + n = nTerm; + } + + if( pList ){ + Fts5Data *pLeaf; + sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); + pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); + if( pLeaf==0 ) return; + pLeaf->nRef = 1; + pLeaf->p = (u8*)pList; + pLeaf->n = nList; + pIter->pLeaf = pLeaf; + pIter->iLeafOffset = getVarint(pLeaf->p, (u64*)&pIter->iRowid); + + if( flags & FTS5INDEX_QUERY_DESC ){ + pIter->flags |= FTS5_SEGITER_REVERSE; + fts5SegIterReverseInitPage(p, pIter); + }else{ + fts5SegIterLoadNPos(p, pIter); + } + } +} + +/* +** Zero the iterator passed as the only argument. +*/ +static void fts5SegIterClear(Fts5SegIter *pIter){ + fts5BufferFree(&pIter->term); + fts5DataRelease(pIter->pLeaf); + fts5DlidxIterFree(pIter->pDlidx); + sqlite3_free(pIter->aRowidOffset); + memset(pIter, 0, sizeof(Fts5SegIter)); +} + +#ifdef SQLITE_DEBUG + +/* +** This function is used as part of the big assert() procedure implemented by +** fts5AssertMultiIterSetup(). It ensures that the result currently stored +** in *pRes is the correct result of comparing the current positions of the +** two iterators. +*/ +static void fts5AssertComparisonResult( + Fts5MultiSegIter *pIter, + Fts5SegIter *p1, + Fts5SegIter *p2, + Fts5CResult *pRes +){ + int i1 = p1 - pIter->aSeg; + int i2 = p2 - pIter->aSeg; + + if( p1->pLeaf || p2->pLeaf ){ + if( p1->pLeaf==0 ){ + assert( pRes->iFirst==i2 ); + }else if( p2->pLeaf==0 ){ + assert( pRes->iFirst==i1 ); + }else{ + int nMin = MIN(p1->term.n, p2->term.n); + int res = memcmp(p1->term.p, p2->term.p, nMin); + if( res==0 ) res = p1->term.n - p2->term.n; + + if( res==0 ){ + assert( pRes->bTermEq==1 ); + assert( p1->iRowid!=p2->iRowid ); + res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; + }else{ + assert( pRes->bTermEq==0 ); + } + + if( res<0 ){ + assert( pRes->iFirst==i1 ); + }else{ + assert( pRes->iFirst==i2 ); + } + } + } +} + +/* +** This function is a no-op unless SQLITE_DEBUG is defined when this module +** is compiled. In that case, this function is essentially an assert() +** statement used to verify that the contents of the pIter->aFirst[] array +** are correct. +*/ +static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( p->rc==SQLITE_OK ){ + int i; + for(i=0; i<pIter->nSeg; i+=2){ + Fts5SegIter *p1 = &pIter->aSeg[i]; + Fts5SegIter *p2 = &pIter->aSeg[i+1]; + Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; + fts5AssertComparisonResult(pIter, p1, p2, pRes); + } + + for(i=1; i<(pIter->nSeg / 2); i+=2){ + Fts5CResult *pRes = &pIter->aFirst[i]; + Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; + Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; + + fts5AssertComparisonResult(pIter, p1, p2, pRes); + } + } +} +#else +# define fts5AssertMultiIterSetup(x,y) +#endif + +/* +** Do the comparison necessary to populate pIter->aFirst[iOut]. +** +** If the returned value is non-zero, then it is the index of an entry +** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing +** to a key that is a duplicate of another, higher priority, +** segment-iterator in the pSeg->aSeg[] array. +*/ +static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ + int i1; /* Index of left-hand Fts5SegIter */ + int i2; /* Index of right-hand Fts5SegIter */ + int iRes; + Fts5SegIter *p1; /* Left-hand Fts5SegIter */ + Fts5SegIter *p2; /* Right-hand Fts5SegIter */ + Fts5CResult *pRes = &pIter->aFirst[iOut]; + + assert( iOut<pIter->nSeg && iOut>0 ); + assert( pIter->bRev==0 || pIter->bRev==1 ); + + if( iOut>=(pIter->nSeg/2) ){ + i1 = (iOut - pIter->nSeg/2) * 2; + i2 = i1 + 1; + }else{ + i1 = pIter->aFirst[iOut*2].iFirst; + i2 = pIter->aFirst[iOut*2+1].iFirst; + } + p1 = &pIter->aSeg[i1]; + p2 = &pIter->aSeg[i2]; + + pRes->bTermEq = 0; + if( p1->pLeaf==0 ){ /* If p1 is at EOF */ + iRes = i2; + }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ + iRes = i1; + }else{ + int res = fts5BufferCompare(&p1->term, &p2->term); + if( res==0 ){ + assert( i2>i1 ); + assert( i2!=0 ); + pRes->bTermEq = 1; + if( p1->iRowid==p2->iRowid ){ + p1->bDel = p2->bDel; + return i2; + } + res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; + } + assert( res!=0 ); + if( res<0 ){ + iRes = i1; + }else{ + iRes = i2; + } + } + + pRes->iFirst = iRes; + return 0; +} + +/* +** Move the seg-iter so that it points to the first rowid on page iLeafPgno. +** It is an error if leaf iLeafPgno contains no rowid. +*/ +static void fts5SegIterGotoPage( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int iLeafPgno +){ + assert( iLeafPgno>pIter->iLeafPgno ); + if( p->rc==SQLITE_OK ){ + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); + } + + if( p->rc==SQLITE_OK ){ + int iOff; + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + iOff = fts5GetU16(&a[0]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); + } + } +} + +/* +** Advance the iterator passed as the second argument until it is at or +** past rowid iFrom. Regardless of the value of iFrom, the iterator is +** always advanced at least once. +*/ +static void fts5SegIterNextFrom( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + i64 iMatch /* Advance iterator at least this far */ +){ + int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); + Fts5DlidxIter *pDlidx = pIter->pDlidx; + int iLeafPgno = pIter->iLeafPgno; + int bMove = 1; + + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + assert( pIter->pDlidx ); + assert( pIter->pLeaf ); + + if( bRev==0 ){ + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ + iLeafPgno = pDlidx->iLeafPgno; + fts5DlidxIterNext(pDlidx); + } + assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); + if( iLeafPgno>pIter->iLeafPgno ){ + fts5SegIterGotoPage(p, pIter, iLeafPgno); + bMove = 0; + } + }else{ + assert( iMatch<pIter->iRowid ); + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){ + fts5DlidxIterPrev(pDlidx); + } + iLeafPgno = pDlidx->iLeafPgno; + + assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); + + if( iLeafPgno<pIter->iLeafPgno ){ + pIter->iLeafPgno = iLeafPgno+1; + fts5SegIterReverseNewPage(p, pIter); + bMove = 0; + } + } + + while( 1 ){ + if( bMove ) fts5SegIterNext(p, pIter, 0); + if( pIter->pLeaf==0 ) break; + if( bRev==0 && pIter->iRowid>=iMatch ) break; + if( bRev!=0 && pIter->iRowid<=iMatch ) break; + bMove = 1; + } +} + + +/* +** Free the iterator object passed as the second argument. +*/ +static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){ + if( pIter ){ + int i; + for(i=0; i<pIter->nSeg; i++){ + fts5SegIterClear(&pIter->aSeg[i]); + } + sqlite3_free(pIter); + } +} + +static void fts5MultiIterAdvanced( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged, /* Index of sub-iterator just advanced */ + int iMinset /* Minimum entry in aFirst[] to set */ +){ + int i; + for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ + fts5SegIterNext(p, &pIter->aSeg[iEq], 0); + i = pIter->nSeg + iEq; + } + } +} + +static int fts5MultiIterAdvanceRowid( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ + int iChanged /* Index of sub-iterator just advanced */ +){ + int i; + Fts5SegIter *pNew = &pIter->aSeg[iChanged]; + Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; + + for(i=(pIter->nSeg+iChanged)/2; p->rc==SQLITE_OK; i=i/2){ + Fts5CResult *pRes = &pIter->aFirst[i]; + + assert( pNew->pLeaf ); + assert( pRes->bTermEq==0 || pOther->pLeaf ); + + if( pRes->bTermEq ){ + if( pNew->iRowid==pOther->iRowid ){ + return 1; + }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ + pNew = pOther; + } + } + pRes->iFirst = (pNew - pIter->aSeg); + if( i==1 ) break; + + pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; + } + + return 0; +} + +/* +** Move the iterator to the next entry. +** +** If an error occurs, an error code is left in Fts5Index.rc. It is not +** considered an error if the iterator reaches EOF, or if it is already at +** EOF when this function is called. +*/ +static void fts5MultiIterNext( + Fts5Index *p, + Fts5MultiSegIter *pIter, + int bFrom, /* True if argument iFrom is valid */ + i64 iFrom /* Advance at least as far as this */ +){ + if( p->rc==SQLITE_OK ){ + int bUseFrom = bFrom; + do { + int iFirst = pIter->aFirst[1].iFirst; + int bNewTerm = 0; + Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + if( bUseFrom && pSeg->pDlidx ){ + fts5SegIterNextFrom(p, pSeg, iFrom); + }else{ + fts5SegIterNext(p, pSeg, &bNewTerm); + } + + if( pSeg->pLeaf==0 || bNewTerm + || fts5MultiIterAdvanceRowid(p, pIter, iFirst) + ){ + fts5MultiIterAdvanced(p, pIter, iFirst, 1); + } + fts5AssertMultiIterSetup(p, pIter); + + bUseFrom = 0; + }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) ); + } +} + +/* +** Allocate a new Fts5MultiSegIter object. +** +** The new object will be used to iterate through data in structure pStruct. +** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel +** is zero or greater, data from the first nSegment segments on level iLevel +** is merged. +** +** The iterator initially points to the first term/rowid entry in the +** iterated data. +*/ +static void fts5MultiIterNew( + Fts5Index *p, /* FTS5 backend to iterate within */ + Fts5Structure *pStruct, /* Structure of specific index */ + int iIdx, /* Config.aHash[] index of FTS index */ + int bSkipEmpty, /* True to ignore delete-keys */ + int flags, /* FTS5INDEX_QUERY_XXX flags */ + const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ + int iLevel, /* Level to iterate (-1 for all) */ + int nSegment, /* Number of segments to merge (iLevel>=0) */ + Fts5MultiSegIter **ppOut /* New object */ +){ + int nSeg; /* Number of segments merged */ + int nSlot; /* Power of two >= nSeg */ + int iIter = 0; /* */ + int iSeg; /* Used to iterate through segments */ + Fts5StructureLevel *pLvl; + Fts5MultiSegIter *pNew; + + assert( (pTerm==0 && nTerm==0) || iLevel<0 ); + + /* Allocate space for the new multi-seg-iterator. */ + if( iLevel<0 ){ + nSeg = fts5StructureCountSegments(pStruct); + nSeg += (p->apHash ? 1 : 0); + }else{ + nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); + } + for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); + *ppOut = pNew = fts5IdxMalloc(p, + sizeof(Fts5MultiSegIter) + /* pNew */ + sizeof(Fts5SegIter) * nSlot + /* pNew->aSeg[] */ + sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ + ); + if( pNew==0 ) return; + pNew->nSeg = nSlot; + pNew->aSeg = (Fts5SegIter*)&pNew[1]; + pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; + pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); + pNew->bSkipEmpty = bSkipEmpty; + + /* Initialize each of the component segment iterators. */ + if( iLevel<0 ){ + Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; + if( p->apHash ){ + /* Add a segment iterator for the current contents of the hash table. */ + Fts5SegIter *pIter = &pNew->aSeg[iIter++]; + fts5SegIterHashInit(p, iIdx, pTerm, nTerm, flags, pIter); + } + for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ + for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; + Fts5SegIter *pIter = &pNew->aSeg[iIter++]; + if( pTerm==0 ){ + fts5SegIterInit(p, iIdx, pSeg, pIter); + }else{ + fts5SegIterSeekInit(p, iIdx, pTerm, nTerm, flags, pSeg, pIter); + } + } + } + }else{ + pLvl = &pStruct->aLevel[iLevel]; + for(iSeg=nSeg-1; iSeg>=0; iSeg--){ + fts5SegIterInit(p, iIdx, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); + } + } + assert( iIter==nSeg ); + + /* If the above was successful, each component iterators now points + ** to the first entry in its segment. In this case initialize the + ** aFirst[] array. Or, if an error has occurred, free the iterator + ** object and set the output variable to NULL. */ + if( p->rc==SQLITE_OK ){ + for(iIter=nSlot-1; iIter>0; iIter--){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ + fts5SegIterNext(p, &pNew->aSeg[iEq], 0); + fts5MultiIterAdvanced(p, pNew, iEq, iIter); + } + } + fts5AssertMultiIterSetup(p, pNew); + + if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ + fts5MultiIterNext(p, pNew, 0, 0); + } + }else{ + fts5MultiIterFree(p, pNew); + *ppOut = 0; + } +} + +/* +** Return true if the iterator is at EOF or if an error has occurred. +** False otherwise. +*/ +static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ + return (p->rc || pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0); +} + +/* +** Return the rowid of the entry that the iterator currently points +** to. If the iterator points to EOF when this function is called the +** results are undefined. +*/ +static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ + assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); + return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; +} + +/* +** Move the iterator to the next entry at or following iMatch. +*/ +static void fts5MultiIterNextFrom( + Fts5Index *p, + Fts5MultiSegIter *pIter, + i64 iMatch +){ + while( 1 ){ + i64 iRowid; + fts5MultiIterNext(p, pIter, 1, iMatch); + if( fts5MultiIterEof(p, pIter) ) break; + iRowid = fts5MultiIterRowid(pIter); + if( pIter->bRev==0 && iRowid>=iMatch ) break; + if( pIter->bRev!=0 && iRowid<=iMatch ) break; + } +} + +/* +** Return a pointer to a buffer containing the term associated with the +** entry that the iterator currently points to. +*/ +static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ + Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + *pn = p->term.n; + return p->term.p; +} + +/* +** Return true if the chunk iterator passed as the second argument is +** at EOF. Or if an error has already occurred. Otherwise, return false. +*/ +static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){ + return (p->rc || pIter->pLeaf==0); +} + +/* +** Advance the chunk-iterator to the next chunk of data to read. +*/ +static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){ + assert( pIter->nRem>=pIter->n ); + pIter->nRem -= pIter->n; + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + pIter->p = 0; + if( pIter->nRem>0 ){ + Fts5Data *pLeaf; + pIter->iLeafRowid++; + pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid); + if( pLeaf ){ + pIter->n = MIN(pIter->nRem, pLeaf->n-4); + pIter->p = pLeaf->p+4; + } + } +} + +/* +** Intialize the chunk iterator to read the position list data for which +** the size field is at offset iOff of leaf pLeaf. +*/ +static void fts5ChunkIterInit( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pSeg, /* Segment iterator to read poslist from */ + Fts5ChunkIter *pIter /* Initialize this object */ +){ + Fts5Data *pLeaf = pSeg->pLeaf; + int iOff = pSeg->iLeafOffset; + + memset(pIter, 0, sizeof(*pIter)); + /* If Fts5SegIter.pSeg is NULL, then this iterator iterates through data + ** currently stored in a hash table. In this case there is no leaf-rowid + ** to calculate. */ + if( pSeg->pSeg ){ + int iId = pSeg->pSeg->iSegid; + i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno); + pIter->iLeafRowid = rowid; + } + + fts5DataReference(pLeaf); + pIter->pLeaf = pLeaf; + pIter->nRem = pSeg->nPos; + pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); + pIter->p = pLeaf->p + iOff; + if( pIter->n==0 ){ + fts5ChunkIterNext(p, pIter); + } +} + +static void fts5ChunkIterRelease(Fts5ChunkIter *pIter){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; +} + +/* +** Read and return the next 32-bit varint from the position-list iterator +** passed as the second argument. +** +** If an error occurs, zero is returned an an error code left in +** Fts5Index.rc. If an error has already occurred when this function is +** called, it is a no-op. +*/ +static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){ + int iVal = 0; + if( p->rc==SQLITE_OK ){ + if( pIter->iOff>=pIter->chunk.n ){ + fts5ChunkIterNext(p, &pIter->chunk); + if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0; + pIter->iOff = 0; + } + pIter->iOff += fts5GetVarint32(&pIter->chunk.p[pIter->iOff], iVal); + } + return iVal; +} + +/* +** Advance the position list iterator to the next entry. +*/ +static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){ + int iVal; + assert( fts5ChunkIterEof(p, &pIter->chunk)==0 ); + iVal = fts5PosIterReadVarint(p, pIter); + if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ + if( iVal==1 ){ + pIter->iCol = fts5PosIterReadVarint(p, pIter); + pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2; + }else{ + pIter->iPos += (iVal - 2); + } + } +} + +/* +** Initialize the Fts5PosIter object passed as the final argument to iterate +** through the position-list associated with the index entry that iterator +** pMulti currently points to. +*/ +static void fts5PosIterInit( + Fts5Index *p, /* FTS5 backend object */ + Fts5MultiSegIter *pMulti, /* Multi-seg iterator to read pos-list from */ + Fts5PosIter *pIter /* Initialize this object */ +){ + if( p->rc==SQLITE_OK ){ + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + memset(pIter, 0, sizeof(*pIter)); + fts5ChunkIterInit(p, pSeg, &pIter->chunk); + if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){ + fts5PosIterNext(p, pIter); + } + } +} + +/* +** Return true if the position iterator passed as the second argument is +** at EOF. Or if an error has already occurred. Otherwise, return false. +*/ +static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ + return (p->rc || pIter->chunk.pLeaf==0); +} + +/* +** Allocate a new segment-id for the structure pStruct. +** +** If an error has already occurred, this function is a no-op. 0 is +** returned in this case. +*/ +static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ + int i; + if( p->rc!=SQLITE_OK ) return 0; + + for(i=0; i<100; i++){ + int iSegid; + sqlite3_randomness(sizeof(int), (void*)&iSegid); + iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1); + if( iSegid ){ + int iLvl, iSeg; + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ + if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ + iSegid = 0; + } + } + } + } + if( iSegid ) return iSegid; + } + + p->rc = SQLITE_ERROR; + return 0; +} + +/* +** Discard all data currently cached in the hash-tables. +*/ +static void fts5IndexDiscardData(Fts5Index *p){ + assert( p->apHash || p->nPendingData==0 ); + if( p->apHash ){ + Fts5Config *pConfig = p->pConfig; + int i; + for(i=0; i<=pConfig->nPrefix; i++){ + if( p->apHash[i] ) sqlite3Fts5HashClear(p->apHash[i]); + } + p->nPendingData = 0; + } +} + +/* +** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares +** with buffer (nOld/pOld). +*/ +static int fts5PrefixCompress( + int nOld, const u8 *pOld, + int nNew, const u8 *pNew +){ + int i; + for(i=0; i<nNew && i<nOld; i++){ + if( pOld[i]!=pNew[i] ) break; + } + return i; +} + +/* +** If an "nEmpty" record must be written to the b-tree before the next +** term, write it now. +*/ +static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ + if( pWriter->nEmpty ){ + int bFlag = 0; + Fts5PageWriter *pPg; + pPg = &pWriter->aWriter[1]; + if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){ + i64 iKey = FTS5_DOCLIST_IDX_ROWID( + pWriter->iIdx, pWriter->iSegid, + pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty + ); + assert( pWriter->cdlidx.n>0 ); + fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n); + bFlag = 1; + } + fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); + fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); + pWriter->nEmpty = 0; + } + + /* Whether or not it was written to disk, zero the doclist index at this + ** point */ + sqlite3Fts5BufferZero(&pWriter->cdlidx); + pWriter->bDlidxPrevValid = 0; +} + +static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *aNew; + Fts5PageWriter *pNew; + int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); + + aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + + pNew = &aNew[pWriter->nWriter]; + memset(pNew, 0, sizeof(Fts5PageWriter)); + pNew->pgno = 1; + fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); + + pWriter->nWriter++; + pWriter->aWriter = aNew; + } +} + +/* +** This is called once for each leaf page except the first that contains +** at least one term. Argument (nTerm/pTerm) is the split-key - a term that +** is larger than all terms written to earlier leaves, and equal to or +** smaller than the first term on the new leaf. +** +** If an error occurs, an error code is left in Fts5Index.rc. If an error +** has already occurred when this function is called, it is a no-op. +*/ +static void fts5WriteBtreeTerm( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegWriter *pWriter, /* Writer object */ + int nTerm, const u8 *pTerm /* First term on new page */ +){ + int iHeight; + for(iHeight=1; 1; iHeight++){ + Fts5PageWriter *pPage; + + if( iHeight>=pWriter->nWriter ){ + fts5WriteBtreeGrow(p, pWriter); + if( p->rc ) return; + } + pPage = &pWriter->aWriter[iHeight]; + + fts5WriteBtreeNEmpty(p, pWriter); + + if( pPage->buf.n>=p->pConfig->pgsz ){ + /* pPage will be written to disk. The term will be written into the + ** parent of pPage. */ + i64 iRowid = FTS5_SEGMENT_ROWID( + pWriter->iIdx, pWriter->iSegid, iHeight, pPage->pgno + ); + fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); + fts5BufferZero(&pPage->buf); + fts5BufferZero(&pPage->term); + fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno); + pPage->pgno++; + }else{ + int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); + fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2); + fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre); + fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre); + fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); + break; + } + } +} + +static void fts5WriteBtreeNoTerm( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegWriter *pWriter /* Writer object */ +){ + if( pWriter->bFirstRowidInPage ){ + /* No rowids on this page. Append an 0x00 byte to the current + ** doclist-index */ + if( pWriter->bDlidxPrevValid==0 ){ + i64 iRowid = pWriter->iPrevRowid; + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); + pWriter->bDlidxPrevValid = 1; + pWriter->iDlidxPrev = iRowid; + } + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, 0); + } + pWriter->nEmpty++; +} + +/* +** Rowid iRowid has just been appended to the current leaf page. As it is +** the first on its page, append an entry to the current doclist-index. +*/ +static void fts5WriteDlidxAppend( + Fts5Index *p, + Fts5SegWriter *pWriter, + i64 iRowid +){ + i64 iVal; + if( pWriter->bDlidxPrevValid ){ + iVal = iRowid - pWriter->iDlidxPrev; + }else{ + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iRowid); + iVal = 1; + } + sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->cdlidx, iVal); + pWriter->bDlidxPrevValid = 1; + pWriter->iDlidxPrev = iRowid; +} + +static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ + static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + i64 iRowid; + + if( pWriter->bFirstTermInPage ){ + /* No term was written to this page. */ + assert( 0==fts5GetU16(&pPage->buf.p[2]) ); + fts5WriteBtreeNoTerm(p, pWriter); + } + + /* Write the current page to the db. */ + iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno); + fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); + + /* Initialize the next page. */ + fts5BufferZero(&pPage->buf); + fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); + pPage->pgno++; + + /* Increase the leaves written counter */ + pWriter->nLeafWritten++; + + /* The new leaf holds no terms */ + pWriter->bFirstTermInPage = 1; +} + +/* +** Append term pTerm/nTerm to the segment being written by the writer passed +** as the second argument. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. +*/ +static void fts5WriteAppendTerm( + Fts5Index *p, + Fts5SegWriter *pWriter, + int nTerm, const u8 *pTerm +){ + int nPrefix; /* Bytes of prefix compression for term */ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + + assert( pPage==0 || pPage->buf.n==0 || pPage->buf.n>4 ); + if( pPage && pPage->buf.n==0 ){ + /* Zero the first term and first docid fields */ + static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; + fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); + assert( pWriter->bFirstTermInPage ); + } + if( p->rc ) return; + + if( pWriter->bFirstTermInPage ){ + /* Update the "first term" field of the page header. */ + assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); + fts5PutU16(&pPage->buf.p[2], pPage->buf.n); + nPrefix = 0; + if( pPage->pgno!=1 ){ + /* This is the first term on a leaf that is not the leftmost leaf in + ** the segment b-tree. In this case it is necessary to add a term to + ** the b-tree hierarchy that is (a) larger than the largest term + ** already written to the segment and (b) smaller than or equal to + ** this term. In other words, a prefix of (pTerm/nTerm) that is one + ** byte longer than the longest prefix (pTerm/nTerm) shares with the + ** previous term. + ** + ** Usually, the previous term is available in pPage->term. The exception + ** is if this is the first term written in an incremental-merge step. + ** In this case the previous term is not available, so just write a + ** copy of (pTerm/nTerm) into the parent node. This is slightly + ** inefficient, but still correct. */ + int n = nTerm; + if( pPage->term.n ){ + n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); + } + fts5WriteBtreeTerm(p, pWriter, n, pTerm); + pPage = &pWriter->aWriter[0]; + } + }else{ + nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); + fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); + } + + /* Append the number of bytes of new data, then the term data itself + ** to the page. */ + fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); + fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); + + /* Update the Fts5PageWriter.term field. */ + fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); + pWriter->bFirstTermInPage = 0; + + pWriter->bFirstRowidInPage = 0; + pWriter->bFirstRowidInDoclist = 1; + + /* If the current leaf page is full, flush it to disk. */ + if( pPage->buf.n>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } +} + +/* +** Append a docid and position-list size field to the writers output. +*/ +static void fts5WriteAppendRowid( + Fts5Index *p, + Fts5SegWriter *pWriter, + i64 iRowid, + int nPos +){ + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + + /* If this is to be the first docid written to the page, set the + ** docid-pointer in the page-header. Also append a value to the dlidx + ** buffer, in case a doclist-index is required. */ + if( pWriter->bFirstRowidInPage ){ + fts5PutU16(pPage->buf.p, pPage->buf.n); + fts5WriteDlidxAppend(p, pWriter, iRowid); + } + + /* Write the docid. */ + if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ + fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); + }else{ + assert( p->rc || iRowid>pWriter->iPrevRowid ); + fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); + } + pWriter->iPrevRowid = iRowid; + pWriter->bFirstRowidInDoclist = 0; + pWriter->bFirstRowidInPage = 0; + + fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); + + if( pPage->buf.n>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } + } +} + +static void fts5WriteAppendPoslistInt( + Fts5Index *p, + Fts5SegWriter *pWriter, + int iVal +){ + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); + if( pPage->buf.n>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } + } +} + +static void fts5WriteAppendPoslistData( + Fts5Index *p, + Fts5SegWriter *pWriter, + const u8 *aData, + int nData +){ + Fts5PageWriter *pPage = &pWriter->aWriter[0]; + const u8 *a = aData; + int n = nData; + + assert( p->pConfig->pgsz>0 ); + while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){ + int nReq = p->pConfig->pgsz - pPage->buf.n; + int nCopy = 0; + while( nCopy<nReq ){ + i64 dummy; + nCopy += getVarint(&a[nCopy], (u64*)&dummy); + } + fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); + a += nCopy; + n -= nCopy; + fts5WriteFlushLeaf(p, pWriter); + pWriter->bFirstRowidInPage = 1; + } + if( n>0 ){ + fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); + } +} + +static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ + fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); +} + +/* +** Flush any data cached by the writer object to the database. Free any +** allocations associated with the writer. +*/ +static void fts5WriteFinish( + Fts5Index *p, + Fts5SegWriter *pWriter, /* Writer object */ + int *pnHeight, /* OUT: Height of the b-tree */ + int *pnLeaf /* OUT: Number of leaf pages in b-tree */ +){ + int i; + if( p->rc==SQLITE_OK ){ + Fts5PageWriter *pLeaf = &pWriter->aWriter[0]; + if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ + *pnLeaf = 0; + *pnHeight = 0; + }else{ + if( pLeaf->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + *pnLeaf = pLeaf->pgno-1; + if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + fts5WriteBtreeGrow(p, pWriter); + } + if( pWriter->nWriter>1 ){ + fts5WriteBtreeNEmpty(p, pWriter); + } + *pnHeight = pWriter->nWriter; + + for(i=1; i<pWriter->nWriter; i++){ + Fts5PageWriter *pPg = &pWriter->aWriter[i]; + fts5DataWrite(p, + FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno), + pPg->buf.p, pPg->buf.n + ); + } + } + } + for(i=0; i<pWriter->nWriter; i++){ + Fts5PageWriter *pPg = &pWriter->aWriter[i]; + assert( pPg || p->rc!=SQLITE_OK ); + if( pPg ){ + fts5BufferFree(&pPg->term); + fts5BufferFree(&pPg->buf); + } + } + sqlite3_free(pWriter->aWriter); + sqlite3Fts5BufferFree(&pWriter->cdlidx); +} + +static void fts5WriteInit( + Fts5Index *p, + Fts5SegWriter *pWriter, + int iIdx, int iSegid +){ + memset(pWriter, 0, sizeof(Fts5SegWriter)); + pWriter->iIdx = iIdx; + pWriter->iSegid = iSegid; + + pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter)); + if( pWriter->aWriter==0 ) return; + pWriter->nWriter = 1; + pWriter->aWriter[0].pgno = 1; + pWriter->bFirstTermInPage = 1; +} + +static void fts5WriteInitForAppend( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegWriter *pWriter, /* Writer to initialize */ + int iIdx, /* Index segment is a part of */ + Fts5StructureSegment *pSeg /* Segment object to append to */ +){ + int nByte = pSeg->nHeight * sizeof(Fts5PageWriter); + memset(pWriter, 0, sizeof(Fts5SegWriter)); + pWriter->iIdx = iIdx; + pWriter->iSegid = pSeg->iSegid; + pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); + pWriter->nWriter = pSeg->nHeight; + + if( p->rc==SQLITE_OK ){ + int pgno = 1; + int i; + pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; + for(i=pSeg->nHeight-1; i>0; i--){ + i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pgno); + Fts5PageWriter *pPg = &pWriter->aWriter[i]; + pPg->pgno = pgno; + fts5DataBuffer(p, &pPg->buf, iRowid); + if( p->rc==SQLITE_OK ){ + Fts5NodeIter ss; + fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss); + while( ss.aData ) fts5NodeIterNext(&p->rc, &ss); + fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p); + pgno = ss.iChild; + fts5NodeIterFree(&ss); + } + } + if( pSeg->nHeight==1 ){ + pWriter->nEmpty = pSeg->pgnoLast-1; + } + assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); + pWriter->bFirstTermInPage = 1; + assert( pWriter->aWriter[0].term.n==0 ); + } +} + +/* +** Iterator pIter was used to iterate through the input segments of on an +** incremental merge operation. This function is called if the incremental +** merge step has finished but the input has not been completely exhausted. +*/ +static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ + int i; + Fts5Buffer buf; + memset(&buf, 0, sizeof(Fts5Buffer)); + for(i=0; i<pIter->nSeg; i++){ + Fts5SegIter *pSeg = &pIter->aSeg[i]; + if( pSeg->pSeg==0 ){ + /* no-op */ + }else if( pSeg->pLeaf==0 ){ + /* All keys from this input segment have been transfered to the output. + ** Set both the first and last page-numbers to 0 to indicate that the + ** segment is now empty. */ + pSeg->pSeg->pgnoLast = 0; + pSeg->pSeg->pgnoFirst = 0; + }else{ + int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ + i64 iLeafRowid; + Fts5Data *pData; + int iId = pSeg->pSeg->iSegid; + u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04}; + + iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iTermLeafPgno); + pData = fts5DataRead(p, iLeafRowid); + if( pData ){ + fts5BufferZero(&buf); + fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); + fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); + fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); + fts5DataRelease(pData); + pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; + fts5DataDelete(p, FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, 1),iLeafRowid); + fts5DataWrite(p, iLeafRowid, buf.p, buf.n); + } + } + } + fts5BufferFree(&buf); +} + +/* +** +*/ +static void fts5IndexMergeLevel( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure **ppStruct, /* IN/OUT: Stucture of index iIdx */ + int iLvl, /* Level to read input from */ + int *pnRem /* Write up to this many output leaves */ +){ + Fts5Structure *pStruct = *ppStruct; + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + Fts5StructureLevel *pLvlOut; + Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ + int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ + int nInput; /* Number of input segments */ + Fts5SegWriter writer; /* Writer object */ + Fts5StructureSegment *pSeg; /* Output segment */ + Fts5Buffer term; + int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ + int bOldest; /* True if the output segment is the oldest */ + + assert( iLvl<pStruct->nLevel ); + assert( pLvl->nMerge<=pLvl->nSeg ); + + memset(&writer, 0, sizeof(Fts5SegWriter)); + memset(&term, 0, sizeof(Fts5Buffer)); + writer.iIdx = iIdx; + if( pLvl->nMerge ){ + pLvlOut = &pStruct->aLevel[iLvl+1]; + assert( pLvlOut->nSeg>0 ); + nInput = pLvl->nMerge; + fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); + pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; + }else{ + int iSegid = fts5AllocateSegid(p, pStruct); + + /* Extend the Fts5Structure object as required to ensure the output + ** segment exists. */ + if( iLvl==pStruct->nLevel-1 ){ + fts5StructureAddLevel(&p->rc, ppStruct); + pStruct = *ppStruct; + } + fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); + if( p->rc ) return; + pLvl = &pStruct->aLevel[iLvl]; + pLvlOut = &pStruct->aLevel[iLvl+1]; + + fts5WriteInit(p, &writer, iIdx, iSegid); + + /* Add the new segment to the output level */ + if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++; + pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; + pLvlOut->nSeg++; + pSeg->pgnoFirst = 1; + pSeg->iSegid = iSegid; + + /* Read input from all segments in the input level */ + nInput = pLvl->nSeg; + } + bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); + +#if 0 +fprintf(stdout, "merging %d segments from level %d!", nInput, iLvl); +fflush(stdout); +#endif + + assert( iLvl>=0 ); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, iLvl, nInput, &pIter); + fts5MultiIterEof(p, pIter)==0; + fts5MultiIterNext(p, pIter, 0, 0) + ){ + Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + Fts5ChunkIter sPos; /* Used to iterate through position list */ + int nPos; /* position-list size field value */ + int nTerm; + const u8 *pTerm; + + /* Check for key annihilation. */ + if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue; + + fts5ChunkIterInit(p, pSeg, &sPos); + + pTerm = fts5MultiIterTerm(pIter, &nTerm); + if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ + if( pnRem && writer.nLeafWritten>nRem ){ + fts5ChunkIterRelease(&sPos); + break; + } + + /* This is a new term. Append a term to the output segment. */ + if( bRequireDoclistTerm ){ + fts5WriteAppendZerobyte(p, &writer); + } + fts5WriteAppendTerm(p, &writer, nTerm, pTerm); + fts5BufferSet(&p->rc, &term, nTerm, pTerm); + bRequireDoclistTerm = 1; + } + + /* Append the rowid to the output */ + /* WRITEPOSLISTSIZE */ + nPos = pSeg->nPos*2 + pSeg->bDel; + fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); + + for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ + fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); + } + + fts5ChunkIterRelease(&sPos); + } + + /* Flush the last leaf page to disk. Set the output segment b-tree height + ** and last leaf page number at the same time. */ + fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast); + + if( fts5MultiIterEof(p, pIter) ){ + int i; + + /* Remove the redundant segments from the %_data table */ + for(i=0; i<nInput; i++){ + fts5DataRemoveSegment(p, iIdx, pLvl->aSeg[i].iSegid); + } + + /* Remove the redundant segments from the input level */ + if( pLvl->nSeg!=nInput ){ + int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); + memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); + } + pLvl->nSeg -= nInput; + pLvl->nMerge = 0; + if( pSeg->pgnoLast==0 ){ + pLvlOut->nSeg--; + } + }else{ + assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); + fts5TrimSegments(p, pIter); + pLvl->nMerge = nInput; + } + + fts5MultiIterFree(p, pIter); + fts5BufferFree(&term); + if( pnRem ) *pnRem -= writer.nLeafWritten; +} + +/* +** A total of nLeaf leaf pages of data has just been flushed to a level-0 +** segments in index iIdx with structure pStruct. This function updates the +** write-counter accordingly and, if necessary, performs incremental merge +** work. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. +*/ +static void fts5IndexWork( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ + int nLeaf /* Number of output leaves just written */ +){ + if( p->rc==SQLITE_OK ){ + Fts5Structure *pStruct = *ppStruct; + i64 nWrite; /* Initial value of write-counter */ + int nWork; /* Number of work-quanta to perform */ + int nRem; /* Number of leaf pages left to write */ + + /* Update the write-counter. While doing so, set nWork. */ + nWrite = pStruct->nWriteCounter; + nWork = ((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit); + pStruct->nWriteCounter += nLeaf; + nRem = p->nWorkUnit * nWork * pStruct->nLevel; + + while( nRem>0 ){ + int iLvl; /* To iterate through levels */ + int iBestLvl = 0; /* Level offering the most input segments */ + int nBest = 0; /* Number of input segments on best level */ + + /* Set iBestLvl to the level to read input segments from. */ + assert( pStruct->nLevel>0 ); + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; + if( pLvl->nMerge ){ + if( pLvl->nMerge>nBest ){ + iBestLvl = iLvl; + nBest = pLvl->nMerge; + } + break; + } + if( pLvl->nSeg>nBest ){ + nBest = pLvl->nSeg; + iBestLvl = iLvl; + } + } + + /* If nBest is still 0, then the index must be empty. */ +#ifdef SQLITE_DEBUG + for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ + assert( pStruct->aLevel[iLvl].nSeg==0 ); + } +#endif + + if( nBest<p->pConfig->nAutomerge + && pStruct->aLevel[iBestLvl].nMerge==0 + ){ + break; + } + fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); + assert( nRem==0 || p->rc==SQLITE_OK ); + if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ + fts5StructurePromote(p, iBestLvl+1, pStruct); + } + *ppStruct = pStruct; + } + + } +} + +static void fts5IndexCrisisMerge( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ +){ + Fts5Structure *pStruct = *ppStruct; + int iLvl = 0; + while( p->rc==SQLITE_OK + && iLvl<pStruct->nLevel + && pStruct->aLevel[iLvl].nSeg>=p->pConfig->nCrisisMerge + ){ + fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); + fts5StructurePromote(p, iLvl+1, pStruct); + iLvl++; + } + *ppStruct = pStruct; +} + +static int fts5IndexReturn(Fts5Index *p){ + int rc = p->rc; + p->rc = SQLITE_OK; + return rc; +} + +typedef struct Fts5FlushCtx Fts5FlushCtx; +struct Fts5FlushCtx { + Fts5Index *pIdx; + Fts5SegWriter writer; +}; + +/* +** Buffer aBuf[] contains a list of varints, all small enough to fit +** in a 32-bit integer. Return the size of the largest prefix of this +** list nMax bytes or less in size. +*/ +static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ + int ret; + u32 dummy; + ret = fts5GetVarint32(aBuf, dummy); + while( 1 ){ + int i = fts5GetVarint32(&aBuf[ret], dummy); + if( (ret + i) > nMax ) break; + ret += i; + } + return ret; +} + +#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ + assert( pBuf->nSpace>=(pBuf->n+nBlob) ); \ + memcpy(&pBuf->p[pBuf->n], pBlob, nBlob); \ + pBuf->n += nBlob; \ +} + +/* +** Flush the contents of in-memory hash table iHash to a new level-0 +** segment on disk. Also update the corresponding structure record. +** +** If an error occurs, set the Fts5Index.rc error code. If an error has +** already occurred, this function is a no-op. +*/ +static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ + Fts5Hash *pHash = p->apHash[iHash]; + Fts5Structure *pStruct; + int iSegid; + int pgnoLast = 0; /* Last leaf page number in segment */ + + /* Obtain a reference to the index structure and allocate a new segment-id + ** for the new level-0 segment. */ + pStruct = fts5StructureRead(p, iHash); + iSegid = fts5AllocateSegid(p, pStruct); + + if( iSegid ){ + const int pgsz = p->pConfig->pgsz; + + Fts5StructureSegment *pSeg; /* New segment within pStruct */ + int nHeight; /* Height of new segment b-tree */ + Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ + const u8 *zPrev = 0; + + Fts5SegWriter writer; + fts5WriteInit(p, &writer, iHash, iSegid); + + /* Pre-allocate the buffer used to assemble leaf pages to the target + ** page size. */ + assert( pgsz>0 ); + pBuf = &writer.aWriter[0].buf; + fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + + /* Begin scanning through hash table entries. */ + if( p->rc==SQLITE_OK ){ + memset(pBuf->p, 0, 4); + pBuf->n = 4; + p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); + } + + while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ + const char *zTerm; + int nTerm; + const u8 *pDoclist; + int nDoclist; + int nSuffix; /* Size of term suffix */ + + sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); + nTerm = strlen(zTerm); + + /* Decide if the term will fit on the current leaf. If it will not, + ** flush the leaf to disk here. */ + if( (pBuf->n + nTerm + 2) > pgsz ){ + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + if( (nTerm + 32) > pBuf->nSpace ){ + fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); + if( p->rc ) break; + } + } + + /* Write the term to the leaf. And push it up into the b-tree hierarchy */ + if( writer.bFirstTermInPage==0 ){ + int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nPre); + nSuffix = nTerm - nPre; + }else{ + fts5PutU16(&pBuf->p[2], pBuf->n); + writer.bFirstTermInPage = 0; + if( writer.aWriter[0].pgno!=1 ){ + int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); + fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); + pBuf = &writer.aWriter[0].buf; + assert( nPre<nTerm ); + } + nSuffix = nTerm; + } + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nSuffix); + fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); + + if( pgsz>=(pBuf->n + nDoclist + 1) ){ + /* The entire doclist will fit on the current leaf. */ + fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); + }else{ + i64 iRowid = 0; + i64 iDelta = 0; + int iOff = 0; + int bFirstDocid = 0; + + /* The entire doclist will not fit on this leaf. The following + ** loop iterates through the poslists that make up the current + ** doclist. */ + while( iOff<nDoclist ){ + int nPos; + int nCopy; + int bDummy; + iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta); + nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); + nCopy += nPos; + iRowid += iDelta; + + if( bFirstDocid ){ + fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); + bFirstDocid = 0; + fts5WriteDlidxAppend(p, &writer, iRowid); + }else{ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); + } + assert( pBuf->n<=pBuf->nSpace ); + + if( (pBuf->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. */ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( 1 ){ + int nSpace = pgsz - pBuf->n; + int n = 0; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + assert( n>=nSpace ); + } + assert( n>0 ); + fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); + iPos += n; + if( pBuf->n>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + } + if( iPos>=nCopy ) break; + } + bFirstDocid = 1; + } + iOff += nCopy; + } + } + + pBuf->p[pBuf->n++] = '\0'; + assert( pBuf->n<=pBuf->nSpace ); + zPrev = (const u8*)zTerm; + sqlite3Fts5HashScanNext(pHash); + } + sqlite3Fts5HashClear(pHash); + fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); + + /* Update the Fts5Structure. It is written back to the database by the + ** fts5StructureRelease() call below. */ + if( pStruct->nLevel==0 ){ + fts5StructureAddLevel(&p->rc, &pStruct); + } + fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); + if( p->rc==SQLITE_OK ){ + pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; + pSeg->iSegid = iSegid; + pSeg->nHeight = nHeight; + pSeg->pgnoFirst = 1; + pSeg->pgnoLast = pgnoLast; + } + fts5StructurePromote(p, 0, pStruct); + } + + + if( p->pConfig->nAutomerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); + fts5IndexCrisisMerge(p, iHash, &pStruct); + fts5StructureWrite(p, iHash, pStruct); + fts5StructureRelease(pStruct); +} + +/* +** Flush any data stored in the in-memory hash tables to the database. +*/ +static void fts5IndexFlush(Fts5Index *p){ + Fts5Config *pConfig = p->pConfig; + int i; /* Used to iterate through indexes */ + int nLeaf = 0; /* Number of leaves written */ + + /* If an error has already occured this call is a no-op. */ + if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return; + assert( p->apHash ); + + /* Flush the terms and each prefix index to disk */ + for(i=0; i<=pConfig->nPrefix; i++){ + fts5FlushOneHash(p, i, &nLeaf); + } + p->nPendingData = 0; +} + + +int sqlite3Fts5IndexOptimize(Fts5Index *p){ + Fts5Config *pConfig = p->pConfig; + int i; + + fts5IndexFlush(p); + for(i=0; i<=pConfig->nPrefix; i++){ + Fts5Structure *pStruct = fts5StructureRead(p, i); + Fts5Structure *pNew = 0; + int nSeg = 0; + if( pStruct ){ + nSeg = fts5StructureCountSegments(pStruct); + if( nSeg>1 ){ + int nByte = sizeof(Fts5Structure); + nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); + pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); + } + } + if( pNew ){ + Fts5StructureLevel *pLvl; + int nByte = nSeg * sizeof(Fts5StructureSegment); + pNew->nLevel = pStruct->nLevel+1; + pNew->nWriteCounter = pStruct->nWriteCounter; + pLvl = &pNew->aLevel[pStruct->nLevel]; + pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( pLvl->aSeg ){ + int iLvl, iSeg; + int iSegOut = 0; + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ + pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; + iSegOut++; + } + } + pLvl->nSeg = nSeg; + }else{ + sqlite3_free(pNew); + pNew = 0; + } + } + + if( pNew ){ + int iLvl = pNew->nLevel-1; + while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ + int nRem = FTS5_OPT_WORK_UNIT; + fts5IndexMergeLevel(p, i, &pNew, iLvl, &nRem); + } + + fts5StructureWrite(p, i, pNew); + fts5StructureRelease(pNew); + } + + fts5StructureRelease(pStruct); + } + + return fts5IndexReturn(p); +} + + + +/* +** Return a simple checksum value based on the arguments. +*/ +static u64 fts5IndexEntryCksum( + i64 iRowid, + int iCol, + int iPos, + const char *pTerm, + int nTerm +){ + int i; + u64 ret = iRowid; + ret += (ret<<3) + iCol; + ret += (ret<<3) + iPos; + for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; + return ret; +} + +static void fts5BtreeIterInit( + Fts5Index *p, + int iIdx, + Fts5StructureSegment *pSeg, + Fts5BtreeIter *pIter +){ + int nByte; + int i; + nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1); + memset(pIter, 0, sizeof(*pIter)); + if( nByte ){ + pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte); + } + if( p->rc==SQLITE_OK ){ + pIter->nLvl = pSeg->nHeight-1; + pIter->iIdx = iIdx; + pIter->p = p; + pIter->pSeg = pSeg; + } + for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){ + i64 iRowid = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, i+1, 1); + Fts5Data *pData; + pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); + if( pData ){ + fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s); + } + } + + if( pIter->nLvl==0 || p->rc ){ + pIter->bEof = 1; + pIter->iLeaf = pSeg->pgnoLast; + }else{ + pIter->nEmpty = pIter->aLvl[0].s.nEmpty; + pIter->iLeaf = pIter->aLvl[0].s.iChild; + pIter->bDlidx = pIter->aLvl[0].s.bDlidx; + } +} + +static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ + Fts5Index *p = pIter->p; + int i; + + assert( pIter->bEof==0 && pIter->aLvl[0].s.aData ); + for(i=0; i<pIter->nLvl && p->rc==SQLITE_OK; i++){ + Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; + fts5NodeIterNext(&p->rc, &pLvl->s); + if( pLvl->s.aData ){ + fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p); + break; + }else{ + fts5NodeIterFree(&pLvl->s); + fts5DataRelease(pLvl->pData); + pLvl->pData = 0; + } + } + if( i==pIter->nLvl || p->rc ){ + pIter->bEof = 1; + }else{ + int iSegid = pIter->pSeg->iSegid; + for(i--; i>=0; i--){ + Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; + i64 iRowid = FTS5_SEGMENT_ROWID(pIter->iIdx,iSegid,i+1,pLvl[1].s.iChild); + pLvl->pData = fts5DataRead(p, iRowid); + if( pLvl->pData ){ + fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); + } + } + } + + pIter->nEmpty = pIter->aLvl[0].s.nEmpty; + pIter->bDlidx = pIter->aLvl[0].s.bDlidx; + pIter->iLeaf = pIter->aLvl[0].s.iChild; + assert( p->rc==SQLITE_OK || pIter->bEof ); +} + +static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ + int i; + for(i=0; i<pIter->nLvl; i++){ + Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; + fts5NodeIterFree(&pLvl->s); + if( pLvl->pData ){ + fts5DataRelease(pLvl->pData); + pLvl->pData = 0; + } + } + sqlite3_free(pIter->aLvl); + fts5BufferFree(&pIter->term); +} + +/* +** This function is purely an internal test. It does not contribute to +** FTS functionality, or even the integrity-check, in any way. +** +** Instead, it tests that the same set of pgno/rowid combinations are +** visited regardless of whether the doclist-index identified by parameters +** iIdx/iSegid/iLeaf is iterated in forwards or reverse order. +*/ +#ifdef SQLITE_DEBUG +static void fts5DlidxIterTestReverse( + Fts5Index *p, + int iIdx, /* Index to load doclist-index from */ + int iSegid, /* Segment id to load from */ + int iLeaf /* Load doclist-index for this leaf */ +){ + Fts5DlidxIter *pDlidx = 0; + i64 cksum1 = 13; + i64 cksum2 = 13; + + for(pDlidx=fts5DlidxIterInit(p, 0, iIdx, iSegid, iLeaf); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterNext(pDlidx) + ){ + assert( pDlidx->iLeafPgno>iLeaf ); + cksum1 = (cksum1 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); + cksum1 = (cksum1 ^ pDlidx->iRowid); + } + fts5DlidxIterFree(pDlidx); + pDlidx = 0; + + for(pDlidx=fts5DlidxIterInit(p, 1, iIdx, iSegid, iLeaf); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterPrev(pDlidx) + ){ + assert( pDlidx->iLeafPgno>iLeaf ); + cksum2 = (cksum2 ^ ( (i64)(pDlidx->iLeafPgno) << 32 )); + cksum2 = (cksum2 ^ pDlidx->iRowid); + } + fts5DlidxIterFree(pDlidx); + pDlidx = 0; + + if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; +} +#else +# define fts5DlidxIterTestReverse(w,x,y,z) +#endif + +static void fts5IndexIntegrityCheckSegment( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index that pSeg is a part of */ + Fts5StructureSegment *pSeg /* Segment to check internal consistency */ +){ + Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ + + /* Iterate through the b-tree hierarchy. */ + for(fts5BtreeIterInit(p, iIdx, pSeg, &iter); + iter.bEof==0; + fts5BtreeIterNext(&iter) + ){ + i64 iRow; /* Rowid for this leaf */ + Fts5Data *pLeaf; /* Data for this leaf */ + int iOff; /* Offset of first term on leaf */ + int i; /* Used to iterate through empty leaves */ + + /* If the leaf in question has already been trimmed from the segment, + ** ignore this b-tree entry. Otherwise, load it into memory. */ + if( iter.iLeaf<pSeg->pgnoFirst ) continue; + iRow = FTS5_SEGMENT_ROWID(iIdx, pSeg->iSegid, 0, iter.iLeaf); + pLeaf = fts5DataRead(p, iRow); + if( pLeaf==0 ) break; + + /* Check that the leaf contains at least one term, and that it is equal + ** to or larger than the split-key in iter.term. */ + iOff = fts5GetU16(&pLeaf->p[2]); + if( iOff==0 ){ + p->rc = FTS5_CORRUPT; + }else{ + int nTerm; /* Size of term on leaf in bytes */ + int res; /* Comparison of term and split-key */ + iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); + res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); + if( res==0 ) res = nTerm - iter.term.n; + if( res<0 ){ + p->rc = FTS5_CORRUPT; + } + } + fts5DataRelease(pLeaf); + if( p->rc ) break; + + /* Now check that the iter.nEmpty leaves following the current leaf + ** (a) exist and (b) contain no terms. */ + for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ + pLeaf = fts5DataRead(p, iRow+i); + if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ + p->rc = FTS5_CORRUPT; + } + fts5DataRelease(pLeaf); + } + + /* If there is a doclist-index, check that it looks right. */ + if( iter.bDlidx ){ + Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ + int iPrevLeaf = iter.iLeaf; + int iSegid = pSeg->iSegid; + int iPg; + i64 iKey; + + for(pDlidx=fts5DlidxIterInit(p, 0, iIdx, iSegid, iter.iLeaf); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterNext(pDlidx) + ){ + + /* Check any rowid-less pages that occur before the current leaf. */ + for(iPg=iPrevLeaf+1; iPg<pDlidx->iLeafPgno; iPg++){ + iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ + if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; + fts5DataRelease(pLeaf); + } + } + iPrevLeaf = pDlidx->iLeafPgno; + + /* Check that the leaf page indicated by the iterator really does + ** contain the rowid suggested by the same. */ + iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ + i64 iRowid; + int iRowidOff = fts5GetU16(&pLeaf->p[0]); + getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); + if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; + fts5DataRelease(pLeaf); + } + + } + + for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ + iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, iPg); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ + if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; + fts5DataRelease(pLeaf); + } + } + + fts5DlidxIterFree(pDlidx); + fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); + } + } + + /* Either iter.iLeaf must be the rightmost leaf-page in the segment, or + ** else the segment has been completely emptied by an ongoing merge + ** operation. */ + if( p->rc==SQLITE_OK + && iter.iLeaf!=pSeg->pgnoLast + && (pSeg->pgnoFirst || pSeg->pgnoLast) + ){ + p->rc = FTS5_CORRUPT; + } + + fts5BtreeIterFree(&iter); +} + +/* +** Iterator pMulti currently points to a valid entry (not EOF). This +** function appends a copy of the position-list of the entry pMulti +** currently points to to buffer pBuf. +** +** If an error occurs, an error code is left in p->rc. It is assumed +** no error has already occurred when this function is called. +*/ +static void fts5MultiIterPoslist( + Fts5Index *p, + Fts5MultiSegIter *pMulti, + int bSz, + Fts5Buffer *pBuf +){ + if( p->rc==SQLITE_OK ){ + Fts5ChunkIter iter; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; + assert( fts5MultiIterEof(p, pMulti)==0 ); + fts5ChunkIterInit(p, pSeg, &iter); + if( fts5ChunkIterEof(p, &iter)==0 ){ + if( bSz ){ + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); + } + while( fts5ChunkIterEof(p, &iter)==0 ){ + fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); + fts5ChunkIterNext(p, &iter); + } + } + fts5ChunkIterRelease(&iter); + } +} + +static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ + if( pIter->i<pIter->n ){ + int bDummy; + if( pIter->i ){ + i64 iDelta; + pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&iDelta); + if( pIter->bDesc ){ + pIter->iRowid -= iDelta; + }else{ + pIter->iRowid += iDelta; + } + }else{ + pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); + } + pIter->i += fts5GetPoslistSize( + &pIter->a[pIter->i], &pIter->nPoslist, &bDummy + ); + pIter->aPoslist = &pIter->a[pIter->i]; + pIter->i += pIter->nPoslist; + }else{ + pIter->aPoslist = 0; + } +} + +static void fts5DoclistIterInit( + Fts5Buffer *pBuf, + int bDesc, + Fts5DoclistIter *pIter +){ + memset(pIter, 0, sizeof(*pIter)); + pIter->a = pBuf->p; + pIter->n = pBuf->n; + pIter->bDesc = bDesc; + fts5DoclistIterNext(pIter); +} + +/* +** Append a doclist to buffer pBuf. +*/ +static void fts5MergeAppendDocid( + int *pRc, /* IN/OUT: Error code */ + int bDesc, + Fts5Buffer *pBuf, /* Buffer to write to */ + i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ + i64 iRowid /* Rowid to append */ +){ + if( pBuf->n==0 ){ + fts5BufferAppendVarint(pRc, pBuf, iRowid); + }else if( bDesc ){ + fts5BufferAppendVarint(pRc, pBuf, *piLastRowid - iRowid); + }else{ + fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); + } + *piLastRowid = iRowid; +} + +/* +** Buffers p1 and p2 contain doclists. This function merges the content +** of the two doclists together and sets buffer p1 to the result before +** returning. +** +** If an error occurs, an error code is left in p->rc. If an error has +** already occurred, this function is a no-op. +*/ +static void fts5MergePrefixLists( + Fts5Index *p, /* FTS5 backend object */ + int bDesc, + Fts5Buffer *p1, /* First list to merge */ + Fts5Buffer *p2 /* Second list to merge */ +){ + if( p2->n ){ + i64 iLastRowid = 0; + Fts5DoclistIter i1; + Fts5DoclistIter i2; + Fts5Buffer out; + Fts5Buffer tmp; + memset(&out, 0, sizeof(out)); + memset(&tmp, 0, sizeof(tmp)); + + fts5DoclistIterInit(p1, bDesc, &i1); + fts5DoclistIterInit(p2, bDesc, &i2); + while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ + if( i2.aPoslist==0 || (i1.aPoslist && + ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowid<i2.iRowid) ) + )){ + /* Copy entry from i1 */ + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); + fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); + fts5DoclistIterNext(&i1); + } + else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ + /* Copy entry from i2 */ + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); + fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); + fts5DoclistIterNext(&i2); + } + else{ + Fts5PoslistReader r1; + Fts5PoslistReader r2; + Fts5PoslistWriter writer; + + memset(&writer, 0, sizeof(writer)); + + /* Merge the two position lists. */ + fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); + fts5BufferZero(&tmp); + sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); + sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); + while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){ + i64 iNew; + if( r2.bEof || (r1.bEof==0 && r1.iPos<r2.iPos) ){ + iNew = r1.iPos; + sqlite3Fts5PoslistReaderNext(&r1); + }else{ + iNew = r2.iPos; + sqlite3Fts5PoslistReaderNext(&r2); + if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1); + } + p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + } + + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); + fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); + fts5DoclistIterNext(&i1); + fts5DoclistIterNext(&i2); + } + } + + fts5BufferSet(&p->rc, p1, out.n, out.p); + fts5BufferFree(&tmp); + fts5BufferFree(&out); + } +} + +static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ + Fts5Buffer tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +static void fts5SetupPrefixIter( + Fts5Index *p, /* Index to read from */ + int bDesc, /* True for "ORDER BY rowid DESC" */ + const u8 *pToken, /* Buffer containing prefix to match */ + int nToken, /* Size of buffer pToken in bytes */ + Fts5IndexIter *pIter /* Populate this object */ +){ + Fts5Structure *pStruct; + Fts5Buffer *aBuf; + const int nBuf = 32; + + aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); + pStruct = fts5StructureRead(p, 0); + + if( aBuf && pStruct ){ + Fts5DoclistIter *pDoclist; + int i; + i64 iLastRowid = 0; + Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ + Fts5Buffer doclist; + + memset(&doclist, 0, sizeof(doclist)); + for(fts5MultiIterNew(p, pStruct, 0, 1, 1, pToken, nToken, -1, 0, &p1); + fts5MultiIterEof(p, p1)==0; + fts5MultiIterNext(p, p1, 0, 0) + ){ + i64 iRowid = fts5MultiIterRowid(p1); + int nTerm; + const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); + assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); + if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; + + if( doclist.n>0 + && ((!bDesc && iRowid<=iLastRowid) || (bDesc && iRowid>=iLastRowid)) + ){ + + for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ + assert( i<nBuf ); + if( aBuf[i].n==0 ){ + fts5BufferSwap(&doclist, &aBuf[i]); + fts5BufferZero(&doclist); + }else{ + fts5MergePrefixLists(p, bDesc, &doclist, &aBuf[i]); + fts5BufferZero(&aBuf[i]); + } + } + } + if( doclist.n==0 ){ + fts5BufferAppendVarint(&p->rc, &doclist, iRowid); + }else if( bDesc ){ + fts5BufferAppendVarint(&p->rc, &doclist, iLastRowid - iRowid); + }else{ + fts5BufferAppendVarint(&p->rc, &doclist, iRowid - iLastRowid); + } + iLastRowid = iRowid; + fts5MultiIterPoslist(p, p1, 1, &doclist); + } + + for(i=0; i<nBuf; i++){ + fts5MergePrefixLists(p, bDesc, &doclist, &aBuf[i]); + fts5BufferFree(&aBuf[i]); + } + fts5MultiIterFree(p, p1); + + pDoclist = (Fts5DoclistIter*)fts5IdxMalloc(p, sizeof(Fts5DoclistIter)); + if( !pDoclist ){ + fts5BufferFree(&doclist); + }else{ + pIter->pDoclist = pDoclist; + fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist); + } + } + + fts5StructureRelease(pStruct); + sqlite3_free(aBuf); +} + +static int fts5QueryCksum( + Fts5Index *p, + const char *z, + int n, + int flags, + u64 *pCksum +){ + u64 cksum = *pCksum; + Fts5IndexIter *pIdxIter = 0; + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + const u8 *pPos; + int nPos; + i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); + } + rc = sqlite3Fts5IterNext(pIdxIter); + } + } + sqlite3Fts5IterClose(pIdxIter); + + *pCksum = cksum; + return rc; +} + +/* +** Run internal checks to ensure that the FTS index (a) is internally +** consistent and (b) contains entries for which the XOR of the checksums +** as calculated by fts5IndexEntryCksum() is cksum. +** +** Return SQLITE_CORRUPT if any of the internal checks fail, or if the +** checksum does not match. Return SQLITE_OK if all checks pass without +** error, or some other SQLite error code if another error (e.g. OOM) +** occurs. +*/ +int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ + Fts5Config *pConfig = p->pConfig; + int iIdx; /* Used to iterate through indexes */ + u64 cksum2 = 0; /* Checksum based on contents of indexes */ + u64 cksum3 = 0; /* Checksum based on contents of indexes */ + Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ + + /* Check that the internal nodes of each segment match the leaves */ + for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ + Fts5Structure *pStruct = fts5StructureRead(p, iIdx); + if( pStruct ){ + int iLvl, iSeg; + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); + } + } + } + fts5StructureRelease(pStruct); + } + + /* The cksum argument passed to this function is a checksum calculated + ** based on all expected entries in the FTS index (including prefix index + ** entries). This block checks that a checksum calculated based on the + ** actual contents of FTS index is identical. + ** + ** Two versions of the same checksum are calculated. The first (stack + ** variable cksum2) based on entries extracted from the full-text index + ** while doing a linear scan of each individual index in turn. + ** + ** As each term visited by the linear scans, a separate query for the + ** same term is performed. cksum3 is calculated based on the entries + ** extracted by these queries. + */ + for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ + Fts5MultiSegIter *pIter; + Fts5Structure *pStruct = fts5StructureRead(p, iIdx); + for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter); + fts5MultiIterEof(p, pIter)==0; + fts5MultiIterNext(p, pIter, 0, 0) + ){ + Fts5PosIter sPos; /* Used to iterate through position list */ + int n; /* Size of term in bytes */ + i64 iRowid = fts5MultiIterRowid(pIter); + char *z = (char*)fts5MultiIterTerm(pIter, &n); + + /* Update cksum2 with the entries associated with the current term + ** and rowid. */ + for(fts5PosIterInit(p, pIter, &sPos); + fts5PosIterEof(p, &sPos)==0; + fts5PosIterNext(p, &sPos) + ){ + cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n); + } + + /* If this is a new term, query for it. Update cksum3 with the results. */ + if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ + int rc; + int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); + u64 ck1 = 0; + u64 ck2 = 0; + + /* Check that the results returned for ASC and DESC queries are + ** the same. If not, call this corruption. */ + rc = fts5QueryCksum(p, z, n, flags, &ck1); + if( rc==SQLITE_OK ){ + rc = fts5QueryCksum(p, z, n, flags|FTS5INDEX_QUERY_DESC, &ck2); + } + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + + /* If this is a prefix query, check that the results returned if the + ** the index is disabled are the same. In both ASC and DESC order. */ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, z, n, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, z, n, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + + cksum3 ^= ck1; + fts5BufferSet(&rc, &term, n, (const u8*)z); + p->rc = rc; + } + } + fts5MultiIterFree(p, pIter); + fts5StructureRelease(pStruct); + } + if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; + if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT; + + fts5BufferFree(&term); + return fts5IndexReturn(p); +} + + +/* +** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain +** to the document with rowid iRowid. +*/ +int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ + assert( p->rc==SQLITE_OK ); + + /* Allocate hash tables if they have not already been allocated */ + if( p->apHash==0 ){ + int i; + int rc = SQLITE_OK; + int nHash = p->pConfig->nPrefix + 1; + Fts5Hash **apNew; + + apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash); + for(i=0; rc==SQLITE_OK && i<nHash; i++){ + rc = sqlite3Fts5HashNew(&apNew[i], &p->nPendingData); + } + if( rc==SQLITE_OK ){ + p->apHash = apNew; + }else{ + if( apNew ){ + for(i=0; i<nHash; i++){ + sqlite3Fts5HashFree(apNew[i]); + } + sqlite3_free(apNew); + } + return rc; + } + } + + if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ + fts5IndexFlush(p); + } + p->iWriteRowid = iRowid; + return fts5IndexReturn(p); +} + +/* +** Commit data to disk. +*/ +int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ + assert( p->rc==SQLITE_OK ); + fts5IndexFlush(p); + if( bCommit ) fts5CloseReader(p); + return fts5IndexReturn(p); +} + +/* +** Discard any data stored in the in-memory hash tables. Do not write it +** to the database. Additionally, assume that the contents of the %_data +** table may have changed on disk. So any in-memory caches of %_data +** records must be invalidated. +*/ +int sqlite3Fts5IndexRollback(Fts5Index *p){ + fts5CloseReader(p); + fts5IndexDiscardData(p); + assert( p->rc==SQLITE_OK ); + return SQLITE_OK; +} + +/* +** The %_data table is completely empty when this function is called. This +** function populates it with the initial structure objects for each index, +** and the initial version of the "averages" record (a zero-byte blob). +*/ +int sqlite3Fts5IndexReinit(Fts5Index *p){ + int i; + Fts5Structure s; + + memset(&s, 0, sizeof(Fts5Structure)); + for(i=0; i<p->pConfig->nPrefix+1; i++){ + fts5StructureWrite(p, i, &s); + } + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); + } + + return fts5IndexReturn(p); +} + +/* +** Open a new Fts5Index handle. If the bCreate argument is true, create +** and initialize the underlying %_data table. +** +** If successful, set *pp to point to the new object and return SQLITE_OK. +** Otherwise, set *pp to NULL and return an SQLite error code. +*/ +int sqlite3Fts5IndexOpen( + Fts5Config *pConfig, + int bCreate, + Fts5Index **pp, + char **pzErr +){ + int rc = SQLITE_OK; + Fts5Index *p; /* New object */ + + *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); + if( !p ) return SQLITE_NOMEM; + + memset(p, 0, sizeof(Fts5Index)); + p->pConfig = pConfig; + p->nWorkUnit = FTS5_WORK_UNIT; + p->nMaxPendingData = 1024*1024; + p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); + if( p->zDataTbl==0 ){ + rc = SQLITE_NOMEM; + }else if( bCreate ){ + rc = sqlite3Fts5CreateTable( + pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexReinit(p); + } + } + + assert( p->rc==SQLITE_OK || rc!=SQLITE_OK ); + if( rc ){ + sqlite3Fts5IndexClose(p, 0); + *pp = 0; + } + return rc; +} + +/* +** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). +*/ +int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){ + int rc = SQLITE_OK; + if( p ){ + if( bDestroy ){ + rc = sqlite3Fts5DropTable(p->pConfig, "data"); + } + assert( p->pReader==0 ); + sqlite3_finalize(p->pWriter); + sqlite3_finalize(p->pDeleter); + if( p->apHash ){ + int i; + for(i=0; i<=p->pConfig->nPrefix; i++){ + sqlite3Fts5HashFree(p->apHash[i]); + } + sqlite3_free(p->apHash); + } + sqlite3_free(p->zDataTbl); + sqlite3_free(p); + } + return rc; +} + +/* +** Argument p points to a buffer containing utf-8 text that is n bytes in +** size. Return the number of bytes in the nChar character prefix of the +** buffer, or 0 if there are less than nChar characters in total. +*/ +static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ + int n = 0; + int i; + for(i=0; i<nChar; i++){ + if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */ + if( (unsigned char)p[n++]>=0xc0 ){ + while( (p[n] & 0xc0)==0x80 ) n++; + } + } + return n; +} + +/* +** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of +** unicode characters in the string. +*/ +int fts5IndexCharlen(const char *pIn, int nIn){ + int nChar = 0; + int i = 0; + while( i<nIn ){ + if( (unsigned char)pIn[i++]>=0xc0 ){ + while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++; + } + nChar++; + } + return nChar; +} + +/* +** Calculate and return a checksum that is the XOR of the index entry +** checksum of all entries that would be generated by the token specified +** by the final 5 arguments. +*/ +u64 sqlite3Fts5IndexCksum( + Fts5Config *pConfig, /* Configuration object */ + i64 iRowid, /* Document term appears in */ + int iCol, /* Column term appears in */ + int iPos, /* Position term appears in */ + const char *pTerm, int nTerm /* Term at iPos */ +){ + u64 ret = 0; /* Return value */ + int iIdx; /* For iterating through indexes */ + + ret = fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nTerm); + + for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){ + int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); + if( nByte ){ + ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, pTerm, nByte); + } + } + + return ret; +} + +/* +** Insert or remove data to or from the index. Each time a document is +** added to or removed from the index, this function is called one or more +** times. +** +** For an insert, it must be called once for each token in the new document. +** If the operation is a delete, it must be called (at least) once for each +** unique token in the document with an iCol value less than zero. The iPos +** argument is ignored for a delete. +*/ +int sqlite3Fts5IndexWrite( + Fts5Index *p, /* Index to write to */ + int iCol, /* Column token appears in (-ve -> delete) */ + int iPos, /* Position of token within column */ + const char *pToken, int nToken /* Token to add or remove to or from index */ +){ + int i; /* Used to iterate through indexes */ + int rc; /* Return code */ + Fts5Config *pConfig = p->pConfig; + + assert( p->rc==SQLITE_OK ); + + /* Add the new token to the main terms hash table. And to each of the + ** prefix hash tables that it is large enough for. */ + rc = sqlite3Fts5HashWrite( + p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken + ); + for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ + int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); + if( nByte ){ + rc = sqlite3Fts5HashWrite( + p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte + ); + } + } + + return rc; +} + +/* +** Open a new iterator to iterate though all docids that match the +** specified token or token prefix. +*/ +int sqlite3Fts5IndexQuery( + Fts5Index *p, /* FTS index to query */ + const char *pToken, int nToken, /* Token (or prefix) to query for */ + int flags, /* Mask of FTS5INDEX_QUERY_X flags */ + Fts5IndexIter **ppIter /* OUT: New iterator object */ +){ + Fts5Config *pConfig = p->pConfig; + Fts5IndexIter *pRet; + int iIdx = 0; + + if( flags & FTS5INDEX_QUERY_PREFIX ){ + if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + iIdx = 1+pConfig->nPrefix; + }else{ + int nChar = fts5IndexCharlen(pToken, nToken); + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nChar ) break; + } + } + } + + pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); + if( pRet ){ + memset(pRet, 0, sizeof(Fts5IndexIter)); + + pRet->pIndex = p; + if( iIdx<=pConfig->nPrefix ){ + pRet->pStruct = fts5StructureRead(p, iIdx); + if( pRet->pStruct ){ + fts5MultiIterNew(p, pRet->pStruct, + iIdx, 1, flags, (const u8*)pToken, nToken, -1, 0, &pRet->pMulti + ); + } + }else{ + int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; + fts5SetupPrefixIter(p, bDesc, (const u8*)pToken, nToken, pRet); + } + } + + if( p->rc ){ + sqlite3Fts5IterClose(pRet); + pRet = 0; + } + *ppIter = pRet; + return fts5IndexReturn(p); +} + +/* +** Return true if the iterator passed as the only argument is at EOF. +*/ +int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ + assert( pIter->pIndex->rc==SQLITE_OK ); + if( pIter->pDoclist ){ + return pIter->pDoclist->aPoslist==0; + }else{ + return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); + } +} + +/* +** Move to the next matching rowid. +*/ +int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ + assert( pIter->pIndex->rc==SQLITE_OK ); + if( pIter->pDoclist ){ + fts5DoclistIterNext(pIter->pDoclist); + }else{ + fts5BufferZero(&pIter->poslist); + fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); + } + return fts5IndexReturn(pIter->pIndex); +} + +/* +** Move the doclist-iter passed as the first argument to the next +** matching rowid that occurs at or after iMatch. The definition of "at +** or after" depends on whether this iterator iterates in ascending or +** descending rowid order. +*/ +static void fts5DoclistIterNextFrom(Fts5DoclistIter *p, i64 iMatch){ + do{ + i64 iRowid = p->iRowid; + if( p->bDesc==0 && iRowid>=iMatch ) break; + if( p->bDesc!=0 && iRowid<=iMatch ) break; + fts5DoclistIterNext(p); + }while( p->aPoslist ); +} + +/* +** Move to the next matching rowid that occurs at or after iMatch. The +** definition of "at or after" depends on whether this iterator iterates +** in ascending or descending rowid order. +*/ +int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ + if( pIter->pDoclist ){ + fts5DoclistIterNextFrom(pIter->pDoclist, iMatch); + }else{ + fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); + } + return fts5IndexReturn(pIter->pIndex); +} + +/* +** Return the current rowid. +*/ +i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ + if( pIter->pDoclist ){ + return pIter->pDoclist->iRowid; + }else{ + return fts5MultiIterRowid(pIter->pMulti); + } +} + + +/* +** Return a pointer to a buffer containing a copy of the position list for +** the current entry. Output variable *pn is set to the size of the buffer +** in bytes before returning. +** +** The returned position list does not include the "number of bytes" varint +** field that starts the position list on disk. +*/ +int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ + assert( pIter->pIndex->rc==SQLITE_OK ); + if( pIter->pDoclist ){ + *pn = pIter->pDoclist->nPoslist; + *pp = pIter->pDoclist->aPoslist; + }else{ + Fts5Index *p = pIter->pIndex; + fts5BufferZero(&pIter->poslist); + fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist); + *pn = pIter->poslist.n; + *pp = pIter->poslist.p; + } + return fts5IndexReturn(pIter->pIndex); +} + +/* +** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). +*/ +void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ + if( pIter ){ + if( pIter->pDoclist ){ + sqlite3_free(pIter->pDoclist->a); + sqlite3_free(pIter->pDoclist); + }else{ + fts5MultiIterFree(pIter->pIndex, pIter->pMulti); + fts5StructureRelease(pIter->pStruct); + fts5BufferFree(&pIter->poslist); + } + fts5CloseReader(pIter->pIndex); + sqlite3_free(pIter); + } +} + +/* +** Read the "averages" record into the buffer supplied as the second +** argument. Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ + assert( p->rc==SQLITE_OK ); + fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); + return fts5IndexReturn(p); +} + +/* +** Replace the current "averages" record with the contents of the buffer +** supplied as the second argument. +*/ +int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ + assert( p->rc==SQLITE_OK ); + fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); + return fts5IndexReturn(p); +} + +/* +** Return the total number of blocks this module has read from the %_data +** table since it was created. +*/ +int sqlite3Fts5IndexReads(Fts5Index *p){ + return p->nRead; +} + +/* +** Set the 32-bit cookie value stored at the start of all structure +** records to the value passed as the second argument. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. +*/ +int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ + int rc = SQLITE_OK; + Fts5Config *pConfig = p->pConfig; + u8 aCookie[4]; + int i; + + assert( p->rc==SQLITE_OK ); + sqlite3Fts5Put32(aCookie, iNew); + for(i=0; rc==SQLITE_OK && i<=pConfig->nPrefix; i++){ + sqlite3_blob *pBlob = 0; + i64 iRowid = FTS5_STRUCTURE_ROWID(i); + rc = sqlite3_blob_open( + pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 1, &pBlob + ); + if( rc==SQLITE_OK ){ + sqlite3_blob_write(pBlob, aCookie, 4, 0); + rc = sqlite3_blob_close(pBlob); + } + } + + return rc; +} + +int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ + Fts5Structure *pStruct; + pStruct = fts5StructureRead(p, 0); + fts5StructureRelease(pStruct); + return fts5IndexReturn(p); +} + +/************************************************************************* +************************************************************************** +** Below this point is the implementation of the fts5_decode() scalar +** function only. +*/ + +/* +** Decode a segment-data rowid from the %_data table. This function is +** the opposite of macro FTS5_SEGMENT_ROWID(). +*/ +static void fts5DecodeRowid( + i64 iRowid, /* Rowid from %_data table */ + int *piIdx, /* OUT: Index */ + int *piSegid, /* OUT: Segment id */ + int *piHeight, /* OUT: Height */ + int *piPgno /* OUT: Page number */ +){ + *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); + iRowid >>= FTS5_DATA_PAGE_B; + + *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); + iRowid >>= FTS5_DATA_HEIGHT_B; + + *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); + iRowid >>= FTS5_DATA_ID_B; + + *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); +} + +static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ + int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); + + if( iSegid==0 ){ + if( iKey==FTS5_AVERAGES_ROWID ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + "{structure idx=%d}", (int)(iKey-10) + ); + } + } + else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", + iIdx, iSegid, iPgno + ); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)", + iIdx, iSegid, iHeight, iPgno + ); + } +} + +static void fts5DebugStructure( + int *pRc, /* IN/OUT: error code */ + Fts5Buffer *pBuf, + Fts5Structure *p +){ + int iLvl, iSeg; /* Iterate through levels, segments */ + + for(iLvl=0; iLvl<p->nLevel; iLvl++){ + Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge + ); + for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, + pSeg->pgnoFirst, pSeg->pgnoLast + ); + } + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); + } +} + +/* +** This is part of the fts5_decode() debugging aid. +** +** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This +** function appends a human-readable representation of the same object +** to the buffer passed as the second argument. +*/ +static void fts5DecodeStructure( + int *pRc, /* IN/OUT: error code */ + Fts5Buffer *pBuf, + const u8 *pBlob, int nBlob +){ + int rc; /* Return code */ + Fts5Structure *p = 0; /* Decoded structure object */ + + rc = fts5StructureDecode(pBlob, nBlob, 0, &p); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + + fts5DebugStructure(pRc, pBuf, p); + fts5StructureRelease(p); +} + +/* +** Buffer (a/n) is assumed to contain a list of serialized varints. Read +** each varint and append its string representation to buffer pBuf. Return +** after either the input buffer is exhausted or a 0 value is read. +** +** The return value is the number of bytes read from the input buffer. +*/ +static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ + int iOff = 0; + while( iOff<n ){ + int iVal; + iOff += fts5GetVarint32(&a[iOff], iVal); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); + } + return iOff; +} + +/* +** The start of buffer (a/n) contains the start of a doclist. The doclist +** may or may not finish within the buffer. This function appends a text +** representation of the part of the doclist that is present to buffer +** pBuf. +** +** The return value is the number of bytes read from the input buffer. +*/ +static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ + i64 iDocid; + int iOff = 0; + + if( iOff<n ){ + iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); + } + while( iOff<n ){ + int nPos; + int bDummy; + iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); + iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); + if( iOff<n ){ + i64 iDelta; + iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta); + if( iDelta==0 ) return iOff; + iDocid += iDelta; + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); + } + } + + return iOff; +} + +/* +** The implementation of user-defined scalar function fts5_decode(). +*/ +static void fts5DecodeFunction( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args (always 2) */ + sqlite3_value **apVal /* Function arguments */ +){ + i64 iRowid; /* Rowid for record being decoded */ + int iIdx,iSegid,iHeight,iPgno; /* Rowid components */ + const u8 *aBlob; int n; /* Record to decode */ + u8 *a = 0; + Fts5Buffer s; /* Build up text to return here */ + int rc = SQLITE_OK; /* Return code */ + int nSpace = 0; + + assert( nArg==2 ); + memset(&s, 0, sizeof(Fts5Buffer)); + iRowid = sqlite3_value_int64(apVal[0]); + n = sqlite3_value_bytes(apVal[1]); + aBlob = sqlite3_value_blob(apVal[1]); + + nSpace = n + FTS5_DATA_ZERO_PADDING; + a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); + if( a==0 ) goto decode_out; + memcpy(a, aBlob, n); + fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); + + fts5DebugRowid(&rc, &s, iRowid); + if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + Fts5Data dlidx; + Fts5DlidxIter iter; + + dlidx.p = a; + dlidx.n = n; + dlidx.nRef = 2; + + memset(&iter, 0, sizeof(Fts5DlidxIter)); + iter.pData = &dlidx; + iter.iLeafPgno = iPgno; + + for(fts5DlidxIterFirst(&iter); iter.bEof==0; fts5DlidxIterNext(&iter)){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, + " %d(%lld)", iter.iLeafPgno, iter.iRowid + ); + } + }else if( iSegid==0 ){ + if( iRowid==FTS5_AVERAGES_ROWID ){ + /* todo */ + }else{ + fts5DecodeStructure(&rc, &s, a, n); + } + }else{ + + Fts5Buffer term; + memset(&term, 0, sizeof(Fts5Buffer)); + + if( iHeight==0 ){ + int iTermOff = 0; + int iRowidOff = 0; + int iOff; + int nKeep = 0; + + if( n>=4 ){ + iRowidOff = fts5GetU16(&a[0]); + iTermOff = fts5GetU16(&a[2]); + }else{ + sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); + goto decode_out; + } + + if( iRowidOff ){ + iOff = iRowidOff; + }else if( iTermOff ){ + iOff = iTermOff; + }else{ + iOff = n; + } + fts5DecodePoslist(&rc, &s, &a[4], iOff-4); + + assert( iRowidOff==0 || iOff==iRowidOff ); + if( iRowidOff ){ + iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); + } + + assert( iTermOff==0 || iOff==iTermOff ); + while( iOff<n ){ + int nByte; + iOff += fts5GetVarint32(&a[iOff], nByte); + term.n= nKeep; + fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); + iOff += nByte; + + sqlite3Fts5BufferAppendPrintf( + &rc, &s, " term=%.*s", term.n, (const char*)term.p + ); + iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); + if( iOff<n ){ + iOff += fts5GetVarint32(&a[iOff], nKeep); + } + } + fts5BufferFree(&term); + }else{ + Fts5NodeIter ss; + for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ + if( ss.term.n==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", + ss.term.n, ss.term.p + ); + } + if( ss.nEmpty ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty, + ss.bDlidx ? "*" : "" + ); + } + } + fts5NodeIterFree(&ss); + } + } + + decode_out: + sqlite3_free(a); + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + fts5BufferFree(&s); +} + +/* +** The implementation of user-defined scalar function fts5_rowid(). +*/ +static void fts5RowidFunction( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args (always 2) */ + sqlite3_value **apVal /* Function arguments */ +){ + const char *zArg; + if( nArg==0 ){ + sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); + }else{ + zArg = (const char*)sqlite3_value_text(apVal[0]); + if( 0==sqlite3_stricmp(zArg, "segment") ){ + i64 iRowid; + int idx, segid, height, pgno; + if( nArg!=5 ){ + sqlite3_result_error(pCtx, + "should be: fts5_rowid('segment', idx, segid, height, pgno))", -1 + ); + }else{ + idx = sqlite3_value_int(apVal[1]); + segid = sqlite3_value_int(apVal[2]); + height = sqlite3_value_int(apVal[3]); + pgno = sqlite3_value_int(apVal[4]); + iRowid = FTS5_SEGMENT_ROWID(idx, segid, height, pgno); + sqlite3_result_int64(pCtx, iRowid); + } + }else if( 0==sqlite3_stricmp(zArg, "start-of-index") ){ + i64 iRowid; + int idx; + if( nArg!=2 ){ + sqlite3_result_error(pCtx, + "should be: fts5_rowid('start-of-index', idx)", -1 + ); + }else{ + idx = sqlite3_value_int(apVal[1]); + iRowid = FTS5_SEGMENT_ROWID(idx, 1, 0, 0); + sqlite3_result_int64(pCtx, iRowid); + } + }else { + sqlite3_result_error(pCtx, + "first arg to fts5_rowid() must be 'segment' " + "or 'start-of-index'" + , -1 + ); + } + } +} + +/* +** This is called as part of registering the FTS5 module with database +** connection db. It registers several user-defined scalar functions useful +** with FTS5. +** +** If successful, SQLITE_OK is returned. If an error occurs, some other +** SQLite error code is returned instead. +*/ +int sqlite3Fts5IndexInit(sqlite3 *db){ + int rc = sqlite3_create_function( + db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 + ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 + ); + } + return rc; +} + +#endif /* SQLITE_ENABLE_FTS5 */ |