about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ext/fts5/fts5.h 30
-rw-r--r-- ext/fts5/fts5Int.h 33
-rw-r--r-- ext/fts5/fts5_config.c 10
-rw-r--r-- ext/fts5/fts5_expr.c 163
-rw-r--r-- ext/fts5/fts5_hash.c 49
-rw-r--r-- ext/fts5/fts5_index.c 837
-rw-r--r-- ext/fts5/fts5_main.c 70
-rw-r--r-- ext/fts5/fts5_tcl.c 208
-rw-r--r-- ext/fts5/test/fts5_common.tcl 14
-rw-r--r-- ext/fts5/test/fts5aa.test 68
-rw-r--r-- ext/fts5/test/fts5faultH.test 93
-rw-r--r-- ext/fts5/test/fts5origintext.test 297
-rw-r--r-- ext/fts5/test/fts5origintext2.test 146
-rw-r--r-- ext/fts5/test/fts5origintext3.test 101
-rw-r--r-- ext/fts5/test/fts5origintext4.test 66
-rw-r--r-- ext/fts5/test/fts5origintext5.test 273
-rw-r--r-- ext/fts5/test/fts5simple2.test 4
-rw-r--r-- manifest 41
-rw-r--r-- manifest.uuid 2
19 files changed, 2329 insertions, 176 deletions
diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h
index 323d73a28..63c9765eb 100644
--- a/ext/fts5/fts5.h
+++ b/ext/fts5/fts5.h
@@ -261,9 +261,30 @@ struct Fts5PhraseIter {
**
** xPhraseNextColumn()
** See xPhraseFirstColumn above.
+**
+** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken)
+** This is used to access token iToken of phrase iPhrase of the current
+** query. Before returning, output parameter *ppToken is set to point
+** to a buffer containing the requested token, and *pnToken to the
+** size of this buffer in bytes.
+**
+** The output text is not a copy of the query text that specified the
+** token. It is the output of the tokenizer module. For tokendata=1
+** tables, this includes any embedded 0x00 and trailing data.
+**
+** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken)
+** This is used to access token iToken of phrase hit iIdx within the
+** current row.
+**
+** The output text is not a copy of the document text that was tokenized.
+** It is the output of the tokenizer module. For tokendata=1 tables, this
+** includes any embedded 0x00 and trailing data.
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option.
*/
struct Fts5ExtensionApi {
- int iVersion; /* Currently always set to 2 */
+ int iVersion; /* Currently always set to 3 */
void *(*xUserData)(Fts5Context*);
@@ -298,6 +319,13 @@ struct Fts5ExtensionApi {
int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
+
+ /* Below this point are iVersion>=3 only */
+ int (*xQueryToken)(Fts5Context*,
+ int iPhrase, int iToken,
+ const char **ppToken, int *pnToken
+ );
+ int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*);
};
/*
diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h
index 8bbafbaaf..9beb26e05 100644
--- a/ext/fts5/fts5Int.h
+++ b/ext/fts5/fts5Int.h
@@ -196,6 +196,7 @@ struct Fts5Config {
char *zContent; /* content table */
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
+ int bTokendata; /* "tokendata=" option value (dflt==0) */
int eDetail; /* FTS5_DETAIL_XXX value */
char *zContentExprlist;
Fts5Tokenizer *pTok;
@@ -384,17 +385,19 @@ struct Fts5IndexIter {
/*
** Values used as part of the flags argument passed to IndexQuery().
*/
-#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
-#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
-#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
-#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
+#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
+#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
+#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
+#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
/* The following are used internally by the fts5_index.c module. They are
** defined here only to make it easier to avoid clashes with the flags
** above. */
-#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
-#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
-#define FTS5INDEX_QUERY_SKIPHASH 0x0040
+#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
+#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
+#define FTS5INDEX_QUERY_SKIPHASH 0x0040
+#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080
+#define FTS5INDEX_QUERY_SCANONETERM 0x0100
/*
** Create/destroy an Fts5Index object.
@@ -463,6 +466,10 @@ void *sqlite3Fts5StructureRef(Fts5Index*);
void sqlite3Fts5StructureRelease(void*);
int sqlite3Fts5StructureTest(Fts5Index*, void*);
+/*
+** Used by xInstToken():
+*/
+int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*);
/*
** Insert or remove data to or from the index. Each time a document is
@@ -540,6 +547,13 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin);
int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid);
+void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*);
+
+/* Used to populate hash tables for xInstToken in detail=none/column mode. */
+int sqlite3Fts5IndexIterWriteTokendata(
+ Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff
+);
+
/*
** End of interface to code in fts5_index.c.
**************************************************************************/
@@ -645,6 +659,7 @@ void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
const char **pzTerm, /* OUT: term (nul-terminated) */
+ int *pnTerm, /* OUT: Size of term in bytes */
const u8 **ppDoclist, /* OUT: pointer to doclist */
int *pnDoclist /* OUT: size of doclist in bytes */
);
@@ -771,6 +786,10 @@ int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);
int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
+int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*);
+int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*);
+void sqlite3Fts5ExprClearTokens(Fts5Expr*);
+
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c
index 5d0770502..d2e8309cd 100644
--- a/ext/fts5/fts5_config.c
+++ b/ext/fts5/fts5_config.c
@@ -398,6 +398,16 @@ static int fts5ConfigParseSpecial(
return rc;
}
+ if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){
+ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
+ *pzErr = sqlite3_mprintf("malformed tokendata=... directive");
+ rc = SQLITE_ERROR;
+ }else{
+ pConfig->bTokendata = (zArg[0]=='1');
+ }
+ return rc;
+ }
+
*pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
return SQLITE_ERROR;
}
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c
index f5101ba06..bc7c9741e 100644
--- a/ext/fts5/fts5_expr.c
+++ b/ext/fts5/fts5_expr.c
@@ -100,7 +100,9 @@ struct Fts5ExprNode {
struct Fts5ExprTerm {
u8 bPrefix; /* True for a prefix term */
u8 bFirst; /* True if token must be first in column */
- char *zTerm; /* nul-terminated term */
+ char *pTerm; /* Term data */
+ int nQueryTerm; /* Effective size of term in bytes */
+ int nFullTerm; /* Size of term in bytes incl. tokendata */
Fts5IndexIter *pIter; /* Iterator for this term */
Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
@@ -967,7 +969,7 @@ static int fts5ExprNearInitAll(
p->pIter = 0;
}
rc = sqlite3Fts5IndexQuery(
- pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
+ pExpr->pIndex, p->pTerm, p->nQueryTerm,
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
pNear->pColset,
@@ -1604,7 +1606,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
Fts5ExprTerm *pSyn;
Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
- sqlite3_free(pTerm->zTerm);
+ sqlite3_free(pTerm->pTerm);
sqlite3Fts5IterClose(pTerm->pIter);
for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
pNext = pSyn->pSynonym;
@@ -1702,6 +1704,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
+ Fts5Config *pConfig;
int rc;
};
@@ -1735,8 +1738,10 @@ static int fts5ParseTokenize(
rc = SQLITE_NOMEM;
}else{
memset(pSyn, 0, (size_t)nByte);
- pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
- memcpy(pSyn->zTerm, pToken, nToken);
+ pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
+ pSyn->nFullTerm = pSyn->nQueryTerm = nToken;
+ if( pCtx->pConfig->bTokendata ) pSyn->nQueryTerm = strlen(pSyn->pTerm);
+ memcpy(pSyn->pTerm, pToken, nToken);
pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
}
@@ -1761,7 +1766,11 @@ static int fts5ParseTokenize(
if( rc==SQLITE_OK ){
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
- pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ pTerm->nFullTerm = pTerm->nQueryTerm = nToken;
+ if( pCtx->pConfig->bTokendata && rc==SQLITE_OK ){
+ pTerm->nQueryTerm = strlen(pTerm->pTerm);
+ }
}
}
@@ -1828,6 +1837,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
memset(&sCtx, 0, sizeof(TokenCtx));
sCtx.pPhrase = pAppend;
+ sCtx.pConfig = pConfig;
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
@@ -1877,8 +1887,7 @@ int sqlite3Fts5ExprClonePhrase(
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
- TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
-
+ TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */
pOrig = pExpr->apExprPhrase[iPhrase];
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
if( rc==SQLITE_OK ){
@@ -1909,13 +1918,12 @@ int sqlite3Fts5ExprClonePhrase(
if( pOrig->nTerm ){
int i; /* Used to iterate through phrase terms */
+ sCtx.pConfig = pExpr->pConfig;
for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
int tflags = 0;
Fts5ExprTerm *p;
for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
- const char *zTerm = p->zTerm;
- rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
- 0, 0);
+ rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm,p->nFullTerm,0,0);
tflags = FTS5_TOKEN_COLOCATED;
}
if( rc==SQLITE_OK ){
@@ -2296,11 +2304,13 @@ static Fts5ExprNode *fts5ParsePhraseToAnd(
if( parseGrowPhraseArray(pParse) ){
fts5ExprPhraseFree(pPhrase);
}else{
+ Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii];
+ Fts5ExprTerm *pTo = &pPhrase->aTerm[0];
pParse->apPhrase[pParse->nPhrase++] = pPhrase;
pPhrase->nTerm = 1;
- pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
- &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
- );
+ pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm);
+ pTo->nQueryTerm = p->nQueryTerm;
+ pTo->nFullTerm = p->nFullTerm;
pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
);
@@ -2485,16 +2495,17 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
/* Determine the maximum amount of space required. */
for(p=pTerm; p; p=p->pSynonym){
- nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
+ nByte += pTerm->nQueryTerm * 2 + 3 + 2;
}
zQuoted = sqlite3_malloc64(nByte);
if( zQuoted ){
int i = 0;
for(p=pTerm; p; p=p->pSynonym){
- char *zIn = p->zTerm;
+ char *zIn = p->pTerm;
+ char *zEnd = &zIn[p->nQueryTerm];
zQuoted[i++] = '"';
- while( *zIn ){
+ while( zIn<zEnd ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
}
@@ -2572,8 +2583,10 @@ static char *fts5ExprPrintTcl(
zRet = fts5PrintfAppend(zRet, " {");
for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
- char *zTerm = pPhrase->aTerm[iTerm].zTerm;
- zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
+ Fts5ExprTerm *p = &pPhrase->aTerm[iTerm];
+ zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ",
+ p->nQueryTerm, p->pTerm
+ );
if( pPhrase->aTerm[iTerm].bPrefix ){
zRet = fts5PrintfAppend(zRet, "*");
}
@@ -2974,6 +2987,17 @@ static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
return 0;
}
+/*
+** pToken is a buffer nToken bytes in size that may or may not contain
+** an embedded 0x00 byte. If it does, return the number of bytes in
+** the buffer before the 0x00. If it does not, return nToken.
+*/
+static int fts5QueryTerm(const char *pToken, int nToken){
+ int ii;
+ for(ii=0; ii<nToken && pToken[ii]; ii++){}
+ return ii;
+}
+
static int fts5ExprPopulatePoslistsCb(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
@@ -2985,22 +3009,33 @@ static int fts5ExprPopulatePoslistsCb(
Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
Fts5Expr *pExpr = p->pExpr;
int i;
+ int nQuery = nToken;
+ i64 iRowid = pExpr->pRoot->iRowid;
UNUSED_PARAM2(iUnused1, iUnused2);
- if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
+ if( nQuery>FTS5_MAX_TOKEN_SIZE ) nQuery = FTS5_MAX_TOKEN_SIZE;
+ if( pExpr->pConfig->bTokendata ){
+ nQuery = fts5QueryTerm(pToken, nQuery);
+ }
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
for(i=0; i<pExpr->nPhrase; i++){
- Fts5ExprTerm *pTerm;
+ Fts5ExprTerm *pT;
if( p->aPopulator[i].bOk==0 ) continue;
- for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
- int nTerm = (int)strlen(pTerm->zTerm);
- if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
- && memcmp(pTerm->zTerm, pToken, nTerm)==0
+ for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){
+ if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix))
+ && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0
){
int rc = sqlite3Fts5PoslistWriterAppend(
&pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
);
+ if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){
+ int iCol = p->iOff>>32;
+ int iTokOff = p->iOff & 0x7FFFFFFF;
+ rc = sqlite3Fts5IndexIterWriteTokendata(
+ pT->pIter, pToken, nToken, iRowid, iCol, iTokOff
+ );
+ }
if( rc ) return rc;
break;
}
@@ -3135,3 +3170,81 @@ int sqlite3Fts5ExprPhraseCollist(
return rc;
}
+
+/*
+** Does the work of the fts5_api.xQueryToken() API method.
+*/
+int sqlite3Fts5ExprQueryToken(
+ Fts5Expr *pExpr,
+ int iPhrase,
+ int iToken,
+ const char **ppOut,
+ int *pnOut
+){
+ Fts5ExprPhrase *pPhrase = 0;
+
+ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
+ return SQLITE_RANGE;
+ }
+ pPhrase = pExpr->apExprPhrase[iPhrase];
+ if( iToken<0 || iToken>=pPhrase->nTerm ){
+ return SQLITE_RANGE;
+ }
+
+ *ppOut = pPhrase->aTerm[iToken].pTerm;
+ *pnOut = pPhrase->aTerm[iToken].nFullTerm;
+ return SQLITE_OK;
+}
+
+/*
+** Does the work of the fts5_api.xInstToken() API method.
+*/
+int sqlite3Fts5ExprInstToken(
+ Fts5Expr *pExpr,
+ i64 iRowid,
+ int iPhrase,
+ int iCol,
+ int iOff,
+ int iToken,
+ const char **ppOut,
+ int *pnOut
+){
+ Fts5ExprPhrase *pPhrase = 0;
+ Fts5ExprTerm *pTerm = 0;
+ int rc = SQLITE_OK;
+
+ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
+ return SQLITE_RANGE;
+ }
+ pPhrase = pExpr->apExprPhrase[iPhrase];
+ if( iToken<0 || iToken>=pPhrase->nTerm ){
+ return SQLITE_RANGE;
+ }
+ pTerm = &pPhrase->aTerm[iToken];
+ if( pTerm->bPrefix==0 ){
+ if( pExpr->pConfig->bTokendata ){
+ rc = sqlite3Fts5IterToken(
+ pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut
+ );
+ }else{
+ *ppOut = pTerm->pTerm;
+ *pnOut = pTerm->nFullTerm;
+ }
+ }
+ return rc;
+}
+
+/*
+** Clear the token mappings for all Fts5IndexIter objects managed by
+** the expression passed as the only argument.
+*/
+void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
+ int ii;
+ for(ii=0; ii<pExpr->nPhrase; ii++){
+ Fts5ExprTerm *pT;
+ for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){
+ sqlite3Fts5IndexIterClearTokendata(pT->pIter);
+ }
+ }
+}
+
diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c
index 391791c7a..5e0959aa8 100644
--- a/ext/fts5/fts5_hash.c
+++ b/ext/fts5/fts5_hash.c
@@ -36,10 +36,15 @@ struct Fts5Hash {
/*
** Each entry in the hash table is represented by an object of the
-** following type. Each object, its key (a nul-terminated string) and
-** its current data are stored in a single memory allocation. The
-** key immediately follows the object in memory. The position list
-** data immediately follows the key data in memory.
+** following type. Each object, its key, and its current data are stored
+** in a single memory allocation. The key immediately follows the object
+** in memory. The position list data immediately follows the key data
+** in memory.
+**
+** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
+** byte identifying the index (either the main term index or a prefix-index),
+** followed by the term data. For example: "0token". There is no
+** nul-terminator - in this case nKey=6.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. It is:
@@ -174,8 +179,7 @@ static int fts5HashResize(Fts5Hash *pHash){
unsigned int iHash;
Fts5HashEntry *p = apOld[i];
apOld[i] = p->pHashNext;
- iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p),
- (int)strlen(fts5EntryKey(p)));
+ iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), p->nKey);
p->pHashNext = apNew[iHash];
apNew[iHash] = p;
}
@@ -259,7 +263,7 @@ int sqlite3Fts5HashWrite(
for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
char *zKey = fts5EntryKey(p);
if( zKey[0]==bByte
- && p->nKey==nToken
+ && p->nKey==nToken+1
&& memcmp(&zKey[1], pToken, nToken)==0
){
break;
@@ -289,9 +293,9 @@ int sqlite3Fts5HashWrite(
zKey[0] = bByte;
memcpy(&zKey[1], pToken, nToken);
assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
- p->nKey = nToken;
+ p->nKey = nToken+1;
zKey[nToken+1] = '\0';
- p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
+ p->nData = nToken+1 + sizeof(Fts5HashEntry);
p->pHashNext = pHash->aSlot[iHash];
pHash->aSlot[iHash] = p;
pHash->nEntry++;
@@ -408,12 +412,17 @@ static Fts5HashEntry *fts5HashEntryMerge(
*ppOut = p1;
p1 = 0;
}else{
- int i = 0;
char *zKey1 = fts5EntryKey(p1);
char *zKey2 = fts5EntryKey(p2);
- while( zKey1[i]==zKey2[i] ) i++;
+ int nMin = MIN(p1->nKey, p2->nKey);
+
+ int cmp = memcmp(zKey1, zKey2, nMin);
+ if( cmp==0 ){
+ cmp = p1->nKey - p2->nKey;
+ }
+ assert( cmp!=0 );
- if( ((u8)zKey1[i])>((u8)zKey2[i]) ){
+ if( cmp>0 ){
/* p2 is smaller */
*ppOut = p2;
ppOut = &p2->pScanNext;
@@ -455,7 +464,7 @@ static int fts5HashEntrySort(
Fts5HashEntry *pIter;
for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
if( pTerm==0
- || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
+ || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
){
Fts5HashEntry *pEntry = pIter;
pEntry->pScanNext = 0;
@@ -494,12 +503,11 @@ int sqlite3Fts5HashQuery(
for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
zKey = fts5EntryKey(p);
- assert( p->nKey+1==(int)strlen(zKey) );
- if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break;
+ if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break;
}
if( p ){
- int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1;
+ int nHashPre = sizeof(Fts5HashEntry) + nTerm;
int nList = p->nData - nHashPre;
u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10));
if( pRet ){
@@ -560,19 +568,22 @@ int sqlite3Fts5HashScanEof(Fts5Hash *p){
void sqlite3Fts5HashScanEntry(
Fts5Hash *pHash,
const char **pzTerm, /* OUT: term (nul-terminated) */
+ int *pnTerm, /* OUT: Size of term in bytes */
const u8 **ppDoclist, /* OUT: pointer to doclist */
int *pnDoclist /* OUT: size of doclist in bytes */
){
Fts5HashEntry *p;
if( (p = pHash->pScan) ){
char *zKey = fts5EntryKey(p);
- int nTerm = (int)strlen(zKey);
+ int nTerm = p->nKey;
fts5HashAddPoslistSize(pHash, p, 0);
*pzTerm = zKey;
- *ppDoclist = (const u8*)&zKey[nTerm+1];
- *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1);
+ *pnTerm = nTerm;
+ *ppDoclist = (const u8*)&zKey[nTerm];
+ *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm);
}else{
*pzTerm = 0;
+ *pnTerm = 0;
*ppDoclist = 0;
*pnDoclist = 0;
}
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c
index c467addb8..9a2cc026b 100644
--- a/ext/fts5/fts5_index.c
+++ b/ext/fts5/fts5_index.c
@@ -323,6 +323,9 @@ typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
+typedef struct Fts5TokenDataIter Fts5TokenDataIter;
+typedef struct Fts5TokenDataMap Fts5TokenDataMap;
+typedef struct Fts5TombstoneArray Fts5TombstoneArray;
struct Fts5Data {
u8 *p; /* Pointer to buffer containing record */
@@ -365,6 +368,7 @@ struct Fts5Index {
sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
sqlite3_stmt *pIdxSelect;
+ sqlite3_stmt *pIdxNextSelect;
int nRead; /* Total number of blocks read */
sqlite3_stmt *pDeleteFromIdx;
@@ -518,8 +522,7 @@ struct Fts5SegIter {
Fts5Data *pLeaf; /* Current leaf data */
Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
i64 iLeafOffset; /* Byte offset within current leaf */
- Fts5Data **apTombstone; /* Array of tombstone pages */
- int nTombstone;
+ Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */
/* Next method */
void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
@@ -547,6 +550,15 @@ struct Fts5SegIter {
};
/*
+** Array of tombstone pages. Reference counted.
+*/
+struct Fts5TombstoneArray {
+ int nRef; /* Number of pointers to this object */
+ int nTombstone;
+ Fts5Data *apTombstone[1]; /* Array of tombstone pages */
+};
+
+/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page.
*/
@@ -590,9 +602,16 @@ struct Fts5SegIter {
** poslist:
** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
** There is no way to tell if this is populated or not.
+**
+** pColset:
+** If not NULL, points to an object containing a set of column indices.
+** Only matches that occur in one of these columns will be returned.
+** The Fts5Iter does not own the Fts5Colset object, and so it is not
+** freed when the iterator is closed - it is owned by the upper layer.
*/
struct Fts5Iter {
Fts5IndexIter base; /* Base class containing output vars */
+ Fts5TokenDataIter *pTokenDataIter;
Fts5Index *pIndex; /* Index that owns this iterator */
Fts5Buffer poslist; /* Buffer containing current poslist */
@@ -610,7 +629,6 @@ struct Fts5Iter {
Fts5SegIter aSeg[1]; /* Array of segment iterators */
};
-
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
@@ -1909,18 +1927,20 @@ static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
}
/*
-** Allocate a tombstone hash page array (pIter->apTombstone) for the
-** iterator passed as the second argument. If an OOM error occurs, leave
-** an error in the Fts5Index object.
+** Allocate a tombstone hash page array object (pIter->pTombArray) for
+** the iterator passed as the second argument. If an OOM error occurs,
+** leave an error in the Fts5Index object.
*/
static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
const int nTomb = pIter->pSeg->nPgTombstone;
if( nTomb>0 ){
- Fts5Data **apTomb = 0;
- apTomb = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data)*nTomb);
- if( apTomb ){
- pIter->apTombstone = apTomb;
- pIter->nTombstone = nTomb;
+ int nByte = nTomb * sizeof(Fts5Data*) + sizeof(Fts5TombstoneArray);
+ Fts5TombstoneArray *pNew;
+ pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte);
+ if( pNew ){
+ pNew->nTombstone = nTomb;
+ pNew->nRef = 1;
+ pIter->pTombArray = pNew;
}
}
}
@@ -2177,15 +2197,16 @@ static void fts5SegIterNext_None(
}else{
const u8 *pList = 0;
const char *zTerm = 0;
+ int nTerm = 0;
int nList;
sqlite3Fts5HashScanNext(p->pHash);
- sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
+ sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
if( pList==0 ) goto next_none_eof;
pIter->pLeaf->p = (u8*)pList;
pIter->pLeaf->nn = nList;
pIter->pLeaf->szLeaf = nList;
pIter->iEndofDoclist = nList;
- sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
+ sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm);
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
}
@@ -2251,11 +2272,12 @@ static void fts5SegIterNext(
}else if( pIter->pSeg==0 ){
const u8 *pList = 0;
const char *zTerm = 0;
+ int nTerm = 0;
int nList = 0;
assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
sqlite3Fts5HashScanNext(p->pHash);
- sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
+ sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
}
if( pList==0 ){
fts5DataRelease(pIter->pLeaf);
@@ -2265,8 +2287,7 @@ static void fts5SegIterNext(
pIter->pLeaf->nn = nList;
pIter->pLeaf->szLeaf = nList;
pIter->iEndofDoclist = nList+1;
- sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
- (u8*)zTerm);
+ sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm);
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
*pbNewTerm = 1;
}
@@ -2652,7 +2673,7 @@ static void fts5SegIterSeekInit(
fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
}
- if( p->rc==SQLITE_OK && bGe==0 ){
+ if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){
pIter->flags |= FTS5_SEGITER_ONETERM;
if( pIter->pLeaf ){
if( flags & FTS5INDEX_QUERY_DESC ){
@@ -2668,7 +2689,9 @@ static void fts5SegIterSeekInit(
}
fts5SegIterSetNext(p, pIter);
- fts5SegIterAllocTombstone(p, pIter);
+ if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM) ){
+ fts5SegIterAllocTombstone(p, pIter);
+ }
/* Either:
**
@@ -2685,6 +2708,79 @@ static void fts5SegIterSeekInit(
);
}
+
+/*
+** SQL used by fts5SegIterNextInit() to find the page to open.
+*/
+static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
+ if( p->pIdxNextSelect==0 ){
+ Fts5Config *pConfig = p->pConfig;
+ fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf(
+ "SELECT pgno FROM '%q'.'%q_idx' WHERE "
+ "segid=? AND term>? ORDER BY term ASC LIMIT 1",
+ pConfig->zDb, pConfig->zName
+ ));
+
+ }
+ return p->pIdxNextSelect;
+}
+
+/*
+** This is similar to fts5SegIterSeekInit(), except that it initializes
+** the segment iterator to point to the first term following the page
+** with pTerm/nTerm on it.
+*/
+static void fts5SegIterNextInit(
+ Fts5Index *p,
+ const char *pTerm, int nTerm,
+ Fts5StructureSegment *pSeg, /* Description of segment */
+ Fts5SegIter *pIter /* Object to populate */
+){
+ int iPg = -1; /* Page of segment to open */
+ int bDlidx = 0;
+ sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
+
+ pSel = fts5IdxNextStmt(p);
+ if( pSel ){
+ assert( p->rc==SQLITE_OK );
+ sqlite3_bind_int(pSel, 1, pSeg->iSegid);
+ sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC);
+
+ if( sqlite3_step(pSel)==SQLITE_ROW ){
+ i64 val = sqlite3_column_int64(pSel, 0);
+ iPg = (int)(val>>1);
+ bDlidx = (val & 0x0001);
+ }
+ p->rc = sqlite3_reset(pSel);
+ sqlite3_bind_null(pSel, 2);
+ if( p->rc ) return;
+ }
+
+ memset(pIter, 0, sizeof(*pIter));
+ pIter->pSeg = pSeg;
+ pIter->flags |= FTS5_SEGITER_ONETERM;
+ if( iPg>=0 ){
+ pIter->iLeafPgno = iPg - 1;
+ fts5SegIterNextPage(p, pIter);
+ fts5SegIterSetNext(p, pIter);
+ }
+ if( pIter->pLeaf ){
+ const u8 *a = pIter->pLeaf->p;
+ int iTermOff = 0;
+
+ pIter->iPgidxOff = pIter->pLeaf->szLeaf;
+ pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff);
+ pIter->iLeafOffset = iTermOff;
+ fts5SegIterLoadTerm(p, pIter, 0);
+ fts5SegIterLoadNPos(p, pIter);
+ if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);
+
+ assert( p->rc!=SQLITE_OK ||
+ fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0
+ );
+ }
+}
+
/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the
@@ -2711,8 +2807,7 @@ static void fts5SegIterHashInit(
const u8 *pList = 0;
p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
- sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
- n = (z ? (int)strlen((const char*)z) : 0);
+ sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList);
if( pList ){
pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
if( pLeaf ){
@@ -2772,13 +2867,30 @@ static void fts5IndexFreeArray(Fts5Data **ap, int n){
}
/*
+** Decrement the ref-count of the object passed as the only argument. If it
+** reaches 0, free it and its contents.
+*/
+static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){
+ if( p ){
+ p->nRef--;
+ if( p->nRef<=0 ){
+ int ii;
+ for(ii=0; ii<p->nTombstone; ii++){
+ fts5DataRelease(p->apTombstone[ii]);
+ }
+ sqlite3_free(p);
+ }
+ }
+}
+
+/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
fts5BufferFree(&pIter->term);
fts5DataRelease(pIter->pLeaf);
fts5DataRelease(pIter->pNextLeaf);
- fts5IndexFreeArray(pIter->apTombstone, pIter->nTombstone);
+ fts5TombstoneArrayDelete(pIter->pTombArray);
fts5DlidxIterFree(pIter->pDlidx);
sqlite3_free(pIter->aRowidOffset);
memset(pIter, 0, sizeof(Fts5SegIter));
@@ -3023,7 +3135,6 @@ static void fts5SegIterNextFrom(
}while( p->rc==SQLITE_OK );
}
-
/*
** Free the iterator object passed as the second argument.
*/
@@ -3168,24 +3279,25 @@ static int fts5IndexTombstoneQuery(
static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
int iFirst = pIter->aFirst[1].iFirst;
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
+ Fts5TombstoneArray *pArray = pSeg->pTombArray;
- if( pSeg->pLeaf && pSeg->nTombstone ){
+ if( pSeg->pLeaf && pArray ){
/* Figure out which page the rowid might be present on. */
- int iPg = ((u64)pSeg->iRowid) % pSeg->nTombstone;
+ int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
assert( iPg>=0 );
/* If tombstone hash page iPg has not yet been loaded from the
** database, load it now. */
- if( pSeg->apTombstone[iPg]==0 ){
- pSeg->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
+ if( pArray->apTombstone[iPg]==0 ){
+ pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
);
- if( pSeg->apTombstone[iPg]==0 ) return 0;
+ if( pArray->apTombstone[iPg]==0 ) return 0;
}
return fts5IndexTombstoneQuery(
- pSeg->apTombstone[iPg],
- pSeg->nTombstone,
+ pArray->apTombstone[iPg],
+ pArray->nTombstone,
pSeg->iRowid
);
}
@@ -3724,6 +3836,32 @@ static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
}
}
+/*
+** All the component segment-iterators of pIter have been set up. This
+** function finishes setup for iterator pIter itself.
+*/
+static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){
+ int iIter;
+ for(iIter=pIter->nSeg-1; iIter>0; iIter--){
+ int iEq;
+ if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){
+ Fts5SegIter *pSeg = &pIter->aSeg[iEq];
+ if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
+ fts5MultiIterAdvanced(p, pIter, iEq, iIter);
+ }
+ }
+ fts5MultiIterSetEof(pIter);
+ fts5AssertMultiIterSetup(p, pIter);
+
+ if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter))
+ || fts5MultiIterIsDeleted(pIter)
+ ){
+ fts5MultiIterNext(p, pIter, 0, 0);
+ }else if( pIter->base.bEof==0 ){
+ Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
+ pIter->xSetOutputs(pIter, pSeg);
+ }
+}
/*
** Allocate a new Fts5Iter object.
@@ -3805,31 +3943,12 @@ static void fts5MultiIterNew(
assert( iIter==nSeg );
}
- /* If the above was successful, each component iterators now points
+ /* If the above was successful, each component iterator now points
** to the first entry in its segment. In this case initialize the
** aFirst[] array. Or, if an error has occurred, free the iterator
** object and set the output variable to NULL. */
if( p->rc==SQLITE_OK ){
- for(iIter=pNew->nSeg-1; iIter>0; iIter--){
- int iEq;
- if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
- Fts5SegIter *pSeg = &pNew->aSeg[iEq];
- if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
- fts5MultiIterAdvanced(p, pNew, iEq, iIter);
- }
- }
- fts5MultiIterSetEof(pNew);
- fts5AssertMultiIterSetup(p, pNew);
-
- if( (pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew))
- || fts5MultiIterIsDeleted(pNew)
- ){
- fts5MultiIterNext(p, pNew, 0, 0);
- }else if( pNew->base.bEof==0 ){
- Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
- pNew->xSetOutputs(pNew, pSeg);
- }
-
+ fts5MultiIterFinishSetup(p, pNew);
}else{
fts5MultiIterFree(pNew);
*ppOut = 0;
@@ -3854,7 +3973,6 @@ static void fts5MultiIterNew2(
pNew = fts5MultiIterAlloc(p, 2);
if( pNew ){
Fts5SegIter *pIter = &pNew->aSeg[1];
-
pIter->flags = FTS5_SEGITER_ONETERM;
if( pData->szLeaf>0 ){
pIter->pLeaf = pData;
@@ -4217,7 +4335,7 @@ static void fts5WriteDlidxAppend(
}
if( pDlidx->bPrevValid ){
- iVal = iRowid - pDlidx->iPrev;
+ iVal = (u64)iRowid - (u64)pDlidx->iPrev;
}else{
i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
assert( pDlidx->buf.n==0 );
@@ -5340,10 +5458,10 @@ static void fts5FlushSecureDelete(
Fts5Index *p,
Fts5Structure *pStruct,
const char *zTerm,
+ int nTerm,
i64 iRowid
){
const int f = FTS5INDEX_QUERY_SKIPHASH;
- int nTerm = (int)strlen(zTerm);
Fts5Iter *pIter = 0; /* Used to find term instance */
fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
@@ -5417,8 +5535,7 @@ static void fts5FlushOneHash(Fts5Index *p){
int nDoclist; /* Size of doclist in bytes */
/* Get the term and doclist for this entry. */
- sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
- nTerm = (int)strlen(zTerm);
+ sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist);
if( bSecureDelete==0 ){
fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
if( p->rc!=SQLITE_OK ) break;
@@ -5448,7 +5565,7 @@ static void fts5FlushOneHash(Fts5Index *p){
if( bSecureDelete ){
if( eDetail==FTS5_DETAIL_NONE ){
if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
- fts5FlushSecureDelete(p, pStruct, zTerm, iRowid);
+ fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
iOff++;
if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
iOff++;
@@ -5458,7 +5575,7 @@ static void fts5FlushOneHash(Fts5Index *p){
}
}
}else if( (pDoclist[iOff] & 0x01) ){
- fts5FlushSecureDelete(p, pStruct, zTerm, iRowid);
+ fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
iOff++;
continue;
@@ -6078,7 +6195,7 @@ static void fts5SetupPrefixIter(
u8 *pToken, /* Buffer containing prefix to match */
int nToken, /* Size of buffer pToken in bytes */
Fts5Colset *pColset, /* Restrict matches to these columns */
- Fts5Iter **ppIter /* OUT: New iterator */
+ Fts5Iter **ppIter /* OUT: New iterator */
){
Fts5Structure *pStruct;
Fts5Buffer *aBuf;
@@ -6099,8 +6216,9 @@ static void fts5SetupPrefixIter(
aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
pStruct = fts5StructureRead(p);
+ assert( p->rc!=SQLITE_OK || (aBuf && pStruct) );
- if( aBuf && pStruct ){
+ if( p->rc==SQLITE_OK ){
const int flags = FTS5INDEX_QUERY_SCAN
| FTS5INDEX_QUERY_SKIPEMPTY
| FTS5INDEX_QUERY_NOOUTPUT;
@@ -6112,6 +6230,12 @@ static void fts5SetupPrefixIter(
int bNewTerm = 1;
memset(&doclist, 0, sizeof(doclist));
+
+ /* If iIdx is non-zero, then it is the number of a prefix-index for
+ ** prefixes 1 character longer than the prefix being queried for. That
+ ** index contains all the doclists required, except for the one
+ ** corresponding to the prefix itself. That one is extracted from the
+ ** main term index here. */
if( iIdx!=0 ){
int dummy = 0;
const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
@@ -6135,6 +6259,7 @@ static void fts5SetupPrefixIter(
pToken[0] = FTS5_MAIN_PREFIX + iIdx;
fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
fts5IterSetOutputCb(&p->rc, p1);
+
for( /* no-op */ ;
fts5MultiIterEof(p, p1)==0;
fts5MultiIterNext2(p, p1, &bNewTerm)
@@ -6150,7 +6275,6 @@ static void fts5SetupPrefixIter(
}
if( p1->base.nData==0 ) continue;
-
if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
int i1 = i*nMerge;
@@ -6189,7 +6313,7 @@ static void fts5SetupPrefixIter(
}
fts5MultiIterFree(p1);
- pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
+ pData = fts5IdxMalloc(p, sizeof(*pData)+doclist.n+FTS5_DATA_ZERO_PADDING);
if( pData ){
pData->p = (u8*)&pData[1];
pData->nn = pData->szLeaf = doclist.n;
@@ -6332,6 +6456,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
sqlite3_finalize(p->pIdxWriter);
sqlite3_finalize(p->pIdxDeleter);
sqlite3_finalize(p->pIdxSelect);
+ sqlite3_finalize(p->pIdxNextSelect);
sqlite3_finalize(p->pDataVersion);
sqlite3_finalize(p->pDeleteFromIdx);
sqlite3Fts5HashFree(p->pHash);
@@ -6428,6 +6553,451 @@ int sqlite3Fts5IndexWrite(
}
/*
+** pToken points to a buffer of size nToken bytes containing a search
+** term, including the index number at the start, used on a tokendata=1
+** table. This function returns true if the term in buffer pBuf matches
+** token pToken/nToken.
+*/
+static int fts5IsTokendataPrefix(
+ Fts5Buffer *pBuf,
+ const u8 *pToken,
+ int nToken
+){
+ return (
+ pBuf->n>=nToken
+ && 0==memcmp(pBuf->p, pToken, nToken)
+ && (pBuf->n==nToken || pBuf->p[nToken]==0x00)
+ );
+}
+
+/*
+** Ensure the segment-iterator passed as the only argument points to EOF.
+*/
+static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
+ fts5DataRelease(pSeg->pLeaf);
+ pSeg->pLeaf = 0;
+}
+
+/*
+** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
+** array of these for each row it visits. Or, for an iterator used by an
+** "ORDER BY rank" query, it accumulates an array of these for the entire
+** query.
+**
+** Each instance in the array indicates the iterator (and therefore term)
+** associated with position iPos of rowid iRowid. This is used by the
+** xInstToken() API.
+*/
+struct Fts5TokenDataMap {
+ i64 iRowid; /* Row this token is located in */
+ i64 iPos; /* Position of token */
+ int iIter; /* Iterator token was read from */
+};
+
+/*
+** An object used to supplement Fts5Iter for tokendata=1 iterators.
+*/
+struct Fts5TokenDataIter {
+ int nIter;
+ int nIterAlloc;
+
+ int nMap;
+ int nMapAlloc;
+ Fts5TokenDataMap *aMap;
+
+ Fts5PoslistReader *aPoslistReader;
+ int *aPoslistToIter;
+ Fts5Iter *apIter[1];
+};
+
+/*
+** This function appends iterator pAppend to Fts5TokenDataIter pIn and
+** returns the result.
+*/
+static Fts5TokenDataIter *fts5AppendTokendataIter(
+ Fts5Index *p, /* Index object (for error code) */
+ Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */
+ Fts5Iter *pAppend /* Append this iterator */
+){
+ Fts5TokenDataIter *pRet = pIn;
+
+ if( p->rc==SQLITE_OK ){
+ if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){
+ int nAlloc = pIn ? pIn->nIterAlloc*2 : 16;
+ int nByte = nAlloc * sizeof(Fts5Iter*) + sizeof(Fts5TokenDataIter);
+ Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte);
+
+ if( pNew==0 ){
+ p->rc = SQLITE_NOMEM;
+ }else{
+ if( pIn==0 ) memset(pNew, 0, nByte);
+ pRet = pNew;
+ pNew->nIterAlloc = nAlloc;
+ }
+ }
+ }
+ if( p->rc ){
+ sqlite3Fts5IterClose((Fts5IndexIter*)pAppend);
+ }else{
+ pRet->apIter[pRet->nIter++] = pAppend;
+ }
+ assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc );
+
+ return pRet;
+}
+
+/*
+** Delete an Fts5TokenDataIter structure and its contents.
+*/
+static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
+ if( pSet ){
+ int ii;
+ for(ii=0; ii<pSet->nIter; ii++){
+ fts5MultiIterFree(pSet->apIter[ii]);
+ }
+ sqlite3_free(pSet->aPoslistReader);
+ sqlite3_free(pSet->aMap);
+ sqlite3_free(pSet);
+ }
+}
+
+/*
+** Append a mapping to the token-map belonging to object pT.
+*/
+static void fts5TokendataIterAppendMap(
+ Fts5Index *p,
+ Fts5TokenDataIter *pT,
+ int iIter,
+ i64 iRowid,
+ i64 iPos
+){
+ if( p->rc==SQLITE_OK ){
+ if( pT->nMap==pT->nMapAlloc ){
+ int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
+ int nByte = nNew * sizeof(Fts5TokenDataMap);
+ Fts5TokenDataMap *aNew;
+
+ aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte);
+ if( aNew==0 ){
+ p->rc = SQLITE_NOMEM;
+ return;
+ }
+
+ pT->aMap = aNew;
+ pT->nMapAlloc = nNew;
+ }
+
+ pT->aMap[pT->nMap].iRowid = iRowid;
+ pT->aMap[pT->nMap].iPos = iPos;
+ pT->aMap[pT->nMap].iIter = iIter;
+ pT->nMap++;
+ }
+}
+
+/*
+** The iterator passed as the only argument must be a tokendata=1 iterator
+** (pIter->pTokenDataIter!=0). This function sets the iterator output
+** variables (pIter->base.*) according to the contents of the current
+** row.
+*/
+static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){
+ int ii;
+ int nHit = 0;
+ i64 iRowid = SMALLEST_INT64;
+ int iMin = 0;
+
+ Fts5TokenDataIter *pT = pIter->pTokenDataIter;
+
+ pIter->base.nData = 0;
+ pIter->base.pData = 0;
+
+ for(ii=0; ii<pT->nIter; ii++){
+ Fts5Iter *p = pT->apIter[ii];
+ if( p->base.bEof==0 ){
+ if( nHit==0 || p->base.iRowid<iRowid ){
+ iRowid = p->base.iRowid;
+ nHit = 1;
+ pIter->base.pData = p->base.pData;
+ pIter->base.nData = p->base.nData;
+ iMin = ii;
+ }else if( p->base.iRowid==iRowid ){
+ nHit++;
+ }
+ }
+ }
+
+ if( nHit==0 ){
+ pIter->base.bEof = 1;
+ }else{
+ int eDetail = pIter->pIndex->pConfig->eDetail;
+ pIter->base.bEof = 0;
+ pIter->base.iRowid = iRowid;
+
+ if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){
+ fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1);
+ }else
+ if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){
+ int nReader = 0;
+ int nByte = 0;
+ i64 iPrev = 0;
+
+ /* Allocate array of iterators if they are not already allocated. */
+ if( pT->aPoslistReader==0 ){
+ int nByte = pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int));
+ pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero(
+ &pIter->pIndex->rc, nByte
+ );
+ if( pT->aPoslistReader==0 ) return;
+ pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter];
+ }
+
+ /* Populate an iterator for each poslist that will be merged */
+ for(ii=0; ii<pT->nIter; ii++){
+ Fts5Iter *p = pT->apIter[ii];
+ if( iRowid==p->base.iRowid ){
+ pT->aPoslistToIter[nReader] = ii;
+ sqlite3Fts5PoslistReaderInit(
+ p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++]
+ );
+ nByte += p->base.nData;
+ }
+ }
+
+ /* Ensure the output buffer is large enough */
+ if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){
+ return;
+ }
+
+ /* Ensure the token-mapping is large enough */
+ if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){
+ int nNew = (pT->nMapAlloc + nByte) * 2;
+ Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc(
+ pT->aMap, nNew*sizeof(Fts5TokenDataMap)
+ );
+ if( aNew==0 ){
+ pIter->pIndex->rc = SQLITE_NOMEM;
+ return;
+ }
+ pT->aMap = aNew;
+ pT->nMapAlloc = nNew;
+ }
+
+ pIter->poslist.n = 0;
+
+ while( 1 ){
+ i64 iMinPos = LARGEST_INT64;
+
+ /* Find smallest position */
+ iMin = 0;
+ for(ii=0; ii<nReader; ii++){
+ Fts5PoslistReader *pReader = &pT->aPoslistReader[ii];
+ if( pReader->bEof==0 ){
+ if( pReader->iPos<iMinPos ){
+ iMinPos = pReader->iPos;
+ iMin = ii;
+ }
+ }
+ }
+
+ /* If all readers were at EOF, break out of the loop. */
+ if( iMinPos==LARGEST_INT64 ) break;
+
+ sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos);
+ sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]);
+
+ if( eDetail==FTS5_DETAIL_FULL ){
+ pT->aMap[pT->nMap].iPos = iMinPos;
+ pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin];
+ pT->aMap[pT->nMap].iRowid = iRowid;
+ pT->nMap++;
+ }
+ }
+
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = pIter->poslist.n;
+ }
+ }
+}
+
+/*
+** The iterator passed as the only argument must be a tokendata=1 iterator
+** (pIter->pTokenDataIter!=0). This function advances the iterator. If
+** argument bFrom is false, then the iterator is advanced to the next
+** entry. Or, if bFrom is true, it is advanced to the first entry with
+** a rowid of iFrom or greater.
+*/
+static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){
+ int ii;
+ Fts5TokenDataIter *pT = pIter->pTokenDataIter;
+
+ for(ii=0; ii<pT->nIter; ii++){
+ Fts5Iter *p = pT->apIter[ii];
+ if( p->base.bEof==0
+ && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom))
+ ){
+ fts5MultiIterNext(p->pIndex, p, bFrom, iFrom);
+ while( bFrom && p->base.bEof==0
+ && p->base.iRowid<iFrom
+ && p->pIndex->rc==SQLITE_OK
+ ){
+ fts5MultiIterNext(p->pIndex, p, 0, 0);
+ }
+ }
+ }
+
+ fts5IterSetOutputsTokendata(pIter);
+}
+
+/*
+** If the segment-iterator passed as the first argument is at EOF, then
+** set pIter->term to a copy of buffer pTerm.
+*/
+static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){
+ if( pIter && pIter->aSeg[0].pLeaf==0 ){
+ fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p);
+ }
+}
+
+/*
+** This function sets up an iterator to use for a non-prefix query on a
+** tokendata=1 table.
+**
+** Buffer pToken/nToken contains the query term, including the index number
+** byte at the start. A separate Fts5Iter is opened for each distinct index
+** term that matches the query term according to fts5IsTokendataPrefix().
+** These are collected in an Fts5TokenDataIter object, which is attached to
+** the zero-segment iterator that is returned.
+**
+** If an error has already occurred or occurs within this function
+** (p->rc!=SQLITE_OK), or if the returned iterator cannot be allocated, all
+** iterators collected so far are deleted and NULL is returned.
+*/
+static Fts5Iter *fts5SetupTokendataIter(
+  Fts5Index *p,                   /* FTS index to query */
+  const u8 *pToken,               /* Buffer containing query term */
+  int nToken,                     /* Size of buffer pToken in bytes */
+  Fts5Colset *pColset             /* Colset to filter on */
+){
+  Fts5Iter *pRet = 0;
+  Fts5TokenDataIter *pSet = 0;
+  Fts5Structure *pStruct = 0;
+  const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN;
+
+  Fts5Buffer bSeek = {0, 0, 0};
+  Fts5Buffer *pSmall = 0;
+
+  fts5IndexFlush(p);
+  pStruct = fts5StructureRead(p);
+
+  /* Either of the two calls above may have failed, in which case pStruct
+  ** may be NULL. Testing p->rc at the top of each iteration ensures that
+  ** pStruct is never dereferenced after an error, and that the loop stops
+  ** as soon as any step below reports one. */
+  while( p->rc==SQLITE_OK ){
+    Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0;
+    Fts5Iter *pNew = 0;
+    Fts5SegIter *pNewIter = 0;
+    Fts5SegIter *pPrevIter = 0;
+
+    int iLvl, iSeg, ii;
+
+    pNew = fts5MultiIterAlloc(p, pStruct->nSegment);
+    if( pNew==0 ) break;
+
+    /* For the first iterator, seek to the query term itself. For each
+    ** subsequent iterator, seek to the smallest term found by the previous
+    ** iterator with a single 0x00 byte appended. */
+    if( pSmall ){
+      fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p);
+      fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0");
+    }else{
+      fts5BufferSet(&p->rc, &bSeek, nToken, pToken);
+    }
+
+    pNewIter = &pNew->aSeg[0];
+    pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0);
+    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+      for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
+        Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
+        int bDone = 0;
+
+        if( pPrevIter ){
+          if( fts5BufferCompare(pSmall, &pPrevIter->term) ){
+            /* The previous iterator's segment-iterator does not point at
+            ** term pSmall. Move it, as is, into the new iterator. */
+            memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter));
+            memset(pPrevIter, 0, sizeof(Fts5SegIter));
+            bDone = 1;
+          }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){
+            fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter);
+            bDone = 1;
+          }
+        }
+
+        if( bDone==0 ){
+          fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
+        }
+
+        /* Share the tombstone array of the previous iterator, if there is
+        ** one. Otherwise, for the first iterator, allocate it. */
+        if( pPrevIter ){
+          if( pPrevIter->pTombArray ){
+            pNewIter->pTombArray = pPrevIter->pTombArray;
+            pNewIter->pTombArray->nRef++;
+          }
+        }else{
+          fts5SegIterAllocTombstone(p, pNewIter);
+        }
+
+        pNewIter++;
+        if( pPrevIter ) pPrevIter++;
+      }
+    }
+    fts5TokendataSetTermIfEof(pPrev, pSmall);
+
+    pNew->bSkipEmpty = 1;
+    pNew->pColset = pColset;
+    fts5IterSetOutputCb(&p->rc, pNew);
+
+    /* Loop through all segments in the new iterator. Find the smallest
+    ** term that any segment-iterator points to. Iterator pNew will be
+    ** used for this term. Also, set any iterator that points to a term that
+    ** does not match pToken/nToken to point to EOF */
+    pSmall = 0;
+    for(ii=0; ii<pNew->nSeg; ii++){
+      Fts5SegIter *pII = &pNew->aSeg[ii];
+      if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){
+        fts5SegIterSetEOF(pII);
+      }
+      if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){
+        pSmall = &pII->term;
+      }
+    }
+
+    /* If pSmall is still NULL at this point, then the new iterator does
+    ** not point to any terms that match the query. So delete it and break
+    ** out of the loop - all required iterators have been collected. */
+    if( pSmall==0 ){
+      sqlite3Fts5IterClose((Fts5IndexIter*)pNew);
+      break;
+    }
+
+    /* Append this iterator to the set and continue. */
+    pSet = fts5AppendTokendataIter(p, pSet, pNew);
+  }
+
+  /* Restrict every collected segment-iterator to a single term, then
+  ** finish setting up each of the collected iterators. */
+  if( p->rc==SQLITE_OK && pSet ){
+    int ii;
+    for(ii=0; ii<pSet->nIter; ii++){
+      Fts5Iter *pIter = pSet->apIter[ii];
+      int iSeg;
+      for(iSeg=0; iSeg<pIter->nSeg; iSeg++){
+        pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM;
+      }
+      fts5MultiIterFinishSetup(p, pIter);
+    }
+  }
+
+  /* Allocate the iterator to return. It has no segments of its own - all
+  ** output comes from the iterators in pSet. If no matching terms were
+  ** found (pSet==0), the new iterator is immediately at EOF. */
+  if( p->rc==SQLITE_OK ){
+    pRet = fts5MultiIterAlloc(p, 0);
+  }
+  if( pRet ){
+    pRet->pTokenDataIter = pSet;
+    if( pSet ){
+      fts5IterSetOutputsTokendata(pRet);
+    }else{
+      pRet->base.bEof = 1;
+    }
+  }else{
+    fts5TokendataIterDelete(pSet);
+  }
+
+  fts5StructureRelease(pStruct);
+  fts5BufferFree(&bSeek);
+  return pRet;
+}
+
+
+/*
** Open a new iterator to iterate though all rowid that match the
** specified token or token prefix.
*/
@@ -6448,6 +7018,7 @@ int sqlite3Fts5IndexQuery(
if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
int iIdx = 0; /* Index to search */
int iPrefixIdx = 0; /* +1 prefix index */
+ int bTokendata = pConfig->bTokendata;
if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
/* Figure out which index to search and set iIdx accordingly. If this
@@ -6461,6 +7032,7 @@ int sqlite3Fts5IndexQuery(
** for internal sanity checking by the integrity-check in debug
** mode only. */
#ifdef SQLITE_DEBUG
+ if( flags & FTS5INDEX_QUERY_NOTOKENDATA ) bTokendata = 0;
if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
assert( flags & FTS5INDEX_QUERY_PREFIX );
iIdx = 1+pConfig->nPrefix;
@@ -6475,7 +7047,10 @@ int sqlite3Fts5IndexQuery(
}
}
- if( iIdx<=pConfig->nPrefix ){
+ if( bTokendata && iIdx==0 ){
+ buf.p[0] = '0';
+ pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset);
+ }else if( iIdx<=pConfig->nPrefix ){
/* Straight index lookup */
Fts5Structure *pStruct = fts5StructureRead(p);
buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
@@ -6522,7 +7097,11 @@ int sqlite3Fts5IndexQuery(
int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
assert( pIter->pIndex->rc==SQLITE_OK );
- fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
+ if( pIter->pTokenDataIter ){
+ fts5TokendataIterNext(pIter, 0, 0);
+ }else{
+ fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
+ }
return fts5IndexReturn(pIter->pIndex);
}
@@ -6555,7 +7134,11 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
*/
int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
- fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
+ if( pIter->pTokenDataIter ){
+ fts5TokendataIterNext(pIter, 1, iMatch);
+ }else{
+ fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
+ }
return fts5IndexReturn(pIter->pIndex);
}
@@ -6571,12 +7154,106 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
}
/*
+** This is used by xInstToken() to access the token at offset iOff, column
+** iCol of row iRowid. The token is returned via output variables *ppOut
+** and *pnOut. The iterator passed as the first argument must be a tokendata=1
+** iterator (pIter->pTokenDataIter!=0).
+*/
+int sqlite3Fts5IterToken(
+ Fts5IndexIter *pIndexIter,
+ i64 iRowid,
+ int iCol,
+ int iOff,
+ const char **ppOut, int *pnOut
+){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
+ Fts5TokenDataIter *pT = pIter->pTokenDataIter;
+ Fts5TokenDataMap *aMap = pT->aMap;
+ i64 iPos = (((i64)iCol)<<32) + iOff;
+
+ int i1 = 0;
+ int i2 = pT->nMap;
+ int iTest = 0;
+
+ while( i2>i1 ){
+ iTest = (i1 + i2) / 2;
+
+ if( aMap[iTest].iRowid<iRowid ){
+ i1 = iTest+1;
+ }else if( aMap[iTest].iRowid>iRowid ){
+ i2 = iTest;
+ }else{
+ if( aMap[iTest].iPos<iPos ){
+ if( aMap[iTest].iPos<0 ){
+ break;
+ }
+ i1 = iTest+1;
+ }else if( aMap[iTest].iPos>iPos ){
+ i2 = iTest;
+ }else{
+ break;
+ }
+ }
+ }
+
+ if( i2>i1 ){
+ Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
+ *ppOut = (const char*)pMap->aSeg[0].term.p+1;
+ *pnOut = pMap->aSeg[0].term.n-1;
+ }
+
+ return SQLITE_OK;
+}
+
+/*
+** Clear any existing entries from the token-map associated with the
+** iterator passed as the only argument.
+*/
+void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
+ if( pIter->pTokenDataIter ){
+ pIter->pTokenDataIter->nMap = 0;
+ }
+}
+
+/*
+** Set a token-mapping for the iterator passed as the first argument. This
+** is used in detail=column or detail=none mode when a token is requested
+** using the xInstToken() API. In this case the caller tokenizes the
+** current row and configures the token-mapping via multiple calls to this
+** function.
+*/
+int sqlite3Fts5IndexIterWriteTokendata(
+ Fts5IndexIter *pIndexIter,
+ const char *pToken, int nToken,
+ i64 iRowid, int iCol, int iOff
+){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
+ Fts5TokenDataIter *pT = pIter->pTokenDataIter;
+ Fts5Index *p = pIter->pIndex;
+ int ii;
+
+ assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL );
+ assert( pIter->pTokenDataIter );
+
+ for(ii=0; ii<pT->nIter; ii++){
+ Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
+ if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
+ }
+ if( ii<pT->nIter ){
+ fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff);
+ }
+ return fts5IndexReturn(p);
+}
+
+/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
if( pIndexIter ){
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
Fts5Index *pIndex = pIter->pIndex;
+ fts5TokendataIterDelete(pIter->pTokenDataIter);
fts5MultiIterFree(pIter);
sqlite3Fts5IndexCloseReader(pIndex);
}
@@ -7084,7 +7761,9 @@ static int fts5QueryCksum(
int eDetail = p->pConfig->eDetail;
u64 cksum = *pCksum;
Fts5IndexIter *pIter = 0;
- int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
+ int rc = sqlite3Fts5IndexQuery(
+ p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter
+ );
while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
i64 rowid = pIter->iRowid;
@@ -7783,6 +8462,24 @@ static void fts5DecodeRowidList(
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
+static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){
+ int ii;
+ fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1);
+ if( *pRc==SQLITE_OK ){
+ for(ii=0; ii<pTerm->n; ii++){
+ if( pTerm->p[ii]==0x00 ){
+ pBuf->p[pBuf->n++] = '\\';
+ pBuf->p[pBuf->n++] = '0';
+ }else{
+ pBuf->p[pBuf->n++] = pTerm->p[ii];
+ }
+ }
+ pBuf->p[pBuf->n] = 0x00;
+ }
+}
+#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
+
+#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The implementation of user-defined scalar function fts5_decode().
*/
@@ -7889,9 +8586,8 @@ static void fts5DecodeFunction(
iOff += fts5GetVarint32(&a[iOff], nAppend);
term.n = nKeep;
fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
- sqlite3Fts5BufferAppendPrintf(
- &rc, &s, " term=%.*s", term.n, (const char*)term.p
- );
+ sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
+ fts5BufferAppendTerm(&rc, &s, &term);
iOff += nAppend;
/* Figure out where the doclist for this term ends */
@@ -7999,9 +8695,8 @@ static void fts5DecodeFunction(
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
iOff += nByte;
- sqlite3Fts5BufferAppendPrintf(
- &rc, &s, " term=%.*s", term.n, (const char*)term.p
- );
+ sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
+ fts5BufferAppendTerm(&rc, &s, &term);
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
}
diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c
index 6e86ca595..d35e998da 100644
--- a/ext/fts5/fts5_main.c
+++ b/ext/fts5/fts5_main.c
@@ -656,12 +656,15 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
}
idxStr[iIdxStr] = '\0';
- /* Set idxFlags flags for the ORDER BY clause */
+ /* Set idxFlags flags for the ORDER BY clause
+ **
+ ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC".
+ */
if( pInfo->nOrderBy==1 ){
int iSort = pInfo->aOrderBy[0].iColumn;
if( iSort==(pConfig->nCol+1) && bSeenMatch ){
idxFlags |= FTS5_BI_ORDER_RANK;
- }else if( iSort==-1 ){
+ }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){
idxFlags |= FTS5_BI_ORDER_ROWID;
}
if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
@@ -913,6 +916,16 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
);
assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
+ /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table,
+ ** clear any token mappings accumulated at the fts5_index.c level. In
+ ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH,
+ ** we need to retain the mappings for the entire query. */
+ if( pCsr->ePlan==FTS5_PLAN_MATCH
+ && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata
+ ){
+ sqlite3Fts5ExprClearTokens(pCsr->pExpr);
+ }
+
if( pCsr->ePlan<3 ){
int bSkip = 0;
if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
@@ -2063,12 +2076,6 @@ static int fts5ApiInst(
){
if( iIdx<0 || iIdx>=pCsr->nInstCount ){
rc = SQLITE_RANGE;
-#if 0
- }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
- *piPhrase = pCsr->aInst[iIdx*3];
- *piCol = pCsr->aInst[iIdx*3 + 2];
- *piOff = -1;
-#endif
}else{
*piPhrase = pCsr->aInst[iIdx*3];
*piCol = pCsr->aInst[iIdx*3 + 1];
@@ -2323,13 +2330,56 @@ static int fts5ApiPhraseFirstColumn(
return rc;
}
+/*
+** xQueryToken() API implementation.
+*/
+static int fts5ApiQueryToken(
+ Fts5Context* pCtx,
+ int iPhrase,
+ int iToken,
+ const char **ppOut,
+ int *pnOut
+){
+ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+ return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut);
+}
+
+/*
+** xInstToken() API implementation.
+*/
+static int fts5ApiInstToken(
+ Fts5Context *pCtx,
+ int iIdx,
+ int iToken,
+ const char **ppOut, int *pnOut
+){
+ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+ int rc = SQLITE_OK;
+ if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
+ || SQLITE_OK==(rc = fts5CacheInstArray(pCsr))
+ ){
+ if( iIdx<0 || iIdx>=pCsr->nInstCount ){
+ rc = SQLITE_RANGE;
+ }else{
+ int iPhrase = pCsr->aInst[iIdx*3];
+ int iCol = pCsr->aInst[iIdx*3 + 1];
+ int iOff = pCsr->aInst[iIdx*3 + 2];
+ i64 iRowid = fts5CursorRowid(pCsr);
+ rc = sqlite3Fts5ExprInstToken(
+ pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut
+ );
+ }
+ }
+ return rc;
+}
+
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);
static const Fts5ExtensionApi sFts5Api = {
- 2, /* iVersion */
+ 3, /* iVersion */
fts5ApiUserData,
fts5ApiColumnCount,
fts5ApiRowCount,
@@ -2349,6 +2399,8 @@ static const Fts5ExtensionApi sFts5Api = {
fts5ApiPhraseNext,
fts5ApiPhraseFirstColumn,
fts5ApiPhraseNextColumn,
+ fts5ApiQueryToken,
+ fts5ApiInstToken
};
/*
diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c
index 80c600dbb..853a41865 100644
--- a/ext/fts5/fts5_tcl.c
+++ b/ext/fts5/fts5_tcl.c
@@ -244,6 +244,9 @@ static int SQLITE_TCLAPI xF5tApi(
{ "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */
{ "xPhraseForeach", 4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */
{ "xPhraseColumnForeach", 3, "IPHRASE COLVAR SCRIPT" }, /* 17 */
+
+ { "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */
+ { "xInstToken", 2, "IDX ITERM" }, /* 19 */
{ 0, 0, 0}
};
@@ -500,6 +503,38 @@ static int SQLITE_TCLAPI xF5tApi(
break;
}
+ CASE(18, "xQueryToken") {
+ const char *pTerm = 0;
+ int nTerm = 0;
+ int iPhrase = 0;
+ int iTerm = 0;
+
+ if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
+ if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR;
+ rc = p->pApi->xQueryToken(p->pFts, iPhrase, iTerm, &pTerm, &nTerm);
+ if( rc==SQLITE_OK ){
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm));
+ }
+
+ break;
+ }
+
+ CASE(19, "xInstToken") {
+ const char *pTerm = 0;
+ int nTerm = 0;
+ int iIdx = 0;
+ int iTerm = 0;
+
+ if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ) return TCL_ERROR;
+ if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR;
+ rc = p->pApi->xInstToken(p->pFts, iIdx, iTerm, &pTerm, &nTerm);
+ if( rc==SQLITE_OK ){
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm));
+ }
+
+ break;
+ }
+
default:
assert( 0 );
break;
@@ -1117,6 +1152,176 @@ static int SQLITE_TCLAPI f5tRegisterTok(
return TCL_OK;
}
+typedef struct OriginTextCtx OriginTextCtx;
+struct OriginTextCtx {
+ sqlite3 *db;
+ fts5_api *pApi;
+};
+
+typedef struct OriginTextTokenizer OriginTextTokenizer;
+struct OriginTextTokenizer {
+ Fts5Tokenizer *pTok; /* Underlying tokenizer object */
+ fts5_tokenizer tokapi; /* API implementation for pTok */
+};
+
+/*
+** Delete the OriginTextCtx object indicated by the only argument.
+*/
+static void f5tOrigintextTokenizerDelete(void *pCtx){
+ OriginTextCtx *p = (OriginTextCtx*)pCtx;
+ ckfree(p);
+}
+
+static int f5tOrigintextCreate(
+ void *pCtx,
+ const char **azArg,
+ int nArg,
+ Fts5Tokenizer **ppOut
+){
+ OriginTextCtx *p = (OriginTextCtx*)pCtx;
+ OriginTextTokenizer *pTok = 0;
+ void *pTokCtx = 0;
+ int rc = SQLITE_OK;
+
+ pTok = (OriginTextTokenizer*)sqlite3_malloc(sizeof(OriginTextTokenizer));
+ if( pTok==0 ){
+ rc = SQLITE_NOMEM;
+ }else if( nArg<1 ){
+ rc = SQLITE_ERROR;
+ }else{
+ /* Locate the underlying tokenizer */
+ rc = p->pApi->xFindTokenizer(p->pApi, azArg[0], &pTokCtx, &pTok->tokapi);
+ }
+
+ /* Create the new tokenizer instance */
+ if( rc==SQLITE_OK ){
+ rc = pTok->tokapi.xCreate(pTokCtx, &azArg[1], nArg-1, &pTok->pTok);
+ }
+
+ if( rc!=SQLITE_OK ){
+ sqlite3_free(pTok);
+ pTok = 0;
+ }
+ *ppOut = (Fts5Tokenizer*)pTok;
+ return rc;
+}
+
+static void f5tOrigintextDelete(Fts5Tokenizer *pTokenizer){
+ OriginTextTokenizer *p = (OriginTextTokenizer*)pTokenizer;
+ if( p->pTok ){
+ p->tokapi.xDelete(p->pTok);
+ }
+ sqlite3_free(p);
+}
+
+typedef struct OriginTextCb OriginTextCb;
+struct OriginTextCb {
+ void *pCtx;
+ const char *pText;
+ int nText;
+ int (*xToken)(void *, int, const char *, int, int, int);
+
+ char *aBuf; /* Buffer to use */
+ int nBuf; /* Allocated size of aBuf[] */
+};
+
+static int xOriginToken(
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Pointer to buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Byte offset of token within input text */
+ int iEnd /* Byte offset of end of token within input */
+){
+ OriginTextCb *p = (OriginTextCb*)pCtx;
+ int ret = 0;
+
+ if( nToken==(iEnd-iStart) && 0==memcmp(pToken, &p->pText[iStart], nToken) ){
+ /* Token exactly matches document text. Pass it through as is. */
+ ret = p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
+ }else{
+ int nReq = nToken + 1 + (iEnd-iStart);
+ if( nReq>p->nBuf ){
+ sqlite3_free(p->aBuf);
+ p->aBuf = sqlite3_malloc(nReq*2);
+ if( p->aBuf==0 ) return SQLITE_NOMEM;
+ p->nBuf = nReq*2;
+ }
+
+ memcpy(p->aBuf, pToken, nToken);
+ p->aBuf[nToken] = '\0';
+ memcpy(&p->aBuf[nToken+1], &p->pText[iStart], iEnd-iStart);
+ ret = p->xToken(p->pCtx, tflags, p->aBuf, nReq, iStart, iEnd);
+ }
+
+ return ret;
+}
+
+
+static int f5tOrigintextTokenize(
+ Fts5Tokenizer *pTokenizer,
+ void *pCtx,
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
+ const char *pText, int nText,
+ int (*xToken)(void *, int, const char *, int, int, int)
+){
+ OriginTextTokenizer *p = (OriginTextTokenizer*)pTokenizer;
+ OriginTextCb cb;
+ int ret;
+
+ memset(&cb, 0, sizeof(cb));
+ cb.pCtx = pCtx;
+ cb.pText = pText;
+ cb.nText = nText;
+ cb.xToken = xToken;
+
+ ret = p->tokapi.xTokenize(p->pTok,(void*)&cb,flags,pText,nText,xOriginToken);
+ sqlite3_free(cb.aBuf);
+ return ret;
+}
+
+/*
+** sqlite3_fts5_register_origintext DB
+**
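+** Register a tokenizer named "origintext" with the FTS5 extension of
+** database handle DB. The tokenizer wraps another tokenizer, named by its
+** first argument. For each token that differs from the document text it
+** was extracted from, it emits the token followed by a 0x00 byte and a
+** copy of that original text. Tokens that match the document text exactly
+** are passed through unchanged.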
+*/
+static int SQLITE_TCLAPI f5tRegisterOriginText(
+ void * clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+ sqlite3 *db = 0;
+ fts5_api *pApi = 0;
+ int rc;
+ fts5_tokenizer tok = {0, 0, 0};
+ OriginTextCtx *pCtx = 0;
+
+ if( objc!=2 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "DB");
+ return TCL_ERROR;
+ }
+ if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR;
+
+ pCtx = (OriginTextCtx*)ckalloc(sizeof(OriginTextCtx));
+ pCtx->db = db;
+ pCtx->pApi = pApi;
+
+ tok.xCreate = f5tOrigintextCreate;
+ tok.xDelete = f5tOrigintextDelete;
+ tok.xTokenize = f5tOrigintextTokenize;
+ rc = pApi->xCreateTokenizer(
+ pApi, "origintext", (void*)pCtx, &tok, f5tOrigintextTokenizerDelete
+ );
+
+ Tcl_ResetResult(interp);
+ if( rc!=SQLITE_OK ){
+ Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
+ return TCL_ERROR;
+ }
+ return TCL_OK;
+}
+
/*
** Entry point.
*/
@@ -1133,7 +1338,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
{ "sqlite3_fts5_token_hash", f5tTokenHash, 0 },
{ "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 },
- { "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 }
+ { "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 },
+ { "sqlite3_fts5_register_origintext",f5tRegisterOriginText, 0 }
};
int i;
F5tTokenizerContext *pContext;
diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl
index 9c012932d..001cad1de 100644
--- a/ext/fts5/test/fts5_common.tcl
+++ b/ext/fts5/test/fts5_common.tcl
@@ -438,6 +438,20 @@ proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }
proc detail_is_col {} { detail_check ; expr {$::detail == "col" } }
proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }
+# Run SCRIPT once for each tokenizer mode, in the caller's scope.
+#
+# For each mode, the text %TOKENIZER% in SCRIPT is replaced by the extra
+# CREATE VIRTUAL TABLE arguments for that mode (nothing for the default
+# mode), ::testprefix is set to PREFIX followed by the mode suffix, the
+# database is reset and the origintext tokenizer is registered with it.
+# The original value of ::testprefix is restored after the last mode.
+#
+proc foreach_tokenizer_mode {prefix script} {
+  set origPrefix $::testprefix
+
+  # Pairs of: test-prefix suffix, replacement text for %TOKENIZER%.
+  set lMode [list \
+    ""            {} \
+    "-origintext" {, tokenize="origintext unicode61", tokendata=1}]
+
+  foreach {suffix tokenizerArgs} $lMode {
+    set expanded [string map [list %TOKENIZER% $tokenizerArgs] $script]
+    set ::testprefix $prefix$suffix
+    reset_db
+    sqlite3_fts5_register_origintext db
+    uplevel $expanded
+  }
+
+  set ::testprefix $origPrefix
+}
#-------------------------------------------------------------------------
# Convert a poslist of the type returned by fts5_test_poslist() to a
diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test
index e1551fc51..a80a307a4 100644
--- a/ext/fts5/test/fts5aa.test
+++ b/ext/fts5/test/fts5aa.test
@@ -22,6 +22,7 @@ ifcapable !fts5 {
}
foreach_detail_mode $::testprefix {
+foreach_tokenizer_mode $::testprefix {
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, c);
@@ -44,7 +45,7 @@ do_execsql_test 1.1 {
#
do_execsql_test 2.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%);
}
do_execsql_test 2.1 {
INSERT INTO t1 VALUES('a b c', 'd e f');
@@ -73,8 +74,9 @@ do_execsql_test 2.4 {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 3.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
}
foreach {i x y} {
1 {g f d b f} {h h e i a}
@@ -97,8 +99,9 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 4.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
@@ -121,8 +124,9 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 5.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
@@ -145,8 +149,9 @@ foreach {i x y} {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 6.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
@@ -181,6 +186,7 @@ do_execsql_test 6.6 {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
expr srand(0)
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
@@ -222,6 +228,7 @@ for {set i 1} {$i <= 10} {incr i} {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
@@ -236,6 +243,7 @@ do_execsql_test 8.1 {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
expr srand(0)
@@ -280,8 +288,9 @@ for {set i 1} {$i <= 10} {incr i} {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 10.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
}
set d10 {
1 {g f d b f} {h h e i a}
@@ -314,19 +323,19 @@ do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
#-------------------------------------------------------------------------
#
do_catchsql_test 11.1 {
- CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL% %TOKENIZER%);
} {1 {reserved fts5 column name: rank}}
do_catchsql_test 11.2 {
- CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL% %TOKENIZER%);
} {1 {reserved fts5 table name: rank}}
do_catchsql_test 11.3 {
- CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL% %TOKENIZER%);
} {1 {reserved fts5 column name: rowid}}
#-------------------------------------------------------------------------
#
do_execsql_test 12.1 {
- CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%);
} {}
do_catchsql_test 12.2 {
@@ -341,8 +350,9 @@ do_test 12.3 {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 13.1 {
- CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o');
} {}
@@ -365,8 +375,9 @@ do_execsql_test 13.6 {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 14.1 {
- CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
WITH d(x,y) AS (
SELECT NULL, 'xyz xyz xyz xyz xyz xyz'
@@ -449,8 +460,9 @@ do_catchsql_test 16.2 {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 17.1 {
- CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO b2 VALUES('a');
INSERT INTO b2 VALUES('b');
INSERT INTO b2 VALUES('c');
@@ -466,8 +478,9 @@ do_test 17.2 {
if {[string match n* %DETAIL%]==0} {
reset_db
+ sqlite3_fts5_register_origintext db
do_execsql_test 17.3 {
- CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%);
INSERT INTO c2 VALUES('x x x', 'x x x');
SELECT rowid FROM c2 WHERE c2 MATCH 'y:x';
} {1}
@@ -476,8 +489,9 @@ if {[string match n* %DETAIL%]==0} {
#-------------------------------------------------------------------------
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 17.1 {
- CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL% %TOKENIZER%);
INSERT INTO uio VALUES(NULL);
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
@@ -524,8 +538,8 @@ do_execsql_test 17.9 {
#--------------------------------------------------------------------
#
do_execsql_test 18.1 {
- CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%);
- CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL% %TOKENIZER%);
+ CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1 VALUES('abc*', NULL);
INSERT INTO t2 VALUES(1, 'abcdefg');
}
@@ -540,8 +554,9 @@ do_execsql_test 18.3 {
# fts5 table in the temp schema.
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 19.0 {
- CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t1 VALUES('x y z');
INSERT INTO t1 VALUES('w x 1');
SELECT rowid FROM t1 WHERE t1 MATCH 'x';
@@ -551,8 +566,9 @@ do_execsql_test 19.0 {
# Test that 6 and 7 byte varints can be read.
#
reset_db
+sqlite3_fts5_register_origintext db
do_execsql_test 20.0 {
- CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL% %TOKENIZER%);
}
set ::ids [list \
0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43]
@@ -570,7 +586,7 @@ do_test 20.1 {
#
do_execsql_test 21.0 {
CREATE TEMP TABLE t8(a, b);
- CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL% %TOKENIZER%);
}
do_execsql_test 21.1 {
@@ -581,7 +597,7 @@ do_execsql_test 21.1 {
}
do_execsql_test 22.0 {
- CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t9(rowid, x) VALUES(2, 'bbb');
BEGIN;
INSERT INTO t9(rowid, x) VALUES(1, 'aaa');
@@ -596,7 +612,7 @@ do_execsql_test 22.1 {
#-------------------------------------------------------------------------
do_execsql_test 23.0 {
- CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
CREATE TABLE t11(x);
}
do_execsql_test 23.1 {
@@ -608,7 +624,7 @@ do_execsql_test 23.2 {
#-------------------------------------------------------------------------
do_execsql_test 24.0 {
- CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
INSERT INTO t12 VALUES('aaaa');
}
do_execsql_test 24.1 {
@@ -618,6 +634,9 @@ do_execsql_test 24.1 {
INSERT INTO t12 VALUES('aaaa');
END;
}
+execsql_pp {
+ SELECT rowid, hex(block) FROM t12_data
+}
do_execsql_test 24.2 {
INSERT INTO t12(t12) VALUES('integrity-check');
}
@@ -627,7 +646,7 @@ do_execsql_test 24.3 {
#-------------------------------------------------------------------------
do_execsql_test 25.0 {
- CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL%);
+ CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL% %TOKENIZER%);
}
do_execsql_test 25.1 {
BEGIN;
@@ -639,6 +658,7 @@ SELECT * FROM t13('BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
}
+}
expand_all_sql db
finish_test
diff --git a/ext/fts5/test/fts5faultH.test b/ext/fts5/test/fts5faultH.test
new file mode 100644
index 000000000..9dd4cac0d
--- /dev/null
+++ b/ext/fts5/test/fts5faultH.test
@@ -0,0 +1,93 @@
+# 2010 June 15
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+source $testdir/malloc_common.tcl
+
+# Test prefix for this file.
+set testprefix fts5faultH
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+  finish_test
+  return
+}
+
+sqlite3_fts5_register_origintext db
+
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1
+ );
+
+ BEGIN;
+ INSERT INTO t1 VALUES('oNe tWo thRee');
+ INSERT INTO t1 VALUES('One Two Three');
+ INSERT INTO t1 VALUES('onE twO threE');
+ COMMIT;
+ BEGIN;
+ INSERT INTO t1 VALUES('one two three');
+ INSERT INTO t1 VALUES('one two three');
+ INSERT INTO t1 VALUES('one two three');
+ COMMIT;
+}
+
+do_faultsim_test 1 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT rowid FROM t1('three');
+ }
+} -test {
+ faultsim_integrity_check
+ faultsim_test_result {0 {1 2 3 4 5 6}}
+}
+
+
+reset_db
+sqlite3_fts5_register_origintext db
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1
+ );
+ INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
+
+ BEGIN;
+ INSERT INTO t1(rowid, x) VALUES(10, 'aaa bbb BBB');
+ INSERT INTO t1(rowid, x) VALUES(12, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(13, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(14, 'bbb BBB bbb');
+ INSERT INTO t1(rowid, x) VALUES(15, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(16, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(17, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(18, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(19, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(20, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(21, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(22, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(23, 'bbb bbb bbb');
+ INSERT INTO t1(rowid, x) VALUES(24, 'aaa bbb BBB');
+ COMMIT;
+}
+
+do_faultsim_test 2 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT rowid FROM t1('BBB AND AAA');
+ }
+} -test {
+ faultsim_integrity_check
+ faultsim_test_result {0 {10 24}}
+}
+
+
+
+finish_test
diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test
new file mode 100644
index 000000000..9752f35d3
--- /dev/null
+++ b/ext/fts5/test/fts5origintext.test
@@ -0,0 +1,297 @@
+# 2014 Jan 08
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests for the "origintext" tokenizer, with and without tokendata=1,
+# including the xQueryToken() and xInstToken() APIs.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+foreach_detail_mode $testprefix {
+
+sqlite3_fts5_register_origintext db
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", detail=%DETAIL%
+ );
+ CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
+}
+
+do_execsql_test 1.1 {
+ INSERT INTO ft VALUES('Hello world');
+}
+
+do_execsql_test 1.2 {
+ INSERT INTO ft(ft) VALUES('integrity-check');
+}
+
+# Return a copy of x in which each NUL byte is replaced by a "." character.
+proc b {x} {
+  set mapping [list "\0" "."]
+  return [string map $mapping $x]
+}
+db func b b
+
+do_execsql_test 1.3 {
+ select b(term) from vocab;
+} {
+ hello.Hello
+ world
+}
+
+do_execsql_test 1.4 {
+ SELECT rowid FROM ft('Hello');
+} {1}
+
+#-------------------------------------------------------------------------
+reset_db
+
+# Return a random integer between 0 and n-1, for a positive integer n.
+#
+proc random {n} {
+  set scaled [expr {rand() * $n}]
+  return [expr {abs(int($scaled))}]
+}
+
+# Return one element of the list passed as the only argument, selected
+# at random.
+proc select_one {list} {
+  set idx [random [llength $list]]
+  return [lindex $list $idx]
+}
+
+# Return a random term: a single letter of either case followed by a
+# random integer between 0 and 99.
+proc term {} {
+  set lLetter {
+    a b c d e f g h i j k l m n o p q r s t u v w x y z
+    A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+  }
+
+  set letter [select_one $lLetter]
+  set number [random 100]
+  return "$letter$number"
+}
+
+# Return a document: a list of between 5 and 9 random terms.
+proc document {} {
+  set nTerm [expr {[random 5] + 5}]
+  set lTerm [list]
+  for {set i 0} {$i < $nTerm} {incr i} {
+    lappend lTerm [term]
+  }
+  return $lTerm
+}
+db func document document
+
+sqlite3_fts5_register_origintext db
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", detail=%DETAIL%
+ );
+ INSERT INTO ft(ft, rank) VALUES('pgsz', 128);
+ CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
+}
+
+do_test 2.1 {
+ for {set ii 0} {$ii < 500} {incr ii} {
+ execsql { INSERT INTO ft VALUES( document() ) }
+ }
+} {}
+
+do_execsql_test 2.2 {
+ INSERT INTO ft(ft) VALUES('integrity-check');
+}
+
+do_execsql_test 2.3 {
+ INSERT INTO ft(ft, rank) VALUES('merge', 16);
+}
+
+do_execsql_test 2.4 {
+ INSERT INTO ft(ft) VALUES('integrity-check');
+}
+
+do_execsql_test 2.5 {
+ INSERT INTO ft(ft) VALUES('optimize');
+}
+
+#-------------------------------------------------------------------------
+reset_db
+
+sqlite3_fts5_register_origintext db
+do_execsql_test 3.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", detail=%DETAIL%
+ );
+ CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
+
+ INSERT INTO ft(rowid, x) VALUES(1, 'hello');
+ INSERT INTO ft(rowid, x) VALUES(2, 'Hello');
+ INSERT INTO ft(rowid, x) VALUES(3, 'HELLO');
+}
+
+#proc b {x} { string map [list "\0" "."] $x }
+#db func b b
+#execsql_pp { SELECT b(term) FROM vocab }
+
+do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1
+do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2
+do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3
+
+do_execsql_test 3.2 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(x,
+ tokenize="origintext unicode61",
+ tokendata=1,
+ detail=%DETAIL%
+ );
+ CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance);
+
+ INSERT INTO ft2(rowid, x) VALUES(1, 'hello');
+ INSERT INTO ft2(rowid, x) VALUES(2, 'Hello');
+ INSERT INTO ft2(rowid, x) VALUES(3, 'HELLO');
+
+ INSERT INTO ft2(rowid, x) VALUES(10, 'helloooo');
+}
+
+#proc b {x} { string map [list "\0" "."] $x }
+#db func b b
+#execsql_pp { SELECT b(term) FROM vocab }
+
+do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3}
+do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3}
+do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3}
+
+do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+sqlite3_fts5_register_origintext db
+# Auxiliary function body: return the text of token iToken of phrase
+# iPhrase of the current query, as reported by xQueryToken, with each
+# NUL byte replaced by a "." character.
+proc querytoken {cmd iPhrase iToken} {
+  set raw [$cmd xQueryToken $iPhrase $iToken]
+  return [string map [list "\0" "."] $raw]
+}
+sqlite3_fts5_create_function db querytoken querytoken
+
+do_execsql_test 4.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
+ );
+ INSERT INTO ft VALUES('one two three four');
+}
+
+do_execsql_test 4.1 {
+ SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO')
+} {1 two.TwO}
+do_execsql_test 4.2 {
+ SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE')
+} {1 one}
+do_execsql_test 4.3 {
+ SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE')
+} {1 two.TWO}
+
+if {"%DETAIL%"=="full"} {
+ # Phrase queries are only supported for detail=full.
+ #
+ do_execsql_test 4.4 {
+ SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"')
+ } {1 three.ThreE}
+ do_catchsql_test 4.5 {
+ SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"')
+ } {1 SQLITE_RANGE}
+ do_catchsql_test 4.6 {
+ SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"')
+ } {1 SQLITE_RANGE}
+ do_catchsql_test 4.7 {
+ SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"')
+ } {1 SQLITE_RANGE}
+}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+sqlite3_fts5_register_origintext db
+# Auxiliary function body: return the text of token iToken of phrase hit
+# iIdx within the current row, as reported by xInstToken, with each NUL
+# byte replaced by a "." character.
+proc insttoken {cmd iIdx iToken} {
+  set raw [$cmd xInstToken $iIdx $iToken]
+  return [string map [list "\0" "."] $raw]
+}
+sqlite3_fts5_create_function db insttoken insttoken
+fts5_aux_test_functions db
+
+do_execsql_test 5.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
+ );
+ INSERT INTO ft VALUES('one ONE One oNe oNE one');
+}
+
+do_execsql_test 5.1 {
+ SELECT insttoken(ft, 0, 0),
+ insttoken(ft, 1, 0),
+ insttoken(ft, 2, 0),
+ insttoken(ft, 3, 0),
+ insttoken(ft, 4, 0),
+ insttoken(ft, 5, 0)
+ FROM ft('one');
+} {
+ one one.ONE one.One one.oNe one.oNE one
+}
+
+do_execsql_test 5.2 {
+ SELECT insttoken(ft, 1, 0) FROM ft('one');
+} {
+ one.ONE
+}
+
+do_execsql_test 5.3 {
+ SELECT fts5_test_poslist(ft) FROM ft('one');
+} {
+ {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5}
+}
+
+#-------------------------------------------------------------------------
+# Test the xInstToken() API with:
+#
+# * a non tokendata=1 table.
+# * prefix queries.
+#
+reset_db
+sqlite3_fts5_register_origintext db
+do_execsql_test 6.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, y, tokenize='origintext unicode61', detail=%DETAIL%
+ );
+
+ INSERT INTO ft VALUES('One Two', 'Three two');
+ INSERT INTO ft VALUES('three Three', 'one One');
+}
+# Auxiliary function body: return a list with one entry for each phrase
+# hit in the current row. Each entry is the text of token 0 of the hit as
+# reported by xInstToken, with each NUL byte replaced by a "." character.
+proc tokens {cmd} {
+  set lToken [list]
+  set iInst 0
+  while {$iInst < [$cmd xInstCount]} {
+    set raw [$cmd xInstToken $iInst 0]
+    lappend lToken [string map [list "\0" "."] $raw]
+    incr iInst
+  }
+  return $lToken
+}
+sqlite3_fts5_create_function db tokens tokens
+
+do_execsql_test 6.1 {
+ SELECT rowid, tokens(ft) FROM ft('One');
+} {1 one.One 2 one.One}
+
+do_execsql_test 6.2 {
+ SELECT rowid, tokens(ft) FROM ft('on*');
+} {1 {{}} 2 {{} {}}}
+
+do_execsql_test 6.3 {
+ SELECT rowid, tokens(ft) FROM ft('Three*');
+} {1 {{}} 2 {{}}}
+
+}
+
+finish_test
+
diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test
new file mode 100644
index 000000000..a8c5d4eb5
--- /dev/null
+++ b/ext/fts5/test/fts5origintext2.test
@@ -0,0 +1,146 @@
+# 2014 Jan 08
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
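+# Tests for tokendata=1 tables: queries matching terms that differ only in
+# case, the number of records in the %_data table, and xInstToken() with
+# detail=none.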
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext2
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+sqlite3_fts5_register_origintext db
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1
+ );
+}
+
+do_execsql_test 1.1 {
+ BEGIN;
+ INSERT INTO ft VALUES('Hello');
+ INSERT INTO ft VALUES('hello');
+ INSERT INTO ft VALUES('HELLO');
+ INSERT INTO ft VALUES('today');
+ INSERT INTO ft VALUES('today');
+ INSERT INTO ft VALUES('today');
+ INSERT INTO ft VALUES('World');
+ INSERT INTO ft VALUES('world');
+ INSERT INTO ft VALUES('WORLD');
+ COMMIT;
+}
+
+do_execsql_test 1.2 { SELECT rowid FROM ft('hello'); } {1 2 3}
+do_execsql_test 1.3 { SELECT rowid FROM ft('today'); } {4 5 6}
+do_execsql_test 1.4 { SELECT rowid FROM ft('world'); } {7 8 9}
+
+do_execsql_test 1.5 {
+ SELECT count(*) FROM ft_data
+} 3
+
+do_execsql_test 1.6 {
+ DELETE FROM ft;
+ INSERT INTO ft(ft, rank) VALUES('pgsz', 64);
+ BEGIN;
+ WITH s(i) AS (
+ SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100
+ )
+ INSERT INTO ft SELECT 'Hello Hello Hello Hello Hello Hello Hello' FROM s;
+ INSERT INTO ft VALUES ('hELLO hELLO hELLO');
+ INSERT INTO ft VALUES('today today today today today today today');
+ INSERT INTO ft VALUES('today today today today today today today');
+ INSERT INTO ft VALUES('today today today today today today today');
+ INSERT INTO ft VALUES('today today today today today today today');
+ INSERT INTO ft VALUES('today today today today today today today');
+ INSERT INTO ft VALUES('today today today today today today today');
+ INSERT INTO ft VALUES('World World World World World World World');
+ INSERT INTO ft VALUES('world world world world world world world');
+ INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD');
+ INSERT INTO ft VALUES('World World World World World World World');
+ INSERT INTO ft VALUES('world world world world world world world');
+ INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD');
+ COMMIT;
+}
+
+do_execsql_test 1.7 {
+ SELECT count(*) FROM ft_data;
+} 23
+
+do_execsql_test 1.8 { SELECT rowid FROM ft('hello') WHERE rowid>100; } {101}
+
+do_execsql_test 1.9 {
+ DELETE FROM ft;
+ INSERT INTO ft(ft) VALUES('optimize');
+ SELECT count(*) FROM ft_data;
+} {2}
+do_execsql_test 1.10 {
+ BEGIN;
+ INSERT INTO ft VALUES('Hello');
+ INSERT INTO ft VALUES('hello');
+ INSERT INTO ft VALUES('HELLO');
+ INSERT INTO ft VALUES('today');
+ INSERT INTO ft VALUES('today');
+ INSERT INTO ft VALUES('today');
+ INSERT INTO ft VALUES('World');
+ INSERT INTO ft VALUES('world');
+ INSERT INTO ft VALUES('WORLD');
+}
+
+do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3}
+do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6}
+do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9}
+do_execsql_test 1.14 { SELECT rowid FROM ft('hello') ORDER BY rank; } {1 2 3}
+
+#------------------------------------------------------------------------
+reset_db
+sqlite3_fts5_register_origintext db
+# Auxiliary function body: return a list with one entry for each phrase
+# hit in the current row. Each entry is the text of token 0 of the hit as
+# reported by xInstToken, with each NUL byte replaced by a "." character.
+proc tokens {cmd} {
+  set lToken [list]
+  set iInst 0
+  while {$iInst < [$cmd xInstCount]} {
+    set raw [$cmd xInstToken $iInst 0]
+    lappend lToken [string map [list "\0" "."] $raw]
+    incr iInst
+  }
+  return $lToken
+}
+sqlite3_fts5_create_function db tokens tokens
+
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(
+ v, tokenize="origintext unicode61", tokendata=1, detail=none
+ );
+
+ INSERT INTO x1 VALUES('xxx Xxx XXX yyy YYY yyy');
+ INSERT INTO x1 VALUES('xxx yyy xxx yyy yyy yyy');
+}
+
+do_execsql_test 2.1 {
+ SELECT tokens(x1) FROM x1('xxx');
+} {
+ {xxx xxx.Xxx xxx.XXX} {xxx xxx}
+}
+
+do_execsql_test 2.2 {
+ UPDATE x1_content SET c0 = 'xxx xxX xxx yyy yyy yyy' WHERE id=1;
+}
+
+do_execsql_test 2.3 {
+ SELECT tokens(x1) FROM x1('xxx');
+} {
+ {xxx {} xxx} {xxx xxx}
+}
+
+finish_test
+
diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test
new file mode 100644
index 000000000..834844595
--- /dev/null
+++ b/ext/fts5/test/fts5origintext3.test
@@ -0,0 +1,101 @@
+# 2023 November 22
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests for xInstToken() and position lists on tokendata=1 tables in each
+# detail mode, including "ORDER BY rank" and AND queries.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext3
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+foreach_detail_mode $testprefix {
+ reset_db
+
+ sqlite3_fts5_register_origintext db
+ fts5_aux_test_functions db
+ # Auxiliary function body: return the text of token iToken of phrase hit
+ # iIdx within the current row, as reported by xInstToken, with each NUL
+ # byte replaced by a "." character.
+ proc insttoken {cmd iIdx iToken} {
+   set raw [$cmd xInstToken $iIdx $iToken]
+   return [string map [list "\0" "."] $raw]
+ }
+ sqlite3_fts5_create_function db insttoken insttoken
+
+ do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
+ );
+ }
+
+ do_execsql_test 1.1 {
+ INSERT INTO ft VALUES('Hello world HELLO WORLD hello');
+ }
+
+ do_execsql_test 1.2 {
+ SELECT fts5_test_poslist(ft) FROM ft('hello');
+ } {{0.0.0 0.0.2 0.0.4}}
+
+ do_execsql_test 1.3 {
+ SELECT
+ insttoken(ft, 0, 0),
+ insttoken(ft, 1, 0),
+ insttoken(ft, 2, 0)
+ FROM ft('hello');
+ } {hello.Hello hello.HELLO hello}
+
+ do_execsql_test 1.4 {
+ SELECT
+ insttoken(ft, 0, 0),
+ insttoken(ft, 1, 0),
+ insttoken(ft, 2, 0)
+ FROM ft('hello') ORDER BY rank;
+ } {hello.Hello hello.HELLO hello}
+
+ do_execsql_test 1.5 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
+ );
+ INSERT INTO ft2(rowid, x) VALUES(1, 'ONE one two three ONE');
+ INSERT INTO ft2(rowid, x) VALUES(2, 'TWO one two three TWO');
+ INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE');
+ }
+
+ do_execsql_test 1.6 {
+ SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank;
+ } {three.THREE 3 three 1 three 2}
+
+ do_execsql_test 1.7 {
+ INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB');
+ INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(14, 'bbb BBB bbb');
+ INSERT INTO ft2(rowid, x) VALUES(15, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(16, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(17, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(18, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(19, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(20, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(21, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(22, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(23, 'bbb bbb bbb');
+ INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB');
+ }
+
+ do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24}
+ do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24}
+
+}
+
+finish_test
+
diff --git a/ext/fts5/test/fts5origintext4.test b/ext/fts5/test/fts5origintext4.test
new file mode 100644
index 000000000..8973a24b0
--- /dev/null
+++ b/ext/fts5/test/fts5origintext4.test
@@ -0,0 +1,66 @@
+# 2023 November 22
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests that check the memory reported by sqlite3_db_status CACHE_USED
+# after running queries against a large tokendata=1 table.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext4
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+sqlite3_fts5_register_origintext db
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", tokendata=1
+ );
+}
+
+do_execsql_test 1.1 {
+ BEGIN;
+ INSERT INTO ft SELECT 'the first thing';
+
+ WITH s(i) AS (
+ SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<90000
+ )
+ INSERT INTO ft SELECT 'The second thing' FROM s;
+
+ INSERT INTO ft SELECT 'the first thing';
+ COMMIT;
+ INSERT INTO ft(ft) VALUES('optimize');
+}
+
+foreach {tn sql expr} {
+ 1 { SELECT rowid FROM ft('the') } {$mem > 250000}
+ 2 { SELECT rowid FROM ft('first') } {$mem < 50000}
+ 3 { SELECT rowid FROM ft('the first') } {$mem < 50000}
+} {
+ db close
+ sqlite3 db test.db
+ sqlite3_fts5_register_origintext db
+
+ execsql $sql
+ do_test 1.2.$tn {
+ set mem [lindex [sqlite3_db_status db CACHE_USED 0] 1]
+ expr $expr
+ } 1
+}
+
+# Return a copy of x in which each NUL byte is replaced by a "." character.
+proc b {x} {
+  set mapping [list "\0" "."]
+  return [string map $mapping $x]
+}
+db func b b
+# execsql_pp { SELECT segid, b(term), pgno from ft_idx }
+
+finish_test
+
diff --git a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test
new file mode 100644
index 000000000..03d5bee21
--- /dev/null
+++ b/ext/fts5/test/fts5origintext5.test
@@ -0,0 +1,273 @@
+# 2023 Dec 04
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests for tables that use both tokendata=1 and contentless_delete=1.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext5
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+# Return a random integer between 0 and n-1.
+#
+proc random {n} { expr {abs(int(rand()*$n))} }
+
+# Select an element of the list passed as the only argument at random and
+# return it.
+#
+proc select_one {list} {
+ set n [llength $list]
+ lindex $list [random $n]
+}
+
+# Given a term that consists entirely of alphabet characters, return all
+# permutations of the term using upper and lower case characters. e.g.
+#
+# "abc" -> {ABC ABc AbC Abc aBC aBc abC abc}
+#
+proc casify {term {lRet {{}}}} {
+ if {$term==""} { return $lRet }
+ set t [string range $term 1 end]
+ set f1 [string toupper [string range $term 0 0]]
+ set f2 [string tolower [string range $term 0 0]]
+ set ret [list]
+ foreach x $lRet {
+ lappend ret "$x$f1"
+ lappend ret "$x$f2"
+ }
+ return [casify $t $ret]
+}
+
+proc vocab {} {
+ list abc def ghi jkl mno pqr stu vwx yza
+}
+
+# Return a random term: one upper/lower case variant of a [vocab] entry.
+#
+proc term {} {
+ if {[info exists ::expanded_vocab]==0} {
+ foreach v [vocab] { lappend ::expanded_vocab {*}[casify $v] }
+ }
+
+ select_one $::expanded_vocab
+}
+
+# Return a document - between 7 and 9 terms.
+#
+proc document {} {
+ set nTerm [expr {[random 3] + 7}]
+ set doc ""
+ for {set ii 0} {$ii < $nTerm} {incr ii} {
+ lappend doc [term]
+ }
+ set doc
+}
+db func document document
+
+#-------------------------------------------------------------------------
+
+expr srand(6)
+
+set NDOC 200
+set NLOOP 50
+
+sqlite3_fts5_register_origintext db
+
+proc tokens {cmd} {
+ set ret [list]
+ for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} {
+ set txt [$cmd xInstToken $iTok 0]
+ set txt [string map [list "\0" "."] $txt]
+ lappend ret $txt
+ }
+ set ret
+}
+sqlite3_fts5_create_function db tokens tokens
+
+proc rankfunc {cmd} {
+ $cmd xRowid
+}
+sqlite3_fts5_create_function db rankfunc rankfunc
+
+proc ctrl_tokens {term args} {
+ set ret [list]
+ set term [string tolower $term]
+ foreach doc $args {
+ foreach a $doc {
+ if {[string tolower $a]==$term} {
+ if {$a==$term} {
+ lappend ret $a
+ } else {
+ lappend ret [string tolower $a].$a
+ }
+ }
+ }
+ }
+ set ret
+}
+db func ctrl_tokens ctrl_tokens
+
+proc do_all_vocab_test {tn} {
+ foreach ::v [concat [vocab] nnn] {
+ set answer [execsql {
+ SELECT id, ctrl_tokens($::v, x) FROM ctrl WHERE x LIKE '%' || $::v || '%'
+ }]
+ do_execsql_test $tn.$::v.1 {
+ SELECT rowid, tokens(ft) FROM ft($::v)
+ } $answer
+ do_execsql_test $tn.$::v.2 {
+ SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank
+ } $answer
+ }
+}
+
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize="origintext unicode61", content=, contentless_delete=1,
+ tokendata=1
+ );
+
+ CREATE TABLE ctrl(id INTEGER PRIMARY KEY, x TEXT);
+ INSERT INTO ft(ft, rank) VALUES('pgsz', 64);
+ INSERT INTO ft(ft, rank) VALUES('rank', 'rankfunc()');
+}
+do_test 1.1 {
+ for {set ii 0} {$ii < $NDOC} {incr ii} {
+ set doc [document]
+ execsql {
+ INSERT INTO ft(rowid, x) VALUES($ii, $doc);
+ INSERT INTO ctrl(id, x) VALUES($ii, $doc);
+ }
+ }
+} {}
+
+#execsql_pp { SELECT * FROM ctrl }
+#execsql_pp { SELECT * FROM ft }
+#fts5_aux_test_functions db
+#execsql_pp { SELECT rowid, tokens(ft), fts5_test_poslist(ft) FROM ft('ghi'); }
+
+do_all_vocab_test 1.2
+
+for {set ii 0} {$ii < $NLOOP} {incr ii} {
+ set lRowid [execsql { SELECT id FROM ctrl WHERE random() % 2 }]
+ foreach r $lRowid {
+ execsql { DELETE FROM ft WHERE rowid = $r }
+ execsql { DELETE FROM ctrl WHERE rowid = $r }
+
+ set doc [document]
+ execsql { INSERT INTO ft(rowid, x) VALUES($r, $doc) }
+ execsql { INSERT INTO ctrl(id, x) VALUES($r, $doc) }
+ }
+ do_all_vocab_test 1.3.$ii
+}
+
+#-------------------------------------------------------------------------
+
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(
+ x, y, tokenize="origintext unicode61", content=, contentless_delete=1,
+ tokendata=1
+ );
+
+ CREATE TABLE ctrl2(id INTEGER PRIMARY KEY, x TEXT, y TEXT);
+ INSERT INTO ft2(ft2, rank) VALUES('pgsz', 64);
+ INSERT INTO ft2(ft2, rank) VALUES('rank', 'rankfunc()');
+}
+do_test 2.1 {
+ for {set ii 0} {$ii < $NDOC} {incr ii} {
+ set doc1 [document]
+ set doc2 [document]
+ execsql {
+ INSERT INTO ft2(rowid, x, y) VALUES($ii, $doc1, $doc2);
+ INSERT INTO ctrl2(id, x, y) VALUES($ii, $doc1, $doc2);
+ }
+ }
+} {}
+
+proc do_all_vocab_test2 {tn} {
+ foreach ::v [vocab] {
+ set answer [execsql {
+ SELECT id, ctrl_tokens($::v, x, y) FROM ctrl2
+ WHERE x LIKE '%' || $::v || '%' OR y LIKE '%' || $::v || '%';
+ }]
+ do_execsql_test $tn.$::v.1 {
+ SELECT rowid, tokens(ft2) FROM ft2($::v)
+ } $answer
+ do_execsql_test $tn.$::v.2 {
+ SELECT rowid, tokens(ft2) FROM ft2($::v) ORDER BY rank
+ } $answer
+ }
+}
+
+do_all_vocab_test2 2.2
+
+for {set ii 0} {$ii < $NLOOP} {incr ii} {
+ set lRowid [execsql { SELECT id FROM ctrl2 WHERE random() % 2 }]
+ foreach r $lRowid {
+ execsql { DELETE FROM ft2 WHERE rowid = $r }
+ execsql { DELETE FROM ctrl2 WHERE rowid = $r }
+
+ set doc1 [document]
+ set doc2 [document]
+ execsql { INSERT INTO ft2(rowid, x, y) VALUES($r, $doc1, $doc2) }
+ execsql { INSERT INTO ctrl2(id, x, y) VALUES($r, $doc1, $doc2) }
+ }
+ do_all_vocab_test2 2.3.$ii
+}
+
+#-------------------------------------------------------------------------
+
+unset -nocomplain ::expanded_vocab
+proc vocab {} {
+ list abcde fghij klmno
+}
+
+proc do_all_vocab_test3 {tn} {
+ foreach ::v [concat [vocab] nnn] {
+ set answer [execsql {
+ SELECT rowid, ctrl_tokens($::v, w) FROM ctrl3 WHERE w LIKE '%' || $::v || '%'
+ }]
+ do_execsql_test $tn.$::v.1 {
+ SELECT rowid, tokens(ft3) FROM ft3($::v)
+ } $answer
+ do_execsql_test $tn.$::v.2 {
+ SELECT rowid, tokens(ft3) FROM ft3($::v) ORDER BY rank
+ } $answer
+ }
+}
+
+do_execsql_test 3.0 {
+ CREATE VIRTUAL TABLE ft3 USING fts5(
+ w, tokenize="origintext unicode61", content=, contentless_delete=1,
+ tokendata=1
+ );
+ INSERT INTO ft3(ft3, rank) VALUES('rank', 'rankfunc()');
+ CREATE TABLE ctrl3(w);
+}
+
+do_execsql_test 3.1 {
+ WITH s(i) AS (
+ SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<2
+ )
+ INSERT INTO ctrl3 SELECT document() FROM s;
+ INSERT INTO ft3(rowid, w) SELECT rowid, w FROM ctrl3;
+}
+
+do_all_vocab_test3 3.2
+
+
+finish_test
+
diff --git a/ext/fts5/test/fts5simple2.test b/ext/fts5/test/fts5simple2.test
index e57cea70f..6c0e0e166 100644
--- a/ext/fts5/test/fts5simple2.test
+++ b/ext/fts5/test/fts5simple2.test
@@ -343,7 +343,9 @@ do_execsql_test 17.0 {
INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb');
COMMIT;
}
-do_execsql_test 17.1 { SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20 }
+do_execsql_test 17.1 {
+ SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20
+}
do_execsql_test 17.2 {
BEGIN;
INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb');
diff --git a/manifest b/manifest
index 6d47dd3e8..99b9d7e83 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C README.md\stypo\sfix\sreported\sin\sthe\sforum\sand\supdate\sall\slinks\sfrom\shttp:\sto\shttps:.
-D 2023-12-06T12:30:28.174
+C Add\sthe\stokendata=1\soption\sand\srelated\sAPIs\sto\sfts5.
+D 2023-12-06T14:36:34.858
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -89,17 +89,17 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
-F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520
-F ext/fts5/fts5Int.h 78a63cc0795186cde5384816a9403a68c65774b35d952e05b81a1b4b158e07c8
+F ext/fts5/fts5.h ff90acaa97f8e865b66d1177d1b56b8c110fd5548ab5863bab43f055a1d745fe
+F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880e9834
F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad
F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5
-F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081
-F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6
-F ext/fts5/fts5_hash.c 076058f93327051952a752dc765df1acfe783eb11b419b30652aa1fc1f987902
-F ext/fts5/fts5_index.c 458cbed8a3e17617cbf7e80cdfb7612000b9bb3781f286b345fb9655858658cf
-F ext/fts5/fts5_main.c a07ed863b8bd9e6fefb62db2fd40a3518eb30a5f7dcfda5be915dd2db45efa2f
+F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf
+F ext/fts5/fts5_expr.c b1ec526371b9ffde82341423a5b9753c42cbea629a41b69f26fa377d13b95a8e
+F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
+F ext/fts5/fts5_index.c be39b44ff8773cff56bcbc01f74701a83e068c20d773cafd01e8bb2fa0fc1bc5
+F ext/fts5/fts5_main.c fb7ec495d663f40d18e420e1986316591041a70e1e4b4696ab2a7384e4c7fd7a
F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d
-F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae
+F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13
@@ -108,8 +108,8 @@ F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d0988
F ext/fts5/fts5_vocab.c aed56169ae5c1aa9b8189c779ffeef04ed516d3c712c06914e6d91a6759f4e4a
F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
-F ext/fts5/test/fts5_common.tcl a9de9c2209cc4e7ae3c753e783504e67206c6c1467d08f209cd0c5923d3e8d8b
-F ext/fts5/test/fts5aa.test ba5158eba7d61359becdfca895ef471072c7bf7b20e5e60dcb4d024c8419c926
+F ext/fts5/test/fts5_common.tcl 8b1848ac2baad10e444e4183034a52050b52d20b3796d9d30e78f01ab0d05583
+F ext/fts5/test/fts5aa.test 4db81519863244a3cab35795fe65ab6b592e7970c7409eba098b23ebbfc08d95
F ext/fts5/test/fts5ab.test bd932720c748383277456b81f91bc00453de2174f9762cd05f95d0495dc50390
F ext/fts5/test/fts5ac.test a7aa7e1fefc6e1918aa4d3111d5c44a09177168e962c5fd2cca9620de8a7ed6d
F ext/fts5/test/fts5ad.test e8cf959dfcd57c8e46d6f5f25665686f3b6627130a9a981371dafdf6482790de
@@ -170,6 +170,7 @@ F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860
F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e4710c77eb8ce7075
F ext/fts5/test/fts5faultF.test 4abef99f86e99d9f0c6460dd68c586a766b6b9f1f660ada55bf2e8266bd1bbc1
F ext/fts5/test/fts5faultG.test d2e5a4d9a34e08dcaadcaeafef74d10cbc2abdd11aa2659a18af0294bf2812d3
+F ext/fts5/test/fts5faultH.test d845f45dac3e1a3f20c7e0a2be95280c95d3204c06802f86ab2c110e52ed3d14
F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079
F ext/fts5/test/fts5full.test e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717
F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e
@@ -190,6 +191,11 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618
F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1
F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785
F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca
+F ext/fts5/test/fts5origintext.test d2796fa08ee7aecfabdc0c45bb8a2fb16a00ea8757e63fbc153b718dbe430a39
+F ext/fts5/test/fts5origintext2.test f3b9436de540828d01f0672df855b09ebc0863e126d5b56234701d71dfa73634
+F ext/fts5/test/fts5origintext3.test 0d25933506600452a5ab3873cbb418ed5f2de2446c3672b9997b1ea104b0e7f0
+F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871
+F ext/fts5/test/fts5origintext5.test a037bdf7235a22033c4663837bdb12d9738245464a3ac2f60c71fc40d07ede7d
F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b
F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a
F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15
@@ -212,7 +218,7 @@ F ext/fts5/test/fts5secure7.test fd03d0868d64340a1db8615b02e5508fea409de13910114
F ext/fts5/test/fts5secure8.test eb3579e9d58b0acad97e8082dee1f99b2d393198f03500b453c2b25761c0c298
F ext/fts5/test/fts5securefault.test dbca2b6a1c16700017f5051138991b705410889933f2a37c57ae8a23b296b10b
F ext/fts5/test/fts5simple.test a298670508c1458b88ce6030440f26a30673931884eb5f4094ac1773b3ba217b
-F ext/fts5/test/fts5simple2.test 258a1b0c590409bfa5271e872c79572b319d2a56554d0585f68f146a0da603f0
+F ext/fts5/test/fts5simple2.test 8dd2389ee75e21a1429fe87e5f8c7d9a97ad1470304a8a2d3ba4b8c3c345fecd
F ext/fts5/test/fts5simple3.test d5c74a9d3ca71bd5dd5cacb7c55b86ea12cdddfc8b1910e3de2995206898380f
F ext/fts5/test/fts5synonym.test 1651815b8008de170e8e600dcacc17521d765482ea8f074ae82cfa870d8bb7fb
F ext/fts5/test/fts5synonym2.test 8f891fc49cc1e8daed727051e77e1f42849c784a6a54bef82564761b2cb3e016
@@ -2147,8 +2153,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 7f0c79b94e8f55e5013e52ba64ba8b32dad1dc4e2224d2099733cbc561de1810
-R e4c4253b529b92ef6212320f71e1570e
-U stephan
-Z 02e09a8447ded58f933fad2e169fffdc
+P 5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf 8f46eace86e7b2e556913575aa3cd6f7987ac0efcc880f0af649d42c253aeb81
+R aee08254c1ed5ae187dbc54a7e67d0a2
+T +closed 8f46eace86e7b2e556913575aa3cd6f7987ac0efcc880f0af649d42c253aeb81
+U dan
+Z b8398992a8dd36d240de5bcbcb58489b
# Remove this line to create a well-formed Fossil manifest.
diff --git a/manifest.uuid b/manifest.uuid
index f935efea2..c9e889522 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf \ No newline at end of file
+a76a636b23c0ebd95d47fdf8358de4729e51a5f68f1a730cd4d89b378e94ac0d \ No newline at end of file