diff options
-rw-r--r-- | ext/fts5/fts5.h | 30 | ||||
-rw-r--r-- | ext/fts5/fts5Int.h | 33 | ||||
-rw-r--r-- | ext/fts5/fts5_config.c | 10 | ||||
-rw-r--r-- | ext/fts5/fts5_expr.c | 163 | ||||
-rw-r--r-- | ext/fts5/fts5_hash.c | 49 | ||||
-rw-r--r-- | ext/fts5/fts5_index.c | 837 | ||||
-rw-r--r-- | ext/fts5/fts5_main.c | 70 | ||||
-rw-r--r-- | ext/fts5/fts5_tcl.c | 208 | ||||
-rw-r--r-- | ext/fts5/test/fts5_common.tcl | 14 | ||||
-rw-r--r-- | ext/fts5/test/fts5aa.test | 68 | ||||
-rw-r--r-- | ext/fts5/test/fts5faultH.test | 93 | ||||
-rw-r--r-- | ext/fts5/test/fts5origintext.test | 297 | ||||
-rw-r--r-- | ext/fts5/test/fts5origintext2.test | 146 | ||||
-rw-r--r-- | ext/fts5/test/fts5origintext3.test | 101 | ||||
-rw-r--r-- | ext/fts5/test/fts5origintext4.test | 66 | ||||
-rw-r--r-- | ext/fts5/test/fts5origintext5.test | 273 | ||||
-rw-r--r-- | ext/fts5/test/fts5simple2.test | 4 | ||||
-rw-r--r-- | manifest | 40 | ||||
-rw-r--r-- | manifest.uuid | 2 |
19 files changed, 2328 insertions, 176 deletions
diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 323d73a28..63c9765eb 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -261,9 +261,30 @@ struct Fts5PhraseIter { ** ** xPhraseNextColumn() ** See xPhraseFirstColumn above. +** +** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken) +** This is used to access token iToken of phrase iPhrase of the current +** query. Before returning, output parameter *ppToken is set to point +** to a buffer containing the requested token, and *pnToken to the +** size of this buffer in bytes. +** +** The output text is not a copy of the query text that specified the +** token. It is the output of the tokenizer module. For tokendata=1 +** tables, this includes any embedded 0x00 and trailing data. +** +** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken) +** This is used to access token iToken of phrase hit iIdx within the +** current row. +** +** The output text is not a copy of the document text that was tokenized. +** It is the output of the tokenizer module. For tokendata=1 tables, this +** includes any embedded 0x00 and trailing data. +** +** This API can be quite slow if used with an FTS5 table created with the +** "detail=none" or "detail=column" option. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 2 */ + int iVersion; /* Currently always set to 3 */ void *(*xUserData)(Fts5Context*); @@ -298,6 +319,13 @@ struct Fts5ExtensionApi { int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); + + /* Below this point are iVersion>=3 only */ + int (*xQueryToken)(Fts5Context*, + int iPhrase, int iToken, + const char **ppToken, int *pnToken + ); + int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); }; /* diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 8bbafbaaf..9beb26e05 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -196,6 +196,7 @@ struct Fts5Config { char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ + int bTokendata; /* "tokendata=" option value (dflt==0) */ int eDetail; /* FTS5_DETAIL_XXX value */ char *zContentExprlist; Fts5Tokenizer *pTok; @@ -384,17 +385,19 @@ struct Fts5IndexIter { /* ** Values used as part of the flags argument passed to IndexQuery(). */ -#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ -#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ -#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ -#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ +#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ +#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ +#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ +#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* The following are used internally by the fts5_index.c module. They are ** defined here only to make it easier to avoid clashes with the flags ** above. */ -#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 -#define FTS5INDEX_QUERY_NOOUTPUT 0x0020 -#define FTS5INDEX_QUERY_SKIPHASH 0x0040 +#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 +#define FTS5INDEX_QUERY_NOOUTPUT 0x0020 +#define FTS5INDEX_QUERY_SKIPHASH 0x0040 +#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080 +#define FTS5INDEX_QUERY_SCANONETERM 0x0100 /* ** Create/destroy an Fts5Index object. @@ -463,6 +466,10 @@ void *sqlite3Fts5StructureRef(Fts5Index*); void sqlite3Fts5StructureRelease(void*); int sqlite3Fts5StructureTest(Fts5Index*, void*); +/* +** Used by xInstToken(): +*/ +int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*); /* ** Insert or remove data to or from the index. Each time a document is @@ -540,6 +547,13 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); +void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); + +/* Used to populate hash tables for xInstToken in detail=none/column mode. */ +int sqlite3Fts5IndexIterWriteTokendata( + Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff +); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ @@ -645,6 +659,7 @@ void sqlite3Fts5HashScanNext(Fts5Hash*); int sqlite3Fts5HashScanEof(Fts5Hash*); void sqlite3Fts5HashScanEntry(Fts5Hash *, const char **pzTerm, /* OUT: term (nul-terminated) */ + int *pnTerm, /* OUT: Size of term in bytes */ const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ); @@ -771,6 +786,10 @@ int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); +int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); +int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*); +void sqlite3Fts5ExprClearTokens(Fts5Expr*); + /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 5d0770502..d2e8309cd 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -398,6 +398,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed tokendata=... directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bTokendata = (zArg[0]=='1'); + } + return rc; + } + *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index f5101ba06..bc7c9741e 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -100,7 +100,9 @@ struct Fts5ExprNode { struct Fts5ExprTerm { u8 bPrefix; /* True for a prefix term */ u8 bFirst; /* True if token must be first in column */ - char *zTerm; /* nul-terminated term */ + char *pTerm; /* Term data */ + int nQueryTerm; /* Effective size of term in bytes */ + int nFullTerm; /* Size of term in bytes incl. tokendata */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; @@ -967,7 +969,7 @@ static int fts5ExprNearInitAll( p->pIter = 0; } rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), + pExpr->pIndex, p->pTerm, p->nQueryTerm, (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), pNear->pColset, @@ -1604,7 +1606,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; - sqlite3_free(pTerm->zTerm); + sqlite3_free(pTerm->pTerm); sqlite3Fts5IterClose(pTerm->pIter); for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; @@ -1702,6 +1704,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; + Fts5Config *pConfig; int rc; }; @@ -1735,8 +1738,10 @@ static int fts5ParseTokenize( rc = SQLITE_NOMEM; }else{ memset(pSyn, 0, (size_t)nByte); - pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); - memcpy(pSyn->zTerm, pToken, nToken); + pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); + pSyn->nFullTerm = pSyn->nQueryTerm = nToken; + if( pCtx->pConfig->bTokendata ) pSyn->nQueryTerm = strlen(pSyn->pTerm); + memcpy(pSyn->pTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; } @@ -1761,7 +1766,11 @@ static int fts5ParseTokenize( if( rc==SQLITE_OK ){ pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + pTerm->nFullTerm = pTerm->nQueryTerm = nToken; + if( pCtx->pConfig->bTokendata && rc==SQLITE_OK ){ + pTerm->nQueryTerm = strlen(pTerm->pTerm); + } } } @@ -1828,6 +1837,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( memset(&sCtx, 0, sizeof(TokenCtx)); sCtx.pPhrase = pAppend; + sCtx.pConfig = pConfig; rc = fts5ParseStringFromToken(pToken, &z); if( rc==SQLITE_OK ){ @@ -1877,8 +1887,7 @@ int sqlite3Fts5ExprClonePhrase( int rc = SQLITE_OK; /* Return code */ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ - TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ - + TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */ pOrig = pExpr->apExprPhrase[iPhrase]; pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); if( rc==SQLITE_OK ){ @@ -1909,13 +1918,12 @@ int sqlite3Fts5ExprClonePhrase( if( pOrig->nTerm ){ int i; /* Used to iterate through phrase terms */ + sCtx.pConfig = pExpr->pConfig; for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ int tflags = 0; Fts5ExprTerm *p; for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ - const char *zTerm = p->zTerm; - rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), - 0, 0); + rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm,p->nFullTerm,0,0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ @@ -2296,11 +2304,13 @@ static Fts5ExprNode *fts5ParsePhraseToAnd( if( parseGrowPhraseArray(pParse) ){ fts5ExprPhraseFree(pPhrase); }else{ + Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; + Fts5ExprTerm *pTo = &pPhrase->aTerm[0]; pParse->apPhrase[pParse->nPhrase++] = pPhrase; pPhrase->nTerm = 1; - pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup( - &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1 - ); + pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm); + pTo->nQueryTerm = p->nQueryTerm; + pTo->nFullTerm = p->nFullTerm; pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) ); @@ -2485,16 +2495,17 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ - nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; + nByte += pTerm->nQueryTerm * 2 + 3 + 2; } zQuoted = sqlite3_malloc64(nByte); if( zQuoted ){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ - char *zIn = p->zTerm; + char *zIn = p->pTerm; + char *zEnd = &zIn[p->nQueryTerm]; zQuoted[i++] = '"'; - while( *zIn ){ + while( zIn<zEnd ){ if( *zIn=='"' ) zQuoted[i++] = '"'; zQuoted[i++] = *zIn++; } @@ -2572,8 +2583,10 @@ static char *fts5ExprPrintTcl( zRet = fts5PrintfAppend(zRet, " {"); for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ - char *zTerm = pPhrase->aTerm[iTerm].zTerm; - zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; + zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", + p->nQueryTerm, p->pTerm + ); if( pPhrase->aTerm[iTerm].bPrefix ){ zRet = fts5PrintfAppend(zRet, "*"); } @@ -2974,6 +2987,17 @@ static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ return 0; } +/* +** pToken is a buffer nToken bytes in size that may or may not contain +** an embedded 0x00 byte. If it does, return the number of bytes in +** the buffer before the 0x00. If it does not, return nToken. +*/ +static int fts5QueryTerm(const char *pToken, int nToken){ + int ii; + for(ii=0; ii<nToken && pToken[ii]; ii++){} + return ii; +} + static int fts5ExprPopulatePoslistsCb( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ @@ -2985,22 +3009,33 @@ static int fts5ExprPopulatePoslistsCb( Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; Fts5Expr *pExpr = p->pExpr; int i; + int nQuery = nToken; + i64 iRowid = pExpr->pRoot->iRowid; UNUSED_PARAM2(iUnused1, iUnused2); - if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; + if( nQuery>FTS5_MAX_TOKEN_SIZE ) nQuery = FTS5_MAX_TOKEN_SIZE; + if( pExpr->pConfig->bTokendata ){ + nQuery = fts5QueryTerm(pToken, nQuery); + } if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; i<pExpr->nPhrase; i++){ - Fts5ExprTerm *pTerm; + Fts5ExprTerm *pT; if( p->aPopulator[i].bOk==0 ) continue; - for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ - int nTerm = (int)strlen(pTerm->zTerm); - if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) - && memcmp(pTerm->zTerm, pToken, nTerm)==0 + for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ + if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix)) + && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0 ){ int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff ); + if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){ + int iCol = p->iOff>>32; + int iTokOff = p->iOff & 0x7FFFFFFF; + rc = sqlite3Fts5IndexIterWriteTokendata( + pT->pIter, pToken, nToken, iRowid, iCol, iTokOff + ); + } if( rc ) return rc; break; } @@ -3135,3 +3170,81 @@ int sqlite3Fts5ExprPhraseCollist( return rc; } + +/* +** Does the work of the fts5_api.xQueryToken() API method. +*/ +int sqlite3Fts5ExprQueryToken( + Fts5Expr *pExpr, + int iPhrase, + int iToken, + const char **ppOut, + int *pnOut +){ + Fts5ExprPhrase *pPhrase = 0; + + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ + return SQLITE_RANGE; + } + pPhrase = pExpr->apExprPhrase[iPhrase]; + if( iToken<0 || iToken>=pPhrase->nTerm ){ + return SQLITE_RANGE; + } + + *ppOut = pPhrase->aTerm[iToken].pTerm; + *pnOut = pPhrase->aTerm[iToken].nFullTerm; + return SQLITE_OK; +} + +/* +** Does the work of the fts5_api.xInstToken() API method. +*/ +int sqlite3Fts5ExprInstToken( + Fts5Expr *pExpr, + i64 iRowid, + int iPhrase, + int iCol, + int iOff, + int iToken, + const char **ppOut, + int *pnOut +){ + Fts5ExprPhrase *pPhrase = 0; + Fts5ExprTerm *pTerm = 0; + int rc = SQLITE_OK; + + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ + return SQLITE_RANGE; + } + pPhrase = pExpr->apExprPhrase[iPhrase]; + if( iToken<0 || iToken>=pPhrase->nTerm ){ + return SQLITE_RANGE; + } + pTerm = &pPhrase->aTerm[iToken]; + if( pTerm->bPrefix==0 ){ + if( pExpr->pConfig->bTokendata ){ + rc = sqlite3Fts5IterToken( + pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut + ); + }else{ + *ppOut = pTerm->pTerm; + *pnOut = pTerm->nFullTerm; + } + } + return rc; +} + +/* +** Clear the token mappings for all Fts5IndexIter objects mannaged by +** the expression passed as the only argument. +*/ +void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){ + int ii; + for(ii=0; ii<pExpr->nPhrase; ii++){ + Fts5ExprTerm *pT; + for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){ + sqlite3Fts5IndexIterClearTokendata(pT->pIter); + } + } +} + diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 391791c7a..5e0959aa8 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -36,10 +36,15 @@ struct Fts5Hash { /* ** Each entry in the hash table is represented by an object of the -** following type. Each object, its key (a nul-terminated string) and -** its current data are stored in a single memory allocation. The -** key immediately follows the object in memory. The position list -** data immediately follows the key data in memory. +** following type. Each object, its key, and its current data are stored +** in a single memory allocation. The key immediately follows the object +** in memory. The position list data immediately follows the key data +** in memory. +** +** The key is Fts5HashEntry.nKey bytes in size. It consists of a single +** byte identifying the index (either the main term index or a prefix-index), +** followed by the term data. For example: "0token". There is no +** nul-terminator - in this case nKey=6. ** ** The data that follows the key is in a similar, but not identical format ** to the doclist data stored in the database. It is: @@ -174,8 +179,7 @@ static int fts5HashResize(Fts5Hash *pHash){ unsigned int iHash; Fts5HashEntry *p = apOld[i]; apOld[i] = p->pHashNext; - iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), - (int)strlen(fts5EntryKey(p))); + iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), p->nKey); p->pHashNext = apNew[iHash]; apNew[iHash] = p; } @@ -259,7 +263,7 @@ int sqlite3Fts5HashWrite( for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ char *zKey = fts5EntryKey(p); if( zKey[0]==bByte - && p->nKey==nToken + && p->nKey==nToken+1 && memcmp(&zKey[1], pToken, nToken)==0 ){ break; @@ -289,9 +293,9 @@ int sqlite3Fts5HashWrite( zKey[0] = bByte; memcpy(&zKey[1], pToken, nToken); assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) ); - p->nKey = nToken; + p->nKey = nToken+1; zKey[nToken+1] = '\0'; - p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry); + p->nData = nToken+1 + sizeof(Fts5HashEntry); p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; @@ -408,12 +412,17 @@ static Fts5HashEntry *fts5HashEntryMerge( *ppOut = p1; p1 = 0; }else{ - int i = 0; char *zKey1 = fts5EntryKey(p1); char *zKey2 = fts5EntryKey(p2); - while( zKey1[i]==zKey2[i] ) i++; + int nMin = MIN(p1->nKey, p2->nKey); + + int cmp = memcmp(zKey1, zKey2, nMin); + if( cmp==0 ){ + cmp = p1->nKey - p2->nKey; + } + assert( cmp!=0 ); - if( ((u8)zKey1[i])>((u8)zKey2[i]) ){ + if( cmp>0 ){ /* p2 is smaller */ *ppOut = p2; ppOut = &p2->pScanNext; @@ -455,7 +464,7 @@ static int fts5HashEntrySort( Fts5HashEntry *pIter; for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ if( pTerm==0 - || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) + || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) ){ Fts5HashEntry *pEntry = pIter; pEntry->pScanNext = 0; @@ -494,12 +503,11 @@ int sqlite3Fts5HashQuery( for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ zKey = fts5EntryKey(p); - assert( p->nKey+1==(int)strlen(zKey) ); - if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break; + if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break; } if( p ){ - int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1; + int nHashPre = sizeof(Fts5HashEntry) + nTerm; int nList = p->nData - nHashPre; u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10)); if( pRet ){ @@ -560,19 +568,22 @@ int sqlite3Fts5HashScanEof(Fts5Hash *p){ void sqlite3Fts5HashScanEntry( Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ + int *pnTerm, /* OUT: Size of term in bytes */ const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ char *zKey = fts5EntryKey(p); - int nTerm = (int)strlen(zKey); + int nTerm = p->nKey; fts5HashAddPoslistSize(pHash, p, 0); *pzTerm = zKey; - *ppDoclist = (const u8*)&zKey[nTerm+1]; - *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1); + *pnTerm = nTerm; + *ppDoclist = (const u8*)&zKey[nTerm]; + *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm); }else{ *pzTerm = 0; + *pnTerm = 0; *ppDoclist = 0; *pnDoclist = 0; } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index c467addb8..9a2cc026b 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -323,6 +323,9 @@ typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; +typedef struct Fts5TokenDataIter Fts5TokenDataIter; +typedef struct Fts5TokenDataMap Fts5TokenDataMap; +typedef struct Fts5TombstoneArray Fts5TombstoneArray; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ @@ -365,6 +368,7 @@ struct Fts5Index { sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ sqlite3_stmt *pIdxSelect; + sqlite3_stmt *pIdxNextSelect; int nRead; /* Total number of blocks read */ sqlite3_stmt *pDeleteFromIdx; @@ -518,8 +522,7 @@ struct Fts5SegIter { Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ i64 iLeafOffset; /* Byte offset within current leaf */ - Fts5Data **apTombstone; /* Array of tombstone pages */ - int nTombstone; + Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */ /* Next method */ void (*xNext)(Fts5Index*, Fts5SegIter*, int*); @@ -547,6 +550,15 @@ struct Fts5SegIter { }; /* +** Array of tombstone pages. Reference counted. +*/ +struct Fts5TombstoneArray { + int nRef; /* Number of pointers to this object */ + int nTombstone; + Fts5Data *apTombstone[1]; /* Array of tombstone pages */ +}; + +/* ** Argument is a pointer to an Fts5Data structure that contains a ** leaf page. */ @@ -590,9 +602,16 @@ struct Fts5SegIter { ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. ** There is no way to tell if this is populated or not. +** +** pColset: +** If not NULL, points to an object containing a set of column indices. +** Only matches that occur in one of these columns will be returned. +** The Fts5Iter does not own the Fts5Colset object, and so it is not +** freed when the iterator is closed - it is owned by the upper layer. */ struct Fts5Iter { Fts5IndexIter base; /* Base class containing output vars */ + Fts5TokenDataIter *pTokenDataIter; Fts5Index *pIndex; /* Index that owns this iterator */ Fts5Buffer poslist; /* Buffer containing current poslist */ @@ -610,7 +629,6 @@ struct Fts5Iter { Fts5SegIter aSeg[1]; /* Array of segment iterators */ }; - /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. @@ -1909,18 +1927,20 @@ static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ } /* -** Allocate a tombstone hash page array (pIter->apTombstone) for the -** iterator passed as the second argument. If an OOM error occurs, leave -** an error in the Fts5Index object. +** Allocate a tombstone hash page array object (pIter->pTombArray) for +** the iterator passed as the second argument. If an OOM error occurs, +** leave an error in the Fts5Index object. */ static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ const int nTomb = pIter->pSeg->nPgTombstone; if( nTomb>0 ){ - Fts5Data **apTomb = 0; - apTomb = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data)*nTomb); - if( apTomb ){ - pIter->apTombstone = apTomb; - pIter->nTombstone = nTomb; + int nByte = nTomb * sizeof(Fts5Data*) + sizeof(Fts5TombstoneArray); + Fts5TombstoneArray *pNew; + pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte); + if( pNew ){ + pNew->nTombstone = nTomb; + pNew->nRef = 1; + pIter->pTombArray = pNew; } } } @@ -2177,15 +2197,16 @@ static void fts5SegIterNext_None( }else{ const u8 *pList = 0; const char *zTerm = 0; + int nTerm = 0; int nList; sqlite3Fts5HashScanNext(p->pHash); - sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); if( pList==0 ) goto next_none_eof; pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->nn = nList; pIter->pLeaf->szLeaf = nList; pIter->iEndofDoclist = nList; - sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); + sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } @@ -2251,11 +2272,12 @@ static void fts5SegIterNext( }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm = 0; + int nTerm = 0; int nList = 0; assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ sqlite3Fts5HashScanNext(p->pHash); - sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); } if( pList==0 ){ fts5DataRelease(pIter->pLeaf); @@ -2265,8 +2287,7 @@ static void fts5SegIterNext( pIter->pLeaf->nn = nList; pIter->pLeaf->szLeaf = nList; pIter->iEndofDoclist = nList+1; - sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), - (u8*)zTerm); + sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); *pbNewTerm = 1; } @@ -2652,7 +2673,7 @@ static void fts5SegIterSeekInit( fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); } - if( p->rc==SQLITE_OK && bGe==0 ){ + if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){ pIter->flags |= FTS5_SEGITER_ONETERM; if( pIter->pLeaf ){ if( flags & FTS5INDEX_QUERY_DESC ){ @@ -2668,7 +2689,9 @@ static void fts5SegIterSeekInit( } fts5SegIterSetNext(p, pIter); - fts5SegIterAllocTombstone(p, pIter); + if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM) ){ + fts5SegIterAllocTombstone(p, pIter); + } /* Either: ** @@ -2685,6 +2708,79 @@ static void fts5SegIterSeekInit( ); } + +/* +** SQL used by fts5SegIterNextInit() to find the page to open. +*/ +static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){ + if( p->pIdxNextSelect==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf( + "SELECT pgno FROM '%q'.'%q_idx' WHERE " + "segid=? AND term>? ORDER BY term ASC LIMIT 1", + pConfig->zDb, pConfig->zName + )); + + } + return p->pIdxNextSelect; +} + +/* +** This is similar to fts5SegIterSeekInit(), except that it initializes +** the segment iterator to point to the first term following the page +** with pToken/nToken on it. +*/ +static void fts5SegIterNextInit( + Fts5Index *p, + const char *pTerm, int nTerm, + Fts5StructureSegment *pSeg, /* Description of segment */ + Fts5SegIter *pIter /* Object to populate */ +){ + int iPg = -1; /* Page of segment to open */ + int bDlidx = 0; + sqlite3_stmt *pSel = 0; /* SELECT to find iPg */ + + pSel = fts5IdxNextStmt(p); + if( pSel ){ + assert( p->rc==SQLITE_OK ); + sqlite3_bind_int(pSel, 1, pSeg->iSegid); + sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC); + + if( sqlite3_step(pSel)==SQLITE_ROW ){ + i64 val = sqlite3_column_int64(pSel, 0); + iPg = (int)(val>>1); + bDlidx = (val & 0x0001); + } + p->rc = sqlite3_reset(pSel); + sqlite3_bind_null(pSel, 2); + if( p->rc ) return; + } + + memset(pIter, 0, sizeof(*pIter)); + pIter->pSeg = pSeg; + pIter->flags |= FTS5_SEGITER_ONETERM; + if( iPg>=0 ){ + pIter->iLeafPgno = iPg - 1; + fts5SegIterNextPage(p, pIter); + fts5SegIterSetNext(p, pIter); + } + if( pIter->pLeaf ){ + const u8 *a = pIter->pLeaf->p; + int iTermOff = 0; + + pIter->iPgidxOff = pIter->pLeaf->szLeaf; + pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff); + pIter->iLeafOffset = iTermOff; + fts5SegIterLoadTerm(p, pIter, 0); + fts5SegIterLoadNPos(p, pIter); + if( bDlidx ) fts5SegIterLoadDlidx(p, pIter); + + assert( p->rc!=SQLITE_OK || + fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0 + ); + } +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within the ** in-memory hash table. If there is no such term in the hash-table, the @@ -2711,8 +2807,7 @@ static void fts5SegIterHashInit( const u8 *pList = 0; p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); - sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); - n = (z ? (int)strlen((const char*)z) : 0); + sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList); if( pList ){ pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf ){ @@ -2772,13 +2867,30 @@ static void fts5IndexFreeArray(Fts5Data **ap, int n){ } /* +** Decrement the ref-count of the object passed as the only argument. If it +** reaches 0, free it and its contents. +*/ +static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){ + if( p ){ + p->nRef--; + if( p->nRef<=0 ){ + int ii; + for(ii=0; ii<p->nTombstone; ii++){ + fts5DataRelease(p->apTombstone[ii]); + } + sqlite3_free(p); + } + } +} + +/* ** Zero the iterator passed as the only argument. */ static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); fts5DataRelease(pIter->pNextLeaf); - fts5IndexFreeArray(pIter->apTombstone, pIter->nTombstone); + fts5TombstoneArrayDelete(pIter->pTombArray); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); @@ -3023,7 +3135,6 @@ static void fts5SegIterNextFrom( }while( p->rc==SQLITE_OK ); } - /* ** Free the iterator object passed as the second argument. */ @@ -3168,24 +3279,25 @@ static int fts5IndexTombstoneQuery( static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ int iFirst = pIter->aFirst[1].iFirst; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + Fts5TombstoneArray *pArray = pSeg->pTombArray; - if( pSeg->pLeaf && pSeg->nTombstone ){ + if( pSeg->pLeaf && pArray ){ /* Figure out which page the rowid might be present on. */ - int iPg = ((u64)pSeg->iRowid) % pSeg->nTombstone; + int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone; assert( iPg>=0 ); /* If tombstone hash page iPg has not yet been loaded from the ** database, load it now. */ - if( pSeg->apTombstone[iPg]==0 ){ - pSeg->apTombstone[iPg] = fts5DataRead(pIter->pIndex, + if( pArray->apTombstone[iPg]==0 ){ + pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex, FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg) ); - if( pSeg->apTombstone[iPg]==0 ) return 0; + if( pArray->apTombstone[iPg]==0 ) return 0; } return fts5IndexTombstoneQuery( - pSeg->apTombstone[iPg], - pSeg->nTombstone, + pArray->apTombstone[iPg], + pArray->nTombstone, pSeg->iRowid ); } @@ -3724,6 +3836,32 @@ static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ } } +/* +** All the component segment-iterators of pIter have been set up. This +** functions finishes setup for iterator pIter itself. +*/ +static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ + int iIter; + for(iIter=pIter->nSeg-1; iIter>0; iIter--){ + int iEq; + if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){ + Fts5SegIter *pSeg = &pIter->aSeg[iEq]; + if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); + fts5MultiIterAdvanced(p, pIter, iEq, iIter); + } + } + fts5MultiIterSetEof(pIter); + fts5AssertMultiIterSetup(p, pIter); + + if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter)) + || fts5MultiIterIsDeleted(pIter) + ){ + fts5MultiIterNext(p, pIter, 0, 0); + }else if( pIter->base.bEof==0 ){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + pIter->xSetOutputs(pIter, pSeg); + } +} /* ** Allocate a new Fts5Iter object. @@ -3805,31 +3943,12 @@ static void fts5MultiIterNew( assert( iIter==nSeg ); } - /* If the above was successful, each component iterators now points + /* If the above was successful, each component iterator now points ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ - for(iIter=pNew->nSeg-1; iIter>0; iIter--){ - int iEq; - if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - Fts5SegIter *pSeg = &pNew->aSeg[iEq]; - if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); - fts5MultiIterAdvanced(p, pNew, iEq, iIter); - } - } - fts5MultiIterSetEof(pNew); - fts5AssertMultiIterSetup(p, pNew); - - if( (pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew)) - || fts5MultiIterIsDeleted(pNew) - ){ - fts5MultiIterNext(p, pNew, 0, 0); - }else if( pNew->base.bEof==0 ){ - Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; - pNew->xSetOutputs(pNew, pSeg); - } - + fts5MultiIterFinishSetup(p, pNew); }else{ fts5MultiIterFree(pNew); *ppOut = 0; @@ -3854,7 +3973,6 @@ static void fts5MultiIterNew2( pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; - pIter->flags = FTS5_SEGITER_ONETERM; if( pData->szLeaf>0 ){ pIter->pLeaf = pData; @@ -4217,7 +4335,7 @@ static void fts5WriteDlidxAppend( } if( pDlidx->bPrevValid ){ - iVal = iRowid - pDlidx->iPrev; + iVal = (u64)iRowid - (u64)pDlidx->iPrev; }else{ i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); assert( pDlidx->buf.n==0 ); @@ -5340,10 +5458,10 @@ static void fts5FlushSecureDelete( Fts5Index *p, Fts5Structure *pStruct, const char *zTerm, + int nTerm, i64 iRowid ){ const int f = FTS5INDEX_QUERY_SKIPHASH; - int nTerm = (int)strlen(zTerm); Fts5Iter *pIter = 0; /* Used to find term instance */ fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); @@ -5417,8 +5535,7 @@ static void fts5FlushOneHash(Fts5Index *p){ int nDoclist; /* Size of doclist in bytes */ /* Get the term and doclist for this entry. */ - sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - nTerm = (int)strlen(zTerm); + sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist); if( bSecureDelete==0 ){ fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); if( p->rc!=SQLITE_OK ) break; @@ -5448,7 +5565,7 @@ static void fts5FlushOneHash(Fts5Index *p){ if( bSecureDelete ){ if( eDetail==FTS5_DETAIL_NONE ){ if( iOff<nDoclist && pDoclist[iOff]==0x00 ){ - fts5FlushSecureDelete(p, pStruct, zTerm, iRowid); + fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid); iOff++; if( iOff<nDoclist && pDoclist[iOff]==0x00 ){ iOff++; @@ -5458,7 +5575,7 @@ static void fts5FlushOneHash(Fts5Index *p){ } } }else if( (pDoclist[iOff] & 0x01) ){ - fts5FlushSecureDelete(p, pStruct, zTerm, iRowid); + fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid); if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){ iOff++; continue; @@ -6078,7 +6195,7 @@ static void fts5SetupPrefixIter( u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ - Fts5Iter **ppIter /* OUT: New iterator */ + Fts5Iter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; @@ -6099,8 +6216,9 @@ static void fts5SetupPrefixIter( aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); + assert( p->rc!=SQLITE_OK || (aBuf && pStruct) ); - if( aBuf && pStruct ){ + if( p->rc==SQLITE_OK ){ const int flags = FTS5INDEX_QUERY_SCAN | FTS5INDEX_QUERY_SKIPEMPTY | FTS5INDEX_QUERY_NOOUTPUT; @@ -6112,6 +6230,12 @@ static void fts5SetupPrefixIter( int bNewTerm = 1; memset(&doclist, 0, sizeof(doclist)); + + /* If iIdx is non-zero, then it is the number of a prefix-index for + ** prefixes 1 character longer than the prefix being queried for. That + ** index contains all the doclists required, except for the one + ** corresponding to the prefix itself. That one is extracted from the + ** main term index here. */ if( iIdx!=0 ){ int dummy = 0; const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; @@ -6135,6 +6259,7 @@ static void fts5SetupPrefixIter( pToken[0] = FTS5_MAIN_PREFIX + iIdx; fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); fts5IterSetOutputCb(&p->rc, p1); + for( /* no-op */ ; fts5MultiIterEof(p, p1)==0; fts5MultiIterNext2(p, p1, &bNewTerm) @@ -6150,7 +6275,6 @@ static void fts5SetupPrefixIter( } if( p1->base.nData==0 ) continue; - if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ int i1 = i*nMerge; @@ -6189,7 +6313,7 @@ static void fts5SetupPrefixIter( } fts5MultiIterFree(p1); - pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING); + pData = fts5IdxMalloc(p, sizeof(*pData)+doclist.n+FTS5_DATA_ZERO_PADDING); if( pData ){ pData->p = (u8*)&pData[1]; pData->nn = pData->szLeaf = doclist.n; @@ -6332,6 +6456,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxWriter); sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); + sqlite3_finalize(p->pIdxNextSelect); sqlite3_finalize(p->pDataVersion); sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); @@ -6428,6 +6553,451 @@ int sqlite3Fts5IndexWrite( } /* +** pToken points to a buffer of size nToken bytes containing a search +** term, including the index number at the start, used on a tokendata=1 +** table. This function returns true if the term in buffer pBuf matches +** token pToken/nToken. +*/ +static int fts5IsTokendataPrefix( + Fts5Buffer *pBuf, + const u8 *pToken, + int nToken +){ + return ( + pBuf->n>=nToken + && 0==memcmp(pBuf->p, pToken, nToken) + && (pBuf->n==nToken || pBuf->p[nToken]==0x00) + ); +} + +/* +** Ensure the segment-iterator passed as the only argument points to EOF. +*/ +static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ + fts5DataRelease(pSeg->pLeaf); + pSeg->pLeaf = 0; +} + +/* +** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an +** array of these for each row it visits. Or, for an iterator used by an +** "ORDER BY rank" query, it accumulates an array of these for the entire +** query. +** +** Each instance in the array indicates the iterator (and therefore term) +** associated with position iPos of rowid iRowid. This is used by the +** xInstToken() API. +*/ +struct Fts5TokenDataMap { + i64 iRowid; /* Row this token is located in */ + i64 iPos; /* Position of token */ + int iIter; /* Iterator token was read from */ +}; + +/* +** An object used to supplement Fts5Iter for tokendata=1 iterators. +*/ +struct Fts5TokenDataIter { + int nIter; + int nIterAlloc; + + int nMap; + int nMapAlloc; + Fts5TokenDataMap *aMap; + + Fts5PoslistReader *aPoslistReader; + int *aPoslistToIter; + Fts5Iter *apIter[1]; +}; + +/* +** This function appends iterator pAppend to Fts5TokenDataIter pIn and +** returns the result. +*/ +static Fts5TokenDataIter *fts5AppendTokendataIter( + Fts5Index *p, /* Index object (for error code) */ + Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */ + Fts5Iter *pAppend /* Append this iterator */ +){ + Fts5TokenDataIter *pRet = pIn; + + if( p->rc==SQLITE_OK ){ + if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ + int nAlloc = pIn ? pIn->nIterAlloc*2 : 16; + int nByte = nAlloc * sizeof(Fts5Iter*) + sizeof(Fts5TokenDataIter); + Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte); + + if( pNew==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + if( pIn==0 ) memset(pNew, 0, nByte); + pRet = pNew; + pNew->nIterAlloc = nAlloc; + } + } + } + if( p->rc ){ + sqlite3Fts5IterClose((Fts5IndexIter*)pAppend); + }else{ + pRet->apIter[pRet->nIter++] = pAppend; + } + assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc ); + + return pRet; +} + +/* +** Delete an Fts5TokenDataIter structure and its contents. +*/ +static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ + if( pSet ){ + int ii; + for(ii=0; ii<pSet->nIter; ii++){ + fts5MultiIterFree(pSet->apIter[ii]); + } + sqlite3_free(pSet->aPoslistReader); + sqlite3_free(pSet->aMap); + sqlite3_free(pSet); + } +} + +/* +** Append a mapping to the token-map belonging to object pT. +*/ +static void fts5TokendataIterAppendMap( + Fts5Index *p, + Fts5TokenDataIter *pT, + int iIter, + i64 iRowid, + i64 iPos +){ + if( p->rc==SQLITE_OK ){ + if( pT->nMap==pT->nMapAlloc ){ + int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; + int nByte = nNew * sizeof(Fts5TokenDataMap); + Fts5TokenDataMap *aNew; + + aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte); + if( aNew==0 ){ + p->rc = SQLITE_NOMEM; + return; + } + + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pT->aMap[pT->nMap].iRowid = iRowid; + pT->aMap[pT->nMap].iPos = iPos; + pT->aMap[pT->nMap].iIter = iIter; + pT->nMap++; + } +} + +/* +** The iterator passed as the only argument must be a tokendata=1 iterator +** (pIter->pTokenDataIter!=0). This function sets the iterator output +** variables (pIter->base.*) according to the contents of the current +** row. +*/ +static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ + int ii; + int nHit = 0; + i64 iRowid = SMALLEST_INT64; + int iMin = 0; + + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + + pIter->base.nData = 0; + pIter->base.pData = 0; + + for(ii=0; ii<pT->nIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( p->base.bEof==0 ){ + if( nHit==0 || p->base.iRowid<iRowid ){ + iRowid = p->base.iRowid; + nHit = 1; + pIter->base.pData = p->base.pData; + pIter->base.nData = p->base.nData; + iMin = ii; + }else if( p->base.iRowid==iRowid ){ + nHit++; + } + } + } + + if( nHit==0 ){ + pIter->base.bEof = 1; + }else{ + int eDetail = pIter->pIndex->pConfig->eDetail; + pIter->base.bEof = 0; + pIter->base.iRowid = iRowid; + + if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){ + fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1); + }else + if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){ + int nReader = 0; + int nByte = 0; + i64 iPrev = 0; + + /* Allocate array of iterators if they are not already allocated. */ + if( pT->aPoslistReader==0 ){ + int nByte = pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int)); + pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero( + &pIter->pIndex->rc, nByte + ); + if( pT->aPoslistReader==0 ) return; + pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter]; + } + + /* Populate an iterator for each poslist that will be merged */ + for(ii=0; ii<pT->nIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( iRowid==p->base.iRowid ){ + pT->aPoslistToIter[nReader] = ii; + sqlite3Fts5PoslistReaderInit( + p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] + ); + nByte += p->base.nData; + } + } + + /* Ensure the output buffer is large enough */ + if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){ + return; + } + + /* Ensure the token-mapping is large enough */ + if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){ + int nNew = (pT->nMapAlloc + nByte) * 2; + Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc( + pT->aMap, nNew*sizeof(Fts5TokenDataMap) + ); + if( aNew==0 ){ + pIter->pIndex->rc = SQLITE_NOMEM; + return; + } + pT->aMap = aNew; + pT->nMapAlloc = nNew; + } + + pIter->poslist.n = 0; + + while( 1 ){ + i64 iMinPos = LARGEST_INT64; + + /* Find smallest position */ + iMin = 0; + for(ii=0; ii<nReader; ii++){ + Fts5PoslistReader *pReader = &pT->aPoslistReader[ii]; + if( pReader->bEof==0 ){ + if( pReader->iPos<iMinPos ){ + iMinPos = pReader->iPos; + iMin = ii; + } + } + } + + /* If all readers were at EOF, break out of the loop. */ + if( iMinPos==LARGEST_INT64 ) break; + + sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos); + sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]); + + if( eDetail==FTS5_DETAIL_FULL ){ + pT->aMap[pT->nMap].iPos = iMinPos; + pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin]; + pT->aMap[pT->nMap].iRowid = iRowid; + pT->nMap++; + } + } + + pIter->base.pData = pIter->poslist.p; + pIter->base.nData = pIter->poslist.n; + } + } +} + +/* +** The iterator passed as the only argument must be a tokendata=1 iterator +** (pIter->pTokenDataIter!=0). This function advances the iterator. If +** argument bFrom is false, then the iterator is advanced to the next +** entry. Or, if bFrom is true, it is advanced to the first entry with +** a rowid of iFrom or greater. +*/ +static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ + int ii; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + + for(ii=0; ii<pT->nIter; ii++){ + Fts5Iter *p = pT->apIter[ii]; + if( p->base.bEof==0 + && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom)) + ){ + fts5MultiIterNext(p->pIndex, p, bFrom, iFrom); + while( bFrom && p->base.bEof==0 + && p->base.iRowid<iFrom + && p->pIndex->rc==SQLITE_OK + ){ + fts5MultiIterNext(p->pIndex, p, 0, 0); + } + } + } + + fts5IterSetOutputsTokendata(pIter); +} + +/* +** If the segment-iterator passed as the first argument is at EOF, then +** set pIter->term to a copy of buffer pTerm. +*/ +static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ + if( pIter && pIter->aSeg[0].pLeaf==0 ){ + fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p); + } +} + +/* +** This function sets up an iterator to use for a non-prefix query on a +** tokendata=1 table. +*/ +static Fts5Iter *fts5SetupTokendataIter( + Fts5Index *p, /* FTS index to query */ + const u8 *pToken, /* Buffer containing query term */ + int nToken, /* Size of buffer pToken in bytes */ + Fts5Colset *pColset /* Colset to filter on */ +){ + Fts5Iter *pRet = 0; + Fts5TokenDataIter *pSet = 0; + Fts5Structure *pStruct = 0; + const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN; + + Fts5Buffer bSeek = {0, 0, 0}; + Fts5Buffer *pSmall = 0; + + fts5IndexFlush(p); + pStruct = fts5StructureRead(p); + + while( 1 ){ + Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0; + Fts5Iter *pNew = 0; + Fts5SegIter *pNewIter = 0; + Fts5SegIter *pPrevIter = 0; + + int iLvl, iSeg, ii; + + pNew = fts5MultiIterAlloc(p, pStruct->nSegment); + if( pNew==0 ) break; + + if( pSmall ){ + fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p); + fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0"); + }else{ + fts5BufferSet(&p->rc, &bSeek, nToken, pToken); + } + + pNewIter = &pNew->aSeg[0]; + pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0); + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + int bDone = 0; + + if( pPrevIter ){ + if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ + memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter)); + memset(pPrevIter, 0, sizeof(Fts5SegIter)); + bDone = 1; + }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){ + fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter); + bDone = 1; + } + } + + if( bDone==0 ){ + fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); + } + + if( pPrevIter ){ + if( pPrevIter->pTombArray ){ + pNewIter->pTombArray = pPrevIter->pTombArray; + pNewIter->pTombArray->nRef++; + } + }else{ + fts5SegIterAllocTombstone(p, pNewIter); + } + + pNewIter++; + if( pPrevIter ) pPrevIter++; + } + } + fts5TokendataSetTermIfEof(pPrev, pSmall); + + pNew->bSkipEmpty = 1; + pNew->pColset = pColset; + fts5IterSetOutputCb(&p->rc, pNew); + + /* Loop through all segments in the new iterator. Find the smallest + ** term that any segment-iterator points to. Iterator pNew will be + ** used for this term. Also, set any iterator that points to a term that + ** does not match pToken/nToken to point to EOF */ + pSmall = 0; + for(ii=0; ii<pNew->nSeg; ii++){ + Fts5SegIter *pII = &pNew->aSeg[ii]; + if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ + fts5SegIterSetEOF(pII); + } + if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ + pSmall = &pII->term; + } + } + + /* If pSmall is still NULL at this point, then the new iterator does + ** not point to any terms that match the query. So delete it and break + ** out of the loop - all required iterators have been collected. */ + if( pSmall==0 ){ + sqlite3Fts5IterClose((Fts5IndexIter*)pNew); + break; + } + + /* Append this iterator to the set and continue. */ + pSet = fts5AppendTokendataIter(p, pSet, pNew); + } + + if( p->rc==SQLITE_OK && pSet ){ + int ii; + for(ii=0; ii<pSet->nIter; ii++){ + Fts5Iter *pIter = pSet->apIter[ii]; + int iSeg; + for(iSeg=0; iSeg<pIter->nSeg; iSeg++){ + pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM; + } + fts5MultiIterFinishSetup(p, pIter); + } + } + + if( p->rc==SQLITE_OK ){ + pRet = fts5MultiIterAlloc(p, 0); + } + if( pRet ){ + pRet->pTokenDataIter = pSet; + if( pSet ){ + fts5IterSetOutputsTokendata(pRet); + }else{ + pRet->base.bEof = 1; + } + }else{ + fts5TokendataIterDelete(pSet); + } + + fts5StructureRelease(pStruct); + fts5BufferFree(&bSeek); + return pRet; +} + + +/* ** Open a new iterator to iterate though all rowid that match the ** specified token or token prefix. */ @@ -6448,6 +7018,7 @@ int sqlite3Fts5IndexQuery( if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ int iIdx = 0; /* Index to search */ int iPrefixIdx = 0; /* +1 prefix index */ + int bTokendata = pConfig->bTokendata; if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); /* Figure out which index to search and set iIdx accordingly. If this @@ -6461,6 +7032,7 @@ int sqlite3Fts5IndexQuery( ** for internal sanity checking by the integrity-check in debug ** mode only. */ #ifdef SQLITE_DEBUG + if( flags & FTS5INDEX_QUERY_NOTOKENDATA ) bTokendata = 0; if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; @@ -6475,7 +7047,10 @@ int sqlite3Fts5IndexQuery( } } - if( iIdx<=pConfig->nPrefix ){ + if( bTokendata && iIdx==0 ){ + buf.p[0] = '0'; + pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset); + }else if( iIdx<=pConfig->nPrefix ){ /* Straight index lookup */ Fts5Structure *pStruct = fts5StructureRead(p); buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); @@ -6522,7 +7097,11 @@ int sqlite3Fts5IndexQuery( int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; assert( pIter->pIndex->rc==SQLITE_OK ); - fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); + if( pIter->pTokenDataIter ){ + fts5TokendataIterNext(pIter, 0, 0); + }else{ + fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); + } return fts5IndexReturn(pIter->pIndex); } @@ -6555,7 +7134,11 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; - fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); + if( pIter->pTokenDataIter ){ + fts5TokendataIterNext(pIter, 1, iMatch); + }else{ + fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); + } return fts5IndexReturn(pIter->pIndex); } @@ -6571,12 +7154,106 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ } /* +** This is used by xInstToken() to access the token at offset iOff, column +** iCol of row iRowid. The token is returned via output variables *ppOut +** and *pnOut. The iterator passed as the first argument must be a tokendata=1 +** iterator (pIter->pTokenDataIter!=0). +*/ +int sqlite3Fts5IterToken( + Fts5IndexIter *pIndexIter, + i64 iRowid, + int iCol, + int iOff, + const char **ppOut, int *pnOut +){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + Fts5TokenDataMap *aMap = pT->aMap; + i64 iPos = (((i64)iCol)<<32) + iOff; + + int i1 = 0; + int i2 = pT->nMap; + int iTest = 0; + + while( i2>i1 ){ + iTest = (i1 + i2) / 2; + + if( aMap[iTest].iRowid<iRowid ){ + i1 = iTest+1; + }else if( aMap[iTest].iRowid>iRowid ){ + i2 = iTest; + }else{ + if( aMap[iTest].iPos<iPos ){ + if( aMap[iTest].iPos<0 ){ + break; + } + i1 = iTest+1; + }else if( aMap[iTest].iPos>iPos ){ + i2 = iTest; + }else{ + break; + } + } + } + + if( i2>i1 ){ + Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; + *ppOut = (const char*)pMap->aSeg[0].term.p+1; + *pnOut = pMap->aSeg[0].term.n-1; + } + + return SQLITE_OK; +} + +/* +** Clear any existing entries from the token-map associated with the +** iterator passed as the only argument. +*/ +void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + if( pIter->pTokenDataIter ){ + pIter->pTokenDataIter->nMap = 0; + } +} + +/* +** Set a token-mapping for the iterator passed as the first argument. This +** is used in detail=column or detail=none mode when a token is requested +** using the xInstToken() API. In this case the caller tokenizers the +** current row and configures the token-mapping via multiple calls to this +** function. +*/ +int sqlite3Fts5IndexIterWriteTokendata( + Fts5IndexIter *pIndexIter, + const char *pToken, int nToken, + i64 iRowid, int iCol, int iOff +){ + Fts5Iter *pIter = (Fts5Iter*)pIndexIter; + Fts5TokenDataIter *pT = pIter->pTokenDataIter; + Fts5Index *p = pIter->pIndex; + int ii; + + assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL ); + assert( pIter->pTokenDataIter ); + + for(ii=0; ii<pT->nIter; ii++){ + Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; + if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; + } + if( ii<pT->nIter ){ + fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff); + } + return fts5IndexReturn(p); +} + +/* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ if( pIndexIter ){ Fts5Iter *pIter = (Fts5Iter*)pIndexIter; Fts5Index *pIndex = pIter->pIndex; + fts5TokendataIterDelete(pIter->pTokenDataIter); fts5MultiIterFree(pIter); sqlite3Fts5IndexCloseReader(pIndex); } @@ -7084,7 +7761,9 @@ static int fts5QueryCksum( int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; Fts5IndexIter *pIter = 0; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); + int rc = sqlite3Fts5IndexQuery( + p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter + ); while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){ i64 rowid = pIter->iRowid; @@ -7783,6 +8462,24 @@ static void fts5DecodeRowidList( #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) +static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){ + int ii; + fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1); + if( *pRc==SQLITE_OK ){ + for(ii=0; ii<pTerm->n; ii++){ + if( pTerm->p[ii]==0x00 ){ + pBuf->p[pBuf->n++] = '\\'; + pBuf->p[pBuf->n++] = '0'; + }else{ + pBuf->p[pBuf->n++] = pTerm->p[ii]; + } + } + pBuf->p[pBuf->n] = 0x00; + } +} +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ + +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** The implementation of user-defined scalar function fts5_decode(). */ @@ -7889,9 +8586,8 @@ static void fts5DecodeFunction( iOff += fts5GetVarint32(&a[iOff], nAppend); term.n = nKeep; fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]); - sqlite3Fts5BufferAppendPrintf( - &rc, &s, " term=%.*s", term.n, (const char*)term.p - ); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); + fts5BufferAppendTerm(&rc, &s, &term); iOff += nAppend; /* Figure out where the doclist for this term ends */ @@ -7999,9 +8695,8 @@ static void fts5DecodeFunction( fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; - sqlite3Fts5BufferAppendPrintf( - &rc, &s, " term=%.*s", term.n, (const char*)term.p - ); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); + fts5BufferAppendTerm(&rc, &s, &term); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); } diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 6e86ca595..d35e998da 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -656,12 +656,15 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ } idxStr[iIdxStr] = '\0'; - /* Set idxFlags flags for the ORDER BY clause */ + /* Set idxFlags flags for the ORDER BY clause + ** + ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC". + */ if( pInfo->nOrderBy==1 ){ int iSort = pInfo->aOrderBy[0].iColumn; if( iSort==(pConfig->nCol+1) && bSeenMatch ){ idxFlags |= FTS5_BI_ORDER_RANK; - }else if( iSort==-1 ){ + }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){ idxFlags |= FTS5_BI_ORDER_ROWID; } if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){ @@ -913,6 +916,16 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ ); assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) ); + /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table, + ** clear any token mappings accumulated at the fts5_index.c level. In + ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH, + ** we need to retain the mappings for the entire query. */ + if( pCsr->ePlan==FTS5_PLAN_MATCH + && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata + ){ + sqlite3Fts5ExprClearTokens(pCsr->pExpr); + } + if( pCsr->ePlan<3 ){ int bSkip = 0; if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; @@ -2063,12 +2076,6 @@ static int fts5ApiInst( ){ if( iIdx<0 || iIdx>=pCsr->nInstCount ){ rc = SQLITE_RANGE; -#if 0 - }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){ - *piPhrase = pCsr->aInst[iIdx*3]; - *piCol = pCsr->aInst[iIdx*3 + 2]; - *piOff = -1; -#endif }else{ *piPhrase = pCsr->aInst[iIdx*3]; *piCol = pCsr->aInst[iIdx*3 + 1]; @@ -2323,13 +2330,56 @@ static int fts5ApiPhraseFirstColumn( return rc; } +/* +** xQueryToken() API implemenetation. +*/ +static int fts5ApiQueryToken( + Fts5Context* pCtx, + int iPhrase, + int iToken, + const char **ppOut, + int *pnOut +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut); +} + +/* +** xInstToken() API implemenetation. +*/ +static int fts5ApiInstToken( + Fts5Context *pCtx, + int iIdx, + int iToken, + const char **ppOut, int *pnOut +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + int rc = SQLITE_OK; + if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 + || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) + ){ + if( iIdx<0 || iIdx>=pCsr->nInstCount ){ + rc = SQLITE_RANGE; + }else{ + int iPhrase = pCsr->aInst[iIdx*3]; + int iCol = pCsr->aInst[iIdx*3 + 1]; + int iOff = pCsr->aInst[iIdx*3 + 2]; + i64 iRowid = fts5CursorRowid(pCsr); + rc = sqlite3Fts5ExprInstToken( + pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut + ); + } + } + return rc; +} + static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) ); static const Fts5ExtensionApi sFts5Api = { - 2, /* iVersion */ + 3, /* iVersion */ fts5ApiUserData, fts5ApiColumnCount, fts5ApiRowCount, @@ -2349,6 +2399,8 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiPhraseNext, fts5ApiPhraseFirstColumn, fts5ApiPhraseNextColumn, + fts5ApiQueryToken, + fts5ApiInstToken }; /* diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index 80c600dbb..853a41865 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -244,6 +244,9 @@ static int SQLITE_TCLAPI xF5tApi( { "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */ { "xPhraseForeach", 4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */ { "xPhraseColumnForeach", 3, "IPHRASE COLVAR SCRIPT" }, /* 17 */ + + { "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */ + { "xInstToken", 2, "IDX ITERM" }, /* 19 */ { 0, 0, 0} }; @@ -500,6 +503,38 @@ static int SQLITE_TCLAPI xF5tApi( break; } + CASE(18, "xQueryToken") { + const char *pTerm = 0; + int nTerm = 0; + int iPhrase = 0; + int iTerm = 0; + + if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR; + if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR; + rc = p->pApi->xQueryToken(p->pFts, iPhrase, iTerm, &pTerm, &nTerm); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm)); + } + + break; + } + + CASE(19, "xInstToken") { + const char *pTerm = 0; + int nTerm = 0; + int iIdx = 0; + int iTerm = 0; + + if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ) return TCL_ERROR; + if( Tcl_GetIntFromObj(interp, objv[3], &iTerm) ) return TCL_ERROR; + rc = p->pApi->xInstToken(p->pFts, iIdx, iTerm, &pTerm, &nTerm); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(pTerm, nTerm)); + } + + break; + } + default: assert( 0 ); break; @@ -1117,6 +1152,176 @@ static int SQLITE_TCLAPI f5tRegisterTok( return TCL_OK; } +typedef struct OriginTextCtx OriginTextCtx; +struct OriginTextCtx { + sqlite3 *db; + fts5_api *pApi; +}; + +typedef struct OriginTextTokenizer OriginTextTokenizer; +struct OriginTextTokenizer { + Fts5Tokenizer *pTok; /* Underlying tokenizer object */ + fts5_tokenizer tokapi; /* API implementation for pTok */ +}; + +/* +** Delete the OriginTextCtx object indicated by the only argument. +*/ +static void f5tOrigintextTokenizerDelete(void *pCtx){ + OriginTextCtx *p = (OriginTextCtx*)pCtx; + ckfree(p); +} + +static int f5tOrigintextCreate( + void *pCtx, + const char **azArg, + int nArg, + Fts5Tokenizer **ppOut +){ + OriginTextCtx *p = (OriginTextCtx*)pCtx; + OriginTextTokenizer *pTok = 0; + void *pTokCtx = 0; + int rc = SQLITE_OK; + + pTok = (OriginTextTokenizer*)sqlite3_malloc(sizeof(OriginTextTokenizer)); + if( pTok==0 ){ + rc = SQLITE_NOMEM; + }else if( nArg<1 ){ + rc = SQLITE_ERROR; + }else{ + /* Locate the underlying tokenizer */ + rc = p->pApi->xFindTokenizer(p->pApi, azArg[0], &pTokCtx, &pTok->tokapi); + } + + /* Create the new tokenizer instance */ + if( rc==SQLITE_OK ){ + rc = pTok->tokapi.xCreate(pTokCtx, &azArg[1], nArg-1, &pTok->pTok); + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(pTok); + pTok = 0; + } + *ppOut = (Fts5Tokenizer*)pTok; + return rc; +} + +static void f5tOrigintextDelete(Fts5Tokenizer *pTokenizer){ + OriginTextTokenizer *p = (OriginTextTokenizer*)pTokenizer; + if( p->pTok ){ + p->tokapi.xDelete(p->pTok); + } + sqlite3_free(p); +} + +typedef struct OriginTextCb OriginTextCb; +struct OriginTextCb { + void *pCtx; + const char *pText; + int nText; + int (*xToken)(void *, int, const char *, int, int, int); + + char *aBuf; /* Buffer to use */ + int nBuf; /* Allocated size of aBuf[] */ +}; + +static int xOriginToken( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd /* Byte offset of end of token within input */ +){ + OriginTextCb *p = (OriginTextCb*)pCtx; + int ret = 0; + + if( nToken==(iEnd-iStart) && 0==memcmp(pToken, &p->pText[iStart], nToken) ){ + /* Token exactly matches document text. Pass it through as is. */ + ret = p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); + }else{ + int nReq = nToken + 1 + (iEnd-iStart); + if( nReq>p->nBuf ){ + sqlite3_free(p->aBuf); + p->aBuf = sqlite3_malloc(nReq*2); + if( p->aBuf==0 ) return SQLITE_NOMEM; + p->nBuf = nReq*2; + } + + memcpy(p->aBuf, pToken, nToken); + p->aBuf[nToken] = '\0'; + memcpy(&p->aBuf[nToken+1], &p->pText[iStart], iEnd-iStart); + ret = p->xToken(p->pCtx, tflags, p->aBuf, nReq, iStart, iEnd); + } + + return ret; +} + + +static int f5tOrigintextTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ + const char *pText, int nText, + int (*xToken)(void *, int, const char *, int, int, int) +){ + OriginTextTokenizer *p = (OriginTextTokenizer*)pTokenizer; + OriginTextCb cb; + int ret; + + memset(&cb, 0, sizeof(cb)); + cb.pCtx = pCtx; + cb.pText = pText; + cb.nText = nText; + cb.xToken = xToken; + + ret = p->tokapi.xTokenize(p->pTok,(void*)&cb,flags,pText,nText,xOriginToken); + sqlite3_free(cb.aBuf); + return ret; +} + +/* +** sqlite3_fts5_register_origintext DB +** +** Description... +*/ +static int SQLITE_TCLAPI f5tRegisterOriginText( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db = 0; + fts5_api *pApi = 0; + int rc; + fts5_tokenizer tok = {0, 0, 0}; + OriginTextCtx *pCtx = 0; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB"); + return TCL_ERROR; + } + if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; + + pCtx = (OriginTextCtx*)ckalloc(sizeof(OriginTextCtx)); + pCtx->db = db; + pCtx->pApi = pApi; + + tok.xCreate = f5tOrigintextCreate; + tok.xDelete = f5tOrigintextDelete; + tok.xTokenize = f5tOrigintextTokenize; + rc = pApi->xCreateTokenizer( + pApi, "origintext", (void*)pCtx, &tok, f5tOrigintextTokenizerDelete + ); + + Tcl_ResetResult(interp); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); + return TCL_ERROR; + } + return TCL_OK; +} + /* ** Entry point. */ @@ -1133,7 +1338,8 @@ int Fts5tcl_Init(Tcl_Interp *interp){ { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 }, { "sqlite3_fts5_token_hash", f5tTokenHash, 0 }, { "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 }, - { "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 } + { "sqlite3_fts5_register_fts5tokenize", f5tRegisterTok, 0 }, + { "sqlite3_fts5_register_origintext",f5tRegisterOriginText, 0 } }; int i; F5tTokenizerContext *pContext; diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 9c012932d..001cad1de 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -438,6 +438,20 @@ proc detail_is_none {} { detail_check ; expr {$::detail == "none"} } proc detail_is_col {} { detail_check ; expr {$::detail == "col" } } proc detail_is_full {} { detail_check ; expr {$::detail == "full"} } +proc foreach_tokenizer_mode {prefix script} { + set saved $::testprefix + foreach {d mapping} { + "" {} + "-origintext" {, tokenize="origintext unicode61", tokendata=1} + } { + set s [string map [list %TOKENIZER% $mapping] $script] + set ::testprefix "$prefix$d" + reset_db + sqlite3_fts5_register_origintext db + uplevel $s + } + set ::testprefix $saved +} #------------------------------------------------------------------------- # Convert a poslist of the type returned by fts5_test_poslist() to a diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index e1551fc51..a80a307a4 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -22,6 +22,7 @@ ifcapable !fts5 { } foreach_detail_mode $::testprefix { +foreach_tokenizer_mode $::testprefix { do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); @@ -44,7 +45,7 @@ do_execsql_test 1.1 { # do_execsql_test 2.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); @@ -73,8 +74,9 @@ do_execsql_test 2.4 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 3.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } foreach {i x y} { 1 {g f d b f} {h h e i a} @@ -97,8 +99,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 4.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { @@ -121,8 +124,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 5.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { @@ -145,8 +149,9 @@ foreach {i x y} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 6.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } @@ -181,6 +186,7 @@ do_execsql_test 6.6 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); @@ -222,6 +228,7 @@ for {set i 1} {$i <= 10} {incr i} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); @@ -236,6 +243,7 @@ do_execsql_test 8.1 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db expr srand(0) @@ -280,8 +288,9 @@ for {set i 1} {$i <= 10} {incr i} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 10.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } set d10 { 1 {g f d b f} {h h e i a} @@ -314,19 +323,19 @@ do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } #------------------------------------------------------------------------- # do_catchsql_test 11.1 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 column name: rank}} do_catchsql_test 11.2 { - CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL%); + CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 table name: rank}} do_catchsql_test 11.3 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL% %TOKENIZER%); } {1 {reserved fts5 column name: rowid}} #------------------------------------------------------------------------- # do_execsql_test 12.1 { - CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL% %TOKENIZER%); } {} do_catchsql_test 12.2 { @@ -341,8 +350,9 @@ do_test 12.3 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 13.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o'); } {} @@ -365,8 +375,9 @@ do_execsql_test 13.6 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 14.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH d(x,y) AS ( SELECT NULL, 'xyz xyz xyz xyz xyz xyz' @@ -449,8 +460,9 @@ do_catchsql_test 16.2 { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO b2 VALUES('a'); INSERT INTO b2 VALUES('b'); INSERT INTO b2 VALUES('c'); @@ -466,8 +478,9 @@ do_test 17.2 { if {[string match n* %DETAIL%]==0} { reset_db + sqlite3_fts5_register_origintext db do_execsql_test 17.3 { - CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL%); + CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL% %TOKENIZER%); INSERT INTO c2 VALUES('x x x', 'x x x'); SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; } {1} @@ -476,8 +489,9 @@ if {[string match n* %DETAIL%]==0} { #------------------------------------------------------------------------- # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL%); + CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL% %TOKENIZER%); INSERT INTO uio VALUES(NULL); INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; @@ -524,8 +538,8 @@ do_execsql_test 17.9 { #-------------------------------------------------------------------- # do_execsql_test 18.1 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); - CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL%); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL% %TOKENIZER%); + CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1 VALUES('abc*', NULL); INSERT INTO t2 VALUES(1, 'abcdefg'); } @@ -540,8 +554,9 @@ do_execsql_test 18.3 { # fts5 table in the temp schema. # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 19.0 { - CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t1 VALUES('x y z'); INSERT INTO t1 VALUES('w x 1'); SELECT rowid FROM t1 WHERE t1 MATCH 'x'; @@ -551,8 +566,9 @@ do_execsql_test 19.0 { # Test that 6 and 7 byte varints can be read. # reset_db +sqlite3_fts5_register_origintext db do_execsql_test 20.0 { - CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL% %TOKENIZER%); } set ::ids [list \ 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] @@ -570,7 +586,7 @@ do_test 20.1 { # do_execsql_test 21.0 { CREATE TEMP TABLE t8(a, b); - CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE ft USING fts5(x, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 21.1 { @@ -581,7 +597,7 @@ do_execsql_test 21.1 { } do_execsql_test 22.0 { - CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t9 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t9(rowid, x) VALUES(2, 'bbb'); BEGIN; INSERT INTO t9(rowid, x) VALUES(1, 'aaa'); @@ -596,7 +612,7 @@ do_execsql_test 22.1 { #------------------------------------------------------------------------- do_execsql_test 23.0 { - CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t10 USING fts5(x, detail=%DETAIL% %TOKENIZER%); CREATE TABLE t11(x); } do_execsql_test 23.1 { @@ -608,7 +624,7 @@ do_execsql_test 23.2 { #------------------------------------------------------------------------- do_execsql_test 24.0 { - CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t12 USING fts5(x, detail=%DETAIL% %TOKENIZER%); INSERT INTO t12 VALUES('aaaa'); } do_execsql_test 24.1 { @@ -618,6 +634,9 @@ do_execsql_test 24.1 { INSERT INTO t12 VALUES('aaaa'); END; } +execsql_pp { + SELECT rowid, hex(block) FROM t12_data +} do_execsql_test 24.2 { INSERT INTO t12(t12) VALUES('integrity-check'); } @@ -627,7 +646,7 @@ do_execsql_test 24.3 { #------------------------------------------------------------------------- do_execsql_test 25.0 { - CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE t13 USING fts5(x, detail=%DETAIL% %TOKENIZER%); } do_execsql_test 25.1 { BEGIN; @@ -639,6 +658,7 @@ SELECT * FROM t13('BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB } +} expand_all_sql db finish_test diff --git a/ext/fts5/test/fts5faultH.test b/ext/fts5/test/fts5faultH.test new file mode 100644 index 000000000..9dd4cac0d --- /dev/null +++ b/ext/fts5/test/fts5faultH.test @@ -0,0 +1,93 @@ +# 2010 June 15 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5faultG + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +set ::testprefix fts5faultH + +sqlite3_fts5_register_origintext db + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); + + BEGIN; + INSERT INTO t1 VALUES('oNe tWo thRee'); + INSERT INTO t1 VALUES('One Two Three'); + INSERT INTO t1 VALUES('onE twO threE'); + COMMIT; + BEGIN; + INSERT INTO t1 VALUES('one two three'); + INSERT INTO t1 VALUES('one two three'); + INSERT INTO t1 VALUES('one two three'); + COMMIT; +} + +do_faultsim_test 1 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM t1('three'); + } +} -test { + faultsim_integrity_check + faultsim_test_result {0 {1 2 3 4 5 6}} +} + + +reset_db +sqlite3_fts5_register_origintext db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + + BEGIN; + INSERT INTO t1(rowid, x) VALUES(10, 'aaa bbb BBB'); + INSERT INTO t1(rowid, x) VALUES(12, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(13, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(14, 'bbb BBB bbb'); + INSERT INTO t1(rowid, x) VALUES(15, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(16, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(17, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(18, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(19, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(20, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(21, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(22, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(23, 'bbb bbb bbb'); + INSERT INTO t1(rowid, x) VALUES(24, 'aaa bbb BBB'); + COMMIT; +} + +do_faultsim_test 2 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM t1('BBB AND AAA'); + } +} -test { + faultsim_integrity_check + faultsim_test_result {0 {10 24}} +} + + + +finish_test diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test new file mode 100644 index 000000000..9752f35d3 --- /dev/null +++ b/ext/fts5/test/fts5origintext.test @@ -0,0 +1,297 @@ +# 2014 Jan 08 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach_detail_mode $testprefix { + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", detail=%DETAIL% + ); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); +} + +do_execsql_test 1.1 { + INSERT INTO ft VALUES('Hello world'); +} + +do_execsql_test 1.2 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +proc b {x} { string map [list "\0" "."] $x } +db func b b + +do_execsql_test 1.3 { + select b(term) from vocab; +} { + hello.Hello + world +} + +do_execsql_test 1.4 { + SELECT rowid FROM ft('Hello'); +} {1} + +#------------------------------------------------------------------------- +reset_db + +# Return a random integer between 0 and n-1. +# +proc random {n} { + expr {abs(int(rand()*$n))} +} + +proc select_one {list} { + set n [llength $list] + lindex $list [random $n] +} + +proc term {} { + set first_letter { + a b c d e f g h i j k l m n o p q r s t u v w x y z + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + } + + set term [select_one $first_letter] + append term [random 100] +} + +proc document {} { + set nTerm [expr [random 5] + 5] + set doc "" + for {set ii 0} {$ii < $nTerm} {incr ii} { + lappend doc [term] + } + set doc +} +db func document document + +sqlite3_fts5_register_origintext db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", detail=%DETAIL% + ); + INSERT INTO ft(ft, rank) VALUES('pgsz', 128); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); +} + +do_test 2.1 { + for {set ii 0} {$ii < 500} {incr ii} { + execsql { INSERT INTO ft VALUES( document() ) } + } +} {} + +do_execsql_test 2.2 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 2.3 { + INSERT INTO ft(ft, rank) VALUES('merge', 16); +} + +do_execsql_test 2.4 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 2.5 { + INSERT INTO ft(ft) VALUES('optimize'); +} + +#------------------------------------------------------------------------- +reset_db + +sqlite3_fts5_register_origintext db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", detail=%DETAIL% + ); + CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance); + + INSERT INTO ft(rowid, x) VALUES(1, 'hello'); + INSERT INTO ft(rowid, x) VALUES(2, 'Hello'); + INSERT INTO ft(rowid, x) VALUES(3, 'HELLO'); +} + +#proc b {x} { string map [list "\0" "."] $x } +#db func b b +#execsql_pp { SELECT b(term) FROM vocab } + +do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1 +do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2 +do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3 + +do_execsql_test 3.2 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, + tokenize="origintext unicode61", + tokendata=1, + detail=%DETAIL% + ); + CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance); + + INSERT INTO ft2(rowid, x) VALUES(1, 'hello'); + INSERT INTO ft2(rowid, x) VALUES(2, 'Hello'); + INSERT INTO ft2(rowid, x) VALUES(3, 'HELLO'); + + INSERT INTO ft2(rowid, x) VALUES(10, 'helloooo'); +} + +#proc b {x} { string map [list "\0" "."] $x } +#db func b b +#execsql_pp { SELECT b(term) FROM vocab } + +do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3} +do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3} +do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3} + +do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10} + +#------------------------------------------------------------------------- +# +reset_db +sqlite3_fts5_register_origintext db +proc querytoken {cmd iPhrase iToken} { + set txt [$cmd xQueryToken $iPhrase $iToken] + string map [list "\0" "."] $txt +} +sqlite3_fts5_create_function db querytoken querytoken + +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% + ); + INSERT INTO ft VALUES('one two three four'); +} + +do_execsql_test 4.1 { + SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO') +} {1 two.TwO} +do_execsql_test 4.2 { + SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE') +} {1 one} +do_execsql_test 4.3 { + SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE') +} {1 two.TWO} + +if {"%DETAIL%"=="full"} { + # Phrase queries are only supported for detail=full. + # + do_execsql_test 4.4 { + SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"') + } {1 three.ThreE} + do_catchsql_test 4.5 { + SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"') + } {1 SQLITE_RANGE} + do_catchsql_test 4.6 { + SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"') + } {1 SQLITE_RANGE} + do_catchsql_test 4.7 { + SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"') + } {1 SQLITE_RANGE} +} + +#------------------------------------------------------------------------- +# +reset_db +sqlite3_fts5_register_origintext db +proc insttoken {cmd iIdx iToken} { + set txt [$cmd xInstToken $iIdx $iToken] + string map [list "\0" "."] $txt +} +sqlite3_fts5_create_function db insttoken insttoken +fts5_aux_test_functions db + +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL% + ); + INSERT INTO ft VALUES('one ONE One oNe oNE one'); +} + +do_execsql_test 5.1 { + SELECT insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0), + insttoken(ft, 3, 0), + insttoken(ft, 4, 0), + insttoken(ft, 5, 0) + FROM ft('one'); +} { + one one.ONE one.One one.oNe one.oNE one +} + +do_execsql_test 5.2 { + SELECT insttoken(ft, 1, 0) FROM ft('one'); +} { + one.ONE +} + +do_execsql_test 5.3 { + SELECT fts5_test_poslist(ft) FROM ft('one'); +} { + {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5} +} + +#------------------------------------------------------------------------- +# Test the xInstToken() API with: +# +# * a non tokendata=1 table. +# * prefix queries. +# +reset_db +sqlite3_fts5_register_origintext db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, y, tokenize='origintext unicode61', detail=%DETAIL% + ); + + INSERT INTO ft VALUES('One Two', 'Three two'); + INSERT INTO ft VALUES('three Three', 'one One'); +} +proc tokens {cmd} { + set ret [list] + for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { + set txt [$cmd xInstToken $iTok 0] + set txt [string map [list "\0" "."] $txt] + lappend ret $txt + } + set ret +} +sqlite3_fts5_create_function db tokens tokens + +do_execsql_test 6.1 { + SELECT rowid, tokens(ft) FROM ft('One'); +} {1 one.One 2 one.One} + +do_execsql_test 6.2 { + SELECT rowid, tokens(ft) FROM ft('on*'); +} {1 {{}} 2 {{} {}}} + +do_execsql_test 6.3 { + SELECT rowid, tokens(ft) FROM ft('Three*'); +} {1 {{}} 2 {{}}} + +} + +finish_test + diff --git a/ext/fts5/test/fts5origintext2.test b/ext/fts5/test/fts5origintext2.test new file mode 100644 index 000000000..a8c5d4eb5 --- /dev/null +++ b/ext/fts5/test/fts5origintext2.test @@ -0,0 +1,146 @@ +# 2014 Jan 08 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext2 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); +} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO ft VALUES('Hello'); + INSERT INTO ft VALUES('hello'); + INSERT INTO ft VALUES('HELLO'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('World'); + INSERT INTO ft VALUES('world'); + INSERT INTO ft VALUES('WORLD'); + COMMIT; +} + +do_execsql_test 1.2 { SELECT rowid FROM ft('hello'); } {1 2 3} +do_execsql_test 1.3 { SELECT rowid FROM ft('today'); } {4 5 6} +do_execsql_test 1.4 { SELECT rowid FROM ft('world'); } {7 8 9} + +do_execsql_test 1.5 { + SELECT count(*) FROM ft_data +} 3 + +do_execsql_test 1.6 { + DELETE FROM ft; + INSERT INTO ft(ft, rank) VALUES('pgsz', 64); + BEGIN; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO ft SELECT 'Hello Hello Hello Hello Hello Hello Hello' FROM s; + INSERT INTO ft VALUES ('hELLO hELLO hELLO'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('today today today today today today today'); + INSERT INTO ft VALUES('World World World World World World World'); + INSERT INTO ft VALUES('world world world world world world world'); + INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD'); + INSERT INTO ft VALUES('World World World World World World World'); + INSERT INTO ft VALUES('world world world world world world world'); + INSERT INTO ft VALUES('WORLD WORLD WORLD WORLD WORLD WORLD WORLD'); + COMMIT; +} + +do_execsql_test 1.7 { + SELECT count(*) FROM ft_data; +} 23 + +do_execsql_test 1.8 { SELECT rowid FROM ft('hello') WHERE rowid>100; } {101} + +do_execsql_test 1.9 { + DELETE FROM ft; + INSERT INTO ft(ft) VALUES('optimize'); + SELECT count(*) FROM ft_data; +} {2} +do_execsql_test 1.10 { + BEGIN; + INSERT INTO ft VALUES('Hello'); + INSERT INTO ft VALUES('hello'); + INSERT INTO ft VALUES('HELLO'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('today'); + INSERT INTO ft VALUES('World'); + INSERT INTO ft VALUES('world'); + INSERT INTO ft VALUES('WORLD'); +} + +do_execsql_test 1.11 { SELECT rowid FROM ft('hello'); } {1 2 3} +do_execsql_test 1.12 { SELECT rowid FROM ft('today'); } {4 5 6} +do_execsql_test 1.13 { SELECT rowid FROM ft('world'); } {7 8 9} +do_execsql_test 1.14 { SELECT rowid FROM ft('hello') ORDER BY rank; } {1 2 3} + +#------------------------------------------------------------------------ +reset_db +sqlite3_fts5_register_origintext db +proc tokens {cmd} { + set ret [list] + for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { + set txt [$cmd xInstToken $iTok 0] + set txt [string map [list "\0" "."] $txt] + lappend ret $txt + } + set ret +} +sqlite3_fts5_create_function db tokens tokens + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE x1 USING fts5( + v, tokenize="origintext unicode61", tokendata=1, detail=none + ); + + INSERT INTO x1 VALUES('xxx Xxx XXX yyy YYY yyy'); + INSERT INTO x1 VALUES('xxx yyy xxx yyy yyy yyy'); +} + +do_execsql_test 2.1 { + SELECT tokens(x1) FROM x1('xxx'); +} { + {xxx xxx.Xxx xxx.XXX} {xxx xxx} +} + +do_execsql_test 2.2 { + UPDATE x1_content SET c0 = 'xxx xxX xxx yyy yyy yyy' WHERE id=1; +} + +do_execsql_test 2.3 { + SELECT tokens(x1) FROM x1('xxx'); +} { + {xxx {} xxx} {xxx xxx} +} + +finish_test + diff --git a/ext/fts5/test/fts5origintext3.test b/ext/fts5/test/fts5origintext3.test new file mode 100644 index 000000000..834844595 --- /dev/null +++ b/ext/fts5/test/fts5origintext3.test @@ -0,0 +1,101 @@ +# 2023 November 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext3 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach_detail_mode $testprefix { + reset_db + + sqlite3_fts5_register_origintext db + fts5_aux_test_functions db + proc insttoken {cmd iIdx iToken} { + set txt [$cmd xInstToken $iIdx $iToken] + string map [list "\0" "."] $txt + } + sqlite3_fts5_create_function db insttoken insttoken + + do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% + ); + } + + do_execsql_test 1.1 { + INSERT INTO ft VALUES('Hello world HELLO WORLD hello'); + } + + do_execsql_test 1.2 { + SELECT fts5_test_poslist(ft) FROM ft('hello'); + } {{0.0.0 0.0.2 0.0.4}} + + do_execsql_test 1.3 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hello'); + } {hello.Hello hello.HELLO hello} + + do_execsql_test 1.4 { + SELECT + insttoken(ft, 0, 0), + insttoken(ft, 1, 0), + insttoken(ft, 2, 0) + FROM ft('hello') ORDER BY rank; + } {hello.Hello hello.HELLO hello} + + do_execsql_test 1.5 { + CREATE VIRTUAL TABLE ft2 USING fts5( + x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL% + ); + INSERT INTO ft2(rowid, x) VALUES(1, 'ONE one two three ONE'); + INSERT INTO ft2(rowid, x) VALUES(2, 'TWO one two three TWO'); + INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE'); + } + + do_execsql_test 1.6 { + SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank; + } {three.THREE 3 three 1 three 2} + + do_execsql_test 1.7 { + INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB'); + INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(14, 'bbb BBB bbb'); + INSERT INTO ft2(rowid, x) VALUES(15, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(16, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(17, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(18, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(19, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(20, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(21, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(22, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(23, 'bbb bbb bbb'); + INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB'); + } + + do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24} + do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24} + +} + +finish_test + diff --git a/ext/fts5/test/fts5origintext4.test b/ext/fts5/test/fts5origintext4.test new file mode 100644 index 000000000..8973a24b0 --- /dev/null +++ b/ext/fts5/test/fts5origintext4.test @@ -0,0 +1,66 @@ +# 2023 November 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focused on phrase queries. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext4 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +sqlite3_fts5_register_origintext db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", tokendata=1 + ); +} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO ft SELECT 'the first thing'; + + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<90000 + ) + INSERT INTO ft SELECT 'The second thing' FROM s; + + INSERT INTO ft SELECT 'the first thing'; + COMMIT; + INSERT INTO ft(ft) VALUES('optimize'); +} + +foreach {tn sql expr} { + 1 { SELECT rowid FROM ft('the') } {$mem > 250000} + 2 { SELECT rowid FROM ft('first') } {$mem < 50000} + 3 { SELECT rowid FROM ft('the first') } {$mem < 50000} +} { + db close + sqlite3 db test.db + sqlite3_fts5_register_origintext db + + execsql $sql + do_test 1.2.$tn { + set mem [lindex [sqlite3_db_status db CACHE_USED 0] 1] + expr $expr + } 1 +} + +proc b {x} { string map [list "\0" "."] $x } +db func b b +# execsql_pp { SELECT segid, b(term), pgno from ft_idx } + +finish_test + diff --git a/ext/fts5/test/fts5origintext5.test b/ext/fts5/test/fts5origintext5.test new file mode 100644 index 000000000..03d5bee21 --- /dev/null +++ b/ext/fts5/test/fts5origintext5.test @@ -0,0 +1,273 @@ +# 2023 Dec 04 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests for tables that use both tokendata=1 and contentless_delete=1. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5origintext + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +# Return a random integer between 0 and n-1. +# +proc random {n} { expr {abs(int(rand()*$n))} } + +# Select an element of the list passed as the only argument at random and +# return it. +# +proc select_one {list} { + set n [llength $list] + lindex $list [random $n] +} + +# Given a term that consists entirely of alphabet characters, return all +# permutations of the term using upper and lower case characters. e.g. +# +# "abc" -> {CBA cBA CbA cbA CBa cBa Cba cba} +# +proc casify {term {lRet {{}}}} { + if {$term==""} { return $lRet } + set t [string range $term 1 end] + set f1 [string toupper [string range $term 0 0]] + set f2 [string tolower [string range $term 0 0]] + set ret [list] + foreach x $lRet { + lappend ret "$x$f1" + lappend ret "$x$f2" + } + return [casify $t $ret] +} + +proc vocab {} { + list abc def ghi jkl mno pqr stu vwx yza +} + +# Return a random 3 letter term. +# +proc term {} { + if {[info exists ::expanded_vocab]==0} { + foreach v [vocab] { lappend ::expanded_vocab {*}[casify $v] } + } + + select_one $::expanded_vocab +} + +# Return a document - between 3 and 10 terms. +# +proc document {} { + set nTerm [expr [random 3] + 7] + set doc "" + for {set ii 0} {$ii < $nTerm} {incr ii} { + lappend doc [term] + } + set doc +} +db func document document + +#------------------------------------------------------------------------- + +expr srand(6) + +set NDOC 200 +set NLOOP 50 + +sqlite3_fts5_register_origintext db + +proc tokens {cmd} { + set ret [list] + for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} { + set txt [$cmd xInstToken $iTok 0] + set txt [string map [list "\0" "."] $txt] + lappend ret $txt + } + set ret +} +sqlite3_fts5_create_function db tokens tokens + +proc rankfunc {cmd} { + $cmd xRowid +} +sqlite3_fts5_create_function db rankfunc rankfunc + +proc ctrl_tokens {term args} { + set ret [list] + set term [string tolower $term] + foreach doc $args { + foreach a $doc { + if {[string tolower $a]==$term} { + if {$a==$term} { + lappend ret $a + } else { + lappend ret [string tolower $a].$a + } + } + } + } + set ret +} +db func ctrl_tokens ctrl_tokens + +proc do_all_vocab_test {tn} { + foreach ::v [concat [vocab] nnn] { + set answer [execsql { + SELECT id, ctrl_tokens($::v, x) FROM ctrl WHERE x LIKE '%' || $::v || '%' + }] + do_execsql_test $tn.$::v.1 { + SELECT rowid, tokens(ft) FROM ft($::v) + } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank + } $answer + } +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize="origintext unicode61", content=, contentless_delete=1, + tokendata=1 + ); + + CREATE TABLE ctrl(id INTEGER PRIMARY KEY, x TEXT); + INSERT INTO ft(ft, rank) VALUES('pgsz', 64); + INSERT INTO ft(ft, rank) VALUES('rank', 'rankfunc()'); +} +do_test 1.1 { + for {set ii 0} {$ii < $NDOC} {incr ii} { + set doc [document] + execsql { + INSERT INTO ft(rowid, x) VALUES($ii, $doc); + INSERT INTO ctrl(id, x) VALUES($ii, $doc); + } + } +} {} + +#execsql_pp { SELECT * FROM ctrl } +#execsql_pp { SELECT * FROM ft } +#fts5_aux_test_functions db +#execsql_pp { SELECT rowid, tokens(ft), fts5_test_poslist(ft) FROM ft('ghi'); } + +do_all_vocab_test 1.2 + +for {set ii 0} {$ii < $NLOOP} {incr ii} { + set lRowid [execsql { SELECT id FROM ctrl WHERE random() % 2 }] + foreach r $lRowid { + execsql { DELETE FROM ft WHERE rowid = $r } + execsql { DELETE FROM ctrl WHERE rowid = $r } + + set doc [document] + execsql { INSERT INTO ft(rowid, x) VALUES($r, $doc) } + execsql { INSERT INTO ctrl(id, x) VALUES($r, $doc) } + } + do_all_vocab_test 1.3.$ii +} + +#------------------------------------------------------------------------- + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft2 USING fts5( + x, y, tokenize="origintext unicode61", content=, contentless_delete=1, + tokendata=1 + ); + + CREATE TABLE ctrl2(id INTEGER PRIMARY KEY, x TEXT, y TEXT); + INSERT INTO ft2(ft2, rank) VALUES('pgsz', 64); + INSERT INTO ft2(ft2, rank) VALUES('rank', 'rankfunc()'); +} +do_test 2.1 { + for {set ii 0} {$ii < $NDOC} {incr ii} { + set doc1 [document] + set doc2 [document] + execsql { + INSERT INTO ft2(rowid, x, y) VALUES($ii, $doc, $doc2); + INSERT INTO ctrl2(id, x, y) VALUES($ii, $doc, $doc2); + } + } +} {} + +proc do_all_vocab_test2 {tn} { + foreach ::v [vocab] { + set answer [execsql { + SELECT id, ctrl_tokens($::v, x, y) FROM ctrl2 + WHERE x LIKE '%' || $::v || '%' OR y LIKE '%' || $::v || '%'; + }] + do_execsql_test $tn.$::v.1 { + SELECT rowid, tokens(ft2) FROM ft2($::v) + } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft2) FROM ft2($::v) ORDER BY rank + } $answer + } +} + +do_all_vocab_test2 2.2 + +for {set ii 0} {$ii < $NLOOP} {incr ii} { + set lRowid [execsql { SELECT id FROM ctrl2 WHERE random() % 2 }] + foreach r $lRowid { + execsql { DELETE FROM ft2 WHERE rowid = $r } + execsql { DELETE FROM ctrl2 WHERE rowid = $r } + + set doc1 [document] + set doc2 [document] + execsql { INSERT INTO ft2(rowid, x, y) VALUES($r, $doc, $doc1) } + execsql { INSERT INTO ctrl2(id, x, y) VALUES($r, $doc, $doc2) } + } + do_all_vocab_test 2.3.$ii +} + +#------------------------------------------------------------------------- + +unset -nocomplain ::expanded_vocab +proc vocab {} { + list abcde fghij klmno +} + +proc do_all_vocab_test3 {tn} { + foreach ::v [concat [vocab] nnn] { + set answer [execsql { + SELECT rowid, ctrl_tokens($::v, w) FROM ctrl3 WHERE w LIKE '%' || $::v || '%' + }] + do_execsql_test $tn.$::v.1 { + SELECT rowid, tokens(ft3) FROM ft3($::v) + } $answer + do_execsql_test $tn.$::v.2 { + SELECT rowid, tokens(ft3) FROM ft3($::v) ORDER BY rank + } $answer + } +} + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE ft3 USING fts5( + w, tokenize="origintext unicode61", content=, contentless_delete=1, + tokendata=1 + ); + INSERT INTO ft3(ft3, rank) VALUES('rank', 'rankfunc()'); + CREATE TABLE ctrl3(w); +} + +do_execsql_test 3.1 { + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<2 + ) + INSERT INTO ctrl3 SELECT document() FROM s; + INSERT INTO ft3(rowid, w) SELECT rowid, w FROM ctrl3; +} + +do_all_vocab_test3 3.2 + + +finish_test + diff --git a/ext/fts5/test/fts5simple2.test b/ext/fts5/test/fts5simple2.test index e57cea70f..6c0e0e166 100644 --- a/ext/fts5/test/fts5simple2.test +++ b/ext/fts5/test/fts5simple2.test @@ -343,7 +343,9 @@ do_execsql_test 17.0 { INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb'); COMMIT; } -do_execsql_test 17.1 { SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20 } +do_execsql_test 17.1 { + SELECT * FROM t2('y:a*') WHERE rowid BETWEEN 10 AND 20 +} do_execsql_test 17.2 { BEGIN; INSERT INTO t2 VALUES('a aa aaa', 'b bb bbb'); @@ -1,5 +1,5 @@ -C README.md\stypo\sfix\sreported\sin\sthe\sforum\sand\supdate\sall\slinks\sfrom\shttp:\sto\shttps:. -D 2023-12-06T12:30:28.174 +C Merge\strunk\schanges\sinto\sthis\sbranch. +D 2023-12-06T14:30:34.010 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -89,17 +89,17 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 -F ext/fts5/fts5.h 05501612cc655504c5dce8ba765ab621d50fc478490089beaa0d75e00b23e520 -F ext/fts5/fts5Int.h 78a63cc0795186cde5384816a9403a68c65774b35d952e05b81a1b4b158e07c8 +F ext/fts5/fts5.h ff90acaa97f8e865b66d1177d1b56b8c110fd5548ab5863bab43f055a1d745fe +F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880e9834 F ext/fts5/fts5_aux.c ee770eec0af8646db9e18fc01a0dad7345b5f5e8cbba236704cfae2d777022ad F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 -F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 -F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6 -F ext/fts5/fts5_hash.c 076058f93327051952a752dc765df1acfe783eb11b419b30652aa1fc1f987902 -F ext/fts5/fts5_index.c 458cbed8a3e17617cbf7e80cdfb7612000b9bb3781f286b345fb9655858658cf -F ext/fts5/fts5_main.c a07ed863b8bd9e6fefb62db2fd40a3518eb30a5f7dcfda5be915dd2db45efa2f +F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf +F ext/fts5/fts5_expr.c b1ec526371b9ffde82341423a5b9753c42cbea629a41b69f26fa377d13b95a8e +F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 +F ext/fts5/fts5_index.c be39b44ff8773cff56bcbc01f74701a83e068c20d773cafd01e8bb2fa0fc1bc5 +F ext/fts5/fts5_main.c fb7ec495d663f40d18e420e1986316591041a70e1e4b4696ab2a7384e4c7fd7a F ext/fts5/fts5_storage.c 5d10b9bdcce5b90656cad13c7d12ad4148677d4b9e3fca0481fca56d6601426d -F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae +F ext/fts5/fts5_tcl.c cf0fd0dbe64ec272491b749e0d594f563cda03336aeb60900129e6d18b0aefb8 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b F ext/fts5/fts5_tokenize.c 83cfcede3898001cab84432a36ce1503e3080cf9b1c682b022ec82e267ea4c13 @@ -108,8 +108,8 @@ F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d0988 F ext/fts5/fts5_vocab.c aed56169ae5c1aa9b8189c779ffeef04ed516d3c712c06914e6d91a6759f4e4a F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl a9de9c2209cc4e7ae3c753e783504e67206c6c1467d08f209cd0c5923d3e8d8b -F ext/fts5/test/fts5aa.test ba5158eba7d61359becdfca895ef471072c7bf7b20e5e60dcb4d024c8419c926 +F ext/fts5/test/fts5_common.tcl 8b1848ac2baad10e444e4183034a52050b52d20b3796d9d30e78f01ab0d05583 +F ext/fts5/test/fts5aa.test 4db81519863244a3cab35795fe65ab6b592e7970c7409eba098b23ebbfc08d95 F ext/fts5/test/fts5ab.test bd932720c748383277456b81f91bc00453de2174f9762cd05f95d0495dc50390 F ext/fts5/test/fts5ac.test a7aa7e1fefc6e1918aa4d3111d5c44a09177168e962c5fd2cca9620de8a7ed6d F ext/fts5/test/fts5ad.test e8cf959dfcd57c8e46d6f5f25665686f3b6627130a9a981371dafdf6482790de @@ -170,6 +170,7 @@ F ext/fts5/test/fts5faultD.test e7ed7895abfe6bc98a5e853826f6b74956e7ba7f594f1860 F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e4710c77eb8ce7075 F ext/fts5/test/fts5faultF.test 4abef99f86e99d9f0c6460dd68c586a766b6b9f1f660ada55bf2e8266bd1bbc1 F ext/fts5/test/fts5faultG.test d2e5a4d9a34e08dcaadcaeafef74d10cbc2abdd11aa2659a18af0294bf2812d3 +F ext/fts5/test/fts5faultH.test d845f45dac3e1a3f20c7e0a2be95280c95d3204c06802f86ab2c110e52ed3d14 F ext/fts5/test/fts5first.test 3fcf2365c00a15fc9704233674789a3b95131d12de18a9b996159f6909dc8079 F ext/fts5/test/fts5full.test e1701a112354e0ff9a1fdffb0c940c576530c33732ee20ac5e8361777070d717 F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e @@ -190,6 +191,11 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca +F ext/fts5/test/fts5origintext.test d2796fa08ee7aecfabdc0c45bb8a2fb16a00ea8757e63fbc153b718dbe430a39 +F ext/fts5/test/fts5origintext2.test f3b9436de540828d01f0672df855b09ebc0863e126d5b56234701d71dfa73634 +F ext/fts5/test/fts5origintext3.test 0d25933506600452a5ab3873cbb418ed5f2de2446c3672b9997b1ea104b0e7f0 +F ext/fts5/test/fts5origintext4.test 296b1b1e6630d492b99db0769e8127087548f0e939376047716a68b77ca3c871 +F ext/fts5/test/fts5origintext5.test a037bdf7235a22033c4663837bdb12d9738245464a3ac2f60c71fc40d07ede7d F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -212,7 +218,7 @@ F ext/fts5/test/fts5secure7.test fd03d0868d64340a1db8615b02e5508fea409de13910114 F ext/fts5/test/fts5secure8.test eb3579e9d58b0acad97e8082dee1f99b2d393198f03500b453c2b25761c0c298 F ext/fts5/test/fts5securefault.test dbca2b6a1c16700017f5051138991b705410889933f2a37c57ae8a23b296b10b F ext/fts5/test/fts5simple.test a298670508c1458b88ce6030440f26a30673931884eb5f4094ac1773b3ba217b -F ext/fts5/test/fts5simple2.test 258a1b0c590409bfa5271e872c79572b319d2a56554d0585f68f146a0da603f0 +F ext/fts5/test/fts5simple2.test 8dd2389ee75e21a1429fe87e5f8c7d9a97ad1470304a8a2d3ba4b8c3c345fecd F ext/fts5/test/fts5simple3.test d5c74a9d3ca71bd5dd5cacb7c55b86ea12cdddfc8b1910e3de2995206898380f F ext/fts5/test/fts5synonym.test 1651815b8008de170e8e600dcacc17521d765482ea8f074ae82cfa870d8bb7fb F ext/fts5/test/fts5synonym2.test 8f891fc49cc1e8daed727051e77e1f42849c784a6a54bef82564761b2cb3e016 @@ -2147,8 +2153,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 7f0c79b94e8f55e5013e52ba64ba8b32dad1dc4e2224d2099733cbc561de1810 -R e4c4253b529b92ef6212320f71e1570e -U stephan -Z 02e09a8447ded58f933fad2e169fffdc +P 59d008b6c23ab900377bc696ee19381feb7614bac80546eae361e401c3620c4e 5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf +R aee08254c1ed5ae187dbc54a7e67d0a2 +U dan +Z 4af5347745441ba205f194c56bf5dc11 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index f935efea2..aacb877b3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf
\ No newline at end of file +8f46eace86e7b2e556913575aa3cd6f7987ac0efcc880f0af649d42c253aeb81
\ No newline at end of file |