diff options
37 files changed, 3269 insertions, 331 deletions
diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 58d90c68c..1306629da 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -628,6 +628,9 @@ proc print_categories {lMap} { $caseP $caseS $caseZ + + default: + return 1; } return 0; } diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl index 6762a036d..634dc70cb 100644 --- a/ext/fts5/extract_api_docs.tcl +++ b/ext/fts5/extract_api_docs.tcl @@ -108,8 +108,11 @@ proc get_tokenizer_docs {data} { append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n" continue } + if {[regexp {FTS5_TOKENIZER} $line]} { + set line </dl><p> + } if {[regexp {SYNONYM SUPPORT} $line]} { - set line "</dl><h3>Synonym Support</h3>" + set line "<h3>Synonym Support</h3>" } if {[string trim $line] == ""} { append res "<p>\n" diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index d3042fcb8..682a8da38 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -308,9 +308,32 @@ struct Fts5PhraseIter { ** ** This API can be quite slow if used with an FTS5 table created with the ** "detail=none" or "detail=column" option. +** +** xColumnLocale(pFts5, iIdx, pzLocale, pnLocale) +** If parameter iCol is less than zero, or greater than or equal to the +** number of columns in the table, SQLITE_RANGE is returned. +** +** Otherwise, this function attempts to retrieve the locale associated +** with column iCol of the current row. Usually, there is no associated +** locale, and output parameters (*pzLocale) and (*pnLocale) are set +** to NULL and 0, respectively. However, if the fts5_locale() function +** was used to associate a locale with the value when it was inserted +** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated +** buffer containing the name of the locale in utf-8 encoding. (*pnLocale) +** is set to the size in bytes of the buffer, not including the +** nul-terminator. +** +** If successful, SQLITE_OK is returned. Or, if an error occurs, an +** SQLite error code is returned. The final value of the output parameters +** is undefined in this case. +** +** xTokenize_v2: +** Tokenize text using the tokenizer belonging to the FTS5 table. This +** API is the same as the xTokenize() API, except that it allows a tokenizer +** locale to be specified. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 3 */ + int iVersion; /* Currently always set to 4 */ void *(*xUserData)(Fts5Context*); @@ -352,6 +375,15 @@ struct Fts5ExtensionApi { const char **ppToken, int *pnToken ); int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); + + /* Below this point are iVersion>=4 only */ + int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn); + int (*xTokenize_v2)(Fts5Context*, + const char *pText, int nText, /* Text to tokenize */ + const char *pLoc, int nLoc, /* Locale to pass to tokenizer */ + void *pCtx, /* Context passed to xToken() */ + int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ + ); }; /* @@ -364,6 +396,7 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting +** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. The structure methods are expected to function as follows: ** @@ -372,7 +405,7 @@ struct Fts5ExtensionApi { ** A tokenizer instance is required to actually tokenize text. ** ** The first argument passed to this function is a copy of the (void*) -** pointer provided by the application when the fts5_tokenizer object +** pointer provided by the application when the fts5_tokenizer_v2 object ** was registered with FTS5 (the third argument to xCreateTokenizer()). ** The second and third arguments are an array of nul-terminated strings ** containing the tokenizer arguments, if any, specified following the @@ -396,7 +429,7 @@ struct Fts5ExtensionApi { ** argument passed to this function is a pointer to an Fts5Tokenizer object ** returned by an earlier call to xCreate(). ** -** The second argument indicates the reason that FTS5 is requesting +** The third argument indicates the reason that FTS5 is requesting ** tokenization of the supplied text. This is always one of the following ** four values: ** @@ -420,6 +453,13 @@ struct Fts5ExtensionApi { ** on a columnsize=0 database. ** </ul> ** +** The sixth and seventh arguments passed to xTokenize() - pLocale and +** nLocale - are a pointer to a buffer containing the locale to use for +** tokenization (e.g. "en_US") and its size in bytes, respectively. The +** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in +** which case nLocale is always 0) to indicate that the tokenizer should +** use its default locale. +** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The third and fourth @@ -443,6 +483,29 @@ struct Fts5ExtensionApi { ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** +** If the tokenizer is registered using an fts5_tokenizer_v2 object, +** then the xTokenize() method has two additional arguments - pLocale +** and nLocale. These specify the locale that the tokenizer should use +** for the current request. If pLocale and nLocale are both 0, then the +** tokenizer should use its default locale. Otherwise, pLocale points to +** an nLocale byte buffer containing the name of the locale to use as utf-8 +** text. pLocale is not nul-terminated. +** +** FTS5_TOKENIZER +** +** There is also an fts5_tokenizer object. This is an older version of +** fts5_tokenizer_v2. It is similar except that: +** +** <ul> +** <li> There is no "iVersion" field, and +** <li> The xTokenize() method does not take a locale argument. +** </ul> +** +** fts5_tokenizer tokenizers should be registered with the xCreateTokenizer() +** function, instead of xCreateTokenizer_v2(). Tokenizers implementations +** registered using either API may be retrieved using both xFindTokenizer() +** and xFindTokenizer_v2(). +** ** SYNONYM SUPPORT ** ** Custom tokenizers may also support synonyms. Consider a case in which a @@ -551,6 +614,33 @@ struct Fts5ExtensionApi { ** inefficient. */ typedef struct Fts5Tokenizer Fts5Tokenizer; +typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2; +struct fts5_tokenizer_v2 { + int iVersion; /* Currently always 2 */ + + int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); + void (*xDelete)(Fts5Tokenizer*); + int (*xTokenize)(Fts5Tokenizer*, + void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ + const char *pText, int nText, + const char *pLocale, int nLocale, + int (*xToken)( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd /* Byte offset of end of token within input text */ + ) + ); +}; + +/* +** New code should use the fts5_tokenizer_v2 type to define tokenizer +** implementations. The following type is included for legacy applications +** that still use it. +*/ typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); @@ -570,6 +660,7 @@ struct fts5_tokenizer { ); }; + /* Flags that may be passed as the third argument to xTokenize() */ #define FTS5_TOKENIZE_QUERY 0x0001 #define FTS5_TOKENIZE_PREFIX 0x0002 @@ -589,7 +680,7 @@ struct fts5_tokenizer { */ typedef struct fts5_api fts5_api; struct fts5_api { - int iVersion; /* Currently always set to 2 */ + int iVersion; /* Currently always set to 3 */ /* Create a new tokenizer */ int (*xCreateTokenizer)( @@ -616,6 +707,25 @@ struct fts5_api { fts5_extension_function xFunction, void (*xDestroy)(void*) ); + + /* APIs below this point are only available if iVersion>=3 */ + + /* Create a new tokenizer */ + int (*xCreateTokenizer_v2)( + fts5_api *pApi, + const char *zName, + void *pUserData, + fts5_tokenizer_v2 *pTokenizer, + void (*xDestroy)(void*) + ); + + /* Find an existing tokenizer */ + int (*xFindTokenizer_v2)( + fts5_api *pApi, + const char *zName, + void **ppUserData, + fts5_tokenizer_v2 **ppTokenizer + ); }; /* diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 4311faceb..7e4111957 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -162,10 +162,13 @@ typedef struct Fts5TokenizerConfig Fts5TokenizerConfig; struct Fts5TokenizerConfig { Fts5Tokenizer *pTok; - fts5_tokenizer *pTokApi; + fts5_tokenizer_v2 *pApi2; + fts5_tokenizer *pApi1; const char **azArg; int nArg; int ePattern; /* FTS_PATTERN_XXX constant */ + const char *pLocale; /* Current locale to use */ + int nLocale; /* Size of pLocale in bytes */ }; /* @@ -206,6 +209,8 @@ struct Fts5TokenizerConfig { ** ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); ** +** bLocale: +** Set to true if locale=1 was specified when the table was created. */ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -223,10 +228,12 @@ struct Fts5Config { char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ int bTokendata; /* "tokendata=" option value (dflt==0) */ + int bLocale; /* "locale=" option value (dflt==0) */ int eDetail; /* FTS5_DETAIL_XXX value */ char *zContentExprlist; Fts5TokenizerConfig t; int bLock; /* True when table is preparing statement */ + /* Values loaded from the %_config table */ int iVersion; /* fts5 file format 'version' */ @@ -292,6 +299,8 @@ int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); int sqlite3Fts5ConfigParseRank(const char*, char**, char**); +void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...); + /* ** End of interface to code in fts5_config.c. **************************************************************************/ @@ -336,7 +345,7 @@ char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); void sqlite3Fts5Put32(u8*, int); int sqlite3Fts5Get32(const u8*); -#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) +#define FTS5_POS2COLUMN(iPos) (int)((iPos >> 32) & 0x7FFFFFFF) #define FTS5_POS2OFFSET(iPos) (int)(iPos & 0x7FFFFFFF) typedef struct Fts5PoslistReader Fts5PoslistReader; @@ -627,6 +636,17 @@ Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); int sqlite3Fts5FlushToDisk(Fts5Table*); +int sqlite3Fts5ExtractText( + Fts5Config *pConfig, + sqlite3_value *pVal, /* Value to extract text from */ + int bContent, /* Loaded from content table */ + int *pbResetTokenizer, /* OUT: True if ClearLocale() required */ + const char **ppText, /* OUT: Pointer to text buffer */ + int *pnText /* OUT: Size of (*ppText) in bytes */ +); + +void sqlite3Fts5ClearLocale(Fts5Config *pConfig); + /* ** End of interface to code in fts5.c. **************************************************************************/ @@ -706,7 +726,7 @@ int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); int sqlite3Fts5DropAll(Fts5Config*); int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); -int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**); +int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int); int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*); int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); @@ -732,6 +752,9 @@ int sqlite3Fts5StorageOptimize(Fts5Storage *p); int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); int sqlite3Fts5StorageReset(Fts5Storage *p); +void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*); +int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel); + /* ** End of interface to code in fts5_storage.c. **************************************************************************/ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 30101fbe2..eb3f7e359 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -226,6 +226,7 @@ static int fts5HighlightCb( return rc; } + /* ** Implementation of highlight() function. */ @@ -256,12 +257,19 @@ static void fts5HighlightFunction( sqlite3_result_text(pCtx, "", -1, SQLITE_STATIC); rc = SQLITE_OK; }else if( ctx.zIn ){ + const char *pLoc = 0; /* Locale of column iCol */ + int nLoc = 0; /* Size of pLoc in bytes */ if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); } if( rc==SQLITE_OK ){ - rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); + rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc); + } + if( rc==SQLITE_OK ){ + rc = pApi->xTokenize_v2( + pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb + ); } if( ctx.bOpen ){ fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); @@ -458,6 +466,8 @@ static void fts5SnippetFunction( memset(&sFinder, 0, sizeof(Fts5SFinder)); for(i=0; i<nCol; i++){ if( iCol<0 || iCol==i ){ + const char *pLoc = 0; /* Locale of column iCol */ + int nLoc = 0; /* Size of pLoc in bytes */ int nDoc; int nDocsize; int ii; @@ -465,8 +475,10 @@ static void fts5SnippetFunction( sFinder.nFirst = 0; rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); if( rc!=SQLITE_OK ) break; - rc = pApi->xTokenize(pFts, - sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb + rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc); + if( rc!=SQLITE_OK ) break; + rc = pApi->xTokenize_v2(pFts, + sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb ); if( rc!=SQLITE_OK ) break; rc = pApi->xColumnSize(pFts, i, &nDocsize); @@ -524,6 +536,9 @@ static void fts5SnippetFunction( rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); } if( ctx.zIn ){ + const char *pLoc = 0; /* Locale of column iBestCol */ + int nLoc = 0; /* Bytes in pLoc */ + if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); } @@ -542,7 +557,12 @@ static void fts5SnippetFunction( } if( rc==SQLITE_OK ){ - rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); + rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc); + } + if( rc==SQLITE_OK ){ + rc = pApi->xTokenize_v2( + pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb + ); } if( ctx.bOpen ){ fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 01f40455a..3cb1bd3be 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -380,6 +380,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("locale", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed locale=... directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bLocale = (zArg[0]=='1'); + } + return rc; + } + if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){ const Fts5Enum aDetail[] = { { "none", FTS5_DETAIL_NONE }, @@ -669,7 +679,11 @@ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ if( pConfig ){ int i; if( pConfig->t.pTok ){ - pConfig->t.pTokApi->xDelete(pConfig->t.pTok); + if( pConfig->t.pApi1 ){ + pConfig->t.pApi1->xDelete(pConfig->t.pTok); + }else{ + pConfig->t.pApi2->xDelete(pConfig->t.pTok); + } } sqlite3_free((char*)pConfig->t.azArg); sqlite3_free(pConfig->zDb); @@ -752,9 +766,15 @@ int sqlite3Fts5Tokenize( rc = sqlite3Fts5LoadTokenizer(pConfig); } if( rc==SQLITE_OK ){ - rc = pConfig->t.pTokApi->xTokenize( - pConfig->t.pTok, pCtx, flags, pText, nText, xToken - ); + if( pConfig->t.pApi1 ){ + rc = pConfig->t.pApi1->xTokenize( + pConfig->t.pTok, pCtx, flags, pText, nText, xToken + ); + }else{ + rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags, + pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken + ); + } } } return rc; @@ -1011,13 +1031,10 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE ){ rc = SQLITE_ERROR; - if( pConfig->pzErrmsg ){ - assert( 0==*pConfig->pzErrmsg ); - *pConfig->pzErrmsg = sqlite3_mprintf("invalid fts5 file format " - "(found %d, expected %d or %d) - run 'rebuild'", - iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE - ); - } + sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format " + "(found %d, expected %d or %d) - run 'rebuild'", + iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE + ); }else{ pConfig->iVersion = iVersion; } @@ -1027,3 +1044,26 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ } return rc; } + +/* +** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer +** containing the error message created using printf() style formatting +** string zFmt and its trailing arguments. +*/ +void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){ + va_list ap; /* ... printf arguments */ + char *zMsg = 0; + + va_start(ap, zFmt); + zMsg = sqlite3_vmprintf(zFmt, ap); + if( pConfig->pzErrmsg ){ + assert( *pConfig->pzErrmsg==0 ); + *pConfig->pzErrmsg = zMsg; + }else{ + sqlite3_free(zMsg); + } + + va_end(ap); +} + + diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 1f089d875..cd44b96bd 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -286,11 +286,12 @@ int sqlite3Fts5ExprNew( }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); sqlite3Fts5ParserFree(pEngine, fts5ParseFree); + assert( sParse.pExpr || sParse.rc!=SQLITE_OK ); assert_expr_depth_ok(sParse.rc, sParse.pExpr); /* If the LHS of the MATCH expression was a user column, apply the ** implicit column-filter. */ - if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){ + if( sParse.rc==SQLITE_OK && iCol<pConfig->nCol ){ int n = sizeof(Fts5Colset); Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); if( pColset ){ @@ -307,15 +308,7 @@ int sqlite3Fts5ExprNew( sParse.rc = SQLITE_NOMEM; sqlite3Fts5ParseNodeFree(sParse.pExpr); }else{ - if( !sParse.pExpr ){ - const int nByte = sizeof(Fts5ExprNode); - pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte); - if( pNew->pRoot ){ - pNew->pRoot->bEof = 1; - } - }else{ - pNew->pRoot = sParse.pExpr; - } + pNew->pRoot = sParse.pExpr; pNew->pIndex = 0; pNew->pConfig = pConfig; pNew->apExprPhrase = sParse.apPhrase; @@ -1133,7 +1126,7 @@ static int fts5ExprNodeTest_STRING( } }else{ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - if( pIter->iRowid==iLast || pIter->bEof ) continue; + if( pIter->iRowid==iLast ) continue; bMatch = 0; if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ return rc; @@ -1655,9 +1648,6 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5ExprNearset *pRet = 0; if( pParse->rc==SQLITE_OK ){ - if( pPhrase==0 ){ - return pNear; - } if( pNear==0 ){ sqlite3_int64 nByte; nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index c862e2f95..e271402ec 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -103,11 +103,28 @@ struct Fts5Auxiliary { ** Each tokenizer module registered with the FTS5 module is represented ** by an object of the following type. All such objects are stored as part ** of the Fts5Global.pTok list. +** +** bV2Native: +** True if the tokenizer was registered using xCreateTokenizer_v2(), false +** for xCreateTokenizer(). If this variable is true, then x2 is populated +** with the routines as supplied by the caller and x1 contains synthesized +** wrapper routines. In this case the user-data pointer passed to +** x1.xCreate should be a pointer to the Fts5TokenizerModule structure, +** not a copy of pUserData. +** +** Of course, if bV2Native is false, then x1 contains the real routines and +** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule +** object should be passed to x2.xCreate. +** +** The synthesized wrapper routines are necessary for xFindTokenizer(_v2) +** calls. */ struct Fts5TokenizerModule { char *zName; /* Name of tokenizer */ void *pUserData; /* User pointer passed to xCreate() */ - fts5_tokenizer x; /* Tokenizer functions */ + int bV2Native; /* True if v2 native tokenizer */ + fts5_tokenizer x1; /* Tokenizer functions */ + fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ void (*xDestroy)(void*); /* Destructor function */ Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ }; @@ -118,7 +135,7 @@ struct Fts5FullTable { Fts5Global *pGlobal; /* Global (connection wide) data */ Fts5Cursor *pSortCsr; /* Sort data from this cursor */ int iSavepoint; /* Successful xSavepoint()+1 */ - + #ifdef SQLITE_DEBUG struct Fts5TransactionState ts; #endif @@ -195,7 +212,7 @@ struct Fts5Cursor { Fts5Auxiliary *pAux; /* Currently executing extension function */ Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ - /* Cache used by auxiliary functions xInst() and xInstCount() */ + /* Cache used by auxiliary API functions xInst() and xInstCount() */ Fts5PoslistReader *aInstIter; /* One for each phrase */ int nInstAlloc; /* Size of aInst[] array (entries / 3) */ int nInstCount; /* Number of phrase instances */ @@ -230,6 +247,12 @@ struct Fts5Cursor { #define BitFlagAllTest(x,y) (((x) & (y))==(y)) #define BitFlagTest(x,y) (((x) & (y))!=0) +/* +** The subtype value and header bytes used by fts5_locale(). +*/ +#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L') +#define FTS5_LOCALE_HEADER "\x00\xE0\xB2\xEB" + /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -607,7 +630,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ if( bSeenRank ) continue; idxStr[iIdxStr++] = 'r'; bSeenRank = 1; - }else if( iCol>=0 ){ + }else{ nSeenMatch++; idxStr[iIdxStr++] = 'M'; sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol); @@ -993,7 +1016,7 @@ static int fts5PrepareStatement( rc = sqlite3_prepare_v3(pConfig->db, zSql, -1, SQLITE_PREPARE_PERSISTENT, &pRet, 0); if( rc!=SQLITE_OK ){ - *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db)); + sqlite3Fts5ConfigErrmsg(pConfig, "%s", sqlite3_errmsg(pConfig->db)); } sqlite3_free(zSql); } @@ -1228,6 +1251,188 @@ static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ va_end(ap); } +/* +** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale +** specified by pLocale/nLocale. The buffer indicated by pLocale must remain +** valid until after the final call to sqlite3Fts5Tokenize() that will use +** the locale. +*/ +static void fts5SetLocale( + Fts5Config *pConfig, + const char *zLocale, + int nLocale +){ + Fts5TokenizerConfig *pT = &pConfig->t; + pT->pLocale = zLocale; + pT->nLocale = nLocale; +} + +/* +** Clear any locale configured by an earlier call to fts5SetLocale() or +** sqlite3Fts5ExtractText(). +*/ +void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ + fts5SetLocale(pConfig, 0, 0); +} + +/* +** This function is used to extract utf-8 text from an sqlite3_value. This +** is usually done in order to tokenize it. For example, when: +** +** * a value is written to an fts5 table, +** * a value is deleted from an FTS5_CONTENT_NORMAL table, +** * a value containing a query expression is passed to xFilter() +** +** and so on. +** +** This function handles 2 cases: +** +** 1) Ordinary values. The text can be extracted from these using +** sqlite3_value_text(). +** +** 2) Combination text/locale blobs created by fts5_locale(). There +** are several cases for these: +** +** * Blobs tagged with FTS5_LOCALE_SUBTYPE. +** * Blobs read from the content table of a locale=1 external-content +** table, and +** * Blobs read from the content table of a locale=1 regular +** content table. +** +** The first two cases above should have the 4 byte FTS5_LOCALE_HEADER +** header. It is an error if a blob with the subtype or a blob read +** from the content table of an external content table does not have +** the required header. A blob read from the content table of a regular +** locale=1 table does not have the header. This is to save space. +** +** If successful, SQLITE_OK is returned and output parameters (*ppText) +** and (*pnText) are set to point to a buffer containing the extracted utf-8 +** text and its length in bytes, respectively. The buffer is not +** nul-terminated. It has the same lifetime as the sqlite3_value object +** from which it is extracted. +** +** Parameter bContent must be true if the value was read from an indexed +** column (i.e. not UNINDEXED) of the on disk content. +** +** If pbResetTokenizer is not NULL and if case (2) is used, then +** fts5SetLocale() is called to ensure subsequent sqlite3Fts5Tokenize() calls +** use the locale. In this case (*pbResetTokenizer) is set to true before +** returning, to indicate that the caller must call sqlite3Fts5ClearLocale() +** to clear the locale after tokenizing the text. +*/ +int sqlite3Fts5ExtractText( + Fts5Config *pConfig, + sqlite3_value *pVal, /* Value to extract text from */ + int bContent, /* True if indexed table content */ + int *pbResetTokenizer, /* OUT: True if xSetLocale(NULL) required */ + const char **ppText, /* OUT: Pointer to text buffer */ + int *pnText /* OUT: Size of (*ppText) in bytes */ +){ + const char *pText = 0; + int nText = 0; + int rc = SQLITE_OK; + int bDecodeBlob = 0; + + assert( pbResetTokenizer==0 || *pbResetTokenizer==0 ); + assert( bContent==0 || pConfig->eContent!=FTS5_CONTENT_NONE ); + assert( bContent==0 || sqlite3_value_subtype(pVal)==0 ); + + if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ + if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE + || (bContent && pConfig->bLocale) + ){ + bDecodeBlob = 1; + } + } + + if( bDecodeBlob ){ + const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + const u8 *pBlob = sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + + /* Unless this blob was read from the %_content table of an + ** FTS5_CONTENT_NORMAL table, it should have the 4 byte fts5_locale() + ** header. Check for this. If it is not found, return an error. */ + if( (!bContent || pConfig->eContent!=FTS5_CONTENT_NORMAL) ){ + if( nBlob<SZHDR || memcmp(FTS5_LOCALE_HEADER, pBlob, SZHDR) ){ + rc = SQLITE_ERROR; + }else{ + pBlob += 4; + nBlob -= 4; + } + } + + if( rc==SQLITE_OK ){ + int nLocale = 0; + + for(nLocale=0; nLocale<nBlob; nLocale++){ + if( pBlob[nLocale]==0x00 ) break; + } + if( nLocale==nBlob || nLocale==0 ){ + rc = SQLITE_ERROR; + }else{ + pText = (const char*)&pBlob[nLocale+1]; + nText = nBlob-nLocale-1; + + if( pbResetTokenizer ){ + fts5SetLocale(pConfig, (const char*)pBlob, nLocale); + *pbResetTokenizer = 1; + } + } + } + + }else{ + pText = (const char*)sqlite3_value_text(pVal); + nText = sqlite3_value_bytes(pVal); + } + + *ppText = pText; + *pnText = nText; + return rc; +} + +/* +** Argument pVal is the text of a full-text search expression. It may or +** may not have been wrapped by fts5_locale(). This function extracts +** the text of the expression, and sets output variable (*pzText) to +** point to a nul-terminated buffer containing the expression. +** +** If pVal was an fts5_locale() value, then fts5SetLocale() is called to +** set the tokenizer to use the specified locale. +** +** If output variable (*pbFreeAndReset) is set to true, then the caller +** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer +** locale, and (b) call sqlite3_free() to free (*pzText). +*/ +static int fts5ExtractExprText( + Fts5Config *pConfig, /* Fts5 configuration */ + sqlite3_value *pVal, /* Value to extract expression text from */ + char **pzText, /* OUT: nul-terminated buffer of text */ + int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */ +){ + const char *zText = 0; + int nText = 0; + int rc = SQLITE_OK; + int bReset = 0; + + *pbFreeAndReset = 0; + rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, &bReset, &zText, &nText); + if( rc==SQLITE_OK ){ + if( bReset ){ + *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, zText); + if( rc!=SQLITE_OK ){ + sqlite3Fts5ClearLocale(pConfig); + }else{ + *pbFreeAndReset = 1; + } + }else{ + *pzText = (char*)zText; + } + } + + return rc; +} + /* ** This is the xFilter interface for the virtual table. See @@ -1263,13 +1468,7 @@ static int fts5FilterMethod( int iIdxStr = 0; Fts5Expr *pExpr = 0; - if( pConfig->bLock ){ - pTab->p.base.zErrMsg = sqlite3_mprintf( - "recursively defined fts5 content table" - ); - return SQLITE_ERROR; - } - + assert( pConfig->bLock==0 ); if( pCsr->ePlan ){ fts5FreeCursorComponents(pCsr); memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr)); @@ -1293,8 +1492,14 @@ static int fts5FilterMethod( pRank = apVal[i]; break; case 'M': { - const char *zText = (const char*)sqlite3_value_text(apVal[i]); + char *zText = 0; + int bFreeAndReset = 0; + int bInternal = 0; + + rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset); + if( rc!=SQLITE_OK ) goto filter_out; if( zText==0 ) zText = ""; + iCol = 0; do{ iCol = iCol*10 + (idxStr[iIdxStr]-'0'); @@ -1306,7 +1511,7 @@ static int fts5FilterMethod( ** indicates that the MATCH expression is not a full text query, ** but a request for an internal parameter. */ rc = fts5SpecialMatch(pTab, pCsr, &zText[1]); - goto filter_out; + bInternal = 1; }else{ char **pzErr = &pTab->p.base.zErrMsg; rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr); @@ -1314,9 +1519,15 @@ static int fts5FilterMethod( rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); pExpr = 0; } - if( rc!=SQLITE_OK ) goto filter_out; } + if( bFreeAndReset ){ + sqlite3_free(zText); + sqlite3Fts5ClearLocale(pConfig); + } + + if( bInternal || rc!=SQLITE_OK ) goto filter_out; + break; } case 'L': @@ -1624,7 +1835,7 @@ static int fts5SpecialDelete( int eType1 = sqlite3_value_type(apVal[1]); if( eType1==SQLITE_INTEGER ){ sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]); - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2], 0); } return rc; } @@ -1748,7 +1959,7 @@ static int fts5UpdateMethod( /* DELETE */ else if( nArg==1 ){ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); bUpdateOrDelete = 1; } @@ -1756,16 +1967,31 @@ static int fts5UpdateMethod( else{ int eType1 = sqlite3_value_numeric_type(apVal[1]); - if( eType1!=SQLITE_INTEGER && eType1!=SQLITE_NULL ){ - rc = SQLITE_MISMATCH; + /* Ensure that no fts5_locale() values are written to locale=0 tables. + ** And that no blobs except fts5_locale() blobs are written to indexed + ** (i.e. not UNINDEXED) columns of locale=1 tables. */ + int ii; + for(ii=0; ii<pConfig->nCol; ii++){ + if( sqlite3_value_type(apVal[ii+2])==SQLITE_BLOB ){ + int bSub = (sqlite3_value_subtype(apVal[ii+2])==FTS5_LOCALE_SUBTYPE); + if( (pConfig->bLocale && !bSub && pConfig->abUnindexed[ii]==0) + || (pConfig->bLocale==0 && bSub) + ){ + if( pConfig->bLocale==0 ){ + fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); + } + rc = SQLITE_MISMATCH; + goto update_out; + } + } } - else if( eType0!=SQLITE_INTEGER ){ + if( eType0!=SQLITE_INTEGER ){ /* An INSERT statement. If the conflict-mode is REPLACE, first remove ** the current entry (if any). */ if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){ i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); bUpdateOrDelete = 1; } fts5StorageInsert(&rc, pTab, apVal, pRowid); @@ -1775,28 +2001,35 @@ static int fts5UpdateMethod( else{ i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */ i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */ - if( eType1==SQLITE_INTEGER && iOld!=iNew ){ + if( eType1!=SQLITE_INTEGER ){ + rc = SQLITE_MISMATCH; + }else if( iOld!=iNew ){ if( eConflict==SQLITE_REPLACE ){ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); } fts5StorageInsert(&rc, pTab, apVal, pRowid); }else{ - rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid); + rc = sqlite3Fts5StorageFindDeleteRow(pTab->pStorage, iOld); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5StorageContentInsert(pTab->pStorage,apVal,pRowid); + } if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid); } } }else{ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); fts5StorageInsert(&rc, pTab, apVal, pRowid); } bUpdateOrDelete = 1; + sqlite3Fts5StorageReleaseDeleteRow(pTab->pStorage); } + } } @@ -1813,6 +2046,7 @@ static int fts5UpdateMethod( } } + update_out: pTab->p.pConfig->pzErrmsg = 0; return rc; } @@ -1890,17 +2124,40 @@ static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } -static int fts5ApiTokenize( +/* +** Implementation of xTokenize_v2() API. +*/ +static int fts5ApiTokenize_v2( Fts5Context *pCtx, const char *pText, int nText, + const char *pLoc, int nLoc, void *pUserData, int (*xToken)(void*, int, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); - return sqlite3Fts5Tokenize( - pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken + int rc = SQLITE_OK; + + fts5SetLocale(pTab->pConfig, pLoc, nLoc); + rc = sqlite3Fts5Tokenize(pTab->pConfig, + FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken ); + fts5SetLocale(pTab->pConfig, 0, 0); + + return rc; +} + +/* +** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0 +** passed as the locale. +*/ +static int fts5ApiTokenize( + Fts5Context *pCtx, + const char *pText, int nText, + void *pUserData, + int (*xToken)(void*, int, const char*, int, int, int) +){ + return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ @@ -1922,28 +2179,37 @@ static int fts5ApiColumnText( int rc = SQLITE_OK; Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + + assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL ); if( iCol<0 || iCol>=pTab->pConfig->nCol ){ rc = SQLITE_RANGE; - }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) - || pCsr->ePlan==FTS5_PLAN_SPECIAL - ){ + }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) ){ *pz = 0; *pn = 0; }else{ rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ - *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); - *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + Fts5Config *pConfig = pTab->pConfig; + int bContent = (pConfig->abUnindexed[iCol]==0); + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); + sqlite3Fts5ExtractText(pConfig, pVal, bContent, 0, pz, pn); } } return rc; } +/* +** This is called by various API functions - xInst, xPhraseFirst, +** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase +** of the current row. This function works for both detail=full tables (in +** which case the position-list was read from the fts index) or for other +** detail= modes if the row content is available. +*/ static int fts5CsrPoslist( - Fts5Cursor *pCsr, - int iPhrase, - const u8 **pa, - int *pn + Fts5Cursor *pCsr, /* Fts5 cursor object */ + int iPhrase, /* Phrase to find position list for */ + const u8 **pa, /* OUT: Pointer to position list buffer */ + int *pn /* OUT: Size of (*pa) in bytes */ ){ Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; int rc = SQLITE_OK; @@ -1951,20 +2217,34 @@ static int fts5CsrPoslist( if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){ rc = SQLITE_RANGE; + }else if( pConfig->eDetail!=FTS5_DETAIL_FULL + && pConfig->eContent==FTS5_CONTENT_NONE + ){ + *pa = 0; + *pn = 0; + return SQLITE_OK; }else if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){ if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ Fts5PoslistPopulator *aPopulator; int i; + aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); if( aPopulator==0 ) rc = SQLITE_NOMEM; + if( rc==SQLITE_OK ){ + rc = fts5SeekCursor(pCsr, 0); + } for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){ - int n; const char *z; - rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n); + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1); + const char *z = 0; + int n = 0; + int bReset = 0; + rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprPopulatePoslists( pConfig, pCsr->pExpr, aPopulator, i, z, n ); } + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } sqlite3_free(aPopulator); @@ -1989,7 +2269,6 @@ static int fts5CsrPoslist( *pn = 0; } - return rc; } @@ -2058,7 +2337,8 @@ static int fts5CacheInstArray(Fts5Cursor *pCsr){ aInst[0] = iBest; aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); - if( aInst[1]<0 || aInst[1]>=nCol ){ + assert( aInst[1]>=0 ); + if( aInst[1]>=nCol ){ rc = FTS5_CORRUPT; break; } @@ -2145,16 +2425,21 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ } }else{ int i; + rc = fts5SeekCursor(pCsr, 0); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ - const char *z; int n; - void *p = (void*)(&pCsr->aColumnSize[i]); + const char *z = 0; + int n = 0; + int bReset = 0; + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1); + pCsr->aColumnSize[i] = 0; - rc = fts5ApiColumnText(pCtx, i, &z, &n); + rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5Tokenize( - pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb + rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX, + z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb ); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } } } @@ -2401,8 +2686,71 @@ static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) ); +/* +** The xColumnLocale() API. +*/ +static int fts5ApiColumnLocale( + Fts5Context *pCtx, + int iCol, + const char **pzLocale, + int *pnLocale +){ + int rc = SQLITE_OK; + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; + + *pzLocale = 0; + *pnLocale = 0; + + assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL ); + if( iCol<0 || iCol>=pConfig->nCol ){ + rc = SQLITE_RANGE; + }else if( + pConfig->abUnindexed[iCol]==0 + && pConfig->eContent!=FTS5_CONTENT_NONE + && pConfig->bLocale + ){ + rc = fts5SeekCursor(pCsr, 0); + if( rc==SQLITE_OK ){ + /* Load the value into pVal. pVal is a locale/text pair iff: + ** + ** 1) It is an SQLITE_BLOB, and + ** 2) Either the subtype is FTS5_LOCALE_SUBTYPE, or else the + ** value was loaded from an FTS5_CONTENT_NORMAL table, and + ** 3) It does not begin with an 0x00 byte. + */ + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); + if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ + const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ + const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + if( nBlob<SZHDR || memcmp(FTS5_LOCALE_HEADER, pBlob, SZHDR) ){ + rc = SQLITE_ERROR; + } + pBlob += 4; + nBlob -= 4; + } + if( rc==SQLITE_OK ){ + int nLocale = 0; + for(nLocale=0; nLocale<nBlob && pBlob[nLocale]!=0x00; nLocale++); + if( nLocale==nBlob || nLocale==0 ){ + rc = SQLITE_ERROR; + }else{ + /* A locale/text pair */ + *pzLocale = (const char*)pBlob; + *pnLocale = nLocale; + } + } + } + } + } + + return rc; +} + static const Fts5ExtensionApi sFts5Api = { - 3, /* iVersion */ + 4, /* iVersion */ fts5ApiUserData, fts5ApiColumnCount, fts5ApiRowCount, @@ -2423,7 +2771,9 @@ static const Fts5ExtensionApi sFts5Api = { fts5ApiPhraseFirstColumn, fts5ApiPhraseNextColumn, fts5ApiQueryToken, - fts5ApiInstToken + fts5ApiInstToken, + fts5ApiColumnLocale, + fts5ApiTokenize_v2 }; /* @@ -2474,6 +2824,7 @@ static void fts5ApiInvoke( sqlite3_value **argv ){ assert( pCsr->pAux==0 ); + assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL ); pCsr->pAux = pAux; pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); pCsr->pAux = 0; @@ -2487,6 +2838,21 @@ static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ return pCsr; } +/* +** Parameter zFmt is a printf() style formatting string. This function +** formats it using the trailing arguments and returns the result as +** an error message to the context passed as the first argument. +*/ +static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){ + char *zErr = 0; + va_list ap; + va_start(ap, zFmt); + zErr = sqlite3_vmprintf(zFmt, ap); + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); + va_end(ap); +} + static void fts5ApiCallback( sqlite3_context *context, int argc, @@ -2502,10 +2868,8 @@ static void fts5ApiCallback( iCsrId = sqlite3_value_int64(argv[0]); pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); - if( pCsr==0 || pCsr->ePlan==0 ){ - char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); + if( pCsr==0 || (pCsr->ePlan==0 || pCsr->ePlan==FTS5_PLAN_SPECIAL) ){ + fts5ResultError(context, "no such cursor: %lld", iCsrId); }else{ sqlite3_vtab *pTab = pCsr->base.pVtab; fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); @@ -2599,6 +2963,57 @@ static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ return rc; } +/* +** Value pVal was read from column iCol of the FTS5 table. This function +** returns it to the owner of pCtx via a call to an sqlite3_result_xxx() +** function. This function deals with the same cases as +** sqlite3Fts5ExtractText(): +** +** 1) Ordinary values. These can be returned using sqlite3_result_value(). +** +** 2) Blobs from fts5_locale(). The text is extracted from these and +** returned via sqlite3_result_text(). The locale is discarded. +*/ +static void fts5ExtractValueFromColumn( + sqlite3_context *pCtx, + Fts5Config *pConfig, + int iCol, + sqlite3_value *pVal +){ + assert( pConfig->eContent!=FTS5_CONTENT_NONE ); + + if( pConfig->bLocale + && sqlite3_value_type(pVal)==SQLITE_BLOB + && pConfig->abUnindexed[iCol]==0 + ){ + const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + const u8 *pBlob = sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + int ii; + + if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ + if( nBlob<SZHDR || memcmp(pBlob, FTS5_LOCALE_HEADER, SZHDR) ){ + sqlite3_result_error_code(pCtx, SQLITE_ERROR); + return; + }else{ + pBlob += 4; + nBlob -= 4; + } + } + + for(ii=0; ii<nBlob && pBlob[ii]; ii++); + if( ii==0 || ii==nBlob ){ + sqlite3_result_error_code(pCtx, SQLITE_ERROR); + }else{ + const char *pText = (const char*)&pBlob[ii+1]; + sqlite3_result_text(pCtx, pText, nBlob-ii-1, SQLITE_TRANSIENT); + } + return; + } + + sqlite3_result_value(pCtx, pVal); +} + /* ** This is the xColumn method, called by SQLite to request a value from ** the row that the supplied cursor currently points to. @@ -2628,8 +3043,8 @@ static int fts5ColumnMethod( ** auxiliary function. */ sqlite3_result_int64(pCtx, pCsr->iCsrId); }else if( iCol==pConfig->nCol+1 ){ - /* The value of the "rank" column. */ + if( pCsr->ePlan==FTS5_PLAN_SOURCE ){ fts5PoslistBlob(pCtx, pCsr); }else if( @@ -2640,20 +3055,27 @@ static int fts5ColumnMethod( fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); } } - }else if( !fts5IsContentless(pTab) ){ - pConfig->pzErrmsg = &pTab->p.base.zErrMsg; - rc = fts5SeekCursor(pCsr, 1); - if( rc==SQLITE_OK ){ - sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + }else{ + /* A column created by the user containing values. */ + int bNochange = sqlite3_vtab_nochange(pCtx); + + if( fts5IsContentless(pTab) ){ + if( bNochange && pConfig->bContentlessDelete ){ + fts5ResultError(pCtx, "cannot UPDATE a subset of " + "columns on fts5 contentless-delete table: %s", pConfig->zName + ); + } + }else if( bNochange==0 || pConfig->eContent!=FTS5_CONTENT_NORMAL ){ + pConfig->pzErrmsg = &pTab->p.base.zErrMsg; + rc = fts5SeekCursor(pCsr, 1); + if( rc==SQLITE_OK ){ + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); + fts5ExtractValueFromColumn(pCtx, pConfig, iCol, pVal); + } + pConfig->pzErrmsg = 0; } - pConfig->pzErrmsg = 0; - }else if( pConfig->bContentlessDelete && sqlite3_vtab_nochange(pCtx) ){ - char *zErr = sqlite3_mprintf("cannot UPDATE a subset of " - "columns on fts5 contentless-delete table: %s", pConfig->zName - ); - sqlite3_result_error(pCtx, zErr, -1); - sqlite3_free(zErr); } + return rc; } @@ -2793,47 +3215,208 @@ static int fts5CreateAux( } /* -** Register a new tokenizer. This is the implementation of the -** fts5_api.xCreateTokenizer() method. +** This function is used by xCreateTokenizer_v2() and xCreateTokenizer(). +** It allocates and partially populates a new Fts5TokenizerModule object. +** The new object is already linked into the Fts5Global context before +** returning. +** +** If successful, SQLITE_OK is returned and a pointer to the new +** Fts5TokenizerModule object returned via output parameter (*ppNew). All +** that is required is for the caller to fill in the methods in +** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native +** as appropriate. +** +** If an error occurs, an SQLite error code is returned and the final value +** of (*ppNew) undefined. */ -static int fts5CreateTokenizer( - fts5_api *pApi, /* Global context (one per db handle) */ +static int fts5NewTokenizerModule( + Fts5Global *pGlobal, /* Global context (one per db handle) */ const char *zName, /* Name of new function */ void *pUserData, /* User data for aux. function */ - fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ - void(*xDestroy)(void*) /* Destructor for pUserData */ + void(*xDestroy)(void*), /* Destructor for pUserData */ + Fts5TokenizerModule **ppNew ){ - Fts5Global *pGlobal = (Fts5Global*)pApi; - Fts5TokenizerModule *pNew; - sqlite3_int64 nName; /* Size of zName and its \0 terminator */ - sqlite3_int64 nByte; /* Bytes of space to allocate */ int rc = SQLITE_OK; + Fts5TokenizerModule *pNew; + sqlite3_int64 nName; /* Size of zName and its \0 terminator */ + sqlite3_int64 nByte; /* Bytes of space to allocate */ nName = strlen(zName) + 1; nByte = sizeof(Fts5TokenizerModule) + nName; - pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte); + *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte); if( pNew ){ - memset(pNew, 0, (size_t)nByte); pNew->zName = (char*)&pNew[1]; memcpy(pNew->zName, zName, nName); pNew->pUserData = pUserData; - pNew->x = *pTokenizer; pNew->xDestroy = xDestroy; pNew->pNext = pGlobal->pTok; pGlobal->pTok = pNew; if( pNew->pNext==0 ){ pGlobal->pDfltTok = pNew; } + } + + return rc; +} + +/* +** An instance of this type is used as the Fts5Tokenizer object for +** wrapper tokenizers - those that provide access to a v1 tokenizer via +** the fts5_tokenizer_v2 API, and those that provide access to a v2 tokenizer +** via the fts5_tokenizer API. +*/ +typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer; +struct Fts5VtoVTokenizer { + Fts5TokenizerModule *pMod; + Fts5Tokenizer *pReal; +}; + +/* +** Create a wrapper tokenizer. The context argument pCtx points to the +** Fts5TokenizerModule object. +*/ +static int fts5VtoVCreate( + void *pCtx, + const char **azArg, + int nArg, + Fts5Tokenizer **ppOut +){ + Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx; + Fts5VtoVTokenizer *pNew = 0; + int rc = SQLITE_OK; + + pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); + if( rc==SQLITE_OK ){ + pNew->pMod = pMod; + if( pMod->bV2Native ){ + rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); + }else{ + rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); + } + if( rc!=SQLITE_OK ){ + sqlite3_free(pNew); + pNew = 0; + } + } + + *ppOut = (Fts5Tokenizer*)pNew; + return rc; +} + +/* +** Delete an Fts5VtoVTokenizer wrapper tokenizer. +*/ +static void fts5VtoVDelete(Fts5Tokenizer *pTok){ + Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; + if( p ){ + Fts5TokenizerModule *pMod = p->pMod; + if( pMod->bV2Native ){ + pMod->x2.xDelete(p->pReal); + }else{ + pMod->x1.xDelete(p->pReal); + } + sqlite3_free(p); + } +} + + +/* +** xTokenizer method for a wrapper tokenizer that offers the v1 interface +** (no support for locales). +*/ +static int fts5V1toV2Tokenize( + Fts5Tokenizer *pTok, + void *pCtx, int flags, + const char *pText, int nText, + int (*xToken)(void*, int, const char*, int, int, int) +){ + Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; + Fts5TokenizerModule *pMod = p->pMod; + assert( pMod->bV2Native ); + return pMod->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken); +} + +/* +** xTokenizer method for a wrapper tokenizer that offers the v2 interface +** (with locale support). +*/ +static int fts5V2toV1Tokenize( + Fts5Tokenizer *pTok, + void *pCtx, int flags, + const char *pText, int nText, + const char *pLocale, int nLocale, + int (*xToken)(void*, int, const char*, int, int, int) +){ + Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; + Fts5TokenizerModule *pMod = p->pMod; + assert( pMod->bV2Native==0 ); + return pMod->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken); +} + +/* +** Register a new tokenizer. This is the implementation of the +** fts5_api.xCreateTokenizer_v2() method. +*/ +static int fts5CreateTokenizer_v2( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of new function */ + void *pUserData, /* User data for aux. function */ + fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */ + void(*xDestroy)(void*) /* Destructor for pUserData */ +){ + Fts5Global *pGlobal = (Fts5Global*)pApi; + int rc = SQLITE_OK; + + if( pTokenizer->iVersion>2 ){ + rc = SQLITE_ERROR; }else{ - rc = SQLITE_NOMEM; + Fts5TokenizerModule *pNew = 0; + rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew); + if( pNew ){ + pNew->x2 = *pTokenizer; + pNew->bV2Native = 1; + pNew->x1.xCreate = fts5VtoVCreate; + pNew->x1.xTokenize = fts5V1toV2Tokenize; + pNew->x1.xDelete = fts5VtoVDelete; + } } return rc; } +/* +** The fts5_api.xCreateTokenizer() method. +*/ +static int fts5CreateTokenizer( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of new function */ + void *pUserData, /* User data for aux. function */ + fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ + void(*xDestroy)(void*) /* Destructor for pUserData */ +){ + Fts5TokenizerModule *pNew = 0; + int rc = SQLITE_OK; + + rc = fts5NewTokenizerModule( + (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew + ); + if( pNew ){ + pNew->x1 = *pTokenizer; + pNew->x2.xCreate = fts5VtoVCreate; + pNew->x2.xTokenize = fts5V2toV1Tokenize; + pNew->x2.xDelete = fts5VtoVDelete; + } + return rc; +} + +/* +** Search the global context passed as the first argument for a tokenizer +** module named zName. If found, return a pointer to the Fts5TokenizerModule +** object. Otherwise, return NULL. +*/ static Fts5TokenizerModule *fts5LocateTokenizer( - Fts5Global *pGlobal, - const char *zName + Fts5Global *pGlobal, /* Global (one per db handle) object */ + const char *zName /* Name of tokenizer module to find */ ){ Fts5TokenizerModule *pMod = 0; @@ -2850,6 +3433,36 @@ static Fts5TokenizerModule *fts5LocateTokenizer( /* ** Find a tokenizer. This is the implementation of the +** fts5_api.xFindTokenizer_v2() method. +*/ +static int fts5FindTokenizer_v2( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of tokenizer */ + void **ppUserData, + fts5_tokenizer_v2 **ppTokenizer /* Populate this object */ +){ + int rc = SQLITE_OK; + Fts5TokenizerModule *pMod; + + pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); + if( pMod ){ + if( pMod->bV2Native ){ + *ppUserData = pMod->pUserData; + }else{ + *ppUserData = (void*)pMod; + } + *ppTokenizer = &pMod->x2; + }else{ + *ppTokenizer = 0; + *ppUserData = 0; + rc = SQLITE_ERROR; + } + + return rc; +} + +/* +** Find a tokenizer. This is the implementation of the ** fts5_api.xFindTokenizer() method. */ static int fts5FindTokenizer( @@ -2863,66 +3476,75 @@ static int fts5FindTokenizer( pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); if( pMod ){ - *pTokenizer = pMod->x; - *ppUserData = pMod->pUserData; + if( pMod->bV2Native==0 ){ + *ppUserData = pMod->pUserData; + }else{ + *ppUserData = (void*)pMod; + } + *pTokenizer = pMod->x1; }else{ - memset(pTokenizer, 0, sizeof(fts5_tokenizer)); + memset(pTokenizer, 0, sizeof(*pTokenizer)); + *ppUserData = 0; rc = SQLITE_ERROR; } return rc; } -int fts5GetTokenizer( - Fts5Global *pGlobal, - const char **azArg, - int nArg, - Fts5Config *pConfig, - char **pzErr -){ - Fts5TokenizerModule *pMod; +/* +** Attempt to instantiate the tokenizer. +*/ +int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){ + const char **azArg = pConfig->t.azArg; + const int nArg = pConfig->t.nArg; + Fts5TokenizerModule *pMod = 0; int rc = SQLITE_OK; - pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]); + pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]); if( pMod==0 ){ assert( nArg>0 ); rc = SQLITE_ERROR; - if( pzErr ) *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]); + sqlite3Fts5ConfigErrmsg(pConfig, "no such tokenizer: %s", azArg[0]); }else{ - rc = pMod->x.xCreate( - pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok + int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0; + if( pMod->bV2Native ){ + xCreate = pMod->x2.xCreate; + pConfig->t.pApi2 = &pMod->x2; + }else{ + pConfig->t.pApi1 = &pMod->x1; + xCreate = pMod->x1.xCreate; + } + + rc = xCreate(pMod->pUserData, + (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok ); - pConfig->t.pTokApi = &pMod->x; + if( rc!=SQLITE_OK ){ - if( pzErr && rc!=SQLITE_NOMEM ){ - *pzErr = sqlite3_mprintf("error in tokenizer constructor"); + if( rc!=SQLITE_NOMEM ){ + sqlite3Fts5ConfigErrmsg(pConfig, "error in tokenizer constructor"); } - }else{ + }else if( pMod->bV2Native==0 ){ pConfig->t.ePattern = sqlite3Fts5TokenizerPattern( - pMod->x.xCreate, pConfig->t.pTok + pMod->x1.xCreate, pConfig->t.pTok ); } } if( rc!=SQLITE_OK ){ - pConfig->t.pTokApi = 0; + pConfig->t.pApi1 = 0; + pConfig->t.pApi2 = 0; pConfig->t.pTok = 0; } return rc; } + /* -** Attempt to instantiate the tokenizer. +** xDestroy callback passed to sqlite3_create_module(). This is invoked +** when the db handle is being closed. Free memory associated with +** tokenizers and aux functions registered with this db handle. */ -int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){ - return fts5GetTokenizer( - pConfig->pGlobal, pConfig->t.azArg, pConfig->t.nArg, - pConfig, pConfig->pzErrmsg - ); -} - - static void fts5ModuleDestroy(void *pCtx){ Fts5TokenizerModule *pTok, *pNextTok; Fts5Auxiliary *pAux, *pNextAux; @@ -2943,6 +3565,10 @@ static void fts5ModuleDestroy(void *pCtx){ sqlite3_free(pGlobal); } +/* +** Implementation of the fts5() function used by clients to obtain the +** API pointer. +*/ static void fts5Fts5Func( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ @@ -2970,6 +3596,69 @@ static void fts5SourceIdFunc( } /* +** Implementation of fts5_locale(LOCALE, TEXT) function. +** +** If parameter LOCALE is NULL, or a zero-length string, then a copy of +** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as +** text, and the value returned is a blob consisting of: +** +** * The 4 bytes 0x00, 0xE0, 0xB2, 0xEb (FTS5_LOCALE_HEADER). +** * The LOCALE, as utf-8 text, followed by +** * 0x00, followed by +** * The TEXT, as utf-8 text. +** +** There is no final nul-terminator following the TEXT value. +*/ +static void fts5LocaleFunc( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apArg /* Function arguments */ +){ + const char *zLocale = 0; + int nLocale = 0; + const char *zText = 0; + int nText = 0; + + assert( nArg==2 ); + UNUSED_PARAM(nArg); + + zLocale = (const char*)sqlite3_value_text(apArg[0]); + nLocale = sqlite3_value_bytes(apArg[0]); + + zText = (const char*)sqlite3_value_text(apArg[1]); + nText = sqlite3_value_bytes(apArg[1]); + + if( zLocale==0 || zLocale[0]=='\0' ){ + sqlite3_result_text(pCtx, zText, nText, SQLITE_TRANSIENT); + }else{ + u8 *pBlob = 0; + u8 *pCsr = 0; + int nBlob = 0; + const int nHdr = 4; + assert( sizeof(FTS5_LOCALE_HEADER)==nHdr+1 ); + + nBlob = nHdr + nLocale + 1 + nText; + pBlob = (u8*)sqlite3_malloc(nBlob); + if( pBlob==0 ){ + sqlite3_result_error_nomem(pCtx); + return; + } + + pCsr = pBlob; + memcpy(pCsr, FTS5_LOCALE_HEADER, nHdr); + pCsr += nHdr; + memcpy(pCsr, zLocale, nLocale); + pCsr += nLocale; + (*pCsr++) = 0x00; + if( zText ) memcpy(pCsr, zText, nText); + assert( &pCsr[nText]==&pBlob[nBlob] ); + + sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free); + sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE); + } +} + +/* ** Return true if zName is the extension on one of the shadow tables used ** by this module. */ @@ -3061,10 +3750,12 @@ static int fts5Init(sqlite3 *db){ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; - pGlobal->api.iVersion = 2; + pGlobal->api.iVersion = 3; pGlobal->api.xCreateFunction = fts5CreateAux; pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; pGlobal->api.xFindTokenizer = fts5FindTokenizer; + pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; + pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); @@ -3083,6 +3774,13 @@ static int fts5Init(sqlite3 *db){ p, fts5SourceIdFunc, 0, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5_locale", 2, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE, + p, fts5LocaleFunc, 0, 0 + ); + } } /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 0b676e6b4..cf25eb361 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -16,13 +16,40 @@ #include "fts5Int.h" +/* +** pSavedRow: +** SQL statement FTS5_STMT_LOOKUP2 is a copy of FTS5_STMT_LOOKUP, it +** does a by-rowid lookup to retrieve a single row from the %_content +** table or equivalent external-content table/view. +** +** However, FTS5_STMT_LOOKUP2 is only used when retrieving the original +** values for a row being UPDATEd. In that case, the SQL statement is +** not reset and pSavedRow is set to point at it. This is so that the +** insert operation that follows the delete may access the original +** row values for any new values for which sqlite3_value_nochange() returns +** true. i.e. if the user executes: +** +** CREATE VIRTUAL TABLE ft USING fts5(a, b, c, locale=1); +** ... +** UPDATE fts SET a=?, b=? WHERE rowid=?; +** +** then the value passed to the xUpdate() method of this table as the +** new.c value is an sqlite3_value_nochange() value. So in this case it +** must be read from the saved row stored in Fts5Storage.pSavedRow. +** +** This is necessary - using sqlite3_value_nochange() instead of just having +** SQLite pass the original value back via xUpdate() - so as not to discard +** any locale information associated with such values. +** +*/ struct Fts5Storage { Fts5Config *pConfig; Fts5Index *pIndex; int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ i64 nTotalRow; /* Total number of rows in FTS table */ i64 *aTotalSize; /* Total sizes of each column */ - sqlite3_stmt *aStmt[11]; + sqlite3_stmt *pSavedRow; + sqlite3_stmt *aStmt[12]; }; @@ -36,14 +63,15 @@ struct Fts5Storage { # error "FTS5_STMT_LOOKUP mismatch" #endif -#define FTS5_STMT_INSERT_CONTENT 3 -#define FTS5_STMT_REPLACE_CONTENT 4 -#define FTS5_STMT_DELETE_CONTENT 5 -#define FTS5_STMT_REPLACE_DOCSIZE 6 -#define FTS5_STMT_DELETE_DOCSIZE 7 -#define FTS5_STMT_LOOKUP_DOCSIZE 8 -#define FTS5_STMT_REPLACE_CONFIG 9 -#define FTS5_STMT_SCAN 10 +#define FTS5_STMT_LOOKUP2 3 +#define FTS5_STMT_INSERT_CONTENT 4 +#define FTS5_STMT_REPLACE_CONTENT 5 +#define FTS5_STMT_DELETE_CONTENT 6 +#define FTS5_STMT_REPLACE_DOCSIZE 7 +#define FTS5_STMT_DELETE_DOCSIZE 8 +#define FTS5_STMT_LOOKUP_DOCSIZE 9 +#define FTS5_STMT_REPLACE_CONFIG 10 +#define FTS5_STMT_SCAN 11 /* ** Prepare the two insert statements - Fts5Storage.pInsertContent and @@ -73,6 +101,7 @@ static int fts5StorageGetStmt( "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ + "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP2 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ @@ -88,6 +117,8 @@ static int fts5StorageGetStmt( Fts5Config *pC = p->pConfig; char *zSql = 0; + assert( ArraySize(azStmt)==ArraySize(p->aStmt) ); + switch( eStmt ){ case FTS5_STMT_SCAN: zSql = sqlite3_mprintf(azStmt[eStmt], @@ -104,6 +135,7 @@ static int fts5StorageGetStmt( break; case FTS5_STMT_LOOKUP: + case FTS5_STMT_LOOKUP2: zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, pC->zContent, pC->zContentRowid ); @@ -150,7 +182,7 @@ static int fts5StorageGetStmt( rc = SQLITE_NOMEM; }else{ int f = SQLITE_PREPARE_PERSISTENT; - if( eStmt>FTS5_STMT_LOOKUP ) f |= SQLITE_PREPARE_NO_VTAB; + if( eStmt>FTS5_STMT_LOOKUP2 ) f |= SQLITE_PREPARE_NO_VTAB; p->pConfig->bLock++; rc = sqlite3_prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0); p->pConfig->bLock--; @@ -400,14 +432,48 @@ static int fts5StorageInsertCallback( } /* +** This function is used as part of an UPDATE statement that modifies the +** rowid of a row. In that case, this function is called first to set +** Fts5Storage.pSavedRow to point to a statement that may be used to +** access the original values of the row being deleted - iDel. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +** It is not considered an error if row iDel does not exist. In this case +** pSavedRow is not set and SQLITE_OK returned. +*/ +int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel){ + int rc = SQLITE_OK; + sqlite3_stmt *pSeek = 0; + + assert( p->pSavedRow==0 ); + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP+1, &pSeek, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pSeek, 1, iDel); + if( sqlite3_step(pSeek)!=SQLITE_ROW ){ + rc = sqlite3_reset(pSeek); + }else{ + p->pSavedRow = pSeek; + } + } + + return rc; +} + +/* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually ** remove the %_content row at this time though. +** +** If parameter bSaveRow is true, then Fts5Storage.pSavedRow is left +** pointing to a statement (FTS5_STMT_LOOKUP2) that may be used to access +** the original values of the row being deleted. This is used by UPDATE +** statements. */ static int fts5StorageDeleteFromIndex( Fts5Storage *p, i64 iDel, - sqlite3_value **apVal + sqlite3_value **apVal, + int bSaveRow /* True to set pSavedRow */ ){ Fts5Config *pConfig = p->pConfig; sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ @@ -416,12 +482,21 @@ static int fts5StorageDeleteFromIndex( int iCol; Fts5InsertCtx ctx; + assert( bSaveRow==0 || apVal==0 ); + assert( bSaveRow==0 || bSaveRow==1 ); + assert( FTS5_STMT_LOOKUP2==FTS5_STMT_LOOKUP+1 ); + if( apVal==0 ){ - rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0); - if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int64(pSeek, 1, iDel); - if( sqlite3_step(pSeek)!=SQLITE_ROW ){ - return sqlite3_reset(pSeek); + if( p->pSavedRow && bSaveRow ){ + pSeek = p->pSavedRow; + p->pSavedRow = 0; + }else{ + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP+bSaveRow, &pSeek, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pSeek, 1, iDel); + if( sqlite3_step(pSeek)!=SQLITE_ROW ){ + return sqlite3_reset(pSeek); + } } } @@ -429,26 +504,32 @@ static int fts5StorageDeleteFromIndex( ctx.iCol = -1; for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1]==0 ){ - const char *zText; - int nText; + sqlite3_value *pVal = 0; + const char *pText = 0; + int nText = 0; + int bReset = 0; + assert( pSeek==0 || apVal==0 ); assert( pSeek!=0 || apVal!=0 ); if( pSeek ){ - zText = (const char*)sqlite3_column_text(pSeek, iCol); - nText = sqlite3_column_bytes(pSeek, iCol); - }else if( ALWAYS(apVal) ){ - zText = (const char*)sqlite3_value_text(apVal[iCol-1]); - nText = sqlite3_value_bytes(apVal[iCol-1]); + pVal = sqlite3_column_value(pSeek, iCol); }else{ - continue; + pVal = apVal[iCol-1]; } - ctx.szCol = 0; - rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, - zText, nText, (void*)&ctx, fts5StorageInsertCallback + + rc = sqlite3Fts5ExtractText( + pConfig, pVal, pSeek!=0, &bReset, &pText, &nText ); - p->aTotalSize[iCol-1] -= (i64)ctx.szCol; - if( p->aTotalSize[iCol-1]<0 && rc==SQLITE_OK ){ - rc = FTS5_CORRUPT; + if( rc==SQLITE_OK ){ + ctx.szCol = 0; + rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, + pText, nText, (void*)&ctx, fts5StorageInsertCallback + ); + p->aTotalSize[iCol-1] -= (i64)ctx.szCol; + if( rc==SQLITE_OK && p->aTotalSize[iCol-1]<0 ){ + rc = FTS5_CORRUPT; + } + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } } } @@ -458,12 +539,30 @@ static int fts5StorageDeleteFromIndex( p->nTotalRow--; } - rc2 = sqlite3_reset(pSeek); - if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK && bSaveRow ){ + assert( p->pSavedRow==0 ); + p->pSavedRow = pSeek; + }else{ + rc2 = sqlite3_reset(pSeek); + if( rc==SQLITE_OK ) rc = rc2; + } return rc; } /* +** Reset any saved statement pSavedRow. Zero pSavedRow as well. This +** should be called by the xUpdate() method of the fts5 table before +** returning from any operation that may have set Fts5Storage.pSavedRow. +*/ +void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage *pStorage){ + assert( pStorage->pSavedRow==0 + || pStorage->pSavedRow==pStorage->aStmt[FTS5_STMT_LOOKUP2] + ); + sqlite3_reset(pStorage->pSavedRow); + pStorage->pSavedRow = 0; +} + +/* ** This function is called to process a DELETE on a contentless_delete=1 ** table. It adds the tombstone required to delete the entry with rowid ** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs, @@ -519,12 +618,12 @@ static int fts5StorageInsertDocsize( rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin); sqlite3_bind_int64(pReplace, 3, iOrigin); } - if( rc==SQLITE_OK ){ - sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); - sqlite3_step(pReplace); - rc = sqlite3_reset(pReplace); - sqlite3_bind_null(pReplace, 2); - } + } + if( rc==SQLITE_OK ){ + sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + sqlite3_bind_null(pReplace, 2); } } return rc; @@ -578,7 +677,12 @@ static int fts5StorageSaveTotals(Fts5Storage *p){ /* ** Remove a row from the FTS table. */ -int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){ +int sqlite3Fts5StorageDelete( + Fts5Storage *p, /* Storage object */ + i64 iDel, /* Rowid to delete from table */ + sqlite3_value **apVal, /* Optional - values to remove from index */ + int bSaveRow /* If true, set pSavedRow for deleted row */ +){ Fts5Config *pConfig = p->pConfig; int rc; sqlite3_stmt *pDel = 0; @@ -595,7 +699,7 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){ if( p->pConfig->bContentlessDelete ){ rc = fts5StorageContentlessDelete(p, iDel); }else{ - rc = fts5StorageDeleteFromIndex(p, iDel, apVal); + rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow); } } @@ -684,14 +788,21 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){ for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ - const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1); - int nText = sqlite3_column_bytes(pScan, ctx.iCol+1); - rc = sqlite3Fts5Tokenize(pConfig, - FTS5_TOKENIZE_DOCUMENT, - zText, nText, - (void*)&ctx, - fts5StorageInsertCallback - ); + int bReset = 0; /* True if tokenizer locale must be reset */ + int nText = 0; /* Size of pText in bytes */ + const char *pText = 0; /* Pointer to buffer containing text value */ + sqlite3_value *pVal = sqlite3_column_value(pScan, ctx.iCol+1); + + rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &pText, &nText); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, + pText, nText, + (void*)&ctx, + fts5StorageInsertCallback + ); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); + } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; @@ -775,7 +886,31 @@ int sqlite3Fts5StorageContentInsert( int i; /* Counter variable */ rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0); for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ - rc = sqlite3_bind_value(pInsert, i, apVal[i]); + sqlite3_value *pVal = apVal[i]; + if( sqlite3_value_nochange(pVal) && p->pSavedRow ){ + /* This is an UPDATE statement, and column (i-2) was not modified. + ** Retrieve the value from Fts5Storage.pSavedRow instead. */ + pVal = sqlite3_column_value(p->pSavedRow, i-1); + }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){ + assert( pConfig->bLocale ); + assert( i>1 ); + if( pConfig->abUnindexed[i-2] ){ + /* At attempt to insert an fts5_locale() value into an UNINDEXED + ** column. Strip the locale away and just bind the text. */ + const char *pText = 0; + int nText = 0; + rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText); + sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + }else{ + const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + assert( nBlob>4 ); + sqlite3_bind_blob(pInsert, i, pBlob+4, nBlob-4, SQLITE_TRANSIENT); + } + continue; + } + + rc = sqlite3_bind_value(pInsert, i, pVal); } if( rc==SQLITE_OK ){ sqlite3_step(pInsert); @@ -810,14 +945,24 @@ int sqlite3Fts5StorageIndexInsert( for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ - const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]); - int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]); - rc = sqlite3Fts5Tokenize(pConfig, - FTS5_TOKENIZE_DOCUMENT, - zText, nText, - (void*)&ctx, - fts5StorageInsertCallback - ); + int bReset = 0; /* True if tokenizer locale must be reset */ + int nText = 0; /* Size of pText in bytes */ + const char *pText = 0; /* Pointer to buffer containing text value */ + sqlite3_value *pVal = apVal[ctx.iCol+2]; + int bDisk = 0; + if( p->pSavedRow && sqlite3_value_nochange(pVal) ){ + pVal = sqlite3_column_value(p->pSavedRow, ctx.iCol+1); + bDisk = 1; + } + rc = sqlite3Fts5ExtractText(pConfig, pVal, bDisk, &bReset, &pText,&nText); + if( rc==SQLITE_OK ){ + assert( bReset==0 || pConfig->bLocale ); + rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, + fts5StorageInsertCallback + ); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); + } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; @@ -988,14 +1133,22 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ rc = sqlite3Fts5TermsetNew(&ctx.pTermset); } if( rc==SQLITE_OK ){ - const char *zText = (const char*)sqlite3_column_text(pScan, i+1); - int nText = sqlite3_column_bytes(pScan, i+1); - rc = sqlite3Fts5Tokenize(pConfig, - FTS5_TOKENIZE_DOCUMENT, - zText, nText, - (void*)&ctx, - fts5StorageIntegrityCallback + int bReset = 0; /* True if tokenizer locale must be reset */ + int nText = 0; /* Size of pText in bytes */ + const char *pText = 0; /* Pointer to buffer containing text value */ + + rc = sqlite3Fts5ExtractText(pConfig, + sqlite3_column_value(pScan, i+1), 1, &bReset, &pText, &nText ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, + pText, nText, + (void*)&ctx, + fts5StorageIntegrityCallback + ); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); + } } if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ rc = FTS5_CORRUPT; diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index b67b037c4..1e9f7bbb6 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -240,6 +240,7 @@ static int SQLITE_TCLAPI xF5tApi( { "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */ { "xInstToken", 2, "IDX ITERM" }, /* 19 */ + { "xColumnLocale", 1, "COL" }, /* 20 */ { 0, 0, 0} }; @@ -528,6 +529,20 @@ static int SQLITE_TCLAPI xF5tApi( break; } + CASE(20, "xColumnLocale") { + const char *z = 0; + int n = 0; + int iCol; + if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ + return TCL_ERROR; + } + rc = p->pApi->xColumnLocale(p->pFts, iCol, &z, &n); + if( rc==SQLITE_OK && z ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n)); + } + break; + } + default: assert( 0 ); break; @@ -796,18 +811,32 @@ typedef struct F5tTokenizerInstance F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; int (*xToken)(void*, int, const char*, int, int, int); + F5tTokenizerInstance *pInst; }; struct F5tTokenizerModule { Tcl_Interp *interp; Tcl_Obj *pScript; + void *pParentCtx; + fts5_tokenizer_v2 parent_v2; + fts5_tokenizer parent; F5tTokenizerContext *pContext; }; +/* +** zLocale: +** Within a call to xTokenize_v2(), pLocale/nLocale store the locale +** passed to the call by fts5. This can be retrieved by a Tcl tokenize +** script using [sqlite3_fts5_locale]. +*/ struct F5tTokenizerInstance { Tcl_Interp *interp; Tcl_Obj *pScript; + F5tTokenizerModule *pModule; + Fts5Tokenizer *pParent; F5tTokenizerContext *pContext; + const char *pLocale; + int nLocale; }; static int f5tTokenizerCreate( @@ -816,11 +845,20 @@ static int f5tTokenizerCreate( int nArg, Fts5Tokenizer **ppOut ){ + Fts5Tokenizer *pParent = 0; F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_Obj *pEval; int rc = TCL_OK; int i; + assert( pMod->parent_v2.xCreate==0 || pMod->parent.xCreate==0 ); + if( pMod->parent_v2.xCreate ){ + rc = pMod->parent_v2.xCreate(pMod->pParentCtx, 0, 0, &pParent); + } + if( pMod->parent.xCreate ){ + rc = pMod->parent.xCreate(pMod->pParentCtx, 0, 0, &pParent); + } + pEval = Tcl_DuplicateObj(pMod->pScript); Tcl_IncrRefCount(pEval); for(i=0; rc==TCL_OK && i<nArg; i++){ @@ -840,6 +878,8 @@ static int f5tTokenizerCreate( pInst->interp = pMod->interp; pInst->pScript = Tcl_GetObjResult(pMod->interp); pInst->pContext = pMod->pContext; + pInst->pParent = pParent; + pInst->pModule = pMod; Tcl_IncrRefCount(pInst->pScript); *ppOut = (Fts5Tokenizer*)pInst; } @@ -850,11 +890,21 @@ static int f5tTokenizerCreate( static void f5tTokenizerDelete(Fts5Tokenizer *p){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; - Tcl_DecrRefCount(pInst->pScript); - ckfree((char *)pInst); + if( pInst ){ + if( pInst->pParent ){ + if( pInst->pModule->parent_v2.xDelete ){ + pInst->pModule->parent_v2.xDelete(pInst->pParent); + }else{ + pInst->pModule->parent.xDelete(pInst->pParent); + } + } + Tcl_DecrRefCount(pInst->pScript); + ckfree((char *)pInst); + } } -static int f5tTokenizerTokenize( + +static int f5tTokenizerReallyTokenize( Fts5Tokenizer *p, void *pCtx, int flags, @@ -862,6 +912,7 @@ static int f5tTokenizerTokenize( int (*xToken)(void*, int, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + F5tTokenizerInstance *pOldInst = 0; void *pOldCtx; int (*xOldToken)(void*, int, const char*, int, int, int); Tcl_Obj *pEval; @@ -870,9 +921,11 @@ static int f5tTokenizerTokenize( pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; + pOldInst = pInst->pContext->pInst; pInst->pContext->pCtx = pCtx; pInst->pContext->xToken = xToken; + pInst->pContext->pInst = pInst; assert( flags==FTS5_TOKENIZE_DOCUMENT @@ -908,9 +961,105 @@ static int f5tTokenizerTokenize( pInst->pContext->pCtx = pOldCtx; pInst->pContext->xToken = xOldToken; + pInst->pContext->pInst = pOldInst; return rc; } +typedef struct CallbackCtx CallbackCtx; +struct CallbackCtx { + Fts5Tokenizer *p; + void *pCtx; + int flags; + int (*xToken)(void*, int, const char*, int, int, int); +}; + +static int f5tTokenizeCallback( + void *pCtx, + int tflags, + const char *z, int n, + int iStart, int iEnd +){ + CallbackCtx *p = (CallbackCtx*)pCtx; + return f5tTokenizerReallyTokenize(p->p, p->pCtx, p->flags, z, n, p->xToken); +} + +static int f5tTokenizerTokenize_v2( + Fts5Tokenizer *p, + void *pCtx, + int flags, + const char *pText, int nText, + const char *pLoc, int nLoc, + int (*xToken)(void*, int, const char*, int, int, int) +){ + int rc = SQLITE_OK; + F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + + pInst->pLocale = pLoc; + pInst->nLocale = nLoc; + + if( pInst->pParent ){ + CallbackCtx ctx; + ctx.p = p; + ctx.pCtx = pCtx; + ctx.flags = flags; + ctx.xToken = xToken; + if( pInst->pModule->parent_v2.xTokenize ){ + rc = pInst->pModule->parent_v2.xTokenize( + pInst->pParent, (void*)&ctx, flags, pText, nText, + pLoc, nLoc, f5tTokenizeCallback + ); + }else{ + rc = pInst->pModule->parent.xTokenize( + pInst->pParent, (void*)&ctx, flags, pText, nText, f5tTokenizeCallback + ); + } + }else{ + rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken); + } + + pInst->pLocale = 0; + pInst->nLocale = 0; + return rc; +} +static int f5tTokenizerTokenize( + Fts5Tokenizer *p, + void *pCtx, + int flags, + const char *pText, int nText, + int (*xToken)(void*, int, const char*, int, int, int) +){ + return f5tTokenizerTokenize_v2(p, pCtx, flags, pText, nText, 0, 0, xToken); +} + +/* +** sqlite3_fts5_locale +*/ +static int SQLITE_TCLAPI f5tTokenizerLocale( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; + + if( objc!=1 ){ + Tcl_WrongNumArgs(interp, 1, objv, ""); + return TCL_ERROR; + } + + if( p->xToken==0 ){ + Tcl_AppendResult(interp, + "sqlite3_fts5_locale may only be used by tokenizer callback", 0 + ); + return TCL_ERROR; + } + + Tcl_SetObjResult(interp, + Tcl_NewStringObj(p->pInst->pLocale, p->pInst->nLocale) + ); + return TCL_OK; +} + /* ** sqlite3_fts5_token ?-colocated? TEXT START END */ @@ -996,32 +1145,112 @@ static int SQLITE_TCLAPI f5tCreateTokenizer( fts5_api *pApi; char *zName; Tcl_Obj *pScript; - fts5_tokenizer t; F5tTokenizerModule *pMod; - int rc; + int rc = SQLITE_OK; + int bV2 = 0; /* True to use _v2 API */ + int iVersion = 2; /* Value for _v2.iVersion */ + const char *zParent = 0; /* Name of parent tokenizer, if any */ + int ii = 0; - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT"); + if( objc<4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?OPTIONS? DB NAME SCRIPT"); return TCL_ERROR; } - if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){ - return TCL_ERROR; + + /* Parse any options. Set stack variables bV2 and zParent. */ + for(ii=1; ii<objc-3; ii++){ + int iOpt = 0; + const char *azOpt[] = { "-v2", "-parent", "-version", 0 }; + if( Tcl_GetIndexFromObj(interp, objv[ii], azOpt, "OPTION", 0, &iOpt) ){ + return TCL_ERROR; + } + switch( iOpt ){ + case 0: /* -v2 */ { + bV2 = 1; + break; + } + case 1: /* -parent */ { + ii++; + if( ii==objc-3 ){ + Tcl_AppendResult( + interp, "option requires an argument: -parent", (char*)0 + ); + return TCL_ERROR; + } + zParent = Tcl_GetString(objv[ii]); + break; + } + case 2: /* -version */ { + ii++; + if( ii==objc-3 ){ + Tcl_AppendResult( + interp, "option requires an argument: -version", (char*)0 + ); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[ii], &iVersion) ){ + return TCL_ERROR; + } + break; + } + default: + assert( 0 ); + break; + } } - zName = Tcl_GetString(objv[2]); - pScript = objv[3]; - t.xCreate = f5tTokenizerCreate; - t.xTokenize = f5tTokenizerTokenize; - t.xDelete = f5tTokenizerDelete; + if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ){ + return TCL_ERROR; + } + zName = Tcl_GetString(objv[objc-2]); + pScript = objv[objc-1]; pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule)); + memset(pMod, 0, sizeof(F5tTokenizerModule)); pMod->interp = interp; pMod->pScript = pScript; - pMod->pContext = pContext; Tcl_IncrRefCount(pScript); - rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer); + pMod->pContext = pContext; + if( zParent ){ + if( bV2 ){ + fts5_tokenizer_v2 *pParent = 0; + rc = pApi->xFindTokenizer_v2(pApi, zParent, &pMod->pParentCtx, &pParent); + if( rc==SQLITE_OK ){ + memcpy(&pMod->parent_v2, pParent, sizeof(fts5_tokenizer_v2)); + pMod->parent_v2.xDelete(0); + } + }else{ + rc = pApi->xFindTokenizer(pApi, zParent, &pMod->pParentCtx,&pMod->parent); + if( rc==SQLITE_OK ){ + pMod->parent.xDelete(0); + } + } + } + + if( rc==SQLITE_OK ){ + void *pModCtx = (void*)pMod; + if( bV2==0 ){ + fts5_tokenizer t; + t.xCreate = f5tTokenizerCreate; + t.xTokenize = f5tTokenizerTokenize; + t.xDelete = f5tTokenizerDelete; + rc = pApi->xCreateTokenizer(pApi, zName, pModCtx, &t, f5tDelTokenizer); + }else{ + fts5_tokenizer_v2 t2; + memset(&t2, 0, sizeof(t2)); + t2.iVersion = iVersion; + t2.xCreate = f5tTokenizerCreate; + t2.xTokenize = f5tTokenizerTokenize_v2; + t2.xDelete = f5tTokenizerDelete; + rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer); + } + } + if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0); + Tcl_AppendResult(interp, ( + bV2 ? "error in fts5_api.xCreateTokenizer_v2()" + : "error in fts5_api.xCreateTokenizer()" + ), 0); return TCL_ERROR; } @@ -1328,6 +1557,7 @@ int Fts5tcl_Init(Tcl_Interp *interp){ } aCmd[] = { { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 }, { "sqlite3_fts5_token", f5tTokenizerReturn, 1 }, + { "sqlite3_fts5_locale", f5tTokenizerLocale, 1 }, { "sqlite3_fts5_tokenize", f5tTokenize, 0 }, { "sqlite3_fts5_create_function", f5tCreateFunction, 0 }, { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 }, diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c index 9f5cd24c3..f92529b84 100644 --- a/ext/fts5/fts5_tokenize.c +++ b/ext/fts5/fts5_tokenize.c @@ -79,7 +79,7 @@ static int fts5AsciiCreate( int i; memset(p, 0, sizeof(AsciiTokenizer)); memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); - for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){ + for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ const char *zArg = azArg[i+1]; if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ fts5AsciiAddExceptions(p, zArg, 1); @@ -90,7 +90,6 @@ static int fts5AsciiCreate( rc = SQLITE_ERROR; } } - if( rc==SQLITE_OK && i<nArg ) rc = SQLITE_ERROR; if( rc!=SQLITE_OK ){ fts5AsciiDelete((Fts5Tokenizer*)p); p = 0; @@ -382,7 +381,7 @@ static int fts5UnicodeCreate( } /* Search for a "categories" argument */ - for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){ + for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ if( 0==sqlite3_stricmp(azArg[i], "categories") ){ zCat = azArg[i+1]; } @@ -391,7 +390,7 @@ static int fts5UnicodeCreate( rc = unicodeSetCategories(p, zCat); } - for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){ + for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ const char *zArg = azArg[i+1]; if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ @@ -416,8 +415,6 @@ static int fts5UnicodeCreate( rc = SQLITE_ERROR; } } - if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR; - }else{ rc = SQLITE_NOMEM; } @@ -556,7 +553,7 @@ static int fts5UnicodeTokenize( typedef struct PorterTokenizer PorterTokenizer; struct PorterTokenizer { - fts5_tokenizer tokenizer; /* Parent tokenizer module */ + fts5_tokenizer_v2 tokenizer_v2; /* Parent tokenizer module */ Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ char aBuf[FTS5_PORTER_MAX_TOKEN + 64]; }; @@ -568,7 +565,7 @@ static void fts5PorterDelete(Fts5Tokenizer *pTok){ if( pTok ){ PorterTokenizer *p = (PorterTokenizer*)pTok; if( p->pTokenizer ){ - p->tokenizer.xDelete(p->pTokenizer); + p->tokenizer_v2.xDelete(p->pTokenizer); } sqlite3_free(p); } @@ -587,6 +584,7 @@ static int fts5PorterCreate( PorterTokenizer *pRet; void *pUserdata = 0; const char *zBase = "unicode61"; + fts5_tokenizer_v2 *pV2 = 0; if( nArg>0 ){ zBase = azArg[0]; @@ -595,14 +593,15 @@ static int fts5PorterCreate( pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); if( pRet ){ memset(pRet, 0, sizeof(PorterTokenizer)); - rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); + rc = pApi->xFindTokenizer_v2(pApi, zBase, &pUserdata, &pV2); }else{ rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ int nArg2 = (nArg>0 ? nArg-1 : 0); - const char **azArg2 = (nArg2 ? &azArg[1] : 0); - rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer); + const char **az2 = (nArg2 ? &azArg[1] : 0); + memcpy(&pRet->tokenizer_v2, pV2, sizeof(fts5_tokenizer_v2)); + rc = pRet->tokenizer_v2.xCreate(pUserdata, az2, nArg2, &pRet->pTokenizer); } if( rc!=SQLITE_OK ){ @@ -1253,6 +1252,7 @@ static int fts5PorterTokenize( void *pCtx, int flags, const char *pText, int nText, + const char *pLoc, int nLoc, int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; @@ -1260,8 +1260,8 @@ static int fts5PorterTokenize( sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; - return p->tokenizer.xTokenize( - p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb + return p->tokenizer_v2.xTokenize( + p->pTokenizer, (void*)&sCtx, flags, pText, nText, pLoc, nLoc, fts5PorterCb ); } @@ -1291,41 +1291,46 @@ static int fts5TriCreate( Fts5Tokenizer **ppOut ){ int rc = SQLITE_OK; - TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); + TrigramTokenizer *pNew = 0; UNUSED_PARAM(pUnused); - if( pNew==0 ){ - rc = SQLITE_NOMEM; + if( nArg%2 ){ + rc = SQLITE_ERROR; }else{ int i; - pNew->bFold = 1; - pNew->iFoldParam = 0; - for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){ - const char *zArg = azArg[i+1]; - if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){ - if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ - rc = SQLITE_ERROR; + pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + pNew->bFold = 1; + pNew->iFoldParam = 0; + + for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ + const char *zArg = azArg[i+1]; + if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ + rc = SQLITE_ERROR; + }else{ + pNew->bFold = (zArg[0]=='0'); + } + }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ + if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ + rc = SQLITE_ERROR; + }else{ + pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; + } }else{ - pNew->bFold = (zArg[0]=='0'); - } - }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ - if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ rc = SQLITE_ERROR; - }else{ - pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; } - }else{ + } + + if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ rc = SQLITE_ERROR; } - } - if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR; - - if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ - rc = SQLITE_ERROR; - } - - if( rc!=SQLITE_OK ){ - fts5TriDelete((Fts5Tokenizer*)pNew); - pNew = 0; + + if( rc!=SQLITE_OK ){ + fts5TriDelete((Fts5Tokenizer*)pNew); + pNew = 0; + } } } *ppOut = (Fts5Tokenizer*)pNew; @@ -1450,7 +1455,6 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ } aBuiltin[] = { { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, - { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}}, }; @@ -1465,6 +1469,19 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ 0 ); } - + if( rc==SQLITE_OK ){ + fts5_tokenizer_v2 sPorter = { + 2, + fts5PorterCreate, + fts5PorterDelete, + fts5PorterTokenize + }; + rc = pApi->xCreateTokenizer_v2(pApi, + "porter", + (void*)pApi, + &sPorter, + 0 + ); + } return rc; } diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c index 3e97264fa..cc164a456 100644 --- a/ext/fts5/fts5_unicode2.c +++ b/ext/fts5/fts5_unicode2.c @@ -364,6 +364,9 @@ int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ default: return 1; } break; + + default: + return 1; } return 0; } diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 7076a52bb..8ea87dbdd 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -51,6 +51,10 @@ proc fts5_test_poslist2 {cmd} { sort_poslist $res } +proc fts5_test_insttoken {cmd iInst iToken} { + $cmd xInstToken $iInst $iToken +} + proc fts5_test_collist {cmd} { set res [list] @@ -78,6 +82,9 @@ proc fts5_test_columnsize {cmd} { proc fts5_columntext {cmd iCol} { $cmd xColumnText $iCol } +proc fts5_columnlocale {cmd iCol} { + $cmd xColumnLocale $iCol +} proc fts5_test_columntext {cmd} { set res [list] @@ -87,6 +94,14 @@ proc fts5_test_columntext {cmd} { set res } +proc fts5_test_columnlocale {cmd} { + set res [list] + for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { + lappend res [$cmd xColumnLocale $i] + } + set res +} + proc fts5_test_columntotalsize {cmd} { set res [list] for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { @@ -165,10 +180,12 @@ proc fts5_aux_test_functions {db} { foreach f { fts5_test_columnsize fts5_test_columntext + fts5_test_columnlocale fts5_test_columntotalsize fts5_test_poslist fts5_test_poslist2 fts5_test_collist + fts5_test_insttoken fts5_test_tokenize fts5_test_rowcount fts5_test_rowid @@ -177,6 +194,7 @@ proc fts5_aux_test_functions {db} { fts5_test_queryphrase fts5_test_phrasecount fts5_columntext + fts5_columnlocale fts5_queryphrase fts5_collist } { diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test index bc8005783..bf9c9e9db 100644 --- a/ext/fts5/test/fts5ah.test +++ b/ext/fts5/test/fts5ah.test @@ -163,6 +163,17 @@ do_execsql_test 1.8.2 { SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text'; } {10000} +do_execsql_test 1.8.3 { + SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid<5000 AND rowid < 'text'; +} {4999} +do_execsql_test 1.8.4 { + SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid>5000 AND rowid > 'text'; +} {0} + +do_catchsql_test 1.9 { + SELECT * FROM t1('*xy'); +} {1 {unknown special query: xy}} + } ;# foreach_detail_mode #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index b344e0d2e..7187ad67c 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -293,6 +293,16 @@ do_catchsql_test 4.4.4 { SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL } {1 {parse error in rank function: }} +# Check that the second and subsequent rank= constraints are ignored. +# +do_catchsql_test 4.3.3 { + SELECT *, rank FROM t3 + WHERE t3 MATCH 'a' AND + rank MATCH 'nosuch()' AND + rank MATCH 'rowidmod(3)' + ORDER BY rank ASC +} {1 {unable to use function MATCH in the requested context}} + } ;# foreach_detail_mode diff --git a/ext/fts5/test/fts5blob.test b/ext/fts5/test/fts5blob.test new file mode 100644 index 000000000..4233719fb --- /dev/null +++ b/ext/fts5/test/fts5blob.test @@ -0,0 +1,166 @@ +# 2024 July 30 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file verifies that: +# +# * blob values may be written to locale=0 tables. +# +# * blob values - other than fts5_locale() values - may not be written +# to locale=0 tables. This is an SQLITE_MISMATCH error +# +# * blob values may be returned by queries on the external-content table +# of a locale=0 table. +# +# * blob values not may be returned by queries on the external-content +# table of a locale=1 table, apart from fts5_locale() blobs. This is an +# SQLITE_MISMATCH error. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5blob + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +# Test that blobs may be stored in normal locale=0 tables. +# +foreach {tn enc} { + 1 utf8 + 2 utf16 +} { + reset_db + fts5_aux_test_functions db + + execsql "PRAGMA encoding = $enc" + + execsql " + CREATE VIRTUAL TABLE t1 USING fts5(x, y); + " + do_execsql_test 1.$tn.0 { + CREATE VIRTUAL TABLE tt USING fts5vocab('t1', 'instance'); + INSERT INTO t1(rowid, x, y) VALUES(1, 555, X'0000000041424320444546'); + INSERT INTO t1(rowid, x, y) VALUES(2, 666, X'41424300444546'); + INSERT INTO t1(rowid, x, y) VALUES(3, 777, 'xyz'); + } + + do_execsql_test 1.$tn.1 { + SELECT rowid, quote(x), quote(y) FROM t1 + } { + 1 555 X'0000000041424320444546' + 2 666 X'41424300444546' + 3 777 'xyz' + } + + do_execsql_test 1.$tn.2 { + DELETE FROM t1 WHERE rowid=2; + DELETE FROM t1 WHERE rowid=1; + } + + do_execsql_test 1.$tn.3 { + PRAGMA integrity_check; + } {ok} +} + +#-------------------------------------------------------------------------- +# Test that a blob may be stored and retrieved in an unindexed column of +# a regular table with locale=1. +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, y UNINDEXED, locale=1); + INSERT INTO t1(rowid, x, y) VALUES(12, 'twelve', X'0000000041424320444546'); +} + +do_execsql_test 2.1 { + select rowid, x, quote(y) FROM t1 +} { + 12 twelve X'0000000041424320444546' +} + +#-------------------------------------------------------------------------- +# Test that blobs may not be written to any type of table with locale=1 +# set. Except, they may be written to UNINDEXED columns. +# +reset_db +do_execsql_test 3.0 { + CREATE TABLE t1(a, b); + + CREATE VIRTUAL TABLE x1 USING fts5(a, b, locale=1); + CREATE VIRTUAL TABLE x2 USING fts5(a, b, locale=1, content=t2); + CREATE VIRTUAL TABLE x3 USING fts5(a, b, locale=1, content=); +} + +do_catchsql_test 3.1 { + INSERT INTO x1(rowid, a, b) VALUES(113, 'hello world', X'123456'); +} {1 {datatype mismatch}} +do_catchsql_test 3.2 { + INSERT INTO x2(rowid, a, b) VALUES(113, 'hello world', X'123456'); +} {1 {datatype mismatch}} +do_catchsql_test 3.3 { + INSERT INTO x3(rowid, a, b) VALUES(113, 'hello world', X'123456'); +} {1 {datatype mismatch}} + + +#-------------------------------------------------------------------------- +# Test that fts5_locale() values may not be written to any type of table +# without locale=1 set. Even to an UNINDEXED column. +# +reset_db +do_execsql_test 3.0 { + CREATE TABLE t1(a, b); + + CREATE VIRTUAL TABLE x1 USING fts5(a, b); + CREATE VIRTUAL TABLE x2 USING fts5(a, b, content=t2); + CREATE VIRTUAL TABLE x3 USING fts5(a, b, content=); + + CREATE VIRTUAL TABLE x4 USING fts5(a, b, c UNINDEXED); +} + +do_catchsql_test 3.1 { + INSERT INTO x1(rowid, a, b) + VALUES(113, 'hello world', fts5_locale('en_AU', 'abc')); +} {1 {fts5_locale() requires locale=1}} +do_catchsql_test 3.2 { + INSERT INTO x2(rowid, a, b) + VALUES(113, 'hello world', fts5_locale('en_AU', 'abc')); +} {1 {fts5_locale() requires locale=1}} +do_catchsql_test 3.3 { + INSERT INTO x3(rowid, a, b) + VALUES(113, 'hello world', fts5_locale('en_AU', 'abc')); +} {1 {fts5_locale() requires locale=1}} +do_catchsql_test 3.4 { + INSERT INTO x4(rowid, a, b, c) + VALUES(113, 'hello world', 'yesno', fts5_locale('en_AU', 'abc')); +} {1 {fts5_locale() requires locale=1}} + + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); +} + +foreach {tn sql} { + 1 { INSERT INTO x1(rowid, x) VALUES(4.5, 'abcd') } + 2 { INSERT INTO x1(rowid, x) VALUES('xyz', 'abcd') } + 3 { INSERT INTO x1(rowid, x) VALUES(X'001122', 'abcd') } +} { + do_catchsql_test 4.1.$tn $sql {1 {datatype mismatch}} +} + + +finish_test + + diff --git a/ext/fts5/test/fts5cat.test b/ext/fts5/test/fts5cat.test index 483f64bfe..71e2abe3a 100644 --- a/ext/fts5/test/fts5cat.test +++ b/ext/fts5/test/fts5cat.test @@ -55,5 +55,22 @@ do_execsql_test 1.5 { SELECT * FROM t4t } {สนามกีฬา 1 1} +#------------------------------------------------------------------------- +reset_db +do_execsql_test 2.0 " + CREATE VIRTUAL TABLE x1 USING fts5(c, + tokenize=\"unicode61 categories ' \t'\"); +" + +do_catchsql_test 2.1 " + CREATE VIRTUAL TABLE x2 USING fts5(c, + tokenize=\"unicode61 categories 'N*\t\tMYZ'\"); +" {1 {error in tokenizer constructor}} + +do_catchsql_test 2.2 " + CREATE VIRTUAL TABLE x2 USING fts5(c, + tokenize=\"unicode61 categories 'N*\t\tXYZ'\"); +" {1 {error in tokenizer constructor}} + finish_test diff --git a/ext/fts5/test/fts5contentless.test b/ext/fts5/test/fts5contentless.test index eb6b928ab..991e9888f 100644 --- a/ext/fts5/test/fts5contentless.test +++ b/ext/fts5/test/fts5contentless.test @@ -267,4 +267,24 @@ do_execsql_test 8.2 { SELECT rowid FROM ft('four'); } {} +#------------------------------------------------------------------------- +reset_db +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE ft USING fts5(x, content='', contentless_delete=0); + INSERT INTO ft VALUES('hello world'); + INSERT INTO ft VALUES('one two three'); +} + +do_catchsql_test 9.1 { + INSERT INTO ft(ft, rowid, x) VALUES('delete', 1, 'hello world'); +} {0 {}} + +do_catchsql_test 9.2 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', contentless_delete=2); +} {1 {malformed contentless_delete=... directive}} + +do_catchsql_test 9.3 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', contentless_delete=11); +} {1 {malformed contentless_delete=... directive}} + finish_test diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index ae07383b2..0abd8b86d 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -101,4 +101,25 @@ do_catchsql_test 3.1 { SELECT * FROM t3 WHERE t3 MATCH 'o'; } {1 {fts5: missing row 3 from content table 'main'.'t3_content'}} +#-------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t2 USING fts5(x); + INSERT INTO t2 VALUES('one two three'); + INSERT INTO t2 VALUES('four five six'); + INSERT INTO t2 VALUES('seven eight nine'); + INSERT INTO t2 VALUES('ten eleven twelve'); +} +do_execsql_test 4.1 { + SELECT hex(block) FROM t2_data WHERE id=1; +} {040C} +do_execsql_test 4.2 { + UPDATE t2_data SET block = X'0402' WHERE id=1 +} +breakpoint +do_catchsql_test 4.3 { + DELETE FROM t2 WHERE rowid=3 +} {1 {database disk image is malformed}} + finish_test diff --git a/ext/fts5/test/fts5corrupt3.test b/ext/fts5/test/fts5corrupt3.test index cfe1438ed..c5faaa87b 100644 --- a/ext/fts5/test/fts5corrupt3.test +++ b/ext/fts5/test/fts5corrupt3.test @@ -15524,6 +15524,160 @@ do_catchsql_test 80.1 { SELECT snippet(rowid, -1, '.', '..', '[', '(]'),snippet(rowid, -1, '.', '.', '', '(]'), highlight(t1, 29, 1 , '') FROM t1('g+ h') WHERE rank MATCH 'bm25(1.0, 10)' ORDER BY NOT (SELECT 1 FROM t1('g+ æ') WHERE rank MATCH 'bm25(1.0, 10)' ORDER BY rank); } {1 {database disk image is malformed}} +#------------------------------------------------------------------------- +reset_db +do_test 81.0 { + sqlite3 db {} + db deserialize [decode_hexdb { +.open --hexdb +| size 40960 pagesize 4096 filename crash-44e8035a976422.db +| page 1 offset 0 +| 0: 53 51 4c 69 74 65 20 66 6f 72 6d 61 74 20 33 00 SQLite format 3. +| 16: 10 00 01 01 00 40 20 20 00 00 00 00 00 00 00 0a .....@ ........ +| 32: 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00 04 ................ +| 48: 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 ................ +| 96: 00 00 00 00 0d 00 00 00 0d 0b 6e 00 0f a3 0f 4c ..........n....L +| 112: 0e e1 0e 81 0e 24 0d cc 0d 72 0d 1b 0c b0 0c 50 .....$...r.....P +| 128: 0b f8 0b b3 0b 6e 00 00 00 00 00 00 00 00 00 00 .....n.......... +| 2912: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 43 0d ..............C. +| 2928: 06 17 11 11 08 75 74 61 62 6c 65 74 34 74 34 43 .....utablet4t4C +| 2944: 52 45 41 54 45 20 56 49 52 54 55 41 4c 20 54 41 REATE VIRTUAL TA +| 2960: 42 4c 45 20 74 34 20 55 53 49 4e 47 20 66 74 73 BLE t4 USING fts +| 2976: 35 76 6f 63 61 62 28 27 74 32 27 2c 20 27 72 6f 5vocab('t2', 'ro +| 2992: 77 27 29 43 0c 06 17 11 11 08 75 74 61 62 6c 65 w')C......utable +| 3008: 74 33 74 33 43 52 45 41 54 45 20 56 49 52 54 55 t3t3CREATE VIRTU +| 3024: 41 4c 20 54 41 42 4c 45 20 74 33 20 55 53 49 4e AL TABLE t3 USIN +| 3040: 47 20 66 74 73 35 76 6f 63 61 62 28 27 74 31 27 G fts5vocab('t1' +| 3056: 2c 20 27 72 6f 77 27 29 56 0b 06 17 1f 1f 01 7d , 'row')V....... +| 3072: 74 61 62 6c 65 74 32 5f 63 6f 6e 66 69 67 74 32 tablet2_configt2 +| 3088: 5f 63 6f 6e 66 69 67 0a 43 52 45 41 54 45 20 54 _config.CREATE T +| 3104: 41 42 4c 45 20 27 74 32 5f 63 6f 6e 66 69 67 27 ABLE 't2_config' +| 3120: 28 6b 20 50 52 49 4d 41 52 59 20 4b 45 59 2c 20 (k PRIMARY KEY, +| 3136: 76 29 20 57 49 54 48 4f 55 54 20 52 4f 57 49 44 v) WITHOUT ROWID +| 3152: 5e 0a 07 17 21 21 01 81 07 74 61 62 6c 65 74 32 ^...!!...tablet2 +| 3168: 5f 63 6f 6e 74 65 6e 74 74 32 5f 63 6f 6e 74 65 _contentt2_conte +| 3184: 6e 74 09 43 52 45 41 54 45 20 54 41 42 4c 45 20 nt.CREATE TABLE +| 3200: 27 74 32 5f 63 6f 6e 74 65 6e 74 27 28 69 64 20 't2_content'(id +| 3216: 49 4e 54 45 47 45 52 20 50 52 49 4d 41 52 59 20 INTEGER PRIMARY +| 3232: 4b 45 59 2c 20 63 30 2c 20 63 31 2c 20 63 32 29 KEY, c0, c1, c2) +| 3248: 69 09 07 17 19 19 01 81 2d 74 61 62 6c 65 74 32 i.......-tablet2 +| 3264: 5f 69 64 78 74 32 5f 69 64 78 08 43 52 45 41 54 _idxt2_idx.CREAT +| 3280: 45 20 54 41 42 4c 45 20 27 74 32 5f 69 64 78 27 E TABLE 't2_idx' +| 3296: 28 73 65 67 69 64 2c 20 74 65 72 6d 2c 20 70 67 (segid, term, pg +| 3312: 6e 6f 2c 20 50 52 49 4d 41 52 59 20 4b 45 59 28 no, PRIMARY KEY( +| 3328: 73 65 67 69 64 2c 20 74 65 72 6d 29 29 20 57 49 segid, term)) WI +| 3344: 54 48 4f 55 54 20 52 4f 57 49 44 55 08 07 17 1b THOUT ROWIDU.... +| 3360: 1b 01 81 01 74 61 62 6c 65 74 32 5f 64 61 74 61 ....tablet2_data +| 3376: 74 32 5f 64 61 74 61 07 43 52 45 41 54 45 20 54 t2_data.CREATE T +| 3392: 41 42 4c 45 20 27 74 32 5f 64 61 74 61 27 28 69 ABLE 't2_data'(i +| 3408: 64 20 49 4e 54 45 47 45 52 20 50 52 49 4d 41 52 d INTEGER PRIMAR +| 3424: 59 20 4b 45 59 2c 20 62 6c 6f 63 6b 20 42 4c 4f Y KEY, block BLO +| 3440: 42 29 58 07 07 17 11 11 08 81 1d 74 61 62 6c 65 B)X........table +| 3456: 74 32 74 32 43 52 45 41 54 45 20 56 49 52 54 55 t2t2CREATE VIRTU +| 3472: 41 4c 20 54 41 42 4c 45 20 74 32 20 55 53 49 4e AL TABLE t2 USIN +| 3488: 47 20 66 74 73 35 28 27 61 27 2c 5b 62 5d 2c 22 G fts5('a',[b],. +| 3504: 63 22 2c 64 65 74 61 69 6c 3d 6e 6f 6e 65 2c 63 c.,detail=none,c +| 3520: 6f 6c 75 6d 6e 73 69 7a 65 3d 30 29 56 06 06 17 olumnsize=0)V... +| 3536: 1f 1f 01 7d 74 61 62 6c 65 74 31 5f 63 6f 6e 66 ....tablet1_conf +| 3552: 69 67 74 31 5f 63 6f 6e 66 69 67 06 43 52 45 41 igt1_config.CREA +| 3568: 54 45 20 54 41 42 4c 45 20 27 74 31 5f 63 6f 6e TE TABLE 't1_con +| 3584: 66 69 67 27 28 6b 20 50 52 49 4d 41 52 59 20 4b fig'(k PRIMARY K +| 3600: 45 59 2c 20 76 29 20 57 49 54 48 4f 55 54 20 52 EY, v) WITHOUT R +| 3616: 4f 57 49 44 5b 05 07 17 21 21 01 81 01 74 61 62 OWID[...!!...tab +| 3632: 6c 65 74 31 5f 64 6f 63 73 69 7a 65 74 31 5f 64 let1_docsizet1_d +| 3648: 6f 63 73 69 7a 65 05 43 52 45 41 54 45 20 54 41 ocsize.CREATE TA +| 3664: 42 4c 45 20 27 74 31 5f 64 6f 63 73 69 7a 65 27 BLE 't1_docsize' +| 3680: 28 69 64 20 49 4e 54 45 47 45 52 20 50 52 49 4d (id INTEGER PRIM +| 3696: 41 52 59 20 4b 45 59 2c 20 73 7a 20 42 4c 4f 42 ARY KEY, sz BLOB +| 3712: 29 5e 04 07 17 21 21 01 81 07 74 61 62 6c 65 74 )^...!!...tablet +| 3728: 31 5f 63 6f 6e 74 65 6e 74 74 31 5f 63 6f 6e 74 1_contentt1_cont +| 3744: 65 6e 74 04 43 52 45 41 54 45 20 54 41 42 4c 45 ent.CREATE TABLE +| 3760: 20 27 74 31 5f 63 6f 6e 74 65 6e 74 27 28 69 64 't1_content'(id +| 3776: 20 49 4e 54 45 47 45 52 20 50 52 49 4d 41 52 59 INTEGER PRIMARY +| 3792: 20 4b 45 59 2c 20 63 30 2c 20 63 31 2c 20 63 32 KEY, c0, c1, c2 +| 3808: 29 69 03 07 17 19 19 01 81 2d 74 61 62 6c 65 74 )i.......-tablet +| 3824: 31 5f 69 64 78 74 31 5f 69 64 78 03 43 52 45 41 1_idxt1_idx.CREA +| 3840: 54 45 20 54 41 42 4c 45 20 27 74 31 5f 69 64 78 TE TABLE 't1_idx +| 3856: 27 28 73 65 67 69 64 2c 20 74 65 72 6d 2c 20 70 '(segid, term, p +| 3872: 67 6e 6f 2c 20 50 52 49 4d 41 52 59 20 4b 45 59 gno, PRIMARY KEY +| 3888: 28 73 65 67 69 64 2c 20 74 65 72 6d 29 29 20 57 (segid, term)) W +| 3904: 49 54 48 4f 55 54 20 52 4f 57 49 44 55 02 07 17 ITHOUT ROWIDU... +| 3920: 1b 1b 01 81 01 74 61 62 6c 65 74 31 5f 64 61 74 .....tablet1_dat +| 3936: 61 74 31 5f 64 61 74 61 02 43 52 45 41 54 45 20 at1_data.CREATE +| 3952: 54 41 42 4c 45 20 27 74 31 5f 64 61 74 61 27 28 TABLE 't1_data'( +| 3968: 69 64 20 49 4e 54 45 47 45 52 20 50 52 49 4d 41 id INTEGER PRIMA +| 3984: 52 59 20 4b 45 59 2c 20 62 6c 6f 63 6b 20 42 4c RY KEY, block BL +| 4000: 4f 42 29 5b 01 07 17 11 11 08 81 23 74 61 62 6c OB)[.......#tabl +| 4016: 65 74 31 74 31 43 52 45 41 54 45 20 56 49 52 54 et1t1CREATE VIRT +| 4032: 55 41 4c 20 54 41 42 4c 45 20 74 31 20 55 53 49 UAL TABLE t1 USI +| 4048: 4e 47 20 66 74 73 35 28 61 2c 62 20 75 6e 69 6e NG fts5(a,b unin +| 4064: 64 65 78 65 64 2c 63 2c 74 6f 6b 65 6e 69 7a 65 dexed,c,tokenize +| 4080: 3d 22 70 6f 72 74 65 72 20 61 73 63 69 69 22 29 =.porter ascii.) +| page 2 offset 4096 +| 0: 0d 0f 68 00 05 0f 13 00 0f e6 0f 13 0f a8 0f 7c ..h............| +| 16: 0f 2a 00 00 00 00 00 00 00 00 00 00 00 00 00 00 .*.............. +| 3856: 00 00 00 15 0a 03 00 30 00 00 00 00 01 03 03 00 .......0........ +| 3872: 03 01 01 01 02 01 01 03 01 01 37 8c 80 80 80 80 ..........7..... +| 3888: 01 03 00 74 00 00 00 2e 02 30 61 03 02 02 01 01 ...t.....0a..... +| 3904: 62 03 02 03 01 01 63 03 02 04 01 01 67 03 06 01 b.....c.....g... +| 3920: 02 02 01 01 68 03 06 01 02 03 01 01 69 03 06 01 ....h.......i... +| 3936: 02 04 04 06 06 06 08 08 0f ef 00 14 2a 00 00 00 ............*... +| 3952: 00 01 02 02 00 02 01 01 01 02 01 01 25 88 80 80 ............%... +| 3968: 80 80 01 03 00 50 00 00 00 1f 02 30 67 02 08 02 .....P.....0g... +| 3984: 01 02 02 01 01 68 02 08 03 8d 02 03 01 01 6a 42 .....h........jB +| 4000: 08 04 01 02 04 04 09 09 37 84 80 80 80 80 01 03 ........7....... +| 4016: 00 74 00 00 00 2e 02 30 61 01 12 02 01 01 62 01 .t.....0a.....b. +| 4032: 02 03 01 01 63 01 02 04 01 01 67 01 06 01 02 02 ....c.....g..... +| 4048: 01 01 68 01 05 01 02 03 01 01 69 01 06 01 02 04 ..h.......i..... +| 4064: 04 06 06 06 08 08 07 01 03 00 14 03 09 00 09 00 ................ +| 4080: 00 00 11 24 00 00 00 00 01 01 01 00 01 01 01 01 ...$............ +| page 3 offset 8192 +| 0: 0a 00 00 00 03 0f ec 00 0f fa 0f f3 0f ec 00 00 ................ +| 4064: 00 00 00 00 00 00 00 00 00 00 00 00 06 04 01 0c ................ +| 4080: 01 03 02 06 04 01 0c 01 02 02 05 04 09 0c 01 02 ................ +| page 4 offset 12288 +| 0: 0d 00 00 00 03 0f be 00 0f ea 00 00 00 00 00 00 ................ +| 4016: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 14 03 ................ +| 4032: 05 00 17 17 17 61 20 62 20 63 67 20 68 20 69 67 .....a b cg h ig +| 4048: 20 68 20 69 14 02 05 00 17 17 17 67 20 68 20 69 h i.......g h i +| 4064: 61 20 62 20 63 67 20 68 20 69 14 01 05 00 17 17 a b cg h i...... +| 4080: 17 61 20 62 20 63 64 20 65 20 66 67 20 68 20 69 .a b cd e fg h i +| page 5 offset 16384 +| 0: 0d 00 00 00 03 0f e8 00 0f f8 0f f0 0f e8 00 00 ................ +| 4064: 00 00 00 00 00 00 00 00 06 03 03 00 12 03 00 03 ................ +| 4080: 06 02 03 00 12 03 00 03 06 01 03 00 12 03 00 03 ................ +| page 6 offset 20480 +| 0: 0a 00 00 00 01 0f f4 00 0f f4 00 00 00 00 00 00 ................ +| 4080: 00 00 00 00 0b 03 1b 01 76 65 72 73 69 6f 6e 04 ........version. +| page 7 offset 24576 +| 0: 0d 00 00 00 03 0f 9e 00 0f e6 0f ef 0f 9e 00 00 ................ +| 3984: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 41 84 ..............A. +| 4000: 80 80 80 80 01 04 00 81 06 00 00 00 34 02 30 61 ............4.0a +| 4016: 01 01 01 01 01 62 01 01 01 01 01 63 01 01 01 01 .....b.....c.... +| 4032: e6 64 01 01 01 65 01 01 01 66 01 01 01 67 01 01 .d...e...f...g.. +| 4048: 01 01 01 68 01 01 01 01 01 69 01 01 01 04 06 06 ...h.....i...... +| 4064: 06 04 04 04 06 06 07 01 03 00 14 03 09 09 09 0f ................ +| 4080: 0a 03 00 24 00 00 00 00 01 01 01 00 01 01 01 01 ...$............ +| page 8 offset 28672 +| 0: 0a 00 00 00 01 0f fa 00 0f fa 00 00 00 00 00 00 ................ +| 4080: 00 00 00 00 00 00 00 00 00 00 05 04 09 0c 01 02 ................ +| page 9 offset 32768 +| 0: 0d 00 00 00 03 0f be 00 0f ea 0f d4 0f be 00 00 ................ +| 4016: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 14 03 ................ +| 4032: 05 00 17 17 17 61 20 62 20 63 67 20 68 20 69 67 .....a b cg h ig +| 4048: 20 68 21 69 14 02 05 00 17 17 17 67 20 68 20 69 h!i.......g h i +| 4064: 61 20 62 20 63 67 20 68 20 69 14 01 05 00 17 17 a b cg h i...... +| 4080: 17 61 20 62 20 63 64 20 65 20 66 67 20 68 20 69 .a b cd e fg h i +| page 10 offset 36864 +| 0: 0a 00 00 00 01 0f f4 00 0f f4 00 00 00 00 00 00 ................ +| 4080: 00 00 00 00 0b 03 1b 01 76 65 72 73 69 6f 6e 04 ........version. +| end crash-44e8035a976422.db +}]} {} + +do_catchsql_test 81.2 { + UPDATE t1 SET b=zeroblob(299); +} {1 {database disk image is malformed}} + sqlite3_fts5_may_be_corrupt 0 finish_test diff --git a/ext/fts5/test/fts5expr.test b/ext/fts5/test/fts5expr.test index e3938beb0..49be61d9c 100644 --- a/ext/fts5/test/fts5expr.test +++ b/ext/fts5/test/fts5expr.test @@ -44,5 +44,9 @@ for {set ii 0} {$ii < 300} {incr ii} { } $res } +do_execsql_test 1.2 { + SELECT rowid FROM x1 WHERE a MATCH '"..."' +} {} + finish_test diff --git a/ext/fts5/test/fts5faultI.test b/ext/fts5/test/fts5faultI.test new file mode 100644 index 000000000..08a6bf056 --- /dev/null +++ b/ext/fts5/test/fts5faultI.test @@ -0,0 +1,237 @@ +# 2010 June 15 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5faultI + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +set ::testprefix fts5faultI + +do_execsql_test 1.0 { + PRAGMA encoding = utf16; + CREATE VIRTUAL TABLE t1 USING fts5(x, locale=1); + INSERT INTO t1 VALUES('origintext unicode61 ascii porter trigram'); +} + +faultsim_save_and_close +faultsim_restore_and_reopen + +do_faultsim_test 1 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM t1(fts5_locale('en_US', 'origintext')); + } +} -test { + faultsim_test_result {0 1} +} + +do_faultsim_test 2 -faults oom* -prep { + faultsim_restore_and_reopen + execsql { + SELECT * FROM t1('ascii'); + } +} -body { + execsql { + UPDATE t1 SET rowid=rowid+1; + } +} -test { + faultsim_test_result {0 {}} +} + +fts5_aux_test_functions db +do_faultsim_test 3 -faults oom* -prep { +} -body { + execsql { + SELECT fts5_columnlocale(t1, 0) FROM t1('unicode*'); + } +} -test { + faultsim_test_result {0 {{}}} {1 SQLITE_NOMEM} +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE w1 USING fts5(a); +} +faultsim_save_and_close + +do_faultsim_test 4 -faults oom* -prep { + faultsim_restore_and_reopen + execsql { + BEGIN; + INSERT INTO w1 VALUES('token token token'); + } +} -body { + execsql { + INSERT INTO w1(w1, rank) VALUES('rank', 'bm25()'); + } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 5 -faults oom* -prep { + faultsim_restore_and_reopen + execsql { + BEGIN; + INSERT INTO w1 VALUES('one'); + SAVEPOINT one; + INSERT INTO w1 VALUES('two'); + ROLLBACK TO one; + } + +} -body { + execsql { + INSERT INTO w1 VALUES('string'); + } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE w1 USING fts5(a); + INSERT INTO w1 VALUES('one two three'); +} +fts5_aux_test_functions db + +do_faultsim_test 5 -faults oom* -prep { +} -body { + execsql { + SELECT fts5_test_insttoken(w1, 0, 0) FROM w1('two'); + } +} -test { + faultsim_test_result {0 two} {1 SQLITE_NOMEM} +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE w1 USING fts5(a); + INSERT INTO w1 VALUES('one two three'); +} +fts5_aux_test_functions db +faultsim_save_and_close + +do_faultsim_test 6 -faults oom* -prep { + faultsim_restore_and_reopen + db eval { + BEGIN; + INSERT INTO w1 VALUES('four five six'); + SAVEPOINT abc; + INSERT INTO w1 VALUES('seven eight nine'); + SAVEPOINT def; + INSERT INTO w1 VALUES('ten eleven twelve'); + } +} -body { + execsql { + RELEASE abc; + } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE w1 USING fts5(a); + INSERT INTO w1 VALUES('one two three'); + INSERT INTO w1 VALUES('three two one'); + DELETE FROM w1_content WHERE rowid=1; +} + +faultsim_save_and_close + +do_faultsim_test 7 -faults oom* -prep { + faultsim_restore_and_reopen + db eval { SELECT * FROM w1 } +} -body { + execsql { + PRAGMA integrity_check; + } +} -test { +} + +#------------------------------------------------------------------------- +reset_db +fts5_tclnum_register db +fts5_aux_test_functions db + +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE ft USING fts5( + x, tokenize = "tclnum query", detail=columns + ); + INSERT INTO ft VALUES('one two three i ii iii'); + INSERT INTO ft VALUES('four five six iv v vi'); + INSERT INTO ft VALUES('eight nine ten viii ix x'); +} {} + +do_faultsim_test 8.1 -faults oom* -prep { +} -body { + execsql { + SELECT fts5_test_collist (ft) FROM ft('one two'); + } +} -test { + faultsim_test_result {0 {{0.0 1.0}}} {1 {SQL logic error}} {1 SQLITE_NOMEM} +} + +do_faultsim_test 8.2 -faults oom* -prep { +} -body { + execsql { + SELECT rowid FROM ft('one two') ORDER BY rank; + } +} -test { + faultsim_test_result {0 1} {1 {SQL logic error}} {1 SQLITE_NOMEM} +} + +#------------------------------------------------------------------------- +reset_db + +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE ft USING fts5(x); + INSERT INTO ft VALUES('one two three i ii iii'); + INSERT INTO ft VALUES('four five six iv v vi'); + INSERT INTO ft VALUES('eight nine ten viii ix x'); +} {} + +faultsim_save_and_close + +do_faultsim_test 9.1 -faults oom* -prep { + faultsim_restore_and_reopen +} -body { + execsql { + UPDATE ft SET rowid=4 WHERE rowid=1 + } +} -test { + faultsim_test_result {0 {}} +} + +do_faultsim_test 9.2 -faults oom* -prep { + faultsim_restore_and_reopen +} -body { + execsql { + SELECT rowid FROM ft WHERE x MATCH 'one AND two AND three' + } +} -test { + faultsim_test_result {0 1} +} + + + +finish_test + diff --git a/ext/fts5/test/fts5locale.test b/ext/fts5/test/fts5locale.test new file mode 100644 index 000000000..d64df1849 --- /dev/null +++ b/ext/fts5/test/fts5locale.test @@ -0,0 +1,576 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the built-in fts5 tokenizers. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5locale + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +proc transform_token {locale token} { + switch -- $locale { + reverse { + set ret "" + foreach c [split $token ""] { + set ret "$c$ret" + } + set token $ret + } + + default { + # no-op + } + } + + set token +} + +proc tcl_create {args} { return "tcl_tokenize" } +proc tcl_tokenize {tflags text} { + set iToken 1 + set bSkip 0 + if {[sqlite3_fts5_locale]=="second"} { set bSkip 1 } + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + incr iToken + if {(($iToken) % ($bSkip + 1))} continue + + set w [transform_token [sqlite3_fts5_locale] $w] + sqlite3_fts5_token $w $iStart $iEnd + } +} + +#------------------------------------------------------------------------- +# Check that queries can have a locale attached to them. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl); + INSERT INTO t1 VALUES('abc'); + INSERT INTO t1 VALUES('cba'); +} {} + +do_execsql_test 1.1 { + SELECT rowid, a FROM t1( fts5_locale('en_US', 'abc') ); +} {1 abc} + +do_execsql_test 1.2 { + SELECT rowid, a FROM t1( fts5_locale('reverse', 'abc') ); +} {2 cba} + +#------------------------------------------------------------------------- +# Test that the locale= option exists and seems to accept values. And +# that fts5_locale() values may only be inserted into an internal-content +# table if the locale=1 option was specified. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE b1 USING fts5(x, y, locale=1, tokenize=tcl); + CREATE VIRTUAL TABLE b2 USING fts5(x, y, locale=0, tokenize=tcl); + + CREATE VIRTUAL TABLE ttt USING fts5vocab('b1', instance); +} + +do_catchsql_test 2.2.1 { + CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=2); +} {1 {malformed locale=... directive}} +do_catchsql_test 2.2.2 { + CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=111); +} {1 {malformed locale=... directive}} + +do_catchsql_test 2.3 { + INSERT INTO b1(b1, rank) VALUES('locale', 0); +} {1 {SQL logic error}} + +do_execsql_test 2.4 { + INSERT INTO b1 VALUES('abc', 'one two three'); + INSERT INTO b1 VALUES('def', fts5_locale('reverse', 'four five six')); +} + +do_execsql_test 2.5 { + INSERT INTO b2 VALUES('abc', 'one two three'); +} + +do_catchsql_test 2.6 { + INSERT INTO b2 VALUES('def', fts5_locale('reverse', 'four five six')); +} {1 {fts5_locale() requires locale=1}} + +do_execsql_test 2.7 { SELECT rowid FROM b1('one') } {1} +do_execsql_test 2.8 { SELECT rowid FROM b1('four') } {} +do_execsql_test 2.9 { SELECT rowid FROM b1('ruof') } 2 +do_execsql_test 2.10 { SELECT rowid FROM b1(fts5_locale('reverse', 'five'))} 2 + +do_execsql_test 2.11 { + SELECT x, quote(y) FROM b1 +} { + abc {'one two three'} + def {'four five six'} +} + +do_execsql_test 2.12 { SELECT quote(y) FROM b1('ruof') } { + {'four five six'} +} + +do_execsql_test 2.13 { + INSERT INTO b1(b1) VALUES('integrity-check'); +} +do_execsql_test 2.14 { + INSERT INTO b1(b1) VALUES('rebuild'); +} +do_execsql_test 2.15 { + INSERT INTO b1(b1) VALUES('integrity-check'); +} + +do_execsql_test 2.16 { + DELETE FROM b1 WHERE rowid=2 +} +do_execsql_test 2.17 { + INSERT INTO b1(b1) VALUES('integrity-check'); +} + +do_execsql_test 2.18 { + INSERT INTO b1(rowid, x, y) VALUES( + test_setsubtype(45, 76), 'abc def', 'def abc' + ); + INSERT INTO b1(b1) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- +# Test the 'delete' command with contentless tables. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE c1 USING fts5(x, content=, tokenize=tcl, locale=1); + CREATE VIRTUAL TABLE c2 USING fts5vocab('c1', instance); + + INSERT INTO c1 VALUES('hello world'); + INSERT INTO c1 VALUES( fts5_locale('reverse', 'one two three') ); +} + +do_execsql_test 3.2 { + SELECT DISTINCT term FROM c2 ORDER BY 1 +} { + eerht eno hello owt world +} + +do_execsql_test 3.3 { + INSERT INTO c1(c1, rowid, x) + VALUES('delete', 2, fts5_locale('reverse', 'one two three') ); +} + +do_execsql_test 3.4 { + SELECT DISTINCT term FROM c2 ORDER BY 1 +} { + hello world +} + +#------------------------------------------------------------------------- +# Test that an UPDATE that updates a subset of the columns does not +# magically discard the locale from those columns not updated. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 4.1 { + CREATE VIRTUAL TABLE d1 USING fts5(x, y, locale=1, tokenize=tcl); + CREATE VIRTUAL TABLE d2 USING fts5vocab('d1', instance); + + INSERT INTO d1(rowid, x, y) VALUES(1, 'abc', 'def'); + INSERT INTO d1(rowid, x, y) VALUES(2, 'ghi', fts5_locale('reverse', 'hello')); +} + +do_execsql_test 4.2 { + SELECT DISTINCT term FROM d2 ORDER BY 1 +} { + abc def ghi olleh +} + +do_execsql_test 4.3 { + UPDATE d1 SET x='jkl' WHERE rowid=2; +} + +do_execsql_test 4.4 { + SELECT DISTINCT term FROM d2 ORDER BY 1 +} { + abc def jkl olleh +} + +do_execsql_test 4.5 { + SELECT rowid, * FROM d1 +} { + 1 abc def + 2 jkl hello +} + +do_execsql_test 4.6 { + UPDATE d1 SET rowid=4 WHERE rowid=2 +} + +do_execsql_test 4.7 { + SELECT rowid, * FROM d1 +} { + 1 abc def + 4 jkl hello +} + +fts5_aux_test_functions db + +do_execsql_test 4.8.1 { + SELECT fts5_test_columntext(d1) FROM d1('jkl') +} {{jkl hello}} +do_execsql_test 4.8.2 { + SELECT fts5_test_columntext(d1) FROM d1(fts5_locale('reverse', 'hello')) +} {{jkl hello}} + +do_execsql_test 4.9 { + SELECT fts5_test_columnlocale(d1) FROM d1(fts5_locale('reverse', 'hello')) +} {{{} reverse}} + +do_execsql_test 4.10 { + SELECT fts5_test_columnlocale(d1) FROM d1 +} { + {{} {}} + {{} reverse} +} + +#------------------------------------------------------------------------- +# Test that if an fts5_locale() value is written to an UNINDEXED +# column it is stored as text. This is so that blobs and other values +# can also be stored as is. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE t1 USING fts5( + x, y UNINDEXED, locale=1, tokenize=tcl + ); + + INSERT INTO t1(rowid, x, y) VALUES(111, + fts5_locale('reverse', 'one two three'), + fts5_locale('reverse', 'four five six') + ); +} + +do_execsql_test 5.2 { + SELECT rowid, x, y FROM t1 +} { + 111 {one two three} {four five six} +} + +do_execsql_test 5.3 { + SELECT typeof(c0), typeof(c1) FROM t1_content +} { + blob text +} + +#------------------------------------------------------------------------- + +foreach {tn opt} { + 1 {} + 2 {, columnsize=0} +} { + reset_db + sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + do_execsql_test 6.$tn.1 " + CREATE VIRTUAL TABLE y1 USING fts5(t, locale=1, tokenize=tcl $opt); + " + + do_execsql_test 6.$tn.2 { + INSERT INTO y1(rowid, t) VALUES + (1, fts5_locale('second', 'the city of London')), + (2, fts5_locale('second', 'shall have all the old')), + (3, fts5_locale('second', 'Liberties and Customs')), + (4, fts5_locale('second', 'which it hath been used to have')); + } + + fts5_aux_test_functions db + + do_execsql_test 5.$tn.3 { + SELECT fts5_test_columnsize(y1) FROM y1 + } { + 2 3 2 4 + } + + do_execsql_test 5.$tn.4 { + SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall'); + } { + 2 3 + } + + do_execsql_test 5.$tn.5 { + SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall'); + } { + 2 3 + } + + do_execsql_test 5.$tn.6 { + SELECT rowid, fts5_test_columnsize(y1) FROM y1('have'); + } { + 4 4 + } + + do_execsql_test 5.$tn.7 { + SELECT rowid, highlight(y1, 0, '[', ']') FROM y1('have'); + } { + 4 {which it hath been used to [have]} + } + + do_execsql_test 5.$tn.8 { + SELECT rowid, + highlight(y1, 0, '[', ']'), + snippet(y1, 0, '[', ']', '...', 10) + FROM y1('Liberties + Customs'); + } { + 3 {[Liberties and Customs]} + {[Liberties and Customs]} + } +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); +} +do_catchsql_test 6.1 { + INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world')); +} {1 {fts5_locale() requires locale=1}} + +do_execsql_test 6.2 { + SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') ); +} {text text} + +#-------------------------------------------------------------------------- +# Test that fts5_locale() works with external-content tables. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 7.1 { + CREATE TABLE t1(ii INTEGER PRIMARY KEY, bb BLOB, tt TEXT, locale TEXT); + CREATE VIEW v1 AS + SELECT ii AS rowid, bb, fts5_locale(locale, tt) AS tt FROM t1; + + CREATE VIRTUAL TABLE ft USING fts5( + bb, tt, locale=1, tokenize=tcl, content=v1 + ); + + INSERT INTO t1 VALUES(1, NULL, 'one two three', NULL); + INSERT INTO t1 VALUES(2, '7800616263', 'four five six', 'reverse'); + INSERT INTO t1 VALUES(3, '000000007800616263', 'seven eight nine', 'second'); +} + +do_execsql_test 7.2 { + INSERT INTO ft(ft) VALUES('rebuild'); + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 7.3 { + SELECT rowid, quote(bb), quote(tt) FROM ft +} { + 1 NULL {'one two three'} + 2 '7800616263' {'four five six'} + 3 '000000007800616263' {'seven eight nine'} +} + +do_execsql_test 7.4 { SELECT rowid FROM ft('six'); } +do_execsql_test 7.5 { SELECT rowid FROM ft(fts5_locale('reverse','six')); } 2 + +fts5_aux_test_functions db + +do_execsql_test 7.6 { + SELECT fts5_test_columnlocale(ft) FROM ft; +} { + {{} {}} {{} reverse} {{} second} +} + +#------------------------------------------------------------------------- +# Test that the porter tokenizer works with locales. +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + +do_execsql_test 8.1 { + CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize="porter tcl"); + CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance); + + INSERT INTO ft(rowid, tt) VALUES + (111, fts5_locale('second', 'the porter tokenizer is a wrapper tokenizer')), + (222, fts5_locale('reverse', 'This value may also be set')); +} + +do_execsql_test 8.1 { + SELECT DISTINCT term FROM vocab ORDER BY 1 +} { + a eb eulav osla sihT te the token yam +} + +#------------------------------------------------------------------------- +# Test that position-lists (used by xInst, xPhraseFirst etc.) work with +# locales and modes other than detail=full. +# +foreach {tn detail} { + 1 detail=full + 2 detail=none + 3 detail=column +} { + reset_db + sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + do_execsql_test 9.$tn.0 " + CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize=tcl, $detail); + " + do_execsql_test 9.$tn.1 { + CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance); + INSERT INTO ft(rowid, tt) VALUES + (-1, fts5_locale('second', 'it is an ancient mariner')); + } + + do_execsql_test 9.$tn.2 { + SELECT DISTINCT term FROM vocab + } {an it mariner} + + do_execsql_test 9.$tn.3 { + SELECT highlight(ft, 0, '[', ']') FROM ft('mariner') + } {{it is an ancient [mariner]}} +} + +#------------------------------------------------------------------------- +# Check some corrupt fts5_locale() blob formats are detected. +# +foreach_detail_mode $::testprefix { + + reset_db + sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create + fts5_aux_test_functions db + do_execsql_test 10.1 { + CREATE TABLE x1(ii INTEGER PRIMARY KEY, x); + CREATE VIRTUAL TABLE ft USING fts5(x, + content=x1, content_rowid=ii, locale=1, detail=%DETAIL%, columnsize=0 + ); + + CREATE VIRTUAL TABLE ft2 USING fts5( + x, locale=1, detail=%DETAIL%, columnsize=0 + ); + } + + foreach {tn v} { + 1 X'001122' + 2 X'0011223344' + 3 X'00E0B2EB68656c6c6f' + 4 X'00E0B2EB0068656c6c6f' + } { + do_execsql_test 10.2.$tn.0 { INSERT INTO ft(ft) VALUES('delete-all') } + do_execsql_test 10.2.$tn.1 { DELETE FROM x1; } + do_execsql_test 10.2.$tn.2 " INSERT INTO x1 VALUES(NULL, $v) " + + do_catchsql_test 10.2.$tn.3 { + INSERT INTO ft(ft) VALUES('rebuild'); + } {1 {SQL logic error}} + + do_catchsql_test 10.2.$tn.4 " + SELECT * FROM ft( test_setsubtype($v, 76) ); + " {1 {SQL logic error}} + + do_execsql_test 10.2.$tn.5 { + INSERT INTO ft(rowid, x) VALUES(1, 'hello world'); + } + + if {"%DETAIL%"!="full"} { + do_catchsql_test 10.2.$tn.6 { + SELECT fts5_test_poslist(ft) FROM ft('world'); + } {1 SQLITE_ERROR} + + do_catchsql_test 10.2.$tn.7 { + SELECT fts5_test_columnsize(ft) FROM ft('world'); + } {1 SQLITE_ERROR} + + do_catchsql_test 10.2.$tn.7 { + SELECT fts5_test_columnlocale(ft) FROM ft('world'); + } {1 SQLITE_ERROR} + } + + do_catchsql_test 10.2.$tn.8 { + SELECT * FROM ft('hello') + } {1 {SQL logic error}} + + do_catchsql_test 10.2.$tn.9 { + PRAGMA integrity_check; + } {0 ok} + + do_execsql_test 10.2.$tn.10 { + DELETE FROM x1; + INSERT INTO x1(ii, x) VALUES(1, 'hello world'); + } + + do_catchsql_test 10.2.$tn.11 " + INSERT INTO ft(ft, rowid, x) VALUES('delete', 1, test_setsubtype($v,76) ) + " {1 {SQL logic error}} + + do_catchsql_test 10.2.$tn.12 " + INSERT INTO ft(rowid, x) VALUES(2, test_setsubtype($v,76) ) + " {1 {SQL logic error}} + + do_execsql_test 10.2.$tn.13 { + INSERT INTO ft2(rowid, x) VALUES(1, 'hello world'); + } + do_execsql_test 10.2.$tn.14 "UPDATE ft2_content SET c0=$v" + + do_catchsql_test 10.2.$tn.15 { + PRAGMA integrity_check; + } {1 {SQL logic error}} + + do_execsql_test 10.2.$tn.16 { + DELETE FROM ft2_content; + INSERT INTO ft2(ft2) VALUES('rebuild'); + } + } + +} + +#------------------------------------------------------------------------- +# +reset_db +sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create +fts5_aux_test_functions db +do_execsql_test 11.0 { + CREATE VIRTUAL TABLE x1 USING fts5(abc, locale=1); + INSERT INTO x1(rowid, abc) VALUES(123, fts5_locale('en_US', 'one two three')); +} + +do_catchsql_test 11.1 { + SELECT fts5_columnlocale(x1, -1) FROM x1('two'); +} {1 SQLITE_RANGE} +do_catchsql_test 11.2 { + SELECT fts5_columnlocale(x1, 1) FROM x1('two'); +} {1 SQLITE_RANGE} + +#------------------------------------------------------------------------- +# +reset_db +do_test 12.0 { + list [catch { + sqlite3_fts5_create_tokenizer -v2 -version 3 db tcl tcl_create + } msg] $msg +} {1 {error in fts5_api.xCreateTokenizer_v2()}} + +finish_test + diff --git a/ext/fts5/test/fts5misc.test b/ext/fts5/test/fts5misc.test index abd4fdaf8..534c42fff 100644 --- a/ext/fts5/test/fts5misc.test +++ b/ext/fts5/test/fts5misc.test @@ -21,8 +21,6 @@ ifcapable !fts5 { return } -if 0 { - do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a); } @@ -37,21 +35,21 @@ do_catchsql_test 1.1.2 { do_catchsql_test 1.2.1 { SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*id'); -} {0 {{}}} +} {1 {no such cursor: 4}} do_catchsql_test 1.2.2 { SELECT a FROM t1 WHERE rank = (SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*id')); -} {0 {}} +} {1 {no such cursor: 6}} do_catchsql_test 1.3.1 { SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*reads'); -} {1 {no such cursor: 2}} +} {1 {no such cursor: 1}} do_catchsql_test 1.3.2 { SELECT a FROM t1 WHERE rank = (SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*reads')); -} {1 {no such cursor: 2}} +} {1 {no such cursor: 1}} db close sqlite3 db test.db @@ -61,6 +59,11 @@ do_catchsql_test 1.3.3 { WHERE rank = (SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*reads')); } {1 {no such cursor: 1}} +fts5_aux_test_functions db +do_catchsql_test 1.3.4 { + SELECT fts5_columntext(t1) FROM t1('*reads'); +} {1 {no such cursor: 1}} + #------------------------------------------------------------------------- reset_db do_execsql_test 2.0 { @@ -569,24 +572,98 @@ do_execsql_test 20.5 { } {3 1} #------------------------------------------------------------------------- -} reset_db do_execsql_test 21.0 { + CREATE TABLE t1(ii INTEGER, x TEXT, y TEXT); + CREATE VIRTUAL TABLE xyz USING fts5(content_rowid=ii, content=t1, x, y); + INSERT INTO t1 VALUES(1, 'one', 'i'); + INSERT INTO t1 VALUES(2, 'two', 'ii'); + INSERT INTO t1 VALUES(3, 'tree', 'iii'); + INSERT INTO xyz(xyz) VALUES('rebuild'); +} + +do_execsql_test 21.1 { + UPDATE xyz SET y='TWO' WHERE rowid=2; + UPDATE t1 SET y='TWO' WHERE ii=2; +} + +do_execsql_test 21.2 { + PRAGMA integrity_check +} {ok} + +breakpoint +sqlite3_db_config db DEFENSIVE 1 +do_execsql_test 21.3 { + CREATE TABLE xyz_notashadow(x, y); + DROP TABLE xyz_notashadow; +} +sqlite3_db_config db DEFENSIVE 0 + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 22.0 { + SELECT fts5(NULL); +} {{}} +do_execsql_test 22.1 { + SELECT count(*) FROM ( + SELECT fts5_source_id() + ) +} {1} +execsql_pp { + SELECT fts5_source_id() +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 23.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + INSERT INTO x1 VALUES('one + two + three'); + INSERT INTO x1 VALUES('one + xyz + three'); + INSERT INTO x1 VALUES('xyz + two + xyz'); +} +do_execsql_test 23.1 { + SELECT rowid FROM x1('one + two + three'); +} {1} + +do_execsql_test 23.2 { + SELECT rowid FROM x1('^".." AND one'); +} {} + +do_execsql_test 23.3 { + SELECT rowid FROM x1('abc NEAR ".." NEAR def'); +} {} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 24.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, detail='none'); INSERT INTO t1(a) VALUES('a'); } -do_execsql_test 21.2 { +do_execsql_test 24.2 { SELECT rank FROM ( SELECT rank FROM t1('a NOT "" NOT def') ) ORDER BY 1; } {-1e-06} -do_execsql_test 21.3 { +do_execsql_test 24.3 { SELECT rank FROM ( SELECT rank FROM t1('a NOT � NOT def') ) ORDER BY 1; } {-1e-06} -do_execsql_test 21.4 { +do_execsql_test 24.4 { SELECT rank FROM ( SELECT rank FROM t1('a NOT "" NOT def') ); } {-1e-06} +#------------------------------------------------------------------------- +reset_db +fts5_aux_test_functions db + +do_execsql_test 25.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail='none', content=''); + INSERT INTO t1(a) VALUES('a b c'); +} + +do_execsql_test 25.0 { + SELECT fts5_test_poslist(t1) FROM t1('b') ORDER BY rank; +} {{}} + finish_test diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index cc9d99e2d..8e975fa17 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -261,8 +261,8 @@ do_execsql_test 5.3 { reset_db sqlite3_fts5_register_origintext db do_execsql_test 6.0 { - CREATE VIRTUAL TABLE ft USING fts5( - x, y, tokenize='origintext unicode61', detail=%DETAIL% + CREATE VIRTUAL TABLE ft USING fts5( + x, y, tokenize='origintext unicode61', detail=%DETAIL%, tokendata=0 ); INSERT INTO ft VALUES('One Two', 'Three two'); @@ -291,6 +291,22 @@ do_execsql_test 6.3 { SELECT rowid, tokens(ft) FROM ft('Three*'); } {1 {{}} 2 {{}}} +fts5_aux_test_functions db +do_catchsql_test 6.4 { + SELECT fts5_test_insttoken(ft, -1, 0) FROM ft('one'); +} {1 SQLITE_RANGE} + +do_catchsql_test 6.5 { + SELECT fts5_test_insttoken(ft, 1, 0) FROM ft('one'); +} {1 SQLITE_RANGE} + +do_catchsql_test 6.6 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, tokendata=2); +} {1 {malformed tokendata=... directive}} +do_catchsql_test 6.7 { + CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', tokendata=11); +} {1 {malformed tokendata=... directive}} + } finish_test diff --git a/ext/fts5/test/fts5secure8.test b/ext/fts5/test/fts5secure8.test index 0216bb6ea..8b65b7c59 100644 --- a/ext/fts5/test/fts5secure8.test +++ b/ext/fts5/test/fts5secure8.test @@ -58,6 +58,10 @@ do_execsql_test 2.1 { pragma quick_check; } {ok} +do_catchsql_test 2.2 { + INSERT INTO xyz(xyz, rank) VALUES('secure-delete', 'hello world'); +} {1 {SQL logic error}} + diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test index 638409506..60ccb5a9c 100644 --- a/ext/fts5/test/fts5simple.test +++ b/ext/fts5/test/fts5simple.test @@ -480,4 +480,33 @@ do_execsql_test 22.0 { do_catchsql_test 22.1 {SELECT * FROM x1('')} {1 {fts5: syntax error near ""}} do_catchsql_test 22.2 {SELECT * FROM x1(NULL)} {1 {fts5: syntax error near ""}} +#------------------------------------------------------------------------- +reset_db +do_execsql_test 23.0 { + CREATE VIRTUAL TABLE x1 USING fts5(x); + SELECT count(*) FROM x1_data; +} {2} + +do_execsql_test 23.1 { + BEGIN; + INSERT INTO x1 VALUES('a b c d'); + INSERT INTO x1 VALUES('a b c d'); + INSERT INTO x1 VALUES('a b c d'); +} + +do_execsql_test 23.2 { + SELECT count(*) FROM x1_data; +} {2} + +do_execsql_test 23.3 { + INSERT INTO x1(x1) VALUES('flush'); + SELECT count(*) FROM x1_data; +} {3} + +do_execsql_test 23.4 { + ROLLBACK; + SELECT count(*) FROM x1_data; +} {2} + + finish_test diff --git a/ext/fts5/test/fts5tokenizer3.test b/ext/fts5/test/fts5tokenizer3.test new file mode 100644 index 000000000..5cdab743c --- /dev/null +++ b/ext/fts5/test/fts5tokenizer3.test @@ -0,0 +1,77 @@ +# 2024 Aug 10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the built-in fts5 tokenizers. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5tokenizer3 + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + + +proc get_sod {args} { return "split_on_dot" } +proc get_lowercase {args} { return "lowercase" } + +proc lowercase {flags txt} { + set n [string length $txt] + sqlite3_fts5_token [string tolower $txt] 0 $n + return 0 +} + +proc split_on_dot {flags txt} { + set iOff 0 + foreach t [split $txt "."] { + set n [string length $txt] + sqlite3_fts5_token $t $iOff [expr $iOff+$n] + incr iOff [expr {$n+1}] + } + return "" +} + +foreach {tn script} { + 1 { + sqlite3_fts5_create_tokenizer db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod + } + 2 { + sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod + } + 3 { + sqlite3_fts5_create_tokenizer db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod + } + 4 { + sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase + sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod + } +} { + reset_db + eval $script + + do_execsql_test 1.$tn.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=split_on_dot); + CREATE VIRTUAL TABLE t1vocab USING fts5vocab(t1, instance); + INSERT INTO t1 VALUES('ABC.Def.ghi'); + } + + do_execsql_test 1.$tn.1 { + SELECT term FROM t1vocab ORDER BY 1 + } {abc def ghi} +} + + +finish_test diff --git a/ext/fts5/test/fts5trigram.test b/ext/fts5/test/fts5trigram.test index 752686620..3742c647f 100644 --- a/ext/fts5/test/fts5trigram.test +++ b/ext/fts5/test/fts5trigram.test @@ -56,6 +56,7 @@ foreach {tn like res} { 7 {ABCDEFG%} 1 8 {%รุงเ%} 2 9 {%งเ%} 2 + 10 {%"งเ"%} {} } { do_execsql_test 1.3.$tn { SELECT rowid FROM t1 WHERE y LIKE $like @@ -200,6 +201,12 @@ do_eqp_test 6.3 { do_eqp_test 6.4 { SELECT * FROM ci1 WHERE x GLOB ? } {VIRTUAL TABLE INDEX 0:G0} +do_eqp_test 6.5 { + SELECT * FROM ci1 WHERE x < ? +} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}} +do_eqp_test 6.6 { + SELECT * FROM ci0 WHERE x < ? +} {{SCAN ci0 VIRTUAL TABLE INDEX 0:}} reset_db do_execsql_test 7.0 { @@ -256,4 +263,85 @@ do_execsql_test 8.3 { {[abcde]} } +#------------------------------------------------------------------------- +reset_db +do_execsql_test 9.0 { + CREATE VIRTUAL TABLE t1 USING fts5( + a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, + tokenize=trigram + ); + + INSERT INTO t1(rowid, a12) VALUES(111, 'thats a tricky case though'); + INSERT INTO t1(rowid, a12) VALUES(222, 'the query planner cannot do'); +} + +do_execsql_test 9.1 { + SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' +} {111} + +do_execsql_test 9.2 { + SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' AND a12 LIKE '%case%' +} {111} + +do_execsql_test 9.3 { + SELECT rowid FROM t1 WHERE a12 LIKE NULL +} {} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 10.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram); +} + +do_test 10.1 { + foreach {val} { + "abc \UFFjkl\UFF" + "abc \UFFFjkl\UFFF" + "abc \UFFFFjkl\UFFFF" + "abc \UFFFFFjkl\UFFFFF" + "\UFFjkl\UFF abc" + "\UFFFjkl\UFFF abc" + "\UFFFFjkl\UFFFF abc" + "\UFFFFFjkl\UFFFFF abc" + "\U10001jkl\U10001 abc" + } { + execsql { INSERT INTO t1 VALUES( $val ) } + } +} {} + +do_test 10.2 { + foreach {val} { + X'E18000626320646566' + X'61EDA0806320646566' + X'61EDA0806320646566' + X'61EFBFBE6320646566' + X'76686920E18000626320646566' + X'7668692061EDA0806320646566' + X'7668692061EDA0806320646566' + X'7668692061EFBFBE6320646566' + } { + execsql " INSERT INTO t1 VALUES( $val ) " + } +} {} + +do_test 10.3 { + set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] + set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] + set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] + set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] + execsql { + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + + INSERT INTO t1 VALUES('abcd' || $a); + INSERT INTO t1 VALUES('abcd' || $b); + INSERT INTO t1 VALUES('abcd' || $c); + INSERT INTO t1 VALUES('abcd' || $d); + } +} {} + + + finish_test diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test index 3fc1f673a..7a49a1d83 100644 --- a/ext/fts5/test/fts5unicode2.test +++ b/ext/fts5/test/fts5unicode2.test @@ -470,4 +470,24 @@ do_execsql_test 8.2.3 { SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC; } {2 4} +#------------------------------------------------------------------------- + +foreach {tn val bErr} { + 1 0 0 + 2 1 0 + 3 2 0 + 4 3 1 + 5 11 1 +} { + reset_db + set aRes(0) {0 {}} + set aRes(1) {1 {error in tokenizer constructor}} + set res $aRes($bErr) + do_catchsql_test 9.1.$tn " + CREATE VIRTUAL TABLE bl USING fts5( + s, tokenize='trigram remove_diacritics $val' + ); + " $res +} + finish_test @@ -1,5 +1,5 @@ -C Fix\sa\sname\sresolution\sissue\swith\sCTEs. -D 2024-08-20T22:44:40.653 +C Refactor\sthe\sSrcItem\sobject\sto\smove\sfields\sassociated\swith\ssubqueries\sout\ninto\sa\sseparate\sobject\snamed\sSubquery.\s\sThis\sreduces\sthe\ssize\sof\sthe\sSrcItem\nobject\sby\sabout\s1/3rd\sand\sprovides\simproved\sperformance. +D 2024-08-20T23:11:28.443 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -89,29 +89,29 @@ F ext/fts3/tool/fts3cov.sh c331d006359456cf6f8f953e37f2b9c7d568f3863f00bb5f7eb87 F ext/fts3/tool/fts3view.c 413c346399159df81f86c4928b7c4a455caab73bfbc8cd68f950f632e5751674 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7 +F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7ede551eac3ead62baa F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb -F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e -F ext/fts5/fts5.h 6b49ce6eb2e395e7fd84557b21d32f5de8041f2fada4c617e481e99427e24b6e -F ext/fts5/fts5Int.h 41fb3a2dd40e818cc96c6f4176dbdf2aaa8f57043cfc9a8f2676e7e6a72ad764 -F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1 +F ext/fts5/extract_api_docs.tcl 1db7f85f4d84b7b6f33336155d5053fafc3c8debd074422d8003c8f7fa4d0fdb +F ext/fts5/fts5.h c65fc7799a4cd6774628da4fa9408955623e504d7369ab5b89c4413fdfe11eb5 +F ext/fts5/fts5Int.h 26a71a09cefa4ef6b4516b204ed48da3e1380970a19b3482eea7c5d805655360 +F ext/fts5/fts5_aux.c 12cd2512f869217c38b70c31de5b5f741812734fafa80f55b32ea9bbd96e2152 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 -F ext/fts5/fts5_config.c 68cb87a49215f8e7028000b681df4057c430a4a6afbd676463886da94c9e1c37 -F ext/fts5/fts5_expr.c 4b7734db98393d6f7fbc5c9c71ebcabe70110f7df08f6b136d096a1eaee0f56a +F ext/fts5/fts5_config.c 353d2a0d12678cae6ab5b9ce54aed8dac0825667b69248b5a4ed81cbefc109ea +F ext/fts5/fts5_expr.c 9a56f53700d1860f0ee2f373c2b9074eaf2a7aa0637d0e27a6476de26a3fee33 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe -F ext/fts5/fts5_main.c 6ec7a7d005c632d86e510ddfaca56b197a5b20b61848415764b91bd27d1e4f84 -F ext/fts5/fts5_storage.c 1d7e08d4331da2f3f7e78e70eef2ed6a013d91ba16175c651adbc5ad672235aa -F ext/fts5/fts5_tcl.c 5ca3e3e35010d326f5b821a563e4fcde3913e052935f5c2c72c264122a26b48f +F ext/fts5/fts5_main.c 5ea7ab0c9967594e73b7dd0ad737595922a14f175aa4b486dc2992a3e3138b68 +F ext/fts5/fts5_storage.c 9a9b880be12901f1962ae2a5a7e1b74348b3099a1e728764e419f75d98e3e612 +F ext/fts5/fts5_tcl.c 1dcf08028141c40a32634bdcf2d5601622ce4edc48f82ac4ce0cbe0a92a6961d F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b -F ext/fts5/fts5_tokenize.c 63ebe9057ed3f4dfc49944bc4aee3d3b745cc2faff73bc152ed3554ed3bf9cf4 -F ext/fts5/fts5_unicode2.c eca63dbc797f8ff0572e97caf4631389c0ab900d6364861b915bdd4735973f00 +F ext/fts5/fts5_tokenize.c ae9c4fa93174ef06ffc138bd4280a1c37f7e13624d3d2706aad4b80573f23c41 +F ext/fts5/fts5_unicode2.c 6f9b0fb79a8facaed76628ffd4eb9c16d7f2b84b52872784f617cf3422a9b043 F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80 F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0 F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl bc33c6cc65e5d390f28a68aeeb3a526dadd2c3a947d2466ee1986c1a4002df56 +F ext/fts5/test/fts5_common.tcl c5aa7cf7148b6dcffb5b61520ae18212baf169936af734ab265143f59db328fe F ext/fts5/test/fts5aa.test 015c81b84d53bfcedd77d624202c8b02e9f0cbbb4b51688e3a9c9f90bccbb4ac F ext/fts5/test/fts5ab.test 4bdb619fee409e11417e8827e320b857e42e926a01a0408fc9f143ec870a6ced F ext/fts5/test/fts5ac.test 4a73626de86f3d17c95738034880c4f0de8d54741fb943d819b528373657e59b @@ -119,11 +119,11 @@ F ext/fts5/test/fts5ad.test 058e616612964e61d19f70295f0e6eaedceb4b29b1fbf4f85961 F ext/fts5/test/fts5ae.test 3d49edbd50bb0684199a2e7568aeb30d1d29718f5c0f61751983740fa836d15f F ext/fts5/test/fts5af.test ae81f08b8da4c5f9b3ec1ef538a4ab6b7c278e92fa9058d6dc5d842c5d9771b9 F ext/fts5/test/fts5ag.test 6667807b5d3fbf460892e756763fbe3d87a2fffe345a06514ba010ca6f6641f7 -F ext/fts5/test/fts5ah.test ac327281c8910cf9b85738a2655003fd0c6a8a76189ef34f3d086b8f9e54263b +F ext/fts5/test/fts5ah.test e1f01314b35745a30e1b494b46045b82005d71cae74f1ebd9f1338566b77f9fc F ext/fts5/test/fts5ai.test cbe26d78030998f535bc103f37915350b137a822c71a9db439a077d7666a3539 F ext/fts5/test/fts5aj.test 53c8508dab4acca3e691a4c51eca4b3b018319ab8635e540103d5bbdc91543c9 F ext/fts5/test/fts5ak.test 25e2f8afdcff30d98ca9dee8c5cacca2f26db17501c9401f16d99ee036f70e8d -F ext/fts5/test/fts5al.test 842c50fd9b287e3fa988dfcab436b27c432866f7406a56aaf3c67f316952cc02 +F ext/fts5/test/fts5al.test f0e655606771b2b5dbaf70e7f0044d560257cf3531d5eea40df58d0d7add8c39 F ext/fts5/test/fts5alter.test ebbee06419c2d3cee5ef7ebb5ba6a9996f1aa374035361c0acd37368cc5f64f3 F ext/fts5/test/fts5auto.test 2278de323172ced485d2844cb1357d00036ac1665f27e70fa1a48ce57bf31c2c F ext/fts5/test/fts5aux.test 27210687338133b1e9bc0dd669322fca59fd432439f40b126895e2d7c2f899d6 @@ -132,7 +132,8 @@ F ext/fts5/test/fts5auxdata.test 372549088ff792655f73e62b9dfaf4863ce74f5e604c06c F ext/fts5/test/fts5bigid.test 2860854c2561a57594192b00c33a29f91cb85e25f3d6c03b5c2b8f62708f39dd F ext/fts5/test/fts5bigpl.test 8f09858aab866c33593560e6480b2b6975ae7ff29ca32ad7b77e2da61402f8ef F ext/fts5/test/fts5bigtok.test 541119e616c637caea925a8c028c37c2c29e94383e00aa2f9198d530724b6e36 -F ext/fts5/test/fts5cat.test daba0b80659460b0cb60bd1f40b402478a761fe7ea414c3c94c2be25568cc33a +F ext/fts5/test/fts5blob.test caa33369e93e99ff494cd1103506ae34c5afbc0bcc369ed5e58e135144e33689 +F ext/fts5/test/fts5cat.test bf67dd335f964482ee658287521b81e2b88697b45eb7f73933e15f198ed447cb F ext/fts5/test/fts5circref.test f880dfd0d99f6fb73b88ccacb0927d18e833672fd906cc47d6b4e529419eaa62 F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897 F ext/fts5/test/fts5columnsize.test 0af91d63985afdf663455d4b572b935238380140d74079eac362760866d3297b @@ -140,14 +141,14 @@ F ext/fts5/test/fts5config.test 017daf10d2642496e97402baa0134de8b5b46b9c37e53c22 F ext/fts5/test/fts5conflict.test bf6030a77dbb1bedfcc42e589ed7980846c995765d77460551e448b56d741244 F ext/fts5/test/fts5connect.test 08030168fc96fc278fa81f28654fb7e90566f33aff269c073e19b3ae9126b2f4 F ext/fts5/test/fts5content.test d5c0c2142e64cb305f0968de70c01f8e59dbc3ecc56520c22e739e5dd99ea3bb -F ext/fts5/test/fts5contentless.test b107465f8cd27dde6313b9c60b61d7158a7753b9c663c5c553695f826bb3c0a5 +F ext/fts5/test/fts5contentless.test 606f063b29ba0f46d4b79aa36cdd1ef4dab5de53eae8c881d731af75a4894aca F ext/fts5/test/fts5contentless2.test 70ffe6c611d8f278240da56734df8a77948f04e2739b358439e9bdcf56ced35f F ext/fts5/test/fts5contentless3.test 75eaae5ad6b284ee447788943974d323228f27cc35a1681da997135cff95bc6a F ext/fts5/test/fts5contentless4.test ec34dc69ef474ca9997dae6d91e072906e0e9a5a4b05ea89964c863833b6eff8 F ext/fts5/test/fts5contentless5.test 40cdcb4fe751672450829c5a96bd32c25fc2f6076279dd2ce5c58ac9a390132a -F ext/fts5/test/fts5corrupt.test a9bda1ded5112ebf1ee85c5381bd1fe8974952e2523cede4d5072804d2011503 +F ext/fts5/test/fts5corrupt.test 6485f721b88ba355ca5d701e7ee87a4efa3ea578d8e6adb26f51ef956c8328bd F ext/fts5/test/fts5corrupt2.test 335911e3f68b9625d850325f9e29a128db3f4276a8c9d4e32134580da8f924c4 -F ext/fts5/test/fts5corrupt3.test b5f35d72af85b1d5a092b3d5e437f7944d142dd0b0c87b928fd0436a0aec6987 +F ext/fts5/test/fts5corrupt3.test 621e9bca3e7299f487e1b29ff4179d9fc9560f5847dfc5b50a16010c9d2a0e5f F ext/fts5/test/fts5corrupt4.test dc08d19f5b8943e95a7778a7d8da592042504faf18dd93f68f7d7a0d7d7dd733 F ext/fts5/test/fts5corrupt5.test 11b47126f5772cc37b67e3e8b2ed05895c4d07c05338bc07e4eea225bfe32c76 F ext/fts5/test/fts5corrupt6.test 2d72db743db7b5d9c9a6d0cfef24d799ed1aa5e8192b66c40e871a37ed9eed06 @@ -159,7 +160,7 @@ F ext/fts5/test/fts5dlidx.test a7c42b0a74dc7c8aa1a46d586e0aadda4b6cc42c24450f8d3 F ext/fts5/test/fts5doclist.test b7cb84758504519746957802db9cd31187bb4e0028b89d9087ba06e26cc4155f F ext/fts5/test/fts5ea.test cefdf66024550fa7920c03395c71ce5046235ed1a1a7a469d79b19e7aad5afb5 F ext/fts5/test/fts5eb.test 401f756fdb77083aeba8b696c1e0ad4d834c39dbd6f17e492bb55a2ad64b4296 -F ext/fts5/test/fts5expr.test 7e1b2d075b63b727a624a378c2c09f94296a93dc4ae968aad67f8d9f3810c266 +F ext/fts5/test/fts5expr.test c7e208813df7a90badc856fde3796da79569b39382e0fdb43042127f3b8e06a7 F ext/fts5/test/fts5fault1.test d28a65caee75db6897c3cf1358c5230d3bb2a3bf7fb31062c19c7e5382b3d2bd F ext/fts5/test/fts5fault2.test 69c8fdbef830cd0d450908d4504d5bb86609e255af99c421c20a0756251fe344 F ext/fts5/test/fts5fault3.test da2f9e3e56ff5740d68ebdd6877c97089e7ed28ddff28a0da87a6afea27e5522 @@ -176,6 +177,7 @@ F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e F ext/fts5/test/fts5faultF.test 4abef99f86e99d9f0c6460dd68c586a766b6b9f1f660ada55bf2e8266bd1bbc1 F ext/fts5/test/fts5faultG.test 0544411ffcb3e19b42866f757a8a5e0fb8fef3a62c06f61d14deebc571bb7ea9 F ext/fts5/test/fts5faultH.test 2b2b5b8cb1b3fd7679f488c06e22af44107fbc6137eaf45b3e771dc7b149312d +F ext/fts5/test/fts5faultI.test fbc65a64944fb747f6d3fb30628a807d5cce1bca43c11df40e7770ad7a7ed593 F ext/fts5/test/fts5first.test bfd685b96905bf541d99d8644e0a7219d1d833455a08ab64e344071a613b6ba9 F ext/fts5/test/fts5full.test 97d263c1072f4a560929cca31e70f65d2ae232610e17e6affcf7e979df59547b F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e @@ -185,10 +187,11 @@ F ext/fts5/test/fts5interrupt.test 20d04204d3e341b104c0c24a41596b6393a3a81eba104 F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1 F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c +F ext/fts5/test/fts5locale.test 79cbd3000ae269de50826f6061c81f7c9fdb21dd9954c0b7f9485263482c539d F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3 F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082 F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2 -F ext/fts5/test/fts5misc.test b1682a40061bc58dcc62bbad48938fc5214d2ac6a868a8899c0c3d1930f1115d +F ext/fts5/test/fts5misc.test 60bb2be4a2d83d7a45047c1812781e2e337a27efa539d86356ef7f4acaf08eab F ext/fts5/test/fts5multi.test a15bc91cdb717492e6e1b66fec1c356cb57386b980c7ba5af1915f97fe878581 F ext/fts5/test/fts5multiclient.test 5ff811c028d6108045ffef737f1e9f05028af2458e456c0937c1d1b8dea56d45 F ext/fts5/test/fts5near.test 33d60867581066e5db7016deb5d651628125d7ff4e0233a88175aa5b65874c74 @@ -196,7 +199,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 264b9101721c17d06d1d174feb743fda3ddc89fad41dee980fef821428258e47 F ext/fts5/test/fts5optimize2.test 795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9be0cfe22064fc71a44ed F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3 -F ext/fts5/test/fts5origintext.test 87c34c78f201b1e22ac93ac6bbe6196dde59f0a7266b1aeb938604a0eb9d5552 +F ext/fts5/test/fts5origintext.test 2015f69bc8abd111152a8e66211fd2d45026378001e07c054159aa4f84e6691d F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba F ext/fts5/test/fts5origintext3.test 45c33cf0c91a9ca0e36d298462db3edc7c8fe45fd185649a9dbfd66bb670058b F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8 @@ -220,9 +223,9 @@ F ext/fts5/test/fts5secure4.test 0d10a80590c07891478700af7793b232962042677432b98 F ext/fts5/test/fts5secure5.test c07a68ced5951567ac116c22f2d2aafae497e47fe9fcb6a335c22f9c7a4f2c3a F ext/fts5/test/fts5secure6.test 74bf04733cc523bccca519bb03d3b4e2ed6f6e3db7c59bf6be82c88a0ac857fd F ext/fts5/test/fts5secure7.test fd03d0868d64340a1db8615b02e5508fea409de13910114e4f19eaefc120777a -F ext/fts5/test/fts5secure8.test e68c0ac4447f415ff3e4e82531e99548289286f9f3a29c8cd53036113fe28602 +F ext/fts5/test/fts5secure8.test 808ade9d172ed07b24b85c57dd53b6d2b1aba018b4e634d267ce572221de80e0 F ext/fts5/test/fts5securefault.test c34a28c7cd2f31a8b8907563889e1329a97da975c08df2d951422bcef8e2ebc5 -F ext/fts5/test/fts5simple.test 847fb828262328744733847dc76d6b5d4a6bd4c5d9b282cb819f6504340e061a +F ext/fts5/test/fts5simple.test ed7c3815c9fa1c16166258cb98edb2e014c63c7589958d76c5487df0df913d61 F ext/fts5/test/fts5simple2.test d10d963a357b8ec77b99032e4c816459b4dbdb1f6eee25eada7ef3ed245cb2dc F ext/fts5/test/fts5simple3.test 146ec3dc8f5763d6212641c9f0a2f1cba41679353d2add7b963beceb115dc7f4 F ext/fts5/test/fts5synonym.test becc8cea6cfc958a50b30c572c68cbfdf7455971d0fe988202ce67638d2c6cf6 @@ -231,12 +234,13 @@ F ext/fts5/test/fts5tok1.test 1f7817499f5971450d8c4a652114b3d833393c8134e32422d0 F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2 F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98 F ext/fts5/test/fts5tokenizer2.test ddb8b10fbe4b84b2a75812671f127774c1d2e3e2bf82d2e0e4f0bb1cd8a2b2d6 -F ext/fts5/test/fts5trigram.test be914555deb8504dde682bd5aa343d00c4da37dfad20709a5bac30d5f97f2ef5 +F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3 +F ext/fts5/test/fts5trigram.test fb9ee982edd76280ce979905a2251081cd04ae4c470248bd5d391b2d096430ab F ext/fts5/test/fts5trigram2.test 6fde9de7f63a6b4aa18dc731be56dbd6be4e755c9b13dcd55479e200d1df0e61 F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2 F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602 F ext/fts5/test/fts5unicode.test 41898f7e476e6515cd4b737c02a442cda5a580a74509788aa9072a2074948e0e -F ext/fts5/test/fts5unicode2.test a5c38179b311a188b24376772309389b073c996f52b79bb9ca760a19e62043ea +F ext/fts5/test/fts5unicode2.test 3bbd30152f9f760bf13886e5b1e5ec23ff62f56758ddda5d9c775a6082fb4c7c F ext/fts5/test/fts5unicode3.test f4891a3dac3b49c3d7c0fdb29566e9eb0ecff35263370c89f9661b1952b20818 F ext/fts5/test/fts5unicode4.test 728c8f0caafb05567f524ad313d9f8b780fa45987b8a8df04eff87923c74b4d0 F ext/fts5/test/fts5unindexed.test 168838d2c385e131120bbf5b516d2432a5fabc4caa2259c932e1d49ae209a4ae @@ -706,7 +710,7 @@ F src/build.c c5522b5faf8128227678e194275cefaeb4d063f55dbe70bcff745f1b63a187cf F src/callback.c db3a45e376deff6a16c0058163fe0ae2b73a2945f3f408ca32cf74960b28d490 F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e F src/ctime.c 64e4b1227b4ed123146f0aa2989131d1fbd9b927b11e80c9d58c6a68f9cd5ce3 -F src/date.c 13dd752847afb32ed70510ad7345a5b9c841f51ad904dba5d010f1fa3a6a324e +F src/date.c 89ce1ff20512a7fa5070ba6e7dd5c171148ca7d580955795bf97c79c2456144a F src/dbpage.c 80e46e1df623ec40486da7a5086cb723b0275a6e2a7b01d9f9b5da0f04ba2782 F src/dbstat.c 3b677254d512fcafd4d0b341bf267b38b235ccfddbef24f9154e19360fa22e43 F src/delete.c 444c4d1eaac40103461e3b6f0881846dd3aafc1cec1dd169d3482fa331667da7 @@ -744,7 +748,7 @@ F src/os.h 1ff5ae51d339d0e30d8a9d814f4b8f8e448169304d83a7ed9db66a65732f3e63 F src/os_common.h 6c0eb8dd40ef3e12fe585a13e709710267a258e2c8dd1c40b1948a1d14582e06 F src/os_kv.c 4d39e1f1c180b11162c6dc4aa8ad34053873a639bac6baae23272fc03349986a F src/os_setup.h 6011ad7af5db4e05155f385eb3a9b4470688de6f65d6166b8956e58a3d872107 -F src/os_unix.c 2ea8d3ed496b8d1f9332a9505653424e5464fd797ea9d91f8e2e62f9dd0298d0 +F src/os_unix.c 6e3e4fc75904ff85184091dbab996e6e35c1799e771788961cc3b4fcbe8f852c F src/os_win.c 6ff43bac175bd9ed79e7c0f96840b139f2f51d01689a638fd05128becf94908a F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a F src/pager.c b08600ebf0db90b6d1e9b8b6577c6fa3877cbe1a100bd0b2899e4c6e9adad4b3 @@ -760,7 +764,7 @@ F src/printf.c 6a87534ebfb9e5346011191b1f3a7ebc457f5938c7e4feeea478ecf53f6a41b2 F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c F src/resolve.c 9afed5fd7b9111633bdb74a73cdc47324e28e4dc6c27113e3e9aee38fb9422ab F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 -F src/select.c c1c28650d3ea5dc0670dd658600b963c29a5c31b685941a5df583b02631d04ff +F src/select.c 44d135bbea93872a7318f048d9d6e566b1c3eaa92d6dabe06e4741aa78d1c1ec F src/shell.c.in 94571558b0fb28c37a5cf6dbd6ea27285341023a28a8cb5795cd2768fab67704 F src/sqlite.h.in 1ad9110150773c38ebababbad11b5cb361bcd3997676dec1c91ac5e0416a7b86 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 @@ -849,7 +853,7 @@ F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 F src/where.c b6f79b189fcba459bb80420d3b4102f42a399be36ff29a3deff4ae4888fee46d F src/whereInt.h 6444b888ce395cb80511284b8a73b63472d34247fcb1b125ee06a54fa6ae878e F src/wherecode.c 137797b0de9ddf1ff43e5b0edffcc76fb05184ed651fc4f5a0a01a45c0b89d04 -F src/whereexpr.c 6a72cf607548765a262f216e87373fd675a4646f9cc4278fc519b66cf03dbc13 +F src/whereexpr.c 44f41ae554c7572e1de1485b3169b233ee04d464b2ee5881687ede3bf07cacfa F src/window.c 499d48f315a09242dc68f2fac635ed27dcf6bbb0d9ab9084857898c64489e975 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/affinity2.test ce1aafc86e110685b324e9a763eab4f2a73f737842ec3b687bd965867de90627 @@ -1060,6 +1064,7 @@ F test/date.test c8ff835023f2107b57ce7a45c92265d51c98a23fc93231e998f12d850831aad F test/date2.test 7e12ec14aaf4d5e6294b4ba140445b0eca06ea50062a9c3a69c4ee13d0b6f8b1 F test/date3.test a1b77abf05c6772fe5ca2337cac1398892f2a41e62bce7e6be0f4a08a0e64ae5 F test/date4.test 75dc8401e8c0639a228cd26a6eaa4ff5ea8ccda912b9853d1c9462c476670e17 +F test/date5.test 14ba189bc4d03efc371dd5302e035764f6633355a3e13acb4a45e7b33530231e F test/dbdata.test 042f49acff3438f940eeba5868d3af080ae64ddf26ae78f80c92bec3ca7d8603 F test/dbfuzz.c 73047c920d6210e5912c87cdffd9a1c281d4252e F test/dbfuzz001.test 6c9a4622029d69dc38926f115864b055cb2f39badd25ec22cbfb130c8ba8e9c3 @@ -2205,8 +2210,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c1bbed19f4348ec31c3cb4723eaa8f1554bb58ef4de7feb3a0f39612785f13d5 -R 1c80ace1a0877505f5ca5ebb5b98295b +P 3399698376761ab8c422f8ea02bfa2759afb606f08bedbd1cf7eee834229a9aa 4fa8235dd59cd683d6c6c97bfe181a9637be7c054d435323c903b9dbd74aff02 +R 82a6023b6446e680173d0de603e0e98e U drh -Z ef82496d97c0311a270f63ff1146b092 +Z cc2792cc7e3d54684569210f9c97c9df # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 53632b347..c96f75886 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4fa8235dd59cd683d6c6c97bfe181a9637be7c054d435323c903b9dbd74aff02 +484bcd75bc95491d8540c791c1c4d40d996cb465839564662e14f98739699bf1 diff --git a/src/date.c b/src/date.c index 8a609ae3c..8c48a81fa 100644 --- a/src/date.c +++ b/src/date.c @@ -271,8 +271,8 @@ static void computeJD(DateTime *p){ Y--; M += 12; } - A = Y/100; - B = 2 - A + (A/4); + A = (Y+4800)/100; + B = 38 - A + (A/4); X1 = 36525*(Y+4716)/100; X2 = 306001*(M+1)/10000; p->iJD = (sqlite3_int64)((X1 + X2 + D + B - 1524.5 ) * 86400000); @@ -456,7 +456,7 @@ static int validJulianDay(sqlite3_int64 iJD){ ** Compute the Year, Month, and Day from the julian day number. */ static void computeYMD(DateTime *p){ - int Z, A, B, C, D, E, X1; + int Z, alpha, A, B, C, D, E, X1; if( p->validYMD ) return; if( !p->validJD ){ p->Y = 2000; @@ -467,8 +467,8 @@ static void computeYMD(DateTime *p){ return; }else{ Z = (int)((p->iJD + 43200000)/86400000); - A = (int)((Z - 1867216.25)/36524.25); - A = Z + 1 + A - (A/4); + alpha = (int)((Z + 32044.75)/36524.25) - 52; + A = Z + 1 + alpha - ((alpha+100)/4) + 25; B = A + 1524; C = (int)((B - 122.1)/365.25); D = (36525*(C&32767))/100; diff --git a/src/os_unix.c b/src/os_unix.c index c94c0c111..5d1dc9ac6 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -322,7 +322,7 @@ static pid_t randomnessPid = 0; #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ -#ifndef SQLITE_DISABLE_DIRSYNC +#if !defined(SQLITE_DISABLE_DIRSYNC) && !defined(_AIX) # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ #else # define UNIXFILE_DIRSYNC 0x00 diff --git a/src/select.c b/src/select.c index b76e42b74..87bf7fac2 100644 --- a/src/select.c +++ b/src/select.c @@ -6784,6 +6784,7 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){ for(i=0, pF=pAggInfo->aFunc; i<pAggInfo->nFunc; i++, pF++){ ExprList *pList; assert( ExprUseXList(pF->pFExpr) ); + if( pParse->nErr ) return; pList = pF->pFExpr->x.pList; if( pF->iOBTab>=0 ){ /* For an ORDER BY aggregate, calls to OP_AggStep were deferred. Inputs @@ -6993,6 +6994,7 @@ static void updateAccumulator( if( addrNext ){ sqlite3VdbeResolveLabel(v, addrNext); } + if( pParse->nErr ) return; } if( regHit==0 && pAggInfo->nAccumulator ){ regHit = regAcc; @@ -7002,6 +7004,7 @@ static void updateAccumulator( } for(i=0, pC=pAggInfo->aCol; i<pAggInfo->nAccumulator; i++, pC++){ sqlite3ExprCode(pParse, pC->pCExpr, AggInfoColumnReg(pAggInfo,i)); + if( pParse->nErr ) return; } pAggInfo->directMode = 0; diff --git a/src/whereexpr.c b/src/whereexpr.c index ae26e85d2..7ea2956a7 100644 --- a/src/whereexpr.c +++ b/src/whereexpr.c @@ -220,11 +220,20 @@ static int isLikeOrGlob( } if( z ){ - /* Count the number of prefix characters prior to the first wildcard */ + /* Count the number of prefix characters prior to the first wildcard. + ** If the underlying database has a UTF16LE encoding, then only consider + ** ASCII characters. Note that the encoding of z[] is UTF8 - we are + ** dealing with only UTF8 here in this code, but the database engine + ** itself might be processing content using a different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; - if( c==wc[3] && z[cnt]!=0 ) cnt++; + if( c==wc[3] && z[cnt]!=0 ){ + cnt++; + }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){ + cnt--; + break; + } } /* The optimization is possible only if (1) the pattern does not begin @@ -239,7 +248,7 @@ static int isLikeOrGlob( Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ - *pisComplete = c==wc[0] && z[cnt+1]==0; + *pisComplete = c==wc[0] && z[cnt+1]==0 && ENC(db)!=SQLITE_UTF16LE; /* Get the pattern prefix. Remove all escapes from the prefix. */ pPrefix = sqlite3Expr(db, TK_STRING, (char*)z); diff --git a/test/date5.test b/test/date5.test new file mode 100644 index 000000000..688f84d0f --- /dev/null +++ b/test/date5.test @@ -0,0 +1,86 @@ +# 2024-08-19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# https://sqlite.org/forum/forumpost/eaa0a09786c6368b +# +# Apparently SQLite has been miscomputing leap-year dates before +# the year 0400. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# Skip this whole file if date and time functions are omitted +# at compile-time +# +ifcapable {!datetime} { + finish_test + return +} + +# Data sources: +# 1-10 https://ssd.jpl.nasa.gov/tools/jdc/#/cd +# 11 Jean Meeus, Astronomical Algorithms, ISBN 0-943396-61-1, p.59 +# 12 https://en.wikipedia.org/wiki/Julian_day +# +# ID YEAR MONTH DAY JD +set date5data { + 1 2024 2 29 2460369.5 + 2 2024 3 1 2460370.5 + 3 2023 2 28 2460003.5 + 4 2023 3 1 2460004.5 + 5 2000 2 29 2451603.5 + 6 2000 3 1 2451604.5 + 7 1900 2 28 2415078.5 + 8 1900 3 1 2415079.5 + 9 1712 2 29 2346413.5 + 10 1712 3 1 2346414.5 + 11 1977 4 26 2443259.5 + 12 2013 1 1 2456293.5 +} + +foreach {id y m d jd} $date5data { + set date [format %04d-%02d-%02d $y $m $d] + do_execsql_test date5-jd$jd { + SELECT date($::jd); + } $date + do_execsql_test date5-cal/$date { + SELECT julianday($::date); + } $jd + for {set i 1} {$y+400*$i<=9999} {incr i} { + set y2 [expr {$y+400*$i}] + set date2 [format %04d-%02d-%02d $y2 $m $d] + set jd2 [expr {$jd+146097*$i}] + do_execsql_test date5-jd$jd2 { + SELECT date($::jd2); + } $date2 + do_execsql_test date5-cal/$date2 { + SELECT julianday($::date2); + } $jd2 + } + for {set i 1} {$y-400*$i>=-4712} {incr i} { + set y2 [expr {$y-400*$i}] + if {$y2<0} { + set date2 [format -%04d-%02d-%02d [expr {-$y2}] $m $d] + } else { + set date2 [format %04d-%02d-%02d $y2 $m $d] + } + set jd2 [expr {$jd-146097*$i}] + do_execsql_test date5-jd$jd2 { + SELECT date($::jd2); + } $date2 + do_execsql_test date5-cal/$date2 { + SELECT julianday($::date2); + } $jd2 + } +} + +finish_test |