aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/fts3/unicode/mkunicode.tcl3
-rw-r--r--ext/fts5/extract_api_docs.tcl5
-rw-r--r--ext/fts5/fts5.h118
-rw-r--r--ext/fts5/fts5Int.h29
-rw-r--r--ext/fts5/fts5_aux.c28
-rw-r--r--ext/fts5/fts5_config.c62
-rw-r--r--ext/fts5/fts5_expr.c18
-rw-r--r--ext/fts5/fts5_main.c926
-rw-r--r--ext/fts5/fts5_storage.c281
-rw-r--r--ext/fts5/fts5_tcl.c264
-rw-r--r--ext/fts5/fts5_tokenize.c101
-rw-r--r--ext/fts5/fts5_unicode2.c3
-rw-r--r--ext/fts5/test/fts5_common.tcl18
-rw-r--r--ext/fts5/test/fts5ah.test11
-rw-r--r--ext/fts5/test/fts5al.test10
-rw-r--r--ext/fts5/test/fts5blob.test166
-rw-r--r--ext/fts5/test/fts5cat.test17
-rw-r--r--ext/fts5/test/fts5contentless.test20
-rw-r--r--ext/fts5/test/fts5corrupt.test21
-rw-r--r--ext/fts5/test/fts5corrupt3.test154
-rw-r--r--ext/fts5/test/fts5expr.test4
-rw-r--r--ext/fts5/test/fts5faultI.test237
-rw-r--r--ext/fts5/test/fts5locale.test576
-rw-r--r--ext/fts5/test/fts5misc.test97
-rw-r--r--ext/fts5/test/fts5origintext.test20
-rw-r--r--ext/fts5/test/fts5secure8.test4
-rw-r--r--ext/fts5/test/fts5simple.test29
-rw-r--r--ext/fts5/test/fts5tokenizer3.test77
-rw-r--r--ext/fts5/test/fts5trigram.test88
-rw-r--r--ext/fts5/test/fts5unicode2.test20
-rw-r--r--manifest75
-rw-r--r--manifest.uuid2
-rw-r--r--src/date.c10
-rw-r--r--src/os_unix.c2
-rw-r--r--src/select.c3
-rw-r--r--src/whereexpr.c15
-rw-r--r--test/date5.test86
37 files changed, 3269 insertions, 331 deletions
diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl
index 58d90c68c..1306629da 100644
--- a/ext/fts3/unicode/mkunicode.tcl
+++ b/ext/fts3/unicode/mkunicode.tcl
@@ -628,6 +628,9 @@ proc print_categories {lMap} {
$caseP
$caseS
$caseZ
+
+ default:
+ return 1;
}
return 0;
}
diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl
index 6762a036d..634dc70cb 100644
--- a/ext/fts5/extract_api_docs.tcl
+++ b/ext/fts5/extract_api_docs.tcl
@@ -108,8 +108,11 @@ proc get_tokenizer_docs {data} {
append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
continue
}
+ if {[regexp {FTS5_TOKENIZER} $line]} {
+ set line </dl><p>
+ }
if {[regexp {SYNONYM SUPPORT} $line]} {
- set line "</dl><h3>Synonym Support</h3>"
+ set line "<h3>Synonym Support</h3>"
}
if {[string trim $line] == ""} {
append res "<p>\n"
diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h
index d3042fcb8..682a8da38 100644
--- a/ext/fts5/fts5.h
+++ b/ext/fts5/fts5.h
@@ -308,9 +308,32 @@ struct Fts5PhraseIter {
**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" or "detail=column" option.
+**
+** xColumnLocale(pFts5, iCol, pzLocale, pnLocale)
+** If parameter iCol is less than zero, or greater than or equal to the
+** number of columns in the table, SQLITE_RANGE is returned.
+**
+** Otherwise, this function attempts to retrieve the locale associated
+** with column iCol of the current row. Usually, there is no associated
+** locale, and output parameters (*pzLocale) and (*pnLocale) are set
+** to NULL and 0, respectively. However, if the fts5_locale() function
+** was used to associate a locale with the value when it was inserted
+** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated
+** buffer containing the name of the locale in utf-8 encoding. (*pnLocale)
+** is set to the size in bytes of the buffer, not including the
+** nul-terminator.
+**
+** If successful, SQLITE_OK is returned. Or, if an error occurs, an
+** SQLite error code is returned. The final value of the output parameters
+** is undefined in this case.
+**
+** xTokenize_v2:
+** Tokenize text using the tokenizer belonging to the FTS5 table. This
+** API is the same as the xTokenize() API, except that it allows a tokenizer
+** locale to be specified.
*/
struct Fts5ExtensionApi {
- int iVersion; /* Currently always set to 3 */
+ int iVersion; /* Currently always set to 4 */
void *(*xUserData)(Fts5Context*);
@@ -352,6 +375,15 @@ struct Fts5ExtensionApi {
const char **ppToken, int *pnToken
);
int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*);
+
+ /* Below this point are iVersion>=4 only */
+ int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn);
+ int (*xTokenize_v2)(Fts5Context*,
+ const char *pText, int nText, /* Text to tokenize */
+ const char *pLoc, int nLoc, /* Locale to pass to tokenizer */
+ void *pCtx, /* Context passed to xToken() */
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
+ );
};
/*
@@ -364,6 +396,7 @@ struct Fts5ExtensionApi {
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
** following structure. All structure methods must be defined, setting
+**
** any member of the fts5_tokenizer struct to NULL leads to undefined
** behaviour. The structure methods are expected to function as follows:
**
@@ -372,7 +405,7 @@ struct Fts5ExtensionApi {
** A tokenizer instance is required to actually tokenize text.
**
** The first argument passed to this function is a copy of the (void*)
-** pointer provided by the application when the fts5_tokenizer object
+** pointer provided by the application when the fts5_tokenizer_v2 object
** was registered with FTS5 (the third argument to xCreateTokenizer()).
** The second and third arguments are an array of nul-terminated strings
** containing the tokenizer arguments, if any, specified following the
@@ -396,7 +429,7 @@ struct Fts5ExtensionApi {
** argument passed to this function is a pointer to an Fts5Tokenizer object
** returned by an earlier call to xCreate().
**
-** The second argument indicates the reason that FTS5 is requesting
+** The third argument indicates the reason that FTS5 is requesting
** tokenization of the supplied text. This is always one of the following
** four values:
**
@@ -420,6 +453,13 @@ struct Fts5ExtensionApi {
** on a columnsize=0 database.
** </ul>
**
+** The sixth and seventh arguments passed to xTokenize() - pLocale and
+** nLocale - are a pointer to a buffer containing the locale to use for
+** tokenization (e.g. "en_US") and its size in bytes, respectively. The
+** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in
+** which case nLocale is always 0) to indicate that the tokenizer should
+** use its default locale.
+**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The third and fourth
@@ -443,6 +483,29 @@ struct Fts5ExtensionApi {
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
+** If the tokenizer is registered using an fts5_tokenizer_v2 object,
+** then the xTokenize() method has two additional arguments - pLocale
+** and nLocale. These specify the locale that the tokenizer should use
+** for the current request. If pLocale and nLocale are both 0, then the
+** tokenizer should use its default locale. Otherwise, pLocale points to
+** an nLocale byte buffer containing the name of the locale to use as utf-8
+** text. pLocale is not nul-terminated.
+**
+** FTS5_TOKENIZER
+**
+** There is also an fts5_tokenizer object. This is an older version of
+** fts5_tokenizer_v2. It is similar except that:
+**
+** <ul>
+** <li> There is no "iVersion" field, and
+** <li> The xTokenize() method does not take a locale argument.
+** </ul>
+**
+** fts5_tokenizer tokenizers should be registered with the xCreateTokenizer()
+** function, instead of xCreateTokenizer_v2(). Tokenizer implementations
+** registered using either API may be retrieved using both xFindTokenizer()
+** and xFindTokenizer_v2().
+**
** SYNONYM SUPPORT
**
** Custom tokenizers may also support synonyms. Consider a case in which a
@@ -551,6 +614,33 @@ struct Fts5ExtensionApi {
** inefficient.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
+typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2;
+struct fts5_tokenizer_v2 {
+ int iVersion; /* Currently always 2 */
+
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
+ void (*xDelete)(Fts5Tokenizer*);
+ int (*xTokenize)(Fts5Tokenizer*,
+ void *pCtx,
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
+ const char *pText, int nText,
+ const char *pLocale, int nLocale,
+ int (*xToken)(
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Pointer to buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Byte offset of token within input text */
+ int iEnd /* Byte offset of end of token within input text */
+ )
+ );
+};
+
+/*
+** New code should use the fts5_tokenizer_v2 type to define tokenizer
+** implementations. The following type is included for legacy applications
+** that still use it.
+*/
typedef struct fts5_tokenizer fts5_tokenizer;
struct fts5_tokenizer {
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
@@ -570,6 +660,7 @@ struct fts5_tokenizer {
);
};
+
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY 0x0001
#define FTS5_TOKENIZE_PREFIX 0x0002
@@ -589,7 +680,7 @@ struct fts5_tokenizer {
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
- int iVersion; /* Currently always set to 2 */
+ int iVersion; /* Currently always set to 3 */
/* Create a new tokenizer */
int (*xCreateTokenizer)(
@@ -616,6 +707,25 @@ struct fts5_api {
fts5_extension_function xFunction,
void (*xDestroy)(void*)
);
+
+ /* APIs below this point are only available if iVersion>=3 */
+
+ /* Create a new tokenizer */
+ int (*xCreateTokenizer_v2)(
+ fts5_api *pApi,
+ const char *zName,
+ void *pUserData,
+ fts5_tokenizer_v2 *pTokenizer,
+ void (*xDestroy)(void*)
+ );
+
+ /* Find an existing tokenizer */
+ int (*xFindTokenizer_v2)(
+ fts5_api *pApi,
+ const char *zName,
+ void **ppUserData,
+ fts5_tokenizer_v2 **ppTokenizer
+ );
};
/*
diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h
index 4311faceb..7e4111957 100644
--- a/ext/fts5/fts5Int.h
+++ b/ext/fts5/fts5Int.h
@@ -162,10 +162,13 @@ typedef struct Fts5TokenizerConfig Fts5TokenizerConfig;
struct Fts5TokenizerConfig {
Fts5Tokenizer *pTok;
- fts5_tokenizer *pTokApi;
+ fts5_tokenizer_v2 *pApi2;
+ fts5_tokenizer *pApi1;
const char **azArg;
int nArg;
int ePattern; /* FTS_PATTERN_XXX constant */
+ const char *pLocale; /* Current locale to use */
+ int nLocale; /* Size of pLocale in bytes */
};
/*
@@ -206,6 +209,8 @@ struct Fts5TokenizerConfig {
**
** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
+** bLocale:
+** Set to true if locale=1 was specified when the table was created.
*/
struct Fts5Config {
sqlite3 *db; /* Database handle */
@@ -223,10 +228,12 @@ struct Fts5Config {
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
int bTokendata; /* "tokendata=" option value (dflt==0) */
+ int bLocale; /* "locale=" option value (dflt==0) */
int eDetail; /* FTS5_DETAIL_XXX value */
char *zContentExprlist;
Fts5TokenizerConfig t;
int bLock; /* True when table is preparing statement */
+
/* Values loaded from the %_config table */
int iVersion; /* fts5 file format 'version' */
@@ -292,6 +299,8 @@ int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
+void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...);
+
/*
** End of interface to code in fts5_config.c.
**************************************************************************/
@@ -336,7 +345,7 @@ char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
void sqlite3Fts5Put32(u8*, int);
int sqlite3Fts5Get32(const u8*);
-#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
+#define FTS5_POS2COLUMN(iPos) (int)((iPos >> 32) & 0x7FFFFFFF)
#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0x7FFFFFFF)
typedef struct Fts5PoslistReader Fts5PoslistReader;
@@ -627,6 +636,17 @@ Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);
int sqlite3Fts5FlushToDisk(Fts5Table*);
+int sqlite3Fts5ExtractText(
+ Fts5Config *pConfig,
+ sqlite3_value *pVal, /* Value to extract text from */
+ int bContent, /* Loaded from content table */
+ int *pbResetTokenizer, /* OUT: True if ClearLocale() required */
+ const char **ppText, /* OUT: Pointer to text buffer */
+ int *pnText /* OUT: Size of (*ppText) in bytes */
+);
+
+void sqlite3Fts5ClearLocale(Fts5Config *pConfig);
+
/*
** End of interface to code in fts5.c.
**************************************************************************/
@@ -706,7 +726,7 @@ int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
int sqlite3Fts5DropAll(Fts5Config*);
int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
-int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
+int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int);
int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);
@@ -732,6 +752,9 @@ int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
int sqlite3Fts5StorageReset(Fts5Storage *p);
+void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*);
+int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel);
+
/*
** End of interface to code in fts5_storage.c.
**************************************************************************/
diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c
index 30101fbe2..eb3f7e359 100644
--- a/ext/fts5/fts5_aux.c
+++ b/ext/fts5/fts5_aux.c
@@ -226,6 +226,7 @@ static int fts5HighlightCb(
return rc;
}
+
/*
** Implementation of highlight() function.
*/
@@ -256,12 +257,19 @@ static void fts5HighlightFunction(
sqlite3_result_text(pCtx, "", -1, SQLITE_STATIC);
rc = SQLITE_OK;
}else if( ctx.zIn ){
+ const char *pLoc = 0; /* Locale of column iCol */
+ int nLoc = 0; /* Size of pLoc in bytes */
if( rc==SQLITE_OK ){
rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
}
if( rc==SQLITE_OK ){
- rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
+ rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc);
+ }
+ if( rc==SQLITE_OK ){
+ rc = pApi->xTokenize_v2(
+ pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb
+ );
}
if( ctx.bOpen ){
fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
@@ -458,6 +466,8 @@ static void fts5SnippetFunction(
memset(&sFinder, 0, sizeof(Fts5SFinder));
for(i=0; i<nCol; i++){
if( iCol<0 || iCol==i ){
+ const char *pLoc = 0; /* Locale of column iCol */
+ int nLoc = 0; /* Size of pLoc in bytes */
int nDoc;
int nDocsize;
int ii;
@@ -465,8 +475,10 @@ static void fts5SnippetFunction(
sFinder.nFirst = 0;
rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
if( rc!=SQLITE_OK ) break;
- rc = pApi->xTokenize(pFts,
- sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
+ rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc);
+ if( rc!=SQLITE_OK ) break;
+ rc = pApi->xTokenize_v2(pFts,
+ sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb
);
if( rc!=SQLITE_OK ) break;
rc = pApi->xColumnSize(pFts, i, &nDocsize);
@@ -524,6 +536,9 @@ static void fts5SnippetFunction(
rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
}
if( ctx.zIn ){
+ const char *pLoc = 0; /* Locale of column iBestCol */
+ int nLoc = 0; /* Bytes in pLoc */
+
if( rc==SQLITE_OK ){
rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
}
@@ -542,7 +557,12 @@ static void fts5SnippetFunction(
}
if( rc==SQLITE_OK ){
- rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
+ rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc);
+ }
+ if( rc==SQLITE_OK ){
+ rc = pApi->xTokenize_v2(
+ pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb
+ );
}
if( ctx.bOpen ){
fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c
index 01f40455a..3cb1bd3be 100644
--- a/ext/fts5/fts5_config.c
+++ b/ext/fts5/fts5_config.c
@@ -380,6 +380,16 @@ static int fts5ConfigParseSpecial(
return rc;
}
+ if( sqlite3_strnicmp("locale", zCmd, nCmd)==0 ){
+ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
+ *pzErr = sqlite3_mprintf("malformed locale=... directive");
+ rc = SQLITE_ERROR;
+ }else{
+ pConfig->bLocale = (zArg[0]=='1');
+ }
+ return rc;
+ }
+
if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
const Fts5Enum aDetail[] = {
{ "none", FTS5_DETAIL_NONE },
@@ -669,7 +679,11 @@ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
if( pConfig ){
int i;
if( pConfig->t.pTok ){
- pConfig->t.pTokApi->xDelete(pConfig->t.pTok);
+ if( pConfig->t.pApi1 ){
+ pConfig->t.pApi1->xDelete(pConfig->t.pTok);
+ }else{
+ pConfig->t.pApi2->xDelete(pConfig->t.pTok);
+ }
}
sqlite3_free((char*)pConfig->t.azArg);
sqlite3_free(pConfig->zDb);
@@ -752,9 +766,15 @@ int sqlite3Fts5Tokenize(
rc = sqlite3Fts5LoadTokenizer(pConfig);
}
if( rc==SQLITE_OK ){
- rc = pConfig->t.pTokApi->xTokenize(
- pConfig->t.pTok, pCtx, flags, pText, nText, xToken
- );
+ if( pConfig->t.pApi1 ){
+ rc = pConfig->t.pApi1->xTokenize(
+ pConfig->t.pTok, pCtx, flags, pText, nText, xToken
+ );
+ }else{
+ rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags,
+ pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken
+ );
+ }
}
}
return rc;
@@ -1011,13 +1031,10 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
&& iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE
){
rc = SQLITE_ERROR;
- if( pConfig->pzErrmsg ){
- assert( 0==*pConfig->pzErrmsg );
- *pConfig->pzErrmsg = sqlite3_mprintf("invalid fts5 file format "
- "(found %d, expected %d or %d) - run 'rebuild'",
- iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE
- );
- }
+ sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format "
+ "(found %d, expected %d or %d) - run 'rebuild'",
+ iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE
+ );
}else{
pConfig->iVersion = iVersion;
}
@@ -1027,3 +1044,26 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
}
return rc;
}
+
+/*
+** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer
+** containing the error message created using printf() style formatting
+** string zFmt and its trailing arguments.
+*/
+void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){
+ va_list ap; /* ... printf arguments */
+ char *zMsg = 0;
+
+ va_start(ap, zFmt);
+ zMsg = sqlite3_vmprintf(zFmt, ap);
+ if( pConfig->pzErrmsg ){
+ assert( *pConfig->pzErrmsg==0 );
+ *pConfig->pzErrmsg = zMsg;
+ }else{
+ sqlite3_free(zMsg);
+ }
+
+ va_end(ap);
+}
+
+
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c
index 1f089d875..cd44b96bd 100644
--- a/ext/fts5/fts5_expr.c
+++ b/ext/fts5/fts5_expr.c
@@ -286,11 +286,12 @@ int sqlite3Fts5ExprNew(
}while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
+ assert( sParse.pExpr || sParse.rc!=SQLITE_OK );
assert_expr_depth_ok(sParse.rc, sParse.pExpr);
/* If the LHS of the MATCH expression was a user column, apply the
** implicit column-filter. */
- if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){
+ if( sParse.rc==SQLITE_OK && iCol<pConfig->nCol ){
int n = sizeof(Fts5Colset);
Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n);
if( pColset ){
@@ -307,15 +308,7 @@ int sqlite3Fts5ExprNew(
sParse.rc = SQLITE_NOMEM;
sqlite3Fts5ParseNodeFree(sParse.pExpr);
}else{
- if( !sParse.pExpr ){
- const int nByte = sizeof(Fts5ExprNode);
- pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
- if( pNew->pRoot ){
- pNew->pRoot->bEof = 1;
- }
- }else{
- pNew->pRoot = sParse.pExpr;
- }
+ pNew->pRoot = sParse.pExpr;
pNew->pIndex = 0;
pNew->pConfig = pConfig;
pNew->apExprPhrase = sParse.apPhrase;
@@ -1133,7 +1126,7 @@ static int fts5ExprNodeTest_STRING(
}
}else{
Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
- if( pIter->iRowid==iLast || pIter->bEof ) continue;
+ if( pIter->iRowid==iLast ) continue;
bMatch = 0;
if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
return rc;
@@ -1655,9 +1648,6 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
Fts5ExprNearset *pRet = 0;
if( pParse->rc==SQLITE_OK ){
- if( pPhrase==0 ){
- return pNear;
- }
if( pNear==0 ){
sqlite3_int64 nByte;
nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c
index c862e2f95..e271402ec 100644
--- a/ext/fts5/fts5_main.c
+++ b/ext/fts5/fts5_main.c
@@ -103,11 +103,28 @@ struct Fts5Auxiliary {
** Each tokenizer module registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pTok list.
+**
+** bV2Native:
+** True if the tokenizer was registered using xCreateTokenizer_v2(), false
+** for xCreateTokenizer(). If this variable is true, then x2 is populated
+** with the routines as supplied by the caller and x1 contains synthesized
+** wrapper routines. In this case the user-data pointer passed to
+** x1.xCreate should be a pointer to the Fts5TokenizerModule structure,
+** not a copy of pUserData.
+**
+** Of course, if bV2Native is false, then x1 contains the real routines and
+** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule
+** object should be passed to x2.xCreate.
+**
+** The synthesized wrapper routines are necessary for xFindTokenizer(_v2)
+** calls.
*/
struct Fts5TokenizerModule {
char *zName; /* Name of tokenizer */
void *pUserData; /* User pointer passed to xCreate() */
- fts5_tokenizer x; /* Tokenizer functions */
+ int bV2Native; /* True if v2 native tokenizer */
+ fts5_tokenizer x1; /* Tokenizer functions */
+ fts5_tokenizer_v2 x2; /* V2 tokenizer functions */
void (*xDestroy)(void*); /* Destructor function */
Fts5TokenizerModule *pNext; /* Next registered tokenizer module */
};
@@ -118,7 +135,7 @@ struct Fts5FullTable {
Fts5Global *pGlobal; /* Global (connection wide) data */
Fts5Cursor *pSortCsr; /* Sort data from this cursor */
int iSavepoint; /* Successful xSavepoint()+1 */
-
+
#ifdef SQLITE_DEBUG
struct Fts5TransactionState ts;
#endif
@@ -195,7 +212,7 @@ struct Fts5Cursor {
Fts5Auxiliary *pAux; /* Currently executing extension function */
Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
- /* Cache used by auxiliary functions xInst() and xInstCount() */
+ /* Cache used by auxiliary API functions xInst() and xInstCount() */
Fts5PoslistReader *aInstIter; /* One for each phrase */
int nInstAlloc; /* Size of aInst[] array (entries / 3) */
int nInstCount; /* Number of phrase instances */
@@ -230,6 +247,12 @@ struct Fts5Cursor {
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y) (((x) & (y))!=0)
+/*
+** The subtype value and header bytes used by fts5_locale().
+*/
+#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L')
+#define FTS5_LOCALE_HEADER "\x00\xE0\xB2\xEB"
+
/*
** Macros to Set(), Clear() and Test() cursor flags.
@@ -607,7 +630,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
if( bSeenRank ) continue;
idxStr[iIdxStr++] = 'r';
bSeenRank = 1;
- }else if( iCol>=0 ){
+ }else{
nSeenMatch++;
idxStr[iIdxStr++] = 'M';
sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol);
@@ -993,7 +1016,7 @@ static int fts5PrepareStatement(
rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
SQLITE_PREPARE_PERSISTENT, &pRet, 0);
if( rc!=SQLITE_OK ){
- *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db));
+ sqlite3Fts5ConfigErrmsg(pConfig, "%s", sqlite3_errmsg(pConfig->db));
}
sqlite3_free(zSql);
}
@@ -1228,6 +1251,188 @@ static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){
va_end(ap);
}
+/*
+** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale
+** specified by pLocale/nLocale. The buffer indicated by pLocale must remain
+** valid until after the final call to sqlite3Fts5Tokenize() that will use
+** the locale.
+*/
+static void fts5SetLocale(
+ Fts5Config *pConfig,
+ const char *zLocale,
+ int nLocale
+){
+ Fts5TokenizerConfig *pT = &pConfig->t;
+ pT->pLocale = zLocale;
+ pT->nLocale = nLocale;
+}
+
+/*
+** Clear any locale configured by an earlier call to fts5SetLocale() or
+** sqlite3Fts5ExtractText().
+*/
+void sqlite3Fts5ClearLocale(Fts5Config *pConfig){
+ fts5SetLocale(pConfig, 0, 0);
+}
+
+/*
+** This function is used to extract utf-8 text from an sqlite3_value. This
+** is usually done in order to tokenize it. For example, when:
+**
+** * a value is written to an fts5 table,
+** * a value is deleted from an FTS5_CONTENT_NORMAL table,
+** * a value containing a query expression is passed to xFilter()
+**
+** and so on.
+**
+** This function handles 2 cases:
+**
+** 1) Ordinary values. The text can be extracted from these using
+** sqlite3_value_text().
+**
+** 2) Combination text/locale blobs created by fts5_locale(). There
+** are several cases for these:
+**
+** * Blobs tagged with FTS5_LOCALE_SUBTYPE.
+** * Blobs read from the content table of a locale=1 external-content
+** table, and
+** * Blobs read from the content table of a locale=1 regular
+** content table.
+**
+** The first two cases above should have the 4 byte FTS5_LOCALE_HEADER
+** header. It is an error if a blob with the subtype or a blob read
+** from the content table of an external content table does not have
+** the required header. A blob read from the content table of a regular
+** locale=1 table does not have the header. This is to save space.
+**
+** If successful, SQLITE_OK is returned and output parameters (*ppText)
+** and (*pnText) are set to point to a buffer containing the extracted utf-8
+** text and its length in bytes, respectively. The buffer is not
+** nul-terminated. It has the same lifetime as the sqlite3_value object
+** from which it is extracted.
+**
+** Parameter bContent must be true if the value was read from an indexed
+** column (i.e. not UNINDEXED) of the on disk content.
+**
+** If pbResetTokenizer is not NULL and if case (2) is used, then
+** fts5SetLocale() is called to ensure subsequent sqlite3Fts5Tokenize() calls
+** use the locale. In this case (*pbResetTokenizer) is set to true before
+** returning, to indicate that the caller must call sqlite3Fts5ClearLocale()
+** to clear the locale after tokenizing the text.
+*/
+int sqlite3Fts5ExtractText(
+ Fts5Config *pConfig,
+ sqlite3_value *pVal, /* Value to extract text from */
+ int bContent, /* True if indexed table content */
+ int *pbResetTokenizer, /* OUT: True if ClearLocale() required */
+ const char **ppText, /* OUT: Pointer to text buffer */
+ int *pnText /* OUT: Size of (*ppText) in bytes */
+){
+ const char *pText = 0;
+ int nText = 0;
+ int rc = SQLITE_OK;
+ int bDecodeBlob = 0;
+
+ assert( pbResetTokenizer==0 || *pbResetTokenizer==0 );
+ assert( bContent==0 || pConfig->eContent!=FTS5_CONTENT_NONE );
+ assert( bContent==0 || sqlite3_value_subtype(pVal)==0 );
+
+ if( sqlite3_value_type(pVal)==SQLITE_BLOB ){
+ if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
+ || (bContent && pConfig->bLocale)
+ ){
+ bDecodeBlob = 1;
+ }
+ }
+
+ if( bDecodeBlob ){
+ const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1;
+ const u8 *pBlob = sqlite3_value_blob(pVal);
+ int nBlob = sqlite3_value_bytes(pVal);
+
+ /* Unless this blob was read from the %_content table of an
+ ** FTS5_CONTENT_NORMAL table, it should have the 4 byte fts5_locale()
+ ** header. Check for this. If it is not found, return an error. */
+ if( (!bContent || pConfig->eContent!=FTS5_CONTENT_NORMAL) ){
+ if( nBlob<SZHDR || memcmp(FTS5_LOCALE_HEADER, pBlob, SZHDR) ){
+ rc = SQLITE_ERROR;
+ }else{
+ pBlob += 4;
+ nBlob -= 4;
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ int nLocale = 0;
+
+ for(nLocale=0; nLocale<nBlob; nLocale++){
+ if( pBlob[nLocale]==0x00 ) break;
+ }
+ if( nLocale==nBlob || nLocale==0 ){
+ rc = SQLITE_ERROR;
+ }else{
+ pText = (const char*)&pBlob[nLocale+1];
+ nText = nBlob-nLocale-1;
+
+ if( pbResetTokenizer ){
+ fts5SetLocale(pConfig, (const char*)pBlob, nLocale);
+ *pbResetTokenizer = 1;
+ }
+ }
+ }
+
+ }else{
+ pText = (const char*)sqlite3_value_text(pVal);
+ nText = sqlite3_value_bytes(pVal);
+ }
+
+ *ppText = pText;
+ *pnText = nText;
+ return rc;
+}
+
+/*
+** Argument pVal is the text of a full-text search expression. It may or
+** may not have been wrapped by fts5_locale(). This function extracts
+** the text of the expression, and sets output variable (*pzText) to
+** point to a nul-terminated buffer containing the expression.
+**
+** If pVal was an fts5_locale() value, then fts5SetLocale() is called to
+** set the tokenizer to use the specified locale.
+**
+** If output variable (*pbFreeAndReset) is set to true, then the caller
+** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer
+** locale, and (b) call sqlite3_free() to free (*pzText).
+*/
+static int fts5ExtractExprText(
+ Fts5Config *pConfig, /* Fts5 configuration */
+ sqlite3_value *pVal, /* Value to extract expression text from */
+ char **pzText, /* OUT: nul-terminated buffer of text */
+ int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */
+){
+ const char *zText = 0;
+ int nText = 0;
+ int rc = SQLITE_OK;
+ int bReset = 0;
+
+ *pbFreeAndReset = 0;
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, &bReset, &zText, &nText);
+ if( rc==SQLITE_OK ){
+ if( bReset ){
+ *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, zText);
+ if( rc!=SQLITE_OK ){
+ sqlite3Fts5ClearLocale(pConfig);
+ }else{
+ *pbFreeAndReset = 1;
+ }
+ }else{
+ *pzText = (char*)zText;
+ }
+ }
+
+ return rc;
+}
+
/*
** This is the xFilter interface for the virtual table. See
@@ -1263,13 +1468,7 @@ static int fts5FilterMethod(
int iIdxStr = 0;
Fts5Expr *pExpr = 0;
- if( pConfig->bLock ){
- pTab->p.base.zErrMsg = sqlite3_mprintf(
- "recursively defined fts5 content table"
- );
- return SQLITE_ERROR;
- }
-
+ assert( pConfig->bLock==0 );
if( pCsr->ePlan ){
fts5FreeCursorComponents(pCsr);
memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
@@ -1293,8 +1492,14 @@ static int fts5FilterMethod(
pRank = apVal[i];
break;
case 'M': {
- const char *zText = (const char*)sqlite3_value_text(apVal[i]);
+ char *zText = 0;
+ int bFreeAndReset = 0;
+ int bInternal = 0;
+
+ rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset);
+ if( rc!=SQLITE_OK ) goto filter_out;
if( zText==0 ) zText = "";
+
iCol = 0;
do{
iCol = iCol*10 + (idxStr[iIdxStr]-'0');
@@ -1306,7 +1511,7 @@ static int fts5FilterMethod(
** indicates that the MATCH expression is not a full text query,
** but a request for an internal parameter. */
rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
- goto filter_out;
+ bInternal = 1;
}else{
char **pzErr = &pTab->p.base.zErrMsg;
rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
@@ -1314,9 +1519,15 @@ static int fts5FilterMethod(
rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
pExpr = 0;
}
- if( rc!=SQLITE_OK ) goto filter_out;
}
+ if( bFreeAndReset ){
+ sqlite3_free(zText);
+ sqlite3Fts5ClearLocale(pConfig);
+ }
+
+ if( bInternal || rc!=SQLITE_OK ) goto filter_out;
+
break;
}
case 'L':
@@ -1624,7 +1835,7 @@ static int fts5SpecialDelete(
int eType1 = sqlite3_value_type(apVal[1]);
if( eType1==SQLITE_INTEGER ){
sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]);
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2], 0);
}
return rc;
}
@@ -1748,7 +1959,7 @@ static int fts5UpdateMethod(
/* DELETE */
else if( nArg==1 ){
i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0);
bUpdateOrDelete = 1;
}
@@ -1756,16 +1967,31 @@ static int fts5UpdateMethod(
else{
int eType1 = sqlite3_value_numeric_type(apVal[1]);
- if( eType1!=SQLITE_INTEGER && eType1!=SQLITE_NULL ){
- rc = SQLITE_MISMATCH;
+ /* Ensure that no fts5_locale() values are written to locale=0 tables.
+ ** And that no blobs except fts5_locale() blobs are written to indexed
+ ** (i.e. not UNINDEXED) columns of locale=1 tables. */
+ int ii;
+ for(ii=0; ii<pConfig->nCol; ii++){
+ if( sqlite3_value_type(apVal[ii+2])==SQLITE_BLOB ){
+ int bSub = (sqlite3_value_subtype(apVal[ii+2])==FTS5_LOCALE_SUBTYPE);
+ if( (pConfig->bLocale && !bSub && pConfig->abUnindexed[ii]==0)
+ || (pConfig->bLocale==0 && bSub)
+ ){
+ if( pConfig->bLocale==0 ){
+ fts5SetVtabError(pTab, "fts5_locale() requires locale=1");
+ }
+ rc = SQLITE_MISMATCH;
+ goto update_out;
+ }
+ }
}
- else if( eType0!=SQLITE_INTEGER ){
+ if( eType0!=SQLITE_INTEGER ){
/* An INSERT statement. If the conflict-mode is REPLACE, first remove
** the current entry (if any). */
if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){
i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0);
bUpdateOrDelete = 1;
}
fts5StorageInsert(&rc, pTab, apVal, pRowid);
@@ -1775,28 +2001,35 @@ static int fts5UpdateMethod(
else{
i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */
i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */
- if( eType1==SQLITE_INTEGER && iOld!=iNew ){
+ if( eType1!=SQLITE_INTEGER ){
+ rc = SQLITE_MISMATCH;
+ }else if( iOld!=iNew ){
if( eConflict==SQLITE_REPLACE ){
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1);
if( rc==SQLITE_OK ){
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0);
}
fts5StorageInsert(&rc, pTab, apVal, pRowid);
}else{
- rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
+ rc = sqlite3Fts5StorageFindDeleteRow(pTab->pStorage, iOld);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5StorageContentInsert(pTab->pStorage,apVal,pRowid);
+ }
if( rc==SQLITE_OK ){
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid);
}
}
}else{
- rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1);
fts5StorageInsert(&rc, pTab, apVal, pRowid);
}
bUpdateOrDelete = 1;
+ sqlite3Fts5StorageReleaseDeleteRow(pTab->pStorage);
}
+
}
}
@@ -1813,6 +2046,7 @@ static int fts5UpdateMethod(
}
}
+ update_out:
pTab->p.pConfig->pzErrmsg = 0;
return rc;
}
@@ -1890,17 +2124,40 @@ static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}
-static int fts5ApiTokenize(
+/*
+** Implementation of xTokenize_v2() API.
+*/
+static int fts5ApiTokenize_v2(
Fts5Context *pCtx,
const char *pText, int nText,
+ const char *pLoc, int nLoc,
void *pUserData,
int (*xToken)(void*, int, const char*, int, int, int)
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
- return sqlite3Fts5Tokenize(
- pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
+ int rc = SQLITE_OK;
+
+ fts5SetLocale(pTab->pConfig, pLoc, nLoc);
+ rc = sqlite3Fts5Tokenize(pTab->pConfig,
+ FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
);
+ fts5SetLocale(pTab->pConfig, 0, 0);
+
+ return rc;
+}
+
+/*
+** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0
+** passed as the locale.
+*/
+static int fts5ApiTokenize(
+ Fts5Context *pCtx,
+ const char *pText, int nText,
+ void *pUserData,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken);
}
static int fts5ApiPhraseCount(Fts5Context *pCtx){
@@ -1922,28 +2179,37 @@ static int fts5ApiColumnText(
int rc = SQLITE_OK;
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+
+ assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL );
if( iCol<0 || iCol>=pTab->pConfig->nCol ){
rc = SQLITE_RANGE;
- }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab))
- || pCsr->ePlan==FTS5_PLAN_SPECIAL
- ){
+ }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) ){
*pz = 0;
*pn = 0;
}else{
rc = fts5SeekCursor(pCsr, 0);
if( rc==SQLITE_OK ){
- *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1);
- *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
+ Fts5Config *pConfig = pTab->pConfig;
+ int bContent = (pConfig->abUnindexed[iCol]==0);
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1);
+ sqlite3Fts5ExtractText(pConfig, pVal, bContent, 0, pz, pn);
}
}
return rc;
}
+/*
+** This is called by various API functions - xInst, xPhraseFirst,
+** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase
+** of the current row. This function works for both detail=full tables (in
+** which case the position-list was read from the fts index) and for other
+** detail= modes if the row content is available.
+*/
static int fts5CsrPoslist(
- Fts5Cursor *pCsr,
- int iPhrase,
- const u8 **pa,
- int *pn
+ Fts5Cursor *pCsr, /* Fts5 cursor object */
+ int iPhrase, /* Phrase to find position list for */
+ const u8 **pa, /* OUT: Pointer to position list buffer */
+ int *pn /* OUT: Size of (*pa) in bytes */
){
Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
int rc = SQLITE_OK;
@@ -1951,20 +2217,34 @@ static int fts5CsrPoslist(
if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){
rc = SQLITE_RANGE;
+ }else if( pConfig->eDetail!=FTS5_DETAIL_FULL
+ && pConfig->eContent==FTS5_CONTENT_NONE
+ ){
+ *pa = 0;
+ *pn = 0;
+ return SQLITE_OK;
}else if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){
if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
Fts5PoslistPopulator *aPopulator;
int i;
+
aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive);
if( aPopulator==0 ) rc = SQLITE_NOMEM;
+ if( rc==SQLITE_OK ){
+ rc = fts5SeekCursor(pCsr, 0);
+ }
for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){
- int n; const char *z;
- rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n);
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1);
+ const char *z = 0;
+ int n = 0;
+ int bReset = 0;
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5ExprPopulatePoslists(
pConfig, pCsr->pExpr, aPopulator, i, z, n
);
}
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
}
sqlite3_free(aPopulator);
@@ -1989,7 +2269,6 @@ static int fts5CsrPoslist(
*pn = 0;
}
-
return rc;
}
@@ -2058,7 +2337,8 @@ static int fts5CacheInstArray(Fts5Cursor *pCsr){
aInst[0] = iBest;
aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
- if( aInst[1]<0 || aInst[1]>=nCol ){
+ assert( aInst[1]>=0 );
+ if( aInst[1]>=nCol ){
rc = FTS5_CORRUPT;
break;
}
@@ -2145,16 +2425,21 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
}
}else{
int i;
+ rc = fts5SeekCursor(pCsr, 0);
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( pConfig->abUnindexed[i]==0 ){
- const char *z; int n;
- void *p = (void*)(&pCsr->aColumnSize[i]);
+ const char *z = 0;
+ int n = 0;
+ int bReset = 0;
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1);
+
pCsr->aColumnSize[i] = 0;
- rc = fts5ApiColumnText(pCtx, i, &z, &n);
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n);
if( rc==SQLITE_OK ){
- rc = sqlite3Fts5Tokenize(
- pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
+ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX,
+ z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb
);
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
}
}
}
@@ -2401,8 +2686,71 @@ static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);
+/*
+** The xColumnLocale() API.
+*/
+static int fts5ApiColumnLocale(
+ Fts5Context *pCtx,
+ int iCol,
+ const char **pzLocale,
+ int *pnLocale
+){
+ int rc = SQLITE_OK;
+ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+ Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
+
+ *pzLocale = 0;
+ *pnLocale = 0;
+
+ assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL );
+ if( iCol<0 || iCol>=pConfig->nCol ){
+ rc = SQLITE_RANGE;
+ }else if(
+ pConfig->abUnindexed[iCol]==0
+ && pConfig->eContent!=FTS5_CONTENT_NONE
+ && pConfig->bLocale
+ ){
+ rc = fts5SeekCursor(pCsr, 0);
+ if( rc==SQLITE_OK ){
+ /* Load the value into pVal. pVal is a locale/text pair iff:
+ **
+ ** 1) It is an SQLITE_BLOB, and
+ ** 2) Either the subtype is FTS5_LOCALE_SUBTYPE, or else the
+ ** value was loaded from an FTS5_CONTENT_NORMAL table, and
+ ** 3) It does not begin with a 0x00 byte.
+ */
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1);
+ if( sqlite3_value_type(pVal)==SQLITE_BLOB ){
+ const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal);
+ int nBlob = sqlite3_value_bytes(pVal);
+ if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){
+ const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1;
+ if( nBlob<SZHDR || memcmp(FTS5_LOCALE_HEADER, pBlob, SZHDR) ){
+ rc = SQLITE_ERROR;
+ }
+ pBlob += 4;
+ nBlob -= 4;
+ }
+ if( rc==SQLITE_OK ){
+ int nLocale = 0;
+ for(nLocale=0; nLocale<nBlob && pBlob[nLocale]!=0x00; nLocale++);
+ if( nLocale==nBlob || nLocale==0 ){
+ rc = SQLITE_ERROR;
+ }else{
+ /* A locale/text pair */
+ *pzLocale = (const char*)pBlob;
+ *pnLocale = nLocale;
+ }
+ }
+ }
+ }
+ }
+
+ return rc;
+}
+
static const Fts5ExtensionApi sFts5Api = {
- 3, /* iVersion */
+ 4, /* iVersion */
fts5ApiUserData,
fts5ApiColumnCount,
fts5ApiRowCount,
@@ -2423,7 +2771,9 @@ static const Fts5ExtensionApi sFts5Api = {
fts5ApiPhraseFirstColumn,
fts5ApiPhraseNextColumn,
fts5ApiQueryToken,
- fts5ApiInstToken
+ fts5ApiInstToken,
+ fts5ApiColumnLocale,
+ fts5ApiTokenize_v2
};
/*
@@ -2474,6 +2824,7 @@ static void fts5ApiInvoke(
sqlite3_value **argv
){
assert( pCsr->pAux==0 );
+ assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL );
pCsr->pAux = pAux;
pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
pCsr->pAux = 0;
@@ -2487,6 +2838,21 @@ static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
return pCsr;
}
+/*
+** Parameter zFmt is a printf() style formatting string. This function
+** formats it using the trailing arguments and returns the result as
+** an error message to the context passed as the first argument.
+*/
+static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){
+ char *zErr = 0;
+ va_list ap;
+ va_start(ap, zFmt);
+ zErr = sqlite3_vmprintf(zFmt, ap);
+ sqlite3_result_error(pCtx, zErr, -1);
+ sqlite3_free(zErr);
+ va_end(ap);
+}
+
static void fts5ApiCallback(
sqlite3_context *context,
int argc,
@@ -2502,10 +2868,8 @@ static void fts5ApiCallback(
iCsrId = sqlite3_value_int64(argv[0]);
pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
- if( pCsr==0 || pCsr->ePlan==0 ){
- char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId);
- sqlite3_result_error(context, zErr, -1);
- sqlite3_free(zErr);
+ if( pCsr==0 || (pCsr->ePlan==0 || pCsr->ePlan==FTS5_PLAN_SPECIAL) ){
+ fts5ResultError(context, "no such cursor: %lld", iCsrId);
}else{
sqlite3_vtab *pTab = pCsr->base.pVtab;
fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
@@ -2599,6 +2963,57 @@ static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
return rc;
}
+/*
+** Value pVal was read from column iCol of the FTS5 table. This function
+** returns it to the owner of pCtx via a call to an sqlite3_result_xxx()
+** function. This function deals with the same cases as
+** sqlite3Fts5ExtractText():
+**
+** 1) Ordinary values. These can be returned using sqlite3_result_value().
+**
+** 2) Blobs from fts5_locale(). The text is extracted from these and
+** returned via sqlite3_result_text(). The locale is discarded.
+*/
+static void fts5ExtractValueFromColumn(
+ sqlite3_context *pCtx,
+ Fts5Config *pConfig,
+ int iCol,
+ sqlite3_value *pVal
+){
+ assert( pConfig->eContent!=FTS5_CONTENT_NONE );
+
+ if( pConfig->bLocale
+ && sqlite3_value_type(pVal)==SQLITE_BLOB
+ && pConfig->abUnindexed[iCol]==0
+ ){
+ const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1;
+ const u8 *pBlob = sqlite3_value_blob(pVal);
+ int nBlob = sqlite3_value_bytes(pVal);
+ int ii;
+
+ if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){
+ if( nBlob<SZHDR || memcmp(pBlob, FTS5_LOCALE_HEADER, SZHDR) ){
+ sqlite3_result_error_code(pCtx, SQLITE_ERROR);
+ return;
+ }else{
+ pBlob += 4;
+ nBlob -= 4;
+ }
+ }
+
+ for(ii=0; ii<nBlob && pBlob[ii]; ii++);
+ if( ii==0 || ii==nBlob ){
+ sqlite3_result_error_code(pCtx, SQLITE_ERROR);
+ }else{
+ const char *pText = (const char*)&pBlob[ii+1];
+ sqlite3_result_text(pCtx, pText, nBlob-ii-1, SQLITE_TRANSIENT);
+ }
+ return;
+ }
+
+ sqlite3_result_value(pCtx, pVal);
+}
+
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
@@ -2628,8 +3043,8 @@ static int fts5ColumnMethod(
** auxiliary function. */
sqlite3_result_int64(pCtx, pCsr->iCsrId);
}else if( iCol==pConfig->nCol+1 ){
-
/* The value of the "rank" column. */
+
if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
fts5PoslistBlob(pCtx, pCsr);
}else if(
@@ -2640,20 +3055,27 @@ static int fts5ColumnMethod(
fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
}
}
- }else if( !fts5IsContentless(pTab) ){
- pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
- rc = fts5SeekCursor(pCsr, 1);
- if( rc==SQLITE_OK ){
- sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+ }else{
+ /* A column created by the user containing values. */
+ int bNochange = sqlite3_vtab_nochange(pCtx);
+
+ if( fts5IsContentless(pTab) ){
+ if( bNochange && pConfig->bContentlessDelete ){
+ fts5ResultError(pCtx, "cannot UPDATE a subset of "
+ "columns on fts5 contentless-delete table: %s", pConfig->zName
+ );
+ }
+ }else if( bNochange==0 || pConfig->eContent!=FTS5_CONTENT_NORMAL ){
+ pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
+ rc = fts5SeekCursor(pCsr, 1);
+ if( rc==SQLITE_OK ){
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1);
+ fts5ExtractValueFromColumn(pCtx, pConfig, iCol, pVal);
+ }
+ pConfig->pzErrmsg = 0;
}
- pConfig->pzErrmsg = 0;
- }else if( pConfig->bContentlessDelete && sqlite3_vtab_nochange(pCtx) ){
- char *zErr = sqlite3_mprintf("cannot UPDATE a subset of "
- "columns on fts5 contentless-delete table: %s", pConfig->zName
- );
- sqlite3_result_error(pCtx, zErr, -1);
- sqlite3_free(zErr);
}
+
return rc;
}
@@ -2793,47 +3215,208 @@ static int fts5CreateAux(
}
/*
-** Register a new tokenizer. This is the implementation of the
-** fts5_api.xCreateTokenizer() method.
+** This function is used by xCreateTokenizer_v2() and xCreateTokenizer().
+** It allocates and partially populates a new Fts5TokenizerModule object.
+** The new object is already linked into the Fts5Global context before
+** returning.
+**
+** If successful, SQLITE_OK is returned and a pointer to the new
+** Fts5TokenizerModule object returned via output parameter (*ppNew). All
+** that is required is for the caller to fill in the methods in
+** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native
+** as appropriate.
+**
+** If an error occurs, an SQLite error code is returned and the final value
+** of (*ppNew) is undefined.
*/
-static int fts5CreateTokenizer(
- fts5_api *pApi, /* Global context (one per db handle) */
+static int fts5NewTokenizerModule(
+ Fts5Global *pGlobal, /* Global context (one per db handle) */
const char *zName, /* Name of new function */
void *pUserData, /* User data for aux. function */
- fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
- void(*xDestroy)(void*) /* Destructor for pUserData */
+ void(*xDestroy)(void*), /* Destructor for pUserData */
+ Fts5TokenizerModule **ppNew
){
- Fts5Global *pGlobal = (Fts5Global*)pApi;
- Fts5TokenizerModule *pNew;
- sqlite3_int64 nName; /* Size of zName and its \0 terminator */
- sqlite3_int64 nByte; /* Bytes of space to allocate */
int rc = SQLITE_OK;
+ Fts5TokenizerModule *pNew;
+ sqlite3_int64 nName; /* Size of zName and its \0 terminator */
+ sqlite3_int64 nByte; /* Bytes of space to allocate */
nName = strlen(zName) + 1;
nByte = sizeof(Fts5TokenizerModule) + nName;
- pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte);
+ *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte);
if( pNew ){
- memset(pNew, 0, (size_t)nByte);
pNew->zName = (char*)&pNew[1];
memcpy(pNew->zName, zName, nName);
pNew->pUserData = pUserData;
- pNew->x = *pTokenizer;
pNew->xDestroy = xDestroy;
pNew->pNext = pGlobal->pTok;
pGlobal->pTok = pNew;
if( pNew->pNext==0 ){
pGlobal->pDfltTok = pNew;
}
+ }
+
+ return rc;
+}
+
+/*
+** An instance of this type is used as the Fts5Tokenizer object for
+** wrapper tokenizers - those that provide access to a v1 tokenizer via
+** the fts5_tokenizer_v2 API, and those that provide access to a v2 tokenizer
+** via the fts5_tokenizer API.
+*/
+typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer;
+struct Fts5VtoVTokenizer {
+ Fts5TokenizerModule *pMod;
+ Fts5Tokenizer *pReal;
+};
+
+/*
+** Create a wrapper tokenizer. The context argument pCtx points to the
+** Fts5TokenizerModule object.
+*/
+static int fts5VtoVCreate(
+ void *pCtx,
+ const char **azArg,
+ int nArg,
+ Fts5Tokenizer **ppOut
+){
+ Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx;
+ Fts5VtoVTokenizer *pNew = 0;
+ int rc = SQLITE_OK;
+
+ pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
+ if( rc==SQLITE_OK ){
+ pNew->pMod = pMod;
+ if( pMod->bV2Native ){
+ rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal);
+ }else{
+ rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal);
+ }
+ if( rc!=SQLITE_OK ){
+ sqlite3_free(pNew);
+ pNew = 0;
+ }
+ }
+
+ *ppOut = (Fts5Tokenizer*)pNew;
+ return rc;
+}
+
+/*
+** Delete an Fts5VtoVTokenizer wrapper tokenizer.
+*/
+static void fts5VtoVDelete(Fts5Tokenizer *pTok){
+ Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
+ if( p ){
+ Fts5TokenizerModule *pMod = p->pMod;
+ if( pMod->bV2Native ){
+ pMod->x2.xDelete(p->pReal);
+ }else{
+ pMod->x1.xDelete(p->pReal);
+ }
+ sqlite3_free(p);
+ }
+}
+
+
+/*
+** xTokenize method for a wrapper tokenizer that offers the v1 interface
+** (no support for locales).
+*/
+static int fts5V1toV2Tokenize(
+ Fts5Tokenizer *pTok,
+ void *pCtx, int flags,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
+ Fts5TokenizerModule *pMod = p->pMod;
+ assert( pMod->bV2Native );
+ return pMod->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken);
+}
+
+/*
+** xTokenize method for a wrapper tokenizer that offers the v2 interface
+** (with locale support).
+*/
+static int fts5V2toV1Tokenize(
+ Fts5Tokenizer *pTok,
+ void *pCtx, int flags,
+ const char *pText, int nText,
+ const char *pLocale, int nLocale,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
+ Fts5TokenizerModule *pMod = p->pMod;
+ assert( pMod->bV2Native==0 );
+ return pMod->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken);
+}
+
+/*
+** Register a new tokenizer. This is the implementation of the
+** fts5_api.xCreateTokenizer_v2() method.
+*/
+static int fts5CreateTokenizer_v2(
+ fts5_api *pApi, /* Global context (one per db handle) */
+ const char *zName, /* Name of new function */
+ void *pUserData, /* User data for aux. function */
+ fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */
+ void(*xDestroy)(void*) /* Destructor for pUserData */
+){
+ Fts5Global *pGlobal = (Fts5Global*)pApi;
+ int rc = SQLITE_OK;
+
+ if( pTokenizer->iVersion>2 ){
+ rc = SQLITE_ERROR;
}else{
- rc = SQLITE_NOMEM;
+ Fts5TokenizerModule *pNew = 0;
+ rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew);
+ if( pNew ){
+ pNew->x2 = *pTokenizer;
+ pNew->bV2Native = 1;
+ pNew->x1.xCreate = fts5VtoVCreate;
+ pNew->x1.xTokenize = fts5V1toV2Tokenize;
+ pNew->x1.xDelete = fts5VtoVDelete;
+ }
}
return rc;
}
+/*
+** The fts5_api.xCreateTokenizer() method.
+*/
+static int fts5CreateTokenizer(
+ fts5_api *pApi, /* Global context (one per db handle) */
+ const char *zName, /* Name of new function */
+ void *pUserData, /* User data for aux. function */
+ fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
+ void(*xDestroy)(void*) /* Destructor for pUserData */
+){
+ Fts5TokenizerModule *pNew = 0;
+ int rc = SQLITE_OK;
+
+ rc = fts5NewTokenizerModule(
+ (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew
+ );
+ if( pNew ){
+ pNew->x1 = *pTokenizer;
+ pNew->x2.xCreate = fts5VtoVCreate;
+ pNew->x2.xTokenize = fts5V2toV1Tokenize;
+ pNew->x2.xDelete = fts5VtoVDelete;
+ }
+ return rc;
+}
+
+/*
+** Search the global context passed as the first argument for a tokenizer
+** module named zName. If found, return a pointer to the Fts5TokenizerModule
+** object. Otherwise, return NULL.
+*/
static Fts5TokenizerModule *fts5LocateTokenizer(
- Fts5Global *pGlobal,
- const char *zName
+ Fts5Global *pGlobal, /* Global (one per db handle) object */
+ const char *zName /* Name of tokenizer module to find */
){
Fts5TokenizerModule *pMod = 0;
@@ -2850,6 +3433,36 @@ static Fts5TokenizerModule *fts5LocateTokenizer(
/*
** Find a tokenizer. This is the implementation of the
+** fts5_api.xFindTokenizer_v2() method.
+*/
+static int fts5FindTokenizer_v2(
+ fts5_api *pApi, /* Global context (one per db handle) */
+ const char *zName, /* Name of tokenizer */
+ void **ppUserData,
+ fts5_tokenizer_v2 **ppTokenizer /* Populate this object */
+){
+ int rc = SQLITE_OK;
+ Fts5TokenizerModule *pMod;
+
+ pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
+ if( pMod ){
+ if( pMod->bV2Native ){
+ *ppUserData = pMod->pUserData;
+ }else{
+ *ppUserData = (void*)pMod;
+ }
+ *ppTokenizer = &pMod->x2;
+ }else{
+ *ppTokenizer = 0;
+ *ppUserData = 0;
+ rc = SQLITE_ERROR;
+ }
+
+ return rc;
+}
+
+/*
+** Find a tokenizer. This is the implementation of the
** fts5_api.xFindTokenizer() method.
*/
static int fts5FindTokenizer(
@@ -2863,66 +3476,75 @@ static int fts5FindTokenizer(
pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
if( pMod ){
- *pTokenizer = pMod->x;
- *ppUserData = pMod->pUserData;
+ if( pMod->bV2Native==0 ){
+ *ppUserData = pMod->pUserData;
+ }else{
+ *ppUserData = (void*)pMod;
+ }
+ *pTokenizer = pMod->x1;
}else{
- memset(pTokenizer, 0, sizeof(fts5_tokenizer));
+ memset(pTokenizer, 0, sizeof(*pTokenizer));
+ *ppUserData = 0;
rc = SQLITE_ERROR;
}
return rc;
}
-int fts5GetTokenizer(
- Fts5Global *pGlobal,
- const char **azArg,
- int nArg,
- Fts5Config *pConfig,
- char **pzErr
-){
- Fts5TokenizerModule *pMod;
+/*
+** Attempt to instantiate the tokenizer.
+*/
+int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){
+ const char **azArg = pConfig->t.azArg;
+ const int nArg = pConfig->t.nArg;
+ Fts5TokenizerModule *pMod = 0;
int rc = SQLITE_OK;
- pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
+ pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]);
if( pMod==0 ){
assert( nArg>0 );
rc = SQLITE_ERROR;
- if( pzErr ) *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
+ sqlite3Fts5ConfigErrmsg(pConfig, "no such tokenizer: %s", azArg[0]);
}else{
- rc = pMod->x.xCreate(
- pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
+ int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0;
+ if( pMod->bV2Native ){
+ xCreate = pMod->x2.xCreate;
+ pConfig->t.pApi2 = &pMod->x2;
+ }else{
+ pConfig->t.pApi1 = &pMod->x1;
+ xCreate = pMod->x1.xCreate;
+ }
+
+ rc = xCreate(pMod->pUserData,
+ (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
);
- pConfig->t.pTokApi = &pMod->x;
+
if( rc!=SQLITE_OK ){
- if( pzErr && rc!=SQLITE_NOMEM ){
- *pzErr = sqlite3_mprintf("error in tokenizer constructor");
+ if( rc!=SQLITE_NOMEM ){
+ sqlite3Fts5ConfigErrmsg(pConfig, "error in tokenizer constructor");
}
- }else{
+ }else if( pMod->bV2Native==0 ){
pConfig->t.ePattern = sqlite3Fts5TokenizerPattern(
- pMod->x.xCreate, pConfig->t.pTok
+ pMod->x1.xCreate, pConfig->t.pTok
);
}
}
if( rc!=SQLITE_OK ){
- pConfig->t.pTokApi = 0;
+ pConfig->t.pApi1 = 0;
+ pConfig->t.pApi2 = 0;
pConfig->t.pTok = 0;
}
return rc;
}
+
/*
-** Attempt to instantiate the tokenizer.
+** xDestroy callback passed to sqlite3_create_module(). This is invoked
+** when the db handle is being closed. Free memory associated with
+** tokenizers and aux functions registered with this db handle.
*/
-int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){
- return fts5GetTokenizer(
- pConfig->pGlobal, pConfig->t.azArg, pConfig->t.nArg,
- pConfig, pConfig->pzErrmsg
- );
-}
-
-
static void fts5ModuleDestroy(void *pCtx){
Fts5TokenizerModule *pTok, *pNextTok;
Fts5Auxiliary *pAux, *pNextAux;
@@ -2943,6 +3565,10 @@ static void fts5ModuleDestroy(void *pCtx){
sqlite3_free(pGlobal);
}
+/*
+** Implementation of the fts5() function used by clients to obtain the
+** API pointer.
+*/
static void fts5Fts5Func(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args */
@@ -2970,6 +3596,69 @@ static void fts5SourceIdFunc(
}
/*
+** Implementation of fts5_locale(LOCALE, TEXT) function.
+**
+** If parameter LOCALE is NULL, or a zero-length string, then a copy of
+** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as
+** text, and the value returned is a blob consisting of:
+**
+** * The 4 bytes 0x00, 0xE0, 0xB2, 0xEB (FTS5_LOCALE_HEADER).
+** * The LOCALE, as utf-8 text, followed by
+** * 0x00, followed by
+** * The TEXT, as utf-8 text.
+**
+** There is no final nul-terminator following the TEXT value.
+*/
+static void fts5LocaleFunc(
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args */
+ sqlite3_value **apArg /* Function arguments */
+){
+ const char *zLocale = 0;
+ int nLocale = 0;
+ const char *zText = 0;
+ int nText = 0;
+
+ assert( nArg==2 );
+ UNUSED_PARAM(nArg);
+
+ zLocale = (const char*)sqlite3_value_text(apArg[0]);
+ nLocale = sqlite3_value_bytes(apArg[0]);
+
+ zText = (const char*)sqlite3_value_text(apArg[1]);
+ nText = sqlite3_value_bytes(apArg[1]);
+
+ if( zLocale==0 || zLocale[0]=='\0' ){
+ sqlite3_result_text(pCtx, zText, nText, SQLITE_TRANSIENT);
+ }else{
+ u8 *pBlob = 0;
+ u8 *pCsr = 0;
+ int nBlob = 0;
+ const int nHdr = 4;
+ assert( sizeof(FTS5_LOCALE_HEADER)==nHdr+1 );
+
+ nBlob = nHdr + nLocale + 1 + nText;
+ pBlob = (u8*)sqlite3_malloc(nBlob);
+ if( pBlob==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ return;
+ }
+
+ pCsr = pBlob;
+ memcpy(pCsr, FTS5_LOCALE_HEADER, nHdr);
+ pCsr += nHdr;
+ memcpy(pCsr, zLocale, nLocale);
+ pCsr += nLocale;
+ (*pCsr++) = 0x00;
+ if( zText ) memcpy(pCsr, zText, nText);
+ assert( &pCsr[nText]==&pBlob[nBlob] );
+
+ sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free);
+ sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE);
+ }
+}
+
+/*
** Return true if zName is the extension on one of the shadow tables used
** by this module.
*/
@@ -3061,10 +3750,12 @@ static int fts5Init(sqlite3 *db){
void *p = (void*)pGlobal;
memset(pGlobal, 0, sizeof(Fts5Global));
pGlobal->db = db;
- pGlobal->api.iVersion = 2;
+ pGlobal->api.iVersion = 3;
pGlobal->api.xCreateFunction = fts5CreateAux;
pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
pGlobal->api.xFindTokenizer = fts5FindTokenizer;
+ pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2;
+ pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2;
rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
@@ -3083,6 +3774,13 @@ static int fts5Init(sqlite3 *db){
p, fts5SourceIdFunc, 0, 0
);
}
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(
+ db, "fts5_locale", 2,
+ SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE,
+ p, fts5LocaleFunc, 0, 0
+ );
+ }
}
/* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c
index 0b676e6b4..cf25eb361 100644
--- a/ext/fts5/fts5_storage.c
+++ b/ext/fts5/fts5_storage.c
@@ -16,13 +16,40 @@
#include "fts5Int.h"
+/*
+** pSavedRow:
+** SQL statement FTS5_STMT_LOOKUP2 is a copy of FTS5_STMT_LOOKUP, it
+** does a by-rowid lookup to retrieve a single row from the %_content
+** table or equivalent external-content table/view.
+**
+** However, FTS5_STMT_LOOKUP2 is only used when retrieving the original
+** values for a row being UPDATEd. In that case, the SQL statement is
+** not reset and pSavedRow is set to point at it. This is so that the
+** insert operation that follows the delete may access the original
+** row values for any new values for which sqlite3_value_nochange() returns
+** true. i.e. if the user executes:
+**
+** CREATE VIRTUAL TABLE ft USING fts5(a, b, c, locale=1);
+** ...
+** UPDATE ft SET a=?, b=? WHERE rowid=?;
+**
+** then the value passed to the xUpdate() method of this table as the
+** new.c value is an sqlite3_value_nochange() value. So in this case it
+** must be read from the saved row stored in Fts5Storage.pSavedRow.
+**
+** This is necessary - using sqlite3_value_nochange() instead of just having
+** SQLite pass the original value back via xUpdate() - so as not to discard
+** any locale information associated with such values.
+**
+*/
struct Fts5Storage {
Fts5Config *pConfig;
Fts5Index *pIndex;
int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */
i64 nTotalRow; /* Total number of rows in FTS table */
i64 *aTotalSize; /* Total sizes of each column */
- sqlite3_stmt *aStmt[11];
+ sqlite3_stmt *pSavedRow;
+ sqlite3_stmt *aStmt[12];
};
@@ -36,14 +63,15 @@ struct Fts5Storage {
# error "FTS5_STMT_LOOKUP mismatch"
#endif
-#define FTS5_STMT_INSERT_CONTENT 3
-#define FTS5_STMT_REPLACE_CONTENT 4
-#define FTS5_STMT_DELETE_CONTENT 5
-#define FTS5_STMT_REPLACE_DOCSIZE 6
-#define FTS5_STMT_DELETE_DOCSIZE 7
-#define FTS5_STMT_LOOKUP_DOCSIZE 8
-#define FTS5_STMT_REPLACE_CONFIG 9
-#define FTS5_STMT_SCAN 10
+#define FTS5_STMT_LOOKUP2 3
+#define FTS5_STMT_INSERT_CONTENT 4
+#define FTS5_STMT_REPLACE_CONTENT 5
+#define FTS5_STMT_DELETE_CONTENT 6
+#define FTS5_STMT_REPLACE_DOCSIZE 7
+#define FTS5_STMT_DELETE_DOCSIZE 8
+#define FTS5_STMT_LOOKUP_DOCSIZE 9
+#define FTS5_STMT_REPLACE_CONFIG 10
+#define FTS5_STMT_SCAN 11
/*
** Prepare the two insert statements - Fts5Storage.pInsertContent and
@@ -73,6 +101,7 @@ static int fts5StorageGetStmt(
"SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
"SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
"SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */
+ "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP2 */
"INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */
"REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */
@@ -88,6 +117,8 @@ static int fts5StorageGetStmt(
Fts5Config *pC = p->pConfig;
char *zSql = 0;
+ assert( ArraySize(azStmt)==ArraySize(p->aStmt) );
+
switch( eStmt ){
case FTS5_STMT_SCAN:
zSql = sqlite3_mprintf(azStmt[eStmt],
@@ -104,6 +135,7 @@ static int fts5StorageGetStmt(
break;
case FTS5_STMT_LOOKUP:
+ case FTS5_STMT_LOOKUP2:
zSql = sqlite3_mprintf(azStmt[eStmt],
pC->zContentExprlist, pC->zContent, pC->zContentRowid
);
@@ -150,7 +182,7 @@ static int fts5StorageGetStmt(
rc = SQLITE_NOMEM;
}else{
int f = SQLITE_PREPARE_PERSISTENT;
- if( eStmt>FTS5_STMT_LOOKUP ) f |= SQLITE_PREPARE_NO_VTAB;
+ if( eStmt>FTS5_STMT_LOOKUP2 ) f |= SQLITE_PREPARE_NO_VTAB;
p->pConfig->bLock++;
rc = sqlite3_prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0);
p->pConfig->bLock--;
@@ -400,14 +432,48 @@ static int fts5StorageInsertCallback(
}
/*
+** This function is used as part of an UPDATE statement that modifies the
+** rowid of a row. In that case, this function is called first to set
+** Fts5Storage.pSavedRow to point to a statement that may be used to
+** access the original values of the row being deleted - iDel.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+** It is not considered an error if row iDel does not exist. In this case
+** pSavedRow is not set and SQLITE_OK returned.
+*/
+int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel){
+  int rc = SQLITE_OK;
+  sqlite3_stmt *pSeek = 0;
+
+  /* A previously saved row must have been released before a new one
+  ** is looked up. */
+  assert( p->pSavedRow==0 );
+
+  /* Use the dedicated LOOKUP2 statement by name rather than as
+  ** FTS5_STMT_LOOKUP+1, so this stays in step with the assert() in
+  ** sqlite3Fts5StorageReleaseDeleteRow(), which expects pSavedRow to be
+  ** aStmt[FTS5_STMT_LOOKUP2]. */
+  rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2, &pSeek, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pSeek, 1, iDel);
+    if( sqlite3_step(pSeek)!=SQLITE_ROW ){
+      /* No row was returned. Reset the statement and report whatever
+      ** sqlite3_reset() returns; pSavedRow is left unset. */
+      rc = sqlite3_reset(pSeek);
+    }else{
+      /* Leave the statement positioned on row iDel so that the caller
+      ** can read the original column values through pSavedRow. */
+      p->pSavedRow = pSeek;
+    }
+  }
+
+  return rc;
+}
+
+/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
+**
+** If parameter bSaveRow is true, then Fts5Storage.pSavedRow is left
+** pointing to a statement (FTS5_STMT_LOOKUP2) that may be used to access
+** the original values of the row being deleted. This is used by UPDATE
+** statements.
*/
static int fts5StorageDeleteFromIndex(
Fts5Storage *p,
i64 iDel,
- sqlite3_value **apVal
+ sqlite3_value **apVal,
+ int bSaveRow /* True to set pSavedRow */
){
Fts5Config *pConfig = p->pConfig;
sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */
@@ -416,12 +482,21 @@ static int fts5StorageDeleteFromIndex(
int iCol;
Fts5InsertCtx ctx;
+ assert( bSaveRow==0 || apVal==0 );
+ assert( bSaveRow==0 || bSaveRow==1 );
+ assert( FTS5_STMT_LOOKUP2==FTS5_STMT_LOOKUP+1 );
+
if( apVal==0 ){
- rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0);
- if( rc!=SQLITE_OK ) return rc;
- sqlite3_bind_int64(pSeek, 1, iDel);
- if( sqlite3_step(pSeek)!=SQLITE_ROW ){
- return sqlite3_reset(pSeek);
+ if( p->pSavedRow && bSaveRow ){
+ pSeek = p->pSavedRow;
+ p->pSavedRow = 0;
+ }else{
+ rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP+bSaveRow, &pSeek, 0);
+ if( rc!=SQLITE_OK ) return rc;
+ sqlite3_bind_int64(pSeek, 1, iDel);
+ if( sqlite3_step(pSeek)!=SQLITE_ROW ){
+ return sqlite3_reset(pSeek);
+ }
}
}
@@ -429,26 +504,32 @@ static int fts5StorageDeleteFromIndex(
ctx.iCol = -1;
for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
if( pConfig->abUnindexed[iCol-1]==0 ){
- const char *zText;
- int nText;
+ sqlite3_value *pVal = 0;
+ const char *pText = 0;
+ int nText = 0;
+ int bReset = 0;
+
assert( pSeek==0 || apVal==0 );
assert( pSeek!=0 || apVal!=0 );
if( pSeek ){
- zText = (const char*)sqlite3_column_text(pSeek, iCol);
- nText = sqlite3_column_bytes(pSeek, iCol);
- }else if( ALWAYS(apVal) ){
- zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
- nText = sqlite3_value_bytes(apVal[iCol-1]);
+ pVal = sqlite3_column_value(pSeek, iCol);
}else{
- continue;
+ pVal = apVal[iCol-1];
}
- ctx.szCol = 0;
- rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
- zText, nText, (void*)&ctx, fts5StorageInsertCallback
+
+ rc = sqlite3Fts5ExtractText(
+ pConfig, pVal, pSeek!=0, &bReset, &pText, &nText
);
- p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
- if( p->aTotalSize[iCol-1]<0 && rc==SQLITE_OK ){
- rc = FTS5_CORRUPT;
+ if( rc==SQLITE_OK ){
+ ctx.szCol = 0;
+ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
+ pText, nText, (void*)&ctx, fts5StorageInsertCallback
+ );
+ p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
+ if( rc==SQLITE_OK && p->aTotalSize[iCol-1]<0 ){
+ rc = FTS5_CORRUPT;
+ }
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
}
}
}
@@ -458,12 +539,30 @@ static int fts5StorageDeleteFromIndex(
p->nTotalRow--;
}
- rc2 = sqlite3_reset(pSeek);
- if( rc==SQLITE_OK ) rc = rc2;
+ if( rc==SQLITE_OK && bSaveRow ){
+ assert( p->pSavedRow==0 );
+ p->pSavedRow = pSeek;
+ }else{
+ rc2 = sqlite3_reset(pSeek);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
return rc;
}
/*
+** Reset any saved statement pSavedRow. Zero pSavedRow as well. This
+** should be called by the xUpdate() method of the fts5 table before
+** returning from any operation that may have set Fts5Storage.pSavedRow.
+*/
+void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage *pStorage){
+  /* The saved statement, if any. The assert() below permits NULL. */
+  sqlite3_stmt *pSaved = pStorage->pSavedRow;
+
+  /* Only the LOOKUP2 statement is ever stored in pSavedRow. */
+  assert( pSaved==0 || pSaved==pStorage->aStmt[FTS5_STMT_LOOKUP2] );
+
+  pStorage->pSavedRow = 0;
+  sqlite3_reset(pSaved);
+}
+
+/*
** This function is called to process a DELETE on a contentless_delete=1
** table. It adds the tombstone required to delete the entry with rowid
** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs,
@@ -519,12 +618,12 @@ static int fts5StorageInsertDocsize(
rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin);
sqlite3_bind_int64(pReplace, 3, iOrigin);
}
- if( rc==SQLITE_OK ){
- sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
- sqlite3_step(pReplace);
- rc = sqlite3_reset(pReplace);
- sqlite3_bind_null(pReplace, 2);
- }
+ }
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
+ sqlite3_step(pReplace);
+ rc = sqlite3_reset(pReplace);
+ sqlite3_bind_null(pReplace, 2);
}
}
return rc;
@@ -578,7 +677,12 @@ static int fts5StorageSaveTotals(Fts5Storage *p){
/*
** Remove a row from the FTS table.
*/
-int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){
+int sqlite3Fts5StorageDelete(
+ Fts5Storage *p, /* Storage object */
+ i64 iDel, /* Rowid to delete from table */
+ sqlite3_value **apVal, /* Optional - values to remove from index */
+ int bSaveRow /* If true, set pSavedRow for deleted row */
+){
Fts5Config *pConfig = p->pConfig;
int rc;
sqlite3_stmt *pDel = 0;
@@ -595,7 +699,7 @@ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){
if( p->pConfig->bContentlessDelete ){
rc = fts5StorageContentlessDelete(p, iDel);
}else{
- rc = fts5StorageDeleteFromIndex(p, iDel, apVal);
+ rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow);
}
}
@@ -684,14 +788,21 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
- const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1);
- int nText = sqlite3_column_bytes(pScan, ctx.iCol+1);
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- zText, nText,
- (void*)&ctx,
- fts5StorageInsertCallback
- );
+ int bReset = 0; /* True if tokenizer locale must be reset */
+ int nText = 0; /* Size of pText in bytes */
+ const char *pText = 0; /* Pointer to buffer containing text value */
+ sqlite3_value *pVal = sqlite3_column_value(pScan, ctx.iCol+1);
+
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &pText, &nText);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT,
+ pText, nText,
+ (void*)&ctx,
+ fts5StorageInsertCallback
+ );
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ }
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
@@ -775,7 +886,31 @@ int sqlite3Fts5StorageContentInsert(
int i; /* Counter variable */
rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
- rc = sqlite3_bind_value(pInsert, i, apVal[i]);
+ sqlite3_value *pVal = apVal[i];
+ if( sqlite3_value_nochange(pVal) && p->pSavedRow ){
+ /* This is an UPDATE statement, and column (i-2) was not modified.
+ ** Retrieve the value from Fts5Storage.pSavedRow instead. */
+ pVal = sqlite3_column_value(p->pSavedRow, i-1);
+ }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){
+ assert( pConfig->bLocale );
+ assert( i>1 );
+ if( pConfig->abUnindexed[i-2] ){
+ /* An attempt to insert an fts5_locale() value into an UNINDEXED
+ ** column. Strip the locale away and just bind the text. */
+ const char *pText = 0;
+ int nText = 0;
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText);
+ sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT);
+ }else{
+ const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal);
+ int nBlob = sqlite3_value_bytes(pVal);
+ assert( nBlob>4 );
+ sqlite3_bind_blob(pInsert, i, pBlob+4, nBlob-4, SQLITE_TRANSIENT);
+ }
+ continue;
+ }
+
+ rc = sqlite3_bind_value(pInsert, i, pVal);
}
if( rc==SQLITE_OK ){
sqlite3_step(pInsert);
@@ -810,14 +945,24 @@ int sqlite3Fts5StorageIndexInsert(
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
- const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
- int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- zText, nText,
- (void*)&ctx,
- fts5StorageInsertCallback
- );
+ int bReset = 0; /* True if tokenizer locale must be reset */
+ int nText = 0; /* Size of pText in bytes */
+ const char *pText = 0; /* Pointer to buffer containing text value */
+ sqlite3_value *pVal = apVal[ctx.iCol+2];
+ int bDisk = 0;
+ if( p->pSavedRow && sqlite3_value_nochange(pVal) ){
+ pVal = sqlite3_column_value(p->pSavedRow, ctx.iCol+1);
+ bDisk = 1;
+ }
+ rc = sqlite3Fts5ExtractText(pConfig, pVal, bDisk, &bReset, &pText,&nText);
+ if( rc==SQLITE_OK ){
+ assert( bReset==0 || pConfig->bLocale );
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx,
+ fts5StorageInsertCallback
+ );
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ }
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
@@ -988,14 +1133,22 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){
rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
}
if( rc==SQLITE_OK ){
- const char *zText = (const char*)sqlite3_column_text(pScan, i+1);
- int nText = sqlite3_column_bytes(pScan, i+1);
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- zText, nText,
- (void*)&ctx,
- fts5StorageIntegrityCallback
+ int bReset = 0; /* True if tokenizer locale must be reset */
+ int nText = 0; /* Size of pText in bytes */
+ const char *pText = 0; /* Pointer to buffer containing text value */
+
+ rc = sqlite3Fts5ExtractText(pConfig,
+ sqlite3_column_value(pScan, i+1), 1, &bReset, &pText, &nText
);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT,
+ pText, nText,
+ (void*)&ctx,
+ fts5StorageIntegrityCallback
+ );
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ }
}
if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
rc = FTS5_CORRUPT;
diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c
index b67b037c4..1e9f7bbb6 100644
--- a/ext/fts5/fts5_tcl.c
+++ b/ext/fts5/fts5_tcl.c
@@ -240,6 +240,7 @@ static int SQLITE_TCLAPI xF5tApi(
{ "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */
{ "xInstToken", 2, "IDX ITERM" }, /* 19 */
+ { "xColumnLocale", 1, "COL" }, /* 20 */
{ 0, 0, 0}
};
@@ -528,6 +529,20 @@ static int SQLITE_TCLAPI xF5tApi(
break;
}
+ CASE(20, "xColumnLocale") {
+ const char *z = 0;
+ int n = 0;
+ int iCol;
+ if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
+ return TCL_ERROR;
+ }
+ rc = p->pApi->xColumnLocale(p->pFts, iCol, &z, &n);
+ if( rc==SQLITE_OK && z ){
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n));
+ }
+ break;
+ }
+
default:
assert( 0 );
break;
@@ -796,18 +811,32 @@ typedef struct F5tTokenizerInstance F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, int, const char*, int, int, int);
+ F5tTokenizerInstance *pInst;
};
struct F5tTokenizerModule {
Tcl_Interp *interp;
Tcl_Obj *pScript;
+ void *pParentCtx;
+ fts5_tokenizer_v2 parent_v2;
+ fts5_tokenizer parent;
F5tTokenizerContext *pContext;
};
+/*
+** pLocale/nLocale:
+** Within a call to xTokenize_v2(), pLocale/nLocale store the locale
+** passed to the call by fts5. This can be retrieved by a Tcl tokenize
+** script using [sqlite3_fts5_locale].
+*/
struct F5tTokenizerInstance {
Tcl_Interp *interp;
Tcl_Obj *pScript;
+ F5tTokenizerModule *pModule;
+ Fts5Tokenizer *pParent;
F5tTokenizerContext *pContext;
+ const char *pLocale;
+ int nLocale;
};
static int f5tTokenizerCreate(
@@ -816,11 +845,20 @@ static int f5tTokenizerCreate(
int nArg,
Fts5Tokenizer **ppOut
){
+ Fts5Tokenizer *pParent = 0;
F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
Tcl_Obj *pEval;
int rc = TCL_OK;
int i;
+ assert( pMod->parent_v2.xCreate==0 || pMod->parent.xCreate==0 );
+ if( pMod->parent_v2.xCreate ){
+ rc = pMod->parent_v2.xCreate(pMod->pParentCtx, 0, 0, &pParent);
+ }
+ if( pMod->parent.xCreate ){
+ rc = pMod->parent.xCreate(pMod->pParentCtx, 0, 0, &pParent);
+ }
+
pEval = Tcl_DuplicateObj(pMod->pScript);
Tcl_IncrRefCount(pEval);
for(i=0; rc==TCL_OK && i<nArg; i++){
@@ -840,6 +878,8 @@ static int f5tTokenizerCreate(
pInst->interp = pMod->interp;
pInst->pScript = Tcl_GetObjResult(pMod->interp);
pInst->pContext = pMod->pContext;
+ pInst->pParent = pParent;
+ pInst->pModule = pMod;
Tcl_IncrRefCount(pInst->pScript);
*ppOut = (Fts5Tokenizer*)pInst;
}
@@ -850,11 +890,21 @@ static int f5tTokenizerCreate(
static void f5tTokenizerDelete(Fts5Tokenizer *p){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
- Tcl_DecrRefCount(pInst->pScript);
- ckfree((char *)pInst);
+ if( pInst ){
+ if( pInst->pParent ){
+ if( pInst->pModule->parent_v2.xDelete ){
+ pInst->pModule->parent_v2.xDelete(pInst->pParent);
+ }else{
+ pInst->pModule->parent.xDelete(pInst->pParent);
+ }
+ }
+ Tcl_DecrRefCount(pInst->pScript);
+ ckfree((char *)pInst);
+ }
}
-static int f5tTokenizerTokenize(
+
+static int f5tTokenizerReallyTokenize(
Fts5Tokenizer *p,
void *pCtx,
int flags,
@@ -862,6 +912,7 @@ static int f5tTokenizerTokenize(
int (*xToken)(void*, int, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+ F5tTokenizerInstance *pOldInst = 0;
void *pOldCtx;
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
@@ -870,9 +921,11 @@ static int f5tTokenizerTokenize(
pOldCtx = pInst->pContext->pCtx;
xOldToken = pInst->pContext->xToken;
+ pOldInst = pInst->pContext->pInst;
pInst->pContext->pCtx = pCtx;
pInst->pContext->xToken = xToken;
+ pInst->pContext->pInst = pInst;
assert(
flags==FTS5_TOKENIZE_DOCUMENT
@@ -908,9 +961,105 @@ static int f5tTokenizerTokenize(
pInst->pContext->pCtx = pOldCtx;
pInst->pContext->xToken = xOldToken;
+ pInst->pContext->pInst = pOldInst;
return rc;
}
+/*
+** Context object handed to a parent tokenizer's xTokenize() method by
+** f5tTokenizerTokenize_v2(). It records the arguments of the enclosing
+** tokenize call so that f5tTokenizeCallback() can forward them to
+** f5tTokenizerReallyTokenize().
+*/
+typedef struct CallbackCtx CallbackCtx;
+struct CallbackCtx {
+ Fts5Tokenizer *p;      /* Tokenizer instance passed to xTokenize_v2 */
+ void *pCtx;            /* Caller's context, first argument for xToken */
+ int flags;             /* Tokenize flags passed by the caller */
+ int (*xToken)(void*, int, const char*, int, int, int);  /* Caller's callback */
+};
+
+/*
+** xToken callback passed to the parent tokenizer. Each token (z/n)
+** produced by the parent is fed to f5tTokenizerReallyTokenize() as text
+** to tokenize, using the arguments saved in the CallbackCtx. Parameters
+** tflags, iStart and iEnd are not used.
+*/
+static int f5tTokenizeCallback(
+  void *pCtx,
+  int tflags,
+  const char *z, int n,
+  int iStart, int iEnd
+){
+  CallbackCtx *pCb = (CallbackCtx*)pCtx;
+  Fts5Tokenizer *pTok = pCb->p;
+  return f5tTokenizerReallyTokenize(
+      pTok, pCb->pCtx, pCb->flags, z, n, pCb->xToken
+  );
+}
+
+/*
+** xTokenize_v2 implementation for Tcl-script tokenizers.
+**
+** The locale (pLoc/nLoc) is stored in the instance for the duration of
+** the call and cleared again before returning. If the instance has no
+** parent tokenizer, the text goes straight to
+** f5tTokenizerReallyTokenize(). Otherwise the parent tokenizer is run
+** over the text with f5tTokenizeCallback() as its token callback.
+*/
+static int f5tTokenizerTokenize_v2(
+  Fts5Tokenizer *p,
+  void *pCtx,
+  int flags,
+  const char *pText, int nText,
+  const char *pLoc, int nLoc,
+  int (*xToken)(void*, int, const char*, int, int, int)
+){
+  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+  int rc = SQLITE_OK;
+
+  /* Record the locale on the instance for the duration of this call. */
+  pInst->pLocale = pLoc;
+  pInst->nLocale = nLoc;
+
+  if( pInst->pParent==0 ){
+    /* No parent tokenizer: hand the text directly to the script. */
+    rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken);
+  }else{
+    F5tTokenizerModule *pMod = pInst->pModule;
+    CallbackCtx sCb;
+
+    sCb.p = p;
+    sCb.pCtx = pCtx;
+    sCb.flags = flags;
+    sCb.xToken = xToken;
+
+    /* Prefer the parent's v2 interface when it has one, so that the
+    ** locale is passed through. */
+    if( pMod->parent_v2.xTokenize ){
+      rc = pMod->parent_v2.xTokenize(
+          pInst->pParent, (void*)&sCb, flags, pText, nText,
+          pLoc, nLoc, f5tTokenizeCallback
+      );
+    }else{
+      rc = pMod->parent.xTokenize(
+          pInst->pParent, (void*)&sCb, flags, pText, nText, f5tTokenizeCallback
+      );
+    }
+  }
+
+  /* The locale pointer is not kept once the call is finished. */
+  pInst->pLocale = 0;
+  pInst->nLocale = 0;
+  return rc;
+}
+/*
+** Version-1 xTokenize entry point. Forwards to f5tTokenizerTokenize_v2()
+** with no locale (pLoc==0, nLoc==0).
+*/
+static int f5tTokenizerTokenize(
+ Fts5Tokenizer *p,
+ void *pCtx,
+ int flags,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ return f5tTokenizerTokenize_v2(p, pCtx, flags, pText, nText, 0, 0, xToken);
+}
+
+/*
+** sqlite3_fts5_locale
+*/
+static int SQLITE_TCLAPI f5tTokenizerLocale(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  F5tTokenizerContext *pContext = (F5tTokenizerContext*)clientData;
+  F5tTokenizerInstance *pInst = 0;
+
+  /* The command takes no arguments. */
+  if( objc!=1 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "");
+    return TCL_ERROR;
+  }
+
+  /* xToken is only set while a tokenizer callback is running. */
+  if( pContext->xToken==0 ){
+    Tcl_AppendResult(interp,
+        "sqlite3_fts5_locale may only be used by tokenizer callback", 0
+    );
+    return TCL_ERROR;
+  }
+
+  /* Return the locale recorded on the instance currently tokenizing. */
+  pInst = pContext->pInst;
+  Tcl_SetObjResult(interp, Tcl_NewStringObj(pInst->pLocale, pInst->nLocale));
+  return TCL_OK;
+}
+
/*
** sqlite3_fts5_token ?-colocated? TEXT START END
*/
@@ -996,32 +1145,112 @@ static int SQLITE_TCLAPI f5tCreateTokenizer(
fts5_api *pApi;
char *zName;
Tcl_Obj *pScript;
- fts5_tokenizer t;
F5tTokenizerModule *pMod;
- int rc;
+ int rc = SQLITE_OK;
+ int bV2 = 0; /* True to use _v2 API */
+ int iVersion = 2; /* Value for _v2.iVersion */
+ const char *zParent = 0; /* Name of parent tokenizer, if any */
+ int ii = 0;
- if( objc!=4 ){
- Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
+ if( objc<4 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "?OPTIONS? DB NAME SCRIPT");
return TCL_ERROR;
}
- if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
- return TCL_ERROR;
+
+ /* Parse any options. Set stack variables bV2 and zParent. */
+ for(ii=1; ii<objc-3; ii++){
+ int iOpt = 0;
+ const char *azOpt[] = { "-v2", "-parent", "-version", 0 };
+ if( Tcl_GetIndexFromObj(interp, objv[ii], azOpt, "OPTION", 0, &iOpt) ){
+ return TCL_ERROR;
+ }
+ switch( iOpt ){
+ case 0: /* -v2 */ {
+ bV2 = 1;
+ break;
+ }
+ case 1: /* -parent */ {
+ ii++;
+ if( ii==objc-3 ){
+ Tcl_AppendResult(
+ interp, "option requires an argument: -parent", (char*)0
+ );
+ return TCL_ERROR;
+ }
+ zParent = Tcl_GetString(objv[ii]);
+ break;
+ }
+ case 2: /* -version */ {
+ ii++;
+ if( ii==objc-3 ){
+ Tcl_AppendResult(
+ interp, "option requires an argument: -version", (char*)0
+ );
+ return TCL_ERROR;
+ }
+ if( Tcl_GetIntFromObj(interp, objv[ii], &iVersion) ){
+ return TCL_ERROR;
+ }
+ break;
+ }
+ default:
+ assert( 0 );
+ break;
+ }
}
- zName = Tcl_GetString(objv[2]);
- pScript = objv[3];
- t.xCreate = f5tTokenizerCreate;
- t.xTokenize = f5tTokenizerTokenize;
- t.xDelete = f5tTokenizerDelete;
+ if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ){
+ return TCL_ERROR;
+ }
+ zName = Tcl_GetString(objv[objc-2]);
+ pScript = objv[objc-1];
pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
+ memset(pMod, 0, sizeof(F5tTokenizerModule));
pMod->interp = interp;
pMod->pScript = pScript;
- pMod->pContext = pContext;
Tcl_IncrRefCount(pScript);
- rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer);
+ pMod->pContext = pContext;
+ if( zParent ){
+ if( bV2 ){
+ fts5_tokenizer_v2 *pParent = 0;
+ rc = pApi->xFindTokenizer_v2(pApi, zParent, &pMod->pParentCtx, &pParent);
+ if( rc==SQLITE_OK ){
+ memcpy(&pMod->parent_v2, pParent, sizeof(fts5_tokenizer_v2));
+ pMod->parent_v2.xDelete(0);
+ }
+ }else{
+ rc = pApi->xFindTokenizer(pApi, zParent, &pMod->pParentCtx,&pMod->parent);
+ if( rc==SQLITE_OK ){
+ pMod->parent.xDelete(0);
+ }
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ void *pModCtx = (void*)pMod;
+ if( bV2==0 ){
+ fts5_tokenizer t;
+ t.xCreate = f5tTokenizerCreate;
+ t.xTokenize = f5tTokenizerTokenize;
+ t.xDelete = f5tTokenizerDelete;
+ rc = pApi->xCreateTokenizer(pApi, zName, pModCtx, &t, f5tDelTokenizer);
+ }else{
+ fts5_tokenizer_v2 t2;
+ memset(&t2, 0, sizeof(t2));
+ t2.iVersion = iVersion;
+ t2.xCreate = f5tTokenizerCreate;
+ t2.xTokenize = f5tTokenizerTokenize_v2;
+ t2.xDelete = f5tTokenizerDelete;
+ rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer);
+ }
+ }
+
if( rc!=SQLITE_OK ){
- Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0);
+ Tcl_AppendResult(interp, (
+ bV2 ? "error in fts5_api.xCreateTokenizer_v2()"
+ : "error in fts5_api.xCreateTokenizer()"
+ ), 0);
return TCL_ERROR;
}
@@ -1328,6 +1557,7 @@ int Fts5tcl_Init(Tcl_Interp *interp){
} aCmd[] = {
{ "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
{ "sqlite3_fts5_token", f5tTokenizerReturn, 1 },
+ { "sqlite3_fts5_locale", f5tTokenizerLocale, 1 },
{ "sqlite3_fts5_tokenize", f5tTokenize, 0 },
{ "sqlite3_fts5_create_function", f5tCreateFunction, 0 },
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c
index 9f5cd24c3..f92529b84 100644
--- a/ext/fts5/fts5_tokenize.c
+++ b/ext/fts5/fts5_tokenize.c
@@ -79,7 +79,7 @@ static int fts5AsciiCreate(
int i;
memset(p, 0, sizeof(AsciiTokenizer));
memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
- for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
+ for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
fts5AsciiAddExceptions(p, zArg, 1);
@@ -90,7 +90,6 @@ static int fts5AsciiCreate(
rc = SQLITE_ERROR;
}
}
- if( rc==SQLITE_OK && i<nArg ) rc = SQLITE_ERROR;
if( rc!=SQLITE_OK ){
fts5AsciiDelete((Fts5Tokenizer*)p);
p = 0;
@@ -382,7 +381,7 @@ static int fts5UnicodeCreate(
}
/* Search for a "categories" argument */
- for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
+ for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
if( 0==sqlite3_stricmp(azArg[i], "categories") ){
zCat = azArg[i+1];
}
@@ -391,7 +390,7 @@ static int fts5UnicodeCreate(
rc = unicodeSetCategories(p, zCat);
}
- for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
+ for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
@@ -416,8 +415,6 @@ static int fts5UnicodeCreate(
rc = SQLITE_ERROR;
}
}
- if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR;
-
}else{
rc = SQLITE_NOMEM;
}
@@ -556,7 +553,7 @@ static int fts5UnicodeTokenize(
typedef struct PorterTokenizer PorterTokenizer;
struct PorterTokenizer {
- fts5_tokenizer tokenizer; /* Parent tokenizer module */
+ fts5_tokenizer_v2 tokenizer_v2; /* Parent tokenizer module */
Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */
char aBuf[FTS5_PORTER_MAX_TOKEN + 64];
};
@@ -568,7 +565,7 @@ static void fts5PorterDelete(Fts5Tokenizer *pTok){
if( pTok ){
PorterTokenizer *p = (PorterTokenizer*)pTok;
if( p->pTokenizer ){
- p->tokenizer.xDelete(p->pTokenizer);
+ p->tokenizer_v2.xDelete(p->pTokenizer);
}
sqlite3_free(p);
}
@@ -587,6 +584,7 @@ static int fts5PorterCreate(
PorterTokenizer *pRet;
void *pUserdata = 0;
const char *zBase = "unicode61";
+ fts5_tokenizer_v2 *pV2 = 0;
if( nArg>0 ){
zBase = azArg[0];
@@ -595,14 +593,15 @@ static int fts5PorterCreate(
pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer));
if( pRet ){
memset(pRet, 0, sizeof(PorterTokenizer));
- rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer);
+ rc = pApi->xFindTokenizer_v2(pApi, zBase, &pUserdata, &pV2);
}else{
rc = SQLITE_NOMEM;
}
if( rc==SQLITE_OK ){
int nArg2 = (nArg>0 ? nArg-1 : 0);
- const char **azArg2 = (nArg2 ? &azArg[1] : 0);
- rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer);
+ const char **az2 = (nArg2 ? &azArg[1] : 0);
+ memcpy(&pRet->tokenizer_v2, pV2, sizeof(fts5_tokenizer_v2));
+ rc = pRet->tokenizer_v2.xCreate(pUserdata, az2, nArg2, &pRet->pTokenizer);
}
if( rc!=SQLITE_OK ){
@@ -1253,6 +1252,7 @@ static int fts5PorterTokenize(
void *pCtx,
int flags,
const char *pText, int nText,
+ const char *pLoc, int nLoc,
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
@@ -1260,8 +1260,8 @@ static int fts5PorterTokenize(
sCtx.xToken = xToken;
sCtx.pCtx = pCtx;
sCtx.aBuf = p->aBuf;
- return p->tokenizer.xTokenize(
- p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
+ return p->tokenizer_v2.xTokenize(
+ p->pTokenizer, (void*)&sCtx, flags, pText, nText, pLoc, nLoc, fts5PorterCb
);
}
@@ -1291,41 +1291,46 @@ static int fts5TriCreate(
Fts5Tokenizer **ppOut
){
int rc = SQLITE_OK;
- TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
+ TrigramTokenizer *pNew = 0;
UNUSED_PARAM(pUnused);
- if( pNew==0 ){
- rc = SQLITE_NOMEM;
+ if( nArg%2 ){
+ rc = SQLITE_ERROR;
}else{
int i;
- pNew->bFold = 1;
- pNew->iFoldParam = 0;
- for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
- const char *zArg = azArg[i+1];
- if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
- if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
- rc = SQLITE_ERROR;
+ pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
+ if( pNew==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ pNew->bFold = 1;
+ pNew->iFoldParam = 0;
+
+ for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+ const char *zArg = azArg[i+1];
+ if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
+ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
+ rc = SQLITE_ERROR;
+ }else{
+ pNew->bFold = (zArg[0]=='0');
+ }
+ }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
+ if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
+ rc = SQLITE_ERROR;
+ }else{
+ pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
+ }
}else{
- pNew->bFold = (zArg[0]=='0');
- }
- }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
- if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
rc = SQLITE_ERROR;
- }else{
- pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
}
- }else{
+ }
+
+ if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
rc = SQLITE_ERROR;
}
- }
- if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR;
-
- if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
- rc = SQLITE_ERROR;
- }
-
- if( rc!=SQLITE_OK ){
- fts5TriDelete((Fts5Tokenizer*)pNew);
- pNew = 0;
+
+ if( rc!=SQLITE_OK ){
+ fts5TriDelete((Fts5Tokenizer*)pNew);
+ pNew = 0;
+ }
}
}
*ppOut = (Fts5Tokenizer*)pNew;
@@ -1450,7 +1455,6 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){
} aBuiltin[] = {
{ "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
{ "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
- { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
{ "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}},
};
@@ -1465,6 +1469,19 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){
0
);
}
-
+ if( rc==SQLITE_OK ){
+ fts5_tokenizer_v2 sPorter = {
+ 2,
+ fts5PorterCreate,
+ fts5PorterDelete,
+ fts5PorterTokenize
+ };
+ rc = pApi->xCreateTokenizer_v2(pApi,
+ "porter",
+ (void*)pApi,
+ &sPorter,
+ 0
+ );
+ }
return rc;
}
diff --git a/ext/fts5/fts5_unicode2.c b/ext/fts5/fts5_unicode2.c
index 3e97264fa..cc164a456 100644
--- a/ext/fts5/fts5_unicode2.c
+++ b/ext/fts5/fts5_unicode2.c
@@ -364,6 +364,9 @@ int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
default: return 1; }
break;
+
+ default:
+ return 1;
}
return 0;
}
diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl
index 7076a52bb..8ea87dbdd 100644
--- a/ext/fts5/test/fts5_common.tcl
+++ b/ext/fts5/test/fts5_common.tcl
@@ -51,6 +51,10 @@ proc fts5_test_poslist2 {cmd} {
sort_poslist $res
}
+# Invoke the xInstToken API method of $cmd with arguments $iInst and
+# $iToken and return its result.
+proc fts5_test_insttoken {cmd iInst iToken} {
+ $cmd xInstToken $iInst $iToken
+}
+
proc fts5_test_collist {cmd} {
set res [list]
@@ -78,6 +82,9 @@ proc fts5_test_columnsize {cmd} {
proc fts5_columntext {cmd iCol} {
$cmd xColumnText $iCol
}
+# Return the result of the xColumnLocale API method of $cmd for column
+# $iCol.
+proc fts5_columnlocale {cmd iCol} {
+ $cmd xColumnLocale $iCol
+}
proc fts5_test_columntext {cmd} {
set res [list]
@@ -87,6 +94,14 @@ proc fts5_test_columntext {cmd} {
set res
}
+# Return a list with one element per column (as counted by xColumnCount),
+# each element being the result of [$cmd xColumnLocale] for that column.
+proc fts5_test_columnlocale {cmd} {
+ set res [list]
+ for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
+ lappend res [$cmd xColumnLocale $i]
+ }
+ set res
+}
+
proc fts5_test_columntotalsize {cmd} {
set res [list]
for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
@@ -165,10 +180,12 @@ proc fts5_aux_test_functions {db} {
foreach f {
fts5_test_columnsize
fts5_test_columntext
+ fts5_test_columnlocale
fts5_test_columntotalsize
fts5_test_poslist
fts5_test_poslist2
fts5_test_collist
+ fts5_test_insttoken
fts5_test_tokenize
fts5_test_rowcount
fts5_test_rowid
@@ -177,6 +194,7 @@ proc fts5_aux_test_functions {db} {
fts5_test_queryphrase
fts5_test_phrasecount
fts5_columntext
+ fts5_columnlocale
fts5_queryphrase
fts5_collist
} {
diff --git a/ext/fts5/test/fts5ah.test b/ext/fts5/test/fts5ah.test
index bc8005783..bf9c9e9db 100644
--- a/ext/fts5/test/fts5ah.test
+++ b/ext/fts5/test/fts5ah.test
@@ -163,6 +163,17 @@ do_execsql_test 1.8.2 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text';
} {10000}
+do_execsql_test 1.8.3 {
+ SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid<5000 AND rowid < 'text';
+} {4999}
+do_execsql_test 1.8.4 {
+ SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid>5000 AND rowid > 'text';
+} {0}
+
+do_catchsql_test 1.9 {
+ SELECT * FROM t1('*xy');
+} {1 {unknown special query: xy}}
+
} ;# foreach_detail_mode
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test
index b344e0d2e..7187ad67c 100644
--- a/ext/fts5/test/fts5al.test
+++ b/ext/fts5/test/fts5al.test
@@ -293,6 +293,16 @@ do_catchsql_test 4.4.4 {
SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL
} {1 {parse error in rank function: }}
+# Check that the second and subsequent rank= constraints are ignored.
+#
+do_catchsql_test 4.3.3 {
+ SELECT *, rank FROM t3
+ WHERE t3 MATCH 'a' AND
+ rank MATCH 'nosuch()' AND
+ rank MATCH 'rowidmod(3)'
+ ORDER BY rank ASC
+} {1 {unable to use function MATCH in the requested context}}
+
} ;# foreach_detail_mode
diff --git a/ext/fts5/test/fts5blob.test b/ext/fts5/test/fts5blob.test
new file mode 100644
index 000000000..4233719fb
--- /dev/null
+++ b/ext/fts5/test/fts5blob.test
@@ -0,0 +1,166 @@
+# 2024 July 30
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# This file verifies that:
+#
+# * blob values may be written to locale=0 tables.
+#
+# * blob values - other than fts5_locale() values - may not be written
+# to indexed columns of locale=1 tables. This is an SQLITE_MISMATCH error.
+#
+# * blob values may be returned by queries on the external-content table
+# of a locale=0 table.
+#
+# * blob values may not be returned by queries on the external-content
+# table of a locale=1 table, apart from fts5_locale() blobs. This is an
+# SQLITE_MISMATCH error.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5blob
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+# Test that blobs may be stored in normal locale=0 tables.
+#
+foreach {tn enc} {
+ 1 utf8
+ 2 utf16
+} {
+ reset_db
+ fts5_aux_test_functions db
+
+ execsql "PRAGMA encoding = $enc"
+
+ execsql "
+ CREATE VIRTUAL TABLE t1 USING fts5(x, y);
+ "
+ do_execsql_test 1.$tn.0 {
+ CREATE VIRTUAL TABLE tt USING fts5vocab('t1', 'instance');
+ INSERT INTO t1(rowid, x, y) VALUES(1, 555, X'0000000041424320444546');
+ INSERT INTO t1(rowid, x, y) VALUES(2, 666, X'41424300444546');
+ INSERT INTO t1(rowid, x, y) VALUES(3, 777, 'xyz');
+ }
+
+ do_execsql_test 1.$tn.1 {
+ SELECT rowid, quote(x), quote(y) FROM t1
+ } {
+ 1 555 X'0000000041424320444546'
+ 2 666 X'41424300444546'
+ 3 777 'xyz'
+ }
+
+ do_execsql_test 1.$tn.2 {
+ DELETE FROM t1 WHERE rowid=2;
+ DELETE FROM t1 WHERE rowid=1;
+ }
+
+ do_execsql_test 1.$tn.3 {
+ PRAGMA integrity_check;
+ } {ok}
+}
+
+#--------------------------------------------------------------------------
+# Test that a blob may be stored and retrieved in an unindexed column of
+# a regular table with locale=1.
+#
+reset_db
+do_execsql_test 2.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(x, y UNINDEXED, locale=1);
+ INSERT INTO t1(rowid, x, y) VALUES(12, 'twelve', X'0000000041424320444546');
+}
+
+do_execsql_test 2.1 {
+ select rowid, x, quote(y) FROM t1
+} {
+ 12 twelve X'0000000041424320444546'
+}
+
+#--------------------------------------------------------------------------
+# Test that blobs may not be written to any type of table with locale=1
+# set. Except, they may be written to UNINDEXED columns.
+#
+reset_db
+do_execsql_test 3.0 {
+ CREATE TABLE t1(a, b);
+
+ CREATE VIRTUAL TABLE x1 USING fts5(a, b, locale=1);
+ CREATE VIRTUAL TABLE x2 USING fts5(a, b, locale=1, content=t2);
+ CREATE VIRTUAL TABLE x3 USING fts5(a, b, locale=1, content=);
+}
+
+do_catchsql_test 3.1 {
+ INSERT INTO x1(rowid, a, b) VALUES(113, 'hello world', X'123456');
+} {1 {datatype mismatch}}
+do_catchsql_test 3.2 {
+ INSERT INTO x2(rowid, a, b) VALUES(113, 'hello world', X'123456');
+} {1 {datatype mismatch}}
+do_catchsql_test 3.3 {
+ INSERT INTO x3(rowid, a, b) VALUES(113, 'hello world', X'123456');
+} {1 {datatype mismatch}}
+
+
+#--------------------------------------------------------------------------
+# Test that fts5_locale() values may not be written to any type of table
+# without locale=1 set. Even to an UNINDEXED column.
+#
+reset_db
+do_execsql_test 3.0 {
+ CREATE TABLE t1(a, b);
+
+ CREATE VIRTUAL TABLE x1 USING fts5(a, b);
+ CREATE VIRTUAL TABLE x2 USING fts5(a, b, content=t2);
+ CREATE VIRTUAL TABLE x3 USING fts5(a, b, content=);
+
+ CREATE VIRTUAL TABLE x4 USING fts5(a, b, c UNINDEXED);
+}
+
+do_catchsql_test 3.1 {
+ INSERT INTO x1(rowid, a, b)
+ VALUES(113, 'hello world', fts5_locale('en_AU', 'abc'));
+} {1 {fts5_locale() requires locale=1}}
+do_catchsql_test 3.2 {
+ INSERT INTO x2(rowid, a, b)
+ VALUES(113, 'hello world', fts5_locale('en_AU', 'abc'));
+} {1 {fts5_locale() requires locale=1}}
+do_catchsql_test 3.3 {
+ INSERT INTO x3(rowid, a, b)
+ VALUES(113, 'hello world', fts5_locale('en_AU', 'abc'));
+} {1 {fts5_locale() requires locale=1}}
+do_catchsql_test 3.4 {
+ INSERT INTO x4(rowid, a, b, c)
+ VALUES(113, 'hello world', 'yesno', fts5_locale('en_AU', 'abc'));
+} {1 {fts5_locale() requires locale=1}}
+
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 4.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(x);
+}
+
+foreach {tn sql} {
+ 1 { INSERT INTO x1(rowid, x) VALUES(4.5, 'abcd') }
+ 2 { INSERT INTO x1(rowid, x) VALUES('xyz', 'abcd') }
+ 3 { INSERT INTO x1(rowid, x) VALUES(X'001122', 'abcd') }
+} {
+ do_catchsql_test 4.1.$tn $sql {1 {datatype mismatch}}
+}
+
+
+finish_test
+
+
diff --git a/ext/fts5/test/fts5cat.test b/ext/fts5/test/fts5cat.test
index 483f64bfe..71e2abe3a 100644
--- a/ext/fts5/test/fts5cat.test
+++ b/ext/fts5/test/fts5cat.test
@@ -55,5 +55,22 @@ do_execsql_test 1.5 {
SELECT * FROM t4t
} {สนามกีฬา 1 1}
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 2.0 "
+ CREATE VIRTUAL TABLE x1 USING fts5(c,
+ tokenize=\"unicode61 categories ' \t'\");
+"
+
+do_catchsql_test 2.1 "
+ CREATE VIRTUAL TABLE x2 USING fts5(c,
+ tokenize=\"unicode61 categories 'N*\t\tMYZ'\");
+" {1 {error in tokenizer constructor}}
+
+do_catchsql_test 2.2 "
+ CREATE VIRTUAL TABLE x2 USING fts5(c,
+ tokenize=\"unicode61 categories 'N*\t\tXYZ'\");
+" {1 {error in tokenizer constructor}}
+
finish_test
diff --git a/ext/fts5/test/fts5contentless.test b/ext/fts5/test/fts5contentless.test
index eb6b928ab..991e9888f 100644
--- a/ext/fts5/test/fts5contentless.test
+++ b/ext/fts5/test/fts5contentless.test
@@ -267,4 +267,24 @@ do_execsql_test 8.2 {
SELECT rowid FROM ft('four');
} {}
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 9.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(x, content='', contentless_delete=0);
+ INSERT INTO ft VALUES('hello world');
+ INSERT INTO ft VALUES('one two three');
+}
+
+do_catchsql_test 9.1 {
+ INSERT INTO ft(ft, rowid, x) VALUES('delete', 1, 'hello world');
+} {0 {}}
+
+do_catchsql_test 9.2 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', contentless_delete=2);
+} {1 {malformed contentless_delete=... directive}}
+
+do_catchsql_test 9.3 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', contentless_delete=11);
+} {1 {malformed contentless_delete=... directive}}
+
finish_test
diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test
index ae07383b2..0abd8b86d 100644
--- a/ext/fts5/test/fts5corrupt.test
+++ b/ext/fts5/test/fts5corrupt.test
@@ -101,4 +101,25 @@ do_catchsql_test 3.1 {
SELECT * FROM t3 WHERE t3 MATCH 'o';
} {1 {fts5: missing row 3 from content table 'main'.'t3_content'}}
+#--------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 4.0 {
+ CREATE VIRTUAL TABLE t2 USING fts5(x);
+ INSERT INTO t2 VALUES('one two three');
+ INSERT INTO t2 VALUES('four five six');
+ INSERT INTO t2 VALUES('seven eight nine');
+ INSERT INTO t2 VALUES('ten eleven twelve');
+}
+do_execsql_test 4.1 {
+ SELECT hex(block) FROM t2_data WHERE id=1;
+} {040C}
+do_execsql_test 4.2 {
+ UPDATE t2_data SET block = X'0402' WHERE id=1
+}
+breakpoint
+do_catchsql_test 4.3 {
+ DELETE FROM t2 WHERE rowid=3
+} {1 {database disk image is malformed}}
+
finish_test
diff --git a/ext/fts5/test/fts5corrupt3.test b/ext/fts5/test/fts5corrupt3.test
index cfe1438ed..c5faaa87b 100644
--- a/ext/fts5/test/fts5corrupt3.test
+++ b/ext/fts5/test/fts5corrupt3.test
@@ -15524,6 +15524,160 @@ do_catchsql_test 80.1 {
SELECT snippet(rowid, -1, '.', '..', '[', '(]'),snippet(rowid, -1, '.', '.', '', '(]'), highlight(t1, 29, 1 , '') FROM t1('g+ h') WHERE rank MATCH 'bm25(1.0, 10)' ORDER BY NOT (SELECT 1 FROM t1('g+ æ') WHERE rank MATCH 'bm25(1.0, 10)' ORDER BY rank);
} {1 {database disk image is malformed}}
+#-------------------------------------------------------------------------
+reset_db
+do_test 81.0 {
+ sqlite3 db {}
+ db deserialize [decode_hexdb {
+.open --hexdb
+| size 40960 pagesize 4096 filename crash-44e8035a976422.db
+| page 1 offset 0
+| 0: 53 51 4c 69 74 65 20 66 6f 72 6d 61 74 20 33 00 SQLite format 3.
+| 16: 10 00 01 01 00 40 20 20 00 00 00 00 00 00 00 0a .....@ ........
+| 32: 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00 04 ................
+| 48: 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 ................
+| 96: 00 00 00 00 0d 00 00 00 0d 0b 6e 00 0f a3 0f 4c ..........n....L
+| 112: 0e e1 0e 81 0e 24 0d cc 0d 72 0d 1b 0c b0 0c 50 .....$...r.....P
+| 128: 0b f8 0b b3 0b 6e 00 00 00 00 00 00 00 00 00 00 .....n..........
+| 2912: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 43 0d ..............C.
+| 2928: 06 17 11 11 08 75 74 61 62 6c 65 74 34 74 34 43 .....utablet4t4C
+| 2944: 52 45 41 54 45 20 56 49 52 54 55 41 4c 20 54 41 REATE VIRTUAL TA
+| 2960: 42 4c 45 20 74 34 20 55 53 49 4e 47 20 66 74 73 BLE t4 USING fts
+| 2976: 35 76 6f 63 61 62 28 27 74 32 27 2c 20 27 72 6f 5vocab('t2', 'ro
+| 2992: 77 27 29 43 0c 06 17 11 11 08 75 74 61 62 6c 65 w')C......utable
+| 3008: 74 33 74 33 43 52 45 41 54 45 20 56 49 52 54 55 t3t3CREATE VIRTU
+| 3024: 41 4c 20 54 41 42 4c 45 20 74 33 20 55 53 49 4e AL TABLE t3 USIN
+| 3040: 47 20 66 74 73 35 76 6f 63 61 62 28 27 74 31 27 G fts5vocab('t1'
+| 3056: 2c 20 27 72 6f 77 27 29 56 0b 06 17 1f 1f 01 7d , 'row')V.......
+| 3072: 74 61 62 6c 65 74 32 5f 63 6f 6e 66 69 67 74 32 tablet2_configt2
+| 3088: 5f 63 6f 6e 66 69 67 0a 43 52 45 41 54 45 20 54 _config.CREATE T
+| 3104: 41 42 4c 45 20 27 74 32 5f 63 6f 6e 66 69 67 27 ABLE 't2_config'
+| 3120: 28 6b 20 50 52 49 4d 41 52 59 20 4b 45 59 2c 20 (k PRIMARY KEY,
+| 3136: 76 29 20 57 49 54 48 4f 55 54 20 52 4f 57 49 44 v) WITHOUT ROWID
+| 3152: 5e 0a 07 17 21 21 01 81 07 74 61 62 6c 65 74 32 ^...!!...tablet2
+| 3168: 5f 63 6f 6e 74 65 6e 74 74 32 5f 63 6f 6e 74 65 _contentt2_conte
+| 3184: 6e 74 09 43 52 45 41 54 45 20 54 41 42 4c 45 20 nt.CREATE TABLE
+| 3200: 27 74 32 5f 63 6f 6e 74 65 6e 74 27 28 69 64 20 't2_content'(id
+| 3216: 49 4e 54 45 47 45 52 20 50 52 49 4d 41 52 59 20 INTEGER PRIMARY
+| 3232: 4b 45 59 2c 20 63 30 2c 20 63 31 2c 20 63 32 29 KEY, c0, c1, c2)
+| 3248: 69 09 07 17 19 19 01 81 2d 74 61 62 6c 65 74 32 i.......-tablet2
+| 3264: 5f 69 64 78 74 32 5f 69 64 78 08 43 52 45 41 54 _idxt2_idx.CREAT
+| 3280: 45 20 54 41 42 4c 45 20 27 74 32 5f 69 64 78 27 E TABLE 't2_idx'
+| 3296: 28 73 65 67 69 64 2c 20 74 65 72 6d 2c 20 70 67 (segid, term, pg
+| 3312: 6e 6f 2c 20 50 52 49 4d 41 52 59 20 4b 45 59 28 no, PRIMARY KEY(
+| 3328: 73 65 67 69 64 2c 20 74 65 72 6d 29 29 20 57 49 segid, term)) WI
+| 3344: 54 48 4f 55 54 20 52 4f 57 49 44 55 08 07 17 1b THOUT ROWIDU....
+| 3360: 1b 01 81 01 74 61 62 6c 65 74 32 5f 64 61 74 61 ....tablet2_data
+| 3376: 74 32 5f 64 61 74 61 07 43 52 45 41 54 45 20 54 t2_data.CREATE T
+| 3392: 41 42 4c 45 20 27 74 32 5f 64 61 74 61 27 28 69 ABLE 't2_data'(i
+| 3408: 64 20 49 4e 54 45 47 45 52 20 50 52 49 4d 41 52 d INTEGER PRIMAR
+| 3424: 59 20 4b 45 59 2c 20 62 6c 6f 63 6b 20 42 4c 4f Y KEY, block BLO
+| 3440: 42 29 58 07 07 17 11 11 08 81 1d 74 61 62 6c 65 B)X........table
+| 3456: 74 32 74 32 43 52 45 41 54 45 20 56 49 52 54 55 t2t2CREATE VIRTU
+| 3472: 41 4c 20 54 41 42 4c 45 20 74 32 20 55 53 49 4e AL TABLE t2 USIN
+| 3488: 47 20 66 74 73 35 28 27 61 27 2c 5b 62 5d 2c 22 G fts5('a',[b],.
+| 3504: 63 22 2c 64 65 74 61 69 6c 3d 6e 6f 6e 65 2c 63 c.,detail=none,c
+| 3520: 6f 6c 75 6d 6e 73 69 7a 65 3d 30 29 56 06 06 17 olumnsize=0)V...
+| 3536: 1f 1f 01 7d 74 61 62 6c 65 74 31 5f 63 6f 6e 66 ....tablet1_conf
+| 3552: 69 67 74 31 5f 63 6f 6e 66 69 67 06 43 52 45 41 igt1_config.CREA
+| 3568: 54 45 20 54 41 42 4c 45 20 27 74 31 5f 63 6f 6e TE TABLE 't1_con
+| 3584: 66 69 67 27 28 6b 20 50 52 49 4d 41 52 59 20 4b fig'(k PRIMARY K
+| 3600: 45 59 2c 20 76 29 20 57 49 54 48 4f 55 54 20 52 EY, v) WITHOUT R
+| 3616: 4f 57 49 44 5b 05 07 17 21 21 01 81 01 74 61 62 OWID[...!!...tab
+| 3632: 6c 65 74 31 5f 64 6f 63 73 69 7a 65 74 31 5f 64 let1_docsizet1_d
+| 3648: 6f 63 73 69 7a 65 05 43 52 45 41 54 45 20 54 41 ocsize.CREATE TA
+| 3664: 42 4c 45 20 27 74 31 5f 64 6f 63 73 69 7a 65 27 BLE 't1_docsize'
+| 3680: 28 69 64 20 49 4e 54 45 47 45 52 20 50 52 49 4d (id INTEGER PRIM
+| 3696: 41 52 59 20 4b 45 59 2c 20 73 7a 20 42 4c 4f 42 ARY KEY, sz BLOB
+| 3712: 29 5e 04 07 17 21 21 01 81 07 74 61 62 6c 65 74 )^...!!...tablet
+| 3728: 31 5f 63 6f 6e 74 65 6e 74 74 31 5f 63 6f 6e 74 1_contentt1_cont
+| 3744: 65 6e 74 04 43 52 45 41 54 45 20 54 41 42 4c 45 ent.CREATE TABLE
+| 3760: 20 27 74 31 5f 63 6f 6e 74 65 6e 74 27 28 69 64 't1_content'(id
+| 3776: 20 49 4e 54 45 47 45 52 20 50 52 49 4d 41 52 59 INTEGER PRIMARY
+| 3792: 20 4b 45 59 2c 20 63 30 2c 20 63 31 2c 20 63 32 KEY, c0, c1, c2
+| 3808: 29 69 03 07 17 19 19 01 81 2d 74 61 62 6c 65 74 )i.......-tablet
+| 3824: 31 5f 69 64 78 74 31 5f 69 64 78 03 43 52 45 41 1_idxt1_idx.CREA
+| 3840: 54 45 20 54 41 42 4c 45 20 27 74 31 5f 69 64 78 TE TABLE 't1_idx
+| 3856: 27 28 73 65 67 69 64 2c 20 74 65 72 6d 2c 20 70 '(segid, term, p
+| 3872: 67 6e 6f 2c 20 50 52 49 4d 41 52 59 20 4b 45 59 gno, PRIMARY KEY
+| 3888: 28 73 65 67 69 64 2c 20 74 65 72 6d 29 29 20 57 (segid, term)) W
+| 3904: 49 54 48 4f 55 54 20 52 4f 57 49 44 55 02 07 17 ITHOUT ROWIDU...
+| 3920: 1b 1b 01 81 01 74 61 62 6c 65 74 31 5f 64 61 74 .....tablet1_dat
+| 3936: 61 74 31 5f 64 61 74 61 02 43 52 45 41 54 45 20 at1_data.CREATE
+| 3952: 54 41 42 4c 45 20 27 74 31 5f 64 61 74 61 27 28 TABLE 't1_data'(
+| 3968: 69 64 20 49 4e 54 45 47 45 52 20 50 52 49 4d 41 id INTEGER PRIMA
+| 3984: 52 59 20 4b 45 59 2c 20 62 6c 6f 63 6b 20 42 4c RY KEY, block BL
+| 4000: 4f 42 29 5b 01 07 17 11 11 08 81 23 74 61 62 6c OB)[.......#tabl
+| 4016: 65 74 31 74 31 43 52 45 41 54 45 20 56 49 52 54 et1t1CREATE VIRT
+| 4032: 55 41 4c 20 54 41 42 4c 45 20 74 31 20 55 53 49 UAL TABLE t1 USI
+| 4048: 4e 47 20 66 74 73 35 28 61 2c 62 20 75 6e 69 6e NG fts5(a,b unin
+| 4064: 64 65 78 65 64 2c 63 2c 74 6f 6b 65 6e 69 7a 65 dexed,c,tokenize
+| 4080: 3d 22 70 6f 72 74 65 72 20 61 73 63 69 69 22 29 =.porter ascii.)
+| page 2 offset 4096
+| 0: 0d 0f 68 00 05 0f 13 00 0f e6 0f 13 0f a8 0f 7c ..h............|
+| 16: 0f 2a 00 00 00 00 00 00 00 00 00 00 00 00 00 00 .*..............
+| 3856: 00 00 00 15 0a 03 00 30 00 00 00 00 01 03 03 00 .......0........
+| 3872: 03 01 01 01 02 01 01 03 01 01 37 8c 80 80 80 80 ..........7.....
+| 3888: 01 03 00 74 00 00 00 2e 02 30 61 03 02 02 01 01 ...t.....0a.....
+| 3904: 62 03 02 03 01 01 63 03 02 04 01 01 67 03 06 01 b.....c.....g...
+| 3920: 02 02 01 01 68 03 06 01 02 03 01 01 69 03 06 01 ....h.......i...
+| 3936: 02 04 04 06 06 06 08 08 0f ef 00 14 2a 00 00 00 ............*...
+| 3952: 00 01 02 02 00 02 01 01 01 02 01 01 25 88 80 80 ............%...
+| 3968: 80 80 01 03 00 50 00 00 00 1f 02 30 67 02 08 02 .....P.....0g...
+| 3984: 01 02 02 01 01 68 02 08 03 8d 02 03 01 01 6a 42 .....h........jB
+| 4000: 08 04 01 02 04 04 09 09 37 84 80 80 80 80 01 03 ........7.......
+| 4016: 00 74 00 00 00 2e 02 30 61 01 12 02 01 01 62 01 .t.....0a.....b.
+| 4032: 02 03 01 01 63 01 02 04 01 01 67 01 06 01 02 02 ....c.....g.....
+| 4048: 01 01 68 01 05 01 02 03 01 01 69 01 06 01 02 04 ..h.......i.....
+| 4064: 04 06 06 06 08 08 07 01 03 00 14 03 09 00 09 00 ................
+| 4080: 00 00 11 24 00 00 00 00 01 01 01 00 01 01 01 01 ...$............
+| page 3 offset 8192
+| 0: 0a 00 00 00 03 0f ec 00 0f fa 0f f3 0f ec 00 00 ................
+| 4064: 00 00 00 00 00 00 00 00 00 00 00 00 06 04 01 0c ................
+| 4080: 01 03 02 06 04 01 0c 01 02 02 05 04 09 0c 01 02 ................
+| page 4 offset 12288
+| 0: 0d 00 00 00 03 0f be 00 0f ea 00 00 00 00 00 00 ................
+| 4016: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 14 03 ................
+| 4032: 05 00 17 17 17 61 20 62 20 63 67 20 68 20 69 67 .....a b cg h ig
+| 4048: 20 68 20 69 14 02 05 00 17 17 17 67 20 68 20 69 h i.......g h i
+| 4064: 61 20 62 20 63 67 20 68 20 69 14 01 05 00 17 17 a b cg h i......
+| 4080: 17 61 20 62 20 63 64 20 65 20 66 67 20 68 20 69 .a b cd e fg h i
+| page 5 offset 16384
+| 0: 0d 00 00 00 03 0f e8 00 0f f8 0f f0 0f e8 00 00 ................
+| 4064: 00 00 00 00 00 00 00 00 06 03 03 00 12 03 00 03 ................
+| 4080: 06 02 03 00 12 03 00 03 06 01 03 00 12 03 00 03 ................
+| page 6 offset 20480
+| 0: 0a 00 00 00 01 0f f4 00 0f f4 00 00 00 00 00 00 ................
+| 4080: 00 00 00 00 0b 03 1b 01 76 65 72 73 69 6f 6e 04 ........version.
+| page 7 offset 24576
+| 0: 0d 00 00 00 03 0f 9e 00 0f e6 0f ef 0f 9e 00 00 ................
+| 3984: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 41 84 ..............A.
+| 4000: 80 80 80 80 01 04 00 81 06 00 00 00 34 02 30 61 ............4.0a
+| 4016: 01 01 01 01 01 62 01 01 01 01 01 63 01 01 01 01 .....b.....c....
+| 4032: e6 64 01 01 01 65 01 01 01 66 01 01 01 67 01 01 .d...e...f...g..
+| 4048: 01 01 01 68 01 01 01 01 01 69 01 01 01 04 06 06 ...h.....i......
+| 4064: 06 04 04 04 06 06 07 01 03 00 14 03 09 09 09 0f ................
+| 4080: 0a 03 00 24 00 00 00 00 01 01 01 00 01 01 01 01 ...$............
+| page 8 offset 28672
+| 0: 0a 00 00 00 01 0f fa 00 0f fa 00 00 00 00 00 00 ................
+| 4080: 00 00 00 00 00 00 00 00 00 00 05 04 09 0c 01 02 ................
+| page 9 offset 32768
+| 0: 0d 00 00 00 03 0f be 00 0f ea 0f d4 0f be 00 00 ................
+| 4016: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 14 03 ................
+| 4032: 05 00 17 17 17 61 20 62 20 63 67 20 68 20 69 67 .....a b cg h ig
+| 4048: 20 68 21 69 14 02 05 00 17 17 17 67 20 68 20 69 h!i.......g h i
+| 4064: 61 20 62 20 63 67 20 68 20 69 14 01 05 00 17 17 a b cg h i......
+| 4080: 17 61 20 62 20 63 64 20 65 20 66 67 20 68 20 69 .a b cd e fg h i
+| page 10 offset 36864
+| 0: 0a 00 00 00 01 0f f4 00 0f f4 00 00 00 00 00 00 ................
+| 4080: 00 00 00 00 0b 03 1b 01 76 65 72 73 69 6f 6e 04 ........version.
+| end crash-44e8035a976422.db
+}]} {}
+
+do_catchsql_test 81.2 {
+ UPDATE t1 SET b=zeroblob(299);
+} {1 {database disk image is malformed}}
+
sqlite3_fts5_may_be_corrupt 0
finish_test
diff --git a/ext/fts5/test/fts5expr.test b/ext/fts5/test/fts5expr.test
index e3938beb0..49be61d9c 100644
--- a/ext/fts5/test/fts5expr.test
+++ b/ext/fts5/test/fts5expr.test
@@ -44,5 +44,9 @@ for {set ii 0} {$ii < 300} {incr ii} {
} $res
}
+do_execsql_test 1.2 {
+ SELECT rowid FROM x1 WHERE a MATCH '"..."'
+} {}
+
finish_test
diff --git a/ext/fts5/test/fts5faultI.test b/ext/fts5/test/fts5faultI.test
new file mode 100644
index 000000000..08a6bf056
--- /dev/null
+++ b/ext/fts5/test/fts5faultI.test
@@ -0,0 +1,237 @@
+# 2010 June 15
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+source $testdir/malloc_common.tcl
+set testprefix fts5faultI
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+set ::testprefix fts5faultI
+
+do_execsql_test 1.0 {
+ PRAGMA encoding = utf16;
+ CREATE VIRTUAL TABLE t1 USING fts5(x, locale=1);
+ INSERT INTO t1 VALUES('origintext unicode61 ascii porter trigram');
+}
+
+faultsim_save_and_close
+faultsim_restore_and_reopen
+
+do_faultsim_test 1 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT rowid FROM t1(fts5_locale('en_US', 'origintext'));
+ }
+} -test {
+ faultsim_test_result {0 1}
+}
+
+do_faultsim_test 2 -faults oom* -prep {
+ faultsim_restore_and_reopen
+ execsql {
+ SELECT * FROM t1('ascii');
+ }
+} -body {
+ execsql {
+ UPDATE t1 SET rowid=rowid+1;
+ }
+} -test {
+ faultsim_test_result {0 {}}
+}
+
+fts5_aux_test_functions db
+do_faultsim_test 3 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT fts5_columnlocale(t1, 0) FROM t1('unicode*');
+ }
+} -test {
+ faultsim_test_result {0 {{}}} {1 SQLITE_NOMEM}
+}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 4.0 {
+ CREATE VIRTUAL TABLE w1 USING fts5(a);
+}
+faultsim_save_and_close
+
+do_faultsim_test 4 -faults oom* -prep {
+ faultsim_restore_and_reopen
+ execsql {
+ BEGIN;
+ INSERT INTO w1 VALUES('token token token');
+ }
+} -body {
+ execsql {
+ INSERT INTO w1(w1, rank) VALUES('rank', 'bm25()');
+ }
+} -test {
+ faultsim_test_result {0 {}}
+}
+
+do_faultsim_test 5 -faults oom* -prep {
+ faultsim_restore_and_reopen
+ execsql {
+ BEGIN;
+ INSERT INTO w1 VALUES('one');
+ SAVEPOINT one;
+ INSERT INTO w1 VALUES('two');
+ ROLLBACK TO one;
+ }
+
+} -body {
+ execsql {
+ INSERT INTO w1 VALUES('string');
+ }
+} -test {
+ faultsim_test_result {0 {}}
+}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 5.0 {
+ CREATE VIRTUAL TABLE w1 USING fts5(a);
+ INSERT INTO w1 VALUES('one two three');
+}
+fts5_aux_test_functions db
+
+do_faultsim_test 5.1 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT fts5_test_insttoken(w1, 0, 0) FROM w1('two');
+ }
+} -test {
+ faultsim_test_result {0 two} {1 SQLITE_NOMEM}
+}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 6.0 {
+ CREATE VIRTUAL TABLE w1 USING fts5(a);
+ INSERT INTO w1 VALUES('one two three');
+}
+fts5_aux_test_functions db
+faultsim_save_and_close
+
+do_faultsim_test 6 -faults oom* -prep {
+ faultsim_restore_and_reopen
+ db eval {
+ BEGIN;
+ INSERT INTO w1 VALUES('four five six');
+ SAVEPOINT abc;
+ INSERT INTO w1 VALUES('seven eight nine');
+ SAVEPOINT def;
+ INSERT INTO w1 VALUES('ten eleven twelve');
+ }
+} -body {
+ execsql {
+ RELEASE abc;
+ }
+} -test {
+ faultsim_test_result {0 {}}
+}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 7.0 {
+ CREATE VIRTUAL TABLE w1 USING fts5(a);
+ INSERT INTO w1 VALUES('one two three');
+ INSERT INTO w1 VALUES('three two one');
+ DELETE FROM w1_content WHERE rowid=1;
+}
+
+faultsim_save_and_close
+
+do_faultsim_test 7 -faults oom* -prep {
+ faultsim_restore_and_reopen
+ db eval { SELECT * FROM w1 }
+} -body {
+ execsql {
+ PRAGMA integrity_check;
+ }
+} -test {
+}
+
+#-------------------------------------------------------------------------
+reset_db
+fts5_tclnum_register db
+fts5_aux_test_functions db
+
+do_execsql_test 8.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, tokenize = "tclnum query", detail=columns
+ );
+ INSERT INTO ft VALUES('one two three i ii iii');
+ INSERT INTO ft VALUES('four five six iv v vi');
+ INSERT INTO ft VALUES('eight nine ten viii ix x');
+} {}
+
+do_faultsim_test 8.1 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT fts5_test_collist (ft) FROM ft('one two');
+ }
+} -test {
+ faultsim_test_result {0 {{0.0 1.0}}} {1 {SQL logic error}} {1 SQLITE_NOMEM}
+}
+
+do_faultsim_test 8.2 -faults oom* -prep {
+} -body {
+ execsql {
+ SELECT rowid FROM ft('one two') ORDER BY rank;
+ }
+} -test {
+ faultsim_test_result {0 1} {1 {SQL logic error}} {1 SQLITE_NOMEM}
+}
+
+#-------------------------------------------------------------------------
+reset_db
+
+do_execsql_test 9.0 {
+ CREATE VIRTUAL TABLE ft USING fts5(x);
+ INSERT INTO ft VALUES('one two three i ii iii');
+ INSERT INTO ft VALUES('four five six iv v vi');
+ INSERT INTO ft VALUES('eight nine ten viii ix x');
+} {}
+
+faultsim_save_and_close
+
+do_faultsim_test 9.1 -faults oom* -prep {
+ faultsim_restore_and_reopen
+} -body {
+ execsql {
+ UPDATE ft SET rowid=4 WHERE rowid=1
+ }
+} -test {
+ faultsim_test_result {0 {}}
+}
+
+do_faultsim_test 9.2 -faults oom* -prep {
+ faultsim_restore_and_reopen
+} -body {
+ execsql {
+ SELECT rowid FROM ft WHERE x MATCH 'one AND two AND three'
+ }
+} -test {
+ faultsim_test_result {0 1}
+}
+
+
+
+finish_test
+
diff --git a/ext/fts5/test/fts5locale.test b/ext/fts5/test/fts5locale.test
new file mode 100644
index 000000000..d64df1849
--- /dev/null
+++ b/ext/fts5/test/fts5locale.test
@@ -0,0 +1,576 @@
+# 2014 Dec 20
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on the fts5 locale=1 option and the fts5_locale() function.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5locale
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+# Return $token transformed as dictated by $locale:
+#
+#   reverse   - the characters of the token in reverse order.
+#   otherwise - the token, unmodified.
+#
+proc transform_token {locale token} {
+  if {$locale ne "reverse"} {
+    return $token
+  }
+
+  # Prepend each character in turn, yielding the reversed string.
+  set reversed ""
+  foreach ch [split $token ""] {
+    set reversed "$ch$reversed"
+  }
+  return $reversed
+}
+
+proc tcl_create {args} { return "tcl_tokenize" }
+# Tokenizer script (see tcl_create). Emits each token of $text, or only
+# the 1st, 3rd, 5th... token if the current locale is "second".
+proc tcl_tokenize {tflags text} {
+  set nStep [expr {[sqlite3_fts5_locale]=="second" ? 2 : 1}]
+  set iToken 1
+  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
+    incr iToken
+    if {$iToken % $nStep} continue
+    set w [transform_token [sqlite3_fts5_locale] $w]
+    sqlite3_fts5_token $w $iStart $iEnd
+  }
+}
+
+#-------------------------------------------------------------------------
+# Check that queries can have a locale attached to them.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl);
+ INSERT INTO t1 VALUES('abc');
+ INSERT INTO t1 VALUES('cba');
+} {}
+
+do_execsql_test 1.1 {
+ SELECT rowid, a FROM t1( fts5_locale('en_US', 'abc') );
+} {1 abc}
+
+do_execsql_test 1.2 {
+ SELECT rowid, a FROM t1( fts5_locale('reverse', 'abc') );
+} {2 cba}
+
+#-------------------------------------------------------------------------
+# Test that the locale= option exists and seems to accept values. And
+# that fts5_locale() values may only be inserted into an internal-content
+# table if the locale=1 option was specified.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 2.1 {
+ CREATE VIRTUAL TABLE b1 USING fts5(x, y, locale=1, tokenize=tcl);
+ CREATE VIRTUAL TABLE b2 USING fts5(x, y, locale=0, tokenize=tcl);
+
+ CREATE VIRTUAL TABLE ttt USING fts5vocab('b1', instance);
+}
+
+do_catchsql_test 2.2.1 {
+ CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=2);
+} {1 {malformed locale=... directive}}
+do_catchsql_test 2.2.2 {
+ CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=111);
+} {1 {malformed locale=... directive}}
+
+do_catchsql_test 2.3 {
+ INSERT INTO b1(b1, rank) VALUES('locale', 0);
+} {1 {SQL logic error}}
+
+do_execsql_test 2.4 {
+ INSERT INTO b1 VALUES('abc', 'one two three');
+ INSERT INTO b1 VALUES('def', fts5_locale('reverse', 'four five six'));
+}
+
+do_execsql_test 2.5 {
+ INSERT INTO b2 VALUES('abc', 'one two three');
+}
+
+do_catchsql_test 2.6 {
+ INSERT INTO b2 VALUES('def', fts5_locale('reverse', 'four five six'));
+} {1 {fts5_locale() requires locale=1}}
+
+do_execsql_test 2.7 { SELECT rowid FROM b1('one') } {1}
+do_execsql_test 2.8 { SELECT rowid FROM b1('four') } {}
+do_execsql_test 2.9 { SELECT rowid FROM b1('ruof') } 2
+do_execsql_test 2.10 { SELECT rowid FROM b1(fts5_locale('reverse', 'five'))} 2
+
+do_execsql_test 2.11 {
+ SELECT x, quote(y) FROM b1
+} {
+ abc {'one two three'}
+ def {'four five six'}
+}
+
+do_execsql_test 2.12 { SELECT quote(y) FROM b1('ruof') } {
+ {'four five six'}
+}
+
+do_execsql_test 2.13 {
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+do_execsql_test 2.14 {
+ INSERT INTO b1(b1) VALUES('rebuild');
+}
+do_execsql_test 2.15 {
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+
+do_execsql_test 2.16 {
+ DELETE FROM b1 WHERE rowid=2
+}
+do_execsql_test 2.17 {
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+
+do_execsql_test 2.18 {
+ INSERT INTO b1(rowid, x, y) VALUES(
+ test_setsubtype(45, 76), 'abc def', 'def abc'
+ );
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+
+#-------------------------------------------------------------------------
+# Test the 'delete' command with contentless tables.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 3.1 {
+ CREATE VIRTUAL TABLE c1 USING fts5(x, content=, tokenize=tcl, locale=1);
+ CREATE VIRTUAL TABLE c2 USING fts5vocab('c1', instance);
+
+ INSERT INTO c1 VALUES('hello world');
+ INSERT INTO c1 VALUES( fts5_locale('reverse', 'one two three') );
+}
+
+do_execsql_test 3.2 {
+ SELECT DISTINCT term FROM c2 ORDER BY 1
+} {
+ eerht eno hello owt world
+}
+
+do_execsql_test 3.3 {
+ INSERT INTO c1(c1, rowid, x)
+ VALUES('delete', 2, fts5_locale('reverse', 'one two three') );
+}
+
+do_execsql_test 3.4 {
+ SELECT DISTINCT term FROM c2 ORDER BY 1
+} {
+ hello world
+}
+
+#-------------------------------------------------------------------------
+# Test that an UPDATE that updates a subset of the columns does not
+# magically discard the locale from those columns not updated.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 4.1 {
+ CREATE VIRTUAL TABLE d1 USING fts5(x, y, locale=1, tokenize=tcl);
+ CREATE VIRTUAL TABLE d2 USING fts5vocab('d1', instance);
+
+ INSERT INTO d1(rowid, x, y) VALUES(1, 'abc', 'def');
+ INSERT INTO d1(rowid, x, y) VALUES(2, 'ghi', fts5_locale('reverse', 'hello'));
+}
+
+do_execsql_test 4.2 {
+ SELECT DISTINCT term FROM d2 ORDER BY 1
+} {
+ abc def ghi olleh
+}
+
+do_execsql_test 4.3 {
+ UPDATE d1 SET x='jkl' WHERE rowid=2;
+}
+
+do_execsql_test 4.4 {
+ SELECT DISTINCT term FROM d2 ORDER BY 1
+} {
+ abc def jkl olleh
+}
+
+do_execsql_test 4.5 {
+ SELECT rowid, * FROM d1
+} {
+ 1 abc def
+ 2 jkl hello
+}
+
+do_execsql_test 4.6 {
+ UPDATE d1 SET rowid=4 WHERE rowid=2
+}
+
+do_execsql_test 4.7 {
+ SELECT rowid, * FROM d1
+} {
+ 1 abc def
+ 4 jkl hello
+}
+
+fts5_aux_test_functions db
+
+do_execsql_test 4.8.1 {
+ SELECT fts5_test_columntext(d1) FROM d1('jkl')
+} {{jkl hello}}
+do_execsql_test 4.8.2 {
+ SELECT fts5_test_columntext(d1) FROM d1(fts5_locale('reverse', 'hello'))
+} {{jkl hello}}
+
+do_execsql_test 4.9 {
+ SELECT fts5_test_columnlocale(d1) FROM d1(fts5_locale('reverse', 'hello'))
+} {{{} reverse}}
+
+do_execsql_test 4.10 {
+ SELECT fts5_test_columnlocale(d1) FROM d1
+} {
+ {{} {}}
+ {{} reverse}
+}
+
+#-------------------------------------------------------------------------
+# Test that if an fts5_locale() value is written to an UNINDEXED
+# column it is stored as text. This is so that blobs and other values
+# can also be stored as is.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 5.1 {
+ CREATE VIRTUAL TABLE t1 USING fts5(
+ x, y UNINDEXED, locale=1, tokenize=tcl
+ );
+
+ INSERT INTO t1(rowid, x, y) VALUES(111,
+ fts5_locale('reverse', 'one two three'),
+ fts5_locale('reverse', 'four five six')
+ );
+}
+
+do_execsql_test 5.2 {
+ SELECT rowid, x, y FROM t1
+} {
+ 111 {one two three} {four five six}
+}
+
+do_execsql_test 5.3 {
+ SELECT typeof(c0), typeof(c1) FROM t1_content
+} {
+ blob text
+}
+
+#-------------------------------------------------------------------------
+# Test columnsize, highlight() and snippet() with locale "second".
+foreach {tn opt} {
+ 1 {}
+ 2 {, columnsize=0}
+} {
+ reset_db
+ sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+ do_execsql_test 6.$tn.1 "
+ CREATE VIRTUAL TABLE y1 USING fts5(t, locale=1, tokenize=tcl $opt);
+ "
+
+ do_execsql_test 6.$tn.2 {
+ INSERT INTO y1(rowid, t) VALUES
+ (1, fts5_locale('second', 'the city of London')),
+ (2, fts5_locale('second', 'shall have all the old')),
+ (3, fts5_locale('second', 'Liberties and Customs')),
+ (4, fts5_locale('second', 'which it hath been used to have'));
+ }
+
+ fts5_aux_test_functions db
+
+ do_execsql_test 6.$tn.3 {
+ SELECT fts5_test_columnsize(y1) FROM y1
+ } {
+ 2 3 2 4
+ }
+
+ do_execsql_test 6.$tn.4 {
+ SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall');
+ } {
+ 2 3
+ }
+
+ do_execsql_test 6.$tn.5 {
+ SELECT rowid, fts5_test_columnsize(y1) FROM y1('shall');
+ } {
+ 2 3
+ }
+
+ do_execsql_test 6.$tn.6 {
+ SELECT rowid, fts5_test_columnsize(y1) FROM y1('have');
+ } {
+ 4 4
+ }
+
+ do_execsql_test 6.$tn.7 {
+ SELECT rowid, highlight(y1, 0, '[', ']') FROM y1('have');
+ } {
+ 4 {which it hath been used to [have]}
+ }
+
+ do_execsql_test 6.$tn.8 {
+ SELECT rowid,
+ highlight(y1, 0, '[', ']'),
+ snippet(y1, 0, '[', ']', '...', 10)
+ FROM y1('Liberties + Customs');
+ } {
+ 3 {[Liberties and Customs]}
+ {[Liberties and Customs]}
+ }
+}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 6.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(x);
+}
+do_catchsql_test 6.1 {
+ INSERT INTO x1(rowid, x) VALUES(123, fts5_locale('en_AU', 'hello world'));
+} {1 {fts5_locale() requires locale=1}}
+
+do_execsql_test 6.2 {
+ SELECT typeof( fts5_locale(NULL, 'xyz') ), typeof( fts5_locale('', 'abc') );
+} {text text}
+
+#--------------------------------------------------------------------------
+# Test that fts5_locale() works with external-content tables.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 7.1 {
+ CREATE TABLE t1(ii INTEGER PRIMARY KEY, bb BLOB, tt TEXT, locale TEXT);
+ CREATE VIEW v1 AS
+ SELECT ii AS rowid, bb, fts5_locale(locale, tt) AS tt FROM t1;
+
+ CREATE VIRTUAL TABLE ft USING fts5(
+ bb, tt, locale=1, tokenize=tcl, content=v1
+ );
+
+ INSERT INTO t1 VALUES(1, NULL, 'one two three', NULL);
+ INSERT INTO t1 VALUES(2, '7800616263', 'four five six', 'reverse');
+ INSERT INTO t1 VALUES(3, '000000007800616263', 'seven eight nine', 'second');
+}
+
+do_execsql_test 7.2 {
+ INSERT INTO ft(ft) VALUES('rebuild');
+ INSERT INTO ft(ft) VALUES('integrity-check');
+}
+
+do_execsql_test 7.3 {
+ SELECT rowid, quote(bb), quote(tt) FROM ft
+} {
+ 1 NULL {'one two three'}
+ 2 '7800616263' {'four five six'}
+ 3 '000000007800616263' {'seven eight nine'}
+}
+
+do_execsql_test 7.4 { SELECT rowid FROM ft('six'); }
+do_execsql_test 7.5 { SELECT rowid FROM ft(fts5_locale('reverse','six')); } 2
+
+fts5_aux_test_functions db
+
+do_execsql_test 7.6 {
+ SELECT fts5_test_columnlocale(ft) FROM ft;
+} {
+ {{} {}} {{} reverse} {{} second}
+}
+
+#-------------------------------------------------------------------------
+# Test that the porter tokenizer works with locales.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 8.1 {
+ CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize="porter tcl");
+ CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance);
+
+ INSERT INTO ft(rowid, tt) VALUES
+ (111, fts5_locale('second', 'the porter tokenizer is a wrapper tokenizer')),
+ (222, fts5_locale('reverse', 'This value may also be set'));
+}
+
+do_execsql_test 8.2 {
+ SELECT DISTINCT term FROM vocab ORDER BY 1
+} {
+ a eb eulav osla sihT te the token yam
+}
+
+#-------------------------------------------------------------------------
+# Test that position-lists (used by xInst, xPhraseFirst etc.) work with
+# locales and modes other than detail=full.
+#
+foreach {tn detail} {
+ 1 detail=full
+ 2 detail=none
+ 3 detail=column
+} {
+ reset_db
+ sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+ do_execsql_test 9.$tn.0 "
+ CREATE VIRTUAL TABLE ft USING fts5(tt, locale=1, tokenize=tcl, $detail);
+ "
+ do_execsql_test 9.$tn.1 {
+ CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', instance);
+ INSERT INTO ft(rowid, tt) VALUES
+ (-1, fts5_locale('second', 'it is an ancient mariner'));
+ }
+
+ do_execsql_test 9.$tn.2 {
+ SELECT DISTINCT term FROM vocab
+ } {an it mariner}
+
+ do_execsql_test 9.$tn.3 {
+ SELECT highlight(ft, 0, '[', ']') FROM ft('mariner')
+ } {{it is an ancient [mariner]}}
+}
+
+#-------------------------------------------------------------------------
+# Check some corrupt fts5_locale() blob formats are detected.
+#
+foreach_detail_mode $::testprefix {
+
+ reset_db
+ sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+ fts5_aux_test_functions db
+ do_execsql_test 10.1 {
+ CREATE TABLE x1(ii INTEGER PRIMARY KEY, x);
+ CREATE VIRTUAL TABLE ft USING fts5(x,
+ content=x1, content_rowid=ii, locale=1, detail=%DETAIL%, columnsize=0
+ );
+
+ CREATE VIRTUAL TABLE ft2 USING fts5(
+ x, locale=1, detail=%DETAIL%, columnsize=0
+ );
+ }
+
+ foreach {tn v} {
+ 1 X'001122'
+ 2 X'0011223344'
+ 3 X'00E0B2EB68656c6c6f'
+ 4 X'00E0B2EB0068656c6c6f'
+ } {
+ do_execsql_test 10.2.$tn.0 { INSERT INTO ft(ft) VALUES('delete-all') }
+ do_execsql_test 10.2.$tn.1 { DELETE FROM x1; }
+ do_execsql_test 10.2.$tn.2 " INSERT INTO x1 VALUES(NULL, $v) "
+
+ do_catchsql_test 10.2.$tn.3 {
+ INSERT INTO ft(ft) VALUES('rebuild');
+ } {1 {SQL logic error}}
+
+ do_catchsql_test 10.2.$tn.4 "
+ SELECT * FROM ft( test_setsubtype($v, 76) );
+ " {1 {SQL logic error}}
+
+ do_execsql_test 10.2.$tn.5 {
+ INSERT INTO ft(rowid, x) VALUES(1, 'hello world');
+ }
+
+ if {"%DETAIL%"!="full"} {
+ do_catchsql_test 10.2.$tn.6 {
+ SELECT fts5_test_poslist(ft) FROM ft('world');
+ } {1 SQLITE_ERROR}
+
+ do_catchsql_test 10.2.$tn.7 {
+ SELECT fts5_test_columnsize(ft) FROM ft('world');
+ } {1 SQLITE_ERROR}
+
+ do_catchsql_test 10.2.$tn.7b {
+ SELECT fts5_test_columnlocale(ft) FROM ft('world');
+ } {1 SQLITE_ERROR}
+ }
+
+ do_catchsql_test 10.2.$tn.8 {
+ SELECT * FROM ft('hello')
+ } {1 {SQL logic error}}
+
+ do_catchsql_test 10.2.$tn.9 {
+ PRAGMA integrity_check;
+ } {0 ok}
+
+ do_execsql_test 10.2.$tn.10 {
+ DELETE FROM x1;
+ INSERT INTO x1(ii, x) VALUES(1, 'hello world');
+ }
+
+ do_catchsql_test 10.2.$tn.11 "
+ INSERT INTO ft(ft, rowid, x) VALUES('delete', 1, test_setsubtype($v,76) )
+ " {1 {SQL logic error}}
+
+ do_catchsql_test 10.2.$tn.12 "
+ INSERT INTO ft(rowid, x) VALUES(2, test_setsubtype($v,76) )
+ " {1 {SQL logic error}}
+
+ do_execsql_test 10.2.$tn.13 {
+ INSERT INTO ft2(rowid, x) VALUES(1, 'hello world');
+ }
+ do_execsql_test 10.2.$tn.14 "UPDATE ft2_content SET c0=$v"
+
+ do_catchsql_test 10.2.$tn.15 {
+ PRAGMA integrity_check;
+ } {1 {SQL logic error}}
+
+ do_execsql_test 10.2.$tn.16 {
+ DELETE FROM ft2_content;
+ INSERT INTO ft2(ft2) VALUES('rebuild');
+ }
+ }
+
+}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+fts5_aux_test_functions db
+do_execsql_test 11.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(abc, locale=1);
+ INSERT INTO x1(rowid, abc) VALUES(123, fts5_locale('en_US', 'one two three'));
+}
+
+do_catchsql_test 11.1 {
+ SELECT fts5_columnlocale(x1, -1) FROM x1('two');
+} {1 SQLITE_RANGE}
+do_catchsql_test 11.2 {
+ SELECT fts5_columnlocale(x1, 1) FROM x1('two');
+} {1 SQLITE_RANGE}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_test 12.0 {
+ list [catch {
+ sqlite3_fts5_create_tokenizer -v2 -version 3 db tcl tcl_create
+ } msg] $msg
+} {1 {error in fts5_api.xCreateTokenizer_v2()}}
+
+finish_test
+
diff --git a/ext/fts5/test/fts5misc.test b/ext/fts5/test/fts5misc.test
index abd4fdaf8..534c42fff 100644
--- a/ext/fts5/test/fts5misc.test
+++ b/ext/fts5/test/fts5misc.test
@@ -21,8 +21,6 @@ ifcapable !fts5 {
return
}
-if 0 {
-
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a);
}
@@ -37,21 +35,21 @@ do_catchsql_test 1.1.2 {
do_catchsql_test 1.2.1 {
SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*id');
-} {0 {{}}}
+} {1 {no such cursor: 4}}
do_catchsql_test 1.2.2 {
SELECT a FROM t1
WHERE rank = (SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*id'));
-} {0 {}}
+} {1 {no such cursor: 6}}
do_catchsql_test 1.3.1 {
SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*reads');
-} {1 {no such cursor: 2}}
+} {1 {no such cursor: 1}}
do_catchsql_test 1.3.2 {
SELECT a FROM t1
WHERE rank = (SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*reads'));
-} {1 {no such cursor: 2}}
+} {1 {no such cursor: 1}}
db close
sqlite3 db test.db
@@ -61,6 +59,11 @@ do_catchsql_test 1.3.3 {
WHERE rank = (SELECT highlight(t1, 4, '<b>', '</b>') FROM t1('*reads'));
} {1 {no such cursor: 1}}
+fts5_aux_test_functions db
+do_catchsql_test 1.3.4 {
+ SELECT fts5_columntext(t1) FROM t1('*reads');
+} {1 {no such cursor: 1}}
+
#-------------------------------------------------------------------------
reset_db
do_execsql_test 2.0 {
@@ -569,24 +572,98 @@ do_execsql_test 20.5 {
} {3 1}
#-------------------------------------------------------------------------
-}
reset_db
do_execsql_test 21.0 {
+ CREATE TABLE t1(ii INTEGER, x TEXT, y TEXT);
+ CREATE VIRTUAL TABLE xyz USING fts5(content_rowid=ii, content=t1, x, y);
+ INSERT INTO t1 VALUES(1, 'one', 'i');
+ INSERT INTO t1 VALUES(2, 'two', 'ii');
+ INSERT INTO t1 VALUES(3, 'tree', 'iii');
+ INSERT INTO xyz(xyz) VALUES('rebuild');
+}
+
+do_execsql_test 21.1 {
+ UPDATE xyz SET y='TWO' WHERE rowid=2;
+ UPDATE t1 SET y='TWO' WHERE ii=2;
+}
+
+do_execsql_test 21.2 {
+ PRAGMA integrity_check
+} {ok}
+
+# NOTE(review): presumably checks that DEFENSIVE mode still permits a non-shadow "xyz_*" table - confirm.
+sqlite3_db_config db DEFENSIVE 1
+do_execsql_test 21.3 {
+  CREATE TABLE xyz_notashadow(x, y);
+  DROP TABLE xyz_notashadow;
+}
+sqlite3_db_config db DEFENSIVE 0
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 22.0 {
+ SELECT fts5(NULL);
+} {{}}
+do_execsql_test 22.1 {
+ SELECT count(*) FROM (
+ SELECT fts5_source_id()
+ )
+} {1}
+execsql_pp {
+ SELECT fts5_source_id()
+}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 23.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(x);
+ INSERT INTO x1 VALUES('one + two + three');
+ INSERT INTO x1 VALUES('one + xyz + three');
+ INSERT INTO x1 VALUES('xyz + two + xyz');
+}
+do_execsql_test 23.1 {
+ SELECT rowid FROM x1('one + two + three');
+} {1}
+
+do_execsql_test 23.2 {
+ SELECT rowid FROM x1('^".." AND one');
+} {}
+
+do_execsql_test 23.3 {
+ SELECT rowid FROM x1('abc NEAR ".." NEAR def');
+} {}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 24.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, detail='none');
INSERT INTO t1(a) VALUES('a');
}
-do_execsql_test 21.2 {
+do_execsql_test 24.2 {
SELECT rank FROM ( SELECT rank FROM t1('a NOT "" NOT def') ) ORDER BY 1;
} {-1e-06}
-do_execsql_test 21.3 {
+do_execsql_test 24.3 {
SELECT rank FROM ( SELECT rank FROM t1('a NOT � NOT def') ) ORDER BY 1;
} {-1e-06}
-do_execsql_test 21.4 {
+do_execsql_test 24.4 {
SELECT rank FROM ( SELECT rank FROM t1('a NOT "" NOT def') );
} {-1e-06}
+#-------------------------------------------------------------------------
+reset_db
+fts5_aux_test_functions db
+
+do_execsql_test 25.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(a, detail='none', content='');
+ INSERT INTO t1(a) VALUES('a b c');
+}
+
+do_execsql_test 25.1 {
+  SELECT fts5_test_poslist(t1) FROM t1('b') ORDER BY rank;
+} {{}} ;# expected result: a single, empty position list
+
finish_test
diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test
index cc9d99e2d..8e975fa17 100644
--- a/ext/fts5/test/fts5origintext.test
+++ b/ext/fts5/test/fts5origintext.test
@@ -261,8 +261,8 @@ do_execsql_test 5.3 {
reset_db
sqlite3_fts5_register_origintext db
do_execsql_test 6.0 {
- CREATE VIRTUAL TABLE ft USING fts5(
- x, y, tokenize='origintext unicode61', detail=%DETAIL%
+ CREATE VIRTUAL TABLE ft USING fts5(
+ x, y, tokenize='origintext unicode61', detail=%DETAIL%, tokendata=0
);
INSERT INTO ft VALUES('One Two', 'Three two');
@@ -291,6 +291,22 @@ do_execsql_test 6.3 {
SELECT rowid, tokens(ft) FROM ft('Three*');
} {1 {{}} 2 {{}}}
+fts5_aux_test_functions db
+do_catchsql_test 6.4 {
+ SELECT fts5_test_insttoken(ft, -1, 0) FROM ft('one');
+} {1 SQLITE_RANGE}
+
+do_catchsql_test 6.5 {
+ SELECT fts5_test_insttoken(ft, 1, 0) FROM ft('one');
+} {1 SQLITE_RANGE}
+
+do_catchsql_test 6.6 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(x, tokendata=2);
+} {1 {malformed tokendata=... directive}}
+do_catchsql_test 6.7 {
+ CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', tokendata=11);
+} {1 {malformed tokendata=... directive}}
+
}
finish_test
diff --git a/ext/fts5/test/fts5secure8.test b/ext/fts5/test/fts5secure8.test
index 0216bb6ea..8b65b7c59 100644
--- a/ext/fts5/test/fts5secure8.test
+++ b/ext/fts5/test/fts5secure8.test
@@ -58,6 +58,10 @@ do_execsql_test 2.1 {
pragma quick_check;
} {ok}
+do_catchsql_test 2.2 {
+ INSERT INTO xyz(xyz, rank) VALUES('secure-delete', 'hello world');
+} {1 {SQL logic error}}
+
diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test
index 638409506..60ccb5a9c 100644
--- a/ext/fts5/test/fts5simple.test
+++ b/ext/fts5/test/fts5simple.test
@@ -480,4 +480,33 @@ do_execsql_test 22.0 {
do_catchsql_test 22.1 {SELECT * FROM x1('')} {1 {fts5: syntax error near ""}}
do_catchsql_test 22.2 {SELECT * FROM x1(NULL)} {1 {fts5: syntax error near ""}}
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 23.0 {
+ CREATE VIRTUAL TABLE x1 USING fts5(x);
+ SELECT count(*) FROM x1_data;
+} {2}
+
+do_execsql_test 23.1 {
+ BEGIN;
+ INSERT INTO x1 VALUES('a b c d');
+ INSERT INTO x1 VALUES('a b c d');
+ INSERT INTO x1 VALUES('a b c d');
+}
+
+do_execsql_test 23.2 {
+ SELECT count(*) FROM x1_data;
+} {2}
+
+do_execsql_test 23.3 {
+ INSERT INTO x1(x1) VALUES('flush');
+ SELECT count(*) FROM x1_data;
+} {3}
+
+do_execsql_test 23.4 {
+ ROLLBACK;
+ SELECT count(*) FROM x1_data;
+} {2}
+
+
finish_test
diff --git a/ext/fts5/test/fts5tokenizer3.test b/ext/fts5/test/fts5tokenizer3.test
new file mode 100644
index 000000000..5cdab743c
--- /dev/null
+++ b/ext/fts5/test/fts5tokenizer3.test
@@ -0,0 +1,77 @@
+# 2024 Aug 10
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on the built-in fts5 tokenizers.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5tokenizer3
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+
+proc get_sod {args} { return "split_on_dot" }
+proc get_lowercase {args} { return "lowercase" }
+
+proc lowercase {flags txt} {
+  # Emit the entire input as one lower-cased token covering offsets 0..length.
+  sqlite3_fts5_token [string tolower $txt] 0 [string length $txt]
+  return 0
+}
+
+proc split_on_dot {flags txt} {
+  set iOff 0 ;# start offset of the current piece within $txt
+  foreach t [split $txt "."] {
+    set n [string length $t] ;# length of this piece, not of the whole input
+    sqlite3_fts5_token $t $iOff [expr {$iOff+$n}]
+    incr iOff [expr {$n+1}] ;# step past the piece and the "." that follows it
+  }
+  return ""
+}
+
+foreach {tn script} {
+ 1 {
+ sqlite3_fts5_create_tokenizer db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod
+ }
+ 2 {
+ sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod
+ }
+ 3 {
+ sqlite3_fts5_create_tokenizer db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod
+ }
+ 4 {
+ sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase
+ sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod
+ }
+} {
+ reset_db
+ eval $script
+
+ do_execsql_test 1.$tn.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=split_on_dot);
+ CREATE VIRTUAL TABLE t1vocab USING fts5vocab(t1, instance);
+ INSERT INTO t1 VALUES('ABC.Def.ghi');
+ }
+
+ do_execsql_test 1.$tn.1 {
+ SELECT term FROM t1vocab ORDER BY 1
+ } {abc def ghi}
+}
+
+
+finish_test
diff --git a/ext/fts5/test/fts5trigram.test b/ext/fts5/test/fts5trigram.test
index 752686620..3742c647f 100644
--- a/ext/fts5/test/fts5trigram.test
+++ b/ext/fts5/test/fts5trigram.test
@@ -56,6 +56,7 @@ foreach {tn like res} {
7 {ABCDEFG%} 1
8 {%รุงเ%} 2
9 {%งเ%} 2
+ 10 {%"งเ"%} {}
} {
do_execsql_test 1.3.$tn {
SELECT rowid FROM t1 WHERE y LIKE $like
@@ -200,6 +201,12 @@ do_eqp_test 6.3 {
do_eqp_test 6.4 {
SELECT * FROM ci1 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}
+do_eqp_test 6.5 {
+ SELECT * FROM ci1 WHERE x < ?
+} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
+do_eqp_test 6.6 {
+ SELECT * FROM ci0 WHERE x < ?
+} {{SCAN ci0 VIRTUAL TABLE INDEX 0:}}
reset_db
do_execsql_test 7.0 {
@@ -256,4 +263,85 @@ do_execsql_test 8.3 {
{[abcde]}
}
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 9.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(
+ a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
+ tokenize=trigram
+ );
+
+ INSERT INTO t1(rowid, a12) VALUES(111, 'thats a tricky case though');
+ INSERT INTO t1(rowid, a12) VALUES(222, 'the query planner cannot do');
+}
+
+do_execsql_test 9.1 {
+ SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%'
+} {111}
+
+do_execsql_test 9.2 {
+ SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' AND a12 LIKE '%case%'
+} {111}
+
+do_execsql_test 9.3 {
+ SELECT rowid FROM t1 WHERE a12 LIKE NULL
+} {}
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 10.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram);
+}
+
+do_test 10.1 {
+ foreach {val} {
+ "abc \UFFjkl\UFF"
+ "abc \UFFFjkl\UFFF"
+ "abc \UFFFFjkl\UFFFF"
+ "abc \UFFFFFjkl\UFFFFF"
+ "\UFFjkl\UFF abc"
+ "\UFFFjkl\UFFF abc"
+ "\UFFFFjkl\UFFFF abc"
+ "\UFFFFFjkl\UFFFFF abc"
+ "\U10001jkl\U10001 abc"
+ } {
+ execsql { INSERT INTO t1 VALUES( $val ) }
+ }
+} {}
+
+do_test 10.2 {
+ foreach {val} {
+ X'E18000626320646566'
+ X'61EDA0806320646566'
+ X'61EDA0806320646566'
+ X'61EFBFBE6320646566'
+ X'76686920E18000626320646566'
+ X'7668692061EDA0806320646566'
+ X'7668692061EDA0806320646566'
+ X'7668692061EFBFBE6320646566'
+ } {
+ execsql " INSERT INTO t1 VALUES( $val ) "
+ }
+} {}
+
+do_test 10.3 {
+ set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
+ set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
+ set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
+ set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
+ execsql {
+ INSERT INTO t1 VALUES($a);
+ INSERT INTO t1 VALUES($b);
+ INSERT INTO t1 VALUES($c);
+ INSERT INTO t1 VALUES($d);
+
+ INSERT INTO t1 VALUES('abcd' || $a);
+ INSERT INTO t1 VALUES('abcd' || $b);
+ INSERT INTO t1 VALUES('abcd' || $c);
+ INSERT INTO t1 VALUES('abcd' || $d);
+ }
+} {}
+
+
+
finish_test
diff --git a/ext/fts5/test/fts5unicode2.test b/ext/fts5/test/fts5unicode2.test
index 3fc1f673a..7a49a1d83 100644
--- a/ext/fts5/test/fts5unicode2.test
+++ b/ext/fts5/test/fts5unicode2.test
@@ -470,4 +470,24 @@ do_execsql_test 8.2.3 {
SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
+#-------------------------------------------------------------------------
+
+foreach {tn val bErr} {
+ 1 0 0
+ 2 1 0
+ 3 2 0
+ 4 3 1
+ 5 11 1
+} {
+ reset_db
+ set aRes(0) {0 {}}
+ set aRes(1) {1 {error in tokenizer constructor}}
+ set res $aRes($bErr)
+ do_catchsql_test 9.1.$tn "
+ CREATE VIRTUAL TABLE bl USING fts5(
+ s, tokenize='trigram remove_diacritics $val'
+ );
+ " $res
+}
+
finish_test
diff --git a/manifest b/manifest
index 7ee60fbf4..de163306d 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Fix\sa\sname\sresolution\sissue\swith\sCTEs.
-D 2024-08-20T22:44:40.653
+C Refactor\sthe\sSrcItem\sobject\sto\smove\sfields\sassociated\swith\ssubqueries\sout\ninto\sa\sseparate\sobject\snamed\sSubquery.\s\sThis\sreduces\sthe\ssize\sof\sthe\sSrcItem\nobject\sby\sabout\s1/3rd\sand\sprovides\simproved\sperformance.
+D 2024-08-20T23:11:28.443
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -89,29 +89,29 @@ F ext/fts3/tool/fts3cov.sh c331d006359456cf6f8f953e37f2b9c7d568f3863f00bb5f7eb87
F ext/fts3/tool/fts3view.c 413c346399159df81f86c4928b7c4a455caab73bfbc8cd68f950f632e5751674
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
-F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
+F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7ede551eac3ead62baa
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
-F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h 6b49ce6eb2e395e7fd84557b21d32f5de8041f2fada4c617e481e99427e24b6e
-F ext/fts5/fts5Int.h 41fb3a2dd40e818cc96c6f4176dbdf2aaa8f57043cfc9a8f2676e7e6a72ad764
-F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1
+F ext/fts5/extract_api_docs.tcl 1db7f85f4d84b7b6f33336155d5053fafc3c8debd074422d8003c8f7fa4d0fdb
+F ext/fts5/fts5.h c65fc7799a4cd6774628da4fa9408955623e504d7369ab5b89c4413fdfe11eb5
+F ext/fts5/fts5Int.h 26a71a09cefa4ef6b4516b204ed48da3e1380970a19b3482eea7c5d805655360
+F ext/fts5/fts5_aux.c 12cd2512f869217c38b70c31de5b5f741812734fafa80f55b32ea9bbd96e2152
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
-F ext/fts5/fts5_config.c 68cb87a49215f8e7028000b681df4057c430a4a6afbd676463886da94c9e1c37
-F ext/fts5/fts5_expr.c 4b7734db98393d6f7fbc5c9c71ebcabe70110f7df08f6b136d096a1eaee0f56a
+F ext/fts5/fts5_config.c 353d2a0d12678cae6ab5b9ce54aed8dac0825667b69248b5a4ed81cbefc109ea
+F ext/fts5/fts5_expr.c 9a56f53700d1860f0ee2f373c2b9074eaf2a7aa0637d0e27a6476de26a3fee33
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c 6ec7a7d005c632d86e510ddfaca56b197a5b20b61848415764b91bd27d1e4f84
-F ext/fts5/fts5_storage.c 1d7e08d4331da2f3f7e78e70eef2ed6a013d91ba16175c651adbc5ad672235aa
-F ext/fts5/fts5_tcl.c 5ca3e3e35010d326f5b821a563e4fcde3913e052935f5c2c72c264122a26b48f
+F ext/fts5/fts5_main.c 5ea7ab0c9967594e73b7dd0ad737595922a14f175aa4b486dc2992a3e3138b68
+F ext/fts5/fts5_storage.c 9a9b880be12901f1962ae2a5a7e1b74348b3099a1e728764e419f75d98e3e612
+F ext/fts5/fts5_tcl.c 1dcf08028141c40a32634bdcf2d5601622ce4edc48f82ac4ce0cbe0a92a6961d
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
-F ext/fts5/fts5_tokenize.c 63ebe9057ed3f4dfc49944bc4aee3d3b745cc2faff73bc152ed3554ed3bf9cf4
-F ext/fts5/fts5_unicode2.c eca63dbc797f8ff0572e97caf4631389c0ab900d6364861b915bdd4735973f00
+F ext/fts5/fts5_tokenize.c ae9c4fa93174ef06ffc138bd4280a1c37f7e13624d3d2706aad4b80573f23c41
+F ext/fts5/fts5_unicode2.c 6f9b0fb79a8facaed76628ffd4eb9c16d7f2b84b52872784f617cf3422a9b043
F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80
F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0
F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
-F ext/fts5/test/fts5_common.tcl bc33c6cc65e5d390f28a68aeeb3a526dadd2c3a947d2466ee1986c1a4002df56
+F ext/fts5/test/fts5_common.tcl c5aa7cf7148b6dcffb5b61520ae18212baf169936af734ab265143f59db328fe
F ext/fts5/test/fts5aa.test 015c81b84d53bfcedd77d624202c8b02e9f0cbbb4b51688e3a9c9f90bccbb4ac
F ext/fts5/test/fts5ab.test 4bdb619fee409e11417e8827e320b857e42e926a01a0408fc9f143ec870a6ced
F ext/fts5/test/fts5ac.test 4a73626de86f3d17c95738034880c4f0de8d54741fb943d819b528373657e59b
@@ -119,11 +119,11 @@ F ext/fts5/test/fts5ad.test 058e616612964e61d19f70295f0e6eaedceb4b29b1fbf4f85961
F ext/fts5/test/fts5ae.test 3d49edbd50bb0684199a2e7568aeb30d1d29718f5c0f61751983740fa836d15f
F ext/fts5/test/fts5af.test ae81f08b8da4c5f9b3ec1ef538a4ab6b7c278e92fa9058d6dc5d842c5d9771b9
F ext/fts5/test/fts5ag.test 6667807b5d3fbf460892e756763fbe3d87a2fffe345a06514ba010ca6f6641f7
-F ext/fts5/test/fts5ah.test ac327281c8910cf9b85738a2655003fd0c6a8a76189ef34f3d086b8f9e54263b
+F ext/fts5/test/fts5ah.test e1f01314b35745a30e1b494b46045b82005d71cae74f1ebd9f1338566b77f9fc
F ext/fts5/test/fts5ai.test cbe26d78030998f535bc103f37915350b137a822c71a9db439a077d7666a3539
F ext/fts5/test/fts5aj.test 53c8508dab4acca3e691a4c51eca4b3b018319ab8635e540103d5bbdc91543c9
F ext/fts5/test/fts5ak.test 25e2f8afdcff30d98ca9dee8c5cacca2f26db17501c9401f16d99ee036f70e8d
-F ext/fts5/test/fts5al.test 842c50fd9b287e3fa988dfcab436b27c432866f7406a56aaf3c67f316952cc02
+F ext/fts5/test/fts5al.test f0e655606771b2b5dbaf70e7f0044d560257cf3531d5eea40df58d0d7add8c39
F ext/fts5/test/fts5alter.test ebbee06419c2d3cee5ef7ebb5ba6a9996f1aa374035361c0acd37368cc5f64f3
F ext/fts5/test/fts5auto.test 2278de323172ced485d2844cb1357d00036ac1665f27e70fa1a48ce57bf31c2c
F ext/fts5/test/fts5aux.test 27210687338133b1e9bc0dd669322fca59fd432439f40b126895e2d7c2f899d6
@@ -132,7 +132,8 @@ F ext/fts5/test/fts5auxdata.test 372549088ff792655f73e62b9dfaf4863ce74f5e604c06c
F ext/fts5/test/fts5bigid.test 2860854c2561a57594192b00c33a29f91cb85e25f3d6c03b5c2b8f62708f39dd
F ext/fts5/test/fts5bigpl.test 8f09858aab866c33593560e6480b2b6975ae7ff29ca32ad7b77e2da61402f8ef
F ext/fts5/test/fts5bigtok.test 541119e616c637caea925a8c028c37c2c29e94383e00aa2f9198d530724b6e36
-F ext/fts5/test/fts5cat.test daba0b80659460b0cb60bd1f40b402478a761fe7ea414c3c94c2be25568cc33a
+F ext/fts5/test/fts5blob.test caa33369e93e99ff494cd1103506ae34c5afbc0bcc369ed5e58e135144e33689
+F ext/fts5/test/fts5cat.test bf67dd335f964482ee658287521b81e2b88697b45eb7f73933e15f198ed447cb
F ext/fts5/test/fts5circref.test f880dfd0d99f6fb73b88ccacb0927d18e833672fd906cc47d6b4e529419eaa62
F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897
F ext/fts5/test/fts5columnsize.test 0af91d63985afdf663455d4b572b935238380140d74079eac362760866d3297b
@@ -140,14 +141,14 @@ F ext/fts5/test/fts5config.test 017daf10d2642496e97402baa0134de8b5b46b9c37e53c22
F ext/fts5/test/fts5conflict.test bf6030a77dbb1bedfcc42e589ed7980846c995765d77460551e448b56d741244
F ext/fts5/test/fts5connect.test 08030168fc96fc278fa81f28654fb7e90566f33aff269c073e19b3ae9126b2f4
F ext/fts5/test/fts5content.test d5c0c2142e64cb305f0968de70c01f8e59dbc3ecc56520c22e739e5dd99ea3bb
-F ext/fts5/test/fts5contentless.test b107465f8cd27dde6313b9c60b61d7158a7753b9c663c5c553695f826bb3c0a5
+F ext/fts5/test/fts5contentless.test 606f063b29ba0f46d4b79aa36cdd1ef4dab5de53eae8c881d731af75a4894aca
F ext/fts5/test/fts5contentless2.test 70ffe6c611d8f278240da56734df8a77948f04e2739b358439e9bdcf56ced35f
F ext/fts5/test/fts5contentless3.test 75eaae5ad6b284ee447788943974d323228f27cc35a1681da997135cff95bc6a
F ext/fts5/test/fts5contentless4.test ec34dc69ef474ca9997dae6d91e072906e0e9a5a4b05ea89964c863833b6eff8
F ext/fts5/test/fts5contentless5.test 40cdcb4fe751672450829c5a96bd32c25fc2f6076279dd2ce5c58ac9a390132a
-F ext/fts5/test/fts5corrupt.test a9bda1ded5112ebf1ee85c5381bd1fe8974952e2523cede4d5072804d2011503
+F ext/fts5/test/fts5corrupt.test 6485f721b88ba355ca5d701e7ee87a4efa3ea578d8e6adb26f51ef956c8328bd
F ext/fts5/test/fts5corrupt2.test 335911e3f68b9625d850325f9e29a128db3f4276a8c9d4e32134580da8f924c4
-F ext/fts5/test/fts5corrupt3.test b5f35d72af85b1d5a092b3d5e437f7944d142dd0b0c87b928fd0436a0aec6987
+F ext/fts5/test/fts5corrupt3.test 621e9bca3e7299f487e1b29ff4179d9fc9560f5847dfc5b50a16010c9d2a0e5f
F ext/fts5/test/fts5corrupt4.test dc08d19f5b8943e95a7778a7d8da592042504faf18dd93f68f7d7a0d7d7dd733
F ext/fts5/test/fts5corrupt5.test 11b47126f5772cc37b67e3e8b2ed05895c4d07c05338bc07e4eea225bfe32c76
F ext/fts5/test/fts5corrupt6.test 2d72db743db7b5d9c9a6d0cfef24d799ed1aa5e8192b66c40e871a37ed9eed06
@@ -159,7 +160,7 @@ F ext/fts5/test/fts5dlidx.test a7c42b0a74dc7c8aa1a46d586e0aadda4b6cc42c24450f8d3
F ext/fts5/test/fts5doclist.test b7cb84758504519746957802db9cd31187bb4e0028b89d9087ba06e26cc4155f
F ext/fts5/test/fts5ea.test cefdf66024550fa7920c03395c71ce5046235ed1a1a7a469d79b19e7aad5afb5
F ext/fts5/test/fts5eb.test 401f756fdb77083aeba8b696c1e0ad4d834c39dbd6f17e492bb55a2ad64b4296
-F ext/fts5/test/fts5expr.test 7e1b2d075b63b727a624a378c2c09f94296a93dc4ae968aad67f8d9f3810c266
+F ext/fts5/test/fts5expr.test c7e208813df7a90badc856fde3796da79569b39382e0fdb43042127f3b8e06a7
F ext/fts5/test/fts5fault1.test d28a65caee75db6897c3cf1358c5230d3bb2a3bf7fb31062c19c7e5382b3d2bd
F ext/fts5/test/fts5fault2.test 69c8fdbef830cd0d450908d4504d5bb86609e255af99c421c20a0756251fe344
F ext/fts5/test/fts5fault3.test da2f9e3e56ff5740d68ebdd6877c97089e7ed28ddff28a0da87a6afea27e5522
@@ -176,6 +177,7 @@ F ext/fts5/test/fts5faultE.test 844586ce71dab4be85bb86880e87b624d089f851654cd22e
F ext/fts5/test/fts5faultF.test 4abef99f86e99d9f0c6460dd68c586a766b6b9f1f660ada55bf2e8266bd1bbc1
F ext/fts5/test/fts5faultG.test 0544411ffcb3e19b42866f757a8a5e0fb8fef3a62c06f61d14deebc571bb7ea9
F ext/fts5/test/fts5faultH.test 2b2b5b8cb1b3fd7679f488c06e22af44107fbc6137eaf45b3e771dc7b149312d
+F ext/fts5/test/fts5faultI.test fbc65a64944fb747f6d3fb30628a807d5cce1bca43c11df40e7770ad7a7ed593
F ext/fts5/test/fts5first.test bfd685b96905bf541d99d8644e0a7219d1d833455a08ab64e344071a613b6ba9
F ext/fts5/test/fts5full.test 97d263c1072f4a560929cca31e70f65d2ae232610e17e6affcf7e979df59547b
F ext/fts5/test/fts5fuzz1.test 238d8c45f3b81342aa384de3e581ff2fa330bf922a7b69e484bbc06051a1080e
@@ -185,10 +187,11 @@ F ext/fts5/test/fts5interrupt.test 20d04204d3e341b104c0c24a41596b6393a3a81eba104
F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1
F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad
F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c
+F ext/fts5/test/fts5locale.test 79cbd3000ae269de50826f6061c81f7c9fdb21dd9954c0b7f9485263482c539d
F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3
F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082
F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2
-F ext/fts5/test/fts5misc.test b1682a40061bc58dcc62bbad48938fc5214d2ac6a868a8899c0c3d1930f1115d
+F ext/fts5/test/fts5misc.test 60bb2be4a2d83d7a45047c1812781e2e337a27efa539d86356ef7f4acaf08eab
F ext/fts5/test/fts5multi.test a15bc91cdb717492e6e1b66fec1c356cb57386b980c7ba5af1915f97fe878581
F ext/fts5/test/fts5multiclient.test 5ff811c028d6108045ffef737f1e9f05028af2458e456c0937c1d1b8dea56d45
F ext/fts5/test/fts5near.test 33d60867581066e5db7016deb5d651628125d7ff4e0233a88175aa5b65874c74
@@ -196,7 +199,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618
F ext/fts5/test/fts5optimize.test 264b9101721c17d06d1d174feb743fda3ddc89fad41dee980fef821428258e47
F ext/fts5/test/fts5optimize2.test 795d4ae5f66a7239cf8d5aef4c2ea96aeb8bcd907bd9be0cfe22064fc71a44ed
F ext/fts5/test/fts5optimize3.test 1653029284e10e0715246819893ba30565c4ead0d0fc470adae92c353ea857d3
-F ext/fts5/test/fts5origintext.test 87c34c78f201b1e22ac93ac6bbe6196dde59f0a7266b1aeb938604a0eb9d5552
+F ext/fts5/test/fts5origintext.test 2015f69bc8abd111152a8e66211fd2d45026378001e07c054159aa4f84e6691d
F ext/fts5/test/fts5origintext2.test f4505ff79bf7369f2b8b10b9cef7476049d844e20b37f29cad3a8b8d5ac6f9ba
F ext/fts5/test/fts5origintext3.test 45c33cf0c91a9ca0e36d298462db3edc7c8fe45fd185649a9dbfd66bb670058b
F ext/fts5/test/fts5origintext4.test 0d3ef0a8038f471dbc83001c34fe5f7ae39b571bfc209670771eb28bc0fc50e8
@@ -220,9 +223,9 @@ F ext/fts5/test/fts5secure4.test 0d10a80590c07891478700af7793b232962042677432b98
F ext/fts5/test/fts5secure5.test c07a68ced5951567ac116c22f2d2aafae497e47fe9fcb6a335c22f9c7a4f2c3a
F ext/fts5/test/fts5secure6.test 74bf04733cc523bccca519bb03d3b4e2ed6f6e3db7c59bf6be82c88a0ac857fd
F ext/fts5/test/fts5secure7.test fd03d0868d64340a1db8615b02e5508fea409de13910114e4f19eaefc120777a
-F ext/fts5/test/fts5secure8.test e68c0ac4447f415ff3e4e82531e99548289286f9f3a29c8cd53036113fe28602
+F ext/fts5/test/fts5secure8.test 808ade9d172ed07b24b85c57dd53b6d2b1aba018b4e634d267ce572221de80e0
F ext/fts5/test/fts5securefault.test c34a28c7cd2f31a8b8907563889e1329a97da975c08df2d951422bcef8e2ebc5
-F ext/fts5/test/fts5simple.test 847fb828262328744733847dc76d6b5d4a6bd4c5d9b282cb819f6504340e061a
+F ext/fts5/test/fts5simple.test ed7c3815c9fa1c16166258cb98edb2e014c63c7589958d76c5487df0df913d61
F ext/fts5/test/fts5simple2.test d10d963a357b8ec77b99032e4c816459b4dbdb1f6eee25eada7ef3ed245cb2dc
F ext/fts5/test/fts5simple3.test 146ec3dc8f5763d6212641c9f0a2f1cba41679353d2add7b963beceb115dc7f4
F ext/fts5/test/fts5synonym.test becc8cea6cfc958a50b30c572c68cbfdf7455971d0fe988202ce67638d2c6cf6
@@ -231,12 +234,13 @@ F ext/fts5/test/fts5tok1.test 1f7817499f5971450d8c4a652114b3d833393c8134e32422d0
F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98
F ext/fts5/test/fts5tokenizer2.test ddb8b10fbe4b84b2a75812671f127774c1d2e3e2bf82d2e0e4f0bb1cd8a2b2d6
-F ext/fts5/test/fts5trigram.test be914555deb8504dde682bd5aa343d00c4da37dfad20709a5bac30d5f97f2ef5
+F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3
+F ext/fts5/test/fts5trigram.test fb9ee982edd76280ce979905a2251081cd04ae4c470248bd5d391b2d096430ab
F ext/fts5/test/fts5trigram2.test 6fde9de7f63a6b4aa18dc731be56dbd6be4e755c9b13dcd55479e200d1df0e61
F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2
F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602
F ext/fts5/test/fts5unicode.test 41898f7e476e6515cd4b737c02a442cda5a580a74509788aa9072a2074948e0e
-F ext/fts5/test/fts5unicode2.test a5c38179b311a188b24376772309389b073c996f52b79bb9ca760a19e62043ea
+F ext/fts5/test/fts5unicode2.test 3bbd30152f9f760bf13886e5b1e5ec23ff62f56758ddda5d9c775a6082fb4c7c
F ext/fts5/test/fts5unicode3.test f4891a3dac3b49c3d7c0fdb29566e9eb0ecff35263370c89f9661b1952b20818
F ext/fts5/test/fts5unicode4.test 728c8f0caafb05567f524ad313d9f8b780fa45987b8a8df04eff87923c74b4d0
F ext/fts5/test/fts5unindexed.test 168838d2c385e131120bbf5b516d2432a5fabc4caa2259c932e1d49ae209a4ae
@@ -706,7 +710,7 @@ F src/build.c c5522b5faf8128227678e194275cefaeb4d063f55dbe70bcff745f1b63a187cf
F src/callback.c db3a45e376deff6a16c0058163fe0ae2b73a2945f3f408ca32cf74960b28d490
F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e
F src/ctime.c 64e4b1227b4ed123146f0aa2989131d1fbd9b927b11e80c9d58c6a68f9cd5ce3
-F src/date.c 13dd752847afb32ed70510ad7345a5b9c841f51ad904dba5d010f1fa3a6a324e
+F src/date.c 89ce1ff20512a7fa5070ba6e7dd5c171148ca7d580955795bf97c79c2456144a
F src/dbpage.c 80e46e1df623ec40486da7a5086cb723b0275a6e2a7b01d9f9b5da0f04ba2782
F src/dbstat.c 3b677254d512fcafd4d0b341bf267b38b235ccfddbef24f9154e19360fa22e43
F src/delete.c 444c4d1eaac40103461e3b6f0881846dd3aafc1cec1dd169d3482fa331667da7
@@ -744,7 +748,7 @@ F src/os.h 1ff5ae51d339d0e30d8a9d814f4b8f8e448169304d83a7ed9db66a65732f3e63
F src/os_common.h 6c0eb8dd40ef3e12fe585a13e709710267a258e2c8dd1c40b1948a1d14582e06
F src/os_kv.c 4d39e1f1c180b11162c6dc4aa8ad34053873a639bac6baae23272fc03349986a
F src/os_setup.h 6011ad7af5db4e05155f385eb3a9b4470688de6f65d6166b8956e58a3d872107
-F src/os_unix.c 2ea8d3ed496b8d1f9332a9505653424e5464fd797ea9d91f8e2e62f9dd0298d0
+F src/os_unix.c 6e3e4fc75904ff85184091dbab996e6e35c1799e771788961cc3b4fcbe8f852c
F src/os_win.c 6ff43bac175bd9ed79e7c0f96840b139f2f51d01689a638fd05128becf94908a
F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a
F src/pager.c b08600ebf0db90b6d1e9b8b6577c6fa3877cbe1a100bd0b2899e4c6e9adad4b3
@@ -760,7 +764,7 @@ F src/printf.c 6a87534ebfb9e5346011191b1f3a7ebc457f5938c7e4feeea478ecf53f6a41b2
F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c
F src/resolve.c 9afed5fd7b9111633bdb74a73cdc47324e28e4dc6c27113e3e9aee38fb9422ab
F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97
-F src/select.c c1c28650d3ea5dc0670dd658600b963c29a5c31b685941a5df583b02631d04ff
+F src/select.c 44d135bbea93872a7318f048d9d6e566b1c3eaa92d6dabe06e4741aa78d1c1ec
F src/shell.c.in 94571558b0fb28c37a5cf6dbd6ea27285341023a28a8cb5795cd2768fab67704
F src/sqlite.h.in 1ad9110150773c38ebababbad11b5cb361bcd3997676dec1c91ac5e0416a7b86
F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
@@ -849,7 +853,7 @@ F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014
F src/where.c b6f79b189fcba459bb80420d3b4102f42a399be36ff29a3deff4ae4888fee46d
F src/whereInt.h 6444b888ce395cb80511284b8a73b63472d34247fcb1b125ee06a54fa6ae878e
F src/wherecode.c 137797b0de9ddf1ff43e5b0edffcc76fb05184ed651fc4f5a0a01a45c0b89d04
-F src/whereexpr.c 6a72cf607548765a262f216e87373fd675a4646f9cc4278fc519b66cf03dbc13
+F src/whereexpr.c 44f41ae554c7572e1de1485b3169b233ee04d464b2ee5881687ede3bf07cacfa
F src/window.c 499d48f315a09242dc68f2fac635ed27dcf6bbb0d9ab9084857898c64489e975
F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
F test/affinity2.test ce1aafc86e110685b324e9a763eab4f2a73f737842ec3b687bd965867de90627
@@ -1060,6 +1064,7 @@ F test/date.test c8ff835023f2107b57ce7a45c92265d51c98a23fc93231e998f12d850831aad
F test/date2.test 7e12ec14aaf4d5e6294b4ba140445b0eca06ea50062a9c3a69c4ee13d0b6f8b1
F test/date3.test a1b77abf05c6772fe5ca2337cac1398892f2a41e62bce7e6be0f4a08a0e64ae5
F test/date4.test 75dc8401e8c0639a228cd26a6eaa4ff5ea8ccda912b9853d1c9462c476670e17
+F test/date5.test 14ba189bc4d03efc371dd5302e035764f6633355a3e13acb4a45e7b33530231e
F test/dbdata.test 042f49acff3438f940eeba5868d3af080ae64ddf26ae78f80c92bec3ca7d8603
F test/dbfuzz.c 73047c920d6210e5912c87cdffd9a1c281d4252e
F test/dbfuzz001.test 6c9a4622029d69dc38926f115864b055cb2f39badd25ec22cbfb130c8ba8e9c3
@@ -2205,8 +2210,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P c1bbed19f4348ec31c3cb4723eaa8f1554bb58ef4de7feb3a0f39612785f13d5
-R 1c80ace1a0877505f5ca5ebb5b98295b
+P 3399698376761ab8c422f8ea02bfa2759afb606f08bedbd1cf7eee834229a9aa 4fa8235dd59cd683d6c6c97bfe181a9637be7c054d435323c903b9dbd74aff02
+R 82a6023b6446e680173d0de603e0e98e
U drh
-Z ef82496d97c0311a270f63ff1146b092
+Z cc2792cc7e3d54684569210f9c97c9df
# Remove this line to create a well-formed Fossil manifest.
diff --git a/manifest.uuid b/manifest.uuid
index 53632b347..c96f75886 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-4fa8235dd59cd683d6c6c97bfe181a9637be7c054d435323c903b9dbd74aff02
+484bcd75bc95491d8540c791c1c4d40d996cb465839564662e14f98739699bf1
diff --git a/src/date.c b/src/date.c
index 8a609ae3c..8c48a81fa 100644
--- a/src/date.c
+++ b/src/date.c
@@ -271,8 +271,8 @@ static void computeJD(DateTime *p){
Y--;
M += 12;
}
- A = Y/100;
- B = 2 - A + (A/4);
+ A = (Y+4800)/100;
+ B = 38 - A + (A/4);
X1 = 36525*(Y+4716)/100;
X2 = 306001*(M+1)/10000;
p->iJD = (sqlite3_int64)((X1 + X2 + D + B - 1524.5 ) * 86400000);
@@ -456,7 +456,7 @@ static int validJulianDay(sqlite3_int64 iJD){
** Compute the Year, Month, and Day from the julian day number.
*/
static void computeYMD(DateTime *p){
- int Z, A, B, C, D, E, X1;
+ int Z, alpha, A, B, C, D, E, X1;
if( p->validYMD ) return;
if( !p->validJD ){
p->Y = 2000;
@@ -467,8 +467,8 @@ static void computeYMD(DateTime *p){
return;
}else{
Z = (int)((p->iJD + 43200000)/86400000);
- A = (int)((Z - 1867216.25)/36524.25);
- A = Z + 1 + A - (A/4);
+ alpha = (int)((Z + 32044.75)/36524.25) - 52;
+ A = Z + 1 + alpha - ((alpha+100)/4) + 25;
B = A + 1524;
C = (int)((B - 122.1)/365.25);
D = (36525*(C&32767))/100;
diff --git a/src/os_unix.c b/src/os_unix.c
index c94c0c111..5d1dc9ac6 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -322,7 +322,7 @@ static pid_t randomnessPid = 0;
#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
-#ifndef SQLITE_DISABLE_DIRSYNC
+#if !defined(SQLITE_DISABLE_DIRSYNC) && !defined(_AIX)
# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */
#else
# define UNIXFILE_DIRSYNC 0x00
diff --git a/src/select.c b/src/select.c
index b76e42b74..87bf7fac2 100644
--- a/src/select.c
+++ b/src/select.c
@@ -6784,6 +6784,7 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){
for(i=0, pF=pAggInfo->aFunc; i<pAggInfo->nFunc; i++, pF++){
ExprList *pList;
assert( ExprUseXList(pF->pFExpr) );
+ if( pParse->nErr ) return;
pList = pF->pFExpr->x.pList;
if( pF->iOBTab>=0 ){
/* For an ORDER BY aggregate, calls to OP_AggStep were deferred. Inputs
@@ -6993,6 +6994,7 @@ static void updateAccumulator(
if( addrNext ){
sqlite3VdbeResolveLabel(v, addrNext);
}
+ if( pParse->nErr ) return;
}
if( regHit==0 && pAggInfo->nAccumulator ){
regHit = regAcc;
@@ -7002,6 +7004,7 @@ static void updateAccumulator(
}
for(i=0, pC=pAggInfo->aCol; i<pAggInfo->nAccumulator; i++, pC++){
sqlite3ExprCode(pParse, pC->pCExpr, AggInfoColumnReg(pAggInfo,i));
+ if( pParse->nErr ) return;
}
pAggInfo->directMode = 0;
diff --git a/src/whereexpr.c b/src/whereexpr.c
index ae26e85d2..7ea2956a7 100644
--- a/src/whereexpr.c
+++ b/src/whereexpr.c
@@ -220,11 +220,20 @@ static int isLikeOrGlob(
}
if( z ){
- /* Count the number of prefix characters prior to the first wildcard */
+ /* Count the number of prefix characters prior to the first wildcard.
+ ** If the underlying database has a UTF16LE encoding, then only consider
+ ** ASCII characters. Note that the encoding of z[] is UTF8 - we are
+ ** dealing with only UTF8 here in this code, but the database engine
+ ** itself might be processing content using a different encoding. */
cnt = 0;
while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
cnt++;
- if( c==wc[3] && z[cnt]!=0 ) cnt++;
+ if( c==wc[3] && z[cnt]!=0 ){
+ cnt++;
+ }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){
+ cnt--;
+ break;
+ }
}
/* The optimization is possible only if (1) the pattern does not begin
@@ -239,7 +248,7 @@ static int isLikeOrGlob(
Expr *pPrefix;
/* A "complete" match if the pattern ends with "*" or "%" */
- *pisComplete = c==wc[0] && z[cnt+1]==0;
+ *pisComplete = c==wc[0] && z[cnt+1]==0 && ENC(db)!=SQLITE_UTF16LE;
/* Get the pattern prefix. Remove all escapes from the prefix. */
pPrefix = sqlite3Expr(db, TK_STRING, (char*)z);
diff --git a/test/date5.test b/test/date5.test
new file mode 100644
index 000000000..688f84d0f
--- /dev/null
+++ b/test/date5.test
@@ -0,0 +1,86 @@
+# 2024-08-19
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# https://sqlite.org/forum/forumpost/eaa0a09786c6368b
+#
+# Apparently SQLite has been miscomputing leap-year dates before
+# the year 0400.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# Skip this whole file if date and time functions are omitted
+# at compile-time
+#
+ifcapable {!datetime} {
+ finish_test
+ return
+}
+
+# Data sources:
+# 1-10 https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+# 11 Jean Meeus, Astronomical Algorithms, ISBN 0-943396-61-1, p.59
+# 12 https://en.wikipedia.org/wiki/Julian_day
+#
+# ID YEAR MONTH DAY JD
+set date5data {
+ 1 2024 2 29 2460369.5
+ 2 2024 3 1 2460370.5
+ 3 2023 2 28 2460003.5
+ 4 2023 3 1 2460004.5
+ 5 2000 2 29 2451603.5
+ 6 2000 3 1 2451604.5
+ 7 1900 2 28 2415078.5
+ 8 1900 3 1 2415079.5
+ 9 1712 2 29 2346413.5
+ 10 1712 3 1 2346414.5
+ 11 1977 4 26 2443259.5
+ 12 2013 1 1 2456293.5
+}
+
+foreach {id y m d jd} $date5data {
+ set date [format %04d-%02d-%02d $y $m $d]
+ do_execsql_test date5-jd$jd {
+ SELECT date($::jd);
+ } $date
+ do_execsql_test date5-cal/$date {
+ SELECT julianday($::date);
+ } $jd
+ for {set i 1} {$y+400*$i<=9999} {incr i} {
+ set y2 [expr {$y+400*$i}]
+ set date2 [format %04d-%02d-%02d $y2 $m $d]
+ set jd2 [expr {$jd+146097*$i}]
+ do_execsql_test date5-jd$jd2 {
+ SELECT date($::jd2);
+ } $date2
+ do_execsql_test date5-cal/$date2 {
+ SELECT julianday($::date2);
+ } $jd2
+ }
+ for {set i 1} {$y-400*$i>=-4712} {incr i} {
+ set y2 [expr {$y-400*$i}]
+ if {$y2<0} {
+ set date2 [format -%04d-%02d-%02d [expr {-$y2}] $m $d]
+ } else {
+ set date2 [format %04d-%02d-%02d $y2 $m $d]
+ }
+ set jd2 [expr {$jd-146097*$i}]
+ do_execsql_test date5-jd$jd2 {
+ SELECT date($::jd2);
+ } $date2
+ do_execsql_test date5-cal/$date2 {
+ SELECT julianday($::date2);
+ } $jd2
+ }
+}
+
+finish_test