aboutsummaryrefslogtreecommitdiff
path: root/ext/fts5/fts5_tcl.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_tcl.c')
-rw-r--r--ext/fts5/fts5_tcl.c264
1 files changed, 247 insertions, 17 deletions
diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c
index b67b037c4..1e9f7bbb6 100644
--- a/ext/fts5/fts5_tcl.c
+++ b/ext/fts5/fts5_tcl.c
@@ -240,6 +240,7 @@ static int SQLITE_TCLAPI xF5tApi(
{ "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */
{ "xInstToken", 2, "IDX ITERM" }, /* 19 */
+ { "xColumnLocale", 1, "COL" }, /* 20 */
{ 0, 0, 0}
};
@@ -528,6 +529,20 @@ static int SQLITE_TCLAPI xF5tApi(
break;
}
+ CASE(20, "xColumnLocale") {
+ const char *z = 0;
+ int n = 0;
+ int iCol;
+ if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
+ return TCL_ERROR;
+ }
+ rc = p->pApi->xColumnLocale(p->pFts, iCol, &z, &n);
+ if( rc==SQLITE_OK && z ){
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n));
+ }
+ break;
+ }
+
default:
assert( 0 );
break;
@@ -796,18 +811,32 @@ typedef struct F5tTokenizerInstance F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, int, const char*, int, int, int);
+ F5tTokenizerInstance *pInst;
};
struct F5tTokenizerModule {
Tcl_Interp *interp;
Tcl_Obj *pScript;
+ void *pParentCtx;
+ fts5_tokenizer_v2 parent_v2;
+ fts5_tokenizer parent;
F5tTokenizerContext *pContext;
};
+/*
+** zLocale:
+** Within a call to xTokenize_v2(), pLocale/nLocale store the locale
+** passed to the call by fts5. This can be retrieved by a Tcl tokenize
+** script using [sqlite3_fts5_locale].
+*/
struct F5tTokenizerInstance {
Tcl_Interp *interp;
Tcl_Obj *pScript;
+ F5tTokenizerModule *pModule;
+ Fts5Tokenizer *pParent;
F5tTokenizerContext *pContext;
+ const char *pLocale;
+ int nLocale;
};
static int f5tTokenizerCreate(
@@ -816,11 +845,20 @@ static int f5tTokenizerCreate(
int nArg,
Fts5Tokenizer **ppOut
){
+ Fts5Tokenizer *pParent = 0;
F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
Tcl_Obj *pEval;
int rc = TCL_OK;
int i;
+ assert( pMod->parent_v2.xCreate==0 || pMod->parent.xCreate==0 );
+ if( pMod->parent_v2.xCreate ){
+ rc = pMod->parent_v2.xCreate(pMod->pParentCtx, 0, 0, &pParent);
+ }
+ if( pMod->parent.xCreate ){
+ rc = pMod->parent.xCreate(pMod->pParentCtx, 0, 0, &pParent);
+ }
+
pEval = Tcl_DuplicateObj(pMod->pScript);
Tcl_IncrRefCount(pEval);
for(i=0; rc==TCL_OK && i<nArg; i++){
@@ -840,6 +878,8 @@ static int f5tTokenizerCreate(
pInst->interp = pMod->interp;
pInst->pScript = Tcl_GetObjResult(pMod->interp);
pInst->pContext = pMod->pContext;
+ pInst->pParent = pParent;
+ pInst->pModule = pMod;
Tcl_IncrRefCount(pInst->pScript);
*ppOut = (Fts5Tokenizer*)pInst;
}
@@ -850,11 +890,21 @@ static int f5tTokenizerCreate(
static void f5tTokenizerDelete(Fts5Tokenizer *p){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
- Tcl_DecrRefCount(pInst->pScript);
- ckfree((char *)pInst);
+ if( pInst ){
+ if( pInst->pParent ){
+ if( pInst->pModule->parent_v2.xDelete ){
+ pInst->pModule->parent_v2.xDelete(pInst->pParent);
+ }else{
+ pInst->pModule->parent.xDelete(pInst->pParent);
+ }
+ }
+ Tcl_DecrRefCount(pInst->pScript);
+ ckfree((char *)pInst);
+ }
}
-static int f5tTokenizerTokenize(
+
+static int f5tTokenizerReallyTokenize(
Fts5Tokenizer *p,
void *pCtx,
int flags,
@@ -862,6 +912,7 @@ static int f5tTokenizerTokenize(
int (*xToken)(void*, int, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+ F5tTokenizerInstance *pOldInst = 0;
void *pOldCtx;
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
@@ -870,9 +921,11 @@ static int f5tTokenizerTokenize(
pOldCtx = pInst->pContext->pCtx;
xOldToken = pInst->pContext->xToken;
+ pOldInst = pInst->pContext->pInst;
pInst->pContext->pCtx = pCtx;
pInst->pContext->xToken = xToken;
+ pInst->pContext->pInst = pInst;
assert(
flags==FTS5_TOKENIZE_DOCUMENT
@@ -908,9 +961,105 @@ static int f5tTokenizerTokenize(
pInst->pContext->pCtx = pOldCtx;
pInst->pContext->xToken = xOldToken;
+ pInst->pContext->pInst = pOldInst;
return rc;
}
+typedef struct CallbackCtx CallbackCtx;
+struct CallbackCtx {
+ Fts5Tokenizer *p;
+ void *pCtx;
+ int flags;
+ int (*xToken)(void*, int, const char*, int, int, int);
+};
+
+static int f5tTokenizeCallback(
+ void *pCtx,
+ int tflags,
+ const char *z, int n,
+ int iStart, int iEnd
+){
+ CallbackCtx *p = (CallbackCtx*)pCtx;
+ return f5tTokenizerReallyTokenize(p->p, p->pCtx, p->flags, z, n, p->xToken);
+}
+
+static int f5tTokenizerTokenize_v2(
+ Fts5Tokenizer *p,
+ void *pCtx,
+ int flags,
+ const char *pText, int nText,
+ const char *pLoc, int nLoc,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ int rc = SQLITE_OK;
+ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+
+ pInst->pLocale = pLoc;
+ pInst->nLocale = nLoc;
+
+ if( pInst->pParent ){
+ CallbackCtx ctx;
+ ctx.p = p;
+ ctx.pCtx = pCtx;
+ ctx.flags = flags;
+ ctx.xToken = xToken;
+ if( pInst->pModule->parent_v2.xTokenize ){
+ rc = pInst->pModule->parent_v2.xTokenize(
+ pInst->pParent, (void*)&ctx, flags, pText, nText,
+ pLoc, nLoc, f5tTokenizeCallback
+ );
+ }else{
+ rc = pInst->pModule->parent.xTokenize(
+ pInst->pParent, (void*)&ctx, flags, pText, nText, f5tTokenizeCallback
+ );
+ }
+ }else{
+ rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken);
+ }
+
+ pInst->pLocale = 0;
+ pInst->nLocale = 0;
+ return rc;
+}
+static int f5tTokenizerTokenize(
+ Fts5Tokenizer *p,
+ void *pCtx,
+ int flags,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int, int, int)
+){
+ return f5tTokenizerTokenize_v2(p, pCtx, flags, pText, nText, 0, 0, xToken);
+}
+
+/*
+** sqlite3_fts5_locale
+*/
+static int SQLITE_TCLAPI f5tTokenizerLocale(
+ void * clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData;
+
+ if( objc!=1 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "");
+ return TCL_ERROR;
+ }
+
+ if( p->xToken==0 ){
+ Tcl_AppendResult(interp,
+ "sqlite3_fts5_locale may only be used by tokenizer callback", 0
+ );
+ return TCL_ERROR;
+ }
+
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj(p->pInst->pLocale, p->pInst->nLocale)
+ );
+ return TCL_OK;
+}
+
/*
** sqlite3_fts5_token ?-colocated? TEXT START END
*/
@@ -996,32 +1145,112 @@ static int SQLITE_TCLAPI f5tCreateTokenizer(
fts5_api *pApi;
char *zName;
Tcl_Obj *pScript;
- fts5_tokenizer t;
F5tTokenizerModule *pMod;
- int rc;
+ int rc = SQLITE_OK;
+ int bV2 = 0; /* True to use _v2 API */
+ int iVersion = 2; /* Value for _v2.iVersion */
+ const char *zParent = 0; /* Name of parent tokenizer, if any */
+ int ii = 0;
- if( objc!=4 ){
- Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
+ if( objc<4 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "?OPTIONS? DB NAME SCRIPT");
return TCL_ERROR;
}
- if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
- return TCL_ERROR;
+
+ /* Parse any options. Set stack variables bV2 and zParent. */
+ for(ii=1; ii<objc-3; ii++){
+ int iOpt = 0;
+ const char *azOpt[] = { "-v2", "-parent", "-version", 0 };
+ if( Tcl_GetIndexFromObj(interp, objv[ii], azOpt, "OPTION", 0, &iOpt) ){
+ return TCL_ERROR;
+ }
+ switch( iOpt ){
+ case 0: /* -v2 */ {
+ bV2 = 1;
+ break;
+ }
+ case 1: /* -parent */ {
+ ii++;
+ if( ii==objc-3 ){
+ Tcl_AppendResult(
+ interp, "option requires an argument: -parent", (char*)0
+ );
+ return TCL_ERROR;
+ }
+ zParent = Tcl_GetString(objv[ii]);
+ break;
+ }
+ case 2: /* -version */ {
+ ii++;
+ if( ii==objc-3 ){
+ Tcl_AppendResult(
+ interp, "option requires an argument: -version", (char*)0
+ );
+ return TCL_ERROR;
+ }
+ if( Tcl_GetIntFromObj(interp, objv[ii], &iVersion) ){
+ return TCL_ERROR;
+ }
+ break;
+ }
+ default:
+ assert( 0 );
+ break;
+ }
}
- zName = Tcl_GetString(objv[2]);
- pScript = objv[3];
- t.xCreate = f5tTokenizerCreate;
- t.xTokenize = f5tTokenizerTokenize;
- t.xDelete = f5tTokenizerDelete;
+ if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ){
+ return TCL_ERROR;
+ }
+ zName = Tcl_GetString(objv[objc-2]);
+ pScript = objv[objc-1];
pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
+ memset(pMod, 0, sizeof(F5tTokenizerModule));
pMod->interp = interp;
pMod->pScript = pScript;
- pMod->pContext = pContext;
Tcl_IncrRefCount(pScript);
- rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer);
+ pMod->pContext = pContext;
+ if( zParent ){
+ if( bV2 ){
+ fts5_tokenizer_v2 *pParent = 0;
+ rc = pApi->xFindTokenizer_v2(pApi, zParent, &pMod->pParentCtx, &pParent);
+ if( rc==SQLITE_OK ){
+ memcpy(&pMod->parent_v2, pParent, sizeof(fts5_tokenizer_v2));
+ pMod->parent_v2.xDelete(0);
+ }
+ }else{
+ rc = pApi->xFindTokenizer(pApi, zParent, &pMod->pParentCtx,&pMod->parent);
+ if( rc==SQLITE_OK ){
+ pMod->parent.xDelete(0);
+ }
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ void *pModCtx = (void*)pMod;
+ if( bV2==0 ){
+ fts5_tokenizer t;
+ t.xCreate = f5tTokenizerCreate;
+ t.xTokenize = f5tTokenizerTokenize;
+ t.xDelete = f5tTokenizerDelete;
+ rc = pApi->xCreateTokenizer(pApi, zName, pModCtx, &t, f5tDelTokenizer);
+ }else{
+ fts5_tokenizer_v2 t2;
+ memset(&t2, 0, sizeof(t2));
+ t2.iVersion = iVersion;
+ t2.xCreate = f5tTokenizerCreate;
+ t2.xTokenize = f5tTokenizerTokenize_v2;
+ t2.xDelete = f5tTokenizerDelete;
+ rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer);
+ }
+ }
+
if( rc!=SQLITE_OK ){
- Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0);
+ Tcl_AppendResult(interp, (
+ bV2 ? "error in fts5_api.xCreateTokenizer_v2()"
+ : "error in fts5_api.xCreateTokenizer()"
+ ), 0);
return TCL_ERROR;
}
@@ -1328,6 +1557,7 @@ int Fts5tcl_Init(Tcl_Interp *interp){
} aCmd[] = {
{ "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
{ "sqlite3_fts5_token", f5tTokenizerReturn, 1 },
+ { "sqlite3_fts5_locale", f5tTokenizerLocale, 1 },
{ "sqlite3_fts5_tokenize", f5tTokenize, 0 },
{ "sqlite3_fts5_create_function", f5tCreateFunction, 0 },
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },