diff options
Diffstat (limited to 'ext/fts5/fts5_tcl.c')
-rw-r--r-- | ext/fts5/fts5_tcl.c | 264 |
1 files changed, 247 insertions, 17 deletions
diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index b67b037c4..1e9f7bbb6 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -240,6 +240,7 @@ static int SQLITE_TCLAPI xF5tApi( { "xQueryToken", 2, "IPHRASE ITERM" }, /* 18 */ { "xInstToken", 2, "IDX ITERM" }, /* 19 */ + { "xColumnLocale", 1, "COL" }, /* 20 */ { 0, 0, 0} }; @@ -528,6 +529,20 @@ static int SQLITE_TCLAPI xF5tApi( break; } + CASE(20, "xColumnLocale") { + const char *z = 0; + int n = 0; + int iCol; + if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ + return TCL_ERROR; + } + rc = p->pApi->xColumnLocale(p->pFts, iCol, &z, &n); + if( rc==SQLITE_OK && z ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n)); + } + break; + } + default: assert( 0 ); break; @@ -796,18 +811,32 @@ typedef struct F5tTokenizerInstance F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; int (*xToken)(void*, int, const char*, int, int, int); + F5tTokenizerInstance *pInst; }; struct F5tTokenizerModule { Tcl_Interp *interp; Tcl_Obj *pScript; + void *pParentCtx; + fts5_tokenizer_v2 parent_v2; + fts5_tokenizer parent; F5tTokenizerContext *pContext; }; +/* +** zLocale: +** Within a call to xTokenize_v2(), pLocale/nLocale store the locale +** passed to the call by fts5. This can be retrieved by a Tcl tokenize +** script using [sqlite3_fts5_locale]. +*/ struct F5tTokenizerInstance { Tcl_Interp *interp; Tcl_Obj *pScript; + F5tTokenizerModule *pModule; + Fts5Tokenizer *pParent; F5tTokenizerContext *pContext; + const char *pLocale; + int nLocale; }; static int f5tTokenizerCreate( @@ -816,11 +845,20 @@ static int f5tTokenizerCreate( int nArg, Fts5Tokenizer **ppOut ){ + Fts5Tokenizer *pParent = 0; F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_Obj *pEval; int rc = TCL_OK; int i; + assert( pMod->parent_v2.xCreate==0 || pMod->parent.xCreate==0 ); + if( pMod->parent_v2.xCreate ){ + rc = pMod->parent_v2.xCreate(pMod->pParentCtx, 0, 0, &pParent); + } + if( pMod->parent.xCreate ){ + rc = pMod->parent.xCreate(pMod->pParentCtx, 0, 0, &pParent); + } + pEval = Tcl_DuplicateObj(pMod->pScript); Tcl_IncrRefCount(pEval); for(i=0; rc==TCL_OK && i<nArg; i++){ @@ -840,6 +878,8 @@ static int f5tTokenizerCreate( pInst->interp = pMod->interp; pInst->pScript = Tcl_GetObjResult(pMod->interp); pInst->pContext = pMod->pContext; + pInst->pParent = pParent; + pInst->pModule = pMod; Tcl_IncrRefCount(pInst->pScript); *ppOut = (Fts5Tokenizer*)pInst; } @@ -850,11 +890,21 @@ static int f5tTokenizerCreate( static void f5tTokenizerDelete(Fts5Tokenizer *p){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; - Tcl_DecrRefCount(pInst->pScript); - ckfree((char *)pInst); + if( pInst ){ + if( pInst->pParent ){ + if( pInst->pModule->parent_v2.xDelete ){ + pInst->pModule->parent_v2.xDelete(pInst->pParent); + }else{ + pInst->pModule->parent.xDelete(pInst->pParent); + } + } + Tcl_DecrRefCount(pInst->pScript); + ckfree((char *)pInst); + } } -static int f5tTokenizerTokenize( + +static int f5tTokenizerReallyTokenize( Fts5Tokenizer *p, void *pCtx, int flags, @@ -862,6 +912,7 @@ static int f5tTokenizerTokenize( int (*xToken)(void*, int, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + F5tTokenizerInstance *pOldInst = 0; void *pOldCtx; int (*xOldToken)(void*, int, const char*, int, int, int); Tcl_Obj *pEval; @@ -870,9 +921,11 @@ static int f5tTokenizerTokenize( pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; + pOldInst = pInst->pContext->pInst; pInst->pContext->pCtx = pCtx; pInst->pContext->xToken = xToken; + pInst->pContext->pInst = pInst; assert( flags==FTS5_TOKENIZE_DOCUMENT @@ -908,9 +961,105 @@ static int f5tTokenizerTokenize( pInst->pContext->pCtx = pOldCtx; pInst->pContext->xToken = xOldToken; + pInst->pContext->pInst = pOldInst; return rc; } +typedef struct CallbackCtx CallbackCtx; +struct CallbackCtx { + Fts5Tokenizer *p; + void *pCtx; + int flags; + int (*xToken)(void*, int, const char*, int, int, int); +}; + +static int f5tTokenizeCallback( + void *pCtx, + int tflags, + const char *z, int n, + int iStart, int iEnd +){ + CallbackCtx *p = (CallbackCtx*)pCtx; + return f5tTokenizerReallyTokenize(p->p, p->pCtx, p->flags, z, n, p->xToken); +} + +static int f5tTokenizerTokenize_v2( + Fts5Tokenizer *p, + void *pCtx, + int flags, + const char *pText, int nText, + const char *pLoc, int nLoc, + int (*xToken)(void*, int, const char*, int, int, int) +){ + int rc = SQLITE_OK; + F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; + + pInst->pLocale = pLoc; + pInst->nLocale = nLoc; + + if( pInst->pParent ){ + CallbackCtx ctx; + ctx.p = p; + ctx.pCtx = pCtx; + ctx.flags = flags; + ctx.xToken = xToken; + if( pInst->pModule->parent_v2.xTokenize ){ + rc = pInst->pModule->parent_v2.xTokenize( + pInst->pParent, (void*)&ctx, flags, pText, nText, + pLoc, nLoc, f5tTokenizeCallback + ); + }else{ + rc = pInst->pModule->parent.xTokenize( + pInst->pParent, (void*)&ctx, flags, pText, nText, f5tTokenizeCallback + ); + } + }else{ + rc = f5tTokenizerReallyTokenize(p, pCtx, flags, pText, nText, xToken); + } + + pInst->pLocale = 0; + pInst->nLocale = 0; + return rc; +} +static int f5tTokenizerTokenize( + Fts5Tokenizer *p, + void *pCtx, + int flags, + const char *pText, int nText, + int (*xToken)(void*, int, const char*, int, int, int) +){ + return f5tTokenizerTokenize_v2(p, pCtx, flags, pText, nText, 0, 0, xToken); +} + +/* +** sqlite3_fts5_locale +*/ +static int SQLITE_TCLAPI f5tTokenizerLocale( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; + + if( objc!=1 ){ + Tcl_WrongNumArgs(interp, 1, objv, ""); + return TCL_ERROR; + } + + if( p->xToken==0 ){ + Tcl_AppendResult(interp, + "sqlite3_fts5_locale may only be used by tokenizer callback", 0 + ); + return TCL_ERROR; + } + + Tcl_SetObjResult(interp, + Tcl_NewStringObj(p->pInst->pLocale, p->pInst->nLocale) + ); + return TCL_OK; +} + /* ** sqlite3_fts5_token ?-colocated? TEXT START END */ @@ -996,32 +1145,112 @@ static int SQLITE_TCLAPI f5tCreateTokenizer( fts5_api *pApi; char *zName; Tcl_Obj *pScript; - fts5_tokenizer t; F5tTokenizerModule *pMod; - int rc; + int rc = SQLITE_OK; + int bV2 = 0; /* True to use _v2 API */ + int iVersion = 2; /* Value for _v2.iVersion */ + const char *zParent = 0; /* Name of parent tokenizer, if any */ + int ii = 0; - if( objc!=4 ){ - Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT"); + if( objc<4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "?OPTIONS? DB NAME SCRIPT"); return TCL_ERROR; } - if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){ - return TCL_ERROR; + + /* Parse any options. Set stack variables bV2 and zParent. */ + for(ii=1; ii<objc-3; ii++){ + int iOpt = 0; + const char *azOpt[] = { "-v2", "-parent", "-version", 0 }; + if( Tcl_GetIndexFromObj(interp, objv[ii], azOpt, "OPTION", 0, &iOpt) ){ + return TCL_ERROR; + } + switch( iOpt ){ + case 0: /* -v2 */ { + bV2 = 1; + break; + } + case 1: /* -parent */ { + ii++; + if( ii==objc-3 ){ + Tcl_AppendResult( + interp, "option requires an argument: -parent", (char*)0 + ); + return TCL_ERROR; + } + zParent = Tcl_GetString(objv[ii]); + break; + } + case 2: /* -version */ { + ii++; + if( ii==objc-3 ){ + Tcl_AppendResult( + interp, "option requires an argument: -version", (char*)0 + ); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[ii], &iVersion) ){ + return TCL_ERROR; + } + break; + } + default: + assert( 0 ); + break; + } } - zName = Tcl_GetString(objv[2]); - pScript = objv[3]; - t.xCreate = f5tTokenizerCreate; - t.xTokenize = f5tTokenizerTokenize; - t.xDelete = f5tTokenizerDelete; + if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ){ + return TCL_ERROR; + } + zName = Tcl_GetString(objv[objc-2]); + pScript = objv[objc-1]; pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule)); + memset(pMod, 0, sizeof(F5tTokenizerModule)); pMod->interp = interp; pMod->pScript = pScript; - pMod->pContext = pContext; Tcl_IncrRefCount(pScript); - rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer); + pMod->pContext = pContext; + if( zParent ){ + if( bV2 ){ + fts5_tokenizer_v2 *pParent = 0; + rc = pApi->xFindTokenizer_v2(pApi, zParent, &pMod->pParentCtx, &pParent); + if( rc==SQLITE_OK ){ + memcpy(&pMod->parent_v2, pParent, sizeof(fts5_tokenizer_v2)); + pMod->parent_v2.xDelete(0); + } + }else{ + rc = pApi->xFindTokenizer(pApi, zParent, &pMod->pParentCtx,&pMod->parent); + if( rc==SQLITE_OK ){ + pMod->parent.xDelete(0); + } + } + } + + if( rc==SQLITE_OK ){ + void *pModCtx = (void*)pMod; + if( bV2==0 ){ + fts5_tokenizer t; + t.xCreate = f5tTokenizerCreate; + t.xTokenize = f5tTokenizerTokenize; + t.xDelete = f5tTokenizerDelete; + rc = pApi->xCreateTokenizer(pApi, zName, pModCtx, &t, f5tDelTokenizer); + }else{ + fts5_tokenizer_v2 t2; + memset(&t2, 0, sizeof(t2)); + t2.iVersion = iVersion; + t2.xCreate = f5tTokenizerCreate; + t2.xTokenize = f5tTokenizerTokenize_v2; + t2.xDelete = f5tTokenizerDelete; + rc = pApi->xCreateTokenizer_v2(pApi, zName, pModCtx, &t2,f5tDelTokenizer); + } + } + if( rc!=SQLITE_OK ){ - Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0); + Tcl_AppendResult(interp, ( + bV2 ? "error in fts5_api.xCreateTokenizer_v2()" + : "error in fts5_api.xCreateTokenizer()" + ), 0); return TCL_ERROR; } @@ -1328,6 +1557,7 @@ int Fts5tcl_Init(Tcl_Interp *interp){ } aCmd[] = { { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 }, { "sqlite3_fts5_token", f5tTokenizerReturn, 1 }, + { "sqlite3_fts5_locale", f5tTokenizerLocale, 1 }, { "sqlite3_fts5_tokenize", f5tTokenize, 0 }, { "sqlite3_fts5_create_function", f5tCreateFunction, 0 }, { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 }, |