aboutsummaryrefslogtreecommitdiff
path: root/ext/fts5/fts5_expr.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_expr.c')
-rw-r--r--ext/fts5/fts5_expr.c163
1 files changed, 138 insertions, 25 deletions
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c
index f5101ba06..bc7c9741e 100644
--- a/ext/fts5/fts5_expr.c
+++ b/ext/fts5/fts5_expr.c
@@ -100,7 +100,9 @@ struct Fts5ExprNode {
struct Fts5ExprTerm {
u8 bPrefix; /* True for a prefix term */
u8 bFirst; /* True if token must be first in column */
- char *zTerm; /* nul-terminated term */
+ char *pTerm; /* Term data */
+ int nQueryTerm; /* Effective size of term in bytes */
+ int nFullTerm; /* Size of term in bytes incl. tokendata */
Fts5IndexIter *pIter; /* Iterator for this term */
Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
@@ -967,7 +969,7 @@ static int fts5ExprNearInitAll(
p->pIter = 0;
}
rc = sqlite3Fts5IndexQuery(
- pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
+ pExpr->pIndex, p->pTerm, p->nQueryTerm,
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
pNear->pColset,
@@ -1604,7 +1606,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
Fts5ExprTerm *pSyn;
Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
- sqlite3_free(pTerm->zTerm);
+ sqlite3_free(pTerm->pTerm);
sqlite3Fts5IterClose(pTerm->pIter);
for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
pNext = pSyn->pSynonym;
@@ -1702,6 +1704,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
+ Fts5Config *pConfig;
int rc;
};
@@ -1735,8 +1738,10 @@ static int fts5ParseTokenize(
rc = SQLITE_NOMEM;
}else{
memset(pSyn, 0, (size_t)nByte);
- pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
- memcpy(pSyn->zTerm, pToken, nToken);
+ pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
+ pSyn->nFullTerm = pSyn->nQueryTerm = nToken;
+ if( pCtx->pConfig->bTokendata ) pSyn->nQueryTerm = strlen(pSyn->pTerm);
+ memcpy(pSyn->pTerm, pToken, nToken);
pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
}
@@ -1761,7 +1766,11 @@ static int fts5ParseTokenize(
if( rc==SQLITE_OK ){
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
- pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ pTerm->nFullTerm = pTerm->nQueryTerm = nToken;
+ if( pCtx->pConfig->bTokendata && rc==SQLITE_OK ){
+ pTerm->nQueryTerm = strlen(pTerm->pTerm);
+ }
}
}
@@ -1828,6 +1837,7 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
memset(&sCtx, 0, sizeof(TokenCtx));
sCtx.pPhrase = pAppend;
+ sCtx.pConfig = pConfig;
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
@@ -1877,8 +1887,7 @@ int sqlite3Fts5ExprClonePhrase(
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
- TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
-
+ TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */
pOrig = pExpr->apExprPhrase[iPhrase];
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
if( rc==SQLITE_OK ){
@@ -1909,13 +1918,12 @@ int sqlite3Fts5ExprClonePhrase(
if( pOrig->nTerm ){
int i; /* Used to iterate through phrase terms */
+ sCtx.pConfig = pExpr->pConfig;
for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
int tflags = 0;
Fts5ExprTerm *p;
for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
- const char *zTerm = p->zTerm;
- rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
- 0, 0);
+ rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm,p->nFullTerm,0,0);
tflags = FTS5_TOKEN_COLOCATED;
}
if( rc==SQLITE_OK ){
@@ -2296,11 +2304,13 @@ static Fts5ExprNode *fts5ParsePhraseToAnd(
if( parseGrowPhraseArray(pParse) ){
fts5ExprPhraseFree(pPhrase);
}else{
+ Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii];
+ Fts5ExprTerm *pTo = &pPhrase->aTerm[0];
pParse->apPhrase[pParse->nPhrase++] = pPhrase;
pPhrase->nTerm = 1;
- pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
- &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
- );
+ pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm);
+ pTo->nQueryTerm = p->nQueryTerm;
+ pTo->nFullTerm = p->nFullTerm;
pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
);
@@ -2485,16 +2495,17 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
/* Determine the maximum amount of space required. */
for(p=pTerm; p; p=p->pSynonym){
- nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
+ nByte += pTerm->nQueryTerm * 2 + 3 + 2;
}
zQuoted = sqlite3_malloc64(nByte);
if( zQuoted ){
int i = 0;
for(p=pTerm; p; p=p->pSynonym){
- char *zIn = p->zTerm;
+ char *zIn = p->pTerm;
+ char *zEnd = &zIn[p->nQueryTerm];
zQuoted[i++] = '"';
- while( *zIn ){
+ while( zIn<zEnd ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
}
@@ -2572,8 +2583,10 @@ static char *fts5ExprPrintTcl(
zRet = fts5PrintfAppend(zRet, " {");
for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
- char *zTerm = pPhrase->aTerm[iTerm].zTerm;
- zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
+ Fts5ExprTerm *p = &pPhrase->aTerm[iTerm];
+ zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ",
+ p->nQueryTerm, p->pTerm
+ );
if( pPhrase->aTerm[iTerm].bPrefix ){
zRet = fts5PrintfAppend(zRet, "*");
}
@@ -2974,6 +2987,17 @@ static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
return 0;
}
+/*
+** pToken is a buffer nToken bytes in size that may or may not contain
+** an embedded 0x00 byte. If it does, return the number of bytes in
+** the buffer before the 0x00. If it does not, return nToken.
+*/
+static int fts5QueryTerm(const char *pToken, int nToken){
+ int ii;
+ for(ii=0; ii<nToken && pToken[ii]; ii++){}
+ return ii;
+}
+
static int fts5ExprPopulatePoslistsCb(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
@@ -2985,22 +3009,33 @@ static int fts5ExprPopulatePoslistsCb(
Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
Fts5Expr *pExpr = p->pExpr;
int i;
+ int nQuery = nToken;
+ i64 iRowid = pExpr->pRoot->iRowid;
UNUSED_PARAM2(iUnused1, iUnused2);
- if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
+ if( nQuery>FTS5_MAX_TOKEN_SIZE ) nQuery = FTS5_MAX_TOKEN_SIZE;
+ if( pExpr->pConfig->bTokendata ){
+ nQuery = fts5QueryTerm(pToken, nQuery);
+ }
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
for(i=0; i<pExpr->nPhrase; i++){
- Fts5ExprTerm *pTerm;
+ Fts5ExprTerm *pT;
if( p->aPopulator[i].bOk==0 ) continue;
- for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
- int nTerm = (int)strlen(pTerm->zTerm);
- if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
- && memcmp(pTerm->zTerm, pToken, nTerm)==0
+ for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){
+ if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix))
+ && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0
){
int rc = sqlite3Fts5PoslistWriterAppend(
&pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
);
+ if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){
+ int iCol = p->iOff>>32;
+ int iTokOff = p->iOff & 0x7FFFFFFF;
+ rc = sqlite3Fts5IndexIterWriteTokendata(
+ pT->pIter, pToken, nToken, iRowid, iCol, iTokOff
+ );
+ }
if( rc ) return rc;
break;
}
@@ -3135,3 +3170,81 @@ int sqlite3Fts5ExprPhraseCollist(
return rc;
}
+
+/*
+** Does the work of the fts5_api.xQueryToken() API method.
+*/
+int sqlite3Fts5ExprQueryToken(
+ Fts5Expr *pExpr,
+ int iPhrase,
+ int iToken,
+ const char **ppOut,
+ int *pnOut
+){
+ Fts5ExprPhrase *pPhrase = 0;
+
+ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
+ return SQLITE_RANGE;
+ }
+ pPhrase = pExpr->apExprPhrase[iPhrase];
+ if( iToken<0 || iToken>=pPhrase->nTerm ){
+ return SQLITE_RANGE;
+ }
+
+ *ppOut = pPhrase->aTerm[iToken].pTerm;
+ *pnOut = pPhrase->aTerm[iToken].nFullTerm;
+ return SQLITE_OK;
+}
+
+/*
+** Does the work of the fts5_api.xInstToken() API method.
+*/
+int sqlite3Fts5ExprInstToken(
+ Fts5Expr *pExpr,
+ i64 iRowid,
+ int iPhrase,
+ int iCol,
+ int iOff,
+ int iToken,
+ const char **ppOut,
+ int *pnOut
+){
+ Fts5ExprPhrase *pPhrase = 0;
+ Fts5ExprTerm *pTerm = 0;
+ int rc = SQLITE_OK;
+
+ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
+ return SQLITE_RANGE;
+ }
+ pPhrase = pExpr->apExprPhrase[iPhrase];
+ if( iToken<0 || iToken>=pPhrase->nTerm ){
+ return SQLITE_RANGE;
+ }
+ pTerm = &pPhrase->aTerm[iToken];
+ if( pTerm->bPrefix==0 ){
+ if( pExpr->pConfig->bTokendata ){
+ rc = sqlite3Fts5IterToken(
+ pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut
+ );
+ }else{
+ *ppOut = pTerm->pTerm;
+ *pnOut = pTerm->nFullTerm;
+ }
+ }
+ return rc;
+}
+
+/*
+** Clear the token mappings for all Fts5IndexIter objects mannaged by
+** the expression passed as the only argument.
+*/
+void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
+ int ii;
+ for(ii=0; ii<pExpr->nPhrase; ii++){
+ Fts5ExprTerm *pT;
+ for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){
+ sqlite3Fts5IndexIterClearTokendata(pT->pIter);
+ }
+ }
+}
+