diff options
author | dan <dan@noemail.net> | 2014-12-22 21:01:52 +0000 |
---|---|---|
committer | dan <dan@noemail.net> | 2014-12-22 21:01:52 +0000 |
commit | 005e10e39a2d7e56cd113a1af8e7d37e07deca3d (patch) | |
tree | 7e64724270cfef1f2456ef0eef25cbf2ff4e995d | |
parent | 1616d55153a7654bd7955d1ba0c5c08b786277bb (diff) | |
download | sqlite-005e10e39a2d7e56cd113a1af8e7d37e07deca3d.tar.gz sqlite-005e10e39a2d7e56cd113a1af8e7d37e07deca3d.zip |
Fixes and simplifications for the snippet() and highlight() functions.
FossilOrigin-Name: ca5d44042aa7461dcc8b700b0763df4df9d4a891
-rw-r--r-- | ext/fts5/fts5.c | 5 | ||||
-rw-r--r-- | ext/fts5/fts5.h | 12 | ||||
-rw-r--r-- | ext/fts5/fts5_aux.c | 638 | ||||
-rw-r--r-- | ext/fts5/fts5_tcl.c | 51 | ||||
-rw-r--r-- | ext/fts5/fts5auxdata.test | 112 | ||||
-rw-r--r-- | manifest | 23 | ||||
-rw-r--r-- | manifest.uuid | 2 | ||||
-rw-r--r-- | test/fts5af.test | 6 | ||||
-rw-r--r-- | test/fts5ak.test | 23 |
9 files changed, 440 insertions, 432 deletions
diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 4c6e98b86..67b2b8237 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1300,7 +1300,10 @@ static int fts5ApiSetAuxdata( } }else{ pData = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata)); - if( pData==0 ) return SQLITE_NOMEM; + if( pData==0 ){ + if( xDelete ) xDelete(pPtr); + return SQLITE_NOMEM; + } memset(pData, 0, sizeof(Fts5Auxdata)); pData->pAux = pCsr->pAux; pData->pNext = pCsr->pAuxdata; diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 71db9577c..8bee42dbc 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -141,9 +141,10 @@ typedef void (*fts5_extension_function)( ** future invocation of the same fts5 extension function made as part of ** of the same MATCH query using the xGetAuxdata() API. ** -** Each extension function is allocated a single auxiliary data slot per -** query. If the extension function is invoked more than once by the SQL -** query, then all invocations share a single auxiliary data context. +** Each extension function is allocated a single auxiliary data slot for +** each FTS query (MATCH expression). If the extension function is invoked +** more than once for a single FTS query, then all invocations share a +** single auxiliary data context. ** ** If there is already an auxiliary data pointer when this function is ** invoked, then it is replaced by the new pointer. If an xDelete callback @@ -153,6 +154,11 @@ typedef void (*fts5_extension_function)( ** The xDelete callback, if one is specified, is also invoked on the ** auxiliary data pointer after the FTS5 query has finished. ** +** If an error (e.g. an OOM condition) occurs within this function, an +** the auxiliary data is set to NULL and an error code returned. If the +** xDelete parameter was not NULL, it is invoked on the auxiliary data +** pointer before returning. +** ** ** xGetAuxdata(pFts5, bClear) ** diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index a09487a36..c0224a0e0 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -14,22 +14,113 @@ #include "fts5Int.h" #include <math.h> -/************************************************************************* -** Start of highlight() implementation. +/* +** Object used to iterate through all "coalesced phrase instances" in +** a single column of the current row. If the phrase instances in the +** column being considered do not overlap, this object simply iterates +** through them. Or, if they do overlap (share one or more tokens in +** common), each set of overlapping instances is treated as a single +** match. See documentation for the highlight() auxiliary function for +** details. +** +** Usage is: +** +** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); +** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); +** rc = fts5CInstIterNext(&iter) +** ){ +** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); +** } +** */ -typedef struct HighlightContext HighlightContext; -struct HighlightContext { +typedef struct CInstIter CInstIter; +struct CInstIter { const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ Fts5Context *pFts; /* First arg to pass to pApi functions */ + int iCol; /* Column to search */ + int iInst; /* Next phrase instance index */ int nInst; /* Total number of phrase instances */ - int iInst; /* Current phrase instance index */ - int iStart; /* First token of current phrase */ - int iEnd; /* Last token of current phrase */ + /* Output variables */ + int iStart; /* First token in coalesced phrase instance */ + int iEnd; /* Last token in coalesced phrase instance */ +}; + +/* +** Return non-zero if the iterator is at EOF, or zero otherwise. +*/ +static int fts5CInstIterEof(CInstIter *pIter){ + return (pIter->iStart < 0); +} + +/* +** Advance the iterator to the next coalesced phrase instance. Return +** an SQLite error code if an error occurs, or SQLITE_OK otherwise. +*/ +static int fts5CInstIterNext(CInstIter *pIter){ + int rc = SQLITE_OK; + pIter->iStart = -1; + pIter->iEnd = -1; + + while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ + int ip; int ic; int io; + rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); + if( rc==SQLITE_OK ){ + if( ic==pIter->iCol ){ + int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); + if( pIter->iStart<0 ){ + pIter->iStart = io; + pIter->iEnd = iEnd; + }else if( io<=pIter->iEnd ){ + if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; + }else{ + break; + } + } + pIter->iInst++; + } + } + + return rc; +} + +/* +** Initialize the iterator object indicated by the final parameter to +** iterate through coalesced phrase instances in column iCol. +*/ +static int fts5CInstIterInit( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + int iCol, + CInstIter *pIter +){ + int rc; + + memset(pIter, 0, sizeof(CInstIter)); + pIter->pApi = pApi; + pIter->pFts = pFts; + pIter->iCol = iCol; + rc = pApi->xInstCount(pFts, &pIter->nInst); + + if( rc==SQLITE_OK ){ + rc = fts5CInstIterNext(pIter); + } + + return rc; +} + + + +/************************************************************************* +** Start of highlight() implementation. +*/ +typedef struct HighlightContext HighlightContext; +struct HighlightContext { + CInstIter iter; /* Coalesced Instance Iterator */ + int iRangeStart; + int iRangeEnd; const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ - int iCol; /* Column to read from */ - const char *zIn; /* Input text */ int nIn; /* Size of input text in bytes */ int iOff; /* Current offset within zIn[] */ @@ -40,6 +131,10 @@ struct HighlightContext { ** Append text to the HighlightContext output string - p->zOut. Argument ** z points to a buffer containing n bytes of text to append. If n is ** negative, everything up until the first '\0' is appended to the output. +** +** If *pRc is set to any value other than SQLITE_OK when this function is +** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, +** *pRc is set to an error code before returning. */ static void fts5HighlightAppend( int *pRc, @@ -53,6 +148,9 @@ static void fts5HighlightAppend( } } +/* +** Tokenizer callback used by implementation of highlight() function. +*/ static int fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ const char *pToken, /* Buffer containing token */ @@ -64,39 +162,43 @@ static int fts5HighlightCb( HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; - if( iPos==p->iStart ){ + if( p->iRangeEnd>0 ){ + if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; + if( iPos==p->iRangeStart ) p->iOff = iStartOff; + } + + if( iPos==p->iter.iStart ){ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); fts5HighlightAppend(&rc, p, p->zOpen, -1); p->iOff = iStartOff; } - if( iPos==p->iEnd ){ - int bClose = 1; - for(p->iInst++; rc==SQLITE_OK && p->iInst<p->nInst; p->iInst++){ - int iP, iPCol, iOff; - rc = p->pApi->xInst(p->pFts, p->iInst, &iP, &iPCol, &iOff); - if( iPCol!=p->iCol ){ - p->iStart = p->iEnd = -1; - }else{ - int iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP); - if( iEnd<=p->iEnd ) continue; - if( iOff<=p->iEnd ) bClose = 0; - p->iStart = iOff; - p->iEnd = iEnd; - } - break; + if( iPos==p->iter.iEnd ){ + if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){ + fts5HighlightAppend(&rc, p, p->zOpen, -1); + } + fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + fts5HighlightAppend(&rc, p, p->zClose, -1); + p->iOff = iEndOff; + if( rc==SQLITE_OK ){ + rc = fts5CInstIterNext(&p->iter); } + } - if( bClose ){ - fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ + fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); + p->iOff = iEndOff; + if( iPos<p->iter.iEnd ){ fts5HighlightAppend(&rc, p, p->zClose, -1); - p->iOff = iEndOff; } } return rc; } +/* +** Implementation of highlight() function. +*/ static void fts5HighlightFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -106,6 +208,7 @@ static void fts5HighlightFunction( ){ HighlightContext ctx; int rc; + int iCol; if( nVal!=3 ){ const char *zErr = "wrong number of arguments to function highlight()"; @@ -113,26 +216,14 @@ static void fts5HighlightFunction( return; } + iCol = sqlite3_value_int(apVal[0]); memset(&ctx, 0, sizeof(HighlightContext)); - ctx.iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); - ctx.pApi = pApi; - ctx.pFts = pFts; - rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn); - if( rc==SQLITE_OK ) rc = pApi->xInstCount(pFts, &ctx.nInst); - - /* Find the first phrase instance in the right column. */ - ctx.iStart = -1; - ctx.iEnd = -1; - for( ; ctx.iInst<ctx.nInst && rc==SQLITE_OK; ctx.iInst++){ - int iP, iPCol, iOff; - rc = pApi->xInst(pFts, ctx.iInst, &iP, &iPCol, &iOff); - if( iPCol==ctx.iCol ){ - ctx.iStart = iOff; - ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP); - break; - } + rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); + + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); } if( rc==SQLITE_OK ){ @@ -150,403 +241,128 @@ static void fts5HighlightFunction( /* **************************************************************************/ -typedef struct SnipPhrase SnipPhrase; -typedef struct SnipIter SnipIter; -typedef struct SnippetCtx SnippetCtx; - -struct SnipPhrase { - u64 mask; /* Current mask */ - int nToken; /* Tokens in this phrase */ - int i; /* Current offset in phrase poslist */ - i64 iPos; /* Next position in phrase (-ve -> EOF) */ -}; - -struct SnipIter { - i64 iLast; /* Last token position of current snippet */ - int nScore; /* Score of current snippet */ - - const Fts5ExtensionApi *pApi; - Fts5Context *pFts; - u64 szmask; /* Mask used to on SnipPhrase.mask */ - int nPhrase; /* Number of phrases */ - SnipPhrase aPhrase[0]; /* Array of size nPhrase */ -}; - -struct SnippetCtx { - int iFirst; /* Offset of first token to record */ - int nToken; /* Size of aiStart[] and aiEnd[] arrays */ - int iSeen; /* Set to largest offset seen */ - int *aiStart; - int *aiEnd; -}; - -static int fts5SnippetCallback( - void *pContext, /* Pointer to Fts5Buffer object */ - const char *pToken, /* Buffer containing token */ - int nToken, /* Size of token in bytes */ - int iStart, /* Start offset of token */ - int iEnd, /* End offset of token */ - int iPos /* Position offset of token */ -){ - int rc = SQLITE_OK; - SnippetCtx *pCtx = (SnippetCtx*)pContext; - int iOff = iPos - pCtx->iFirst; - - if( iOff>=0 ){ - if( iOff < pCtx->nToken ){ - pCtx->aiStart[iOff] = iStart; - pCtx->aiEnd[iOff] = iEnd; - } - pCtx->iSeen = iPos; - if( iOff>=pCtx->nToken ) rc = SQLITE_DONE; - } - return rc; -} - -/* -** Set pIter->nScore to the score for the current entry. -*/ -static void fts5SnippetCalculateScore(SnipIter *pIter){ - int i; - int nScore = 0; - assert( pIter->iLast>=0 ); - - for(i=0; i<pIter->nPhrase; i++){ - SnipPhrase *p = &pIter->aPhrase[i]; - u64 mask = p->mask; - if( mask ){ - u64 j; - nScore += 1000; - for(j=1; j & pIter->szmask; j<<=1){ - if( mask & j ) nScore++; - } - } - } - - pIter->nScore = nScore; -} - -/* -** Allocate a new snippet iter. -*/ -static int fts5SnipIterNew( +static void fts5SnippetFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ - int nToken, /* Number of tokens in snippets */ - SnipIter **ppIter /* OUT: New object */ + sqlite3_context *pCtx, /* Context for returning result/error */ + int nVal, /* Number of values in apVal[] array */ + sqlite3_value **apVal /* Array of trailing arguments */ ){ - int i; /* Counter variable */ - SnipIter *pIter; /* New iterator object */ - int nByte; /* Bytes of space to allocate */ + HighlightContext ctx; + int rc = SQLITE_OK; /* Return code */ + int iCol; /* 1st argument to snippet() */ + const char *zEllips; /* 4th argument to snippet() */ + int nToken; /* 5th argument to snippet() */ + int nInst; /* Number of instance matches this row */ + int i; /* Used to iterate through instances */ int nPhrase; /* Number of phrases in query */ - - *ppIter = 0; - nPhrase = pApi->xPhraseCount(pFts); - nByte = sizeof(SnipIter) + nPhrase * sizeof(SnipPhrase); - pIter = (SnipIter*)sqlite3_malloc(nByte); - if( pIter==0 ) return SQLITE_NOMEM; - memset(pIter, 0, nByte); - - pIter->nPhrase = nPhrase; - pIter->pApi = pApi; - pIter->pFts = pFts; - pIter->szmask = ((u64)1 << nToken) - 1; - assert( nToken<=63 ); - - for(i=0; i<nPhrase; i++){ - pIter->aPhrase[i].nToken = pApi->xPhraseSize(pFts, i); - } - - *ppIter = pIter; - return SQLITE_OK; -} - -/* -** Set the iterator to point to the first candidate snippet. -*/ -static void fts5SnipIterFirst(SnipIter *pIter){ - const Fts5ExtensionApi *pApi = pIter->pApi; - Fts5Context *pFts = pIter->pFts; - int i; /* Used to iterate through phrases */ - SnipPhrase *pMin = 0; /* Phrase with first match */ - - memset(pIter->aPhrase, 0, sizeof(SnipPhrase) * pIter->nPhrase); - - for(i=0; i<pIter->nPhrase; i++){ - SnipPhrase *p = &pIter->aPhrase[i]; - p->nToken = pApi->xPhraseSize(pFts, i); - pApi->xPoslist(pFts, i, &p->i, &p->iPos); - if( p->iPos>=0 && (pMin==0 || p->iPos<pMin->iPos) ){ - pMin = p; - } + unsigned char *aSeen; /* Array of "seen instance" flags */ + int iBestCol; /* Column containing best snippet */ + int iBestStart = 0; /* First token of best snippet */ + int iBestLast = nToken; /* Last token of best snippet */ + int nBestScore = 0; /* Score of best snippet */ + int nColSize; /* Total size of iBestCol in tokens */ + + if( nVal!=5 ){ + const char *zErr = "wrong number of arguments to function snippet()"; + sqlite3_result_error(pCtx, zErr, -1); + return; } - assert( pMin ); - pIter->iLast = pMin->iPos + pMin->nToken - 1; - pMin->mask = 0x01; - pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos); - fts5SnippetCalculateScore(pIter); -} + memset(&ctx, 0, sizeof(HighlightContext)); + rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); -/* -** Advance the snippet iterator to the next candidate snippet. -*/ -static void fts5SnipIterNext(SnipIter *pIter){ - const Fts5ExtensionApi *pApi = pIter->pApi; - Fts5Context *pFts = pIter->pFts; - int nPhrase = pIter->nPhrase; - int i; /* Used to iterate through phrases */ - SnipPhrase *pMin = 0; + iCol = sqlite3_value_int(apVal[0]); + ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); + ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); + zEllips = (const char*)sqlite3_value_text(apVal[3]); + nToken = sqlite3_value_int(apVal[4]); - for(i=0; i<nPhrase; i++){ - SnipPhrase *p = &pIter->aPhrase[i]; - if( p->iPos>=0 && (pMin==0 || p->iPos<pMin->iPos) ) pMin = p; + iBestCol = (iCol>=0 ? iCol : 0); + nPhrase = pApi->xPhraseCount(pFts); + aSeen = sqlite3_malloc(nPhrase); + if( aSeen==0 ){ + rc = SQLITE_NOMEM; } - if( pMin==0 ){ - /* pMin==0 indicates that the SnipIter is at EOF. */ - pIter->iLast = -1; - }else{ - i64 nShift = pMin->iPos - pIter->iLast; - assert( nShift>=0 ); - for(i=0; i<nPhrase; i++){ - SnipPhrase *p = &pIter->aPhrase[i]; - if( nShift>=63 ){ - p->mask = 0; - }else{ - p->mask = p->mask << (int)nShift; - p->mask &= pIter->szmask; + if( rc==SQLITE_OK ){ + rc = pApi->xInstCount(pFts, &nInst); + } + for(i=0; rc==SQLITE_OK && i<nInst; i++){ + int ip, iSnippetCol, iStart; + memset(aSeen, 0, nPhrase); + rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart); + if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ + int nScore = 1000; + int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); + int j; + aSeen[ip] = 1; + + for(j=i+1; rc==SQLITE_OK && j<nInst; j++){ + int ic; int io; int iFinal; + rc = pApi->xInst(pFts, j, &ip, &ic, &io); + iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; + if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){ + nScore += aSeen[ip] ? 1000 : 1; + aSeen[ip] = 1; + if( iFinal>iLast ) iLast = iFinal; + } } - } - pIter->iLast = pMin->iPos; - pMin->mask |= 0x01; - fts5SnippetCalculateScore(pIter); - pApi->xPoslist(pFts, pMin - pIter->aPhrase, &pMin->i, &pMin->iPos); + if( rc==SQLITE_OK && nScore>nBestScore ){ + iBestCol = iSnippetCol; + iBestStart = iStart; + iBestLast = iLast; + nBestScore = nScore; + } + } } -} -static void fts5SnipIterFree(SnipIter *pIter){ - if( pIter ){ - sqlite3_free(pIter); + if( rc==SQLITE_OK ){ + rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); } -} - -static int fts5SnippetText( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - SnipIter *pIter, /* Snippet to write to buffer */ - int nToken, /* Size of desired snippet in tokens */ - const char *zStart, - const char *zFinal, - const char *zEllip, - Fts5Buffer *pBuf /* Write output to this buffer */ -){ - SnippetCtx ctx; - int i; - u64 all = 0; - const char *zCol; /* Column text to extract snippet from */ - int nCol; /* Size of column text in bytes */ - int rc; - int nShift; - - rc = pApi->xColumnText(pFts, FTS5_POS2COLUMN(pIter->iLast), &zCol, &nCol); - if( rc!=SQLITE_OK ) return rc; - - /* At this point pIter->iLast is the offset of the last token in the - ** proposed snippet. However, in all cases pIter->iLast contains the - ** final token of one of the phrases. This makes the snippet look - ** unbalanced. For example: - ** - ** "...x x x x x <b>term</b>..." - ** - ** It is better to increase iLast a little so that the snippet looks - ** more like: - ** - ** "...x x x <b>term</b> y y..." - ** - ** The problem is that there is no easy way to discover whether or not - ** how many tokens are present in the column following "term". - */ - - /* Set variable nShift to the number of tokens by which the snippet - ** should be shifted, assuming there are sufficient tokens to the right - ** of iLast in the column value. */ - for(i=0; i<pIter->nPhrase; i++){ - int iToken; - for(iToken=0; iToken<pIter->aPhrase[i].nToken; iToken++){ - all |= (pIter->aPhrase[i].mask << iToken); - } + if( rc==SQLITE_OK ){ + rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); } - for(i=nToken-1; i>=0; i--){ - if( all & ((u64)1 << i) ) break; + if( rc==SQLITE_OK ){ + rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); } - assert( i>=0 ); - nShift = (nToken - i) / 2; - memset(&ctx, 0, sizeof(SnippetCtx)); - ctx.nToken = nToken + nShift; - ctx.iFirst = FTS5_POS2OFFSET(pIter->iLast) - nToken + 1; - if( ctx.iFirst<0 ){ - nShift += ctx.iFirst; - if( nShift<0 ) nShift = 0; - ctx.iFirst = 0; + if( (iBestStart+nToken-1)>iBestLast ){ + iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; } - ctx.aiStart = (int*)sqlite3_malloc(sizeof(int) * ctx.nToken * 2); - if( ctx.aiStart==0 ) return SQLITE_NOMEM; - ctx.aiEnd = &ctx.aiStart[ctx.nToken]; - - rc = pApi->xTokenize(pFts, zCol, nCol, (void*)&ctx, fts5SnippetCallback); - if( rc==SQLITE_OK ){ - int i1; /* First token from input to include */ - int i2; /* Last token from input to include */ - - int iPrint; - int iMatchto; - int iLast; - - int *aiStart = ctx.aiStart - ctx.iFirst; - int *aiEnd = ctx.aiEnd - ctx.iFirst; - - /* Ideally we want to start the snippet with token (ctx.iFirst + nShift). - ** However, this is only possible if there are sufficient tokens within - ** the column. This block sets variables i1 and i2 to the first and last - ** input tokens to include in the snippet. */ - if( (ctx.iFirst + nShift + nToken)<=ctx.iSeen ){ - i1 = ctx.iFirst + nShift; - i2 = i1 + nToken - 1; - }else{ - i2 = ctx.iSeen; - i1 = ctx.iSeen - nToken + 1; - assert( i1>=0 || ctx.iFirst==0 ); - if( i1<0 ) i1 = 0; - } - - /* If required, append the preceding ellipsis. */ - if( i1>0 ) sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%s", zEllip); - - iLast = FTS5_POS2OFFSET(pIter->iLast); - iPrint = i1; - iMatchto = -1; - - for(i=i1; i<=i2; i++){ - - /* Check if this is the first token of any phrase match. */ - int ip; - for(ip=0; ip<pIter->nPhrase; ip++){ - SnipPhrase *pPhrase = &pIter->aPhrase[ip]; - u64 m = (1 << (iLast - i - pPhrase->nToken + 1)); - - if( i<=iLast && (pPhrase->mask & m) ){ - if( iMatchto<0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s", - aiStart[i] - aiStart[iPrint], - &zCol[aiStart[iPrint]], - zStart - ); - iPrint = i; - } - if( i>iMatchto ) iMatchto = i + pPhrase->nToken - 1; - } - } - - if( i==iMatchto ){ - sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s", - aiEnd[i] - aiStart[iPrint], - &zCol[aiStart[iPrint]], - zFinal - ); - iMatchto = -1; - iPrint = i+1; - - if( i<i2 ){ - sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s", - aiStart[i+1] - aiEnd[i], - &zCol[aiEnd[i]] - ); - } - } - } - - if( iPrint<=i2 ){ - sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s", - aiEnd[i2] - aiStart[iPrint], - &zCol[aiStart[iPrint]] - ); - if( iMatchto>=0 ){ - sqlite3Fts5BufferAppendString(&rc, pBuf, zFinal); - } - } - - /* If required, append the trailing ellipsis. */ - if( i2<ctx.iSeen ) sqlite3Fts5BufferAppendString(&rc, pBuf, zEllip); + if( iBestStart+nToken>nColSize ){ + iBestStart = nColSize - nToken; } + if( iBestStart<0 ) iBestStart = 0; - sqlite3_free(ctx.aiStart); - return rc; -} - -/* -** A default snippet() implementation. This is compatible with the FTS3 -** snippet() function. -*/ -static void fts5SnippetFunction( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - sqlite3_context *pCtx, /* Context for returning result/error */ - int nVal, /* Number of values in apVal[] array */ - sqlite3_value **apVal /* Array of trailing arguments */ -){ - const char *zStart = "<b>"; - const char *zFinal = "</b>"; - const char *zEllip = "<b>...</b>"; - int nToken = -15; - int nAbs; - int rc; - SnipIter *pIter = 0; + ctx.iRangeStart = iBestStart; + ctx.iRangeEnd = iBestStart + nToken - 1; - if( nVal>=1 ) zStart = (const char*)sqlite3_value_text(apVal[0]); - if( nVal>=2 ) zFinal = (const char*)sqlite3_value_text(apVal[1]); - if( nVal>=3 ) zEllip = (const char*)sqlite3_value_text(apVal[2]); - if( nVal>=4 ){ - nToken = sqlite3_value_int(apVal[3]); - if( nToken==0 ) nToken = -15; + if( iBestStart>0 ){ + fts5HighlightAppend(&rc, &ctx, zEllips, -1); } - nAbs = nToken * (nToken<0 ? -1 : 1); - - rc = fts5SnipIterNew(pApi, pFts, nAbs, &pIter); if( rc==SQLITE_OK ){ - Fts5Buffer buf; /* Result buffer */ - int nBestScore = 0; /* Score of best snippet found */ - - for(fts5SnipIterFirst(pIter); - pIter->iLast>=0; - fts5SnipIterNext(pIter) - ){ - if( pIter->nScore>nBestScore ) nBestScore = pIter->nScore; - } - for(fts5SnipIterFirst(pIter); - pIter->iLast>=0; - fts5SnipIterNext(pIter) - ){ - if( pIter->nScore==nBestScore ) break; - } - - memset(&buf, 0, sizeof(Fts5Buffer)); - rc = fts5SnippetText(pApi, pFts, pIter, nAbs, zStart, zFinal, zEllip, &buf); - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, (const char*)buf.p, buf.n, SQLITE_TRANSIENT); - } - sqlite3_free(buf.p); + rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb); + } + if( ctx.iRangeEnd>=(nColSize-1) ){ + fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); + }else{ + fts5HighlightAppend(&rc, &ctx, zEllips, -1); } - fts5SnipIterFree(pIter); - if( rc!=SQLITE_OK ){ + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); + }else{ sqlite3_result_error_code(pCtx, rc); } + sqlite3_free(ctx.zOut); + sqlite3_free(aSeen); } +/************************************************************************/ + /* ** Context object passed by fts5GatherTotals() to xQueryPhrase callback diff --git a/ext/fts5/fts5_tcl.c b/ext/fts5/fts5_tcl.c index dd5ef6043..d9b3dd488 100644 --- a/ext/fts5/fts5_tcl.c +++ b/ext/fts5/fts5_tcl.c @@ -57,6 +57,19 @@ struct F5tApi { Fts5Context *pFts; }; +/* +** An object of this type is used with the xSetAuxdata() and xGetAuxdata() +** API test wrappers. The tcl interface allows a single tcl value to be +** saved using xSetAuxdata(). Instead of simply storing a pointer to the +** tcl object, the code in this file wraps it in an sqlite3_malloc'd +** instance of the following struct so that if the destructor is not +** correctly invoked it will be reported as an SQLite memory leak. +*/ +typedef struct F5tAuxData F5tAuxData; +struct F5tAuxData { + Tcl_Obj *pObj; +}; + static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, @@ -108,8 +121,14 @@ static int xQueryPhraseCb( return rc; } +static void xSetAuxdataDestructor(void *p){ + F5tAuxData *pData = (F5tAuxData*)p; + Tcl_DecrRefCount(pData->pObj); + sqlite3_free(pData); +} + /* -** api sub-command... +** api sub-command... ** ** Description... */ @@ -136,6 +155,8 @@ static int xF5tApi( { "xColumnText", 1, "COL" }, { "xColumnSize", 1, "COL" }, { "xQueryPhrase", 2, "PHRASE SCRIPT" }, + { "xSetAuxdata", 1, "VALUE" }, + { "xGetAuxdata", 1, "CLEAR" }, { 0, 0, 0} }; @@ -284,6 +305,34 @@ static int xF5tApi( } break; } + CASE(12, "xSetAuxdata") { + F5tAuxData *pData = (F5tAuxData*)sqlite3_malloc(sizeof(F5tAuxData)); + if( pData==0 ){ + Tcl_AppendResult(interp, "out of memory", 0); + return TCL_ERROR; + } + pData->pObj = objv[2]; + Tcl_IncrRefCount(pData->pObj); + rc = p->pApi->xSetAuxdata(p->pFts, pData, xSetAuxdataDestructor); + break; + } + CASE(13, "xGetAuxdata") { + F5tAuxData *pData; + int bClear; + if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ){ + return TCL_ERROR; + } + pData = (F5tAuxData*)p->pApi->xGetAuxdata(p->pFts, bClear); + if( pData==0 ){ + Tcl_ResetResult(interp); + }else{ + Tcl_SetObjResult(interp, pData->pObj); + if( bClear ){ + xSetAuxdataDestructor((void*)pData); + } + } + break; + } default: assert( 0 ); diff --git a/ext/fts5/fts5auxdata.test b/ext/fts5/fts5auxdata.test new file mode 100644 index 000000000..158e393c3 --- /dev/null +++ b/ext/fts5/fts5auxdata.test @@ -0,0 +1,112 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs. +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. test] +} +source $testdir/tester.tcl +set testprefix fts5auxdata + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE f1 USING fts5(a, b); + INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1'); + INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2'); + INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3'); + INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4'); + INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5'); +} + +proc aux_function_1 {cmd tn} { + switch [$cmd xRowid] { + 1 { + do_test $tn.1 [list $cmd xGetAuxdata 0 ] {} + $cmd xSetAuxdata "one" + } + + 2 { + do_test $tn.2 [list $cmd xGetAuxdata 0 ] {one} + $cmd xSetAuxdata "two" + } + + 3 { + do_test $tn.3 [list $cmd xGetAuxdata 0 ] {two} + } + + 4 { + do_test $tn.4 [list $cmd xGetAuxdata 1 ] {two} + } + + 5 { + do_test $tn.5 [list $cmd xGetAuxdata 0 ] {} + } + } +} + +sqlite3_fts5_create_function db aux_function_1 aux_function_1 +db eval { + SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a' + ORDER BY rowid ASC +} + +proc aux_function_2 {cmd tn inst} { + if {$inst == "A"} { + switch [$cmd xRowid] { + 1 { + do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] {} + $cmd xSetAuxdata "one $inst" + } + 2 { + do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "one $inst" + $cmd xSetAuxdata "two $inst" + } + 3 { + do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two $inst" + } + 4 { + do_test $tn.4.$inst [list $cmd xGetAuxdata 1 ] "two $inst" + } + 5 { + do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {} + } + } + } else { + switch [$cmd xRowid] { + 1 { + do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] "one A" + } + 2 { + do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "two A" + } + 3 { + do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two A" + } + 4 { + do_test $tn.4.$inst [list $cmd xGetAuxdata 0 ] {} + } + 5 { + do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {} + } + } + } +} + +sqlite3_fts5_create_function db aux_function_2 aux_function_2 +db eval { + SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B') + FROM f1 WHERE f1 MATCH 'a' + ORDER BY rowid ASC +} + +finish_test + @@ -1,5 +1,5 @@ -C Remove\sthe\sfts5_test()\saux\sfunction.\sTest\saux\sfunctions\susing\sthe\stcl\sinterface\sinstead. -D 2014-12-19T20:53:51.092 +C Fixes\sand\ssimplifications\sfor\sthe\ssnippet()\sand\shighlight()\sfunctions. +D 2014-12-22T21:01:52.167 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,18 +104,19 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c d1c1722eb661da3e8e3a19909958b97beff7d243 -F ext/fts5/fts5.h 72fc1e9995b1ddc254a487b9528614a83bd3dfb6 +F ext/fts5/fts5.c 8e5af98a1e370a39c8a91ed77f21ad171e5b214c +F ext/fts5/fts5.h 0a0e97c65ba3b3e82638d7f7742c5d96f2b61535 F ext/fts5/fts5Int.h 36054b1dfc4881a9b94f945b348ab6cc01c0c7a5 -F ext/fts5/fts5_aux.c b8e5660a05b86dab059c9989835b5df0ac5e3c55 +F ext/fts5/fts5_aux.c 6200a3f6d17c491e6c87189eaef7649ee7fe564d F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 F ext/fts5/fts5_config.c 5caeb4e77680d635be25b899f97a29cf26fb45ce F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f F ext/fts5/fts5_storage.c bfeedb83b095a1018f4f531c3cc3f9099e9f9081 -F ext/fts5/fts5_tcl.c d56484fd5cc3b02d268ee11fa4918e98ce3b1d03 +F ext/fts5/fts5_tcl.c 4392e74421d24cc37c370732e8b48217cd2c1777 F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b +F ext/fts5/fts5auxdata.test 3844d0f098441cedf75b9cc96d5e6e94d1a3bef4 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -605,12 +606,12 @@ F test/fts5ab.test 52f6b9223372ff70b0edb5a3054fbd7bc7fcfefc F test/fts5ac.test 021e175b809d2baa23792807caae5dfc6bc706f4 F test/fts5ad.test ff518db6b0d7750b51ee6531ffebf82e57094bfd F test/fts5ae.test 0877873a2b9df6b3a2d832ed5ea928f838d19faf -F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc +F test/fts5af.test 355d2048bd9ddc2f8f4e80a4cb1e70c6204422a0 F test/fts5ag.test 8b2bb67cf2a3245eaad5e49ab8daa6be6e64332b F test/fts5ah.test 788e923e60b5e7a559f672cfbf262b8b260ea176 F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d F test/fts5aj.test bc3d91bd012c7ca175cdf266c2074920bb5fa5ba -F test/fts5ak.test e55bb0f3fac1291d32bc9485a3ee55a7d76f4d5f +F test/fts5ak.test 26187e57ba56a9e10e6da894a038b07588e7249d F test/fts5al.test 61b067f3b0b61679ab164a8a855882dfd313988d F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/fts5fault1.test ba59b6f0897a4fe510c446b98968ec1e8800a56b @@ -1209,7 +1210,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 38b3c65e3ee95eb7afadb76e0110570fbbc41e1b -R a4b205e397ac161e65b87e55d29e5aba +P 67e3ffd950c5347d219a06b33ad51949cffa7d90 +R eaa7ec352adc789c928b49341506e13d U dan -Z b9d8ce93a014115b155c701f123b4810 +Z 1c7bcf3d91cb30ef107cecfef87d0af9 diff --git a/manifest.uuid b/manifest.uuid index 365622895..4e7afab00 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -67e3ffd950c5347d219a06b33ad51949cffa7d90
\ No newline at end of file +ca5d44042aa7461dcc8b700b0763df4df9d4a891
\ No newline at end of file diff --git a/test/fts5af.test b/test/fts5af.test index da70dc7ae..ca56c0ec1 100644 --- a/test/fts5af.test +++ b/test/fts5af.test @@ -38,19 +38,19 @@ proc do_snippet_test {tn doc match res} { do_execsql_test $tn.1 { DELETE FROM t1; INSERT INTO t1 VALUES($v1, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; } [list $res] do_execsql_test $tn.2 { DELETE FROM t1; INSERT INTO t1 VALUES(NULL, $v1); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; } [list $res] do_execsql_test $tn.3 { DELETE FROM t1; INSERT INTO t1 VALUES($v1, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2 + SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2 ORDER BY rank DESC; } [list $res] diff --git a/test/fts5ak.test b/test/fts5ak.test index 29d19bc4b..53977ab70 100644 --- a/test/fts5ak.test +++ b/test/fts5ak.test @@ -16,7 +16,7 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl -set testprefix fts5aj +set testprefix fts5ak # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { @@ -118,6 +118,27 @@ do_execsql_test 2.6.2 { {a b c [d] e [f] g h i j} } +#------------------------------------------------------------------------- +# The example from the docs. +# +do_execsql_test 3.1 { + -- Assuming this: + CREATE VIRTUAL TABLE ft USING fts5(a); + INSERT INTO ft VALUES('a b c x c d e'); + INSERT INTO ft VALUES('a b c c d e'); + INSERT INTO ft VALUES('a b c d e'); + + -- The following SELECT statement returns these three rows: + -- '[a b c] x [c d e]' + -- '[a b c] [c d e]' + -- '[a b c d e]' + SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e'; +} { + {[a b c d e]} + {[a b c] [c d e]} + {[a b c] x [c d e]} +} + finish_test |