diff options
Diffstat (limited to 'ext/fts5/fts5_expr.c')
-rw-r--r-- | ext/fts5/fts5_expr.c | 1701 |
1 files changed, 1701 insertions, 0 deletions
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c new file mode 100644 index 000000000..878b54f53 --- /dev/null +++ b/ext/fts5/fts5_expr.c @@ -0,0 +1,1701 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +*/ + +#ifdef SQLITE_ENABLE_FTS5 + + +#include "fts5Int.h" +#include "fts5parse.h" + +/* +** All token types in the generated fts5parse.h file are greater than 0. +*/ +#define FTS5_EOF 0 + +typedef struct Fts5ExprTerm Fts5ExprTerm; + +/* +** Functions generated by lemon from fts5parse.y. +*/ +void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); +void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); +void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); + +struct Fts5Expr { + Fts5Index *pIndex; + Fts5ExprNode *pRoot; + int bDesc; /* Iterate in descending docid order */ + int nPhrase; /* Number of phrases in expression */ + Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ +}; + +/* +** eType: +** Expression node type. Always one of: +** +** FTS5_AND (pLeft, pRight valid) +** FTS5_OR (pLeft, pRight valid) +** FTS5_NOT (pLeft, pRight valid) +** FTS5_STRING (pNear valid) +*/ +struct Fts5ExprNode { + int eType; /* Node type */ + Fts5ExprNode *pLeft; /* Left hand child node */ + Fts5ExprNode *pRight; /* Right hand child node */ + Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ + int bEof; /* True at EOF */ + i64 iRowid; /* Current rowid */ +}; + +/* +** An instance of the following structure represents a single search term +** or term prefix. +*/ +struct Fts5ExprTerm { + int bPrefix; /* True for a prefix term */ + char *zTerm; /* nul-terminated term */ + Fts5IndexIter *pIter; /* Iterator for this term */ +}; + +/* +** A phrase. One or more terms that must appear in a contiguous sequence +** within a document for it to match. +*/ +struct Fts5ExprPhrase { + Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ + Fts5Buffer poslist; /* Current position list */ + int nTerm; /* Number of entries in aTerm[] */ + Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ +}; + +/* +** One or more phrases that must appear within a certain token distance of +** each other within each matching document. +*/ +struct Fts5ExprNearset { + int nNear; /* NEAR parameter */ + int iCol; /* Column to search (-1 -> all columns) */ + int nPhrase; /* Number of entries in aPhrase[] array */ + Fts5ExprPhrase *apPhrase[0]; /* Array of phrase pointers */ +}; + + +/* +** Parse context. +*/ +struct Fts5Parse { + Fts5Config *pConfig; + char *zErr; + int rc; + int nPhrase; /* Size of apPhrase array */ + Fts5ExprPhrase **apPhrase; /* Array of all phrases */ + Fts5ExprNode *pExpr; /* Result of a successful parse */ +}; + +void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ + if( pParse->rc==SQLITE_OK ){ + va_list ap; + va_start(ap, zFmt); + pParse->zErr = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + pParse->rc = SQLITE_ERROR; + } +} + +static int fts5ExprIsspace(char t){ + return t==' ' || t=='\t' || t=='\n' || t=='\r'; +} + +static int fts5ExprIstoken(char t){ + return fts5ExprIsspace(t)==0 && t!='\0' + && t!=':' && t!='(' && t!=')' + && t!=',' && t!='+' && t!='*'; +} + +/* +** Read the first token from the nul-terminated string at *pz. +*/ +static int fts5ExprGetToken( + Fts5Parse *pParse, + const char **pz, /* IN/OUT: Pointer into buffer */ + Fts5Token *pToken +){ + const char *z = *pz; + int tok; + + /* Skip past any whitespace */ + while( fts5ExprIsspace(*z) ) z++; + + pToken->p = z; + pToken->n = 1; + switch( *z ){ + case '(': tok = FTS5_LP; break; + case ')': tok = FTS5_RP; break; + case ':': tok = FTS5_COLON; break; + case ',': tok = FTS5_COMMA; break; + case '+': tok = FTS5_PLUS; break; + case '*': tok = FTS5_STAR; break; + case '\0': tok = FTS5_EOF; break; + + case '"': { + const char *z2; + tok = FTS5_STRING; + + for(z2=&z[1]; 1; z2++){ + if( z2[0]=='"' ){ + z2++; + if( z2[0]!='"' ) break; + } + if( z2[0]=='\0' ){ + sqlite3Fts5ParseError(pParse, "unterminated string"); + return FTS5_EOF; + } + } + pToken->n = (z2 - z); + break; + } + + default: { + const char *z2; + tok = FTS5_STRING; + for(z2=&z[1]; fts5ExprIstoken(*z2); z2++); + pToken->n = (z2 - z); + if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; + if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; + if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; + break; + } + } + + *pz = &pToken->p[pToken->n]; + return tok; +} + +static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); } +static void fts5ParseFree(void *p){ sqlite3_free(p); } + +int sqlite3Fts5ExprNew( + Fts5Config *pConfig, /* FTS5 Configuration */ + const char *zExpr, /* Expression text */ + Fts5Expr **ppNew, + char **pzErr +){ + Fts5Parse sParse; + Fts5Token token; + const char *z = zExpr; + int t; /* Next token type */ + void *pEngine; + Fts5Expr *pNew; + + *ppNew = 0; + *pzErr = 0; + memset(&sParse, 0, sizeof(sParse)); + pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); + if( pEngine==0 ){ return SQLITE_NOMEM; } + sParse.pConfig = pConfig; + + do { + t = fts5ExprGetToken(&sParse, &z, &token); + sqlite3Fts5Parser(pEngine, t, token, &sParse); + }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); + sqlite3Fts5ParserFree(pEngine, fts5ParseFree); + + assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); + if( sParse.rc==SQLITE_OK ){ + *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); + if( pNew==0 ){ + sParse.rc = SQLITE_NOMEM; + sqlite3Fts5ParseNodeFree(sParse.pExpr); + }else{ + pNew->pRoot = sParse.pExpr; + pNew->pIndex = 0; + pNew->apExprPhrase = sParse.apPhrase; + pNew->nPhrase = sParse.nPhrase; + sParse.apPhrase = 0; + } + } + + sqlite3_free(sParse.apPhrase); + *pzErr = sParse.zErr; + return sParse.rc; +} + +static char *fts5ExprStrdup(int *pRc, const char *zIn){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + int nByte = strlen(zIn) + 1; + zRet = sqlite3_malloc(nByte); + if( zRet ){ + memcpy(zRet, zIn, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + return zRet; +} + +static void *fts5ExprMalloc(int *pRc, int nByte){ + void *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet ){ + memset(pRet, 0, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + return pRet; +} + +/* +** Create a new FTS5 expression by cloning phrase iPhrase of the +** expression passed as the second argument. +*/ +int sqlite3Fts5ExprPhraseExpr( + Fts5Config *pConfig, + Fts5Expr *pExpr, + int iPhrase, + Fts5Expr **ppNew +){ + int rc = SQLITE_OK; /* Return code */ + Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ + int i; /* Used to iterate through phrase terms */ + + /* Components of the new expression object */ + Fts5Expr *pNew; + Fts5ExprPhrase **apPhrase; + Fts5ExprNode *pNode; + Fts5ExprNearset *pNear; + Fts5ExprPhrase *pCopy; + + pOrig = pExpr->apExprPhrase[iPhrase]; + pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); + apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); + pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); + pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) + ); + pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm + ); + + for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ + pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); + pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; + } + + if( rc==SQLITE_OK ){ + /* All the allocations succeeded. Put the expression object together. */ + pNew->pIndex = pExpr->pIndex; + pNew->pRoot = pNode; + pNew->nPhrase = 1; + pNew->apExprPhrase = apPhrase; + pNew->apExprPhrase[0] = pCopy; + + pNode->eType = FTS5_STRING; + pNode->pNear = pNear; + + pNear->iCol = -1; + pNear->nPhrase = 1; + pNear->apPhrase[0] = pCopy; + + pCopy->nTerm = pOrig->nTerm; + pCopy->pNode = pNode; + }else{ + /* At least one allocation failed. Free them all. */ + if( pCopy ){ + for(i=0; i<pOrig->nTerm; i++){ + sqlite3_free(pCopy->aTerm[i].zTerm); + } + sqlite3_free(pCopy); + sqlite3_free(pNear); + sqlite3_free(pNode); + sqlite3_free(apPhrase); + sqlite3_free(pNew); + pNew = 0; + } + } + + *ppNew = pNew; + return rc; +} + +/* +** Free the expression node object passed as the only argument. +*/ +void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ + if( p ){ + sqlite3Fts5ParseNodeFree(p->pLeft); + sqlite3Fts5ParseNodeFree(p->pRight); + sqlite3Fts5ParseNearsetFree(p->pNear); + sqlite3_free(p); + } +} + +/* +** Free the expression object passed as the only argument. +*/ +void sqlite3Fts5ExprFree(Fts5Expr *p){ + if( p ){ + sqlite3Fts5ParseNodeFree(p->pRoot); + sqlite3_free(p->apExprPhrase); + sqlite3_free(p); + } +} + +/* +** All individual term iterators in pPhrase are guaranteed to be valid and +** pointing to the same rowid when this function is called. This function +** checks if the current rowid really is a match, and if so populates +** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch +** is set to true if this is really a match, or false otherwise. +** +** SQLITE_OK is returned if an error occurs, or an SQLite error code +** otherwise. It is not considered an error code if the current rowid is +** not a match. +*/ +static int fts5ExprPhraseIsMatch( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + int iCol, /* If >=0, search for matches in iCol only */ + Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + int *pbMatch /* OUT: Set to true if really a match */ +){ + Fts5PoslistWriter writer = {0}; + Fts5PoslistReader aStatic[4]; + Fts5PoslistReader *aIter = aStatic; + int i; + int rc = SQLITE_OK; + + fts5BufferZero(&pPhrase->poslist); + + /* If the aStatic[] array is not large enough, allocate a large array + ** using sqlite3_malloc(). This approach could be improved upon. */ + if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ + int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; + aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); + if( !aIter ) return SQLITE_NOMEM; + } + + /* Initialize a term iterator for each term in the phrase */ + for(i=0; i<pPhrase->nTerm; i++){ + int n; + const u8 *a; + rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n); + if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){ + goto ismatch_out; + } + } + + while( 1 ){ + int bMatch; + i64 iPos = aIter[0].iPos; + do { + bMatch = 1; + for(i=0; i<pPhrase->nTerm; i++){ + Fts5PoslistReader *pPos = &aIter[i]; + i64 iAdj = iPos + i; + if( pPos->iPos!=iAdj ){ + bMatch = 0; + while( pPos->iPos<iAdj ){ + if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; + } + if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; + } + } + }while( bMatch==0 ); + + /* Append position iPos to the output */ + rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); + if( rc!=SQLITE_OK ) goto ismatch_out; + + for(i=0; i<pPhrase->nTerm; i++){ + if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; + } + } + + ismatch_out: + *pbMatch = (pPhrase->poslist.n>0); + if( aIter!=aStatic ) sqlite3_free(aIter); + return rc; +} + +typedef struct Fts5LookaheadReader Fts5LookaheadReader; +struct Fts5LookaheadReader { + const u8 *a; /* Buffer containing position list */ + int n; /* Size of buffer a[] in bytes */ + int i; /* Current offset in position list */ + i64 iPos; /* Current position */ + i64 iLookahead; /* Next position */ +}; + +#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62) + +static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ + p->iPos = p->iLookahead; + if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ + p->iLookahead = FTS5_LOOKAHEAD_EOF; + } + return (p->iPos==FTS5_LOOKAHEAD_EOF); +} + +static int fts5LookaheadReaderInit( + const u8 *a, int n, /* Buffer to read position list from */ + Fts5LookaheadReader *p /* Iterator object to initialize */ +){ + memset(p, 0, sizeof(Fts5LookaheadReader)); + p->a = a; + p->n = n; + fts5LookaheadReaderNext(p); + return fts5LookaheadReaderNext(p); +} + +#if 0 +static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){ + return (p->iPos==FTS5_LOOKAHEAD_EOF); +} +#endif + +typedef struct Fts5NearTrimmer Fts5NearTrimmer; +struct Fts5NearTrimmer { + Fts5LookaheadReader reader; /* Input iterator */ + Fts5PoslistWriter writer; /* Writer context */ + Fts5Buffer *pOut; /* Output poslist */ +}; + +/* +** The near-set object passed as the first argument contains more than +** one phrase. All phrases currently point to the same row. The +** Fts5ExprPhrase.poslist buffers are populated accordingly. This function +** tests if the current row contains instances of each phrase sufficiently +** close together to meet the NEAR constraint. Output variable *pbMatch +** is set to true if it does, or false otherwise. +** +** If no error occurs, SQLITE_OK is returned. Or, if an error does occur, +** an SQLite error code. If a value other than SQLITE_OK is returned, the +** final value of *pbMatch is undefined. +** +** TODO: This function should also edit the position lists associated +** with each phrase to remove any phrase instances that are not part of +** a set of intances that collectively matches the NEAR constraint. +*/ +static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ + Fts5NearTrimmer aStatic[4]; + Fts5NearTrimmer *a = aStatic; + + Fts5ExprPhrase **apPhrase = pNear->apPhrase; + + int i; + int rc = SQLITE_OK; + int bMatch; + + assert( pNear->nPhrase>1 ); + + /* If the aStatic[] array is not large enough, allocate a large array + ** using sqlite3_malloc(). This approach could be improved upon. */ + if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ + int nByte = sizeof(Fts5LookaheadReader) * pNear->nPhrase; + a = (Fts5NearTrimmer*)sqlite3_malloc(nByte); + if( !a ) return SQLITE_NOMEM; + memset(a, 0, nByte); + }else{ + memset(aStatic, 0, sizeof(aStatic)); + } + + /* Initialize a lookahead iterator for each phrase. After passing the + ** buffer and buffer size to the lookaside-reader init function, zero + ** the phrase poslist buffer. The new poslist for the phrase (containing + ** the same entries as the original with some entries removed on account + ** of the NEAR constraint) is written over the original even as it is + ** being read. This is safe as the entries for the new poslist are a + ** subset of the old, so it is not possible for data yet to be read to + ** be overwritten. */ + for(i=0; i<pNear->nPhrase; i++){ + Fts5Buffer *pPoslist = &apPhrase[i]->poslist; + fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); + pPoslist->n = 0; + a[i].pOut = pPoslist; + } + + while( 1 ){ + int iAdv; + i64 iMin; + i64 iMax; + + /* This block advances the phrase iterators until they point to a set of + ** entries that together comprise a match. */ + iMax = a[0].reader.iPos; + do { + bMatch = 1; + for(i=0; i<pNear->nPhrase; i++){ + Fts5LookaheadReader *pPos = &a[i].reader; + iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; + if( pPos->iPos<iMin || pPos->iPos>iMax ){ + bMatch = 0; + while( pPos->iPos<iMin ){ + if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; + } + if( pPos->iPos>iMax ) iMax = pPos->iPos; + } + } + }while( bMatch==0 ); + + /* Add an entry to each output position list */ + for(i=0; i<pNear->nPhrase; i++){ + i64 iPos = a[i].reader.iPos; + Fts5PoslistWriter *pWriter = &a[i].writer; + if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ + sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); + } + } + + iAdv = 0; + iMin = a[0].reader.iLookahead; + for(i=0; i<pNear->nPhrase; i++){ + if( a[i].reader.iLookahead < iMin ){ + iMin = a[i].reader.iLookahead; + iAdv = i; + } + } + if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; + } + + ismatch_out: + *pbMatch = (a[0].pOut->n>0); + if( a!=aStatic ) sqlite3_free(a); + return rc; +} + +/* +** Advance each term iterator in each phrase in pNear. If any reach EOF, +** set output variable *pbEof to true before returning. +*/ +static int fts5ExprNearAdvanceAll( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNearset *pNear, /* Near object to advance iterators of */ + int *pbEof /* OUT: Set to true if phrase at EOF */ +){ + int i, j; /* Phrase and token index, respectively */ + + for(i=0; i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; j<pPhrase->nTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + int rc = sqlite3Fts5IterNext(pIter); + if( rc || sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return rc; + } + } + } + + return SQLITE_OK; +} + +/* +** Advance iterator pIter until it points to a value equal to or laster +** than the initial value of *piLast. If this means the iterator points +** to a value laster than *piLast, update *piLast to the new lastest value. +** +** If the iterator reaches EOF, set *pbEof to true before returning. If +** an error occurs, set *pRc to an error code. If either *pbEof or *pRc +** are set, return a non-zero value. Otherwise, return zero. +*/ +static int fts5ExprAdvanceto( + Fts5IndexIter *pIter, /* Iterator to advance */ + int bDesc, /* True if iterator is "rowid DESC" */ + i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ + int *pRc, /* OUT: Error code */ + int *pbEof /* OUT: Set to true if EOF */ +){ + i64 iLast = *piLast; + i64 iRowid; + + iRowid = sqlite3Fts5IterRowid(pIter); + if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ + sqlite3Fts5IterNextFrom(pIter, iLast); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return 1; + } + iRowid = sqlite3Fts5IterRowid(pIter); + assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); + } + *piLast = iRowid; + + return 0; +} + +/* +** All individual term iterators in pNear are guaranteed to be valid when +** this function is called. This function checks if all term iterators +** point to the same rowid, and if not, advances them until they do. +** If an EOF is reached before this happens, *pbEof is set to true before +** returning. +** +** SQLITE_OK is returned if an error occurs, or an SQLite error code +** otherwise. It is not considered an error code if an iterator reaches +** EOF. +*/ +static int fts5ExprNearNextRowidMatch( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom +){ + Fts5ExprNearset *pNear = pNode->pNear; + int rc = SQLITE_OK; + int i, j; /* Phrase and token index, respectively */ + i64 iLast; /* Lastest rowid any iterator points to */ + int bMatch; /* True if all terms are at the same rowid */ + + /* Initialize iLast, the "lastest" rowid any iterator points to. If the + ** iterator skips through rowids in the default ascending order, this means + ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it + ** means the minimum rowid. */ + iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); + if( bFromValid && (iFrom>iLast)==(pExpr->bDesc==0) ){ + assert( pExpr->bDesc || iFrom>=iLast ); + iLast = iFrom; + } + + do { + bMatch = 1; + for(i=0; i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; j<pPhrase->nTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid!=iLast ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast, &rc, &pNode->bEof) ){ + return rc; + } + } + } + }while( bMatch==0 ); + + pNode->iRowid = iLast; + return rc; +} + +/* +** Argument pNode points to a NEAR node. All individual term iterators +** point to valid entries (not EOF). +* +** This function tests if the term iterators currently all point to the +** same rowid, and if so, if the row matches the phrase and NEAR constraints. +** If so, the pPhrase->poslist buffers are populated and the pNode->iRowid +** variable set before returning. Or, if the current combination of +** iterators is not a match, they are advanced until they are. If one of +** the iterators reaches EOF before a match is found, *pbEof is set to +** true before returning. The final values of the pPhrase->poslist and +** iRowid fields are undefined in this case. +** +** SQLITE_OK is returned if an error occurs, or an SQLite error code +** otherwise. It is not considered an error code if an iterator reaches +** EOF. +*/ +static int fts5ExprNearNextMatch( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode, /* The "NEAR" node (FTS5_STRING) */ + int bFromValid, + i64 iFrom +){ + int rc = SQLITE_OK; + Fts5ExprNearset *pNear = pNode->pNear; + while( 1 ){ + int i; + + /* Advance the iterators until they all point to the same rowid */ + rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom); + if( pNode->bEof || rc!=SQLITE_OK ) break; + + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. */ + for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 || pNear->iCol>=0 ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch); + if( bMatch==0 ) break; + }else{ + int n; + const u8 *a; + rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &a, &n); + fts5BufferSet(&rc, &pPhrase->poslist, n, a); + } + } + + if( rc==SQLITE_OK && i==pNear->nPhrase ){ + int bMatch = 1; + if( pNear->nPhrase>1 ){ + rc = fts5ExprNearIsMatch(pNear, &bMatch); + } + if( rc!=SQLITE_OK || bMatch ) break; + } + + /* If control flows to here, then the current rowid is not a match. + ** Advance all term iterators in all phrases to the next rowid. */ + if( rc==SQLITE_OK ){ + rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); + } + if( pNode->bEof || rc!=SQLITE_OK ) break; + } + + return rc; +} + +/* +** Initialize all term iterators in the pNear object. If any term is found +** to match no documents at all, set *pbEof to true and return immediately, +** without initializing any further iterators. +*/ +static int fts5ExprNearInitAll( + Fts5Expr *pExpr, + Fts5ExprNode *pNode +){ + Fts5ExprNearset *pNear = pNode->pNear; + Fts5ExprTerm *pTerm; + Fts5ExprPhrase *pPhrase; + int i, j; + int rc = SQLITE_OK; + + for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ + pPhrase = pNear->apPhrase[i]; + for(j=0; j<pPhrase->nTerm; j++){ + pTerm = &pPhrase->aTerm[j]; + rc = sqlite3Fts5IndexQuery( + pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), + &pTerm->pIter + ); + assert( rc==SQLITE_OK || pTerm->pIter==0 ); + if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + break; + } + } + } + + return rc; +} + +/* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. */ +static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*, int, i64); + +/* +** Compare the values currently indicated by the two nodes as follows: +** +** res = (*p1) - (*p2) +** +** Nodes that point to values that come later in the iteration order are +** considered to be larger. Nodes at EOF are the largest of all. +** +** This means that if the iteration order is ASC, then numerically larger +** rowids are considered larger. Or if it is the default DESC, numerically +** smaller rowids are larger. +*/ +static int fts5NodeCompare( + Fts5Expr *pExpr, + Fts5ExprNode *p1, + Fts5ExprNode *p2 +){ + if( p2->bEof ) return -1; + if( p1->bEof ) return +1; + if( pExpr->bDesc==0 ){ + if( p1->iRowid<p2->iRowid ) return -1; + return (p1->iRowid > p2->iRowid); + }else{ + if( p1->iRowid>p2->iRowid ) return -1; + return (p1->iRowid < p2->iRowid); + } +} + +/* +** Advance node iterator pNode, part of expression pExpr. If argument +** bFromValid is zero, then pNode is advanced exactly once. Or, if argument +** bFromValid is non-zero, then pNode is advanced until it is at or past +** rowid value iFrom. Whether "past" means "less than" or "greater than" +** depends on whether this is an ASC or DESC iterator. +*/ +static int fts5ExprNodeNext( + Fts5Expr *pExpr, + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom +){ + int rc = SQLITE_OK; + + if( pNode->bEof==0 ){ + switch( pNode->eType ){ + case FTS5_STRING: { + rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof); + break; + }; + + case FTS5_AND: { + rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); + if( rc==SQLITE_OK ){ + /* todo: update (iFrom/bFromValid) here */ + rc = fts5ExprNodeNext(pExpr, pNode->pRight, bFromValid, iFrom); + } + break; + } + + case FTS5_OR: { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + int cmp = fts5NodeCompare(pExpr, p1, p2); + + if( cmp==0 ){ + rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); + } + }else{ + rc = fts5ExprNodeNext(pExpr, (cmp < 0) ? p1 : p2, bFromValid, iFrom); + } + + break; + } + + default: assert( pNode->eType==FTS5_NOT ); { + rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); + break; + } + } + + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeNextMatch(pExpr, pNode, bFromValid, iFrom); + } + } + + return rc; +} + +static void fts5ExprSetEof(Fts5ExprNode *pNode){ + if( pNode ){ + pNode->bEof = 1; + fts5ExprSetEof(pNode->pLeft); + fts5ExprSetEof(pNode->pRight); + } +} + +/* +** +*/ +static int fts5ExprNodeNextMatch( + Fts5Expr *pExpr, + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom +){ + int rc = SQLITE_OK; + if( pNode->bEof==0 ){ + switch( pNode->eType ){ + + case FTS5_STRING: { + rc = fts5ExprNearNextMatch(pExpr, pNode, bFromValid, iFrom); + break; + } + + case FTS5_AND: { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + + while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){ + Fts5ExprNode *pAdv; + assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); + if( pExpr->bDesc==(p1->iRowid > p2->iRowid) ){ + pAdv = p1; + if( bFromValid==0 || pExpr->bDesc==(p2->iRowid < iFrom) ){ + iFrom = p2->iRowid; + } + }else{ + pAdv = p2; + if( bFromValid==0 || pExpr->bDesc==(p1->iRowid < iFrom) ){ + iFrom = p1->iRowid; + } + } + rc = fts5ExprNodeNext(pExpr, pAdv, 1, iFrom); + if( rc!=SQLITE_OK ) break; + } + if( p1->bEof || p2->bEof ){ + fts5ExprSetEof(pNode); + } + pNode->iRowid = p1->iRowid; + break; + } + + case FTS5_OR: { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + Fts5ExprNode *pNext = (fts5NodeCompare(pExpr, p1, p2) > 0 ? p2 : p1); + pNode->bEof = pNext->bEof; + pNode->iRowid = pNext->iRowid; + break; + } + + default: assert( pNode->eType==FTS5_NOT ); { + Fts5ExprNode *p1 = pNode->pLeft; + Fts5ExprNode *p2 = pNode->pRight; + while( rc==SQLITE_OK ){ + int cmp; + while( rc==SQLITE_OK && (cmp = fts5NodeCompare(pExpr, p1, p2))>0 ){ + rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom); + } + if( rc || cmp ) break; + rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + } + pNode->bEof = p1->bEof; + pNode->iRowid = p1->iRowid; + break; + } + } + } + return rc; +} + + +/* +** Set node pNode, which is part of expression pExpr, to point to the first +** match. If there are no matches, set the Node.bEof flag to indicate EOF. +** +** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. +** It is not an error if there are no matches. +*/ +static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ + int rc = SQLITE_OK; + pNode->bEof = 0; + + if( pNode->eType==FTS5_STRING ){ + + /* Initialize all term iterators in the NEAR object. */ + rc = fts5ExprNearInitAll(pExpr, pNode); + + /* Attempt to advance to the first match */ + if( rc==SQLITE_OK && pNode->bEof==0 ){ + rc = fts5ExprNearNextMatch(pExpr, pNode, 0, 0); + } + + }else{ + rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeFirst(pExpr, pNode->pRight); + } + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeNextMatch(pExpr, pNode, 0, 0); + } + } + return rc; +} + + + +/* +** Begin iterating through the set of documents in index pIdx matched by +** the MATCH expression passed as the first argument. If the "bDesc" parameter +** is passed a non-zero value, iteration is in descending rowid order. Or, +** if it is zero, in ascending order. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. It +** is not considered an error if the query does not match any documents. +*/ +int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ + int rc = SQLITE_OK; + if( p->pRoot ){ + p->pIndex = pIdx; + p->bDesc = bDesc; + rc = fts5ExprNodeFirst(p, p->pRoot); + } + return rc; +} + +/* +** Move to the next document +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. It +** is not considered an error if the query does not match any documents. +*/ +int sqlite3Fts5ExprNext(Fts5Expr *p){ + int rc; + rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); + return rc; +} + +int sqlite3Fts5ExprEof(Fts5Expr *p){ + return (p->pRoot==0 || p->pRoot->bEof); +} + +i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ + return p->pRoot->iRowid; +} + +/* +** Argument pIn points to a buffer of nIn bytes. This function allocates +** and returns a new buffer populated with a copy of (pIn/nIn) with a +** nul-terminator byte appended to it. +** +** It is the responsibility of the caller to eventually free the returned +** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. +*/ +static char *fts5Strndup(int *pRc, const char *pIn, int nIn){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + zRet = (char*)sqlite3_malloc(nIn+1); + if( zRet ){ + memcpy(zRet, pIn, nIn); + zRet[nIn] = '\0'; + }else{ + *pRc = SQLITE_NOMEM; + } + } + return zRet; +} + +static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ + int rc = SQLITE_OK; + *pz = fts5Strndup(&rc, pToken->p, pToken->n); + return rc; +} + +/* +** Free the phrase object passed as the only argument. +*/ +static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ + if( pPhrase ){ + int i; + for(i=0; i<pPhrase->nTerm; i++){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; + sqlite3_free(pTerm->zTerm); + if( pTerm->pIter ){ + sqlite3Fts5IterClose(pTerm->pIter); + } + } + fts5BufferFree(&pPhrase->poslist); + sqlite3_free(pPhrase); + } +} + +/* +** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated +** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is +** appended to it and the results returned. +** +** If an OOM error occurs, both the pNear and pPhrase objects are freed and +** NULL returned. +*/ +Fts5ExprNearset *sqlite3Fts5ParseNearset( + Fts5Parse *pParse, /* Parse context */ + Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ + Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ +){ + const int SZALLOC = 8; + Fts5ExprNearset *pRet = 0; + + if( pParse->rc==SQLITE_OK ){ + if( pPhrase==0 ){ + return pNear; + } + if( pNear==0 ){ + int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + pParse->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); + pRet->iCol = -1; + } + }else if( (pNear->nPhrase % SZALLOC)==0 ){ + int nNew = pRet->nPhrase + SZALLOC; + int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); + + pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte); + if( pRet==0 ){ + pParse->rc = SQLITE_NOMEM; + } + }else{ + pRet = pNear; + } + } + + if( pRet==0 ){ + assert( pParse->rc!=SQLITE_OK ); + sqlite3Fts5ParseNearsetFree(pNear); + sqlite3Fts5ParsePhraseFree(pPhrase); + }else{ + pRet->apPhrase[pRet->nPhrase++] = pPhrase; + } + return pRet; +} + +typedef struct TokenCtx TokenCtx; +struct TokenCtx { + Fts5ExprPhrase *pPhrase; +}; + +/* +** Callback for tokenizing terms used by ParseTerm(). +*/ +static int fts5ParseTokenize( + void *pContext, /* Pointer to Fts5InsertCtx object */ + const char *pToken, /* Buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Start offset of token */ + int iEnd /* End offset of token */ +){ + int rc = SQLITE_OK; + const int SZALLOC = 8; + TokenCtx *pCtx = (TokenCtx*)pContext; + Fts5ExprPhrase *pPhrase = pCtx->pPhrase; + Fts5ExprTerm *pTerm; + + if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ + Fts5ExprPhrase *pNew; + int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); + + pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew + ); + if( pNew==0 ) return SQLITE_NOMEM; + if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); + pCtx->pPhrase = pPhrase = pNew; + pNew->nTerm = nNew - SZALLOC; + } + + pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; + memset(pTerm, 0, sizeof(Fts5ExprTerm)); + pTerm->zTerm = fts5Strndup(&rc, pToken, nToken); + + return rc; +} + + +/* +** Free the phrase object passed as the only argument. +*/ +void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ + fts5ExprPhraseFree(pPhrase); +} + +/* +** Free the phrase object passed as the second argument. +*/ +void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ + if( pNear ){ + int i; + for(i=0; i<pNear->nPhrase; i++){ + fts5ExprPhraseFree(pNear->apPhrase[i]); + } + sqlite3_free(pNear); + } +} + +void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ + assert( pParse->pExpr==0 ); + pParse->pExpr = p; +} + +/* +** This function is called by the parser to process a string token. The +** string may or may not be quoted. In any case it is tokenized and a +** phrase object consisting of all tokens returned. +*/ +Fts5ExprPhrase *sqlite3Fts5ParseTerm( + Fts5Parse *pParse, /* Parse context */ + Fts5ExprPhrase *pAppend, /* Phrase to append to */ + Fts5Token *pToken, /* String to tokenize */ + int bPrefix /* True if there is a trailing "*" */ +){ + Fts5Config *pConfig = pParse->pConfig; + TokenCtx sCtx; /* Context object passed to callback */ + int rc; /* Tokenize return code */ + char *z = 0; + + memset(&sCtx, 0, sizeof(TokenCtx)); + sCtx.pPhrase = pAppend; + + rc = fts5ParseStringFromToken(pToken, &z); + if( rc==SQLITE_OK ){ + sqlite3Fts5Dequote(z); + rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize); + } + sqlite3_free(z); + if( rc ){ + pParse->rc = rc; + fts5ExprPhraseFree(sCtx.pPhrase); + sCtx.pPhrase = 0; + }else if( sCtx.pPhrase ){ + + if( pAppend==0 ){ + if( (pParse->nPhrase % 8)==0 ){ + int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); + Fts5ExprPhrase **apNew; + apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); + if( apNew==0 ){ + pParse->rc = SQLITE_NOMEM; + fts5ExprPhraseFree(sCtx.pPhrase); + return 0; + } + pParse->apPhrase = apNew; + } + pParse->nPhrase++; + } + + pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; + assert( sCtx.pPhrase->nTerm>0 ); + sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; + } + + return sCtx.pPhrase; +} + +/* +** Token pTok has appeared in a MATCH expression where the NEAR operator +** is expected. If token pTok does not contain "NEAR", store an error +** in the pParse object. +*/ +void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ + if( pParse->rc==SQLITE_OK ){ + if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ + sqlite3Fts5ParseError( + pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p + ); + } + } +} + +void sqlite3Fts5ParseSetDistance( + Fts5Parse *pParse, + Fts5ExprNearset *pNear, + Fts5Token *p +){ + int nNear = 0; + int i; + if( p->n ){ + for(i=0; i<p->n; i++){ + char c = (char)p->p[i]; + if( c<'0' || c>'9' ){ + sqlite3Fts5ParseError( + pParse, "expected integer, got \"%.*s\"", p->n, p->p + ); + return; + } + nNear = nNear * 10 + (p->p[i] - '0'); + } + }else{ + nNear = FTS5_DEFAULT_NEARDIST; + } + pNear->nNear = nNear; +} + +void sqlite3Fts5ParseSetColumn( + Fts5Parse *pParse, + Fts5ExprNearset *pNear, + Fts5Token *p +){ + char *z = 0; + int rc = fts5ParseStringFromToken(p, &z); + if( rc==SQLITE_OK ){ + Fts5Config *pConfig = pParse->pConfig; + int i; + for(i=0; i<pConfig->nCol; i++){ + if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){ + pNear->iCol = i; + break; + } + } + if( i==pConfig->nCol ){ + sqlite3Fts5ParseError(pParse, "no such column: %s", z); + } + sqlite3_free(z); + }else{ + pParse->rc = rc; + } +} + +/* +** Allocate and return a new expression object. If anything goes wrong (i.e. +** OOM error), leave an error code in pParse and return NULL. +*/ +Fts5ExprNode *sqlite3Fts5ParseNode( + Fts5Parse *pParse, /* Parse context */ + int eType, /* FTS5_STRING, AND, OR or NOT */ + Fts5ExprNode *pLeft, /* Left hand child expression */ + Fts5ExprNode *pRight, /* Right hand child expression */ + Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ +){ + Fts5ExprNode *pRet = 0; + + if( pParse->rc==SQLITE_OK ){ + assert( (eType!=FTS5_STRING && !pNear) + || (eType==FTS5_STRING && !pLeft && !pRight) + ); + if( eType==FTS5_STRING && pNear==0 ) return 0; + if( eType!=FTS5_STRING && pLeft==0 ) return pRight; + if( eType!=FTS5_STRING && pRight==0 ) return pLeft; + pRet = (Fts5ExprNode*)sqlite3_malloc(sizeof(Fts5ExprNode)); + if( pRet==0 ){ + pParse->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, sizeof(*pRet)); + pRet->eType = eType; + pRet->pLeft = pLeft; + pRet->pRight = pRight; + pRet->pNear = pNear; + if( eType==FTS5_STRING ){ + int iPhrase; + for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ + pNear->apPhrase[iPhrase]->pNode = pRet; + } + } + } + } + + if( pRet==0 ){ + assert( pParse->rc!=SQLITE_OK ); + sqlite3Fts5ParseNodeFree(pLeft); + sqlite3Fts5ParseNodeFree(pRight); + sqlite3Fts5ParseNearsetFree(pNear); + } + return pRet; +} + +static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ + char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2); + if( zQuoted ){ + int i = 0; + char *zIn = pTerm->zTerm; + zQuoted[i++] = '"'; + while( *zIn ){ + if( *zIn=='"' ) zQuoted[i++] = '"'; + zQuoted[i++] = *zIn++; + } + zQuoted[i++] = '"'; + if( pTerm->bPrefix ){ + zQuoted[i++] = ' '; + zQuoted[i++] = '*'; + } + zQuoted[i++] = '\0'; + } + return zQuoted; +} + +static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ + char *zNew; + va_list ap; + va_start(ap, zFmt); + zNew = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + if( zApp ){ + char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew); + sqlite3_free(zNew); + zNew = zNew2; + } + sqlite3_free(zApp); + return zNew; +} + +/* +** Compose a tcl-readable representation of expression pExpr. Return a +** pointer to a buffer containing that representation. It is the +** responsibility of the caller to at some point free the buffer using +** sqlite3_free(). +*/ +static char *fts5ExprPrintTcl( + Fts5Config *pConfig, + const char *zNearsetCmd, + Fts5ExprNode *pExpr +){ + char *zRet = 0; + if( pExpr->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pExpr->pNear; + int i; + int iTerm; + + zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd); + if( pNear->iCol>=0 ){ + zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol); + if( zRet==0 ) return 0; + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); + if( zRet==0 ) return 0; + } + + zRet = fts5PrintfAppend(zRet, "--"); + if( zRet==0 ) return 0; + + for(i=0; i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + + zRet = fts5PrintfAppend(zRet, " {"); + for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ + char *zTerm = pPhrase->aTerm[iTerm].zTerm; + zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + } + + if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); + if( zRet==0 ) return 0; + } + + if( zRet ) zRet = fts5PrintfAppend(zRet, "]"); + if( zRet==0 ) return 0; + + }else{ + char *zOp = 0; + char *z1 = 0; + char *z2 = 0; + switch( pExpr->eType ){ + case FTS5_AND: zOp = "&&"; break; + case FTS5_NOT: zOp = "&& !"; break; + case FTS5_OR: zOp = "||"; break; + default: assert( 0 ); + } + + z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft); + z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight); + if( z1 && z2 ){ + int b1 = pExpr->pLeft->eType!=FTS5_STRING; + int b2 = pExpr->pRight->eType!=FTS5_STRING; + zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", + b1 ? "(" : "", z1, b1 ? ")" : "", + zOp, + b2 ? "(" : "", z2, b2 ? ")" : "" + ); + } + sqlite3_free(z1); + sqlite3_free(z2); + } + + return zRet; +} + +static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ + char *zRet = 0; + if( pExpr->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pExpr->pNear; + int i; + int iTerm; + + if( pNear->iCol>=0 ){ + zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[pNear->iCol]); + if( zRet==0 ) return 0; + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, "NEAR("); + if( zRet==0 ) return 0; + } + + for(i=0; i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( i!=0 ){ + zRet = fts5PrintfAppend(zRet, " "); + if( zRet==0 ) return 0; + } + for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ + char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); + if( zTerm ){ + zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); + sqlite3_free(zTerm); + } + if( zTerm==0 || zRet==0 ){ + sqlite3_free(zRet); + return 0; + } + } + } + + if( pNear->nPhrase>1 ){ + zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); + if( zRet==0 ) return 0; + } + + }else{ + char *zOp = 0; + char *z1 = 0; + char *z2 = 0; + switch( pExpr->eType ){ + case FTS5_AND: zOp = "AND"; break; + case FTS5_NOT: zOp = "NOT"; break; + case FTS5_OR: zOp = "OR"; break; + default: assert( 0 ); + } + + z1 = fts5ExprPrint(pConfig, pExpr->pLeft); + z2 = fts5ExprPrint(pConfig, pExpr->pRight); + if( z1 && z2 ){ + int b1 = pExpr->pLeft->eType!=FTS5_STRING; + int b2 = pExpr->pRight->eType!=FTS5_STRING; + zRet = sqlite3_mprintf("%s%s%s %s %s%s%s", + b1 ? "(" : "", z1, b1 ? ")" : "", + zOp, + b2 ? "(" : "", z2, b2 ? ")" : "" + ); + } + sqlite3_free(z1); + sqlite3_free(z2); + } + + return zRet; +} + +/* +** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) +** and fts5_expr_tcl() (bTcl!=0). +*/ +static void fts5ExprFunction( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal, /* Function arguments */ + int bTcl +){ + Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); + sqlite3 *db = sqlite3_context_db_handle(pCtx); + const char *zExpr = 0; + char *zErr = 0; + Fts5Expr *pExpr = 0; + int rc; + int i; + + const char **azConfig; /* Array of arguments for Fts5Config */ + const char *zNearsetCmd = "nearset"; + int nConfig; /* Size of azConfig[] */ + Fts5Config *pConfig = 0; + + if( bTcl && nArg>1 ){ + zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); + } + + nConfig = nArg + 2 - bTcl; + azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); + if( azConfig==0 ){ + sqlite3_result_error_nomem(pCtx); + return; + } + azConfig[0] = 0; + azConfig[1] = "main"; + azConfig[2] = "tbl"; + for(i=1+bTcl; i<nArg; i++){ + azConfig[i+2-bTcl] = (const char*)sqlite3_value_text(apVal[i]); + } + zExpr = (const char*)sqlite3_value_text(apVal[0]); + + rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); + } + if( rc==SQLITE_OK ){ + char *zText; + if( pExpr->pRoot==0 ){ + zText = sqlite3_mprintf(""); + }else if( bTcl ){ + zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); + }else{ + zText = fts5ExprPrint(pConfig, pExpr->pRoot); + } + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); + sqlite3_free(zText); + } + } + + if( rc!=SQLITE_OK ){ + if( zErr ){ + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); + }else{ + sqlite3_result_error_code(pCtx, rc); + } + } + sqlite3_free(azConfig); + sqlite3Fts5ConfigFree(pConfig); + sqlite3Fts5ExprFree(pExpr); +} + +static void fts5ExprFunctionHr( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + fts5ExprFunction(pCtx, nArg, apVal, 0); +} +static void fts5ExprFunctionTcl( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + fts5ExprFunction(pCtx, nArg, apVal, 1); +} + +/* +** This is called during initialization to register the fts5_expr() scalar +** UDF with the SQLite handle passed as the only argument. +*/ +int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ + struct Fts5ExprFunc { + const char *z; + void (*x)(sqlite3_context*,int,sqlite3_value**); + } aFunc[] = { + { "fts5_expr", fts5ExprFunctionHr }, + { "fts5_expr_tcl", fts5ExprFunctionTcl }, + }; + int i; + int rc = SQLITE_OK; + void *pCtx = (void*)pGlobal; + + for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){ + struct Fts5ExprFunc *p = &aFunc[i]; + rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); + } + + return rc; +} + +/* +** Return the number of phrases in expression pExpr. +*/ +int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ + return pExpr->nPhrase; +} + +/* +** Return the number of terms in the iPhrase'th phrase in pExpr. +*/ +int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ + if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; + return pExpr->apExprPhrase[iPhrase]->nTerm; +} + +/* +** This function is used to access the current position list for phrase +** iPhrase. +*/ +int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ + if( iPhrase>=0 && iPhrase<pExpr->nPhrase ){ + Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; + Fts5ExprNode *pNode = pPhrase->pNode; + if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ + *pa = pPhrase->poslist.p; + return pPhrase->poslist.n; + } + } + *pa = 0; + return 0; +} + +#endif /* SQLITE_ENABLE_FTS5 */ |