aboutsummaryrefslogtreecommitdiff
path: root/ext/fts5/fts5_expr.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_expr.c')
-rw-r--r--ext/fts5/fts5_expr.c1701
1 files changed, 1701 insertions, 0 deletions
diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c
new file mode 100644
index 000000000..878b54f53
--- /dev/null
+++ b/ext/fts5/fts5_expr.c
@@ -0,0 +1,1701 @@
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+#ifdef SQLITE_ENABLE_FTS5
+
+
+#include "fts5Int.h"
+#include "fts5parse.h"
+
+/*
+** All token types in the generated fts5parse.h file are greater than 0.
+*/
+#define FTS5_EOF 0
+
+typedef struct Fts5ExprTerm Fts5ExprTerm;
+
+/*
+** Functions generated by lemon from fts5parse.y.
+*/
+void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
+void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
+void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
+
+struct Fts5Expr {
+ Fts5Index *pIndex;
+ Fts5ExprNode *pRoot;
+ int bDesc; /* Iterate in descending docid order */
+ int nPhrase; /* Number of phrases in expression */
+ Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */
+};
+
+/*
+** eType:
+** Expression node type. Always one of:
+**
+** FTS5_AND (pLeft, pRight valid)
+** FTS5_OR (pLeft, pRight valid)
+** FTS5_NOT (pLeft, pRight valid)
+** FTS5_STRING (pNear valid)
+*/
+struct Fts5ExprNode {
+ int eType; /* Node type */
+ Fts5ExprNode *pLeft; /* Left hand child node */
+ Fts5ExprNode *pRight; /* Right hand child node */
+ Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */
+ int bEof; /* True at EOF */
+ i64 iRowid; /* Current rowid */
+};
+
+/*
+** An instance of the following structure represents a single search term
+** or term prefix.
+*/
+struct Fts5ExprTerm {
+ int bPrefix; /* True for a prefix term */
+ char *zTerm; /* nul-terminated term */
+ Fts5IndexIter *pIter; /* Iterator for this term */
+};
+
+/*
+** A phrase. One or more terms that must appear in a contiguous sequence
+** within a document for it to match.
+*/
+struct Fts5ExprPhrase {
+ Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */
+ Fts5Buffer poslist; /* Current position list */
+ int nTerm; /* Number of entries in aTerm[] */
+ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */
+};
+
+/*
+** One or more phrases that must appear within a certain token distance of
+** each other within each matching document.
+*/
+struct Fts5ExprNearset {
+ int nNear; /* NEAR parameter */
+ int iCol; /* Column to search (-1 -> all columns) */
+ int nPhrase; /* Number of entries in aPhrase[] array */
+ Fts5ExprPhrase *apPhrase[0]; /* Array of phrase pointers */
+};
+
+
+/*
+** Parse context.
+*/
+struct Fts5Parse {
+ Fts5Config *pConfig;
+ char *zErr;
+ int rc;
+ int nPhrase; /* Size of apPhrase array */
+ Fts5ExprPhrase **apPhrase; /* Array of all phrases */
+ Fts5ExprNode *pExpr; /* Result of a successful parse */
+};
+
+void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
+ if( pParse->rc==SQLITE_OK ){
+ va_list ap;
+ va_start(ap, zFmt);
+ pParse->zErr = sqlite3_vmprintf(zFmt, ap);
+ va_end(ap);
+ pParse->rc = SQLITE_ERROR;
+ }
+}
+
+static int fts5ExprIsspace(char t){
+ return t==' ' || t=='\t' || t=='\n' || t=='\r';
+}
+
+static int fts5ExprIstoken(char t){
+ return fts5ExprIsspace(t)==0 && t!='\0'
+ && t!=':' && t!='(' && t!=')'
+ && t!=',' && t!='+' && t!='*';
+}
+
+/*
+** Read the first token from the nul-terminated string at *pz.
+*/
+static int fts5ExprGetToken(
+ Fts5Parse *pParse,
+ const char **pz, /* IN/OUT: Pointer into buffer */
+ Fts5Token *pToken
+){
+ const char *z = *pz;
+ int tok;
+
+ /* Skip past any whitespace */
+ while( fts5ExprIsspace(*z) ) z++;
+
+ pToken->p = z;
+ pToken->n = 1;
+ switch( *z ){
+ case '(': tok = FTS5_LP; break;
+ case ')': tok = FTS5_RP; break;
+ case ':': tok = FTS5_COLON; break;
+ case ',': tok = FTS5_COMMA; break;
+ case '+': tok = FTS5_PLUS; break;
+ case '*': tok = FTS5_STAR; break;
+ case '\0': tok = FTS5_EOF; break;
+
+ case '"': {
+ const char *z2;
+ tok = FTS5_STRING;
+
+ for(z2=&z[1]; 1; z2++){
+ if( z2[0]=='"' ){
+ z2++;
+ if( z2[0]!='"' ) break;
+ }
+ if( z2[0]=='\0' ){
+ sqlite3Fts5ParseError(pParse, "unterminated string");
+ return FTS5_EOF;
+ }
+ }
+ pToken->n = (z2 - z);
+ break;
+ }
+
+ default: {
+ const char *z2;
+ tok = FTS5_STRING;
+ for(z2=&z[1]; fts5ExprIstoken(*z2); z2++);
+ pToken->n = (z2 - z);
+ if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR;
+ if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
+ if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
+ break;
+ }
+ }
+
+ *pz = &pToken->p[pToken->n];
+ return tok;
+}
+
+static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); }
+static void fts5ParseFree(void *p){ sqlite3_free(p); }
+
+int sqlite3Fts5ExprNew(
+ Fts5Config *pConfig, /* FTS5 Configuration */
+ const char *zExpr, /* Expression text */
+ Fts5Expr **ppNew,
+ char **pzErr
+){
+ Fts5Parse sParse;
+ Fts5Token token;
+ const char *z = zExpr;
+ int t; /* Next token type */
+ void *pEngine;
+ Fts5Expr *pNew;
+
+ *ppNew = 0;
+ *pzErr = 0;
+ memset(&sParse, 0, sizeof(sParse));
+ pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
+ if( pEngine==0 ){ return SQLITE_NOMEM; }
+ sParse.pConfig = pConfig;
+
+ do {
+ t = fts5ExprGetToken(&sParse, &z, &token);
+ sqlite3Fts5Parser(pEngine, t, token, &sParse);
+ }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
+ sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
+
+ assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
+ if( sParse.rc==SQLITE_OK ){
+ *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
+ if( pNew==0 ){
+ sParse.rc = SQLITE_NOMEM;
+ sqlite3Fts5ParseNodeFree(sParse.pExpr);
+ }else{
+ pNew->pRoot = sParse.pExpr;
+ pNew->pIndex = 0;
+ pNew->apExprPhrase = sParse.apPhrase;
+ pNew->nPhrase = sParse.nPhrase;
+ sParse.apPhrase = 0;
+ }
+ }
+
+ sqlite3_free(sParse.apPhrase);
+ *pzErr = sParse.zErr;
+ return sParse.rc;
+}
+
+static char *fts5ExprStrdup(int *pRc, const char *zIn){
+ char *zRet = 0;
+ if( *pRc==SQLITE_OK ){
+ int nByte = strlen(zIn) + 1;
+ zRet = sqlite3_malloc(nByte);
+ if( zRet ){
+ memcpy(zRet, zIn, nByte);
+ }else{
+ *pRc = SQLITE_NOMEM;
+ }
+ }
+ return zRet;
+}
+
+static void *fts5ExprMalloc(int *pRc, int nByte){
+ void *pRet = 0;
+ if( *pRc==SQLITE_OK ){
+ pRet = sqlite3_malloc(nByte);
+ if( pRet ){
+ memset(pRet, 0, nByte);
+ }else{
+ *pRc = SQLITE_NOMEM;
+ }
+ }
+ return pRet;
+}
+
+/*
+** Create a new FTS5 expression by cloning phrase iPhrase of the
+** expression passed as the second argument.
+*/
+int sqlite3Fts5ExprPhraseExpr(
+ Fts5Config *pConfig,
+ Fts5Expr *pExpr,
+ int iPhrase,
+ Fts5Expr **ppNew
+){
+ int rc = SQLITE_OK; /* Return code */
+ Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */
+ int i; /* Used to iterate through phrase terms */
+
+ /* Components of the new expression object */
+ Fts5Expr *pNew;
+ Fts5ExprPhrase **apPhrase;
+ Fts5ExprNode *pNode;
+ Fts5ExprNearset *pNear;
+ Fts5ExprPhrase *pCopy;
+
+ pOrig = pExpr->apExprPhrase[iPhrase];
+ pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr));
+ apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*));
+ pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode));
+ pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc,
+ sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
+ );
+ pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc,
+ sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
+ );
+
+ for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
+ pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm);
+ pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
+ }
+
+ if( rc==SQLITE_OK ){
+ /* All the allocations succeeded. Put the expression object together. */
+ pNew->pIndex = pExpr->pIndex;
+ pNew->pRoot = pNode;
+ pNew->nPhrase = 1;
+ pNew->apExprPhrase = apPhrase;
+ pNew->apExprPhrase[0] = pCopy;
+
+ pNode->eType = FTS5_STRING;
+ pNode->pNear = pNear;
+
+ pNear->iCol = -1;
+ pNear->nPhrase = 1;
+ pNear->apPhrase[0] = pCopy;
+
+ pCopy->nTerm = pOrig->nTerm;
+ pCopy->pNode = pNode;
+ }else{
+ /* At least one allocation failed. Free them all. */
+ if( pCopy ){
+ for(i=0; i<pOrig->nTerm; i++){
+ sqlite3_free(pCopy->aTerm[i].zTerm);
+ }
+ sqlite3_free(pCopy);
+ sqlite3_free(pNear);
+ sqlite3_free(pNode);
+ sqlite3_free(apPhrase);
+ sqlite3_free(pNew);
+ pNew = 0;
+ }
+ }
+
+ *ppNew = pNew;
+ return rc;
+}
+
+/*
+** Free the expression node object passed as the only argument.
+*/
+void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
+ if( p ){
+ sqlite3Fts5ParseNodeFree(p->pLeft);
+ sqlite3Fts5ParseNodeFree(p->pRight);
+ sqlite3Fts5ParseNearsetFree(p->pNear);
+ sqlite3_free(p);
+ }
+}
+
+/*
+** Free the expression object passed as the only argument.
+*/
+void sqlite3Fts5ExprFree(Fts5Expr *p){
+ if( p ){
+ sqlite3Fts5ParseNodeFree(p->pRoot);
+ sqlite3_free(p->apExprPhrase);
+ sqlite3_free(p);
+ }
+}
+
+/*
+** All individual term iterators in pPhrase are guaranteed to be valid and
+** pointing to the same rowid when this function is called. This function
+** checks if the current rowid really is a match, and if so populates
+** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
+** is set to true if this is really a match, or false otherwise.
+**
+** SQLITE_OK is returned if an error occurs, or an SQLite error code
+** otherwise. It is not considered an error code if the current rowid is
+** not a match.
+*/
+static int fts5ExprPhraseIsMatch(
+ Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ int iCol, /* If >=0, search for matches in iCol only */
+ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
+ int *pbMatch /* OUT: Set to true if really a match */
+){
+ Fts5PoslistWriter writer = {0};
+ Fts5PoslistReader aStatic[4];
+ Fts5PoslistReader *aIter = aStatic;
+ int i;
+ int rc = SQLITE_OK;
+
+ fts5BufferZero(&pPhrase->poslist);
+
+ /* If the aStatic[] array is not large enough, allocate a large array
+ ** using sqlite3_malloc(). This approach could be improved upon. */
+ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
+ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
+ aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
+ if( !aIter ) return SQLITE_NOMEM;
+ }
+
+ /* Initialize a term iterator for each term in the phrase */
+ for(i=0; i<pPhrase->nTerm; i++){
+ int n;
+ const u8 *a;
+ rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n);
+ if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
+ goto ismatch_out;
+ }
+ }
+
+ while( 1 ){
+ int bMatch;
+ i64 iPos = aIter[0].iPos;
+ do {
+ bMatch = 1;
+ for(i=0; i<pPhrase->nTerm; i++){
+ Fts5PoslistReader *pPos = &aIter[i];
+ i64 iAdj = iPos + i;
+ if( pPos->iPos!=iAdj ){
+ bMatch = 0;
+ while( pPos->iPos<iAdj ){
+ if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
+ }
+ if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
+ }
+ }
+ }while( bMatch==0 );
+
+ /* Append position iPos to the output */
+ rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
+ if( rc!=SQLITE_OK ) goto ismatch_out;
+
+ for(i=0; i<pPhrase->nTerm; i++){
+ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
+ }
+ }
+
+ ismatch_out:
+ *pbMatch = (pPhrase->poslist.n>0);
+ if( aIter!=aStatic ) sqlite3_free(aIter);
+ return rc;
+}
+
+typedef struct Fts5LookaheadReader Fts5LookaheadReader;
+struct Fts5LookaheadReader {
+ const u8 *a; /* Buffer containing position list */
+ int n; /* Size of buffer a[] in bytes */
+ int i; /* Current offset in position list */
+ i64 iPos; /* Current position */
+ i64 iLookahead; /* Next position */
+};
+
+#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
+
+static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
+ p->iPos = p->iLookahead;
+ if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
+ p->iLookahead = FTS5_LOOKAHEAD_EOF;
+ }
+ return (p->iPos==FTS5_LOOKAHEAD_EOF);
+}
+
+static int fts5LookaheadReaderInit(
+ const u8 *a, int n, /* Buffer to read position list from */
+ Fts5LookaheadReader *p /* Iterator object to initialize */
+){
+ memset(p, 0, sizeof(Fts5LookaheadReader));
+ p->a = a;
+ p->n = n;
+ fts5LookaheadReaderNext(p);
+ return fts5LookaheadReaderNext(p);
+}
+
+#if 0
+static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){
+ return (p->iPos==FTS5_LOOKAHEAD_EOF);
+}
+#endif
+
+typedef struct Fts5NearTrimmer Fts5NearTrimmer;
+struct Fts5NearTrimmer {
+ Fts5LookaheadReader reader; /* Input iterator */
+ Fts5PoslistWriter writer; /* Writer context */
+ Fts5Buffer *pOut; /* Output poslist */
+};
+
+/*
+** The near-set object passed as the first argument contains more than
+** one phrase. All phrases currently point to the same row. The
+** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
+** tests if the current row contains instances of each phrase sufficiently
+** close together to meet the NEAR constraint. Output variable *pbMatch
+** is set to true if it does, or false otherwise.
+**
+** If no error occurs, SQLITE_OK is returned. Or, if an error does occur,
+** an SQLite error code. If a value other than SQLITE_OK is returned, the
+** final value of *pbMatch is undefined.
+**
+** TODO: This function should also edit the position lists associated
+** with each phrase to remove any phrase instances that are not part of
+** a set of intances that collectively matches the NEAR constraint.
+*/
+static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){
+ Fts5NearTrimmer aStatic[4];
+ Fts5NearTrimmer *a = aStatic;
+
+ Fts5ExprPhrase **apPhrase = pNear->apPhrase;
+
+ int i;
+ int rc = SQLITE_OK;
+ int bMatch;
+
+ assert( pNear->nPhrase>1 );
+
+ /* If the aStatic[] array is not large enough, allocate a large array
+ ** using sqlite3_malloc(). This approach could be improved upon. */
+ if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){
+ int nByte = sizeof(Fts5LookaheadReader) * pNear->nPhrase;
+ a = (Fts5NearTrimmer*)sqlite3_malloc(nByte);
+ if( !a ) return SQLITE_NOMEM;
+ memset(a, 0, nByte);
+ }else{
+ memset(aStatic, 0, sizeof(aStatic));
+ }
+
+ /* Initialize a lookahead iterator for each phrase. After passing the
+ ** buffer and buffer size to the lookaside-reader init function, zero
+ ** the phrase poslist buffer. The new poslist for the phrase (containing
+ ** the same entries as the original with some entries removed on account
+ ** of the NEAR constraint) is written over the original even as it is
+ ** being read. This is safe as the entries for the new poslist are a
+ ** subset of the old, so it is not possible for data yet to be read to
+ ** be overwritten. */
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
+ fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
+ pPoslist->n = 0;
+ a[i].pOut = pPoslist;
+ }
+
+ while( 1 ){
+ int iAdv;
+ i64 iMin;
+ i64 iMax;
+
+ /* This block advances the phrase iterators until they point to a set of
+ ** entries that together comprise a match. */
+ iMax = a[0].reader.iPos;
+ do {
+ bMatch = 1;
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5LookaheadReader *pPos = &a[i].reader;
+ iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
+ if( pPos->iPos<iMin || pPos->iPos>iMax ){
+ bMatch = 0;
+ while( pPos->iPos<iMin ){
+ if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
+ }
+ if( pPos->iPos>iMax ) iMax = pPos->iPos;
+ }
+ }
+ }while( bMatch==0 );
+
+ /* Add an entry to each output position list */
+ for(i=0; i<pNear->nPhrase; i++){
+ i64 iPos = a[i].reader.iPos;
+ Fts5PoslistWriter *pWriter = &a[i].writer;
+ if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
+ sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
+ }
+ }
+
+ iAdv = 0;
+ iMin = a[0].reader.iLookahead;
+ for(i=0; i<pNear->nPhrase; i++){
+ if( a[i].reader.iLookahead < iMin ){
+ iMin = a[i].reader.iLookahead;
+ iAdv = i;
+ }
+ }
+ if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
+ }
+
+ ismatch_out:
+ *pbMatch = (a[0].pOut->n>0);
+ if( a!=aStatic ) sqlite3_free(a);
+ return rc;
+}
+
+/*
+** Advance each term iterator in each phrase in pNear. If any reach EOF,
+** set output variable *pbEof to true before returning.
+*/
+static int fts5ExprNearAdvanceAll(
+ Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ Fts5ExprNearset *pNear, /* Near object to advance iterators of */
+ int *pbEof /* OUT: Set to true if phrase at EOF */
+){
+ int i, j; /* Phrase and token index, respectively */
+
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ for(j=0; j<pPhrase->nTerm; j++){
+ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
+ int rc = sqlite3Fts5IterNext(pIter);
+ if( rc || sqlite3Fts5IterEof(pIter) ){
+ *pbEof = 1;
+ return rc;
+ }
+ }
+ }
+
+ return SQLITE_OK;
+}
+
+/*
+** Advance iterator pIter until it points to a value equal to or laster
+** than the initial value of *piLast. If this means the iterator points
+** to a value laster than *piLast, update *piLast to the new lastest value.
+**
+** If the iterator reaches EOF, set *pbEof to true before returning. If
+** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
+** are set, return a non-zero value. Otherwise, return zero.
+*/
+static int fts5ExprAdvanceto(
+ Fts5IndexIter *pIter, /* Iterator to advance */
+ int bDesc, /* True if iterator is "rowid DESC" */
+ i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
+ int *pRc, /* OUT: Error code */
+ int *pbEof /* OUT: Set to true if EOF */
+){
+ i64 iLast = *piLast;
+ i64 iRowid;
+
+ iRowid = sqlite3Fts5IterRowid(pIter);
+ if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
+ sqlite3Fts5IterNextFrom(pIter, iLast);
+ if( sqlite3Fts5IterEof(pIter) ){
+ *pbEof = 1;
+ return 1;
+ }
+ iRowid = sqlite3Fts5IterRowid(pIter);
+ assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
+ }
+ *piLast = iRowid;
+
+ return 0;
+}
+
+/*
+** All individual term iterators in pNear are guaranteed to be valid when
+** this function is called. This function checks if all term iterators
+** point to the same rowid, and if not, advances them until they do.
+** If an EOF is reached before this happens, *pbEof is set to true before
+** returning.
+**
+** SQLITE_OK is returned if an error occurs, or an SQLite error code
+** otherwise. It is not considered an error code if an iterator reaches
+** EOF.
+*/
+static int fts5ExprNearNextRowidMatch(
+ Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ Fts5ExprNode *pNode,
+ int bFromValid,
+ i64 iFrom
+){
+ Fts5ExprNearset *pNear = pNode->pNear;
+ int rc = SQLITE_OK;
+ int i, j; /* Phrase and token index, respectively */
+ i64 iLast; /* Lastest rowid any iterator points to */
+ int bMatch; /* True if all terms are at the same rowid */
+
+ /* Initialize iLast, the "lastest" rowid any iterator points to. If the
+ ** iterator skips through rowids in the default ascending order, this means
+ ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
+ ** means the minimum rowid. */
+ iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter);
+ if( bFromValid && (iFrom>iLast)==(pExpr->bDesc==0) ){
+ assert( pExpr->bDesc || iFrom>=iLast );
+ iLast = iFrom;
+ }
+
+ do {
+ bMatch = 1;
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ for(j=0; j<pPhrase->nTerm; j++){
+ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
+ i64 iRowid = sqlite3Fts5IterRowid(pIter);
+ if( iRowid!=iLast ) bMatch = 0;
+ if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast, &rc, &pNode->bEof) ){
+ return rc;
+ }
+ }
+ }
+ }while( bMatch==0 );
+
+ pNode->iRowid = iLast;
+ return rc;
+}
+
+/*
+** Argument pNode points to a NEAR node. All individual term iterators
+** point to valid entries (not EOF).
+*
+** This function tests if the term iterators currently all point to the
+** same rowid, and if so, if the row matches the phrase and NEAR constraints.
+** If so, the pPhrase->poslist buffers are populated and the pNode->iRowid
+** variable set before returning. Or, if the current combination of
+** iterators is not a match, they are advanced until they are. If one of
+** the iterators reaches EOF before a match is found, *pbEof is set to
+** true before returning. The final values of the pPhrase->poslist and
+** iRowid fields are undefined in this case.
+**
+** SQLITE_OK is returned if an error occurs, or an SQLite error code
+** otherwise. It is not considered an error code if an iterator reaches
+** EOF.
+*/
+static int fts5ExprNearNextMatch(
+ Fts5Expr *pExpr, /* Expression that pNear is a part of */
+ Fts5ExprNode *pNode, /* The "NEAR" node (FTS5_STRING) */
+ int bFromValid,
+ i64 iFrom
+){
+ int rc = SQLITE_OK;
+ Fts5ExprNearset *pNear = pNode->pNear;
+ while( 1 ){
+ int i;
+
+ /* Advance the iterators until they all point to the same rowid */
+ rc = fts5ExprNearNextRowidMatch(pExpr, pNode, bFromValid, iFrom);
+ if( pNode->bEof || rc!=SQLITE_OK ) break;
+
+ /* Check that each phrase in the nearset matches the current row.
+ ** Populate the pPhrase->poslist buffers at the same time. If any
+ ** phrase is not a match, break out of the loop early. */
+ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ if( pPhrase->nTerm>1 || pNear->iCol>=0 ){
+ int bMatch = 0;
+ rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch);
+ if( bMatch==0 ) break;
+ }else{
+ int n;
+ const u8 *a;
+ rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &a, &n);
+ fts5BufferSet(&rc, &pPhrase->poslist, n, a);
+ }
+ }
+
+ if( rc==SQLITE_OK && i==pNear->nPhrase ){
+ int bMatch = 1;
+ if( pNear->nPhrase>1 ){
+ rc = fts5ExprNearIsMatch(pNear, &bMatch);
+ }
+ if( rc!=SQLITE_OK || bMatch ) break;
+ }
+
+ /* If control flows to here, then the current rowid is not a match.
+ ** Advance all term iterators in all phrases to the next rowid. */
+ if( rc==SQLITE_OK ){
+ rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof);
+ }
+ if( pNode->bEof || rc!=SQLITE_OK ) break;
+ }
+
+ return rc;
+}
+
+/*
+** Initialize all term iterators in the pNear object. If any term is found
+** to match no documents at all, set *pbEof to true and return immediately,
+** without initializing any further iterators.
+*/
+static int fts5ExprNearInitAll(
+ Fts5Expr *pExpr,
+ Fts5ExprNode *pNode
+){
+ Fts5ExprNearset *pNear = pNode->pNear;
+ Fts5ExprTerm *pTerm;
+ Fts5ExprPhrase *pPhrase;
+ int i, j;
+ int rc = SQLITE_OK;
+
+ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
+ pPhrase = pNear->apPhrase[i];
+ for(j=0; j<pPhrase->nTerm; j++){
+ pTerm = &pPhrase->aTerm[j];
+ rc = sqlite3Fts5IndexQuery(
+ pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
+ (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
+ (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
+ &pTerm->pIter
+ );
+ assert( rc==SQLITE_OK || pTerm->pIter==0 );
+ if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
+ pNode->bEof = 1;
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+/* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. */
+static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*, int, i64);
+
+/*
+** Compare the values currently indicated by the two nodes as follows:
+**
+** res = (*p1) - (*p2)
+**
+** Nodes that point to values that come later in the iteration order are
+** considered to be larger. Nodes at EOF are the largest of all.
+**
+** This means that if the iteration order is ASC, then numerically larger
+** rowids are considered larger. Or if it is the default DESC, numerically
+** smaller rowids are larger.
+*/
+static int fts5NodeCompare(
+ Fts5Expr *pExpr,
+ Fts5ExprNode *p1,
+ Fts5ExprNode *p2
+){
+ if( p2->bEof ) return -1;
+ if( p1->bEof ) return +1;
+ if( pExpr->bDesc==0 ){
+ if( p1->iRowid<p2->iRowid ) return -1;
+ return (p1->iRowid > p2->iRowid);
+ }else{
+ if( p1->iRowid>p2->iRowid ) return -1;
+ return (p1->iRowid < p2->iRowid);
+ }
+}
+
+/*
+** Advance node iterator pNode, part of expression pExpr. If argument
+** bFromValid is zero, then pNode is advanced exactly once. Or, if argument
+** bFromValid is non-zero, then pNode is advanced until it is at or past
+** rowid value iFrom. Whether "past" means "less than" or "greater than"
+** depends on whether this is an ASC or DESC iterator.
+*/
+static int fts5ExprNodeNext(
+ Fts5Expr *pExpr,
+ Fts5ExprNode *pNode,
+ int bFromValid,
+ i64 iFrom
+){
+ int rc = SQLITE_OK;
+
+ if( pNode->bEof==0 ){
+ switch( pNode->eType ){
+ case FTS5_STRING: {
+ rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof);
+ break;
+ };
+
+ case FTS5_AND: {
+ rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom);
+ if( rc==SQLITE_OK ){
+ /* todo: update (iFrom/bFromValid) here */
+ rc = fts5ExprNodeNext(pExpr, pNode->pRight, bFromValid, iFrom);
+ }
+ break;
+ }
+
+ case FTS5_OR: {
+ Fts5ExprNode *p1 = pNode->pLeft;
+ Fts5ExprNode *p2 = pNode->pRight;
+ int cmp = fts5NodeCompare(pExpr, p1, p2);
+
+ if( cmp==0 ){
+ rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
+ if( rc==SQLITE_OK ){
+ rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom);
+ }
+ }else{
+ rc = fts5ExprNodeNext(pExpr, (cmp < 0) ? p1 : p2, bFromValid, iFrom);
+ }
+
+ break;
+ }
+
+ default: assert( pNode->eType==FTS5_NOT ); {
+ rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom);
+ break;
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = fts5ExprNodeNextMatch(pExpr, pNode, bFromValid, iFrom);
+ }
+ }
+
+ return rc;
+}
+
+static void fts5ExprSetEof(Fts5ExprNode *pNode){
+ if( pNode ){
+ pNode->bEof = 1;
+ fts5ExprSetEof(pNode->pLeft);
+ fts5ExprSetEof(pNode->pRight);
+ }
+}
+
+/*
+**
+*/
+static int fts5ExprNodeNextMatch(
+ Fts5Expr *pExpr,
+ Fts5ExprNode *pNode,
+ int bFromValid,
+ i64 iFrom
+){
+ int rc = SQLITE_OK;
+ if( pNode->bEof==0 ){
+ switch( pNode->eType ){
+
+ case FTS5_STRING: {
+ rc = fts5ExprNearNextMatch(pExpr, pNode, bFromValid, iFrom);
+ break;
+ }
+
+ case FTS5_AND: {
+ Fts5ExprNode *p1 = pNode->pLeft;
+ Fts5ExprNode *p2 = pNode->pRight;
+
+ while( p1->bEof==0 && p2->bEof==0 && p2->iRowid!=p1->iRowid ){
+ Fts5ExprNode *pAdv;
+ assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
+ if( pExpr->bDesc==(p1->iRowid > p2->iRowid) ){
+ pAdv = p1;
+ if( bFromValid==0 || pExpr->bDesc==(p2->iRowid < iFrom) ){
+ iFrom = p2->iRowid;
+ }
+ }else{
+ pAdv = p2;
+ if( bFromValid==0 || pExpr->bDesc==(p1->iRowid < iFrom) ){
+ iFrom = p1->iRowid;
+ }
+ }
+ rc = fts5ExprNodeNext(pExpr, pAdv, 1, iFrom);
+ if( rc!=SQLITE_OK ) break;
+ }
+ if( p1->bEof || p2->bEof ){
+ fts5ExprSetEof(pNode);
+ }
+ pNode->iRowid = p1->iRowid;
+ break;
+ }
+
+ case FTS5_OR: {
+ Fts5ExprNode *p1 = pNode->pLeft;
+ Fts5ExprNode *p2 = pNode->pRight;
+ Fts5ExprNode *pNext = (fts5NodeCompare(pExpr, p1, p2) > 0 ? p2 : p1);
+ pNode->bEof = pNext->bEof;
+ pNode->iRowid = pNext->iRowid;
+ break;
+ }
+
+ default: assert( pNode->eType==FTS5_NOT ); {
+ Fts5ExprNode *p1 = pNode->pLeft;
+ Fts5ExprNode *p2 = pNode->pRight;
+ while( rc==SQLITE_OK ){
+ int cmp;
+ while( rc==SQLITE_OK && (cmp = fts5NodeCompare(pExpr, p1, p2))>0 ){
+ rc = fts5ExprNodeNext(pExpr, p2, bFromValid, iFrom);
+ }
+ if( rc || cmp ) break;
+ rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
+ }
+ pNode->bEof = p1->bEof;
+ pNode->iRowid = p1->iRowid;
+ break;
+ }
+ }
+ }
+ return rc;
+}
+
+
+/*
+** Set node pNode, which is part of expression pExpr, to point to the first
+** match. If there are no matches, set the Node.bEof flag to indicate EOF.
+**
+** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
+** It is not an error if there are no matches.
+*/
+static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
+ int rc = SQLITE_OK;
+ pNode->bEof = 0;
+
+ if( pNode->eType==FTS5_STRING ){
+
+ /* Initialize all term iterators in the NEAR object. */
+ rc = fts5ExprNearInitAll(pExpr, pNode);
+
+ /* Attempt to advance to the first match */
+ if( rc==SQLITE_OK && pNode->bEof==0 ){
+ rc = fts5ExprNearNextMatch(pExpr, pNode, 0, 0);
+ }
+
+ }else{
+ rc = fts5ExprNodeFirst(pExpr, pNode->pLeft);
+ if( rc==SQLITE_OK ){
+ rc = fts5ExprNodeFirst(pExpr, pNode->pRight);
+ }
+ if( rc==SQLITE_OK ){
+ rc = fts5ExprNodeNextMatch(pExpr, pNode, 0, 0);
+ }
+ }
+ return rc;
+}
+
+
+
+/*
+** Begin iterating through the set of documents in index pIdx matched by
+** the MATCH expression passed as the first argument. If the "bDesc" parameter
+** is passed a non-zero value, iteration is in descending rowid order. Or,
+** if it is zero, in ascending order.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
+** is not considered an error if the query does not match any documents.
+*/
+int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){
+ int rc = SQLITE_OK;
+ if( p->pRoot ){
+ p->pIndex = pIdx;
+ p->bDesc = bDesc;
+ rc = fts5ExprNodeFirst(p, p->pRoot);
+ }
+ return rc;
+}
+
+/*
+** Move to the next document
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
+** is not considered an error if the query does not match any documents.
+*/
+int sqlite3Fts5ExprNext(Fts5Expr *p){
+ int rc;
+ rc = fts5ExprNodeNext(p, p->pRoot, 0, 0);
+ return rc;
+}
+
+int sqlite3Fts5ExprEof(Fts5Expr *p){
+ return (p->pRoot==0 || p->pRoot->bEof);
+}
+
+i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
+ return p->pRoot->iRowid;
+}
+
+/*
+** Argument pIn points to a buffer of nIn bytes. This function allocates
+** and returns a new buffer populated with a copy of (pIn/nIn) with a
+** nul-terminator byte appended to it.
+**
+** It is the responsibility of the caller to eventually free the returned
+** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
+*/
+static char *fts5Strndup(int *pRc, const char *pIn, int nIn){
+ char *zRet = 0;
+ if( *pRc==SQLITE_OK ){
+ zRet = (char*)sqlite3_malloc(nIn+1);
+ if( zRet ){
+ memcpy(zRet, pIn, nIn);
+ zRet[nIn] = '\0';
+ }else{
+ *pRc = SQLITE_NOMEM;
+ }
+ }
+ return zRet;
+}
+
+static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
+ int rc = SQLITE_OK;
+ *pz = fts5Strndup(&rc, pToken->p, pToken->n);
+ return rc;
+}
+
+/*
+** Free the phrase object passed as the only argument.
+*/
+static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
+ if( pPhrase ){
+ int i;
+ for(i=0; i<pPhrase->nTerm; i++){
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
+ sqlite3_free(pTerm->zTerm);
+ if( pTerm->pIter ){
+ sqlite3Fts5IterClose(pTerm->pIter);
+ }
+ }
+ fts5BufferFree(&pPhrase->poslist);
+ sqlite3_free(pPhrase);
+ }
+}
+
+/*
+** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
+** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
+** appended to it and the results returned.
+**
+** If an OOM error occurs, both the pNear and pPhrase objects are freed and
+** NULL returned.
+*/
+Fts5ExprNearset *sqlite3Fts5ParseNearset(
+ Fts5Parse *pParse, /* Parse context */
+ Fts5ExprNearset *pNear, /* Existing nearset, or NULL */
+ Fts5ExprPhrase *pPhrase /* Recently parsed phrase */
+){
+ const int SZALLOC = 8;
+ Fts5ExprNearset *pRet = 0;
+
+ if( pParse->rc==SQLITE_OK ){
+ if( pPhrase==0 ){
+ return pNear;
+ }
+ if( pNear==0 ){
+ int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
+ pRet = sqlite3_malloc(nByte);
+ if( pRet==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ }else{
+ memset(pRet, 0, nByte);
+ pRet->iCol = -1;
+ }
+ }else if( (pNear->nPhrase % SZALLOC)==0 ){
+ int nNew = pRet->nPhrase + SZALLOC;
+ int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
+
+ pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte);
+ if( pRet==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ }
+ }else{
+ pRet = pNear;
+ }
+ }
+
+ if( pRet==0 ){
+ assert( pParse->rc!=SQLITE_OK );
+ sqlite3Fts5ParseNearsetFree(pNear);
+ sqlite3Fts5ParsePhraseFree(pPhrase);
+ }else{
+ pRet->apPhrase[pRet->nPhrase++] = pPhrase;
+ }
+ return pRet;
+}
+
+typedef struct TokenCtx TokenCtx;
+struct TokenCtx {
+ Fts5ExprPhrase *pPhrase;
+};
+
+/*
+** Callback for tokenizing terms used by ParseTerm().
+*/
+static int fts5ParseTokenize(
+ void *pContext, /* Pointer to Fts5InsertCtx object */
+ const char *pToken, /* Buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Start offset of token */
+ int iEnd /* End offset of token */
+){
+ int rc = SQLITE_OK;
+ const int SZALLOC = 8;
+ TokenCtx *pCtx = (TokenCtx*)pContext;
+ Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
+ Fts5ExprTerm *pTerm;
+
+ if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
+ Fts5ExprPhrase *pNew;
+ int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
+
+ pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
+ sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
+ );
+ if( pNew==0 ) return SQLITE_NOMEM;
+ if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
+ pCtx->pPhrase = pPhrase = pNew;
+ pNew->nTerm = nNew - SZALLOC;
+ }
+
+ pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
+ memset(pTerm, 0, sizeof(Fts5ExprTerm));
+ pTerm->zTerm = fts5Strndup(&rc, pToken, nToken);
+
+ return rc;
+}
+
+
+/*
+** Free the phrase object passed as the only argument.
+*/
+void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
+ fts5ExprPhraseFree(pPhrase);
+}
+
+/*
+** Free the phrase object passed as the second argument.
+*/
+void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
+ if( pNear ){
+ int i;
+ for(i=0; i<pNear->nPhrase; i++){
+ fts5ExprPhraseFree(pNear->apPhrase[i]);
+ }
+ sqlite3_free(pNear);
+ }
+}
+
+void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
+ assert( pParse->pExpr==0 );
+ pParse->pExpr = p;
+}
+
+/*
+** This function is called by the parser to process a string token. The
+** string may or may not be quoted. In any case it is tokenized and a
+** phrase object consisting of all tokens returned.
+*/
+Fts5ExprPhrase *sqlite3Fts5ParseTerm(
+ Fts5Parse *pParse, /* Parse context */
+ Fts5ExprPhrase *pAppend, /* Phrase to append to */
+ Fts5Token *pToken, /* String to tokenize */
+ int bPrefix /* True if there is a trailing "*" */
+){
+ Fts5Config *pConfig = pParse->pConfig;
+ TokenCtx sCtx; /* Context object passed to callback */
+ int rc; /* Tokenize return code */
+ char *z = 0;
+
+ memset(&sCtx, 0, sizeof(TokenCtx));
+ sCtx.pPhrase = pAppend;
+
+ rc = fts5ParseStringFromToken(pToken, &z);
+ if( rc==SQLITE_OK ){
+ sqlite3Fts5Dequote(z);
+ rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
+ }
+ sqlite3_free(z);
+ if( rc ){
+ pParse->rc = rc;
+ fts5ExprPhraseFree(sCtx.pPhrase);
+ sCtx.pPhrase = 0;
+ }else if( sCtx.pPhrase ){
+
+ if( pAppend==0 ){
+ if( (pParse->nPhrase % 8)==0 ){
+ int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
+ Fts5ExprPhrase **apNew;
+ apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte);
+ if( apNew==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ fts5ExprPhraseFree(sCtx.pPhrase);
+ return 0;
+ }
+ pParse->apPhrase = apNew;
+ }
+ pParse->nPhrase++;
+ }
+
+ pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
+ assert( sCtx.pPhrase->nTerm>0 );
+ sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
+ }
+
+ return sCtx.pPhrase;
+}
+
+/*
+** Token pTok has appeared in a MATCH expression where the NEAR operator
+** is expected. If token pTok does not contain "NEAR", store an error
+** in the pParse object.
+*/
+void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
+ if( pParse->rc==SQLITE_OK ){
+ if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
+ sqlite3Fts5ParseError(
+ pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
+ );
+ }
+ }
+}
+
+void sqlite3Fts5ParseSetDistance(
+ Fts5Parse *pParse,
+ Fts5ExprNearset *pNear,
+ Fts5Token *p
+){
+ int nNear = 0;
+ int i;
+ if( p->n ){
+ for(i=0; i<p->n; i++){
+ char c = (char)p->p[i];
+ if( c<'0' || c>'9' ){
+ sqlite3Fts5ParseError(
+ pParse, "expected integer, got \"%.*s\"", p->n, p->p
+ );
+ return;
+ }
+ nNear = nNear * 10 + (p->p[i] - '0');
+ }
+ }else{
+ nNear = FTS5_DEFAULT_NEARDIST;
+ }
+ pNear->nNear = nNear;
+}
+
+void sqlite3Fts5ParseSetColumn(
+ Fts5Parse *pParse,
+ Fts5ExprNearset *pNear,
+ Fts5Token *p
+){
+ char *z = 0;
+ int rc = fts5ParseStringFromToken(p, &z);
+ if( rc==SQLITE_OK ){
+ Fts5Config *pConfig = pParse->pConfig;
+ int i;
+ for(i=0; i<pConfig->nCol; i++){
+ if( 0==sqlite3_stricmp(pConfig->azCol[i], z) ){
+ pNear->iCol = i;
+ break;
+ }
+ }
+ if( i==pConfig->nCol ){
+ sqlite3Fts5ParseError(pParse, "no such column: %s", z);
+ }
+ sqlite3_free(z);
+ }else{
+ pParse->rc = rc;
+ }
+}
+
+/*
+** Allocate and return a new expression object. If anything goes wrong (i.e.
+** OOM error), leave an error code in pParse and return NULL.
+*/
+Fts5ExprNode *sqlite3Fts5ParseNode(
+ Fts5Parse *pParse, /* Parse context */
+ int eType, /* FTS5_STRING, AND, OR or NOT */
+ Fts5ExprNode *pLeft, /* Left hand child expression */
+ Fts5ExprNode *pRight, /* Right hand child expression */
+ Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */
+){
+ Fts5ExprNode *pRet = 0;
+
+ if( pParse->rc==SQLITE_OK ){
+ assert( (eType!=FTS5_STRING && !pNear)
+ || (eType==FTS5_STRING && !pLeft && !pRight)
+ );
+ if( eType==FTS5_STRING && pNear==0 ) return 0;
+ if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
+ if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
+ pRet = (Fts5ExprNode*)sqlite3_malloc(sizeof(Fts5ExprNode));
+ if( pRet==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ }else{
+ memset(pRet, 0, sizeof(*pRet));
+ pRet->eType = eType;
+ pRet->pLeft = pLeft;
+ pRet->pRight = pRight;
+ pRet->pNear = pNear;
+ if( eType==FTS5_STRING ){
+ int iPhrase;
+ for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
+ pNear->apPhrase[iPhrase]->pNode = pRet;
+ }
+ }
+ }
+ }
+
+ if( pRet==0 ){
+ assert( pParse->rc!=SQLITE_OK );
+ sqlite3Fts5ParseNodeFree(pLeft);
+ sqlite3Fts5ParseNodeFree(pRight);
+ sqlite3Fts5ParseNearsetFree(pNear);
+ }
+ return pRet;
+}
+
+static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
+ char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
+ if( zQuoted ){
+ int i = 0;
+ char *zIn = pTerm->zTerm;
+ zQuoted[i++] = '"';
+ while( *zIn ){
+ if( *zIn=='"' ) zQuoted[i++] = '"';
+ zQuoted[i++] = *zIn++;
+ }
+ zQuoted[i++] = '"';
+ if( pTerm->bPrefix ){
+ zQuoted[i++] = ' ';
+ zQuoted[i++] = '*';
+ }
+ zQuoted[i++] = '\0';
+ }
+ return zQuoted;
+}
+
+static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
+ char *zNew;
+ va_list ap;
+ va_start(ap, zFmt);
+ zNew = sqlite3_vmprintf(zFmt, ap);
+ va_end(ap);
+ if( zApp ){
+ char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew);
+ sqlite3_free(zNew);
+ zNew = zNew2;
+ }
+ sqlite3_free(zApp);
+ return zNew;
+}
+
+/*
+** Compose a tcl-readable representation of expression pExpr. Return a
+** pointer to a buffer containing that representation. It is the
+** responsibility of the caller to at some point free the buffer using
+** sqlite3_free().
+*/
+static char *fts5ExprPrintTcl(
+ Fts5Config *pConfig,
+ const char *zNearsetCmd,
+ Fts5ExprNode *pExpr
+){
+ char *zRet = 0;
+ if( pExpr->eType==FTS5_STRING ){
+ Fts5ExprNearset *pNear = pExpr->pNear;
+ int i;
+ int iTerm;
+
+ zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd);
+ if( pNear->iCol>=0 ){
+ zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol);
+ if( zRet==0 ) return 0;
+ }
+
+ if( pNear->nPhrase>1 ){
+ zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
+ if( zRet==0 ) return 0;
+ }
+
+ zRet = fts5PrintfAppend(zRet, "--");
+ if( zRet==0 ) return 0;
+
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+
+ zRet = fts5PrintfAppend(zRet, " {");
+ for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
+ char *zTerm = pPhrase->aTerm[iTerm].zTerm;
+ zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
+ }
+
+ if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
+ if( zRet==0 ) return 0;
+ }
+
+ if( zRet ) zRet = fts5PrintfAppend(zRet, "]");
+ if( zRet==0 ) return 0;
+
+ }else{
+ char *zOp = 0;
+ char *z1 = 0;
+ char *z2 = 0;
+ switch( pExpr->eType ){
+ case FTS5_AND: zOp = "&&"; break;
+ case FTS5_NOT: zOp = "&& !"; break;
+ case FTS5_OR: zOp = "||"; break;
+ default: assert( 0 );
+ }
+
+ z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft);
+ z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight);
+ if( z1 && z2 ){
+ int b1 = pExpr->pLeft->eType!=FTS5_STRING;
+ int b2 = pExpr->pRight->eType!=FTS5_STRING;
+ zRet = sqlite3_mprintf("%s%s%s %s %s%s%s",
+ b1 ? "(" : "", z1, b1 ? ")" : "",
+ zOp,
+ b2 ? "(" : "", z2, b2 ? ")" : ""
+ );
+ }
+ sqlite3_free(z1);
+ sqlite3_free(z2);
+ }
+
+ return zRet;
+}
+
+static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
+ char *zRet = 0;
+ if( pExpr->eType==FTS5_STRING ){
+ Fts5ExprNearset *pNear = pExpr->pNear;
+ int i;
+ int iTerm;
+
+ if( pNear->iCol>=0 ){
+ zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[pNear->iCol]);
+ if( zRet==0 ) return 0;
+ }
+
+ if( pNear->nPhrase>1 ){
+ zRet = fts5PrintfAppend(zRet, "NEAR(");
+ if( zRet==0 ) return 0;
+ }
+
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ if( i!=0 ){
+ zRet = fts5PrintfAppend(zRet, " ");
+ if( zRet==0 ) return 0;
+ }
+ for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
+ char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
+ if( zTerm ){
+ zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
+ sqlite3_free(zTerm);
+ }
+ if( zTerm==0 || zRet==0 ){
+ sqlite3_free(zRet);
+ return 0;
+ }
+ }
+ }
+
+ if( pNear->nPhrase>1 ){
+ zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
+ if( zRet==0 ) return 0;
+ }
+
+ }else{
+ char *zOp = 0;
+ char *z1 = 0;
+ char *z2 = 0;
+ switch( pExpr->eType ){
+ case FTS5_AND: zOp = "AND"; break;
+ case FTS5_NOT: zOp = "NOT"; break;
+ case FTS5_OR: zOp = "OR"; break;
+ default: assert( 0 );
+ }
+
+ z1 = fts5ExprPrint(pConfig, pExpr->pLeft);
+ z2 = fts5ExprPrint(pConfig, pExpr->pRight);
+ if( z1 && z2 ){
+ int b1 = pExpr->pLeft->eType!=FTS5_STRING;
+ int b2 = pExpr->pRight->eType!=FTS5_STRING;
+ zRet = sqlite3_mprintf("%s%s%s %s %s%s%s",
+ b1 ? "(" : "", z1, b1 ? ")" : "",
+ zOp,
+ b2 ? "(" : "", z2, b2 ? ")" : ""
+ );
+ }
+ sqlite3_free(z1);
+ sqlite3_free(z2);
+ }
+
+ return zRet;
+}
+
+/*
+** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
+** and fts5_expr_tcl() (bTcl!=0).
+*/
+static void fts5ExprFunction(
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args */
+ sqlite3_value **apVal, /* Function arguments */
+ int bTcl
+){
+ Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
+ sqlite3 *db = sqlite3_context_db_handle(pCtx);
+ const char *zExpr = 0;
+ char *zErr = 0;
+ Fts5Expr *pExpr = 0;
+ int rc;
+ int i;
+
+ const char **azConfig; /* Array of arguments for Fts5Config */
+ const char *zNearsetCmd = "nearset";
+ int nConfig; /* Size of azConfig[] */
+ Fts5Config *pConfig = 0;
+
+ if( bTcl && nArg>1 ){
+ zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
+ }
+
+ nConfig = nArg + 2 - bTcl;
+ azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig);
+ if( azConfig==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ return;
+ }
+ azConfig[0] = 0;
+ azConfig[1] = "main";
+ azConfig[2] = "tbl";
+ for(i=1+bTcl; i<nArg; i++){
+ azConfig[i+2-bTcl] = (const char*)sqlite3_value_text(apVal[i]);
+ }
+ zExpr = (const char*)sqlite3_value_text(apVal[0]);
+
+ rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr);
+ }
+ if( rc==SQLITE_OK ){
+ char *zText;
+ if( pExpr->pRoot==0 ){
+ zText = sqlite3_mprintf("");
+ }else if( bTcl ){
+ zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
+ }else{
+ zText = fts5ExprPrint(pConfig, pExpr->pRoot);
+ }
+ if( rc==SQLITE_OK ){
+ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
+ sqlite3_free(zText);
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
+ if( zErr ){
+ sqlite3_result_error(pCtx, zErr, -1);
+ sqlite3_free(zErr);
+ }else{
+ sqlite3_result_error_code(pCtx, rc);
+ }
+ }
+ sqlite3_free(azConfig);
+ sqlite3Fts5ConfigFree(pConfig);
+ sqlite3Fts5ExprFree(pExpr);
+}
+
+static void fts5ExprFunctionHr(
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args */
+ sqlite3_value **apVal /* Function arguments */
+){
+ fts5ExprFunction(pCtx, nArg, apVal, 0);
+}
+static void fts5ExprFunctionTcl(
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args */
+ sqlite3_value **apVal /* Function arguments */
+){
+ fts5ExprFunction(pCtx, nArg, apVal, 1);
+}
+
+/*
+** This is called during initialization to register the fts5_expr() scalar
+** UDF with the SQLite handle passed as the only argument.
+*/
+int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
+ struct Fts5ExprFunc {
+ const char *z;
+ void (*x)(sqlite3_context*,int,sqlite3_value**);
+ } aFunc[] = {
+ { "fts5_expr", fts5ExprFunctionHr },
+ { "fts5_expr_tcl", fts5ExprFunctionTcl },
+ };
+ int i;
+ int rc = SQLITE_OK;
+ void *pCtx = (void*)pGlobal;
+
+ for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){
+ struct Fts5ExprFunc *p = &aFunc[i];
+ rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
+ }
+
+ return rc;
+}
+
+/*
+** Return the number of phrases in expression pExpr.
+*/
+int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
+ return pExpr->nPhrase;
+}
+
+/*
+** Return the number of terms in the iPhrase'th phrase in pExpr.
+*/
+int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
+ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
+ return pExpr->apExprPhrase[iPhrase]->nTerm;
+}
+
+/*
+** This function is used to access the current position list for phrase
+** iPhrase.
+*/
+int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
+ if( iPhrase>=0 && iPhrase<pExpr->nPhrase ){
+ Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
+ Fts5ExprNode *pNode = pPhrase->pNode;
+ if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
+ *pa = pPhrase->poslist.p;
+ return pPhrase->poslist.n;
+ }
+ }
+ *pa = 0;
+ return 0;
+}
+
+#endif /* SQLITE_ENABLE_FTS5 */