diff options
-rw-r--r-- | ext/fts3/fts3.c | 1065 | ||||
-rw-r--r-- | ext/fts3/fts3Int.h | 55 | ||||
-rw-r--r-- | ext/fts3/fts3_expr.c | 6 | ||||
-rw-r--r-- | ext/fts3/fts3_snippet.c | 7 | ||||
-rw-r--r-- | ext/fts3/fts3_write.c | 421 | ||||
-rw-r--r-- | ext/fts3/fts3speed.tcl | 122 | ||||
-rw-r--r-- | manifest | 38 | ||||
-rw-r--r-- | manifest.uuid | 2 | ||||
-rw-r--r-- | src/btree.c | 3 | ||||
-rw-r--r-- | src/sqlite.h.in | 5 | ||||
-rw-r--r-- | src/test1.c | 46 | ||||
-rw-r--r-- | src/vdbeblob.c | 174 | ||||
-rw-r--r-- | test/fts3ah.test | 37 | ||||
-rw-r--r-- | test/fts3cov.test | 11 | ||||
-rw-r--r-- | test/fts3defer.test | 383 | ||||
-rw-r--r-- | test/malloc_common.tcl | 8 | ||||
-rw-r--r-- | test/permutations.test | 2 |
17 files changed, 1922 insertions, 463 deletions
diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 5323a5c1a..c3bbb0829 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -441,16 +441,13 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ int i; assert( p->nPendingData==0 ); + assert( p->pSegments==0 ); /* Free any prepared statements held */ for(i=0; i<SizeofArray(p->aStmt); i++){ sqlite3_finalize(p->aStmt[i]); } - for(i=0; i<p->nLeavesStmt; i++){ - sqlite3_finalize(p->aLeavesStmt[i]); - } - sqlite3_free(p->zSelectLeaves); - sqlite3_free(p->aLeavesStmt); + sqlite3_free(p->zSegmentsTbl); /* Invoke the tokenizer destructor to free the tokenizer. */ p->pTokenizer->pModule->xDestroy(p->pTokenizer); @@ -461,7 +458,7 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ /* ** Construct one or more SQL statements from the format string given -** and then evaluate those statements. The success code is writting +** and then evaluate those statements. The success code is written ** into *pRc. ** ** If *pRc is initially non-zero then this routine is a no-op. @@ -513,33 +510,38 @@ static int fts3DestroyMethod(sqlite3_vtab *pVtab){ ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table ** passed as the first argument. This is done as part of the xConnect() ** and xCreate() methods. +** +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. */ -static int fts3DeclareVtab(Fts3Table *p){ - int i; /* Iterator variable */ - int rc; /* Return code */ - char *zSql; /* SQL statement passed to declare_vtab() */ - char *zCols; /* List of user defined columns */ - - /* Create a list of user columns for the virtual table */ - zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); - for(i=1; zCols && i<p->nColumn; i++){ - zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); - } +static void fts3DeclareVtab(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int i; /* Iterator variable */ + int rc; /* Return code */ + char *zSql; /* SQL statement passed to declare_vtab() */ + char *zCols; /* List of user defined columns */ + + /* Create a list of user columns for the virtual table */ + zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); + for(i=1; zCols && i<p->nColumn; i++){ + zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); + } - /* Create the whole "CREATE TABLE" statement to pass to SQLite */ - zSql = sqlite3_mprintf( - "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName - ); + /* Create the whole "CREATE TABLE" statement to pass to SQLite */ + zSql = sqlite3_mprintf( + "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName + ); + if( !zCols || !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_declare_vtab(p->db, zSql); + } - if( !zCols || !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_declare_vtab(p->db, zSql); + sqlite3_free(zSql); + sqlite3_free(zCols); + *pRc = rc; } - - sqlite3_free(zSql); - sqlite3_free(zCols); - return rc; } /* @@ -640,6 +642,37 @@ static void fts3TableExists( } /* +** Store the current database page-size in bytes in p->nPgsz. +** +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. +*/ +static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int rc; /* Return code */ + char *zSql; /* SQL text "PRAGMA %Q.page_size" */ + sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ + + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + p->nPgsz = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } + } + assert( p->nPgsz>0 || rc!=SQLITE_OK ); + sqlite3_free(zSql); + *pRc = rc; + } +} + +/* ** This function is the implementation of both the xConnect and xCreate ** methods of the FTS3 virtual table. ** @@ -763,12 +796,15 @@ static int fts3InitVtab( fts3TableExists(&rc, db, argv[1], argv[2], "_content", &p->bHasContent); fts3TableExists(&rc, db, argv[1], argv[2], "_docsize", &p->bHasDocsize); } - if( rc!=SQLITE_OK ) goto fts3_init_out; - rc = fts3DeclareVtab(p); - if( rc!=SQLITE_OK ) goto fts3_init_out; + /* Figure out the page-size for the database. This is required in order to + ** estimate the cost of loading large doclists from the database (see + ** function sqlite3Fts3SegReaderCost() for details). + */ + fts3DatabasePageSize(&rc, p); - *ppVTab = &p->base; + /* Declare the table schema to SQLite. */ + fts3DeclareVtab(&rc, p); fts3_init_out: assert( p || (pTokenizer && rc!=SQLITE_OK) ); @@ -778,6 +814,8 @@ fts3_init_out: }else{ pTokenizer->pModule->xDestroy(pTokenizer); } + }else{ + *ppVTab = &p->base; } return rc; } @@ -889,10 +927,12 @@ static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ ** Close the cursor. For additional information see the documentation ** on the xClose method of the virtual table interface. */ -static int fulltextClose(sqlite3_vtab_cursor *pCursor){ +static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); sqlite3_finalize(pCsr->pStmt); sqlite3Fts3ExprFree(pCsr->pExpr); + sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); sqlite3_free(pCsr->aMatchinfo); sqlite3_free(pCsr); @@ -930,34 +970,83 @@ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ } } -/* -** Advance the cursor to the next row in the %_content table that -** matches the search criteria. For a MATCH search, this will be -** the next row that matches. For a full-table scan, this will be -** simply the next row in the %_content table. For a docid lookup, -** this routine simply sets the EOF flag. -** -** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned -** even if we reach end-of-file. The fts3EofMethod() will be called -** subsequently to determine whether or not an EOF was hit. -*/ -static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ + +static int fts3ScanInteriorNode( + Fts3Table *p, /* Virtual table handle */ + const char *zTerm, /* Term to select leaves for */ + int nTerm, /* Size of term zTerm in bytes */ + const char *zNode, /* Buffer containing segment interior node */ + int nNode, /* Size of buffer at zNode */ + sqlite3_int64 *piFirst, /* OUT: Selected child node */ + sqlite3_int64 *piLast /* OUT: Selected child node */ +){ int rc = SQLITE_OK; /* Return code */ - Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + const char *zCsr = zNode; /* Cursor to iterate through node */ + const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ + char *zBuffer = 0; /* Buffer to load terms into */ + int nAlloc = 0; /* Size of allocated buffer */ - if( pCsr->aDoclist==0 ){ - if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ - pCsr->isEof = 1; - rc = sqlite3_reset(pCsr->pStmt); + int isFirstTerm = 1; /* True when processing first term on page */ + int dummy; + sqlite3_int64 iChild; /* Block id of child node to descend to */ + int nBlock; /* Size of child node in bytes */ + + zCsr += sqlite3Fts3GetVarint32(zCsr, &dummy); + zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); + + while( zCsr<zEnd && (piFirst || piLast) ){ + int cmp; /* memcmp() result */ + int nSuffix; /* Size of term suffix */ + int nPrefix = 0; /* Size of term prefix */ + int nBuffer; /* Total term size */ + + /* Load the next term on the node into zBuffer */ + if( !isFirstTerm ){ + zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); } - }else if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ - pCsr->isEof = 1; - }else{ - sqlite3_reset(pCsr->pStmt); - fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - } + isFirstTerm = 0; + zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); + if( nPrefix+nSuffix>nAlloc ){ + char *zNew; + nAlloc = (nPrefix+nSuffix) * 2; + zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); + if( !zNew ){ + sqlite3_free(zBuffer); + return SQLITE_NOMEM; + } + zBuffer = zNew; + } + memcpy(&zBuffer[nPrefix], zCsr, nSuffix); + nBuffer = nPrefix + nSuffix; + zCsr += nSuffix; + + /* Compare the term we are searching for with the term just loaded from + ** the interior node. If the specified term is greater than or equal + ** to the term from the interior node, then all terms on the sub-tree + ** headed by node iChild are smaller than zTerm. No need to search + ** iChild. + ** + ** If the interior node term is larger than the specified term, then + ** the tree headed by iChild may contain the specified term. + */ + cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); + if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ + *piFirst = iChild; + piFirst = 0; + } + + if( piLast && cmp<0 ){ + *piLast = iChild; + piLast = 0; + } + + iChild++; + }; + + if( piFirst ) *piFirst = iChild; + if( piLast ) *piLast = iChild; + + sqlite3_free(zBuffer); return rc; } @@ -984,77 +1073,33 @@ static int fts3SelectLeaf( int nTerm, /* Size of term zTerm in bytes */ const char *zNode, /* Buffer containing segment interior node */ int nNode, /* Size of buffer at zNode */ - sqlite3_int64 *piLeaf /* Selected leaf node */ + sqlite3_int64 *piLeaf, /* Selected leaf node */ + sqlite3_int64 *piLeaf2 /* Selected leaf node */ ){ - int rc = SQLITE_OK; /* Return code */ - const char *zCsr = zNode; /* Cursor to iterate through node */ - const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ - char *zBuffer = 0; /* Buffer to load terms into */ - int nAlloc = 0; /* Size of allocated buffer */ - - while( 1 ){ - int isFirstTerm = 1; /* True when processing first term on page */ - int iHeight; /* Height of this node in tree */ - sqlite3_int64 iChild; /* Block id of child node to descend to */ - int nBlock; /* Size of child node in bytes */ + int rc; /* Return code */ + int iHeight; /* Height of this node in tree */ - zCsr += sqlite3Fts3GetVarint32(zCsr, &iHeight); - zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); - - while( zCsr<zEnd ){ - int cmp; /* memcmp() result */ - int nSuffix; /* Size of term suffix */ - int nPrefix = 0; /* Size of term prefix */ - int nBuffer; /* Total term size */ + sqlite3Fts3GetVarint32(zNode, &iHeight); + rc = fts3ScanInteriorNode(p, zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); - /* Load the next term on the node into zBuffer */ - if( !isFirstTerm ){ - zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); - } - isFirstTerm = 0; - zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); - if( nPrefix+nSuffix>nAlloc ){ - char *zNew; - nAlloc = (nPrefix+nSuffix) * 2; - zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); - if( !zNew ){ - sqlite3_free(zBuffer); - return SQLITE_NOMEM; - } - zBuffer = zNew; - } - memcpy(&zBuffer[nPrefix], zCsr, nSuffix); - nBuffer = nPrefix + nSuffix; - zCsr += nSuffix; - - /* Compare the term we are searching for with the term just loaded from - ** the interior node. If the specified term is greater than or equal - ** to the term from the interior node, then all terms on the sub-tree - ** headed by node iChild are smaller than zTerm. No need to search - ** iChild. - ** - ** If the interior node term is larger than the specified term, then - ** the tree headed by iChild may contain the specified term. - */ - cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); - if( cmp<0 || (cmp==0 && nBuffer>nTerm) ) break; - iChild++; - }; + if( rc==SQLITE_OK && iHeight>1 ){ + const char *zBlob; + int nBlob; - /* If (iHeight==1), the children of this interior node are leaves. The - ** specified term may be present on leaf node iChild. - */ - if( iHeight==1 ){ - *piLeaf = iChild; - break; + if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ + rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob); + if( rc==SQLITE_OK ){ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); + } + piLeaf = 0; } - /* Descend to interior node iChild. */ - rc = sqlite3Fts3ReadBlock(p, iChild, &zCsr, &nBlock); - if( rc!=SQLITE_OK ) break; - zEnd = &zCsr[nBlock]; + rc = sqlite3Fts3ReadBlock(p, piLeaf ? *piLeaf : *piLeaf2, &zBlob, &nBlob); + if( rc==SQLITE_OK ){ + rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); + } } - sqlite3_free(zBuffer); + return rc; } @@ -1286,20 +1331,44 @@ static void fts3PoslistMerge( /* ** nToken==1 searches for adjacent positions. +** +** This function is used to merge two position lists into one. When it is +** called, *pp1 and *pp2 must both point to position lists. A position-list is +** the part of a doclist that follows each document id. For example, if a row +** contains: +** +** 'a b c'|'x y z'|'a b b a' +** +** Then the position list for this row for token 'b' would consist of: +** +** 0x02 0x01 0x02 0x03 0x03 0x00 +** +** When this function returns, both *pp1 and *pp2 are left pointing to the +** byte following the 0x00 terminator of their respective position lists. +** +** If isSaveLeft is 0, an entry is added to the output position list for +** each position in *pp2 for which there exists one or more positions in +** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. +** when the *pp1 token appears before the *pp2 token, but not more than nToken +** slots before it. */ static int fts3PoslistPhraseMerge( - char **pp, /* Output buffer */ + char **pp, /* IN/OUT: Preallocated output buffer */ int nToken, /* Maximum difference in token positions */ int isSaveLeft, /* Save the left position */ - char **pp1, /* Left input list */ - char **pp2 /* Right input list */ + int isExact, /* If *pp1 is exactly nTokens before *pp2 */ + char **pp1, /* IN/OUT: Left input list */ + char **pp2 /* IN/OUT: Right input list */ ){ char *p = (pp ? *pp : 0); char *p1 = *pp1; char *p2 = *pp2; - int iCol1 = 0; int iCol2 = 0; + + /* Never set both isSaveLeft and isExact for the same invocation. */ + assert( isSaveLeft==0 || isExact==0 ); + assert( *p1!=0 && *p2!=0 ); if( *p1==POS_COLUMN ){ p1++; @@ -1328,7 +1397,9 @@ static int fts3PoslistPhraseMerge( fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; while( 1 ){ - if( iPos2>iPos1 && iPos2<=iPos1+nToken ){ + if( iPos2==iPos1+nToken + || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) + ){ sqlite3_int64 iSave; if( !pp ){ fts3PoslistCopy(0, &p2); @@ -1411,21 +1482,21 @@ static int fts3PoslistNearMerge( char *p2 = *pp2; if( !pp ){ - if( fts3PoslistPhraseMerge(0, nRight, 0, pp1, pp2) ) return 1; + if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1; *pp1 = p1; *pp2 = p2; - return fts3PoslistPhraseMerge(0, nLeft, 0, pp2, pp1); + return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1); }else{ char *pTmp1 = aTmp; char *pTmp2; char *aTmp2; int res = 1; - fts3PoslistPhraseMerge(&pTmp1, nRight, 0, pp1, pp2); + fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); aTmp2 = pTmp2 = pTmp1; *pp1 = p1; *pp2 = p2; - fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, pp2, pp1); + fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ fts3PoslistMerge(pp, &aTmp, &aTmp2); }else if( pTmp1!=aTmp ){ @@ -1471,7 +1542,8 @@ static int fts3DoclistMerge( char *a1, /* Buffer containing first doclist */ int n1, /* Size of buffer a1 */ char *a2, /* Buffer containing second doclist */ - int n2 /* Size of buffer a2 */ + int n2, /* Size of buffer a2 */ + int *pnDoc /* OUT: Number of docids in output */ ){ sqlite3_int64 i1 = 0; sqlite3_int64 i2 = 0; @@ -1482,6 +1554,7 @@ static int fts3DoclistMerge( char *p2 = a2; char *pEnd1 = &a1[n1]; char *pEnd2 = &a2[n2]; + int nDoc = 0; assert( mergetype==MERGE_OR || mergetype==MERGE_POS_OR || mergetype==MERGE_AND || mergetype==MERGE_NOT @@ -1525,6 +1598,7 @@ static int fts3DoclistMerge( fts3PutDeltaVarint(&p, &iPrev, i1); fts3GetDeltaVarint2(&p1, pEnd1, &i1); fts3GetDeltaVarint2(&p2, pEnd2, &i2); + nDoc++; }else if( i1<i2 ){ fts3GetDeltaVarint2(&p1, pEnd1, &i1); }else{ @@ -1555,9 +1629,11 @@ static int fts3DoclistMerge( char *pSave = p; sqlite3_int64 iPrevSave = iPrev; fts3PutDeltaVarint(&p, &iPrev, i1); - if( 0==fts3PoslistPhraseMerge(ppPos, 1, 0, &p1, &p2) ){ + if( 0==fts3PoslistPhraseMerge(ppPos, nParam1, 0, 1, &p1, &p2) ){ p = pSave; iPrev = iPrevSave; + }else{ + nDoc++; } fts3GetDeltaVarint2(&p1, pEnd1, &i1); fts3GetDeltaVarint2(&p2, pEnd2, &i2); @@ -1610,6 +1686,7 @@ static int fts3DoclistMerge( } } + if( pnDoc ) *pnDoc = nDoc; *pnBuffer = (int)(p-aBuffer); return SQLITE_OK; } @@ -1657,7 +1734,7 @@ static int fts3TermSelectMerge(TermSelect *pTS){ return SQLITE_NOMEM; } fts3DoclistMerge(mergetype, 0, 0, - aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut + aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, 0 ); sqlite3_free(pTS->aaOutput[i]); sqlite3_free(aOut); @@ -1728,8 +1805,8 @@ static int fts3TermSelectCb( } return SQLITE_NOMEM; } - fts3DoclistMerge(mergetype, 0, 0, - aNew, &nNew, pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge + fts3DoclistMerge(mergetype, 0, 0, aNew, &nNew, + pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge, 0 ); if( iOut>0 ) sqlite3_free(aMerge); @@ -1747,43 +1824,106 @@ static int fts3TermSelectCb( return SQLITE_OK; } +static int fts3DeferredTermSelect( + Fts3DeferredToken *pToken, /* Phrase token */ + int isTermPos, /* True to include positions */ + int *pnOut, /* OUT: Size of list */ + char **ppOut /* OUT: Body of list */ +){ + char *aSource; + int nSource; + + aSource = sqlite3Fts3DeferredDoclist(pToken, &nSource); + if( !aSource ){ + *pnOut = 0; + *ppOut = 0; + }else if( isTermPos ){ + *ppOut = sqlite3_malloc(nSource); + if( !*ppOut ) return SQLITE_NOMEM; + memcpy(*ppOut, aSource, nSource); + *pnOut = nSource; + }else{ + sqlite3_int64 docid; + *pnOut = sqlite3Fts3GetVarint(aSource, &docid); + *ppOut = sqlite3_malloc(*pnOut); + if( !*ppOut ) return SQLITE_NOMEM; + sqlite3Fts3PutVarint(*ppOut, docid); + } + + return SQLITE_OK; +} + /* -** This function retreives the doclist for the specified term (or term -** prefix) from the database. -** -** The returned doclist may be in one of two formats, depending on the -** value of parameter isReqPos. If isReqPos is zero, then the doclist is -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos -** is non-zero, then the returned list is in the same format as is stored -** in the database without the found length specifier at the start of on-disk -** doclists. +** An Fts3SegReaderArray is used to store an array of Fts3SegReader objects. +** Elements are added to the array using fts3SegReaderArrayAdd(). */ -static int fts3TermSelect( - Fts3Table *p, /* Virtual table handle */ - int iColumn, /* Column to query (or -ve for all columns) */ +struct Fts3SegReaderArray { + int nSegment; /* Number of valid entries in apSegment[] */ + int nAlloc; /* Allocated size of apSegment[] */ + int nCost; /* The cost of executing SegReaderIterate() */ + Fts3SegReader *apSegment[1]; /* Array of seg-reader objects */ +}; + + +/* +** Free an Fts3SegReaderArray object. Also free all seg-readers in the +** array (using sqlite3Fts3SegReaderFree()). +*/ +static void fts3SegReaderArrayFree(Fts3SegReaderArray *pArray){ + if( pArray ){ + int i; + for(i=0; i<pArray->nSegment; i++){ + sqlite3Fts3SegReaderFree(0, pArray->apSegment[i]); + } + sqlite3_free(pArray); + } +} + +static int fts3SegReaderArrayAdd( + Fts3SegReaderArray **ppArray, + Fts3SegReader *pNew +){ + Fts3SegReaderArray *pArray = *ppArray; + + if( !pArray || pArray->nAlloc==pArray->nSegment ){ + int nNew = (pArray ? pArray->nAlloc+16 : 16); + pArray = (Fts3SegReaderArray *)sqlite3_realloc(pArray, + sizeof(Fts3SegReaderArray) + (nNew-1) * sizeof(Fts3SegReader*) + ); + if( !pArray ){ + sqlite3Fts3SegReaderFree(0, pNew); + return SQLITE_NOMEM; + } + if( nNew==16 ){ + pArray->nSegment = 0; + pArray->nCost = 0; + } + pArray->nAlloc = nNew; + *ppArray = pArray; + } + + pArray->apSegment[pArray->nSegment++] = pNew; + return SQLITE_OK; +} + +static int fts3TermSegReaderArray( + Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ - int isReqPos, /* True to include position lists in output */ - int *pnOut, /* OUT: Size of buffer at *ppOut */ - char **ppOut /* OUT: Malloced result buffer */ + Fts3SegReaderArray **ppArray /* OUT: Allocated seg-reader array */ ){ - int i; - TermSelect tsc; - Fts3SegFilter filter; /* Segment term filter configuration */ - Fts3SegReader **apSegment; /* Array of segments to read data from */ - int nSegment = 0; /* Size of apSegment array */ - int nAlloc = 16; /* Allocated size of segment array */ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int rc; /* Return code */ + Fts3SegReaderArray *pArray = 0; /* Array object to build */ + Fts3SegReader *pReader = 0; /* Seg-reader to add to pArray */ sqlite3_stmt *pStmt = 0; /* SQL statement to scan %_segdir table */ int iAge = 0; /* Used to assign ages to segments */ - apSegment = (Fts3SegReader **)sqlite3_malloc(sizeof(Fts3SegReader*)*nAlloc); - if( !apSegment ) return SQLITE_NOMEM; - rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &apSegment[0]); - if( rc!=SQLITE_OK ) goto finished; - if( apSegment[0] ){ - nSegment = 1; + /* Allocate a seg-reader to scan the pending terms, if any. */ + rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pReader); + if( rc==SQLITE_OK && pReader ) { + rc = fts3SegReaderArrayAdd(&pArray, pReader); } /* Loop through the entire %_segdir table. For each segment, create a @@ -1791,12 +1931,10 @@ static int fts3TermSelect( ** that may contain a term that matches zTerm/nTerm. For non-prefix ** searches, this is always a single leaf. For prefix searches, this ** may be a contiguous block of leaves. - ** - ** The code in this loop does not actually load any leaves into memory - ** (unless the root node happens to be a leaf). It simply examines the - ** b-tree structure to determine which leaves need to be inspected. */ - rc = sqlite3Fts3AllSegdirs(p, &pStmt); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3AllSegdirs(p, &pStmt); + } while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ Fts3SegReader *pNew = 0; int nRoot = sqlite3_column_bytes(pStmt, 4); @@ -1809,10 +1947,11 @@ static int fts3TermSelect( rc = sqlite3Fts3SegReaderNew(p, iAge, 0, 0, 0, zRoot, nRoot, &pNew); }else{ int rc2; /* Return value of sqlite3Fts3ReadBlock() */ - sqlite3_int64 i1; /* Blockid of leaf that may contain zTerm */ - rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &i1); + sqlite3_int64 i1; /* First leaf that may contain zTerm */ + sqlite3_int64 i2; /* Last leaf that may contain zTerm */ + rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &i1, (isPrefix?&i2:0)); + if( isPrefix==0 ) i2 = i1; if( rc==SQLITE_OK ){ - sqlite3_int64 i2 = sqlite3_column_int64(pStmt, 2); rc = sqlite3Fts3SegReaderNew(p, iAge, i1, i2, 0, 0, 0, &pNew); } @@ -1829,43 +1968,69 @@ static int fts3TermSelect( } iAge++; - /* If a new Fts3SegReader was allocated, add it to the apSegment array. */ + /* If a new Fts3SegReader was allocated, add it to the array. */ assert( pNew!=0 || rc!=SQLITE_OK ); - if( pNew ){ - if( nSegment==nAlloc ){ - Fts3SegReader **pArray; - nAlloc += 16; - pArray = (Fts3SegReader **)sqlite3_realloc( - apSegment, nAlloc*sizeof(Fts3SegReader *) - ); - if( !pArray ){ - sqlite3Fts3SegReaderFree(p, pNew); - rc = SQLITE_NOMEM; - goto finished; - } - apSegment = pArray; - } - apSegment[nSegment++] = pNew; + if( rc==SQLITE_OK ){ + rc = fts3SegReaderArrayAdd(&pArray, pNew); + }else{ + sqlite3Fts3SegReaderFree(p, pNew); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderCost(pCsr, pNew, &pArray->nCost); + } + } + + if( rc==SQLITE_DONE ){ + rc = sqlite3_reset(pStmt); + }else{ + sqlite3_reset(pStmt); } - if( rc!=SQLITE_DONE ){ - assert( rc!=SQLITE_OK ); - goto finished; + if( rc!=SQLITE_OK ){ + fts3SegReaderArrayFree(pArray); + pArray = 0; } + *ppArray = pArray; + return rc; +} +/* +** This function retreives the doclist for the specified term (or term +** prefix) from the database. +** +** The returned doclist may be in one of two formats, depending on the +** value of parameter isReqPos. If isReqPos is zero, then the doclist is +** a sorted list of delta-compressed docids (a bare doclist). If isReqPos +** is non-zero, then the returned list is in the same format as is stored +** in the database without the found length specifier at the start of on-disk +** doclists. +*/ +static int fts3TermSelect( + Fts3Table *p, /* Virtual table handle */ + Fts3PhraseToken *pTok, /* Token to query for */ + int iColumn, /* Column to query (or -ve for all columns) */ + int isReqPos, /* True to include position lists in output */ + int *pnOut, /* OUT: Size of buffer at *ppOut */ + char **ppOut /* OUT: Malloced result buffer */ +){ + int rc; /* Return code */ + Fts3SegReaderArray *pArray; /* Seg-reader array for this term */ + TermSelect tsc; /* Context object for fts3TermSelectCb() */ + Fts3SegFilter filter; /* Segment term filter configuration */ + + pArray = pTok->pArray; memset(&tsc, 0, sizeof(TermSelect)); tsc.isReqPos = isReqPos; filter.flags = FTS3_SEGMENT_IGNORE_EMPTY - | (isPrefix ? FTS3_SEGMENT_PREFIX : 0) + | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0) | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; - filter.zTerm = zTerm; - filter.nTerm = nTerm; + filter.zTerm = pTok->z; + filter.nTerm = pTok->n; - rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter, - fts3TermSelectCb, (void *)&tsc + rc = sqlite3Fts3SegReaderIterate(p, pArray->apSegment, pArray->nSegment, + &filter, fts3TermSelectCb, (void *)&tsc ); if( rc==SQLITE_OK ){ rc = fts3TermSelectMerge(&tsc); @@ -1875,26 +2040,104 @@ static int fts3TermSelect( *ppOut = tsc.aaOutput[0]; *pnOut = tsc.anOutput[0]; }else{ + int i; for(i=0; i<SizeofArray(tsc.aaOutput); i++){ sqlite3_free(tsc.aaOutput[i]); } } -finished: - sqlite3_reset(pStmt); - for(i=0; i<nSegment; i++){ - sqlite3Fts3SegReaderFree(p, apSegment[i]); + fts3SegReaderArrayFree(pArray); + pTok->pArray = 0; + return rc; +} + +/* +** This function counts the total number of docids in the doclist stored +** in buffer aList[], size nList bytes. +** +** If the isPoslist argument is true, then it is assumed that the doclist +** contains a position-list following each docid. Otherwise, it is assumed +** that the doclist is simply a list of docids stored as delta encoded +** varints. +*/ +static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){ + int nDoc = 0; /* Return value */ + if( aList ){ + char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */ + char *p = aList; /* Cursor */ + if( !isPoslist ){ + /* The number of docids in the list is the same as the number of + ** varints. In FTS3 a varint consists of a single byte with the 0x80 + ** bit cleared and zero or more bytes with the 0x80 bit set. So to + ** count the varints in the buffer, just count the number of bytes + ** with the 0x80 bit clear. */ + while( p<aEnd ) nDoc += (((*p++)&0x80)==0); + }else{ + while( p<aEnd ){ + nDoc++; + while( (*p++)&0x80 ); /* Skip docid varint */ + fts3PoslistCopy(0, &p); /* Skip over position list */ + } + } + } + + return nDoc; +} + +/* +** Call sqlite3Fts3DeferToken() for each token in the expression pExpr. +*/ +static int fts3DeferExpression(Fts3Cursor *pCsr, Fts3Expr *pExpr){ + int rc = SQLITE_OK; + if( pExpr ){ + rc = fts3DeferExpression(pCsr, pExpr->pLeft); + if( rc==SQLITE_OK ){ + rc = fts3DeferExpression(pCsr, pExpr->pRight); + } + if( pExpr->eType==FTSQUERY_PHRASE ){ + int iCol = pExpr->pPhrase->iColumn; + int i; + pExpr->bDeferred = 1; + for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){ + Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; + if( pToken->pDeferred==0 ){ + rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol); + } + } + } } - sqlite3_free(apSegment); return rc; } +static void fts3DoclistStripPositions(char *aList, int *pnList){ + if( aList ){ + char *aEnd = &aList[*pnList]; /* Pointer to one byte after EOF */ + char *p = aList; /* Input cursor */ + char *pOut = aList; /* Output cursor */ + sqlite3_int64 iPrev = 0; + + while( p<aEnd ){ + sqlite3_int64 delta; + p += sqlite3Fts3GetVarint(p, &delta); + fts3PoslistCopy(0, &p); + pOut += sqlite3Fts3PutVarint(pOut, delta); + } + + *pnList = (pOut - aList); + } +} /* ** Return a DocList corresponding to the phrase *pPhrase. +** +** If this function returns SQLITE_OK, but *pnOut is set to a negative value, +** then no tokens in the phrase were looked up in the full-text index. This +** is only possible when this function is called from within xFilter(). The +** caller should assume that all documents match the phrase. The actual +** filtering will take place in xNext(). */ static int fts3PhraseSelect( - Fts3Table *p, /* Virtual table handle */ + Fts3Cursor *pCsr, /* Virtual table cursor handle */ Fts3Phrase *pPhrase, /* Phrase to return a doclist for */ int isReqPos, /* True if output should contain positions */ char **paOut, /* OUT: Pointer to malloc'd result buffer */ @@ -1906,42 +2149,123 @@ static int fts3PhraseSelect( int ii; int iCol = pPhrase->iColumn; int isTermPos = (pPhrase->nToken>1 || isReqPos); + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + + int iPrevTok = 0; + int nDoc = 0; + + /* If this is an xFilter() evaluation, create a segment-reader for each + ** phrase token. Or, if this is an xNest() or snippet/offsets/matchinfo + ** evaluation, only create segment-readers if there are no Fts3DeferredToken + ** objects attached to the phrase-tokens. + */ + for(ii=0; ii<pPhrase->nToken; ii++){ + Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; + if( pTok->pArray==0 && (pCsr->doDeferred==0 || pTok->pDeferred==0) ){ + rc = fts3TermSegReaderArray( + pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray + ); + if( rc!=SQLITE_OK ) return rc; + } + } for(ii=0; ii<pPhrase->nToken; ii++){ - struct PhraseToken *pTok = &pPhrase->aToken[ii]; - char *z = pTok->z; /* Next token of the phrase */ - int n = pTok->n; /* Size of z in bytes */ - int isPrefix = pTok->isPrefix;/* True if token is a prefix */ + Fts3PhraseToken *pTok; /* Token to find doclist for */ + int iTok; /* The token being queried this iteration */ char *pList; /* Pointer to token doclist */ int nList; /* Size of buffer at pList */ - rc = fts3TermSelect(p, iCol, z, n, isPrefix, isTermPos, &nList, &pList); + /* Select a token to process. If this is an xFilter() call, then tokens + ** are processed in order from least to most costly. Otherwise, tokens + ** are processed in the order in which they occur in the phrase. + */ + if( pCsr->doDeferred || isReqPos ){ + iTok = ii; + pTok = &pPhrase->aToken[iTok]; + }else{ + int nMinCost = 0x7FFFFFFF; + int jj; + + /* Find the remaining token with the lowest cost. */ + for(jj=0; jj<pPhrase->nToken; jj++){ + Fts3SegReaderArray *pArray = pPhrase->aToken[jj].pArray; + if( pArray && pArray->nCost<nMinCost ){ + iTok = jj; + nMinCost = pArray->nCost; + } + } + pTok = &pPhrase->aToken[iTok]; + + /* This branch is taken if it is determined that loading the doclist + ** for the next token would require more IO than loading all documents + ** currently identified by doclist pOut/nOut. No further doclists will + ** be loaded from the full-text index for this phrase. + */ + if( nMinCost>nDoc && ii>0 ){ + rc = fts3DeferExpression(pCsr, pCsr->pExpr); + break; + } + } + + if( pCsr->doDeferred && pTok->pDeferred ){ + rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); + }else{ + assert( pTok->pArray ); + rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); + } + assert( rc!=SQLITE_OK || pCsr->doDeferred || pTok->pArray==0 ); if( rc!=SQLITE_OK ) break; if( ii==0 ){ pOut = pList; nOut = nList; + if( pCsr->doDeferred==0 && pPhrase->nToken>1 ){ + nDoc = fts3DoclistCountDocids(1, pOut, nOut); + } }else{ - /* Merge the new term list and the current output. If this is the - ** last term in the phrase, and positions are not required in the - ** output of this function, the positions can be dropped as part - ** of this merge. Either way, the result of this merge will be - ** smaller than nList bytes. The code in fts3DoclistMerge() is written - ** so that it is safe to use pList as the output as well as an input - ** in this case. + /* Merge the new term list and the current output. */ + char *aLeft, *aRight; + int nLeft, nRight; + int nDist; + int mt; + + /* If this is the final token of the phrase, and positions were not + ** requested by the caller, use MERGE_PHRASE instead of POS_PHRASE. + ** This drops the position information from the output list. */ - int mergetype = MERGE_POS_PHRASE; - if( ii==pPhrase->nToken-1 && !isReqPos ){ - mergetype = MERGE_PHRASE; + mt = MERGE_POS_PHRASE; + if( ii==pPhrase->nToken-1 && !isReqPos ) mt = MERGE_PHRASE; + + assert( iPrevTok!=iTok ); + if( iPrevTok<iTok ){ + aLeft = pOut; + nLeft = nOut; + aRight = pList; + nRight = nList; + nDist = iTok-iPrevTok; + }else{ + aRight = pOut; + nRight = nOut; + aLeft = pList; + nLeft = nList; + nDist = iPrevTok-iTok; } - fts3DoclistMerge(mergetype, 0, 0, pList, &nOut, pOut, nOut, pList, nList); - sqlite3_free(pOut); - pOut = pList; + pOut = aRight; + fts3DoclistMerge( + mt, nDist, 0, pOut, &nOut, aLeft, nLeft, aRight, nRight, &nDoc + ); + sqlite3_free(aLeft); } assert( nOut==0 || pOut!=0 ); + + iPrevTok = iTok; } if( rc==SQLITE_OK ){ + if( ii!=pPhrase->nToken ){ + assert( pCsr->doDeferred==0 && isReqPos==0 ); + fts3DoclistStripPositions(pOut, &nOut); + } *paOut = pOut; *pnOut = nOut; }else{ @@ -1972,7 +2296,7 @@ static int fts3NearMerge( rc = SQLITE_NOMEM; }else{ rc = fts3DoclistMerge(mergetype, nNear+nTokenRight, nNear+nTokenLeft, - aOut, pnOut, aLeft, nLeft, aRight, nRight + aOut, pnOut, aLeft, nLeft, aRight, nRight, 0 ); if( rc!=SQLITE_OK ){ sqlite3_free(aOut); @@ -2018,14 +2342,99 @@ int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){ return rc; } +typedef struct ExprAndCost ExprAndCost; +struct ExprAndCost { + Fts3Expr *pExpr; + int nCost; +}; + +int fts3ExprCost(Fts3Expr *pExpr){ + int nCost; /* Return value */ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int ii; + nCost = 0; + for(ii=0; ii<pPhrase->nToken; ii++){ + nCost += pPhrase->aToken[ii].pArray->nCost; + } + }else{ + nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); + } + return nCost; +} + +static void fts3ExprAssignCosts( + Fts3Expr *pExpr, /* Expression to create seg-readers for */ + ExprAndCost **ppExprCost /* OUT: Write to *ppExprCost */ +){ + if( pExpr->eType==FTSQUERY_AND ){ + fts3ExprAssignCosts(pExpr->pLeft, ppExprCost); + fts3ExprAssignCosts(pExpr->pRight, ppExprCost); + }else{ + (*ppExprCost)->pExpr = pExpr; + (*ppExprCost)->nCost = fts3ExprCost(pExpr);; + (*ppExprCost)++; + } +} + +static int fts3ExprAllocateSegReaders( + Fts3Cursor *pCsr, /* FTS3 table */ + Fts3Expr *pExpr, /* Expression to create seg-readers for */ + int *pnExpr /* OUT: Number of AND'd expressions */ +){ + int rc = SQLITE_OK; /* Return code */ + + if( pCsr->doDeferred ) return SQLITE_OK; + if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ + (*pnExpr)++; + pnExpr = 0; + } + + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + Fts3Phrase *pPhrase = pExpr->pPhrase; + int ii; + + for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){ + Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; + if( pTok->pArray==0 ){ + rc = fts3TermSegReaderArray( + pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray + ); + } + } + }else{ + rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr); + if( rc==SQLITE_OK ){ + rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pRight, pnExpr); + } + } + return rc; +} + +static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase ){ + int kk; + for(kk=0; kk<pPhrase->nToken; kk++){ + fts3SegReaderArrayFree(pPhrase->aToken[kk].pArray); + pPhrase->aToken[kk].pArray = 0; + } + } + fts3ExprFreeSegReaders(pExpr->pLeft); + fts3ExprFreeSegReaders(pExpr->pRight); + } +} + /* ** Evaluate the full-text expression pExpr against fts3 table pTab. Store ** the resulting doclist in *paOut and *pnOut. This routine mallocs for ** the space needed to store the output. The caller is responsible for ** freeing the space when it has finished. */ -static int evalFts3Expr( - Fts3Table *p, /* Virtual table handle */ +static int fts3EvalExpr( + Fts3Cursor *p, /* Virtual table cursor handle */ Fts3Expr *pExpr, /* Parsed fts3 expression */ char **paOut, /* OUT: Pointer to malloc'd result buffer */ int *pnOut, /* OUT: Size of buffer at *paOut */ @@ -2038,27 +2447,96 @@ static int evalFts3Expr( *pnOut = 0; if( pExpr ){ + assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT + || pExpr->eType==FTSQUERY_PHRASE + ); assert( pExpr->eType==FTSQUERY_PHRASE || pExpr->eType==FTSQUERY_NEAR || isReqPos==0 ); + if( pExpr->eType==FTSQUERY_PHRASE ){ - rc = fts3PhraseSelect(p, pExpr->pPhrase, + rc = fts3PhraseSelect(p, pExpr->pPhrase, isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), paOut, pnOut ); + fts3ExprFreeSegReaders(pExpr); + }else if( p->doDeferred==0 && pExpr->eType==FTSQUERY_AND ){ + ExprAndCost *aExpr = 0; /* Array of AND'd expressions and costs */ + int nExpr = 0; /* Size of aExpr[] */ + char *aRet = 0; /* Doclist to return to caller */ + int nRet = 0; /* Length of aRet[] in bytes */ + int nDoc = 0x7FFFFFFF; + + assert( !isReqPos ); + + rc = fts3ExprAllocateSegReaders(p, pExpr, &nExpr); + if( rc==SQLITE_OK ){ + aExpr = sqlite3_malloc(sizeof(ExprAndCost) * nExpr); + if( !aExpr ) rc = SQLITE_NOMEM; + } + if( rc==SQLITE_OK ){ + int ii; /* Used to iterate through expressions */ + + fts3ExprAssignCosts(pExpr, &aExpr); + aExpr -= nExpr; + for(ii=0; ii<nExpr; ii++){ + char *aNew; + int nNew; + int jj; + ExprAndCost *pBest = 0; + + for(jj=0; jj<nExpr; jj++){ + ExprAndCost *pCand = &aExpr[jj]; + if( pCand->pExpr && (pBest==0 || pCand->nCost<pBest->nCost) ){ + pBest = pCand; + } + } + + if( pBest->nCost>nDoc ){ + rc = fts3DeferExpression(p, p->pExpr); + break; + }else{ + rc = fts3EvalExpr(p, pBest->pExpr, &aNew, &nNew, 0); + if( rc!=SQLITE_OK ) break; + pBest->pExpr = 0; + if( ii==0 ){ + aRet = aNew; + nRet = nNew; + if( nExpr>1 ){ + nDoc = fts3DoclistCountDocids(0, aRet, nRet); + } + }else{ + fts3DoclistMerge( + MERGE_AND, 0, 0, aRet, &nRet, aRet, nRet, aNew, nNew, &nDoc + ); + sqlite3_free(aNew); + } + } + } + } + + *paOut = aRet; + *pnOut = nRet; + sqlite3_free(aExpr); + fts3ExprFreeSegReaders(pExpr); + }else{ char *aLeft; char *aRight; int nLeft; int nRight; - if( 0==(rc = evalFts3Expr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) - && 0==(rc = evalFts3Expr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) + assert( pExpr->eType==FTSQUERY_NEAR + || pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_NOT + || (pExpr->eType==FTSQUERY_AND && p->doDeferred) + ); + + if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) + && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) ){ - assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT - ); switch( pExpr->eType ){ case FTSQUERY_NEAR: { Fts3Expr *pLeft; @@ -2093,7 +2571,7 @@ static int evalFts3Expr( */ char *aBuffer = sqlite3_malloc(nRight+nLeft+1); rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut, - aLeft, nLeft, aRight, nRight + aLeft, nLeft, aRight, nRight, 0 ); *paOut = aBuffer; sqlite3_free(aLeft); @@ -2103,7 +2581,7 @@ static int evalFts3Expr( default: { assert( FTSQUERY_NOT==MERGE_NOT && FTSQUERY_AND==MERGE_AND ); fts3DoclistMerge(pExpr->eType, 0, 0, aLeft, pnOut, - aLeft, nLeft, aRight, nRight + aLeft, nLeft, aRight, nRight, 0 ); *paOut = aLeft; break; @@ -2118,6 +2596,72 @@ static int evalFts3Expr( } /* +** +*/ +static int fts3EvalDeferred(Fts3Cursor *pCsr, int *pbRes){ + int rc = SQLITE_OK; + if( pCsr->pDeferred==0 ){ + *pbRes = 1; + }else{ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ + sqlite3Fts3FreeDeferredDoclists(pCsr); + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + if( rc==SQLITE_OK ){ + char *a = 0; + int n = 0; + pCsr->doDeferred = 1; + rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0); + pCsr->doDeferred = 0; + assert( n>=0 ); + *pbRes = (n>0); + sqlite3_free(a); + } + } + return rc; +} + +/* +** Advance the cursor to the next row in the %_content table that +** matches the search criteria. For a MATCH search, this will be +** the next row that matches. For a full-table scan, this will be +** simply the next row in the %_content table. For a docid lookup, +** this routine simply sets the EOF flag. +** +** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned +** even if we reach end-of-file. The fts3EofMethod() will be called +** subsequently to determine whether or not an EOF was hit. +*/ +static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ + int res; + int rc = SQLITE_OK; /* Return code */ + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + + do { + if( pCsr->aDoclist==0 ){ + if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ + pCsr->isEof = 1; + rc = sqlite3_reset(pCsr->pStmt); + break; + } + pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); + }else{ + if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ + pCsr->isEof = 1; + break; + } + sqlite3_reset(pCsr->pStmt); + fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + } + }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 ); + + return rc; +} + +/* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional ** information. @@ -2160,6 +2704,7 @@ static int fts3FilterMethod( assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); assert( nVal==0 || nVal==1 ); assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); + assert( p->pSegments==0 ); /* In case the cursor has been used before, clear it now. */ sqlite3_finalize(pCsr->pStmt); @@ -2167,24 +2712,7 @@ static int fts3FilterMethod( sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); - /* Compile a SELECT statement for this cursor. For a full-table-scan, the - ** statement loops through all rows of the %_content table. For a - ** full-text query or docid lookup, the statement retrieves a single - ** row by docid. - */ - zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); - } - if( rc!=SQLITE_OK ) return rc; - pCsr->eSearch = (i16)idxNum; - - if( idxNum==FTS3_DOCID_SEARCH ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); - }else if( idxNum!=FTS3_FULLSCAN_SEARCH ){ + if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ int iCol = idxNum-FTS3_FULLTEXT_SEARCH; const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); @@ -2206,11 +2734,34 @@ static int fts3FilterMethod( rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; - rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); + rc = fts3EvalExpr(pCsr, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); + sqlite3Fts3SegmentsClose(p); + if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; pCsr->iPrevId = 0; + if( pCsr->nDoclist<0 ){ + assert( pCsr->aDoclist==0 ); + idxNum = FTS3_FULLSCAN_SEARCH; + } } + /* Compile a SELECT statement for this cursor. For a full-table-scan, the + ** statement loops through all rows of the %_content table. For a + ** full-text query or docid lookup, the statement retrieves a single + ** row by docid. + */ + zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + } + if( rc==SQLITE_OK && idxNum==FTS3_DOCID_SEARCH ){ + rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + } + pCsr->eSearch = (i16)idxNum; + if( rc!=SQLITE_OK ) return rc; return fts3NextMethod(pCursor); } @@ -2296,7 +2847,9 @@ static int fts3UpdateMethod( ** hash-table to the database. */ static int fts3SyncMethod(sqlite3_vtab *pVtab){ - return sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab); + int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab); + sqlite3Fts3SegmentsClose((Fts3Table *)pVtab); + return rc; } /* @@ -2334,8 +2887,12 @@ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ ** This is used by the matchinfo(), snippet() and offsets() auxillary ** functions. */ -int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){ - return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); +int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ + int rc; + pCsr->doDeferred = 1; + rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); + pCsr->doDeferred = 0; + return rc; } /* @@ -2626,7 +3183,7 @@ static const sqlite3_module fts3Module = { /* xDisconnect */ fts3DisconnectMethod, /* xDestroy */ fts3DestroyMethod, /* xOpen */ fts3OpenMethod, - /* xClose */ fulltextClose, + /* xClose */ fts3CloseMethod, /* xFilter */ fts3FilterMethod, /* xNext */ fts3NextMethod, /* xEof */ fts3EofMethod, diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 8ed31aedd..c98271729 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -96,8 +96,12 @@ typedef struct Fts3Table Fts3Table; typedef struct Fts3Cursor Fts3Cursor; typedef struct Fts3Expr Fts3Expr; typedef struct Fts3Phrase Fts3Phrase; -typedef struct Fts3SegReader Fts3SegReader; +typedef struct Fts3PhraseToken Fts3PhraseToken; + typedef struct Fts3SegFilter Fts3SegFilter; +typedef struct Fts3DeferredToken Fts3DeferredToken; +typedef struct Fts3SegReader Fts3SegReader; +typedef struct Fts3SegReaderArray Fts3SegReaderArray; /* ** A connection to a fulltext index is an instance of the following @@ -120,20 +124,12 @@ struct Fts3Table { */ sqlite3_stmt *aStmt[25]; - /* Pointer to string containing the SQL: - ** - ** "SELECT block FROM %_segments WHERE blockid BETWEEN ? AND ? - ** ORDER BY blockid" - */ - char *zSelectLeaves; - int nLeavesStmt; /* Valid statements in aLeavesStmt */ - int nLeavesTotal; /* Total number of prepared leaves stmts */ - int nLeavesAlloc; /* Allocated size of aLeavesStmt */ - sqlite3_stmt **aLeavesStmt; /* Array of prepared zSelectLeaves stmts */ - + char *zSegmentsTbl; /* Name of %_segments table */ + int nPgsz; /* Page size for host database */ int nNodeSize; /* Soft limit for node size */ u8 bHasContent; /* True if %_content table exists */ u8 bHasDocsize; /* True if %_docsize table exists */ + sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ /* The following hash table is used to buffer pending index updates during ** transactions. Variable nPendingData estimates the memory size of the @@ -160,12 +156,16 @@ struct Fts3Cursor { u8 isRequireSeek; /* True if must seek pStmt to %_content row */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ Fts3Expr *pExpr; /* Parsed MATCH query string */ + Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ + + int doDeferred; + int nRowAvg; /* Average size of database rows, in pages */ }; /* @@ -190,18 +190,23 @@ struct Fts3Cursor { /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. -** For a sequence of tokens contained in "...", nToken will be the number -** of tokens in the string. +** For a sequence of tokens contained in double-quotes (i.e. "one two three") +** nToken will be the number of tokens in the string. */ + +struct Fts3PhraseToken { + char *z; /* Text of the token */ + int n; /* Number of bytes in buffer z */ + int isPrefix; /* True if token ends with a "*" character */ + Fts3SegReaderArray *pArray; + Fts3DeferredToken *pDeferred; +}; + struct Fts3Phrase { int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ int isNot; /* Phrase prefixed by unary not (-) operator */ - struct PhraseToken { - char *z; /* Text of the token */ - int n; /* Number of bytes in buffer pointed to by z */ - int isPrefix; /* True if token ends in with a "*" character */ - } aToken[1]; /* One entry for each token in the phrase */ + Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ }; /* @@ -225,6 +230,8 @@ struct Fts3Expr { Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ + int bDeferred; + int isLoaded; /* True if aDoclist/nDoclist are initialized. */ char *aDoclist; /* Buffer containing doclist */ int nDoclist; /* Size of aDoclist in bytes */ @@ -275,6 +282,14 @@ int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor*, u32*); int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor*, u32*); int sqlite3Fts3ReadLock(Fts3Table *); +void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); +int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); +int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); +void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); +char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); + +void sqlite3Fts3SegmentsClose(Fts3Table *); + /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 @@ -297,7 +312,7 @@ int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); -int sqlite3Fts3ExprLoadDoclist(Fts3Table *, Fts3Expr *); +int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); /* fts3_tokenizer.c */ diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index 008ba8148..0f411f097 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -223,7 +223,7 @@ static int getNextString( rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); - p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken)); + p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); if( !p || !zTemp ){ goto no_mem; @@ -235,6 +235,8 @@ static int getNextString( p->pPhrase = (Fts3Phrase *)&p[1]; p->pPhrase->nToken = ii+1; p->pPhrase->aToken[ii].n = nToken; + p->pPhrase->aToken[ii].pDeferred = 0; + p->pPhrase->aToken[ii].pArray = 0; memcpy(&zTemp[nTemp], zToken, nToken); nTemp += nToken; if( iEnd<nInput && zInput[iEnd]=='*' ){ @@ -254,7 +256,7 @@ static int getNextString( char *zNew = NULL; int nNew = 0; int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); - nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(struct PhraseToken); + nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); p = fts3ReallocOrFree(p, nByte + nTemp); if( !p ){ goto no_mem; diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index d67f7ac09..21f858545 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -24,7 +24,7 @@ */ typedef struct LoadDoclistCtx LoadDoclistCtx; struct LoadDoclistCtx { - Fts3Table *pTab; /* FTS3 Table */ + Fts3Cursor *pCsr; /* FTS3 Cursor */ int nPhrase; /* Number of phrases seen so far */ int nToken; /* Number of tokens seen so far */ }; @@ -218,7 +218,7 @@ static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){ p->nToken += pExpr->pPhrase->nToken; if( pExpr->isLoaded==0 ){ - rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); + rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); pExpr->isLoaded = 1; if( rc==SQLITE_OK ){ rc = fts3ExprNearTrim(pExpr); @@ -261,13 +261,14 @@ static int fts3ExprLoadDoclists( ){ int rc; /* Return Code */ LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ - sCtx.pTab = (Fts3Table *)pCsr->base.pVtab; + sCtx.pCsr = pCsr; rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx); if( rc==SQLITE_OK ){ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0); } if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; + sqlite3Fts3SegmentsClose((Fts3Table *)pCsr->base.pVtab); return rc; } diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index e43436095..b54ece336 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -42,6 +42,17 @@ struct PendingList { sqlite3_int64 iLastPos; }; + +/* +** Each cursor has a (possibly empty) linked list of the following objects. +*/ +struct Fts3DeferredToken { + Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ + int iCol; /* Column token must occur in */ + Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ + PendingList *pList; /* Doclist is assembled here */ +}; + /* ** An instance of this structure is used to iterate through the terms on ** a contiguous set of segment b-tree leaf nodes. Although the details of @@ -51,6 +62,7 @@ struct PendingList { ** ** sqlite3Fts3SegReaderNew() ** sqlite3Fts3SegReaderFree() +** sqlite3Fts3SegReaderCost() ** sqlite3Fts3SegReaderIterate() ** ** Methods used to manipulate Fts3SegReader structures: @@ -61,9 +73,13 @@ struct PendingList { */ struct Fts3SegReader { int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ - sqlite3_int64 iStartBlock; - sqlite3_int64 iEndBlock; - sqlite3_stmt *pStmt; /* SQL Statement to access leaf nodes */ + + sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ + sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ + sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ + sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ + sqlite3_blob *pBlob; /* Blob open on iStartBlock */ + char *aNode; /* Pointer to node data (or NULL) */ int nNode; /* Size of buffer at aNode (or 0) */ int nTermAlloc; /* Allocated size of zTerm buffer */ @@ -85,6 +101,7 @@ struct Fts3SegReader { }; #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) +#define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1]) /* ** An instance of this structure is used to create a segment b-tree in the @@ -490,10 +507,10 @@ static int fts3PendingListAppend( ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. */ static int fts3PendingTermsAdd( - Fts3Table *p, /* FTS table into which text will be inserted */ - const char *zText, /* Text of document to be inseted */ - int iCol, /* Column number into which text is inserted */ - u32 *pnWord /* OUT: Number of tokens inserted */ + Fts3Table *p, /* Table into which text will be inserted */ + const char *zText, /* Text of document to be inserted */ + int iCol, /* Column into which text is being inserted */ + u32 *pnWord /* OUT: Number of tokens inserted */ ){ int rc; int iStart; @@ -787,11 +804,63 @@ static int fts3AllocateSegdirIdx(Fts3Table *p, int iLevel, int *piIdx){ } /* +** The %_segments table is declared as follows: +** +** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) +*/ +static int fts3SegmentsBlob( + Fts3Table *p, + sqlite3_int64 iSegment, + char **paBlob, + int *pnBlob +){ + int rc; + + if( p->pSegments ){ + rc = sqlite3_blob_reopen(p->pSegments, iSegment); + }else{ + if( 0==p->zSegmentsTbl ){ + p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); + if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; + } + rc = sqlite3_blob_open( + p->db, p->zDb, p->zSegmentsTbl, "block", iSegment, 0, &p->pSegments + ); + } + + if( rc==SQLITE_OK ){ + int nByte = sqlite3_blob_bytes(p->pSegments); + if( paBlob ){ + char *aByte = sqlite3_malloc(nByte); + if( !aByte ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); + if( rc!=SQLITE_OK ){ + sqlite3_free(aByte); + aByte = 0; + } + } + *paBlob = aByte; + } + *pnBlob = nByte; + } + + return rc; +} + +void sqlite3Fts3SegmentsClose(Fts3Table *p){ + sqlite3_blob_close(p->pSegments); + p->pSegments = 0; +} + + +/* ** Move the iterator passed as the first argument to the next term in the ** segment. If successful, SQLITE_OK is returned. If there is no next term, ** SQLITE_DONE. Otherwise, an SQLite error code. */ -static int fts3SegReaderNext(Fts3SegReader *pReader){ +static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){ char *pNext; /* Cursor variable */ int nPrefix; /* Number of bytes in term prefix */ int nSuffix; /* Number of bytes in term suffix */ @@ -803,7 +872,9 @@ static int fts3SegReaderNext(Fts3SegReader *pReader){ } if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ + sqlite3_blob *pBlob; int rc; + if( fts3SegReaderIsPending(pReader) ){ Fts3HashElem *pElem = *(pReader->ppNextElem); if( pElem==0 ){ @@ -819,17 +890,25 @@ static int fts3SegReaderNext(Fts3SegReader *pReader){ } return SQLITE_OK; } - if( !pReader->pStmt ){ - pReader->aNode = 0; + + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); + } + pReader->aNode = 0; + + /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf + ** blocks have already been traversed. */ + if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ return SQLITE_OK; } - rc = sqlite3_step(pReader->pStmt); - if( rc!=SQLITE_ROW ){ - pReader->aNode = 0; - return (rc==SQLITE_DONE ? SQLITE_OK : rc); + + rc = fts3SegmentsBlob( + p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode + ); + + if( rc!=SQLITE_OK ){ + return rc; } - pReader->nNode = sqlite3_column_bytes(pReader->pStmt, 0); - pReader->aNode = (char *)sqlite3_column_blob(pReader->pStmt, 0); pNext = pReader->aNode; } @@ -915,24 +994,102 @@ static void fts3SegReaderNextDocid( } /* +** This function is called to estimate the amount of data that will be +** loaded from the disk If SegReaderIterate() is called on this seg-reader, +** in units of average document size. +** +** This can be used as follows: If the caller has a small doclist that +** contains references to N documents, and is considering merging it with +** a large doclist (size X "average documents"), it may opt not to load +** the large doclist if X>N. +*/ +int sqlite3Fts3SegReaderCost( + Fts3Cursor *pCsr, /* FTS3 cursor handle */ + Fts3SegReader *pReader, /* Segment-reader handle */ + int *pnCost /* IN/OUT: Number of bytes read */ +){ + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Return code */ + int nCost = 0; /* Cost in bytes to return */ + sqlite3_int64 iLeaf; /* Used to iterate through required leaves */ + int pgsz = p->nPgsz; /* Database page size */ + + /* If this seg-reader is reading the pending-terms table, or if all data + ** for the segment is stored on the root page of the b-tree, then the cost + ** is zero. In this case all required data is already in main memory. + */ + if( p->bHasDocsize + && !fts3SegReaderIsPending(pReader) + && !fts3SegReaderIsRootOnly(pReader) + ){ + int nBlob = 0; + sqlite3_int64 iBlock; + + if( pCsr->nRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + sqlite3_stmt *pStmt; + rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0); + if( rc ) return rc; + if( sqlite3_step(pStmt)==SQLITE_ROW ){ + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *a = sqlite3_column_blob(pStmt, 0); + if( a ){ + const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarint(a, &nDoc); + while( a<pEnd ){ + sqlite3_int64 nVarint; + a += sqlite3Fts3GetVarint(a, &nVarint); + nByte += nVarint; + } + } + + pCsr->nRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz); + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc; + } + + /* Assume that a blob flows over onto overflow pages if it is larger + ** than (pgsz-35) bytes in size (the file-format documentation + ** confirms this). + */ + for(iBlock=pReader->iStartBlock; iBlock<=pReader->iLeafEndBlock; iBlock++){ + rc = fts3SegmentsBlob(p, iBlock, 0, &nBlob); + if( rc!=SQLITE_OK ) break; + if( (nBlob+35)>pgsz ){ + int nOvfl = (nBlob + 34)/pgsz; + nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg); + } + } + } + + *pnCost += nCost; + return rc; +} + +/* ** Free all allocations associated with the iterator passed as the ** second argument. */ void sqlite3Fts3SegReaderFree(Fts3Table *p, Fts3SegReader *pReader){ - if( pReader ){ - if( pReader->pStmt ){ - /* Move the leaf-range SELECT statement to the aLeavesStmt[] array, - ** so that it can be reused when required by another query. - */ - assert( p->nLeavesStmt<p->nLeavesTotal ); - sqlite3_reset(pReader->pStmt); - p->aLeavesStmt[p->nLeavesStmt++] = pReader->pStmt; - } - if( !fts3SegReaderIsPending(pReader) ){ - sqlite3_free(pReader->zTerm); + if( pReader && !fts3SegReaderIsPending(pReader) ){ + sqlite3_free(pReader->zTerm); + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); } - sqlite3_free(pReader); } + sqlite3_free(pReader); } /* @@ -952,6 +1109,7 @@ int sqlite3Fts3SegReaderNew( Fts3SegReader *pReader; /* Newly allocated SegReader object */ int nExtra = 0; /* Bytes to allocate segment root node */ + assert( iStartLeaf<=iEndLeaf ); if( iStartLeaf==0 ){ nExtra = nRoot; } @@ -961,8 +1119,9 @@ int sqlite3Fts3SegReaderNew( return SQLITE_NOMEM; } memset(pReader, 0, sizeof(Fts3SegReader)); - pReader->iStartBlock = iStartLeaf; pReader->iIdx = iAge; + pReader->iStartBlock = iStartLeaf; + pReader->iLeafEndBlock = iEndLeaf; pReader->iEndBlock = iEndBlock; if( nExtra ){ @@ -971,52 +1130,9 @@ int sqlite3Fts3SegReaderNew( pReader->nNode = nRoot; memcpy(pReader->aNode, zRoot, nRoot); }else{ - /* If the text of the SQL statement to iterate through a contiguous - ** set of entries in the %_segments table has not yet been composed, - ** compose it now. - */ - if( !p->zSelectLeaves ){ - p->zSelectLeaves = sqlite3_mprintf( - "SELECT block FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ? " - "ORDER BY blockid", p->zDb, p->zName - ); - if( !p->zSelectLeaves ){ - rc = SQLITE_NOMEM; - goto finished; - } - } - - /* If there are no free statements in the aLeavesStmt[] array, prepare - ** a new statement now. Otherwise, reuse a prepared statement from - ** aLeavesStmt[]. - */ - if( p->nLeavesStmt==0 ){ - if( p->nLeavesTotal==p->nLeavesAlloc ){ - int nNew = p->nLeavesAlloc + 16; - sqlite3_stmt **aNew = (sqlite3_stmt **)sqlite3_realloc( - p->aLeavesStmt, nNew*sizeof(sqlite3_stmt *) - ); - if( !aNew ){ - rc = SQLITE_NOMEM; - goto finished; - } - p->nLeavesAlloc = nNew; - p->aLeavesStmt = aNew; - } - rc = sqlite3_prepare_v2(p->db, p->zSelectLeaves, -1, &pReader->pStmt, 0); - if( rc!=SQLITE_OK ){ - goto finished; - } - p->nLeavesTotal++; - }else{ - pReader->pStmt = p->aLeavesStmt[--p->nLeavesStmt]; - } - - /* Bind the start and end leaf blockids to the prepared SQL statement. */ - sqlite3_bind_int64(pReader->pStmt, 1, iStartLeaf); - sqlite3_bind_int64(pReader->pStmt, 2, iEndLeaf); + pReader->iCurrentBlock = iStartLeaf-1; } - rc = fts3SegReaderNext(pReader); + rc = fts3SegReaderNext(p, pReader); finished: if( rc==SQLITE_OK ){ @@ -1113,7 +1229,7 @@ int sqlite3Fts3SegReaderPending( pReader->iIdx = 0x7FFFFFFF; pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); - fts3SegReaderNext(pReader); + fts3SegReaderNext(p, pReader); } } @@ -1991,7 +2107,7 @@ int sqlite3Fts3SegReaderIterate( for(i=0; i<nSegment; i++){ Fts3SegReader *pSeg = apSegment[i]; while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){ - rc = fts3SegReaderNext(pSeg); + rc = fts3SegReaderNext(p, pSeg); if( rc!=SQLITE_OK ) goto finished; } } } @@ -2102,7 +2218,7 @@ int sqlite3Fts3SegReaderIterate( } for(i=0; i<nMerge; i++){ - rc = fts3SegReaderNext(apSegment[i]); + rc = fts3SegReaderNext(p, apSegment[i]); if( rc!=SQLITE_OK ) goto finished; } fts3SegReaderSort(apSegment, nSegment, nMerge, fts3SegReaderCmp); @@ -2517,10 +2633,158 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ rc = SQLITE_ERROR; } + sqlite3Fts3SegmentsClose(p); + return rc; +} + +/* +** Return the deferred doclist associated with deferred token pDeferred. +** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already +** been called to allocate and populate the doclist. +*/ +char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *pDeferred, int *pnByte){ + if( pDeferred->pList ){ + *pnByte = pDeferred->pList->nData; + return pDeferred->pList->aData; + } + *pnByte = 0; + return 0; +} + +/* +** Helper fucntion for FreeDeferredDoclists(). This function removes all +** references to deferred doclists from within the tree of Fts3Expr +** structures headed by +*/ +static void fts3DeferredDoclistClear(Fts3Expr *pExpr){ + if( pExpr ){ + fts3DeferredDoclistClear(pExpr->pLeft); + fts3DeferredDoclistClear(pExpr->pRight); + if( pExpr->bDeferred && pExpr->isLoaded ){ + sqlite3_free(pExpr->aDoclist); + pExpr->isLoaded = 0; + pExpr->aDoclist = 0; + pExpr->nDoclist = 0; + pExpr->pCurrent = 0; + pExpr->iCurrent = 0; + } + } +} + +/* +** Delete all cached deferred doclists. Deferred doclists are cached +** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. +*/ +void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ + sqlite3_free(pDef->pList); + pDef->pList = 0; + } + fts3DeferredDoclistClear(pCsr->pExpr); +} + +/* +** Free all entries in the pCsr->pDeffered list. Entries are added to +** this list using sqlite3Fts3DeferToken(). +*/ +void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + Fts3DeferredToken *pNext; + for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ + pNext = pDef->pNext; + sqlite3_free(pDef->pList); + sqlite3_free(pDef); + } + pCsr->pDeferred = 0; +} + +/* +** Generate deferred-doclists for all tokens in the pCsr->pDeferred list +** based on the row that pCsr currently points to. +** +** A deferred-doclist is like any other doclist with position information +** included, except that it only contains entries for a single row of the +** table, not for all rows. +*/ +int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return code */ + if( pCsr->pDeferred ){ + int i; /* Used to iterate through table columns */ + sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ + Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ + + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer *pT = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pT->pModule; + + assert( pCsr->isRequireSeek==0 ); + iDocid = sqlite3_column_int64(pCsr->pStmt, 0); + + for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ + const char *zText = sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; + + rc = pModule->xOpen(pT, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken; /* Number of bytes in token */ + int iDum1, iDum2; /* Dummy variables */ + int iPos; /* Position of token in zText */ + + pTC->pTokenizer = pT; + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) + && (0==memcmp(zToken, pPT->z, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + } + } + } + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + if( pDef->pList ){ + rc = fts3PendingListAppendVarint(&pDef->pList, 0); + } + } + } + return rc; } /* +** Add an entry for token pToken to the pCsr->pDeferred list. +*/ +int sqlite3Fts3DeferToken( + Fts3Cursor *pCsr, /* Fts3 table cursor */ + Fts3PhraseToken *pToken, /* Token to defer */ + int iCol /* Column that token must appear in (or -1) */ +){ + Fts3DeferredToken *pDeferred; + pDeferred = sqlite3_malloc(sizeof(*pDeferred)); + if( !pDeferred ){ + return SQLITE_NOMEM; + } + memset(pDeferred, 0, sizeof(*pDeferred)); + pDeferred->pToken = pToken; + pDeferred->pNext = pCsr->pDeferred; + pDeferred->iCol = iCol; + pCsr->pDeferred = pDeferred; + + assert( pToken->pDeferred==0 ); + pToken->pDeferred = pDeferred; + + return SQLITE_OK; +} + + +/* ** This function does the work for the xUpdate method of FTS3 virtual ** tables. */ @@ -2538,6 +2802,7 @@ int sqlite3Fts3UpdateMethod( u32 *aSzDel; /* Sizes of deleted documents */ int nChng = 0; /* Net change in number of documents */ + assert( p->pSegments==0 ); /* Allocate space to hold the change in document sizes */ aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*p->nColumn*2 ); @@ -2593,6 +2858,7 @@ int sqlite3Fts3UpdateMethod( } sqlite3_free(aSzIns); + sqlite3Fts3SegmentsClose(p); return rc; } @@ -2616,6 +2882,7 @@ int sqlite3Fts3Optimize(Fts3Table *p){ sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); } } + sqlite3Fts3SegmentsClose(p); return rc; } diff --git a/ext/fts3/fts3speed.tcl b/ext/fts3/fts3speed.tcl new file mode 100644 index 000000000..377cb1960 --- /dev/null +++ b/ext/fts3/fts3speed.tcl @@ -0,0 +1,122 @@ + + +#-------------------------------------------------------------------------- +# This script contains several sub-programs used to test FTS3/FTS4 +# performance. It does not run the queries directly, but generates SQL +# scripts that can be run using the shell tool. +# +# The following cases are tested: +# +# 1. Inserting documents into an FTS3 table. +# 2. Optimizing an FTS3 table (i.e. "INSERT INTO t1 VALUES('optimize')"). +# 3. Deleting documents from an FTS3 table. +# 4. Querying FTS3 tables. +# + +# Number of tokens in vocabulary. And number of tokens in each document. +# +set VOCAB_SIZE 2000 +set DOC_SIZE 100 + +set NUM_INSERTS 100000 +set NUM_SELECTS 1000 + +# Force everything in this script to be deterministic. +# +expr {srand(0)} + +proc usage {} { + puts stderr "Usage: $::argv0 <rows> <selects>" + exit -1 +} + +proc sql {sql} { + puts $::fd $sql +} + + +# Return a list of $nWord randomly generated tokens each between 2 and 10 +# characters in length. +# +proc build_vocab {nWord} { + set ret [list] + set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z] + for {set i 0} {$i<$nWord} {incr i} { + set len [expr {int((rand()*9.0)+2)}] + set term "" + for {set j 0} {$j<$len} {incr j} { + append term [lindex $chars [expr {int(rand()*[llength $chars])}]] + } + lappend ret $term + } + set ret +} + +proc select_term {} { + set n [llength $::vocab] + set t [expr int(rand()*$n*3)] + if {$t>=2*$n} { set t [expr {($t-2*$n)/100}] } + if {$t>=$n} { set t [expr {($t-$n)/10}] } + lindex $::vocab $t +} + +proc select_doc {nTerm} { + set ret [list] + for {set i 0} {$i<$nTerm} {incr i} { + lappend ret [select_term] + } + set ret +} + +proc test_1 {nInsert} { + sql "PRAGMA synchronous = OFF;" + sql "DROP TABLE IF EXISTS t1;" + sql "CREATE VIRTUAL TABLE t1 USING fts4;" + for {set i 0} {$i < $nInsert} {incr i} { + set doc [select_doc $::DOC_SIZE] + sql "INSERT INTO t1 VALUES('$doc');" + } +} + +proc test_2 {} { + sql "INSERT INTO t1(t1) VALUES('optimize');" +} + +proc test_3 {nSelect} { + for {set i 0} {$i < $nSelect} {incr i} { + sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term]';" + } +} + +proc test_4 {nSelect} { + for {set i 0} {$i < $nSelect} {incr i} { + sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term] [select_term]';" + } +} + +if {[llength $argv]!=0} usage + +set ::vocab [build_vocab $::VOCAB_SIZE] + +set ::fd [open fts3speed_insert.sql w] +test_1 $NUM_INSERTS +close $::fd + +set ::fd [open fts3speed_select.sql w] +test_3 $NUM_SELECTS +close $::fd + +set ::fd [open fts3speed_select2.sql w] +test_4 $NUM_SELECTS +close $::fd + +set ::fd [open fts3speed_optimize.sql w] +test_2 +close $::fd + +puts "Success. Created files:" +puts " fts3speed_insert.sql" +puts " fts3speed_select.sql" +puts " fts3speed_select2.sql" +puts " fts3speed_optimize.sql" + @@ -1,5 +1,5 @@ -C Fix\ssome\ssegfaults\sthat\scould\soccur\sin\sobscure\scircumstances\swhere\serror\smessages\scontained\scharacters\sthat\scould\sbe\smistaken\sfor\sprintf\sformat\sspecifiers. -D 2010-10-21T15:12:44 +C Merge\strunk\schanges\sinto\sexperimental\sbranch. +D 2010-10-21T15:49:47 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2c8cefd962eca0147132c7cf9eaa4bb24c656f3f F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -61,19 +61,20 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 03be86c59ac1a60d448c6eda460a8975ff2f170d +F ext/fts3/fts3.c f423181b76fc35c38c4dcf4d8aac012813817f34 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h b4f0b05ccafe1e5b4be2f052e9840dbd78a0395f -F ext/fts3/fts3_expr.c 42d5697731cd30fbeabd081bb3e6d3df5531f606 +F ext/fts3/fts3Int.h f80be5abfb24e51cb816287230c0d0c8f1712f59 +F ext/fts3/fts3_expr.c a5aee50edde20e5c9116199bd58be869a3a22c9f F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba -F ext/fts3/fts3_snippet.c 2c4c921155e4b6befd272041fb903d999ac07d30 +F ext/fts3/fts3_snippet.c ca60a2a47de5e7abb22a804ccd1a743f81c2fe3e F ext/fts3/fts3_tokenizer.c b4f2d01c24573852755bc92864816785dae39318 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d -F ext/fts3/fts3_write.c 97a583b9e1d23d5af4278f3ee3c16a37c3e077f4 +F ext/fts3/fts3_write.c be47d30cf80bc91e050ece18e2de7e207432be1a +F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 F ext/icu/icu.c 850e9a36567bbcce6bd85a4b68243cad8e3c2de2 @@ -118,7 +119,7 @@ F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34 F src/backup.c d5b0137bc20327af08c14772227cc35134839c30 F src/bitvec.c af50f1c8c0ff54d6bdb7a80e2fceca5a93670bef F src/btmutex.c 96a12f50f7a17475155971a241d85ec5171573ff -F src/btree.c 8a1b0267a4f1914aedbaa93d3fcf4f2e42141ea8 +F src/btree.c 3edab36d03d86c200cb9551467410f975d510aa9 F src/btree.h 2d1a83ad509047e8cc314fda7e054f99ff52414d F src/btreeInt.h c424f2f131cc61ddf130f9bd736b3df12c8a51f0 F src/build.c 00a327120d81ace6267e714ae8010c997d55de5d @@ -175,14 +176,14 @@ F src/resolve.c 1c0f32b64f8e3f555fe1f732f9d6f501a7f05706 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 F src/select.c 6a5c72fb0e8dc7f6133f5a9d7a747130ef0a00ea F src/shell.c 8517fc1f9c59ae4007e6cc8b9af91ab231ea2056 -F src/sqlite.h.in 13f219b9ab78f22603019fd193f09d5c8913795a +F src/sqlite.h.in 460599b35c035deb339d1c9933089ef32187ecc6 F src/sqlite3ext.h c90bd5507099f62043832d73f6425d8d5c5da754 F src/sqliteInt.h c63b0340dfdfde18ff255ddccf004edd2d073288 F src/sqliteLimit.h a17dcd3fb775d63b64a43a55c54cb282f9726f44 F src/status.c 496913d4e8441195f6f2a75b1c95993a45b9b30b F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e F src/tclsqlite.c e91019fb6787166abca23a81b16c07fecc2ed751 -F src/test1.c cbedc6ea7905b1361db054fbf7fcd0dafb6d844e +F src/test1.c f6e39615c8315e03798217a360810e4c59595627 F src/test2.c 80d323d11e909cf0eb1b6fbb4ac22276483bcf31 F src/test3.c 056093cfef69ff4227a6bdb9108564dc7f45e4bc F src/test4.c 0528360b5025688002a5feb6be906ddce52eaaee @@ -230,7 +231,7 @@ F src/vdbe.h 4de0efb4b0fdaaa900cf419b35c458933ef1c6d2 F src/vdbeInt.h 7f4cf1b2b69bef3a432b1f23dfebef57275436b4 F src/vdbeapi.c 5368714fa750270cf6430160287c21adff44582d F src/vdbeaux.c de0b06b11a25293e820a49159eca9f1c51a64716 -F src/vdbeblob.c 6e10c214efa3514ca2f1714773cc4cc5c7b05175 +F src/vdbeblob.c e6e485934fcc9201dd1bfd65864be2bb14243b5d F src/vdbemem.c 23723a12cd3ba7ab3099193094cbb2eb78956aa9 F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2 F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 @@ -419,7 +420,7 @@ F test/fts3ad.test e40570cb6f74f059129ad48bcef3d7cbc20dda49 F test/fts3ae.test ce32a13b34b0260928e4213b4481acf801533bda F test/fts3af.test d394978c534eabf22dd0837e718b913fd66b499c F test/fts3ag.test 0b7d303f61ae5d620c4efb5e825713ea34ff9441 -F test/fts3ah.test ba181d6a3dee0c929f0d69df67cac9c47cda6bff +F test/fts3ah.test dc9f66c32c296f1bc8bcc4535126bddfeca62894 F test/fts3ai.test d29cee6ed653e30de478066881cec8aa766531b2 F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4 F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d @@ -430,8 +431,9 @@ F test/fts3ao.test 8fee868a0e131b98ce3e8907dc69936278e8b29a F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984 F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958 -F test/fts3cov.test 3a9d8618a3107166530c447e808f8992372e0415 +F test/fts3cov.test 6f1ff88ff6b5abcfff6979098cb9d0c68a69202e F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52 +F test/fts3defer.test cf66bf69afcc2fb8373d3aed31c55399409e83f2 F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a @@ -532,7 +534,7 @@ F test/mallocH.test 79b65aed612c9b3ed2dcdaa727c85895fd1bfbdb F test/mallocI.test a88c2b9627c8506bf4703d8397420043a786cdb6 F test/mallocJ.test b5d1839da331d96223e5f458856f8ffe1366f62e F test/mallocK.test d79968641d1b70d88f6c01bdb9a7eb4a55582cc9 -F test/malloc_common.tcl cda732c0d2365a058c2a73778cf6b6da6db54452 +F test/malloc_common.tcl 9dfb33f12173f9a8b029dae0443c569b59b980b6 F test/manydb.test b3d3bc4c25657e7f68d157f031eb4db7b3df0d3c F test/memdb.test 0825155b2290e900264daaaf0334b6dfe69ea498 F test/memleak.test 10b9c6c57e19fc68c32941495e9ba1c50123f6e2 @@ -567,7 +569,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806 F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb F test/pcache.test 4118a183908ecaed343a06fcef3ba82e87e0129d F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16 -F test/permutations.test ca1c985cf68c692096d0325b33c62f2b576446a5 +F test/permutations.test ec9b2ebd52ff43c5a3bec4723098fab1ef29d944 F test/pragma.test fdfc09067ea104a0c247a1a79d8093b56656f850 F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47 F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea @@ -873,7 +875,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 2c3c4ba035e548e97101142692133cf685da16bc -R 18a7b139ced85b4a9a48c95f0f44b0f9 +P d0a450ce78e99f55c862f26f9332786660007a0a f91471e7234db490f97298b1ccb8d6c7fc45b089 +R 3c9178950d1e4be3aecf1d9f3dffa25a U dan -Z ed59bb88307b21a6af9f1327c9400518 +Z 5bbfebd98739f68617c4d86986bdd88f diff --git a/manifest.uuid b/manifest.uuid index 2ff27688e..278a75661 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f91471e7234db490f97298b1ccb8d6c7fc45b089
\ No newline at end of file +fd1e5cade04961c2f5438a1dfcc2e15eafb4503f
\ No newline at end of file diff --git a/src/btree.c b/src/btree.c index f9c368a93..7e8c39feb 100644 --- a/src/btree.c +++ b/src/btree.c @@ -8096,8 +8096,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ void sqlite3BtreeCacheOverflow(BtCursor *pCur){ assert( cursorHoldsMutex(pCur) ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - assert(!pCur->isIncrblobHandle); - assert(!pCur->aOverflow); + invalidateOverflowCache(pCur); pCur->isIncrblobHandle = 1; } #endif diff --git a/src/sqlite.h.in b/src/sqlite.h.in index e827e828b..5e32c10c7 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -4791,6 +4791,11 @@ int sqlite3_blob_open( ); /* +** CAPI3REF: Move a BLOB Handle +*/ +SQLITE_EXPERIMENTAL int sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); + +/* ** CAPI3REF: Close A BLOB Handle ** ** ^Closes an open [BLOB handle]. diff --git a/src/test1.c b/src/test1.c index 2cf8c9764..6ea6e8297 100644 --- a/src/test1.c +++ b/src/test1.c @@ -1703,6 +1703,51 @@ static int test_blob_write( return (rc==SQLITE_OK ? TCL_OK : TCL_ERROR); } + +static int test_blob_reopen( + ClientData clientData, /* Not used */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + Tcl_WideInt iRowid; + Tcl_Channel channel; + ClientData instanceData; + sqlite3_blob *pBlob; + int notUsed; + int rc; + + unsigned char *zBuf; + int nBuf; + + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "CHANNEL ROWID"); + return TCL_ERROR; + } + + channel = Tcl_GetChannel(interp, Tcl_GetString(objv[1]), ¬Used); + if( !channel || TCL_OK!=Tcl_GetWideIntFromObj(interp, objv[2], &iRowid) ){ + return TCL_ERROR; + } + + if( TCL_OK!=(rc = Tcl_Flush(channel)) ){ + return rc; + } + if( TCL_OK!=(rc = Tcl_Seek(channel, 0, SEEK_SET)) ){ + return rc; + } + + instanceData = Tcl_GetChannelInstanceData(channel); + pBlob = *((sqlite3_blob **)instanceData); + + rc = sqlite3_blob_reopen(pBlob, iRowid); + if( rc!=SQLITE_OK ){ + Tcl_SetResult(interp, (char *)sqlite3TestErrorName(rc), TCL_VOLATILE); + } + + return (rc==SQLITE_OK ? TCL_OK : TCL_ERROR); +} + #endif /* @@ -5328,6 +5373,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ #ifndef SQLITE_OMIT_INCRBLOB { "sqlite3_blob_read", test_blob_read, 0 }, { "sqlite3_blob_write", test_blob_write, 0 }, + { "sqlite3_blob_reopen", test_blob_reopen, 0 }, #endif { "pcache_stats", test_pcache_stats, 0 }, #ifdef SQLITE_ENABLE_UNLOCK_NOTIFY diff --git a/src/vdbeblob.c b/src/vdbeblob.c index 38587ee56..129aad736 100644 --- a/src/vdbeblob.c +++ b/src/vdbeblob.c @@ -26,11 +26,61 @@ struct Incrblob { int flags; /* Copy of "flags" passed to sqlite3_blob_open() */ int nByte; /* Size of open blob, in bytes */ int iOffset; /* Byte offset of blob in cursor data */ + int iCol; /* Table column this handle is open on */ BtCursor *pCsr; /* Cursor pointing at blob row */ sqlite3_stmt *pStmt; /* Statement holding cursor open */ sqlite3 *db; /* The associated database */ }; + +static int blobSeekToRow(Incrblob *p, sqlite3_int64 iRow, char **pzErr){ + int rc; /* Error code */ + char *zErr = 0; /* Error message */ + Vdbe *v = (Vdbe *)p->pStmt; + + v->aVar[0].u.i = iRow; + rc = sqlite3_step(p->pStmt); + + if( rc==SQLITE_ROW ){ + Vdbe *v = (Vdbe *)p->pStmt; + u32 type = v->apCsr[0]->aType[p->iCol]; + if( type<12 ){ + zErr = sqlite3MPrintf(p->db, "cannot open value of type %s", + type==0?"null": type==7?"real": "integer" + ); + rc = SQLITE_ERROR; + sqlite3_finalize(p->pStmt); + p->pStmt = 0; + }else{ + p->iOffset = v->apCsr[0]->aOffset[p->iCol]; + p->nByte = sqlite3VdbeSerialTypeLen(type); + p->pCsr = v->apCsr[0]->pCursor; + sqlite3BtreeEnterCursor(p->pCsr); + sqlite3BtreeCacheOverflow(p->pCsr); + sqlite3BtreeLeaveCursor(p->pCsr); + } + } + + if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + }else if( p->pStmt ){ + rc = sqlite3_finalize(p->pStmt); + p->pStmt = 0; + if( rc==SQLITE_OK ){ + zErr = sqlite3MPrintf(p->db, "no such rowid: %lld", iRow); + rc = SQLITE_ERROR; + }else{ + zErr = sqlite3MPrintf(p->db, "%s", sqlite3_errmsg(p->db)); + } + } + + assert( rc!=SQLITE_OK || zErr==0 ); + assert( rc!=SQLITE_ROW && rc!=SQLITE_DONE ); + + *pzErr = zErr; + return rc; +} + /* ** Open a blob handle. */ @@ -71,11 +121,12 @@ int sqlite3_blob_open( {OP_OpenWrite, 0, 0, 0}, /* 4: Open cursor 0 for read/write */ {OP_Variable, 1, 1, 1}, /* 5: Push the rowid to the stack */ - {OP_NotExists, 0, 9, 1}, /* 6: Seek the cursor */ + {OP_NotExists, 0, 10, 1}, /* 6: Seek the cursor */ {OP_Column, 0, 0, 1}, /* 7 */ {OP_ResultRow, 1, 0, 0}, /* 8 */ - {OP_Close, 0, 0, 0}, /* 9 */ - {OP_Halt, 0, 0, 0}, /* 10 */ + {OP_Goto, 0, 5, 0}, /* 9 */ + {OP_Close, 0, 0, 0}, /* 10 */ + {OP_Halt, 0, 0, 0}, /* 11 */ }; Vdbe *v = 0; @@ -83,14 +134,20 @@ int sqlite3_blob_open( char *zErr = 0; Table *pTab; Parse *pParse; + Incrblob *pBlob; + flags = !!flags; /* flags = (flags ? 1 : 0); */ *ppBlob = 0; + sqlite3_mutex_enter(db->mutex); + + pBlob = (Incrblob *)sqlite3DbMallocZero(db, sizeof(Incrblob)); pParse = sqlite3StackAllocRaw(db, sizeof(*pParse)); - if( pParse==0 ){ - rc = SQLITE_NOMEM; + if( pParse==0 || pBlob==0 ){ + assert( db->mallocFailed ); goto blob_open_out; } + do { memset(pParse, 0, sizeof(Parse)); pParse->db = db; @@ -177,7 +234,6 @@ int sqlite3_blob_open( if( v ){ int iDb = sqlite3SchemaToIndex(db, pTab->pSchema); sqlite3VdbeAddOpList(v, sizeof(openBlob)/sizeof(VdbeOpList), openBlob); - flags = !!flags; /* flags = (flags ? 1 : 0); */ /* Configure the OP_Transaction */ sqlite3VdbeChangeP1(v, 0, iDb); @@ -220,63 +276,26 @@ int sqlite3_blob_open( } } + pBlob->flags = flags; + pBlob->pStmt = (sqlite3_stmt *)v; + pBlob->iCol = iCol; + pBlob->db = db; sqlite3BtreeLeaveAll(db); + v = 0; if( db->mallocFailed ){ goto blob_open_out; } - - sqlite3_bind_int64((sqlite3_stmt *)v, 1, iRow); - rc = sqlite3_step((sqlite3_stmt *)v); - if( rc!=SQLITE_ROW ){ - nAttempt++; - rc = sqlite3_finalize((sqlite3_stmt *)v); - sqlite3DbFree(db, zErr); - zErr = sqlite3MPrintf(db, "%s", sqlite3_errmsg(db)); - v = 0; - } - } while( nAttempt<5 && rc==SQLITE_SCHEMA ); - - if( rc==SQLITE_ROW ){ - /* The row-record has been opened successfully. Check that the - ** column in question contains text or a blob. If it contains - ** text, it is up to the caller to get the encoding right. - */ - Incrblob *pBlob; - u32 type = v->apCsr[0]->aType[iCol]; - - if( type<12 ){ - sqlite3DbFree(db, zErr); - zErr = sqlite3MPrintf(db, "cannot open value of type %s", - type==0?"null": type==7?"real": "integer" - ); - rc = SQLITE_ERROR; - goto blob_open_out; - } - pBlob = (Incrblob *)sqlite3DbMallocZero(db, sizeof(Incrblob)); - if( db->mallocFailed ){ - sqlite3DbFree(db, pBlob); - goto blob_open_out; - } - pBlob->flags = flags; - pBlob->pCsr = v->apCsr[0]->pCursor; - sqlite3BtreeEnterCursor(pBlob->pCsr); - sqlite3BtreeCacheOverflow(pBlob->pCsr); - sqlite3BtreeLeaveCursor(pBlob->pCsr); - pBlob->pStmt = (sqlite3_stmt *)v; - pBlob->iOffset = v->apCsr[0]->aOffset[iCol]; - pBlob->nByte = sqlite3VdbeSerialTypeLen(type); - pBlob->db = db; - *ppBlob = (sqlite3_blob *)pBlob; - rc = SQLITE_OK; - }else if( rc==SQLITE_OK ){ - sqlite3DbFree(db, zErr); - zErr = sqlite3MPrintf(db, "no such rowid: %lld", iRow); - rc = SQLITE_ERROR; - } + sqlite3_bind_int64(pBlob->pStmt, 1, iRow); + rc = blobSeekToRow(pBlob, iRow, &zErr); + } while( (++nAttempt)<5 && rc==SQLITE_SCHEMA ); blob_open_out: - if( v && (rc!=SQLITE_OK || db->mallocFailed) ){ - sqlite3VdbeFinalize(v); + if( rc==SQLITE_OK && db->mallocFailed==0 ){ + *ppBlob = (sqlite3_blob *)pBlob; + }else{ + if( v ) sqlite3VdbeFinalize(v); + if( pBlob && pBlob->pStmt ) sqlite3VdbeFinalize((Vdbe *)pBlob->pStmt); + sqlite3DbFree(db, pBlob); } sqlite3Error(db, rc, (zErr ? "%s" : 0), zErr); sqlite3DbFree(db, zErr); @@ -331,7 +350,7 @@ static int blobReadWrite( /* Request is out of range. Return a transient error. */ rc = SQLITE_ERROR; sqlite3Error(db, SQLITE_ERROR, 0); - } else if( v==0 ){ + }else if( v==0 ){ /* If there is no statement handle, then the blob-handle has ** already been invalidated. Return SQLITE_ABORT in this case. */ @@ -382,4 +401,43 @@ int sqlite3_blob_bytes(sqlite3_blob *pBlob){ return p ? p->nByte : 0; } +/* +** Move an existing blob handle to point to a different row of the same +** database table. +** +** If an error occurs, or if the specified row does not exist or does not +** contain a blob or text value, then an error code is returned and the +** database handle error code and message set. If this happens, then all +** subsequent calls to sqlite3_blob_xxx() functions (except blob_close()) +** immediately return SQLITE_ABORT. +*/ +int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){ + int rc; + Incrblob *p = (Incrblob *)pBlob; + sqlite3 *db; + + if( p==0 ) return SQLITE_MISUSE_BKPT; + db = p->db; + sqlite3_mutex_enter(db->mutex); + + if( p->pStmt==0 ){ + /* If there is no statement handle, then the blob-handle has + ** already been invalidated. Return SQLITE_ABORT in this case. + */ + rc = SQLITE_ABORT; + }else{ + char *zErr; + rc = blobSeekToRow(p, iRow, &zErr); + if( rc!=SQLITE_OK ){ + sqlite3Error(db, rc, (zErr ? "%s" : 0), zErr); + sqlite3DbFree(db, zErr); + } + assert( rc!=SQLITE_SCHEMA ); + } + + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +} + #endif /* #ifndef SQLITE_OMIT_INCRBLOB */ diff --git a/test/fts3ah.test b/test/fts3ah.test index 1a58e49f4..3810ec37b 100644 --- a/test/fts3ah.test +++ b/test/fts3ah.test @@ -1,37 +1,32 @@ -# 2006 October 31 (scaaarey) +# 2006 October 31 # -# The author disclaims copyright to this source code. +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus -# here is testing correct handling of excessively long terms. -# -# $Id: fts3ah.test,v 1.1 2007/08/20 17:38:42 shess Exp $ +# here is testing correct handling of very long terms. # set testdir [file dirname $argv0] source $testdir/tester.tcl -# If SQLITE_ENABLE_FTS3 is defined, omit this file. +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test return } -# Generate a term of len copies of char. -proc bigterm {char len} { - for {set term ""} {$len>0} {incr len -1} { - append term $char - } - return $term -} - # Generate a document of bigterms based on characters from the list # chars. proc bigtermdoc {chars len} { set doc "" foreach char $chars { - append doc " " [bigterm $char $len] + append doc " " [string repeat $char $len] } return $doc } @@ -41,9 +36,9 @@ set doc1 [bigtermdoc {a b c d} $len] set doc2 [bigtermdoc {b d e f} $len] set doc3 [bigtermdoc {a c e} $len] -set aterm [bigterm a $len] -set bterm [bigterm b $len] -set xterm [bigterm x $len] +set aterm [string repeat a $len] +set bterm [string repeat b $len] +set xterm [string repeat x $len] db eval { CREATE VIRTUAL TABLE t1 USING fts3(content); @@ -61,15 +56,15 @@ do_test fts3ah-1.2 { execsql {SELECT rowid FROM t1 WHERE t1 MATCH $aterm} } {1 3} -do_test fts3ah-1.2 { +do_test fts3ah-1.3 { execsql {SELECT rowid FROM t1 WHERE t1 MATCH $xterm} } {} -do_test fts3ah-1.3 { +do_test fts3ah-1.4 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '$aterm -$xterm'" } {1 3} -do_test fts3ah-1.4 { +do_test fts3ah-1.5 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"$aterm $bterm\"'" } {1} diff --git a/test/fts3cov.test b/test/fts3cov.test index d3fe4fa8c..92def056c 100644 --- a/test/fts3cov.test +++ b/test/fts3cov.test @@ -82,27 +82,28 @@ do_test fts3cov-2.1 { INSERT INTO t1 VALUES('And she in the midnight wood will pray'); INSERT INTO t1 VALUES('For the weal of her lover that''s far away.'); COMMIT; - + } + execsql { INSERT INTO t1(t1) VALUES('optimize'); SELECT substr(hex(root), 1, 2) FROM t1_segdir; } } {03} # Test the "missing entry" case: -do_test fts3cov-2.1 { +do_test fts3cov-2.2 { set root [db one {SELECT root FROM t1_segdir}] read_fts3varint [string range $root 1 end] left_child execsql { DELETE FROM t1_segments WHERE blockid = $left_child } } {} -do_error_test fts3cov-2.2 { +do_error_test fts3cov-2.3 { SELECT * FROM t1 WHERE t1 MATCH 'c*' } {database disk image is malformed} # Test the "replaced with NULL" case: -do_test fts3cov-2.3 { +do_test fts3cov-2.4 { execsql { INSERT INTO t1_segments VALUES($left_child, NULL) } } {} -do_error_test fts3cov-2.4 { +do_error_test fts3cov-2.5 { SELECT * FROM t1 WHERE t1 MATCH 'cloud' } {database disk image is malformed} diff --git a/test/fts3defer.test b/test/fts3defer.test new file mode 100644 index 000000000..a6de8ac54 --- /dev/null +++ b/test/fts3defer.test @@ -0,0 +1,383 @@ +# 2010 October 15 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/malloc_common.tcl + +ifcapable !fts3 { + finish_test + return +} + +set sqlite_fts3_enable_parentheses 1 + +set ::testprefix fts3defer + +#-------------------------------------------------------------------------- +# Test cases fts3defer-1.* are the "warm body" cases. The database contains +# one row with 15000 instances of the token "a". This makes the doclist for +# "a" so large that FTS3 will avoid loading it in most cases. +# +# To show this, test cases fts3defer-1.2.* execute a bunch of FTS3 queries +# involving token "a". Then, fts3defer-1.3.* replaces the doclist for token +# "a" with all zeroes and fts3defer-1.4.* repeats the tests from 1.2. If +# the tests still work, we can conclude that the doclist for "a" was not +# used. +# + +set aaa [string repeat "a " 15000] + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts4; + BEGIN; + INSERT INTO t1 VALUES('this is a dog'); + INSERT INTO t1 VALUES('an instance of a phrase'); + INSERT INTO t1 VALUES('an instance of a longer phrase'); + INSERT INTO t1 VALUES($aaa); + COMMIT; +} {} + +set tests { + 1 {SELECT rowid FROM t1 WHERE t1 MATCH '"a dog"'} {1} + 2 {SELECT rowid FROM t1 WHERE t1 MATCH '"is a dog"'} {1} + 3 {SELECT rowid FROM t1 WHERE t1 MATCH '"a longer phrase"'} {3} + 4 {SELECT snippet(t1) FROM t1 WHERE t1 MATCH '"a longer phrase"'} + {"an instance of <b>a</b> <b>longer</b> <b>phrase</b>"} + 5 {SELECT rowid FROM t1 WHERE t1 MATCH 'a dog'} {1} +} + +do_select_tests 1.2 $tests + +do_execsql_test 1.3 { + SELECT count(*) FROM t1_segments WHERE length(block)>10000; + UPDATE t1_segments + SET block = zeroblob(length(block)) + WHERE length(block)>10000; +} {1} + +do_select_tests 1.4 $tests + +# Drop the table. It is corrupt now anyhow, so not useful for subsequent tests. +# +do_execsql_test 1.5 { DROP TABLE t1 } + +#-------------------------------------------------------------------------- +# These tests - fts3defer-2.* - are more rigorous. They test that for a +# variety of queries, FTS3 and FTS4 return the same results. And that +# zeroing the very large doclists that FTS4 does not load does not change +# the results. +# +# They use the following pseudo-randomly generated document data. The +# tokens "zm" and "jk" are especially common in this dataset. Additionally, +# two documents are added to the pseudo-random data before it is loaded +# into FTS4 containing 100,000 instances of the "zm" and "jk" tokens. This +# makes the doclists for those tokens so large that FTS4 avoids loading them +# into memory if possible. +# +set data [list] +lappend data [string repeat "zm " 100000] +lappend data [string repeat "jk " 100000] +lappend data {*}{ + "zm zm agmckuiu uhzq nsab jk rrkx duszemmzl hyq jk" + "jk uhzq zm zm rgpzmlnmd zm zk jk jk zm" + "duszemmzl zm jk xldlpy zm jk sbptoa xh jk xldlpy" + "zm xh zm xqf azavwm jk jk trqd rgpzmlnmd jk" + "zm vwq urvysbnykk ubwrfqnbjf zk lsz jk doiwavhwwo jk jk" + "jk xduvfhk orpfawpx zkhdvkw jk mjpavjuhw zm jk duszemmzl zm" + "jk igju jk jk zm hmjf xh zm gwdfhwurx zk" + "vgsld jk jk zm hrlipdm jn zm zsmhnf vgsld duszemmzl" + "gtuiexzsu aayxpmve zm zm zm drir scpgna xh azavwm uhzq" + "farlehdhq hkfoudzftq igju duszemmzl xnxhf ewle zm hrlipdm urvysbnykk kn" + "xnxhf jk jk agmckuiu duszemmzl jk zm zm jk vgsld" + "zm zm zm jk jk urvysbnykk ogttbykvt zm zm jk" + "iasrqgqv zm azavwm zidhxhbtv jk jk mjpavjuhw zm zm ajmvcydy" + "rgpzmlnmd tmt mjpavjuhw xh igju jk azavwm fibokdry vgsld ofm" + "zm jk vgsld jk xh jk csjqxhgj drir jk pmrb" + "xh jk jk zm rrkx duszemmzl mjpavjuhw xldlpy igju zm" + "jk hkfoudzftq zf rrkx wdmy jupk jk zm urvysbnykk npywgdvgz" + "zm jk zm zm zhbrzadb uenvbm aayxpmve urvysbnykk duszemmzl jk" + "uenvbm jk zm fxw xh bdilwmjw mjpavjuhw uv jk zm" + "nk jk bnhc pahlds jk igju dzadnqzprr jk jk jk" + "uhzq uv zm duszemmzl tlqix jk jk xh jk zm" + "jk zm agmckuiu urvysbnykk jk jk zm zm jk jk" + "azavwm mjpavjuhw lsgshn trqd xldlpy ogyavjvv agmckuiu ryvwwhlbc jk jk" + "tmt jk zk zm azavwm ofm acpgim bvgimjik iasrqgqv wuvajhwqz" + "igju ogyavjvv xrbdak rrkx fibokdry zf ujfhmrllq jk zm hxgwvib" + "zm pahlds jk uenvbm aayxpmve iaf hmjf xph vnlyvtkgx zm" + "jk xnxhf igju jk xh jk nvfasfh zm js jk" + "zm zm rwaj igju xr rrkx xnxhf nvfasfh skxbsqzvmt xatbxeqq" + "vgsld zm ujfhmrllq uhzq ogyavjvv nsab azavwm zm vgsld jmfiqhwnjg" + "ymjoym duszemmzl urvysbnykk azavwm jk jmfiqhwnjg bu qcdziqomqk vnlyvtkgx" + "zm nbilqcnz dzadnqzprr xh bkfgzsxn urvysbnykk xrujfzxqf zm zf agmckuiu" + "jk urvysbnykk nvfasfh zf xh zm zm qcdziqomqk qvxtclg wdmy" + "fibokdry jk urvysbnykk jk xr osff zm cvnnsl zm vgsld" + "jk mjpavjuhw hkfoudzftq jk zm xh xqf urvysbnykk jk iasrqgqv" + "jk csjqxhgj duszemmzl iasrqgqv aayxpmve zm brsuoqww jk qpmhtvl wluvgsw" + "jk mj azavwm jk zm jn dzadnqzprr zm jk uhzq" + "zk xqf jupk fxw nbilqcnz zm jk jcpiwj tznlvbfcv nvfasfh" + "jk jcpiwj zm xnxhf zm mjpavjuhw mj drir pa pvjrjlas" + "duszemmzl dzadnqzprr jk swc duszemmzl tmt jk jk pahlds jk" + "zk zm jk zm zm eczkjblu zm hi pmrb jk" + "azavwm zm iz agmckuiu jk sntk jk duszemmzl duszemmzl zm" + "jk zm jk eczkjblu urvysbnykk sk gnl jk ttvgf hmjf" + "jk bnhc jjrxpjkb mjpavjuhw fibokdry igju jk zm zm xh" + "wxe ogttbykvt uhzq xr iaf zf urvysbnykk aayxpmve oacaxgjoo mjpavjuhw" + "gazrt jk ephknonq myjp uenvbm wuvajhwqz jk zm xnxhf nvfasfh" + "zm aayxpmve csjqxhgj xnxhf xr jk aayxpmve xnxhf zm zm" + "sokcyf zm ogyavjvv jk zm fibokdry zm jk igju igju" + "vgsld bvgimjik xuprtlyle jk akmikrqyt jk aayxpmve hkfoudzftq ddjj ithtir" + "zm uhzq ovkyevlgv zk uenvbm csjqxhgj jk vgsld pgybs jk" + "zm agmckuiu zexh fibokdry jk uhzq bu tugflixoex xnxhf sk" + "zm zf uenvbm jk azavwm zm zm agmckuiu zm jk" + "rrkx jk zf jt zm oacaxgjoo fibokdry wdmy igju csjqxhgj" + "hi igju zm jk zidhxhbtv dzadnqzprr jk jk trqd duszemmzl" + "zm zm mjpavjuhw xrbdak qrvbjruc jk qzzqdxq guwq cvnnsl zm" + "ithtir jk jk qcdziqomqk zm farlehdhq zm zm xrbdak jk" + "ixfipk csjqxhgj azavwm sokcyf ttvgf vgsld jk sk xh zk" + "nvfasfh azavwm zm zm zm fxw nvfasfh zk gnl trqd" + "zm fibokdry csjqxhgj ofm dzadnqzprr jk akmikrqyt orpfawpx duszemmzl vwq" + "csjqxhgj jk jk vgsld urvysbnykk jk nxum jk jk nxum" + "zm hkfoudzftq jk ryvwwhlbc mjpavjuhw ephknonq jk zm ogyavjvv zm" + "lwa hi xnxhf qdyerbws zk njtc jk uhzq zm jk" + "trqd zm dzadnqzprr zm urvysbnykk jk lsz jk mjpavjuhw cmnnkna" + "duszemmzl zk jk jk fibokdry jseuhjnzo zm aayxpmve zk jk" + "fibokdry jk sviq qvxtclg wdmy jk doiwavhwwo zexh jk zm" + "jupk zm xh jk mjpavjuhw zm jk nsab npywgdvgz duszemmzl" + "zm igju zm zm nvfasfh eh hkfoudzftq fibokdry fxw xkblf" + "jk zm jk jk zm xh zk abthnzcv zf csjqxhgj" + "zm zm jk nkaotm urvysbnykk sbptoa bq jk ktxdty ubwrfqnbjf" + "nvfasfh aayxpmve xdcuz zm tugflixoex jcpiwj zm mjpavjuhw fibokdry doiwavhwwo" + "iaf jk mjpavjuhw zm duszemmzl jk jk uhzq pahlds fibokdry" + "ddjj zk azavwm jk swc zm gjtexkv jk xh jk" + "igju jk csjqxhgj zm jk dzadnqzprr duszemmzl ulvcbv jk jk" + "jk fibokdry zm csjqxhgj jn zm zm zm zf uhzq" + "duszemmzl jk xkblf zk hrlipdm aayxpmve uenvbm uhzq jk zf" + "dzadnqzprr jk zm zdu nvfasfh zm jk urvysbnykk hmjf jk" + "jk aayxpmve aserrdxm acpgim fibokdry jk drir wxe brsuoqww rrkx" + "uhzq csjqxhgj nvfasfh jk rrkx qbamok trqd uenvbm sntk zm" + "ps azavwm zkhdvkw jk zm jk jk zm csjqxhgj xedlrcfo" + "jk jk ogyavjvv jk zm farlehdhq duszemmzl jk agitgxamxe jk" + "qzzqdxq rwaj jk jk zm xqf jk uenvbm jk zk" + "zm hxgwvib akmikrqyt zf agmckuiu uenvbm bq npywgdvgz azavwm jk" + "zf jmfiqhwnjg js igju zm aayxpmve zm mbxnljomiv csjqxhgj nvfasfh" + "zm jk jk gazrt jk jk lkc jk nvfasfh jk" + "xldlpy orpfawpx zkhdvkw jk zm igju zm urvysbnykk dzadnqzprr mbxnljomiv" + "urvysbnykk jk zk igju zm uenvbm jk zm ithtir jk" + "zm zk zm zf ofm zm xdcuz dzadnqzprr zm vgsld" + "sbptoa jk tugflixoex jk zm zm vgsld zm xh zm" + "uhzq jk zk evvivo vgsld vniqnuynvf agmckuiu jk zm zm" + "zm nvfasfh zm zm zm abthnzcv uenvbm jk zk dzadnqzprr" + "zm azavwm igju qzzqdxq jk xnxhf abthnzcv jk nvfasfh zm" + "qbamok fxw vgsld igju cmnnkna xnxhf vniqnuynvf zk xh zm" + "nvfasfh zk zm mjpavjuhw dzadnqzprr jk jk duszemmzl xldlpy nvfasfh" + "xnxhf sviq nsab npywgdvgz osff vgsld farlehdhq fibokdry wjbkhzsa hhac" + "zm azavwm scpgna jk jk bq jk duszemmzl fibokdry ovkyevlgv" + "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf" + "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir" + "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq" + "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm" + "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr" + "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki" +} + +#set e [list] +#foreach d $data {set e [concat $e $d]} +#puts [lsort -unique $e] +#exit + +set zero_long_doclists { + UPDATE t1_segments SET block=zeroblob(length(block)) WHERE length(block)>10000 +} + +foreach {tn setup} { + 1 { + set dmt_modes 0 + execsql { CREATE VIRTUAL TABLE t1 USING FTS3 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + } + 2 { + set dmt_modes 0 + execsql { CREATE VIRTUAL TABLE t1 USING FTS4 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + } + 3 { + set dmt_modes {0 1 2} + execsql { CREATE VIRTUAL TABLE t1 USING FTS4 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + execsql $zero_long_doclists + } + 4 { + set dmt_modes 0 + execsql { CREATE VIRTUAL TABLE t1 USING FTS4 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + execsql "INSERT INTO t1(t1) VALUES('optimize')" + execsql $zero_long_doclists + } +} { + + execsql { DROP TABLE IF EXISTS t1 } + eval $setup + set ::testprefix fts3defer-2.$tn + set DO_MALLOC_TEST 0 + + do_execsql_test 0 { + SELECT count(*) FROM t1_segments WHERE length(block)>10000 + } {2} + + do_select_test 1.1 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf' + } {13 29 40 47 48 52 63 92} + do_select_test 1.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh' + } {100} + do_select_test 1.3 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf' + } {7 70 98} + do_select_test 1.4 { + SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk' + } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 74 76 81 94 96 97} + do_select_test 1.5 { + SELECT rowid FROM t1 WHERE t1 MATCH 'ubwrfqnbjf jk' + } {7 70 98} + do_select_test 1.6 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf jk jk jk jk' + } {7 70 98} + do_select_test 1.7 { + SELECT rowid FROM t1 WHERE t1 MATCH 'zm xnxhf' + } {12 13 29 30 40 47 48 52 63 92 93} + do_select_test 1.8 { + SELECT rowid FROM t1 WHERE t1 MATCH 'zm eh' + } {68 100} + do_select_test 1.9 { + SELECT rowid FROM t1 WHERE t1 MATCH 'zm ubwrfqnbjf' + } {7 70 98} + + do_select_test 2.1 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm agmckuiu"' + } {3 24 52 53} + do_select_test 2.2 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm zf"' + } {33 53 75 88 101} + do_select_test 2.3 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm aayxpmve"' + } {48 65 84} + do_select_test 2.4 { + SELECT rowid FROM t1 WHERE t1 MATCH '"aayxpmve zm"' + } {11 37 84} + do_select_test 2.5 { + SELECT rowid FROM t1 WHERE t1 MATCH '"jk azavwm"' + } {16 53} + do_select_test 2.6 { + SELECT rowid FROM t1 WHERE t1 MATCH '"xh jk jk"' + } {18} + do_select_test 2.7 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm jk vgsld"' + } {13 17} + + do_select_test 2.8 { + SELECT rowid FROM t1 WHERE t1 MATCH 'z* vgsld' + } {10 13 17 31 35 51 58 88 89 90 93 100} + do_select_test 2.9 { + SELECT rowid FROM t1 + WHERE t1 MATCH '( + zdu OR zexh OR zf OR zhbrzadb OR zidhxhbtv OR + zk OR zkhdvkw OR zm OR zsmhnf + ) vgsld' + } {10 13 17 31 35 51 58 88 89 90 93 100} + + do_select_test 3.1 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH '"zm agmckuiu"' + } { + {zm [zm] [agmckuiu] uhzq nsab jk rrkx duszemmzl hyq jk} + {jk [zm] [agmckuiu] urvysbnykk jk jk zm zm jk jk} + {[zm] [agmckuiu] zexh fibokdry jk uhzq bu tugflixoex xnxhf sk} + {zm zf uenvbm jk azavwm zm [zm] [agmckuiu] zm jk} + } + + do_select_test 3.2 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'xnxhf jk' + } { + {[xnxhf] [jk] [jk] agmckuiu duszemmzl [jk] zm zm [jk] vgsld} + {[jk] [xnxhf] igju [jk] xh [jk] nvfasfh zm js [jk]} + {[jk] jcpiwj zm [xnxhf] zm mjpavjuhw mj drir pa pvjrjlas} + {gazrt [jk] ephknonq myjp uenvbm wuvajhwqz [jk] zm [xnxhf] nvfasfh} + {zm aayxpmve csjqxhgj [xnxhf] xr [jk] aayxpmve [xnxhf] zm zm} + {zm agmckuiu zexh fibokdry [jk] uhzq bu tugflixoex [xnxhf] sk} + {lwa hi [xnxhf] qdyerbws zk njtc [jk] uhzq zm [jk]} + {zm azavwm igju qzzqdxq [jk] [xnxhf] abthnzcv [jk] nvfasfh zm} + } + + do_select_test 4.1 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH '"jk uenvbm"' + } { + {0 0 10 2 0 1 13 6} {0 0 26 2 0 1 29 6} + } + + do_select_test 4.2 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH 'duszemmzl jk fibokdry' + } { + {0 2 3 8 0 1 36 2 0 0 58 9} + {0 0 0 9 0 1 13 2 0 1 16 2 0 2 19 8 0 1 53 2} + {0 1 4 2 0 0 20 9 0 1 30 2 0 1 33 2 0 2 48 8} + {0 1 17 2 0 1 20 2 0 1 26 2 0 0 29 9 0 2 39 8} + } + + # The following block of tests runs normally with FTS3 or FTS4 without the + # long doclists zeroed. And with OOM-injection for FTS4 with long doclists + # zeroed. Change this by messing with the [set dmt_modes] commands above. + # + foreach DO_MALLOC_TEST $dmt_modes { + + # Phrase search. + do_select_test 5.$DO_MALLOC_TEST.1 { + SELECT rowid FROM t1 WHERE t1 MATCH '"jk mjpavjuhw"' + } {8 15 36 64 67 72} + + # Multiple tokens search. + do_select_test 5.$DO_MALLOC_TEST.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl zm' + } {3 5 8 10 12 13 18 20 23 37 43 55 60 65 67 72 74 81 94 96 97} + + # snippet() function with phrase. + do_select_test 5.$DO_MALLOC_TEST.3 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH '"zm aayxpmve"' + } { + {[zm] [aayxpmve] csjqxhgj xnxhf xr jk aayxpmve xnxhf zm zm} + {duszemmzl zk jk jk fibokdry jseuhjnzo [zm] [aayxpmve] zk jk} + {zf jmfiqhwnjg js igju [zm] [aayxpmve] zm mbxnljomiv csjqxhgj nvfasfh} + } + + # snippet() function with multiple tokens. + do_select_test 5.$DO_MALLOC_TEST.4 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'zm zhbrzadb' + } { + {[zm] jk [zm] [zm] [zhbrzadb] uenvbm aayxpmve urvysbnykk duszemmzl jk} + } + + # snippet() function with phrase. + do_select_test 5.$DO_MALLOC_TEST.5 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH '"zm aayxpmve"' + } { + {0 0 0 2 0 1 3 8} {0 0 38 2 0 1 41 8} {0 0 22 2 0 1 25 8} + } + + # snippet() function with multiple tokens. + do_select_test 5.$DO_MALLOC_TEST.6 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH 'zm zhbrzadb' + } { + {0 0 0 2 0 0 6 2 0 0 9 2 0 1 12 8} + } + } +} + + +finish_test diff --git a/test/malloc_common.tcl b/test/malloc_common.tcl index 6b7869d1d..4b2478abe 100644 --- a/test/malloc_common.tcl +++ b/test/malloc_common.tcl @@ -526,7 +526,7 @@ proc do_malloc_test {tn args} { # match the expected results passed via parameter $result. # proc do_select_test {name sql result} { - uplevel [list doPassiveTest 0 $name $sql [list 0 $result]] + uplevel [list doPassiveTest 0 $name $sql [list 0 [list {*}$result]]] } proc do_restart_select_test {name sql result} { @@ -540,6 +540,12 @@ proc do_error_test {name sql error} { proc doPassiveTest {isRestart name sql catchres} { if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 } + if {[info exists ::testprefix] + && [string is integer [string range $name 0 0]] + } { + set name $::testprefix.$name + } + switch $::DO_MALLOC_TEST { 0 { # No malloc failures. do_test $name [list set {} [uplevel [list catchsql $sql]]] $catchres diff --git a/test/permutations.test b/test/permutations.test index fb1604ca9..95896b6d3 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -166,7 +166,7 @@ test_suite "fts3" -prefix "" -description { fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test fts3e.test fts3expr.test fts3expr2.test fts3near.test - fts3query.test fts3snippet.test + fts3query.test fts3snippet.test fts3defer.test } |