aboutsummaryrefslogtreecommitdiff
path: root/ext/fts3/fts3_snippet.c
diff options
context:
space:
mode:
authordan <dan@noemail.net>2015-05-11 19:01:18 +0000
committerdan <dan@noemail.net>2015-05-11 19:01:18 +0000
commit6ebf1eb2c4f77c0ceaf982705230e61c583caf29 (patch)
tree8ab045353a79911f77a40a3d3bb00d3e05a73318 /ext/fts3/fts3_snippet.c
parent8906b7dbde7bfd06dbef07309d6e76e80ae8594c (diff)
parent1c1cea8baec472eb39ae92d92e2b9b1321334f9c (diff)
downloadsqlite-6ebf1eb2c4f77c0ceaf982705230e61c583caf29.tar.gz
sqlite-6ebf1eb2c4f77c0ceaf982705230e61c583caf29.zip
Add new fts3 matchinfo option 'b'. Also optimize existing option 'y'.
FossilOrigin-Name: 2e7679a1df4020dc0166f5de8ffd664df18a3002
Diffstat (limited to 'ext/fts3/fts3_snippet.c')
-rw-r--r--ext/fts3/fts3_snippet.c313
1 files changed, 218 insertions, 95 deletions
diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c
index 6abb169eb..646d7acd5 100644
--- a/ext/fts3/fts3_snippet.c
+++ b/ext/fts3/fts3_snippet.c
@@ -28,6 +28,7 @@
#define FTS3_MATCHINFO_LCS 's' /* nCol values */
#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
+#define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */
/*
** The default value for the second argument to matchinfo().
@@ -89,9 +90,22 @@ struct MatchInfo {
int nCol; /* Number of columns in table */
int nPhrase; /* Number of matchable phrases in query */
sqlite3_int64 nDoc; /* Number of docs in database */
+ char flag;
u32 *aMatchinfo; /* Pre-allocated buffer */
};
+/*
+** An instance of this structure is used to manage a pair of buffers, each
+** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
+** for details.
+*/
+struct MatchinfoBuffer {
+ u8 aRef[3];
+ int nElem;
+ int bGlobal; /* Set if global data is loaded */
+ char *zMatchinfo;
+ u32 aMatchinfo[0];
+};
/*
@@ -107,6 +121,97 @@ struct StrBuffer {
};
+/*************************************************************************
+** Start of MatchinfoBuffer code.
+*/
+
+/*
+** Allocate a two-slot MatchinfoBuffer object.
+*/
+static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){
+ MatchinfoBuffer *pRet;
+ int nByte = sizeof(u32) * (2*nElem + 2) + sizeof(MatchinfoBuffer);
+ int nStr = strlen(zMatchinfo);
+
+ pRet = sqlite3_malloc(nByte + nStr+1);
+ if( pRet ){
+ memset(pRet, 0, nByte);
+ pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
+ pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1);
+ pRet->nElem = nElem;
+ pRet->zMatchinfo = ((char*)pRet) + nByte;
+ memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
+ pRet->aRef[0] = 1;
+ }
+
+ return pRet;
+}
+
+static void fts3MIBufferFree(void *p){
+ MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
+
+ assert( (u32*)p==&pBuf->aMatchinfo[1]
+ || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
+ );
+ if( (u32*)p==&pBuf->aMatchinfo[1] ){
+ pBuf->aRef[1] = 0;
+ }else{
+ pBuf->aRef[2] = 0;
+ }
+
+ if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
+ sqlite3_free(pBuf);
+ }
+}
+
+static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
+ void (*xRet)(void*) = 0;
+ u32 *aOut = 0;
+
+ if( p->aRef[1]==0 ){
+ p->aRef[1] = 1;
+ aOut = &p->aMatchinfo[1];
+ xRet = fts3MIBufferFree;
+ }
+ else if( p->aRef[2]==0 ){
+ p->aRef[2] = 1;
+ aOut = &p->aMatchinfo[p->nElem+2];
+ xRet = fts3MIBufferFree;
+ }else{
+ aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32));
+ if( aOut ){
+ xRet = sqlite3_free;
+ if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32));
+ }
+ }
+
+ *paOut = aOut;
+ return xRet;
+}
+
+static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
+ p->bGlobal = 1;
+ memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32));
+}
+
+/*
+** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
+*/
+void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
+ if( p ){
+ assert( p->aRef[0]==1 );
+ p->aRef[0] = 0;
+ if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){
+ sqlite3_free(p);
+ }
+ }
+}
+
+/*
+** End of MatchinfoBuffer code.
+*************************************************************************/
+
+
/*
** This function is used to help iterate through a position-list. A position
** list is a list of unique integers, sorted from smallest to largest. Each
@@ -143,7 +248,7 @@ static int fts3ExprIterate2(
void *pCtx /* Second argument to pass to callback */
){
int rc; /* Return code */
- int eType = pExpr->eType; /* Type of expression node pExpr */
+ int eType = pExpr->eType; /* Type of expression node pExpr */
if( eType!=FTSQUERY_PHRASE ){
assert( pExpr->pLeft && pExpr->pRight );
@@ -177,6 +282,7 @@ static int fts3ExprIterate(
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
}
+
/*
** This is an fts3ExprIterate() callback used while loading the doclists
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
@@ -221,8 +327,7 @@ static int fts3ExprLoadDoclists(
static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
(*(int *)ctx)++;
- UNUSED_PARAMETER(pExpr);
- UNUSED_PARAMETER(iPhrase);
+ pExpr->iPhrase = iPhrase;
return SQLITE_OK;
}
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
@@ -443,7 +548,7 @@ static int fts3BestSnippet(
sIter.nSnippet = nSnippet;
sIter.nPhrase = nList;
sIter.iCurrent = -1;
- rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
+ rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
if( rc==SQLITE_OK ){
/* Set the *pmSeen output variable. */
@@ -745,6 +850,60 @@ static int fts3ColumnlistCount(char **ppCollist){
}
/*
+** This function gathers 'y' or 'b' data for a single phrase.
+*/
+static void fts3ExprLHits(
+ Fts3Expr *pExpr, /* Phrase expression node */
+ MatchInfo *p /* Matchinfo context */
+){
+ Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
+ int iStart;
+ Fts3Phrase *pPhrase = pExpr->pPhrase;
+ char *pIter = pPhrase->doclist.pList;
+ int iCol = 0;
+
+ assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
+ if( p->flag==FTS3_MATCHINFO_LHITS ){
+ iStart = pExpr->iPhrase * p->nCol;
+ }else{
+ iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
+ }
+
+ while( 1 ){
+ int nHit = fts3ColumnlistCount(&pIter);
+ if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
+ if( p->flag==FTS3_MATCHINFO_LHITS ){
+ p->aMatchinfo[iStart + iCol] = (u32)nHit;
+ }else if( nHit ){
+ p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
+ }
+ }
+ assert( *pIter==0x00 || *pIter==0x01 );
+ if( *pIter!=0x01 ) break;
+ pIter++;
+ pIter += fts3GetVarint32(pIter, &iCol);
+ }
+}
+
+/*
+** Gather the results for matchinfo directives 'y' and 'b'.
+*/
+static void fts3ExprLHitGather(
+ Fts3Expr *pExpr,
+ MatchInfo *p
+){
+ assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );
+ if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
+ if( pExpr->pLeft ){
+ fts3ExprLHitGather(pExpr->pLeft, p);
+ fts3ExprLHitGather(pExpr->pRight, p);
+ }else{
+ fts3ExprLHits(pExpr, p);
+ }
+ }
+}
+
+/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
** for a single query.
**
@@ -810,51 +969,6 @@ static int fts3ExprLocalHitsCb(
return rc;
}
-/*
-** fts3ExprIterate() callback used to gather information for the matchinfo
-** directive 'y'.
-*/
-static int fts3ExprLHitsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- MatchInfo *p = (MatchInfo *)pCtx;
- Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
- int rc = SQLITE_OK;
- int iStart = iPhrase * p->nCol;
- Fts3Expr *pEof; /* Ancestor node already at EOF */
-
- /* This must be a phrase */
- assert( pExpr->pPhrase );
-
- /* Initialize all output integers to zero. */
- memset(&p->aMatchinfo[iStart], 0, sizeof(u32) * p->nCol);
-
- /* Check if this or any parent node is at EOF. If so, then all output
- ** values are zero. */
- for(pEof=pExpr; pEof && pEof->bEof==0; pEof=pEof->pParent);
-
- if( pEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- char *pIter = pPhrase->doclist.pList;
- int iCol = 0;
-
- while( 1 ){
- int nHit = fts3ColumnlistCount(&pIter);
- if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
- p->aMatchinfo[iStart + iCol] = (u32)nHit;
- }
- assert( *pIter==0x00 || *pIter==0x01 );
- if( *pIter!=0x01 ) break;
- pIter++;
- pIter += fts3GetVarint32(pIter, &iCol);
- }
- }
-
- return rc;
-}
-
static int fts3MatchinfoCheck(
Fts3Table *pTab,
char cArg,
@@ -868,6 +982,7 @@ static int fts3MatchinfoCheck(
|| (cArg==FTS3_MATCHINFO_LCS)
|| (cArg==FTS3_MATCHINFO_HITS)
|| (cArg==FTS3_MATCHINFO_LHITS)
+ || (cArg==FTS3_MATCHINFO_LHITS_BM)
){
return SQLITE_OK;
}
@@ -895,6 +1010,10 @@ static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
nVal = pInfo->nCol * pInfo->nPhrase;
break;
+ case FTS3_MATCHINFO_LHITS_BM:
+ nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
+ break;
+
default:
assert( cArg==FTS3_MATCHINFO_HITS );
nVal = pInfo->nCol * pInfo->nPhrase * 3;
@@ -1089,7 +1208,7 @@ static int fts3MatchinfoValues(
sqlite3_stmt *pSelect = 0;
for(i=0; rc==SQLITE_OK && zArg[i]; i++){
-
+ pInfo->flag = zArg[i];
switch( zArg[i] ){
case FTS3_MATCHINFO_NPHRASE:
if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
@@ -1149,9 +1268,13 @@ static int fts3MatchinfoValues(
}
break;
- case FTS3_MATCHINFO_LHITS:
- (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLHitsCb, (void*)pInfo);
+ case FTS3_MATCHINFO_LHITS_BM:
+ case FTS3_MATCHINFO_LHITS: {
+ int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
+ memset(pInfo->aMatchinfo, 0, nZero);
+ fts3ExprLHitGather(pCsr->pExpr, pInfo);
break;
+ }
default: {
Fts3Expr *pExpr;
@@ -1184,7 +1307,8 @@ static int fts3MatchinfoValues(
** Populate pCsr->aMatchinfo[] with data for the current row. The
** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
*/
-static int fts3GetMatchinfo(
+static void fts3GetMatchinfo(
+ sqlite3_context *pCtx, /* Return results here */
Fts3Cursor *pCsr, /* FTS3 Cursor object */
const char *zArg /* Second argument to matchinfo() function */
){
@@ -1193,6 +1317,9 @@ static int fts3GetMatchinfo(
int rc = SQLITE_OK;
int bGlobal = 0; /* Collect 'global' stats as well as local */
+ u32 *aOut = 0;
+ void (*xDestroyOut)(void*) = 0;
+
memset(&sInfo, 0, sizeof(MatchInfo));
sInfo.pCursor = pCsr;
sInfo.nCol = pTab->nColumn;
@@ -1200,21 +1327,18 @@ static int fts3GetMatchinfo(
/* If there is cached matchinfo() data, but the format string for the
** cache does not match the format string for this request, discard
** the cached data. */
- if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
- assert( pCsr->aMatchinfo );
- sqlite3_free(pCsr->aMatchinfo);
- pCsr->zMatchinfo = 0;
- pCsr->aMatchinfo = 0;
+ if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
+ sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
+ pCsr->pMIBuffer = 0;
}
- /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
+ /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
** matchinfo function has been called for this query. In this case
** allocate the array used to accumulate the matchinfo data and
** initialize those elements that are constant for every row.
*/
- if( pCsr->aMatchinfo==0 ){
+ if( pCsr->pMIBuffer==0 ){
int nMatchinfo = 0; /* Number of u32 elements in match-info */
- int nArg; /* Bytes in zArg */
int i; /* Used to iterate through zArg */
/* Determine the number of phrases in the query */
@@ -1223,30 +1347,46 @@ static int fts3GetMatchinfo(
/* Determine the number of integers in the buffer returned by this call. */
for(i=0; zArg[i]; i++){
+ char *zErr = 0;
+ if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
+ sqlite3_result_error(pCtx, zErr, -1);
+ sqlite3_free(zErr);
+ return;
+ }
nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
}
/* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
- nArg = (int)strlen(zArg);
- pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
- if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
-
- pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
- pCsr->nMatchinfo = nMatchinfo;
- memcpy(pCsr->zMatchinfo, zArg, nArg+1);
- memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
+ pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
+ if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
+
pCsr->isMatchinfoNeeded = 1;
bGlobal = 1;
}
- sInfo.aMatchinfo = pCsr->aMatchinfo;
- sInfo.nPhrase = pCsr->nPhrase;
- if( pCsr->isMatchinfoNeeded ){
+ if( rc==SQLITE_OK ){
+ xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
+ if( xDestroyOut==0 ){
+ rc = SQLITE_NOMEM;
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ sInfo.aMatchinfo = aOut;
+ sInfo.nPhrase = pCsr->nPhrase;
rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
- pCsr->isMatchinfoNeeded = 0;
+ if( bGlobal ){
+ fts3MIBufferSetGlobal(pCsr->pMIBuffer);
+ }
}
- return rc;
+ if( rc!=SQLITE_OK ){
+ sqlite3_result_error_code(pCtx, rc);
+ if( xDestroyOut ) xDestroyOut(aOut);
+ }else{
+ int n = pCsr->pMIBuffer->nElem * sizeof(u32);
+ sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
+ }
}
/*
@@ -1452,7 +1592,7 @@ void sqlite3Fts3Offsets(
*/
sCtx.iCol = iCol;
sCtx.iTerm = 0;
- (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
+ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
/* Retreive the text stored in column iCol. If an SQL NULL is stored
** in column iCol, jump immediately to the next iteration of the loop.
@@ -1544,19 +1684,9 @@ void sqlite3Fts3Matchinfo(
const char *zArg /* Second arg to matchinfo() function */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc;
- int i;
const char *zFormat;
if( zArg ){
- for(i=0; zArg[i]; i++){
- char *zErr = 0;
- if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
- sqlite3_result_error(pContext, zErr, -1);
- sqlite3_free(zErr);
- return;
- }
- }
zFormat = zArg;
}else{
zFormat = FTS3_MATCHINFO_DEFAULT;
@@ -1565,17 +1695,10 @@ void sqlite3Fts3Matchinfo(
if( !pCsr->pExpr ){
sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
return;
- }
-
- /* Retrieve matchinfo() data. */
- rc = fts3GetMatchinfo(pCsr, zFormat);
- sqlite3Fts3SegmentsClose(pTab);
-
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pContext, rc);
}else{
- int n = pCsr->nMatchinfo * sizeof(u32);
- sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
+ /* Retrieve matchinfo() data. */
+ fts3GetMatchinfo(pContext, pCsr, zFormat);
+ sqlite3Fts3SegmentsClose(pTab);
}
}