diff options
author | drh <> | 2021-12-01 16:31:02 +0000 |
---|---|---|
committer | drh <> | 2021-12-01 16:31:02 +0000 |
commit | 2db144c33b39985b159ec9210cc13bdb53c00e1c (patch) | |
tree | 317c41dcf05b1f3c7ebf99fcb76380b4d1ede8f9 /src | |
parent | c1085ea412b5c78d58cad59273d71f44d39843c5 (diff) | |
download | sqlite-2db144c33b39985b159ec9210cc13bdb53c00e1c.tar.gz sqlite-2db144c33b39985b159ec9210cc13bdb53c00e1c.zip |
Add a Bloom filter to the automatic-index mechanism.
FossilOrigin-Name: 50ac4de1d7cbb586ea7969e1ae80ea8b021e194edc2fa7db19374b4ee9369bee
Diffstat (limited to 'src')
-rw-r--r-- | src/sqliteInt.h | 1 | ||||
-rw-r--r-- | src/vdbe.c | 106 | ||||
-rw-r--r-- | src/vdbeInt.h | 2 | ||||
-rw-r--r-- | src/vdbemem.c | 4 | ||||
-rw-r--r-- | src/where.c | 8 | ||||
-rw-r--r-- | src/whereInt.h | 1 | ||||
-rw-r--r-- | src/wherecode.c | 8 |
7 files changed, 127 insertions, 3 deletions
diff --git a/src/sqliteInt.h b/src/sqliteInt.h index b45151104..d404b7b4f 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1761,6 +1761,7 @@ struct sqlite3 { #define SQLITE_SeekScan 0x00020000 /* The OP_SeekScan optimization */ #define SQLITE_OmitOrderBy 0x00040000 /* Omit pointless ORDER BY */ /* TH3 expects this value ^^^^^^^^^^ to be 0x40000. Coordinate any change */ +#define SQLITE_BloomFilter 0x00080000 /* Use a Bloom filter on searches */ #define SQLITE_AllOpts 0xffffffff /* All optimizations */ /* diff --git a/src/vdbe.c b/src/vdbe.c index 3476c02da..e38527356 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -672,6 +672,35 @@ static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){ } /* +** Default size of a bloom filter, in bytes +*/ +#define SQLITE_BLOOM_SZ 10000 + +/* +** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning +** with pOp->p3. Return the hash. +*/ +static unsigned int filterHash(const Mem *aMem, const Op *pOp){ + int i, mx; + u32 h = 0; + + i = pOp->p3; + assert( pOp->p4type==P4_INT32 ); + mx = i + pOp->p4.i; + for(i=pOp->p3, mx=i+pOp->p4.i; i<mx; i++){ + const Mem *p = &aMem[i]; + if( p->flags & (MEM_Int|MEM_IntReal) ){ + h += (u32)(p->u.i&0xffffffff); + }else if( p->flags & MEM_Real ){ + h += (u32)(sqlite3VdbeIntValue(p)&0xffffffff); + }else if( p->flags & (MEM_Str|MEM_Blob) ){ + h += p->n; + } + } + return h % (SQLITE_BLOOM_SZ*8); +} + +/* ** Return the symbolic name for the data type of a pMem */ static const char *vdbeMemTypeName(Mem *pMem){ @@ -8129,6 +8158,83 @@ case OP_Function: { /* group */ break; } +/* Opcode: FilterInit P1 * * * * +** Synopsis: filter(P1) = empty +** +** Initialize register P1 so that is an empty bloom filter. +*/ +case OP_FilterInit: { + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + sqlite3VdbeMemSetZeroBlob(pIn1, SQLITE_BLOOM_SZ); + if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem; + break; +} + +/* Opcode: FilterAdd P1 * P3 P4 * +** Synopsis: filter(P1) += key(P3@P4) +** +** Compute a hash on the P4 registers starting with r[P3] and +** add that hash to the bloom filter contained in r[P1]. +*/ +case OP_FilterAdd: { + u32 h; + + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags & MEM_Blob ); + assert( pIn1->n==SQLITE_BLOOM_SZ ); + h = filterHash(aMem, pOp); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + int ii; + for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){ + registerTrace(ii, &aMem[ii]); + } + printf("hash = %u\n", h); + } +#endif + assert( h>=0 && h<SQLITE_BLOOM_SZ*8 ); + pIn1->z[h/8] |= 1<<(h&7); + break; +} + +/* Opcode: Filter P1 P2 P3 P4 * +** Synopsis: if key(P3@P4) not in filter(P1) goto P2 +** +** Compute a hash on the key contained in the P4 registers starting +** with r[P3]. Check to see if that hash is found in the +** bloom filter hosted by register P1. If it is not present then +** maybe jump to P2. Otherwise fall through. +** +** False negatives are harmless. It is always safe to fall through, +** even if the value is in the bloom filter. A false negative causes +** more CPU cycles to be used, but it should still yield the correct +** answer. However, an incorrect answer may well arise from a +** false positive - if the jump is taken when it should fall through. +*/ +case OP_Filter: { /* jump */ + u32 h; + + assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) ); + pIn1 = &aMem[pOp->p1]; + assert( pIn1->flags & MEM_Blob ); + assert( pIn1->n==SQLITE_BLOOM_SZ ); + h = filterHash(aMem, pOp); +#ifdef SQLITE_DEBUG + if( db->flags&SQLITE_VdbeTrace ){ + int ii; + for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){ + registerTrace(ii, &aMem[ii]); + } + printf("hash = %u\n", h); + } +#endif + assert( h>=0 && h<SQLITE_BLOOM_SZ*8 ); + if( (pIn1->z[h/8] & (1<<(h&7)))==0 ) goto jump_to_p2; + break; +} + /* Opcode: Trace P1 P2 * P4 * ** ** Write P4 on the statement trace output if statement tracing is diff --git a/src/vdbeInt.h b/src/vdbeInt.h index 599d06416..c76cdbfdb 100644 --- a/src/vdbeInt.h +++ b/src/vdbeInt.h @@ -538,7 +538,7 @@ int sqlite3VdbeMemSetRowSet(Mem*); int sqlite3VdbeMemMakeWriteable(Mem*); int sqlite3VdbeMemStringify(Mem*, u8, u8); int sqlite3IntFloatCompare(i64,double); -i64 sqlite3VdbeIntValue(Mem*); +i64 sqlite3VdbeIntValue(const Mem*); int sqlite3VdbeMemIntegerify(Mem*); double sqlite3VdbeRealValue(Mem*); int sqlite3VdbeBooleanValue(Mem*, int ifNull); diff --git a/src/vdbemem.c b/src/vdbemem.c index 570a2eb38..5a9d15f46 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -596,12 +596,12 @@ static SQLITE_NOINLINE i64 doubleToInt64(double r){ ** ** If pMem represents a string value, its encoding might be changed. */ -static SQLITE_NOINLINE i64 memIntValue(Mem *pMem){ +static SQLITE_NOINLINE i64 memIntValue(const Mem *pMem){ i64 value = 0; sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc); return value; } -i64 sqlite3VdbeIntValue(Mem *pMem){ +i64 sqlite3VdbeIntValue(const Mem *pMem){ int flags; assert( pMem!=0 ); assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); diff --git a/src/where.c b/src/where.c index 54e4ea819..eaa45b019 100644 --- a/src/where.c +++ b/src/where.c @@ -904,6 +904,10 @@ static void constructAutomaticIndex( sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); sqlite3VdbeSetP4KeyInfo(pParse, pIdx); VdbeComment((v, "for %s", pTable->zName)); + if( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ + pLevel->regFilter = ++pParse->nMem; + sqlite3VdbeAddOp1(v, OP_FilterInit, pLevel->regFilter); + } /* Fill the automatic index with content */ pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; @@ -926,6 +930,10 @@ static void constructAutomaticIndex( regBase = sqlite3GenerateIndexKey( pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0 ); + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, + regBase, pLoop->u.btree.nEq); + } sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); diff --git a/src/whereInt.h b/src/whereInt.h index f651e790c..5b076e40e 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -64,6 +64,7 @@ struct WhereLevel { u32 iLikeRepCntr; /* LIKE range processing counter register (times 2) */ int addrLikeRep; /* LIKE range processing address */ #endif + int regFilter; /* Bloom filter */ u8 iFrom; /* Which entry in the FROM clause */ u8 op, p3, p5; /* Opcode, P3 & P5 of the opcode that ends the loop */ int p1, p2; /* Operands of the opcode used to end the loop */ diff --git a/src/wherecode.c b/src/wherecode.c index 460ac4fe3..637db432e 100644 --- a/src/wherecode.c +++ b/src/wherecode.c @@ -1511,6 +1511,10 @@ Bitmask sqlite3WhereCodeOneLoopStart( iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); addrNxt = pLevel->addrNxt; + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, + iRowidReg, 1); + } sqlite3VdbeAddOp3(v, OP_SeekRowid, iCur, addrNxt, iRowidReg); VdbeCoverage(v); pLevel->op = OP_Noop; @@ -1836,6 +1840,10 @@ Bitmask sqlite3WhereCodeOneLoopStart( sqlite3VdbeAddOp2(v, OP_Integer, 1, regBignull); VdbeComment((v, "NULL-scan pass ctr")); } + if( pLevel->regFilter ){ + sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, + regBase, nConstraint); + } op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; assert( op!=0 ); |