aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <>2021-12-01 16:31:02 +0000
committerdrh <>2021-12-01 16:31:02 +0000
commit2db144c33b39985b159ec9210cc13bdb53c00e1c (patch)
tree317c41dcf05b1f3c7ebf99fcb76380b4d1ede8f9 /src
parentc1085ea412b5c78d58cad59273d71f44d39843c5 (diff)
downloadsqlite-2db144c33b39985b159ec9210cc13bdb53c00e1c.tar.gz
sqlite-2db144c33b39985b159ec9210cc13bdb53c00e1c.zip
Add a Bloom filter to the automatic-index mechanism.
FossilOrigin-Name: 50ac4de1d7cbb586ea7969e1ae80ea8b021e194edc2fa7db19374b4ee9369bee
Diffstat (limited to 'src')
-rw-r--r--src/sqliteInt.h1
-rw-r--r--src/vdbe.c106
-rw-r--r--src/vdbeInt.h2
-rw-r--r--src/vdbemem.c4
-rw-r--r--src/where.c8
-rw-r--r--src/whereInt.h1
-rw-r--r--src/wherecode.c8
7 files changed, 127 insertions, 3 deletions
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index b45151104..d404b7b4f 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -1761,6 +1761,7 @@ struct sqlite3 {
#define SQLITE_SeekScan 0x00020000 /* The OP_SeekScan optimization */
#define SQLITE_OmitOrderBy 0x00040000 /* Omit pointless ORDER BY */
/* TH3 expects this value ^^^^^^^^^^ to be 0x40000. Coordinate any change */
+#define SQLITE_BloomFilter 0x00080000 /* Use a Bloom filter on searches */
#define SQLITE_AllOpts 0xffffffff /* All optimizations */
/*
diff --git a/src/vdbe.c b/src/vdbe.c
index 3476c02da..e38527356 100644
--- a/src/vdbe.c
+++ b/src/vdbe.c
@@ -672,6 +672,35 @@ static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){
}
/*
+** Default size of a bloom filter, in bytes
+*/
+#define SQLITE_BLOOM_SZ 10000
+
+/*
+** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning
+** with pOp->p3. Return the hash.
+*/
+static unsigned int filterHash(const Mem *aMem, const Op *pOp){
+ int i, mx;
+ u32 h = 0;
+
+ i = pOp->p3;
+ assert( pOp->p4type==P4_INT32 );
+ mx = i + pOp->p4.i;
+ for(i=pOp->p3, mx=i+pOp->p4.i; i<mx; i++){
+ const Mem *p = &aMem[i];
+ if( p->flags & (MEM_Int|MEM_IntReal) ){
+ h += (u32)(p->u.i&0xffffffff);
+ }else if( p->flags & MEM_Real ){
+ h += (u32)(sqlite3VdbeIntValue(p)&0xffffffff);
+ }else if( p->flags & (MEM_Str|MEM_Blob) ){
+ h += p->n;
+ }
+ }
+ return h % (SQLITE_BLOOM_SZ*8);
+}
+
+/*
** Return the symbolic name for the data type of a pMem
*/
static const char *vdbeMemTypeName(Mem *pMem){
@@ -8129,6 +8158,83 @@ case OP_Function: { /* group */
break;
}
+/* Opcode: FilterInit P1 * * * *
+** Synopsis: filter(P1) = empty
+**
+** Initialize register P1 so that is an empty bloom filter.
+*/
+case OP_FilterInit: {
+ assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
+ pIn1 = &aMem[pOp->p1];
+ sqlite3VdbeMemSetZeroBlob(pIn1, SQLITE_BLOOM_SZ);
+ if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem;
+ break;
+}
+
+/* Opcode: FilterAdd P1 * P3 P4 *
+** Synopsis: filter(P1) += key(P3@P4)
+**
+** Compute a hash on the P4 registers starting with r[P3] and
+** add that hash to the bloom filter contained in r[P1].
+*/
+case OP_FilterAdd: {
+ u32 h;
+
+ assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
+ pIn1 = &aMem[pOp->p1];
+ assert( pIn1->flags & MEM_Blob );
+ assert( pIn1->n==SQLITE_BLOOM_SZ );
+ h = filterHash(aMem, pOp);
+#ifdef SQLITE_DEBUG
+ if( db->flags&SQLITE_VdbeTrace ){
+ int ii;
+ for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){
+ registerTrace(ii, &aMem[ii]);
+ }
+ printf("hash = %u\n", h);
+ }
+#endif
+ assert( h>=0 && h<SQLITE_BLOOM_SZ*8 );
+ pIn1->z[h/8] |= 1<<(h&7);
+ break;
+}
+
+/* Opcode: Filter P1 P2 P3 P4 *
+** Synopsis: if key(P3@P4) not in filter(P1) goto P2
+**
+** Compute a hash on the key contained in the P4 registers starting
+** with r[P3]. Check to see if that hash is found in the
+** bloom filter hosted by register P1. If it is not present then
+** maybe jump to P2. Otherwise fall through.
+**
+** False negatives are harmless. It is always safe to fall through,
+** even if the value is in the bloom filter. A false negative causes
+** more CPU cycles to be used, but it should still yield the correct
+** answer. However, an incorrect answer may well arise from a
+** false positive - if the jump is taken when it should fall through.
+*/
+case OP_Filter: { /* jump */
+ u32 h;
+
+ assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
+ pIn1 = &aMem[pOp->p1];
+ assert( pIn1->flags & MEM_Blob );
+ assert( pIn1->n==SQLITE_BLOOM_SZ );
+ h = filterHash(aMem, pOp);
+#ifdef SQLITE_DEBUG
+ if( db->flags&SQLITE_VdbeTrace ){
+ int ii;
+ for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){
+ registerTrace(ii, &aMem[ii]);
+ }
+ printf("hash = %u\n", h);
+ }
+#endif
+ assert( h>=0 && h<SQLITE_BLOOM_SZ*8 );
+ if( (pIn1->z[h/8] & (1<<(h&7)))==0 ) goto jump_to_p2;
+ break;
+}
+
/* Opcode: Trace P1 P2 * P4 *
**
** Write P4 on the statement trace output if statement tracing is
diff --git a/src/vdbeInt.h b/src/vdbeInt.h
index 599d06416..c76cdbfdb 100644
--- a/src/vdbeInt.h
+++ b/src/vdbeInt.h
@@ -538,7 +538,7 @@ int sqlite3VdbeMemSetRowSet(Mem*);
int sqlite3VdbeMemMakeWriteable(Mem*);
int sqlite3VdbeMemStringify(Mem*, u8, u8);
int sqlite3IntFloatCompare(i64,double);
-i64 sqlite3VdbeIntValue(Mem*);
+i64 sqlite3VdbeIntValue(const Mem*);
int sqlite3VdbeMemIntegerify(Mem*);
double sqlite3VdbeRealValue(Mem*);
int sqlite3VdbeBooleanValue(Mem*, int ifNull);
diff --git a/src/vdbemem.c b/src/vdbemem.c
index 570a2eb38..5a9d15f46 100644
--- a/src/vdbemem.c
+++ b/src/vdbemem.c
@@ -596,12 +596,12 @@ static SQLITE_NOINLINE i64 doubleToInt64(double r){
**
** If pMem represents a string value, its encoding might be changed.
*/
-static SQLITE_NOINLINE i64 memIntValue(Mem *pMem){
+static SQLITE_NOINLINE i64 memIntValue(const Mem *pMem){
i64 value = 0;
sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc);
return value;
}
-i64 sqlite3VdbeIntValue(Mem *pMem){
+i64 sqlite3VdbeIntValue(const Mem *pMem){
int flags;
assert( pMem!=0 );
assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
diff --git a/src/where.c b/src/where.c
index 54e4ea819..eaa45b019 100644
--- a/src/where.c
+++ b/src/where.c
@@ -904,6 +904,10 @@ static void constructAutomaticIndex(
sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1);
sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
VdbeComment((v, "for %s", pTable->zName));
+ if( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){
+ pLevel->regFilter = ++pParse->nMem;
+ sqlite3VdbeAddOp1(v, OP_FilterInit, pLevel->regFilter);
+ }
/* Fill the automatic index with content */
pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom];
@@ -926,6 +930,10 @@ static void constructAutomaticIndex(
regBase = sqlite3GenerateIndexKey(
pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0
);
+ if( pLevel->regFilter ){
+ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0,
+ regBase, pLoop->u.btree.nEq);
+ }
sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord);
sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue);
diff --git a/src/whereInt.h b/src/whereInt.h
index f651e790c..5b076e40e 100644
--- a/src/whereInt.h
+++ b/src/whereInt.h
@@ -64,6 +64,7 @@ struct WhereLevel {
u32 iLikeRepCntr; /* LIKE range processing counter register (times 2) */
int addrLikeRep; /* LIKE range processing address */
#endif
+ int regFilter; /* Bloom filter */
u8 iFrom; /* Which entry in the FROM clause */
u8 op, p3, p5; /* Opcode, P3 & P5 of the opcode that ends the loop */
int p1, p2; /* Operands of the opcode used to end the loop */
diff --git a/src/wherecode.c b/src/wherecode.c
index 460ac4fe3..637db432e 100644
--- a/src/wherecode.c
+++ b/src/wherecode.c
@@ -1511,6 +1511,10 @@ Bitmask sqlite3WhereCodeOneLoopStart(
iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg);
if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg);
addrNxt = pLevel->addrNxt;
+ if( pLevel->regFilter ){
+ sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt,
+ iRowidReg, 1);
+ }
sqlite3VdbeAddOp3(v, OP_SeekRowid, iCur, addrNxt, iRowidReg);
VdbeCoverage(v);
pLevel->op = OP_Noop;
@@ -1836,6 +1840,10 @@ Bitmask sqlite3WhereCodeOneLoopStart(
sqlite3VdbeAddOp2(v, OP_Integer, 1, regBignull);
VdbeComment((v, "NULL-scan pass ctr"));
}
+ if( pLevel->regFilter ){
+ sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt,
+ regBase, nConstraint);
+ }
op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev];
assert( op!=0 );