diff options
Diffstat (limited to 'src/where.c')
-rw-r--r-- | src/where.c | 293 |
1 files changed, 247 insertions, 46 deletions
diff --git a/src/where.c b/src/where.c index e95fd57f3..58e35384b 100644 --- a/src/where.c +++ b/src/where.c @@ -772,6 +772,79 @@ static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ pWC->a[iParent].nChild++; } +/* +** Return the N-th AND-connected subterm of pTerm. Or if pTerm is not +** a conjunction, then return just pTerm when N==0. If N is exceeds +** the number of available subterms, return NULL. +*/ +static WhereTerm *whereNthSubterm(WhereTerm *pTerm, int N){ + if( pTerm->eOperator!=WO_AND ){ + return N==0 ? pTerm : 0; + } + if( N<pTerm->u.pAndInfo->wc.nTerm ){ + return &pTerm->u.pAndInfo->wc.a[N]; + } + return 0; +} + +/* +** Subterms pOne and pTwo are contained within WHERE clause pWC. The +** two subterms are in disjunction - they are OR-ed together. +** +** If these two terms are both of the form: "A op B" with the same +** A and B values but different operators and if the operators are +** compatible (if one is = and the other is <, for example) then +** add a new virtual AND term to pWC that is the combination of the +** two. +** +** Some examples: +** +** x<y OR x=y --> x<=y +** x=y OR x=y --> x=y +** x<=y OR x<y --> x<=y +** +** The following is NOT generated: +** +** x<y OR x>y --> x!=y +*/ +static void whereCombineDisjuncts( + SrcList *pSrc, /* the FROM clause */ + WhereClause *pWC, /* The complete WHERE clause */ + WhereTerm *pOne, /* First disjunct */ + WhereTerm *pTwo /* Second disjunct */ +){ + u16 eOp = pOne->eOperator | pTwo->eOperator; + sqlite3 *db; /* Database connection (for malloc) */ + Expr *pNew; /* New virtual expression */ + int op; /* Operator for the combined expression */ + int idxNew; /* Index in pWC of the next virtual term */ + + if( (pOne->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; + if( (pTwo->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; + if( (eOp & (WO_EQ|WO_LT|WO_LE))!=eOp + && (eOp & (WO_EQ|WO_GT|WO_GE))!=eOp ) return; + assert( pOne->pExpr->pLeft!=0 && pOne->pExpr->pRight!=0 ); + assert( pTwo->pExpr->pLeft!=0 && pTwo->pExpr->pRight!=0 ); + if( sqlite3ExprCompare(pOne->pExpr->pLeft, pTwo->pExpr->pLeft, -1) ) return; + if( sqlite3ExprCompare(pOne->pExpr->pRight, pTwo->pExpr->pRight, -1) )return; + /* If we reach this point, it means the two subterms can be combined */ + if( (eOp & (eOp-1))!=0 ){ + if( eOp & (WO_LT|WO_LE) ){ + eOp = WO_LE; + }else{ + assert( eOp & (WO_GT|WO_GE) ); + eOp = WO_GE; + } + } + db = pWC->pWInfo->pParse->db; + pNew = sqlite3ExprDup(db, pOne->pExpr, 0); + if( pNew==0 ) return; + for(op=TK_EQ; eOp!=(WO_EQ<<(op-TK_EQ)); op++){ assert( op<TK_GE ); } + pNew->op = op; + idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); + exprAnalyze(pSrc, pWC, idxNew); +} + #if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) /* ** Analyze a term that consists of two or more OR-connected @@ -796,6 +869,7 @@ static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ ** (C) t1.x=t2.y OR (t1.x=t2.z AND t1.y=15) ** (D) x=expr1 OR (y>11 AND y<22 AND z LIKE '*hello*') ** (E) (p.a=1 AND q.b=2 AND r.c=3) OR (p.x=4 AND q.y=5 AND r.z=6) +** (F) x>A OR (x=A AND y>=B) ** ** CASE 1: ** @@ -812,6 +886,16 @@ static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ ** ** CASE 2: ** +** If there are exactly two disjuncts one side has x>A and the other side +** has x=A (for the same x and A) then add a new virtual conjunct term to the +** WHERE clause of the form "x>=A". Example: +** +** x>A OR (x=A AND y>B) adds: x>=A +** +** The added conjunct can sometimes be helpful in query planning. +** +** CASE 3: +** ** If all subterms are indexable by a single table T, then set ** ** WhereTerm.eOperator = WO_OR @@ -938,12 +1022,26 @@ static void exprAnalyzeOrTerm( } /* - ** Record the set of tables that satisfy case 2. The set might be + ** Record the set of tables that satisfy case 3. The set might be ** empty. */ pOrInfo->indexable = indexable; pTerm->eOperator = indexable==0 ? 0 : WO_OR; + /* For a two-way OR, attempt to implementation case 2. + */ + if( indexable && pOrWc->nTerm==2 ){ + int iOne = 0; + WhereTerm *pOne; + while( (pOne = whereNthSubterm(&pOrWc->a[0],iOne++))!=0 ){ + int iTwo = 0; + WhereTerm *pTwo; + while( (pTwo = whereNthSubterm(&pOrWc->a[1],iTwo++))!=0 ){ + whereCombineDisjuncts(pSrc, pWC, pOne, pTwo); + } + } + } + /* ** chngToIN holds a set of tables that *might* satisfy case 1. But ** we have to do some additional checking to see if case 1 really @@ -1073,7 +1171,7 @@ static void exprAnalyzeOrTerm( }else{ sqlite3ExprListDelete(db, pList); } - pTerm->eOperator = WO_NOOP; /* case 1 trumps case 2 */ + pTerm->eOperator = WO_NOOP; /* case 1 trumps case 3 */ } } } @@ -1268,7 +1366,7 @@ static void exprAnalyze( Expr *pNewExpr2; int idxNew1; int idxNew2; - Token sCollSeqName; /* Name of collating sequence */ + const char *zCollSeqName; /* Name of collating sequence */ const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC; pLeft = pExpr->x.pList->a[1].pExpr; @@ -1304,11 +1402,10 @@ static void exprAnalyze( } *pC = c + 1; } - sCollSeqName.z = noCase ? "NOCASE" : "BINARY"; - sCollSeqName.n = 6; + zCollSeqName = noCase ? "NOCASE" : "BINARY"; pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); pNewExpr1 = sqlite3PExpr(pParse, TK_GE, - sqlite3ExprAddCollateToken(pParse,pNewExpr1,&sCollSeqName), + sqlite3ExprAddCollateString(pParse,pNewExpr1,zCollSeqName), pStr1, 0); transferJoinMarkings(pNewExpr1, pExpr); idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags); @@ -1316,7 +1413,7 @@ static void exprAnalyze( exprAnalyze(pSrc, pWC, idxNew1); pNewExpr2 = sqlite3ExprDup(db, pLeft, 0); pNewExpr2 = sqlite3PExpr(pParse, TK_LT, - sqlite3ExprAddCollateToken(pParse,pNewExpr2,&sCollSeqName), + sqlite3ExprAddCollateString(pParse,pNewExpr2,zCollSeqName), pStr2, 0); transferJoinMarkings(pNewExpr2, pExpr); idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags); @@ -1933,11 +2030,14 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ ** Estimate the location of a particular key among all keys in an ** index. Store the results in aStat as follows: ** -** aStat[0] Est. number of rows less than pVal -** aStat[1] Est. number of rows equal to pVal +** aStat[0] Est. number of rows less than pRec +** aStat[1] Est. number of rows equal to pRec ** ** Return the index of the sample that is the smallest sample that -** is greater than or equal to pRec. +** is greater than or equal to pRec. Note that this index is not an index +** into the aSample[] array - it is an index into a virtual set of samples +** based on the contents of aSample[] and the number of fields in record +** pRec. */ static int whereKeyStats( Parse *pParse, /* Database connection */ @@ -1948,67 +2048,158 @@ static int whereKeyStats( ){ IndexSample *aSample = pIdx->aSample; int iCol; /* Index of required stats in anEq[] etc. */ + int i; /* Index of first sample >= pRec */ + int iSample; /* Smallest sample larger than or equal to pRec */ int iMin = 0; /* Smallest sample not yet tested */ - int i = pIdx->nSample; /* Smallest sample larger than or equal to pRec */ int iTest; /* Next sample to test */ int res; /* Result of comparison operation */ + int nField; /* Number of fields in pRec */ + tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ #ifndef SQLITE_DEBUG UNUSED_PARAMETER( pParse ); #endif assert( pRec!=0 ); - iCol = pRec->nField - 1; assert( pIdx->nSample>0 ); - assert( pRec->nField>0 && iCol<pIdx->nSampleCol ); + assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); + + /* Do a binary search to find the first sample greater than or equal + ** to pRec. If pRec contains a single field, the set of samples to search + ** is simply the aSample[] array. If the samples in aSample[] contain more + ** than one fields, all fields following the first are ignored. + ** + ** If pRec contains N fields, where N is more than one, then as well as the + ** samples in aSample[] (truncated to N fields), the search also has to + ** consider prefixes of those samples. For example, if the set of samples + ** in aSample is: + ** + ** aSample[0] = (a, 5) + ** aSample[1] = (a, 10) + ** aSample[2] = (b, 5) + ** aSample[3] = (c, 100) + ** aSample[4] = (c, 105) + ** + ** Then the search space should ideally be the samples above and the + ** unique prefixes [a], [b] and [c]. But since that is hard to organize, + ** the code actually searches this set: + ** + ** 0: (a) + ** 1: (a, 5) + ** 2: (a, 10) + ** 3: (a, 10) + ** 4: (b) + ** 5: (b, 5) + ** 6: (c) + ** 7: (c, 100) + ** 8: (c, 105) + ** 9: (c, 105) + ** + ** For each sample in the aSample[] array, N samples are present in the + ** effective sample array. In the above, samples 0 and 1 are based on + ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. + ** + ** Often, sample i of each block of N effective samples has (i+1) fields. + ** Except, each sample may be extended to ensure that it is greater than or + ** equal to the previous sample in the array. For example, in the above, + ** sample 2 is the first sample of a block of N samples, so at first it + ** appears that it should be 1 field in size. However, that would make it + ** smaller than sample 1, so the binary search would not work. As a result, + ** it is extended to two fields. The duplicates that this creates do not + ** cause any problems. + */ + nField = pRec->nField; + iCol = 0; + iSample = pIdx->nSample * nField; do{ - iTest = (iMin+i)/2; - res = sqlite3VdbeRecordCompare(aSample[iTest].n, aSample[iTest].p, pRec); + int iSamp; /* Index in aSample[] of test sample */ + int n; /* Number of fields in test sample */ + + iTest = (iMin+iSample)/2; + iSamp = iTest / nField; + if( iSamp>0 ){ + /* The proposed effective sample is a prefix of sample aSample[iSamp]. + ** Specifically, the shortest prefix of at least (1 + iTest%nField) + ** fields that is greater than the previous effective sample. */ + for(n=(iTest % nField) + 1; n<nField; n++){ + if( aSample[iSamp-1].anLt[n-1]!=aSample[iSamp].anLt[n-1] ) break; + } + }else{ + n = iTest + 1; + } + + pRec->nField = n; + res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); if( res<0 ){ + iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; + iMin = iTest+1; + }else if( res==0 && n<nField ){ + iLower = aSample[iSamp].anLt[n-1]; iMin = iTest+1; + res = -1; }else{ - i = iTest; + iSample = iTest; + iCol = n-1; } - }while( res && iMin<i ); + }while( res && iMin<iSample ); + i = iSample / nField; #ifdef SQLITE_DEBUG /* The following assert statements check that the binary search code ** above found the right answer. This block serves no purpose other ** than to invoke the asserts. */ - if( res==0 ){ - /* If (res==0) is true, then sample $i must be equal to pRec */ - assert( i<pIdx->nSample ); - assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) - || pParse->db->mallocFailed ); - }else{ - /* Otherwise, pRec must be smaller than sample $i and larger than - ** sample ($i-1). */ - assert( i==pIdx->nSample - || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 - || pParse->db->mallocFailed ); - assert( i==0 - || sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 - || pParse->db->mallocFailed ); + if( pParse->db->mallocFailed==0 ){ + if( res==0 ){ + /* If (res==0) is true, then pRec must be equal to sample i. */ + assert( i<pIdx->nSample ); + assert( iCol==nField-1 ); + pRec->nField = nField; + assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) + || pParse->db->mallocFailed + ); + }else{ + /* Unless i==pIdx->nSample, indicating that pRec is larger than + ** all samples in the aSample[] array, pRec must be smaller than the + ** (iCol+1) field prefix of sample i. */ + assert( i<=pIdx->nSample && i>=0 ); + pRec->nField = iCol+1; + assert( i==pIdx->nSample + || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 + || pParse->db->mallocFailed ); + + /* if i==0 and iCol==0, then record pRec is smaller than all samples + ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must + ** be greater than or equal to the (iCol) field prefix of sample i. + ** If (i>0), then pRec must also be greater than sample (i-1). */ + if( iCol>0 ){ + pRec->nField = iCol; + assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 + || pParse->db->mallocFailed ); + } + if( i>0 ){ + pRec->nField = nField; + assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 + || pParse->db->mallocFailed ); + } + } } #endif /* ifdef SQLITE_DEBUG */ - /* At this point, aSample[i] is the first sample that is greater than - ** or equal to pVal. Or if i==pIdx->nSample, then all samples are less - ** than pVal. If aSample[i]==pVal, then res==0. - */ if( res==0 ){ + /* Record pRec is equal to sample i */ + assert( iCol==nField-1 ); aStat[0] = aSample[i].anLt[iCol]; aStat[1] = aSample[i].anEq[iCol]; }else{ - tRowcnt iLower, iUpper, iGap; - if( i==0 ){ - iLower = 0; - iUpper = aSample[0].anLt[iCol]; + /* At this point, the (iCol+1) field prefix of aSample[i] is the first + ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec + ** is larger than all samples in the array. */ + tRowcnt iUpper, iGap; + if( i>=pIdx->nSample ){ + iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); }else{ - i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); - iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol]; - iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol]; + iUpper = aSample[i].anLt[iCol]; } - aStat[1] = pIdx->aAvgEq[iCol]; + if( iLower>=iUpper ){ iGap = 0; }else{ @@ -2020,7 +2211,11 @@ static int whereKeyStats( iGap = iGap/3; } aStat[0] = iLower + iGap; + aStat[1] = pIdx->aAvgEq[iCol]; } + + /* Restore the pRec->nField value before returning. */ + pRec->nField = nField; return i; } #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ @@ -5807,10 +6002,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ /* Seed the search with a single WherePath containing zero WhereLoops. ** - ** TUNING: Do not let the number of iterations go above 25. If the cost - ** of computing an automatic index is not paid back within the first 25 + ** TUNING: Do not let the number of iterations go above 28. If the cost + ** of computing an automatic index is not paid back within the first 28 ** rows, then do not use the automatic index. */ - aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==sqlite3LogEst(25) ); + aFrom[0].nRow = MIN(pParse->nQueryLoop, 48); assert( 48==sqlite3LogEst(28) ); nFrom = 1; assert( aFrom[0].isOrdered==0 ); if( nOrderBy ){ @@ -6610,6 +6805,12 @@ WhereInfo *sqlite3WhereBegin( if( op ){ sqlite3VdbeAddOp3(v, op, iIndexCur, pIx->tnum, iDb); sqlite3VdbeSetP4KeyInfo(pParse, pIx); + if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 + && (pLoop->wsFlags & (WHERE_COLUMN_RANGE|WHERE_SKIPSCAN))==0 + && (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 + ){ + sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ); /* Hint to COMDB2 */ + } VdbeComment((v, "%s", pIx->zName)); } } |