diff options
author | drh <drh@noemail.net> | 2013-10-10 20:13:18 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2013-10-10 20:13:18 +0000 |
commit | a63b85299290b7eca076ea6e031e7f43d4feadcf (patch) | |
tree | e925eb05965ec76f37b6483581d5bd8f8ea9e53b /src/where.c | |
parent | 59255f9a98381960cfede9253277f128895531f6 (diff) | |
parent | 7e65c94bdc689bebd222321dec5c034194267ac9 (diff) | |
download | sqlite-a63b85299290b7eca076ea6e031e7f43d4feadcf.tar.gz sqlite-a63b85299290b7eca076ea6e031e7f43d4feadcf.zip |
Synchronize with the trunk.
FossilOrigin-Name: 136445ba020c9475d3f5a7843d7d0add98477138
Diffstat (limited to 'src/where.c')
-rw-r--r-- | src/where.c | 402 |
1 files changed, 185 insertions, 217 deletions
diff --git a/src/where.c b/src/where.c index b633dfd57..545dee2d2 100644 --- a/src/where.c +++ b/src/where.c @@ -49,25 +49,6 @@ typedef struct WhereOrCost WhereOrCost; typedef struct WhereOrSet WhereOrSet; /* -** Cost X is tracked as 10*log2(X) stored in a 16-bit integer. The -** maximum cost for ordinary tables is 64*(2**63) which becomes 6900. -** (Virtual tables can return a larger cost, but let's assume they do not.) -** So all costs can be stored in a 16-bit unsigned integer without risk -** of overflow. -** -** Costs are estimates, so no effort is made to compute 10*log2(X) exactly. -** Instead, a close estimate is used. Any value of X<=1 is stored as 0. -** X=2 is 10. X=3 is 16. X=1000 is 99. etc. -** -** The tool/wherecosttest.c source file implements a command-line program -** that will convert WhereCosts to integers, convert integers to WhereCosts -** and do addition and multiplication on WhereCost values. The wherecosttest -** command-line program is a useful utility to have around when working with -** this module. -*/ -typedef unsigned short int WhereCost; - -/* ** This object contains information needed to implement a single nested ** loop in WHERE clause. ** @@ -106,6 +87,7 @@ struct WhereLevel { Index *pCovidx; /* Possible covering index for WHERE_MULTI_OR */ } u; struct WhereLoop *pWLoop; /* The selected WhereLoop object */ + Bitmask notReady; /* FROM entries not usable at this level */ }; /* @@ -130,9 +112,9 @@ struct WhereLoop { #endif u8 iTab; /* Position in FROM clause of table for this loop */ u8 iSortIdx; /* Sorting index number. 0==None */ - WhereCost rSetup; /* One-time setup cost (ex: create transient index) */ - WhereCost rRun; /* Cost of running each loop */ - WhereCost nOut; /* Estimated number of output rows */ + LogEst rSetup; /* One-time setup cost (ex: create transient index) */ + LogEst rRun; /* Cost of running each loop */ + LogEst nOut; /* Estimated number of output rows */ union { struct { /* Information for internal btree tables */ int nEq; /* Number of equality constraints */ @@ -162,8 +144,8 @@ struct WhereLoop { */ struct WhereOrCost { Bitmask prereq; /* Prerequisites */ - WhereCost rRun; /* Cost of running this subquery */ - WhereCost nOut; /* Number of outputs for this subquery */ + LogEst rRun; /* Cost of running this subquery */ + LogEst nOut; /* Number of outputs for this subquery */ }; /* The WhereOrSet object holds a set of possible WhereOrCosts that @@ -201,8 +183,8 @@ static int whereLoopResize(sqlite3*, WhereLoop*, int); struct WherePath { Bitmask maskLoop; /* Bitmask of all WhereLoop objects in this path */ Bitmask revLoop; /* aLoop[]s that should be reversed for ORDER BY */ - WhereCost nRow; /* Estimated number of rows generated by this path */ - WhereCost rCost; /* Total cost of this path */ + LogEst nRow; /* Estimated number of rows generated by this path */ + LogEst rCost; /* Total cost of this path */ u8 isOrdered; /* True if this path satisfies ORDER BY */ u8 isOrderedValid; /* True if the isOrdered field is valid */ WhereLoop **aLoop; /* Array of WhereLoop objects implementing this path */ @@ -268,6 +250,7 @@ struct WhereTerm { WhereOrInfo *pOrInfo; /* Extra information if (eOperator & WO_OR)!=0 */ WhereAndInfo *pAndInfo; /* Extra information if (eOperator& WO_AND)!=0 */ } u; + LogEst truthProb; /* Probability of truth for this expression */ u16 eOperator; /* A WO_xx value describing <op> */ u8 wtFlags; /* TERM_xxx bit flags. See below */ u8 nChild; /* Number of children that must disable us */ @@ -415,7 +398,7 @@ struct WhereInfo { ExprList *pResultSet; /* Result set. DISTINCT operates on these */ WhereLoop *pLoops; /* List of all WhereLoop objects */ Bitmask revMask; /* Mask of ORDER BY terms that need reversing */ - WhereCost nRowOut; /* Estimated number of output rows */ + LogEst nRowOut; /* Estimated number of output rows */ u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ u8 bOBSat; /* ORDER BY satisfied by indices */ u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE/DELETE */ @@ -475,26 +458,11 @@ struct WhereInfo { #define WHERE_MULTI_OR 0x00002000 /* OR using multiple indices */ #define WHERE_AUTO_INDEX 0x00004000 /* Uses an ephemeral index */ - -/* Convert a WhereCost value (10 times log2(X)) into its integer value X. -** A rough approximation is used. The value returned is not exact. -*/ -static u64 whereCostToInt(WhereCost x){ - u64 n; - if( x<10 ) return 1; - n = x%10; - x /= 10; - if( n>=5 ) n -= 2; - else if( n>=1 ) n -= 1; - if( x>=3 ) return (n+8)<<(x-3); - return (n+8)>>(3-x); -} - /* ** Return the estimated number of output rows from a WHERE clause */ u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ - return whereCostToInt(pWInfo->nRowOut); + return sqlite3LogEstToInt(pWInfo->nRowOut); } /* @@ -556,8 +524,8 @@ static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){ static int whereOrInsert( WhereOrSet *pSet, /* The WhereOrSet to be updated */ Bitmask prereq, /* Prerequisites of the new entry */ - WhereCost rRun, /* Run-cost of the new entry */ - WhereCost nOut /* Number of outputs for the new entry */ + LogEst rRun, /* Run-cost of the new entry */ + LogEst nOut /* Number of outputs for the new entry */ ){ u16 i; WhereOrCost *p; @@ -683,6 +651,11 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){ pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); } pTerm = &pWC->a[idx = pWC->nTerm++]; + if( p && ExprHasProperty(p, EP_Unlikely) ){ + pTerm->truthProb = sqlite3LogEst(p->iTable) - 99; + }else{ + pTerm->truthProb = -1; + } pTerm->pExpr = sqlite3ExprSkipCollate(p); pTerm->wtFlags = wtFlags; pTerm->pWC = pWC; @@ -1943,75 +1916,12 @@ static int isDistinctRedundant( return 0; } -/* -** Find (an approximate) sum of two WhereCosts. This computation is -** not a simple "+" operator because WhereCost is stored as a logarithmic -** value. -** -*/ -static WhereCost whereCostAdd(WhereCost a, WhereCost b){ - static const unsigned char x[] = { - 10, 10, /* 0,1 */ - 9, 9, /* 2,3 */ - 8, 8, /* 4,5 */ - 7, 7, 7, /* 6,7,8 */ - 6, 6, 6, /* 9,10,11 */ - 5, 5, 5, /* 12-14 */ - 4, 4, 4, 4, /* 15-18 */ - 3, 3, 3, 3, 3, 3, /* 19-24 */ - 2, 2, 2, 2, 2, 2, 2, /* 25-31 */ - }; - if( a>=b ){ - if( a>b+49 ) return a; - if( a>b+31 ) return a+1; - return a+x[a-b]; - }else{ - if( b>a+49 ) return b; - if( b>a+31 ) return b+1; - return b+x[b-a]; - } -} - -/* -** Convert an integer into a WhereCost. In other words, compute a -** good approximatation for 10*log2(x). -*/ -static WhereCost whereCost(tRowcnt x){ - static WhereCost a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; - WhereCost y = 40; - if( x<8 ){ - if( x<2 ) return 0; - while( x<8 ){ y -= 10; x <<= 1; } - }else{ - while( x>255 ){ y += 40; x >>= 4; } - while( x>15 ){ y += 10; x >>= 1; } - } - return a[x&7] + y - 10; -} - -#ifndef SQLITE_OMIT_VIRTUALTABLE -/* -** Convert a double (as received from xBestIndex of a virtual table) -** into a WhereCost. In other words, compute an approximation for -** 10*log2(x). -*/ -static WhereCost whereCostFromDouble(double x){ - u64 a; - WhereCost e; - assert( sizeof(x)==8 && sizeof(a)==8 ); - if( x<=1 ) return 0; - if( x<=2000000000 ) return whereCost((tRowcnt)x); - memcpy(&a, &x, 8); - e = (a>>52) - 1022; - return e*10; -} -#endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Estimate the logarithm of the input value to base 2. */ -static WhereCost estLog(WhereCost N){ - WhereCost x = whereCost(N); +static LogEst estLog(LogEst N){ + LogEst x = sqlite3LogEst(N); return x>33 ? x - 33 : 0; } @@ -2421,12 +2331,15 @@ static void whereKeyStats( tRowcnt *aStat /* OUT: stats written here */ ){ IndexSample *aSample = pIdx->aSample; - int iCol = pRec->nField-1; /* Index of required stats in anEq[] etc. */ + int iCol; /* Index of required stats in anEq[] etc. */ int iMin = 0; /* Smallest sample not yet tested */ int i = pIdx->nSample; /* Smallest sample larger than or equal to pRec */ int iTest; /* Next sample to test */ int res; /* Result of comparison operation */ + assert( pRec!=0 || pParse->db->mallocFailed ); + if( pRec==0 ) return; + iCol = pRec->nField - 1; assert( pIdx->nSample>0 ); assert( pRec->nField>0 && iCol<pIdx->nSampleCol ); do{ @@ -2521,7 +2434,7 @@ static void whereKeyStats( ** ** then nEq is set to 0. ** -** When this function is called, *pnOut is set to the whereCost() of the +** When this function is called, *pnOut is set to the sqlite3LogEst() of the ** number of rows that the index scan is expected to visit without ** considering the range constraints. If nEq is 0, this is the number of ** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) @@ -2537,28 +2450,24 @@ static int whereRangeScanEst( WhereLoopBuilder *pBuilder, WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereCost *pnOut /* IN/OUT: Number of rows visited */ + WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ ){ int rc = SQLITE_OK; - int nOut = (int)*pnOut; + int nOut = pLoop->nOut; + int nEq = pLoop->u.btree.nEq; + LogEst nNew; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - Index *p = pBuilder->pNew->u.btree.pIndex; - int nEq = pBuilder->pNew->u.btree.nEq; + Index *p = pLoop->u.btree.pIndex; - if( nEq==pBuilder->nRecValid + if( p->nSample>0 + && nEq==pBuilder->nRecValid && nEq<p->nSampleCol - && p->nSample && OptimizationEnabled(pParse->db, SQLITE_Stat3) ){ UnpackedRecord *pRec = pBuilder->pRec; tRowcnt a[2]; u8 aff; - if( nEq==p->nColumn ){ - aff = SQLITE_AFF_INTEGER; - }else{ - aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; - } /* Variable iLower will be set to the estimate of the number of rows in ** the index that are less than the lower bound of the range query. The @@ -2580,6 +2489,11 @@ static int whereRangeScanEst( tRowcnt iLower; tRowcnt iUpper; + if( nEq==p->nColumn ){ + aff = SQLITE_AFF_INTEGER; + }else{ + aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; + } /* Determine iLower and iUpper using ($P) only. */ if( nEq==0 ){ iLower = 0; @@ -2603,6 +2517,7 @@ static int whereRangeScanEst( whereKeyStats(pParse, p, pRec, 0, a); iNew = a[0] + ((pLower->eOperator & WO_GT) ? a[1] : 0); if( iNew>iLower ) iLower = iNew; + nOut--; } } @@ -2617,21 +2532,21 @@ static int whereRangeScanEst( whereKeyStats(pParse, p, pRec, 1, a); iNew = a[0] + ((pUpper->eOperator & WO_LE) ? a[1] : 0); if( iNew<iUpper ) iUpper = iNew; + nOut--; } } pBuilder->pRec = pRec; if( rc==SQLITE_OK ){ - WhereCost nNew; if( iUpper>iLower ){ - nNew = whereCost(iUpper - iLower); + nNew = sqlite3LogEst(iUpper - iLower); }else{ - nNew = 10; assert( 10==whereCost(2) ); + nNew = 10; assert( 10==sqlite3LogEst(2) ); } if( nNew<nOut ){ nOut = nNew; } - *pnOut = (WhereCost)nOut; + pLoop->nOut = (LogEst)nOut; WHERETRACE(0x100, ("range scan regions: %u..%u est=%d\n", (u32)iLower, (u32)iUpper, nOut)); return SQLITE_OK; @@ -2644,14 +2559,18 @@ static int whereRangeScanEst( assert( pLower || pUpper ); /* TUNING: Each inequality constraint reduces the search space 4-fold. ** A BETWEEN operator, therefore, reduces the search space 16-fold */ + nNew = nOut; if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){ - nOut -= 20; assert( 20==whereCost(4) ); + nNew -= 20; assert( 20==sqlite3LogEst(4) ); + nOut--; } if( pUpper ){ - nOut -= 20; assert( 20==whereCost(4) ); + nNew -= 20; assert( 20==sqlite3LogEst(4) ); + nOut--; } - if( nOut<10 ) nOut = 10; - *pnOut = (WhereCost)nOut; + if( nNew<10 ) nNew = 10; + if( nNew<nOut ) nOut = nNew; + pLoop->nOut = (LogEst)nOut; return rc; } @@ -2796,6 +2715,7 @@ static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ if( pTerm && (pTerm->wtFlags & TERM_CODED)==0 && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + && (pLevel->notReady & pTerm->prereqAll)==0 ){ pTerm->wtFlags |= TERM_CODED; if( pTerm->iParent>=0 ){ @@ -3221,7 +3141,6 @@ static Bitmask codeOneLoopStart( int addrCont; /* Jump here to continue with next cycle */ int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ int iReleaseReg = 0; /* Temp register to free before returning */ - Bitmask newNotReady; /* Return value */ pParse = pWInfo->pParse; v = pParse->pVdbe; @@ -3231,6 +3150,7 @@ static Bitmask codeOneLoopStart( pLoop = pLevel->pWLoop; pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; iCur = pTabItem->iCursor; + pLevel->notReady = notReady & ~getMask(&pWInfo->sMaskSet, iCur); bRev = (pWInfo->revMask>>iLevel)&1; omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 && (pWInfo->wctrlFlags & WHERE_FORCE_TABLE)==0; @@ -3883,7 +3803,6 @@ static Bitmask codeOneLoopStart( pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; } - newNotReady = notReady & ~getMask(&pWInfo->sMaskSet, iCur); /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. @@ -3893,7 +3812,7 @@ static Bitmask codeOneLoopStart( testcase( pTerm->wtFlags & TERM_VIRTUAL ); testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & newNotReady)!=0 ){ + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ testcase( pWInfo->untestedTerms==0 && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); pWInfo->untestedTerms = 1; @@ -3925,7 +3844,7 @@ static Bitmask codeOneLoopStart( if( pLevel->iLeftJoin ) continue; pE = pTerm->pExpr; assert( !ExprHasProperty(pE, EP_FromJoin) ); - assert( (pTerm->prereqRight & newNotReady)!=0 ); + assert( (pTerm->prereqRight & pLevel->notReady)!=0 ); pAlt = findTerm(pWC, iCur, pTerm->u.leftColumn, notReady, WO_EQ|WO_IN, 0); if( pAlt==0 ) continue; if( pAlt->wtFlags & (TERM_CODED) ) continue; @@ -3953,7 +3872,7 @@ static Bitmask codeOneLoopStart( testcase( pTerm->wtFlags & TERM_VIRTUAL ); testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & newNotReady)!=0 ){ + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ assert( pWInfo->untestedTerms ); continue; } @@ -3964,7 +3883,7 @@ static Bitmask codeOneLoopStart( } sqlite3ReleaseTempReg(pParse, iReleaseReg); - return newNotReady; + return pLevel->notReady; } #ifdef WHERETRACE_ENABLED @@ -4065,8 +3984,11 @@ static int whereLoopResize(sqlite3 *db, WhereLoop *p, int n){ ** Transfer content from the second pLoop into the first. */ static int whereLoopXfer(sqlite3 *db, WhereLoop *pTo, WhereLoop *pFrom){ - if( whereLoopResize(db, pTo, pFrom->nLTerm) ) return SQLITE_NOMEM; whereLoopClearUnion(db, pTo); + if( whereLoopResize(db, pTo, pFrom->nLTerm) ){ + memset(&pTo->u, 0, sizeof(pTo->u)); + return SQLITE_NOMEM; + } memcpy(pTo, pFrom, WHERE_LOOP_XFER_SZ); memcpy(pTo->aLTerm, pFrom->aLTerm, pTo->nLTerm*sizeof(pTo->aLTerm[0])); if( pFrom->wsFlags & WHERE_VIRTUALTABLE ){ @@ -4182,9 +4104,9 @@ static int whereLoopInsert(WhereLoopBuilder *pBuilder, WhereLoop *pTemplate){ assert( p->rSetup==pTemplate->rSetup ); if( p->prereq==pTemplate->prereq && p->nLTerm<pTemplate->nLTerm - && (p->wsFlags & WHERE_INDEXED)!=0 - && (pTemplate->wsFlags & WHERE_INDEXED)!=0 - && p->u.btree.pIndex==pTemplate->u.btree.pIndex + && (p->wsFlags & pTemplate->wsFlags & WHERE_INDEXED)!=0 + && (p->u.btree.pIndex==pTemplate->u.btree.pIndex + || pTemplate->rRun+p->nLTerm<=p->rRun+pTemplate->nLTerm) ){ /* Overwrite an existing WhereLoop with an similar one that uses ** more terms of the index */ @@ -4199,12 +4121,12 @@ static int whereLoopInsert(WhereLoopBuilder *pBuilder, WhereLoop *pTemplate){ if( (p->prereq & pTemplate->prereq)==pTemplate->prereq && p->rRun>=pTemplate->rRun && p->nOut>=pTemplate->nOut - && ALWAYS(p->rSetup>=pTemplate->rSetup) /* See SETUP-INVARIANT above */ ){ /* Overwrite an existing WhereLoop with a better one: one that is ** better at one of (1) dependencies, (2) setup-cost, (3) run-cost ** or (4) number of output rows, and is no worse in any of those ** categories. */ + assert( p->rSetup>=pTemplate->rSetup ); /* SETUP-INVARIANT above */ pNext = p->pNextLoop; break; } @@ -4252,6 +4174,36 @@ whereLoopInsert_noop: } /* +** Adjust the WhereLoop.nOut value downward to account for terms of the +** WHERE clause that reference the loop but which are not used by an +** index. +** +** In the current implementation, the first extra WHERE clause term reduces +** the number of output rows by a factor of 10 and each additional term +** reduces the number of output rows by sqrt(2). +*/ +static void whereLoopOutputAdjust(WhereClause *pWC, WhereLoop *pLoop, int iCur){ + WhereTerm *pTerm, *pX; + Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf); + int i, j; + + if( !OptimizationEnabled(pWC->pWInfo->pParse->db, SQLITE_AdjustOutEst) ){ + return; + } + for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){ + if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break; + if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue; + if( (pTerm->prereqAll & notAllowed)!=0 ) continue; + for(j=pLoop->nLTerm-1; j>=0; j--){ + pX = pLoop->aLTerm[j]; + if( pX==pTerm ) break; + if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break; + } + if( j<0 ) pLoop->nOut += pTerm->truthProb; + } +} + +/* ** We have so far matched pBuilder->pNew->u.btree.nEq terms of the index pIndex. ** Try to match one more. ** @@ -4262,7 +4214,7 @@ static int whereLoopAddBtreeIndex( WhereLoopBuilder *pBuilder, /* The WhereLoop factory */ struct SrcList_item *pSrc, /* FROM clause term being analyzed */ Index *pProbe, /* An index on pSrc */ - WhereCost nInMul /* log(Number of iterations due to IN) */ + LogEst nInMul /* log(Number of iterations due to IN) */ ){ WhereInfo *pWInfo = pBuilder->pWInfo; /* WHERE analyse context */ Parse *pParse = pWInfo->pParse; /* Parsing context */ @@ -4275,11 +4227,11 @@ static int whereLoopAddBtreeIndex( u16 saved_nLTerm; /* Original value of pNew->nLTerm */ int saved_nEq; /* Original value of pNew->u.btree.nEq */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ - WhereCost saved_nOut; /* Original value of pNew->nOut */ + LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ - WhereCost nRowEst; /* Estimated index selectivity */ - WhereCost rLogSize; /* Logarithm of table size */ + LogEst nRowEst; /* Estimated index selectivity */ + LogEst rLogSize; /* Logarithm of table size */ WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ pNew = pBuilder->pNew; @@ -4299,7 +4251,7 @@ static int whereLoopAddBtreeIndex( assert( pNew->u.btree.nEq<=pProbe->nColumn ); if( pNew->u.btree.nEq < pProbe->nColumn ){ iCol = pProbe->aiColumn[pNew->u.btree.nEq]; - nRowEst = whereCost(pProbe->aiRowEst[pNew->u.btree.nEq+1]); + nRowEst = sqlite3LogEst(pProbe->aiRowEst[pNew->u.btree.nEq+1]); if( nRowEst==0 && pProbe->onError==OE_None ) nRowEst = 1; }else{ iCol = -1; @@ -4313,7 +4265,7 @@ static int whereLoopAddBtreeIndex( saved_prereq = pNew->prereq; saved_nOut = pNew->nOut; pNew->rSetup = 0; - rLogSize = estLog(whereCost(pProbe->aiRowEst[0])); + rLogSize = estLog(sqlite3LogEst(pProbe->aiRowEst[0])); for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 @@ -4340,10 +4292,10 @@ static int whereLoopAddBtreeIndex( pNew->wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ /* "x IN (SELECT ...)": TUNING: the SELECT returns 25 rows */ - nIn = 46; assert( 46==whereCost(25) ); + nIn = 46; assert( 46==sqlite3LogEst(25) ); }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ /* "x IN (value, value, ...)" */ - nIn = whereCost(pExpr->x.pList->nExpr); + nIn = sqlite3LogEst(pExpr->x.pList->nExpr); } pNew->rRun += nIn; pNew->u.btree.nEq++; @@ -4365,7 +4317,7 @@ static int whereLoopAddBtreeIndex( pNew->wsFlags |= WHERE_COLUMN_NULL; pNew->u.btree.nEq++; /* TUNING: IS NULL selects 2 rows */ - nIn = 10; assert( 10==whereCost(2) ); + nIn = 10; assert( 10==sqlite3LogEst(2) ); pNew->nOut = nRowEst + nInMul + nIn; }else if( pTerm->eOperator & (WO_GT|WO_GE) ){ testcase( pTerm->eOperator & WO_GT ); @@ -4385,7 +4337,7 @@ static int whereLoopAddBtreeIndex( if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ /* Adjust nOut and rRun for STAT3 range values */ assert( pNew->nOut==saved_nOut ); - whereRangeScanEst(pParse, pBuilder, pBtm, pTop, &pNew->nOut); + whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 if( nInMul==0 @@ -4405,7 +4357,7 @@ static int whereLoopAddBtreeIndex( } assert( nOut==0 || rc==SQLITE_OK ); if( nOut ){ - nOut = whereCost(nOut); + nOut = sqlite3LogEst(nOut); pNew->nOut = MIN(nOut, saved_nOut); } } @@ -4413,11 +4365,11 @@ static int whereLoopAddBtreeIndex( if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ /* Each row involves a step of the index, then a binary search of ** the main table */ - pNew->rRun = whereCostAdd(pNew->rRun, rLogSize>27 ? rLogSize-17 : 10); + pNew->rRun = sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10); } /* Step cost for each output row */ - pNew->rRun = whereCostAdd(pNew->rRun, pNew->nOut); - /* TBD: Adjust nOut for additional constraints */ + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut); + whereLoopOutputAdjust(pBuilder->pWC, pNew, pSrc->iCursor); rc = whereLoopInsert(pBuilder, pNew); if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 && pNew->u.btree.nEq<(pProbe->nColumn + (pProbe->zName!=0)) @@ -4516,14 +4468,16 @@ static int whereLoopAddBtree( int rc = SQLITE_OK; /* Return code */ int iSortIdx = 1; /* Index number */ int b; /* A boolean value */ - WhereCost rSize; /* number of rows in the table */ - WhereCost rLogSize; /* Logarithm of the number of rows in the table */ + LogEst rSize; /* number of rows in the table */ + LogEst rLogSize; /* Logarithm of the number of rows in the table */ WhereClause *pWC; /* The parsed WHERE clause */ + Table *pTab; /* Table being queried */ pNew = pBuilder->pNew; pWInfo = pBuilder->pWInfo; pTabList = pWInfo->pTabList; pSrc = pTabList->a + pNew->iTab; + pTab = pSrc->pTab; pWC = pBuilder->pWC; assert( !IsVirtual(pSrc->pTab) ); @@ -4541,8 +4495,8 @@ static int whereLoopAddBtree( sPk.aiColumn = &aiColumnPk; sPk.aiRowEst = aiRowEstPk; sPk.onError = OE_Replace; - sPk.pTable = pSrc->pTab; - aiRowEstPk[0] = pSrc->pTab->nRowEst; + sPk.pTable = pTab; + aiRowEstPk[0] = pTab->nRowEst; aiRowEstPk[1] = 1; pFirst = pSrc->pTab->pIndex; if( pSrc->notIndexed==0 ){ @@ -4552,7 +4506,7 @@ static int whereLoopAddBtree( } pProbe = &sPk; } - rSize = whereCost(pSrc->pTab->nRowEst); + rSize = sqlite3LogEst(pTab->nRowEst); rLogSize = estLog(rSize); #ifndef SQLITE_OMIT_AUTOMATIC_INDEX @@ -4577,13 +4531,13 @@ static int whereLoopAddBtree( /* TUNING: One-time cost for computing the automatic index is ** approximately 7*N*log2(N) where N is the number of rows in ** the table being indexed. */ - pNew->rSetup = rLogSize + rSize + 28; assert( 28==whereCost(7) ); + pNew->rSetup = rLogSize + rSize + 28; assert( 28==sqlite3LogEst(7) ); /* TUNING: Each index lookup yields 20 rows in the table. This ** is more than the usual guess of 10 rows, since we have no way ** of knowning how selective the index will ultimately be. It would ** not be unreasonable to make this value much larger. */ - pNew->nOut = 43; assert( 43==whereCost(20) ); - pNew->rRun = whereCostAdd(rLogSize,pNew->nOut); + pNew->nOut = 43; assert( 43==sqlite3LogEst(20) ); + pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut); pNew->wsFlags = WHERE_AUTO_INDEX; pNew->prereq = mExtra | pTerm->prereqRight; rc = whereLoopInsert(pBuilder, pNew); @@ -4617,11 +4571,11 @@ static int whereLoopAddBtree( pNew->iSortIdx = b ? iSortIdx : 0; /* TUNING: Cost of full table scan is 3*(N + log2(N)). ** + The extra 3 factor is to encourage the use of indexed lookups - ** over full scans. A smaller constant 2 is used for covering - ** index scans so that a covering index scan will be favored over - ** a table scan. */ - pNew->rRun = whereCostAdd(rSize,rLogSize) + 16; + ** over full scans. FIXME */ + pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16; + whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor); rc = whereLoopInsert(pBuilder, pNew); + pNew->nOut = rSize; if( rc ) break; }else{ Bitmask m = pSrc->colUsed & ~columnsInIndex(pProbe); @@ -4631,6 +4585,7 @@ static int whereLoopAddBtree( if( b || ( m==0 && pProbe->bUnordered==0 + && pProbe->szIdxRow<pTab->szTabRow && (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 && sqlite3GlobalConfig.bUseCis && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan) @@ -4638,22 +4593,22 @@ static int whereLoopAddBtree( ){ pNew->iSortIdx = b ? iSortIdx : 0; if( m==0 ){ - /* TUNING: Cost of a covering index scan is 2*(N + log2(N)). - ** + The extra 2 factor is to encourage the use of indexed lookups - ** over index scans. A table scan uses a factor of 3 so that - ** index scans are favored over table scans. - ** + If this covering index might also help satisfy the ORDER BY - ** clause, then the cost is fudged down slightly so that this - ** index is favored above other indices that have no hope of - ** helping with the ORDER BY. */ - pNew->rRun = 10 + whereCostAdd(rSize,rLogSize) - b; + /* TUNING: Cost of a covering index scan is K*(N + log2(N)). + ** + The extra factor K of between 1.1 and 3.0 that depends + ** on the relative sizes of the table and the index. K + ** is smaller for smaller indices, thus favoring them. + */ + pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 1 + + (15*pProbe->szIdxRow)/pTab->szTabRow; }else{ assert( b!=0 ); /* TUNING: Cost of scanning a non-covering index is (N+1)*log2(N) ** which we will simplify to just N*log2(N) */ pNew->rRun = rSize + rLogSize; } + whereLoopOutputAdjust(pWC, pNew, pSrc->iCursor); rc = whereLoopInsert(pBuilder, pNew); + pNew->nOut = rSize; if( rc ) break; } } @@ -4822,9 +4777,9 @@ static int whereLoopAddVirtual( pNew->u.vtab.isOrdered = (u8)((pIdxInfo->nOrderBy!=0) && pIdxInfo->orderByConsumed); pNew->rSetup = 0; - pNew->rRun = whereCostFromDouble(pIdxInfo->estimatedCost); + pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost); /* TUNING: Every virtual table query returns 25 rows */ - pNew->nOut = 46; assert( 46==whereCost(25) ); + pNew->nOut = 46; assert( 46==sqlite3LogEst(25) ); whereLoopInsert(pBuilder, pNew); if( pNew->u.vtab.needFree ){ sqlite3_free(pNew->u.vtab.idxStr); @@ -4861,6 +4816,8 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ pWCEnd = pWC->a + pWC->nTerm; pNew = pBuilder->pNew; memset(&sSum, 0, sizeof(sSum)); + pItem = pWInfo->pTabList->a + pNew->iTab; + iCur = pItem->iCursor; for(pTerm=pWC->a; pTerm<pWCEnd && rc==SQLITE_OK; pTerm++){ if( (pTerm->eOperator & WO_OR)!=0 @@ -4872,8 +4829,6 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ int once = 1; int i, j; - pItem = pWInfo->pTabList->a + pNew->iTab; - iCur = pItem->iCursor; sSubBuild = *pBuilder; sSubBuild.pOrderBy = 0; sSubBuild.pOrSet = &sCur; @@ -4914,8 +4869,8 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ for(i=0; i<sPrev.n; i++){ for(j=0; j<sCur.n; j++){ whereOrInsert(&sSum, sPrev.a[i].prereq | sCur.a[j].prereq, - whereCostAdd(sPrev.a[i].rRun, sCur.a[j].rRun), - whereCostAdd(sPrev.a[i].nOut, sCur.a[j].nOut)); + sqlite3LogEstAdd(sPrev.a[i].rRun, sCur.a[j].rRun), + sqlite3LogEstAdd(sPrev.a[i].nOut, sCur.a[j].nOut)); } } } @@ -5253,16 +5208,19 @@ static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){ ** Return SQLITE_OK on success or SQLITE_NOMEM of a memory allocation ** error occurs. */ -static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ +static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ int mxChoice; /* Maximum number of simultaneous paths tracked */ int nLoop; /* Number of terms in the join */ Parse *pParse; /* Parsing context */ sqlite3 *db; /* The database connection */ int iLoop; /* Loop counter over the terms of the join */ int ii, jj; /* Loop counters */ - WhereCost rCost; /* Cost of a path */ - WhereCost mxCost = 0; /* Maximum cost of a set of paths */ - WhereCost rSortCost; /* Cost to do a sort */ + int mxI = 0; /* Index of next entry to replace */ + LogEst rCost; /* Cost of a path */ + LogEst nOut; /* Number of outputs */ + LogEst mxCost = 0; /* Maximum cost of a set of paths */ + LogEst mxOut = 0; /* Maximum nOut value on the set of paths */ + LogEst rSortCost; /* Cost to do a sort */ int nTo, nFrom; /* Number of valid entries in aTo[] and aFrom[] */ WherePath *aFrom; /* All nFrom paths at the previous level */ WherePath *aTo; /* The nTo best paths at the current level */ @@ -5299,7 +5257,7 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ ** TUNING: Do not let the number of iterations go above 25. If the cost ** of computing an automatic index is not paid back within the first 25 ** rows, then do not use the automatic index. */ - aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==whereCost(25) ); + aFrom[0].nRow = MIN(pParse->nQueryLoop, 46); assert( 46==sqlite3LogEst(25) ); nFrom = 1; /* Precompute the cost of sorting the final result set, if the caller @@ -5308,8 +5266,10 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ if( pWInfo->pOrderBy==0 || nRowEst==0 ){ aFrom[0].isOrderedValid = 1; }else{ - /* TUNING: Estimated cost of sorting is N*log2(N) where N is the - ** number of output rows. */ + /* TUNING: Estimated cost of sorting is 48*N*log2(N) where N is the + ** number of output rows. The 48 is the expected size of a row to sort. + ** FIXME: compute a better estimate of the 48 multiplier based on the + ** result set expressions. */ rSortCost = nRowEst + estLog(nRowEst); WHERETRACE(0x002,("---- sort cost=%-3d\n", rSortCost)); } @@ -5329,8 +5289,9 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue; /* At this point, pWLoop is a candidate to be the next loop. ** Compute its cost */ - rCost = whereCostAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow); - rCost = whereCostAdd(rCost, pFrom->rCost); + rCost = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow); + rCost = sqlite3LogEstAdd(rCost, pFrom->rCost); + nOut = pFrom->nRow + pWLoop->nOut; maskNew = pFrom->maskLoop | pWLoop->maskSelf; if( !isOrderedValid ){ switch( wherePathSatisfiesOrderBy(pWInfo, @@ -5343,7 +5304,7 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ case 0: /* No. pFrom+pWLoop will require a separate sort */ isOrdered = 0; isOrderedValid = 1; - rCost = whereCostAdd(rCost, rSortCost); + rCost = sqlite3LogEstAdd(rCost, rSortCost); break; default: /* Cannot tell yet. Try again on the next iteration */ break; @@ -5353,7 +5314,11 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ } /* Check to see if pWLoop should be added to the mxChoice best so far */ for(jj=0, pTo=aTo; jj<nTo; jj++, pTo++){ - if( pTo->maskLoop==maskNew && pTo->isOrderedValid==isOrderedValid ){ + if( pTo->maskLoop==maskNew + && pTo->isOrderedValid==isOrderedValid + && ((pTo->rCost<=rCost && pTo->nRow<=nOut) || + (pTo->rCost>=rCost && pTo->nRow>=nOut)) + ){ testcase( jj==nTo-1 ); break; } @@ -5362,8 +5327,8 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ if( nTo>=mxChoice && rCost>=mxCost ){ #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace&0x4 ){ - sqlite3DebugPrintf("Skip %s cost=%3d order=%c\n", - wherePathName(pFrom, iLoop, pWLoop), rCost, + sqlite3DebugPrintf("Skip %s cost=%-3d,%3d order=%c\n", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?'); } #endif @@ -5375,26 +5340,26 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ jj = nTo++; }else{ /* New path replaces the prior worst to keep count below mxChoice */ - for(jj=nTo-1; aTo[jj].rCost<mxCost; jj--){ assert(jj>0); } + jj = mxI; } pTo = &aTo[jj]; #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace&0x4 ){ - sqlite3DebugPrintf("New %s cost=%-3d order=%c\n", - wherePathName(pFrom, iLoop, pWLoop), rCost, + sqlite3DebugPrintf("New %s cost=%-3d,%3d order=%c\n", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?'); } #endif }else{ - if( pTo->rCost<=rCost ){ + if( pTo->rCost<=rCost && pTo->nRow<=nOut ){ #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace&0x4 ){ sqlite3DebugPrintf( - "Skip %s cost=%-3d order=%c", - wherePathName(pFrom, iLoop, pWLoop), rCost, + "Skip %s cost=%-3d,%3d order=%c", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?'); - sqlite3DebugPrintf(" vs %s cost=%-3d order=%c\n", - wherePathName(pTo, iLoop+1, 0), pTo->rCost, + sqlite3DebugPrintf(" vs %s cost=%-3d,%d order=%c\n", + wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?'); } #endif @@ -5406,11 +5371,11 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace&0x4 ){ sqlite3DebugPrintf( - "Update %s cost=%-3d order=%c", - wherePathName(pFrom, iLoop, pWLoop), rCost, + "Update %s cost=%-3d,%3d order=%c", + wherePathName(pFrom, iLoop, pWLoop), rCost, nOut, isOrderedValid ? (isOrdered ? 'Y' : 'N') : '?'); - sqlite3DebugPrintf(" was %s cost=%-3d order=%c\n", - wherePathName(pTo, iLoop+1, 0), pTo->rCost, + sqlite3DebugPrintf(" was %s cost=%-3d,%3d order=%c\n", + wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow, pTo->isOrderedValid ? (pTo->isOrdered ? 'Y' : 'N') : '?'); } #endif @@ -5418,16 +5383,22 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ /* pWLoop is a winner. Add it to the set of best so far */ pTo->maskLoop = pFrom->maskLoop | pWLoop->maskSelf; pTo->revLoop = revMask; - pTo->nRow = pFrom->nRow + pWLoop->nOut; + pTo->nRow = nOut; pTo->rCost = rCost; pTo->isOrderedValid = isOrderedValid; pTo->isOrdered = isOrdered; memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop); pTo->aLoop[iLoop] = pWLoop; if( nTo>=mxChoice ){ + mxI = 0; mxCost = aTo[0].rCost; + mxOut = aTo[0].nRow; for(jj=1, pTo=&aTo[1]; jj<mxChoice; jj++, pTo++){ - if( pTo->rCost>mxCost ) mxCost = pTo->rCost; + if( pTo->rCost>mxCost || (pTo->rCost==mxCost && pTo->nRow>mxOut) ){ + mxCost = pTo->rCost; + mxOut = pTo->nRow; + mxI = jj; + } } } } @@ -5464,12 +5435,9 @@ static int wherePathSolver(WhereInfo *pWInfo, WhereCost nRowEst){ /* Find the lowest cost path. pFrom will be left pointing to that path */ pFrom = aFrom; - assert( nFrom==1 ); -#if 0 /* The following is needed if nFrom is ever more than 1 */ for(ii=1; ii<nFrom; ii++){ if( pFrom->rCost>aFrom[ii].rCost ) pFrom = &aFrom[ii]; } -#endif assert( pWInfo->nLevel==nLoop ); /* Load the lowest cost path into pWInfo */ for(iLoop=0; iLoop<nLoop; iLoop++){ @@ -5543,7 +5511,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pLoop->nLTerm = 1; pLoop->u.btree.nEq = 1; /* TUNING: Cost of a rowid lookup is 10 */ - pLoop->rRun = 33; /* 33==whereCost(10) */ + pLoop->rRun = 33; /* 33==sqlite3LogEst(10) */ }else{ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ assert( pLoop->aLTermSpace==pLoop->aLTerm ); @@ -5566,12 +5534,12 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pLoop->u.btree.nEq = j; pLoop->u.btree.pIndex = pIdx; /* TUNING: Cost of a unique index lookup is 15 */ - pLoop->rRun = 39; /* 39==whereCost(15) */ + pLoop->rRun = 39; /* 39==sqlite3LogEst(15) */ break; } } if( pLoop->wsFlags ){ - pLoop->nOut = (WhereCost)1; + pLoop->nOut = (LogEst)1; pWInfo->a[0].pWLoop = pLoop; pLoop->maskSelf = getMask(&pWInfo->sMaskSet, iCur); pWInfo->a[0].iTabCur = iCur; @@ -5939,7 +5907,7 @@ WhereInfo *sqlite3WhereBegin( /* If the caller is an UPDATE or DELETE statement that is requesting ** to use a one-pass algorithm, determine if this is appropriate. - ** The one-pass algorithm only works if the WHERE clause constraints + ** The one-pass algorithm only works if the WHERE clause constrains ** the statement to update a single row. */ assert( (wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || pWInfo->nLevel==1 ); |