diff options
author | drh <drh@noemail.net> | 2011-02-04 06:36:44 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2011-02-04 06:36:44 +0000 |
commit | 47fe5533a6879c04d5c26d481d4dff933d2ffdd8 (patch) | |
tree | c5f2072769001551780e475e6cf018086678dd48 /src | |
parent | 9ab724f196854bc9d4f2eca00b9fbe4c6726174a (diff) | |
parent | 40ad34c608b42ce0adc5f35f07e07a2b6f5ef4b3 (diff) | |
download | sqlite-47fe5533a6879c04d5c26d481d4dff933d2ffdd8.tar.gz sqlite-47fe5533a6879c04d5c26d481d4dff933d2ffdd8.zip |
Merge the stat2 query planner enhancements into the trunk.
FossilOrigin-Name: 499edcbc8ab70fcf35431d4e672c68dbcb6c5aad
Diffstat (limited to 'src')
-rw-r--r-- | src/vdbemem.c | 2 | ||||
-rw-r--r-- | src/where.c | 399 |
2 files changed, 331 insertions, 70 deletions
diff --git a/src/vdbemem.c b/src/vdbemem.c index 4831d8065..aae8dbb65 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -1082,6 +1082,8 @@ int sqlite3ValueFromExpr( pVal->r = (double)-1 * pVal->r; sqlite3ValueApplyAffinity(pVal, affinity, enc); } + }else if( op==TK_NULL ){ + pVal = sqlite3ValueNew(db); } #ifndef SQLITE_OMIT_BLOB_LITERAL else if( op==TK_BLOB ){ diff --git a/src/where.c b/src/where.c index 4a5026f5f..6d660e8cc 100644 --- a/src/where.c +++ b/src/where.c @@ -117,6 +117,7 @@ struct WhereTerm { #define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */ #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ +#define TERM_VNULL 0x80 /* Manufactured x>NULL or x<=NULL term */ /* ** An instance of the following structure holds all information about a @@ -210,6 +211,7 @@ struct WhereCost { #define WO_ISNULL 0x080 #define WO_OR 0x100 /* Two or more OR-connected terms */ #define WO_AND 0x200 /* Two or more AND-connected terms */ +#define WO_NOOP 0x800 /* This term does not restrict search space */ #define WO_ALL 0xfff /* Mask of all possible WO_* values */ #define WO_SINGLE 0x0ff /* Mask of all non-compound WO_* values */ @@ -1060,7 +1062,7 @@ static void exprAnalyzeOrTerm( }else{ sqlite3ExprListDelete(db, pList); } - pTerm->eOperator = 0; /* case 1 trumps case 2 */ + pTerm->eOperator = WO_NOOP; /* case 1 trumps case 2 */ } } } @@ -1324,6 +1326,42 @@ static void exprAnalyze( } #endif /* SQLITE_OMIT_VIRTUALTABLE */ +#ifdef SQLITE_ENABLE_STAT2 + /* When sqlite_stat2 histogram data is available an operator of the + ** form "x IS NOT NULL" can sometimes be evaluated more efficiently + ** as "x>NULL" if x is not an INTEGER PRIMARY KEY. So construct a + ** virtual term of that form. + ** + ** Note that the virtual term must be tagged with TERM_VNULL. This + ** TERM_VNULL tag will suppress the not-null check at the beginning + ** of the loop. 
Without the TERM_VNULL flag, the not-null check at + ** the start of the loop will prevent any results from being returned. + */ + if( pExpr->op==TK_NOTNULL && pExpr->pLeft->iColumn>=0 ){ + Expr *pNewExpr; + Expr *pLeft = pExpr->pLeft; + int idxNew; + WhereTerm *pNewTerm; + + pNewExpr = sqlite3PExpr(pParse, TK_GT, + sqlite3ExprDup(db, pLeft, 0), + sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0); + + idxNew = whereClauseInsert(pWC, pNewExpr, + TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); + testcase( idxNew==0 ); + pNewTerm = &pWC->a[idxNew]; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_GT; + pNewTerm->iParent = idxTerm; + pTerm = &pWC->a[idxTerm]; + pTerm->nChild = 1; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } +#endif /* SQLITE_ENABLE_STAT2 */ + /* Prevent ON clause terms of a LEFT JOIN from being used to drive ** an index for tables to the left of the join. */ @@ -2201,11 +2239,18 @@ static void bestVirtualIndex( /* ** Argument pIdx is a pointer to an index structure that has an array of ** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column -** stored in Index.aSample. The domain of values stored in said column -** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions. -** Region 0 contains all values smaller than the first sample value. Region -** 1 contains values larger than or equal to the value of the first sample, -** but smaller than the value of the second. And so on. +** stored in Index.aSample. These samples divide the domain of values stored +** in the index into (SQLITE_INDEX_SAMPLES+1) regions. +** Region 0 contains all values less than the first sample value. Region +** 1 contains values between the first and second samples. Region 2 contains +** values between samples 2 and 3. And so on. Region SQLITE_INDEX_SAMPLES +** contains values larger than the last sample. 
+** +** If the index contains many duplicates of a single value, then it is +** possible that two or more adjacent samples can hold the same value. +** When that is the case, the smallest possible region code is returned +** when roundUp is false and the largest possible region code is returned +** when roundUp is true. ** ** If successful, this function determines which of the regions value ** pVal lies in, sets *piRegion to the region index (a value between 0 @@ -2218,8 +2263,10 @@ static int whereRangeRegion( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ sqlite3_value *pVal, /* Value to consider */ + int roundUp, /* Return largest valid region if true */ int *piRegion /* OUT: Region of domain in which value lies */ ){ + assert( roundUp==0 || roundUp==1 ); if( ALWAYS(pVal) ){ IndexSample *aSample = pIdx->aSample; int i = 0; @@ -2229,7 +2276,17 @@ static int whereRangeRegion( double r = sqlite3_value_double(pVal); for(i=0; i<SQLITE_INDEX_SAMPLES; i++){ if( aSample[i].eType==SQLITE_NULL ) continue; - if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break; + if( aSample[i].eType>=SQLITE_TEXT ) break; + if( roundUp ){ + if( aSample[i].u.r>r ) break; + }else{ + if( aSample[i].u.r>=r ) break; + } + } + }else if( eType==SQLITE_NULL ){ + i = 0; + if( roundUp ){ + while( i<SQLITE_INDEX_SAMPLES && aSample[i].eType==SQLITE_NULL ) i++; } }else{ sqlite3 *db = pParse->db; @@ -2260,7 +2317,7 @@ static int whereRangeRegion( n = sqlite3ValueBytes(pVal, pColl->enc); for(i=0; i<SQLITE_INDEX_SAMPLES; i++){ - int r; + int c; int eSampletype = aSample[i].eType; if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue; if( (eSampletype!=eType) ) break; @@ -2274,14 +2331,14 @@ static int whereRangeRegion( assert( db->mallocFailed ); return SQLITE_NOMEM; } - r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z); + c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z); sqlite3DbFree(db, zSample); }else #endif { - r = 
pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); + c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); } - if( r>0 ) break; + if( c-roundUp>=0 ) break; } } @@ -2364,9 +2421,9 @@ static int valueFromExpr( ** constraints. ** ** In the absence of sqlite_stat2 ANALYZE data, each range inequality -** reduces the search space by 2/3rds. Hence a single constraint (x>?) -** results in a return of 33 and a range constraint (x>? AND x<?) results -** in a return of 11. +** reduces the search space by 3/4ths. Hence a single constraint (x>?) +** results in a return of 25 and a range constraint (x>? AND x<?) results +** in a return of 6. */ static int whereRangeScanEst( Parse *pParse, /* Parsing & code generating context */ @@ -2386,15 +2443,21 @@ static int whereRangeScanEst( int iEst; int iLower = 0; int iUpper = SQLITE_INDEX_SAMPLES; + int roundUpUpper; + int roundUpLower; u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pLower ){ Expr *pExpr = pLower->pExpr->pRight; rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal); + assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE ); + roundUpLower = (pLower->eOperator==WO_GT) ?1:0; } if( rc==SQLITE_OK && pUpper ){ Expr *pExpr = pUpper->pExpr->pRight; rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal); + assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE ); + roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0; } if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){ @@ -2402,28 +2465,29 @@ static int whereRangeScanEst( sqlite3ValueFree(pUpperVal); goto range_est_fallback; }else if( pLowerVal==0 ){ - rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper); + rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( pLower ) iLower = iUpper/2; }else if( pUpperVal==0 ){ - rc = whereRangeRegion(pParse, p, pLowerVal, &iLower); + rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2; }else{ - rc = 
whereRangeRegion(pParse, p, pUpperVal, &iUpper); + rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( rc==SQLITE_OK ){ - rc = whereRangeRegion(pParse, p, pLowerVal, &iLower); + rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); } } + WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper)); iEst = iUpper - iLower; testcase( iEst==SQLITE_INDEX_SAMPLES ); assert( iEst<=SQLITE_INDEX_SAMPLES ); if( iEst<1 ){ - iEst = 1; + *piEst = 50/SQLITE_INDEX_SAMPLES; + }else{ + *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES; } - sqlite3ValueFree(pLowerVal); sqlite3ValueFree(pUpperVal); - *piEst = (iEst * 100)/SQLITE_INDEX_SAMPLES; return rc; } range_est_fallback: @@ -2433,22 +2497,151 @@ range_est_fallback: UNUSED_PARAMETER(nEq); #endif assert( pLower || pUpper ); - if( pLower && pUpper ){ - *piEst = 11; + *piEst = 100; + if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 4; + if( pUpper ) *piEst /= 4; + return rc; +} + +#ifdef SQLITE_ENABLE_STAT2 +/* +** Estimate the number of rows that will be returned based on +** an equality constraint x=VALUE and where that VALUE occurs in +** the histogram data. This only works when x is the left-most +** column of an index and sqlite_stat2 histogram data is available +** for that index. +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. 
+*/ +int whereEqualScanEst( + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index whose left-most column is pTerm */ + Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ + double *pnRow /* Write the revised row estimate here */ +){ + sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ + int iLower, iUpper; /* Range of histogram regions containing pRhs */ + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + double nRowEst; /* New estimate of the number of rows */ + + assert( p->aSample!=0 ); + aff = p->pTable->aCol[p->aiColumn[0]].affinity; + rc = valueFromExpr(pParse, pExpr, aff, &pRhs); + if( rc ) goto whereEqualScanEst_cancel; + if( pRhs==0 ) return SQLITE_NOTFOUND; + rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); + if( rc ) goto whereEqualScanEst_cancel; + rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); + if( rc ) goto whereEqualScanEst_cancel; + WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); + if( iLower>=iUpper ){ + nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); + if( nRowEst<*pnRow ) *pnRow = nRowEst; }else{ - *piEst = 33; + nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES; + *pnRow = nRowEst; + } + +whereEqualScanEst_cancel: + sqlite3ValueFree(pRhs); + return rc; +} +#endif /* defined(SQLITE_ENABLE_STAT2) */ + +#ifdef SQLITE_ENABLE_STAT2 +/* +** Estimate the number of rows that will be returned based on +** an IN constraint where the right-hand side of the IN operator +** is a list of values. Example: +** +** WHERE x IN (1,2,3,4) +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. 
+*/ +int whereInScanEst( + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index whose left-most column is pTerm */ + ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ + double *pnRow /* Write the revised row estimate here */ +){ + sqlite3_value *pVal = 0; /* One value from list */ + int iLower, iUpper; /* Range of histogram regions containing pRhs */ + u8 aff; /* Column affinity */ + int rc = SQLITE_OK; /* Subfunction return code */ + double nRowEst; /* New estimate of the number of rows */ + int nSpan = 0; /* Number of histogram regions spanned */ + int nSingle = 0; /* Histogram regions hit by a single value */ + int nNotFound = 0; /* Count of values that are not constants */ + int i; /* Loop counter */ + u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */ + + assert( p->aSample!=0 ); + aff = p->pTable->aCol[p->aiColumn[0]].affinity; + memset(aSpan, 0, sizeof(aSpan)); + memset(aSingle, 0, sizeof(aSingle)); + for(i=0; i<pList->nExpr; i++){ + sqlite3ValueFree(pVal); + rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); + if( rc ) break; + if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){ + nNotFound++; + continue; + } + rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); + if( rc ) break; + rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); + if( rc ) break; + if( iLower>=iUpper ){ + aSingle[iLower] = 1; + }else{ + assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); + while( iLower<iUpper ) aSpan[iLower++] = 1; + } } + if( rc==SQLITE_OK ){ + for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){ + if( aSpan[i] ){ + nSpan++; + }else if( aSingle[i] ){ + nSingle++; + } + } + nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES) + + nNotFound*p->aiRowEst[1]; + if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; + *pnRow = nRowEst; + WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, 
est=%g\n", + nSpan, nSingle, nNotFound, nRowEst)); + } + sqlite3ValueFree(pVal); return rc; } +#endif /* defined(SQLITE_ENABLE_STAT2) */ /* -** Find the query plan for accessing a particular table. Write the +** Find the best query plan for accessing a particular table. Write the ** best query plan and its cost into the WhereCost object supplied as the ** last parameter. ** ** The lowest cost plan wins. The cost is an estimate of the amount of -** CPU and disk I/O need to process the request using the selected plan. +** CPU and disk I/O needed to process the requested result. ** Factors that influence cost include: ** ** * The estimated number of rows that will be retrieved. (The @@ -2467,7 +2660,7 @@ range_est_fallback: ** ** If a NOT INDEXED clause (pSrc->notIndexed!=0) was attached to the table ** in the SELECT statement, then no indexes are considered. However, the -** selected plan may still take advantage of the tables built-in rowid +** selected plan may still take advantage of the built-in rowid primary key ** index. */ static void bestBtreeIndex( @@ -2510,9 +2703,11 @@ static void bestBtreeIndex( wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); eqTermMask = idxEqTermMask; }else{ - /* There is no INDEXED BY clause. Create a fake Index object to - ** represent the primary key */ - Index *pFirst; /* Any other index on the table */ + /* There is no INDEXED BY clause. Create a fake Index object in local + ** variable sPk to represent the rowid primary key index. 
Make this + ** fake index the first in a chain of Index objects with all of the real + ** indices to follow */ + Index *pFirst; /* First of real indices on the table */ memset(&sPk, 0, sizeof(Index)); sPk.nColumn = 1; sPk.aiColumn = &aiColumnPk; @@ -2523,6 +2718,8 @@ static void bestBtreeIndex( aiRowEstPk[1] = 1; pFirst = pSrc->pTab->pIndex; if( pSrc->notIndexed==0 ){ + /* The real indices of the table are only considered if the + ** NOT INDEXED qualifier is omitted from the FROM clause */ sPk.pNext = pFirst; } pProbe = &sPk; @@ -2540,15 +2737,18 @@ static void bestBtreeIndex( double cost; /* Cost of using pProbe */ double nRow; /* Estimated number of rows in result set */ int rev; /* True to scan in reverse order */ + double nSearch; /* Estimated number of binary searches */ int wsFlags = 0; Bitmask used = 0; /* The following variables are populated based on the properties of - ** scan being evaluated. They are then used to determine the expected + ** index being evaluated. They are then used to determine the expected ** cost and number of rows returned. ** ** nEq: ** Number of equality terms that can be implemented using the index. + ** In other words, the number of initial fields in the index that + ** are used in == or IN or NOT NULL constraints of the WHERE clause. ** ** nInMul: ** The "in-multiplier". This is an estimate of how many seek operations @@ -2572,7 +2772,9 @@ static void bestBtreeIndex( ** ** bInEst: ** Set to true if there was at least one "x IN (SELECT ...)" term used - ** in determining the value of nInMul. + ** in determining the value of nInMul. Note that the RHS of the + ** IN operator must be a SELECT, not a value list, for this variable + ** to be true. ** ** estBound: ** An estimate on the amount of the table that must be searched. A @@ -2580,8 +2782,8 @@ static void bestBtreeIndex( ** might reduce this to a value less than 100 to indicate that only ** a fraction of the table needs searching. 
In the absence of ** sqlite_stat2 ANALYZE data, a single inequality reduces the search - ** space to 1/3rd its original size. So an x>? constraint reduces - ** estBound to 33. Two constraints (x>? AND x<?) reduce estBound to 11. + ** space to 1/4th its original size. So an x>? constraint reduces + ** estBound to 25. Two constraints (x>? AND x<?) reduce estBound to 6. ** ** bSort: ** Boolean. True if there is an ORDER BY clause that will require an @@ -2589,25 +2791,31 @@ static void bestBtreeIndex( ** correctly order records). ** ** bLookup: - ** Boolean. True if for each index entry visited a lookup on the - ** corresponding table b-tree is required. This is always false - ** for the rowid index. For other indexes, it is true unless all the - ** columns of the table used by the SELECT statement are present in - ** the index (such an index is sometimes described as a covering index). + ** Boolean. True if a table lookup is required for each index entry + ** visited. In other words, true if this is not a covering index. + ** This is always false for the rowid primary key index of a table. + ** For other indexes, it is true unless all the columns of the table + ** used by the SELECT statement are present in the index (such an + ** index is sometimes described as a covering index). ** For example, given the index on (a, b), the second of the following - ** two queries requires table b-tree lookups, but the first does not. + ** two queries requires table b-tree lookups in order to find the value + ** of column c, but the first does not because columns a and b are + ** both available in the index. 
** ** SELECT a, b FROM tbl WHERE a = 1; ** SELECT a, b, c FROM tbl WHERE a = 1; */ - int nEq; - int bInEst = 0; - int nInMul = 1; - int estBound = 100; - int nBound = 0; /* Number of range constraints seen */ - int bSort = 0; - int bLookup = 0; - WhereTerm *pTerm; /* A single term of the WHERE clause */ + int nEq; /* Number of == or IN terms matching index */ + int bInEst = 0; /* True if "x IN (SELECT...)" seen */ + int nInMul = 1; /* Number of distinct equalities to lookup */ + int estBound = 100; /* Estimated reduction in search space */ + int nBound = 0; /* Number of range constraints seen */ + int bSort = 0; /* True if external sort required */ + int bLookup = 0; /* True if not a covering index */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ +#ifdef SQLITE_ENABLE_STAT2 + WhereTerm *pFirstTerm = 0; /* First term matching the index */ +#endif /* Determine the values of nEq and nInMul */ for(nEq=0; nEq<pProbe->nColumn; nEq++){ @@ -2619,14 +2827,19 @@ static void bestBtreeIndex( Expr *pExpr = pTerm->pExpr; wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + /* "x IN (SELECT ...)": Assume the SELECT returns 25 rows */ nInMul *= 25; bInEst = 1; - }else if( ALWAYS(pExpr->x.pList) ){ - nInMul *= pExpr->x.pList->nExpr + 1; + }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ + /* "x IN (value, value, ...)" */ + nInMul *= pExpr->x.pList->nExpr; } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } +#ifdef SQLITE_ENABLE_STAT2 + if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm; +#endif used |= pTerm->prereqRight; } @@ -2694,8 +2907,8 @@ static void bestBtreeIndex( } /* - ** Estimate the number of rows of output. For an IN operator, - ** do not let the estimate exceed half the rows in the table. + ** Estimate the number of rows of output. For an "x IN (SELECT...)" + ** constraint, do not let the estimate exceed half the rows in the table. 
*/ nRow = (double)(aiRowEst[nEq] * nInMul); if( bInEst && nRow*2>aiRowEst[0] ){ @@ -2703,31 +2916,69 @@ static void bestBtreeIndex( nInMul = (int)(nRow / aiRowEst[nEq]); } - /* Assume constant cost to access a row and logarithmic cost to - ** do a binary search. Hence, the initial cost is the number of output - ** rows plus log2(table-size) times the number of binary searches. +#ifdef SQLITE_ENABLE_STAT2 + /* If the constraint is of the form x=VALUE and histogram + ** data is available for column x, then it might be possible + ** to get a better estimate on the number of rows based on + ** VALUE and how common that value is according to the histogram. */ - cost = nRow + nInMul*estLog(aiRowEst[0]); + if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){ + if( pFirstTerm->eOperator==WO_EQ ){ + whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow); + }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){ + whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow); + } + } +#endif /* SQLITE_ENABLE_STAT2 */ /* Adjust the number of rows and the cost downward to reflect rows ** that are excluded by range constraints. */ nRow = (nRow * (double)estBound) / (double)100; - cost = (cost * (double)estBound) / (double)100; + if( nRow<1 ) nRow = 1; - /* Add in the estimated cost of sorting the result + /* Assume constant cost to advance from one row to the next and + ** logarithmic cost to do a binary search. Hence, the initial cost + ** is the number of output rows plus log2(table-size) times the + ** number of binary searches. + ** + ** Because fan-out on tables is so much higher than the fan-out on + ** indices (because table btrees contain only integer keys in non-leaf + ** nodes) we weight the cost of a table binary search as 1/10th the + ** cost of an index binary search. 
*/ - if( bSort ){ - cost += cost*estLog(cost); + if( pIdx ){ + if( bLookup ){ + /* For an index lookup followed by a table lookup: + ** nInMul index searches to find the start of each index range + ** + nRow steps through the index + ** + nRow table searches to lookup the table entry using the rowid + */ + nSearch = nInMul + nRow/10; + }else{ + /* For a covering index: + ** nInMul binary searches to find the initial entry + ** + nRow steps through the index + */ + nSearch = nInMul; + } + }else{ + /* For a rowid primary key lookup: + ** nInMul binary searches to find the initial entry scaled by 1/10th + ** + nRow steps through the table + */ + nSearch = nInMul/10; + } + cost = nRow + nSearch*estLog(aiRowEst[0]); - /* If all information can be taken directly from the index, we avoid - ** doing table lookups. This reduces the cost by half. (Not really - - ** this needs to be fixed.) + /* Add in the estimated cost of sorting the result. This cost is expanded + ** by a fudge factor of 3.0 to account for the fact that a sorting step + ** involves a write and is thus more expensive than a lookup step. */ - if( pIdx && bLookup==0 ){ - cost /= (double)2; + if( bSort ){ + cost += nRow*estLog(nRow)*(double)3; } + /**** Cost of using this index has now been computed ****/ /* If there are additional constraints on this table that cannot @@ -2768,15 +3019,19 @@ static void bestBtreeIndex( } }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){ if( nSkipRange ){ - /* Ignore the first nBound range constraints since the index + /* Ignore the first nSkipRange range constraints since the index ** has already accounted for these */ nSkipRange--; }else{ /* Assume each additional range constraint reduces the result - ** set size by a factor of 3 */ + ** set size by a factor of 3. Indexed range constraints reduce + ** the search space by a larger factor: 4. 
We make indexed range + ** more selective intentionally because of the subjective + ** observation that indexed range constraints really are more + ** selective in practice, on average. */ nRow /= 3; } - }else{ + }else if( pTerm->eOperator!=WO_NOOP ){ /* Any other expression lowers the output row count by half */ nRow /= 2; } @@ -3614,7 +3869,9 @@ static Bitmask codeOneLoopStart( if( pRangeStart ){ Expr *pRight = pRangeStart->pExpr->pRight; sqlite3ExprCode(pParse, pRight, regBase+nEq); - sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + if( (pRangeStart->wtFlags & TERM_VNULL)==0 ){ + sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + } if( zStartAff ){ if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){ /* Since the comparison is to be performed with no conversions @@ -3653,7 +3910,9 @@ static Bitmask codeOneLoopStart( Expr *pRight = pRangeEnd->pExpr->pRight; sqlite3ExprCacheRemove(pParse, regBase+nEq, 1); sqlite3ExprCode(pParse, pRight, regBase+nEq); - sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + if( (pRangeEnd->wtFlags & TERM_VNULL)==0 ){ + sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + } if( zEndAff ){ if( sqlite3CompareAffinity(pRight, zEndAff[nEq])==SQLITE_AFF_NONE){ /* Since the comparison is to be performed with no conversions |