diff options
author | dan <dan@noemail.net> | 2011-06-30 20:17:15 +0000 |
---|---|---|
committer | dan <dan@noemail.net> | 2011-06-30 20:17:15 +0000 |
commit | 38cc40c216f080dd309cb47f9e7fc976ba4fc2dd (patch) | |
tree | 77ed4b83c7ca97cb585528db34b310288f62c94e /src | |
parent | e1b4f0f177406df827af3ae30b2209cb7e682ef7 (diff) | |
download | sqlite-38cc40c216f080dd309cb47f9e7fc976ba4fc2dd.tar.gz sqlite-38cc40c216f080dd309cb47f9e7fc976ba4fc2dd.zip |
Experimental changes to improve optimization of DISTINCT queries.
FossilOrigin-Name: f7ba0219ef2f235543c258be736955d91ca5ecce
Diffstat (limited to 'src')
-rw-r--r-- | src/delete.c | 4 | ||||
-rw-r--r-- | src/fkey.c | 2 | ||||
-rw-r--r-- | src/select.c | 65 | ||||
-rw-r--r-- | src/sqliteInt.h | 6 | ||||
-rw-r--r-- | src/update.c | 4 | ||||
-rw-r--r-- | src/where.c | 137 |
6 files changed, 184 insertions, 34 deletions
diff --git a/src/delete.c b/src/delete.c index ec0ecec7e..147a5ca89 100644 --- a/src/delete.c +++ b/src/delete.c @@ -371,7 +371,9 @@ void sqlite3DeleteFrom( /* Collect rowids of every row to be deleted. */ sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK); + pWInfo = sqlite3WhereBegin( + pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK + ); if( pWInfo==0 ) goto delete_from_cleanup; regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid); sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid); diff --git a/src/fkey.c b/src/fkey.c index dda8a50d4..37d4744dd 100644 --- a/src/fkey.c +++ b/src/fkey.c @@ -560,7 +560,7 @@ static void fkScanChildren( ** clause. If the constraint is not deferred, throw an exception for ** each row found. Otherwise, for deferred constraints, increment the ** deferred constraint counter by nIncr for each row selected. */ - pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0); + pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0); if( nIncr>0 && pFKey->isDeferred==0 ){ sqlite3ParseToplevel(pParse)->mayAbort = 1; } diff --git a/src/select.c b/src/select.c index 8dcfb84b8..00107e1de 100644 --- a/src/select.c +++ b/src/select.c @@ -3721,6 +3721,7 @@ int sqlite3Select( int distinct; /* Table to use for the distinct set */ int rc = 1; /* Value to return from this function */ int addrSortIndex; /* Address of an OP_OpenEphemeral instruction */ + int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */ AggInfo sAggInfo; /* Information used by aggregate queries */ int iEnd; /* Address of the end of the query */ sqlite3 *db; /* The database connection */ @@ -3850,12 +3851,14 @@ int sqlite3Select( /* If possible, rewrite the query to use GROUP BY instead of DISTINCT. ** GROUP BY might use an index, DISTINCT never does. */ +#if 0 assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 ); if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){ p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); pGroupBy = p->pGroupBy; p->selFlags &= ~SF_Distinct; } +#endif /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then disable the ORDER BY clause since the GROUP BY @@ -3904,11 +3907,10 @@ int sqlite3Select( */ if( p->selFlags & SF_Distinct ){ KeyInfo *pKeyInfo; - assert( isAgg || pGroupBy ); distinct = pParse->nTab++; pKeyInfo = keyInfoFromExprList(pParse, p->pEList); - sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0, - (char*)pKeyInfo, P4_KEYINFO_HANDOFF); + addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0, + (char*)pKeyInfo, P4_KEYINFO_HANDOFF); sqlite3VdbeChangeP5(v, BTREE_UNORDERED); }else{ distinct = -1; @@ -3916,10 +3918,10 @@ int sqlite3Select( /* Aggregate and non-aggregate queries are handled differently */ if( !isAgg && pGroupBy==0 ){ - /* This case is for non-aggregate queries - ** Begin the database scan - */ - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0); + ExprList *pDist = (isDistinct ? p->pEList : 0); + + /* Begin the database scan. */ + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0); if( pWInfo==0 ) goto select_end; if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut; @@ -3932,10 +3934,47 @@ int sqlite3Select( p->addrOpenEphm[2] = -1; } - /* Use the standard inner loop - */ - assert(!isDistinct); - selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest, + if( pWInfo->eDistinct ){ + assert( isDistinct ); + assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED + || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE + ); + distinct = -1; + if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){ + int iJump; + int iExpr; + int iFlag = ++pParse->nMem; + int iBase = pParse->nMem+1; + int iBase2 = iBase + pEList->nExpr; + pParse->nMem += (pEList->nExpr*2); + VdbeOp *pOp; + + /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The + ** OP_Integer initializes the "first row" flag. */ + pOp = sqlite3VdbeGetOp(v, addrDistinctIndex); + pOp->opcode = OP_Integer; + pOp->p1 = 1; + pOp->p2 = iFlag; + + sqlite3ExprCodeExprList(pParse, pEList, iBase, 1); + iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1; + sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1); + for(iExpr=0; iExpr<pEList->nExpr; iExpr++){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr); + sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr); + sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ); + sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); + } + sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue); + + sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag); + assert( sqlite3VdbeCurrentAddr(v)==iJump ); + sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr); + } + } + + /* Use the standard inner loop. */ + selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest, pWInfo->iContinue, pWInfo->iBreak); /* End the database scan loop. @@ -4045,7 +4084,7 @@ int sqlite3Select( ** in the right order to begin with. */ sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0); if( pWInfo==0 ) goto select_end; if( pGroupBy==0 ){ /* The optimizer is able to deliver rows in group by order so @@ -4307,7 +4346,7 @@ int sqlite3Select( ** of output. */ resetAccumulator(pParse, &sAggInfo); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag); if( pWInfo==0 ){ sqlite3ExprListDelete(db, pDel); goto select_end; diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 09f64b793..c1e055b26 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1966,6 +1966,7 @@ struct WhereInfo { u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE or DELETE */ u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */ + u8 eDistinct; SrcList *pTabList; /* List of tables in the join */ int iTop; /* The very beginning of the WHERE loop */ int iContinue; /* Jump here to continue with next record */ @@ -1977,6 +1978,9 @@ struct WhereInfo { WhereLevel a[1]; /* Information about each nest loop in WHERE */ }; +#define WHERE_DISTINCT_UNIQUE 1 +#define WHERE_DISTINCT_ORDERED 2 + /* ** A NameContext defines a context in which to resolve table and column ** names. The context consists of a list of tables (the pSrcList) field and @@ -2738,7 +2742,7 @@ Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, Expr *, Expr *, #endif void sqlite3DeleteFrom(Parse*, SrcList*, Expr*); void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int); -WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16); +WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16); void sqlite3WhereEnd(WhereInfo*); int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int); void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int); diff --git a/src/update.c b/src/update.c index d445f01f4..2b5efff37 100644 --- a/src/update.c +++ b/src/update.c @@ -311,7 +311,9 @@ void sqlite3Update( /* Begin the database scan */ sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED); + pWInfo = sqlite3WhereBegin( + pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED + ); if( pWInfo==0 ) goto update_cleanup; okOnePass = pWInfo->okOnePass; diff --git a/src/where.c b/src/where.c index cf30d94d6..8aecf9788 100644 --- a/src/where.c +++ b/src/where.c @@ -253,6 +253,7 @@ struct WhereCost { #define WHERE_VIRTUALTABLE 0x08000000 /* Use virtual-table processing */ #define WHERE_MULTI_OR 0x10000000 /* OR using multiple indices */ #define WHERE_TEMP_INDEX 0x20000000 /* Uses an ephemeral index */ +#define WHERE_DISTINCT 0x40000000 /* Correct order for DISTINCT */ /* ** Initialize a preallocated WhereClause structure. @@ -1433,7 +1434,10 @@ static int isSortingIndex( struct ExprList_item *pTerm; /* A term of the ORDER BY clause */ sqlite3 *db = pParse->db; - assert( pOrderBy!=0 ); + if( !pOrderBy ) return 0; + if( wsFlags & WHERE_COLUMN_IN ) return 0; + if( pIdx->bUnordered ) return 0; + nTerm = pOrderBy->nExpr; assert( nTerm>0 ); @@ -1523,7 +1527,7 @@ static int isSortingIndex( } } - *pbRev = sortOrder!=0; + if( pbRev ) *pbRev = sortOrder!=0; if( j>=nTerm ){ /* All terms of the ORDER BY clause are covered by this index so ** this index can be used for sorting. */ @@ -2689,6 +2693,7 @@ static void bestBtreeIndex( Bitmask notReady, /* Mask of cursors not available for indexing */ Bitmask notValid, /* Cursors not available for any purpose */ ExprList *pOrderBy, /* The ORDER BY clause */ + ExprList *pDistinct, /* The select-list if query is DISTINCT */ WhereCost *pCost /* Lowest cost query plan */ ){ int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ @@ -2829,7 +2834,8 @@ static void bestBtreeIndex( int nInMul = 1; /* Number of distinct equalities to lookup */ int estBound = 100; /* Estimated reduction in search space */ int nBound = 0; /* Number of range constraints seen */ - int bSort = 0; /* True if external sort required */ + int bSort = !!pOrderBy; /* True if external sort required */ + int bDist = !!pDistinct; /* True if index cannot help with DISTINCT */ int bLookup = 0; /* True if not a covering index */ WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 @@ -2893,17 +2899,22 @@ static void bestBtreeIndex( ** naturally scan rows in the required order, set the appropriate flags ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index ** will scan rows in a different order, set the bSort variable. */ - if( pOrderBy ){ - if( (wsFlags & WHERE_COLUMN_IN)==0 - && pProbe->bUnordered==0 - && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, - nEq, wsFlags, &rev) - ){ - wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; - wsFlags |= (rev ? WHERE_REVERSE : 0); - }else{ - bSort = 1; - } + if( isSortingIndex( + pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev) + ){ + bSort = 0; + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; + wsFlags |= (rev ? WHERE_REVERSE : 0); + } + + /* If there is a DISTINCT qualifier and this index will scan rows in + ** order of the DISTINCT expressions, clear bDist and set the appropriate + ** flags in wsFlags. */ + if( isSortingIndex( + pParse, pWC->pMaskSet, pProbe, iCur, pDistinct, nEq, wsFlags, 0) + ){ + bDist = 0; + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT; } /* If currently calculating the cost of using an index (not the IPK @@ -3020,6 +3031,9 @@ static void bestBtreeIndex( if( bSort ){ cost += nRow*estLog(nRow)*3; } + if( bDist ){ + cost += nRow*estLog(nRow)*3; + } /**** Cost of using this index has now been computed ****/ @@ -3165,7 +3179,7 @@ static void bestIndex( }else #endif { - bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); + bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost); } } @@ -4127,7 +4141,7 @@ static Bitmask codeOneLoopStart( if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){ WhereInfo *pSubWInfo; /* Info for single OR-term scan */ /* Loop through table entries that match term pOrTerm. */ - pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, + pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, 0, WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE | WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY); if( pSubWInfo ){ @@ -4274,6 +4288,79 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ } } +/* +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. A DISTINCT list is redundant if the database contains a +** UNIQUE index that guarantees that the result of the query will be distinct +** anyway. +*/ +static int whereDistinctRedundant( + Parse *pParse, + SrcList *pTabList, + WhereClause *pWC, + ExprList *pDistinct +){ + Table *pTab; + Index *pIdx; + int i; + int iBase; + + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; + + /* Check if all the expressions in the ExprList are of type TK_COLUMN and + ** on the same table. If this is not the case, return early, since it will + ** not be possible to prove that the DISTINCT qualifier is redundant. + ** If any of the expressions is an IPK column, then return true. + */ + for(i=0; i<pDistinct->nExpr; i++){ + Expr *p = pDistinct->a[i].pExpr; + if( p->op!=TK_COLUMN || p->iTable!=iBase ) return 0; + if( p->iColumn<0 ) return 1; + } + + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->onError==OE_None ) continue; + for(i=0; i<pIdx->nColumn; i++){ + int iCol = pIdx->aiColumn[i]; + const char *zColl = pIdx->azColl[i]; + + if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ + int j; + for(j=0; j<pDistinct->nExpr; j++){ + Expr *p = pDistinct->a[j].pExpr; + if( p->iColumn==iCol ){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); + const char *zEColl = (pColl ? pColl : pParse->db->pDfltColl)->zName; + if( 0==sqlite3StrICmp(zColl, zEColl) ) break; + } + } + if( j==pDistinct->nExpr ) break; + } + } + if( i==pIdx->nColumn ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; + } + } + + return 0; +} + /* ** Generate the beginning of the loop used for WHERE clause processing. @@ -4368,6 +4455,7 @@ WhereInfo *sqlite3WhereBegin( SrcList *pTabList, /* A list of all tables to be scanned */ Expr *pWhere, /* The WHERE clause */ ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */ + ExprList *pDistinct, /* The select-list for DISTINCT queries - or NULL */ u16 wctrlFlags /* One of the WHERE_* flags defined in sqliteInt.h */ ){ int i; /* Loop counter */ @@ -4495,6 +4583,15 @@ WhereInfo *sqlite3WhereBegin( goto whereBeginError; } + /* Check if the DISTINCT qualifier, if there is one, is redundant. + ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to + ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. + */ + if( pDistinct && whereDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){ + pDistinct = 0; + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + } + /* Chose the best index to use for each table in the FROM clause. ** ** This loop fills in the following fields: @@ -4578,6 +4675,7 @@ WhereInfo *sqlite3WhereBegin( int doNotReorder; /* True if this table should not be reordered */ WhereCost sCost; /* Cost information from best[Virtual]Index() */ ExprList *pOrderBy; /* ORDER BY clause for index to optimize */ + ExprList *pDist; /* DISTINCT clause for index to optimize */ doNotReorder = (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0; if( j!=iFrom && doNotReorder ) break; @@ -4588,6 +4686,7 @@ WhereInfo *sqlite3WhereBegin( } mask = (isOptimal ? m : notReady); pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); + pDist = (i==0 ? pDistinct : 0); if( pTabItem->pIndex==0 ) nUnconstrained++; WHERETRACE(("=== trying table %d with isOptimal=%d ===\n", @@ -4602,7 +4701,7 @@ WhereInfo *sqlite3WhereBegin( #endif { bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy, - &sCost); + pDist, &sCost); } assert( isOptimal || (sCost.used¬Ready)==0 ); @@ -4663,6 +4762,10 @@ WhereInfo *sqlite3WhereBegin( if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){ *ppOrderBy = 0; } + if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){ + assert( pWInfo->eDistinct==0 ); + pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; + } andFlags &= bestPlan.plan.wsFlags; pLevel->plan = bestPlan.plan; testcase( bestPlan.plan.wsFlags & WHERE_INDEXED ); |