aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordan <dan@noemail.net>2011-06-30 20:17:15 +0000
committerdan <dan@noemail.net>2011-06-30 20:17:15 +0000
commit38cc40c216f080dd309cb47f9e7fc976ba4fc2dd (patch)
tree77ed4b83c7ca97cb585528db34b310288f62c94e /src
parente1b4f0f177406df827af3ae30b2209cb7e682ef7 (diff)
downloadsqlite-38cc40c216f080dd309cb47f9e7fc976ba4fc2dd.tar.gz
sqlite-38cc40c216f080dd309cb47f9e7fc976ba4fc2dd.zip
Experimental changes to improve optimization of DISTINCT queries.
FossilOrigin-Name: f7ba0219ef2f235543c258be736955d91ca5ecce
Diffstat (limited to 'src')
-rw-r--r--src/delete.c4
-rw-r--r--src/fkey.c2
-rw-r--r--src/select.c65
-rw-r--r--src/sqliteInt.h6
-rw-r--r--src/update.c4
-rw-r--r--src/where.c137
6 files changed, 184 insertions, 34 deletions
diff --git a/src/delete.c b/src/delete.c
index ec0ecec7e..147a5ca89 100644
--- a/src/delete.c
+++ b/src/delete.c
@@ -371,7 +371,9 @@ void sqlite3DeleteFrom(
/* Collect rowids of every row to be deleted.
*/
sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet);
- pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK);
+ pWInfo = sqlite3WhereBegin(
+ pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK
+ );
if( pWInfo==0 ) goto delete_from_cleanup;
regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid);
sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid);
diff --git a/src/fkey.c b/src/fkey.c
index dda8a50d4..37d4744dd 100644
--- a/src/fkey.c
+++ b/src/fkey.c
@@ -560,7 +560,7 @@ static void fkScanChildren(
** clause. If the constraint is not deferred, throw an exception for
** each row found. Otherwise, for deferred constraints, increment the
** deferred constraint counter by nIncr for each row selected. */
- pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0);
+ pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0);
if( nIncr>0 && pFKey->isDeferred==0 ){
sqlite3ParseToplevel(pParse)->mayAbort = 1;
}
diff --git a/src/select.c b/src/select.c
index 8dcfb84b8..00107e1de 100644
--- a/src/select.c
+++ b/src/select.c
@@ -3721,6 +3721,7 @@ int sqlite3Select(
int distinct; /* Table to use for the distinct set */
int rc = 1; /* Value to return from this function */
int addrSortIndex; /* Address of an OP_OpenEphemeral instruction */
+ int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */
AggInfo sAggInfo; /* Information used by aggregate queries */
int iEnd; /* Address of the end of the query */
sqlite3 *db; /* The database connection */
@@ -3850,12 +3851,14 @@ int sqlite3Select(
/* If possible, rewrite the query to use GROUP BY instead of DISTINCT.
** GROUP BY might use an index, DISTINCT never does.
*/
+#if 0
assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 );
if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){
p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
pGroupBy = p->pGroupBy;
p->selFlags &= ~SF_Distinct;
}
+#endif
/* If there is both a GROUP BY and an ORDER BY clause and they are
** identical, then disable the ORDER BY clause since the GROUP BY
@@ -3904,11 +3907,10 @@ int sqlite3Select(
*/
if( p->selFlags & SF_Distinct ){
KeyInfo *pKeyInfo;
- assert( isAgg || pGroupBy );
distinct = pParse->nTab++;
pKeyInfo = keyInfoFromExprList(pParse, p->pEList);
- sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
- (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
+ addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
+ (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
}else{
distinct = -1;
@@ -3916,10 +3918,10 @@ int sqlite3Select(
/* Aggregate and non-aggregate queries are handled differently */
if( !isAgg && pGroupBy==0 ){
- /* This case is for non-aggregate queries
- ** Begin the database scan
- */
- pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0);
+ ExprList *pDist = (isDistinct ? p->pEList : 0);
+
+ /* Begin the database scan. */
+ pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0);
if( pWInfo==0 ) goto select_end;
if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut;
@@ -3932,10 +3934,47 @@ int sqlite3Select(
p->addrOpenEphm[2] = -1;
}
- /* Use the standard inner loop
- */
- assert(!isDistinct);
- selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest,
+ if( pWInfo->eDistinct ){
+ assert( isDistinct );
+ assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED
+ || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE
+ );
+ distinct = -1;
+ if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){
+ int iJump;
+ int iExpr;
+ int iFlag = ++pParse->nMem;
+ int iBase = pParse->nMem+1;
+ int iBase2 = iBase + pEList->nExpr;
+ pParse->nMem += (pEList->nExpr*2);
+ VdbeOp *pOp;
+
+ /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The
+ ** OP_Integer initializes the "first row" flag. */
+ pOp = sqlite3VdbeGetOp(v, addrDistinctIndex);
+ pOp->opcode = OP_Integer;
+ pOp->p1 = 1;
+ pOp->p2 = iFlag;
+
+ sqlite3ExprCodeExprList(pParse, pEList, iBase, 1);
+ iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1;
+ sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1);
+ for(iExpr=0; iExpr<pEList->nExpr; iExpr++){
+ CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr);
+ sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr);
+ sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
+ sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
+ }
+ sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue);
+
+ sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag);
+ assert( sqlite3VdbeCurrentAddr(v)==iJump );
+ sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr);
+ }
+ }
+
+ /* Use the standard inner loop. */
+ selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest,
pWInfo->iContinue, pWInfo->iBreak);
/* End the database scan loop.
@@ -4045,7 +4084,7 @@ int sqlite3Select(
** in the right order to begin with.
*/
sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
- pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0);
+ pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0);
if( pWInfo==0 ) goto select_end;
if( pGroupBy==0 ){
/* The optimizer is able to deliver rows in group by order so
@@ -4307,7 +4346,7 @@ int sqlite3Select(
** of output.
*/
resetAccumulator(pParse, &sAggInfo);
- pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag);
+ pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag);
if( pWInfo==0 ){
sqlite3ExprListDelete(db, pDel);
goto select_end;
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 09f64b793..c1e055b26 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -1966,6 +1966,7 @@ struct WhereInfo {
u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */
u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE or DELETE */
u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */
+ u8 eDistinct;
SrcList *pTabList; /* List of tables in the join */
int iTop; /* The very beginning of the WHERE loop */
int iContinue; /* Jump here to continue with next record */
@@ -1977,6 +1978,9 @@ struct WhereInfo {
WhereLevel a[1]; /* Information about each nest loop in WHERE */
};
+#define WHERE_DISTINCT_UNIQUE 1
+#define WHERE_DISTINCT_ORDERED 2
+
/*
** A NameContext defines a context in which to resolve table and column
** names. The context consists of a list of tables (the pSrcList) field and
@@ -2738,7 +2742,7 @@ Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, Expr *, Expr *,
#endif
void sqlite3DeleteFrom(Parse*, SrcList*, Expr*);
void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int);
-WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16);
+WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16);
void sqlite3WhereEnd(WhereInfo*);
int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int);
void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int);
diff --git a/src/update.c b/src/update.c
index d445f01f4..2b5efff37 100644
--- a/src/update.c
+++ b/src/update.c
@@ -311,7 +311,9 @@ void sqlite3Update(
/* Begin the database scan
*/
sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid);
- pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED);
+ pWInfo = sqlite3WhereBegin(
+ pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED
+ );
if( pWInfo==0 ) goto update_cleanup;
okOnePass = pWInfo->okOnePass;
diff --git a/src/where.c b/src/where.c
index cf30d94d6..8aecf9788 100644
--- a/src/where.c
+++ b/src/where.c
@@ -253,6 +253,7 @@ struct WhereCost {
#define WHERE_VIRTUALTABLE 0x08000000 /* Use virtual-table processing */
#define WHERE_MULTI_OR 0x10000000 /* OR using multiple indices */
#define WHERE_TEMP_INDEX 0x20000000 /* Uses an ephemeral index */
+#define WHERE_DISTINCT 0x40000000 /* Correct order for DISTINCT */
/*
** Initialize a preallocated WhereClause structure.
@@ -1433,7 +1434,10 @@ static int isSortingIndex(
struct ExprList_item *pTerm; /* A term of the ORDER BY clause */
sqlite3 *db = pParse->db;
- assert( pOrderBy!=0 );
+ if( !pOrderBy ) return 0;
+ if( wsFlags & WHERE_COLUMN_IN ) return 0;
+ if( pIdx->bUnordered ) return 0;
+
nTerm = pOrderBy->nExpr;
assert( nTerm>0 );
@@ -1523,7 +1527,7 @@ static int isSortingIndex(
}
}
- *pbRev = sortOrder!=0;
+ if( pbRev ) *pbRev = sortOrder!=0;
if( j>=nTerm ){
/* All terms of the ORDER BY clause are covered by this index so
** this index can be used for sorting. */
@@ -2689,6 +2693,7 @@ static void bestBtreeIndex(
Bitmask notReady, /* Mask of cursors not available for indexing */
Bitmask notValid, /* Cursors not available for any purpose */
ExprList *pOrderBy, /* The ORDER BY clause */
+ ExprList *pDistinct, /* The select-list if query is DISTINCT */
WhereCost *pCost /* Lowest cost query plan */
){
int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */
@@ -2829,7 +2834,8 @@ static void bestBtreeIndex(
int nInMul = 1; /* Number of distinct equalities to lookup */
int estBound = 100; /* Estimated reduction in search space */
int nBound = 0; /* Number of range constraints seen */
- int bSort = 0; /* True if external sort required */
+ int bSort = !!pOrderBy; /* True if external sort required */
+ int bDist = !!pDistinct; /* True if index cannot help with DISTINCT */
int bLookup = 0; /* True if not a covering index */
WhereTerm *pTerm; /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT2
@@ -2893,17 +2899,22 @@ static void bestBtreeIndex(
** naturally scan rows in the required order, set the appropriate flags
** in wsFlags. Otherwise, if there is an ORDER BY clause but the index
** will scan rows in a different order, set the bSort variable. */
- if( pOrderBy ){
- if( (wsFlags & WHERE_COLUMN_IN)==0
- && pProbe->bUnordered==0
- && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy,
- nEq, wsFlags, &rev)
- ){
- wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
- wsFlags |= (rev ? WHERE_REVERSE : 0);
- }else{
- bSort = 1;
- }
+ if( isSortingIndex(
+ pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev)
+ ){
+ bSort = 0;
+ wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
+ wsFlags |= (rev ? WHERE_REVERSE : 0);
+ }
+
+ /* If there is a DISTINCT qualifier and this index will scan rows in
+ ** order of the DISTINCT expressions, clear bDist and set the appropriate
+ ** flags in wsFlags. */
+ if( isSortingIndex(
+ pParse, pWC->pMaskSet, pProbe, iCur, pDistinct, nEq, wsFlags, 0)
+ ){
+ bDist = 0;
+ wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT;
}
/* If currently calculating the cost of using an index (not the IPK
@@ -3020,6 +3031,9 @@ static void bestBtreeIndex(
if( bSort ){
cost += nRow*estLog(nRow)*3;
}
+ if( bDist ){
+ cost += nRow*estLog(nRow)*3;
+ }
/**** Cost of using this index has now been computed ****/
@@ -3165,7 +3179,7 @@ static void bestIndex(
}else
#endif
{
- bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
+ bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost);
}
}
@@ -4127,7 +4141,7 @@ static Bitmask codeOneLoopStart(
if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){
WhereInfo *pSubWInfo; /* Info for single OR-term scan */
/* Loop through table entries that match term pOrTerm. */
- pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0,
+ pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, 0,
WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE |
WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY);
if( pSubWInfo ){
@@ -4274,6 +4288,79 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){
}
}
+/*
+** Return true if the DISTINCT expression-list passed as the third argument
+** is redundant. A DISTINCT list is redundant if the database contains a
+** UNIQUE index that guarantees that the result of the query will be distinct
+** anyway.
+*/
+static int whereDistinctRedundant(
+ Parse *pParse,
+ SrcList *pTabList,
+ WhereClause *pWC,
+ ExprList *pDistinct
+){
+ Table *pTab;
+ Index *pIdx;
+ int i;
+ int iBase;
+
+ /* If there is more than one table or sub-select in the FROM clause of
+ ** this query, then it will not be possible to show that the DISTINCT
+ ** clause is redundant. */
+ if( pTabList->nSrc!=1 ) return 0;
+ iBase = pTabList->a[0].iCursor;
+ pTab = pTabList->a[0].pTab;
+
+ /* Check if all the expressions in the ExprList are of type TK_COLUMN and
+ ** on the same table. If this is not the case, return early, since it will
+ ** not be possible to prove that the DISTINCT qualifier is redundant.
+ ** If any of the expressions is an IPK column, then return true.
+ */
+ for(i=0; i<pDistinct->nExpr; i++){
+ Expr *p = pDistinct->a[i].pExpr;
+ if( p->op!=TK_COLUMN || p->iTable!=iBase ) return 0;
+ if( p->iColumn<0 ) return 1;
+ }
+
+ /* Loop through all indices on the table, checking each to see if it makes
+ ** the DISTINCT qualifier redundant. It does so if:
+ **
+ ** 1. The index is itself UNIQUE, and
+ **
+ ** 2. All of the columns in the index are either part of the pDistinct
+ ** list, or else the WHERE clause contains a term of the form "col=X",
+ ** where X is a constant value. The collation sequences of the
+ ** comparison and select-list expressions must match those of the index.
+ */
+ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+ if( pIdx->onError==OE_None ) continue;
+ for(i=0; i<pIdx->nColumn; i++){
+ int iCol = pIdx->aiColumn[i];
+ const char *zColl = pIdx->azColl[i];
+
+ if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
+ int j;
+ for(j=0; j<pDistinct->nExpr; j++){
+ Expr *p = pDistinct->a[j].pExpr;
+ if( p->iColumn==iCol ){
+ CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
+ const char *zEColl = (pColl ? pColl : pParse->db->pDfltColl)->zName;
+ if( 0==sqlite3StrICmp(zColl, zEColl) ) break;
+ }
+ }
+ if( j==pDistinct->nExpr ) break;
+ }
+ }
+ if( i==pIdx->nColumn ){
+ /* This index implies that the DISTINCT qualifier is redundant. */
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
/*
** Generate the beginning of the loop used for WHERE clause processing.
@@ -4368,6 +4455,7 @@ WhereInfo *sqlite3WhereBegin(
SrcList *pTabList, /* A list of all tables to be scanned */
Expr *pWhere, /* The WHERE clause */
ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */
+ ExprList *pDistinct, /* The select-list for DISTINCT queries - or NULL */
u16 wctrlFlags /* One of the WHERE_* flags defined in sqliteInt.h */
){
int i; /* Loop counter */
@@ -4495,6 +4583,15 @@ WhereInfo *sqlite3WhereBegin(
goto whereBeginError;
}
+ /* Check if the DISTINCT qualifier, if there is one, is redundant.
+ ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to
+ ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT.
+ */
+ if( pDistinct && whereDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
+ pDistinct = 0;
+ pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
+ }
+
/* Chose the best index to use for each table in the FROM clause.
**
** This loop fills in the following fields:
@@ -4578,6 +4675,7 @@ WhereInfo *sqlite3WhereBegin(
int doNotReorder; /* True if this table should not be reordered */
WhereCost sCost; /* Cost information from best[Virtual]Index() */
ExprList *pOrderBy; /* ORDER BY clause for index to optimize */
+ ExprList *pDist; /* DISTINCT clause for index to optimize */
doNotReorder = (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
if( j!=iFrom && doNotReorder ) break;
@@ -4588,6 +4686,7 @@ WhereInfo *sqlite3WhereBegin(
}
mask = (isOptimal ? m : notReady);
pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
+ pDist = (i==0 ? pDistinct : 0);
if( pTabItem->pIndex==0 ) nUnconstrained++;
WHERETRACE(("=== trying table %d with isOptimal=%d ===\n",
@@ -4602,7 +4701,7 @@ WhereInfo *sqlite3WhereBegin(
#endif
{
bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
- &sCost);
+ pDist, &sCost);
}
assert( isOptimal || (sCost.used&notReady)==0 );
@@ -4663,6 +4762,10 @@ WhereInfo *sqlite3WhereBegin(
if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){
*ppOrderBy = 0;
}
+ if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){
+ assert( pWInfo->eDistinct==0 );
+ pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
+ }
andFlags &= bestPlan.plan.wsFlags;
pLevel->plan = bestPlan.plan;
testcase( bestPlan.plan.wsFlags & WHERE_INDEXED );