diff options
author | drh <> | 2025-01-24 16:37:31 +0000 |
---|---|---|
committer | drh <> | 2025-01-24 16:37:31 +0000 |
commit | 079f840e477db79ca73097888949ef1361a55505 (patch) | |
tree | b339f08df65da07a070c4d76d68ce3b5d33c0008 /src | |
parent | 7ae051df2a29b834ff049a7159dea11882602858 (diff) | |
parent | d37412b80c754e4ca1e45d373bb3b3d832662c73 (diff) | |
download | sqlite-079f840e477db79ca73097888949ef1361a55505.tar.gz sqlite-079f840e477db79ca73097888949ef1361a55505.zip |
Improve the star-query heuristic so that it does a better job of identifying
actual star queries. Also includes improved diagnostic output from the
query planner.
FossilOrigin-Name: 7cfbe14d199bb631abd4d009698eeaee9b8450d5061ded612095ee4738ac6a1f
Diffstat (limited to 'src')
-rw-r--r-- | src/where.c | 136 | ||||
-rw-r--r-- | src/whereInt.h | 4 |
2 files changed, 107 insertions, 33 deletions
diff --git a/src/where.c b/src/where.c index cad66e70b..14dc99e5c 100644 --- a/src/where.c +++ b/src/where.c @@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){ ** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31 */ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ + WhereInfo *pWInfo; if( pWC ){ - WhereInfo *pWInfo = pWC->pWInfo; + pWInfo = pWC->pWInfo; int nb = 1+(pWInfo->pTabList->nSrc+3)/4; SrcItem *pItem = pWInfo->pTabList->a + p->iTab; Table *pTab = pItem->pSTab; @@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ sqlite3DebugPrintf(" %12s", pItem->zAlias ? pItem->zAlias : pTab->zName); }else{ + pWInfo = 0; sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d", p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab); } @@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ }else{ sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); } - sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + if( pWInfo && pWInfo->nOutStarDelta>0 && p->rStarDelta!=0 ){ + sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n", + p->rSetup, p->rRun, p->nOut, -p->rStarDelta); + }else{ + sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + } if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){ int i; for(i=0; i<p->nLTerm; i++){ @@ -5441,11 +5448,13 @@ static LogEst whereSortingCost( ** 18 for star queries ** 12 otherwise ** -** For the purposes of SQLite, a star-query is defined as a query -** with a large central table that is joined (using an INNER JOIN, -** not a LEFT JOIN) against four or more smaller tables. The central -** table is called the "fact" table. The smaller tables that get -** joined are "dimension tables". +** For the purposes of this heuristic, a star-query is defined as a query +** with a large central table that is joined using an INNER JOIN, +** not CROSS or OUTER JOINs, against four or more smaller tables. +* The central table is called the "fact" table. The smaller tables +** that get joined are "dimension tables". Also, any table that is +** self-joined cannot be a dimension table; we assume that dimension +** tables may only be joined against fact tables. ** ** SIDE EFFECT: (and really the whole point of this subroutine) ** @@ -5456,52 +5465,109 @@ static LogEst whereSortingCost( ** resulting in poor query plans. The total amount of heuristic cost ** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment ** for each WhereLoop is stored in its rStarDelta field. +** +** This heuristic can be completely disabled, so that no query is +** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to +** disable the SQLITE_StarQuery optimization. In the CLI, the command +** to do that is: ".testctrl opt -starquery". */ -static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){ +static int computeMxChoice(WhereInfo *pWInfo){ int nLoop = pWInfo->nLevel; /* Number of terms in the join */ - if( nRowEst==0 - && nLoop>=5 + WhereLoop *pWLoop; /* For looping over WhereLoops */ + +#ifdef SQLITE_DEBUG + /* The star-query detection code below makes use of the following + ** properties of the WhereLoop list, so verifying them before + ** continuing: + ** (1) .maskSelf is the bitmask corresponding to .iTab + ** (2) The WhereLoop list is in ascending .iTab order + */ + for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ + assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) ); + assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab ); + } +#endif /* SQLITE_DEBUG */ + + if( nLoop>=5 + && !pWInfo->bStarDone && OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery) ){ + SrcItem *aFromTabs; /* All terms of the FROM clause */ + int iFromIdx; /* Term of FROM clause is the candidate fact-table */ + Bitmask m; /* Bitmask for candidate fact-table */ + Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */ + WhereLoop *pStart; /* Where to start searching for dimension-tables */ + + pWInfo->bStarDone = 1; /* Only do this computation once */ + /* Check to see if we are dealing with a star schema and if so, reduce ** the cost of fact tables relative to dimension tables, as a heuristic ** to help keep the fact tables in outer loops. */ - int iLoop; /* Counter over join terms */ - Bitmask m; /* Bitmask for current loop */ assert( pWInfo->nOutStarDelta==0 ); - for(iLoop=0, m=1; iLoop<nLoop; iLoop++, m<<=1){ - WhereLoop *pWLoop; /* For looping over WhereLoops */ + aFromTabs = pWInfo->pTabList->a; + pStart = pWInfo->pLoops; + for(iFromIdx=0, m=1; iFromIdx<nLoop; iFromIdx++, m<<=1){ int nDep = 0; /* Number of dimension tables */ LogEst rDelta; /* Heuristic cost adjustment */ Bitmask mSeen = 0; /* Mask of dimension tables */ - for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */ + SrcItem *pFactTab; /* The candidate fact table */ + + pFactTab = aFromTabs + iFromIdx; + if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){ + /* If the candidate fact-table is the right table of an outer join + ** restrict the search for dimension-tables to be tables to the right + ** of the fact-table. */ + if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */ + while( ALWAYS(pStart) && pStart->iTab<=iFromIdx ){ + pStart = pStart->pNextLoop; + } + } + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){ + /* Fact-tables and dimension-tables cannot be separated by an + ** outer join (at least for the definition of fact- and dimension- + ** used by this heuristic). */ + break; + } + if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */ && (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */ - && (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0 - /* ^- pWInfo isn't a LEFT JOIN */ + && (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */ ){ - nDep++; - mSeen |= pWLoop->maskSelf; + if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){ + mSelfJoin |= m; + }else{ + nDep++; + mSeen |= pWLoop->maskSelf; + } } } if( nDep<=3 ) continue; rDelta = 15*(nDep-3); #ifdef WHERETRACE_ENABLED /* 0x4 */ if( sqlite3WhereTrace&0x4 ){ - SrcItem *pItem = pWInfo->pTabList->a + iLoop; - sqlite3DebugPrintf( - "Fact-table %s(%d): %d dimensions, cost reduced %d\n", - pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop, - nDep, rDelta); + Bitmask x; + int ii; + sqlite3DebugPrintf( + "Fact-table %s(%d): cost reduced %d due to %d dimension tables:", + pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName, + iFromIdx, rDelta, nDep + ); + for(ii=0, x=1; ii<nLoop; ii++, x<<=1){ + if( x & mSeen ){ + SrcItem *pDim = aFromTabs + ii; + sqlite3DebugPrintf(" %s(%d)", + pDim->zAlias ? pDim->zAlias : pDim->pSTab->zName, ii + ); + } + } + sqlite3DebugPrintf("\n"); } #endif - if( pWInfo->nOutStarDelta==0 ){ - for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - pWLoop->rStarDelta = 0; - } + for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ + pWLoop->rStarDelta = 0; } - pWInfo->nOutStarDelta += rDelta; + pWInfo->nOutStarDelta = rDelta; for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ if( pWLoop->maskSelf==m ){ pWLoop->rRun -= rDelta; @@ -5509,7 +5575,7 @@ static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){ pWLoop->rStarDelta = rDelta; } } - } + } } return pWInfo->nOutStarDelta>0 ? 18 : 12; } @@ -5587,7 +5653,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ }else if( nLoop==2 ){ mxChoice = 5; }else{ - mxChoice = computeMxChoice(pWInfo, nRowEst); + mxChoice = computeMxChoice(pWInfo); } assert( nLoop<=pWInfo->pTabList->nSrc ); @@ -5956,6 +6022,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ } pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta; +#ifdef WHERETRACE_ENABLED + pWInfo->rTotalCost = pFrom->rCost + pWInfo->nOutStarDelta; +#endif /* Free temporary memory and return success */ sqlite3StackFreeNN(pParse->db, pSpace); @@ -6836,7 +6905,8 @@ WhereInfo *sqlite3WhereBegin( assert( db->mallocFailed==0 ); #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ - sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); + sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d", + pWInfo->rTotalCost, pWInfo->nRowOut); if( pWInfo->nOBSat>0 ){ sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask); } diff --git a/src/whereInt.h b/src/whereInt.h index f44c04041..5bea70ba9 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -486,8 +486,12 @@ struct WhereInfo { unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */ unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */ unsigned sorted :1; /* True if really sorted (not just grouped) */ + unsigned bStarDone :1; /* True if check for star-query is complete */ LogEst nOutStarDelta; /* Artifical nOut reduction for star-query */ LogEst nRowOut; /* Estimated number of output rows */ +#ifdef WHERETRACE_ENABLED + LogEst rTotalCost; /* Total cost of the solution */ +#endif int iTop; /* The very beginning of the WHERE loop */ int iEndWhere; /* End of the WHERE clause itself */ WhereLoop *pLoops; /* List of all WhereLoop objects */ |