diff options
Diffstat (limited to 'src/where.c')
-rw-r--r-- | src/where.c | 229 |
1 files changed, 173 insertions, 56 deletions
diff --git a/src/where.c b/src/where.c index 0b8e5acea..5cb52b8ad 100644 --- a/src/where.c +++ b/src/where.c @@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){ ** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31 */ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ + WhereInfo *pWInfo; if( pWC ){ - WhereInfo *pWInfo = pWC->pWInfo; + pWInfo = pWC->pWInfo; int nb = 1+(pWInfo->pTabList->nSrc+3)/4; SrcItem *pItem = pWInfo->pTabList->a + p->iTab; Table *pTab = pItem->pSTab; @@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ sqlite3DebugPrintf(" %12s", pItem->zAlias ? pItem->zAlias : pTab->zName); }else{ + pWInfo = 0; sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d", p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab); } @@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ }else{ sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); } - sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + if( pWInfo && pWInfo->bStarUsed && p->rStarDelta!=0 ){ + sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n", + p->rSetup, p->rRun, p->nOut, p->rStarDelta); + }else{ + sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + } if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){ int i; for(i=0; i<p->nLTerm; i++){ @@ -5441,77 +5448,179 @@ static LogEst whereSortingCost( ** 18 for star queries ** 12 otherwise ** -** For the purposes of SQLite, a star-query is defined as a query -** with a large central table that is joined (using an INNER JOIN, -** not a LEFT JOIN) against four or more smaller tables. The central -** table is called the "fact" table. The smaller tables that get -** joined are "dimension tables". +** For the purposes of this heuristic, a star-query is defined as a query +** with a large central table that is joined using an INNER JOIN, +** not CROSS or OUTER JOINs, against four or more smaller tables. +** The central table is called the "fact" table. The smaller tables +** that get joined are "dimension tables". Also, any table that is +** self-joined cannot be a dimension table; we assume that dimension +** tables may only be joined against fact tables. ** ** SIDE EFFECT: (and really the whole point of this subroutine) ** -** If pWInfo describes a star-query, then the cost on WhereLoops for the -** fact table is reduced. This heuristic helps keep fact tables in -** outer loops. Without this heuristic, paths with fact tables in outer -** loops tend to get pruned by the mxChoice limit on the number of paths, -** resulting in poor query plans. The total amount of heuristic cost -** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment -** for each WhereLoop is stored in its rStarDelta field. +** If pWInfo describes a star-query, then the cost for SCANs of dimension +** WhereLoops is increased to be slightly larger than the cost of a SCAN +** in the fact table. Only SCAN costs are increased. SEARCH costs are +** unchanged. This heuristic helps keep fact tables in outer loops. Without +** this heuristic, paths with fact tables in outer loops tend to get pruned +** by the mxChoice limit on the number of paths, resulting in poor query +** plans. See the starschema1.test test module for examples of queries +** that need this heuristic to find good query plans. +** +** This heuristic can be completely disabled, so that no query is +** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to +** disable the SQLITE_StarQuery optimization. In the CLI, the command +** to do that is: ".testctrl opt -starquery". +** +** HISTORICAL NOTES: +** +** This optimization was first added on 2024-05-09 by check-in 38db9b5c83d. +** The original optimization reduced the cost and output size estimate for +** fact tables to help them move to outer loops. But months later (as people +** started upgrading) performance regression reports started caming in, +** including: +** +** forum post b18ef983e68d06d1 (2024-12-21) +** forum post 0025389d0860af82 (2025-01-14) +** forum post d87570a145599033 (2025-01-17) +** +** To address these, the criteria for a star-query was tightened to exclude +** cases where the fact and dimensions are separated by an outer join, and +** the affect of star-schema detection was changed to increase the rRun cost +** on just full table scans of dimension tables, rather than reducing costs +** in the all access methods of the fact table. */ -static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){ +static int computeMxChoice(WhereInfo *pWInfo){ int nLoop = pWInfo->nLevel; /* Number of terms in the join */ - if( nRowEst==0 - && nLoop>=5 + WhereLoop *pWLoop; /* For looping over WhereLoops */ + +#ifdef SQLITE_DEBUG + /* The star-query detection code below makes use of the following + ** properties of the WhereLoop list, so verify them before + ** continuing: + ** (1) .maskSelf is the bitmask corresponding to .iTab + ** (2) The WhereLoop list is in ascending .iTab order + */ + for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ + assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) ); + assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab ); + } +#endif /* SQLITE_DEBUG */ + + if( nLoop>=5 + && !pWInfo->bStarDone && OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery) ){ - /* Check to see if we are dealing with a star schema and if so, reduce - ** the cost of fact tables relative to dimension tables, as a heuristic - ** to help keep the fact tables in outer loops. + SrcItem *aFromTabs; /* All terms of the FROM clause */ + int iFromIdx; /* Term of FROM clause is the candidate fact-table */ + Bitmask m; /* Bitmask for candidate fact-table */ + Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */ + WhereLoop *pStart; /* Where to start searching for dimension-tables */ + + pWInfo->bStarDone = 1; /* Only do this computation once */ + + /* Look for fact tables with four or more dimensions where the + ** dimension tables are not separately from the fact tables by an outer + ** or cross join. Adjust cost weights if found. */ - int iLoop; /* Counter over join terms */ - Bitmask m; /* Bitmask for current loop */ - assert( pWInfo->nOutStarDelta==0 ); - for(iLoop=0, m=1; iLoop<nLoop; iLoop++, m<<=1){ - WhereLoop *pWLoop; /* For looping over WhereLoops */ + assert( !pWInfo->bStarUsed ); + aFromTabs = pWInfo->pTabList->a; + pStart = pWInfo->pLoops; + for(iFromIdx=0, m=1; iFromIdx<nLoop; iFromIdx++, m<<=1){ int nDep = 0; /* Number of dimension tables */ - LogEst rDelta; /* Heuristic cost adjustment */ + LogEst mxRun; /* Maximum SCAN cost of a fact table */ Bitmask mSeen = 0; /* Mask of dimension tables */ - for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */ + SrcItem *pFactTab; /* The candidate fact table */ + + pFactTab = aFromTabs + iFromIdx; + if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){ + /* If the candidate fact-table is the right table of an outer join + ** restrict the search for dimension-tables to be tables to the right + ** of the fact-table. */ + if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */ + while( pStart && pStart->iTab<=iFromIdx ){ + pStart = pStart->pNextLoop; + } + } + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){ + /* Fact-tables and dimension-tables cannot be separated by an + ** outer join (at least for the definition of fact- and dimension- + ** used by this heuristic). */ + break; + } + if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */ && (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */ - && (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0 - /* ^- pWInfo isn't a LEFT JOIN */ + && (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */ ){ - nDep++; - mSeen |= pWLoop->maskSelf; + if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){ + mSelfJoin |= m; + }else{ + nDep++; + mSeen |= pWLoop->maskSelf; + } } } if( nDep<=3 ) continue; - rDelta = 15*(nDep-3); -#ifdef WHERETRACE_ENABLED /* 0x4 */ - if( sqlite3WhereTrace&0x4 ){ - SrcItem *pItem = pWInfo->pTabList->a + iLoop; - sqlite3DebugPrintf( - "Fact-table %s(%d): %d dimensions, cost reduced %d\n", - pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop, - nDep, rDelta); - } -#endif - if( pWInfo->nOutStarDelta==0 ){ + + /* If we reach this point, it means that pFactTab is a fact table + ** with four or more dimensions connected by inner joins. Proceed + ** to make cost adjustments. */ + +#ifdef WHERETRACE_ENABLED + /* Make sure rStarDelta values are initialized */ + if( !pWInfo->bStarUsed ){ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ pWLoop->rStarDelta = 0; } } - pWInfo->nOutStarDelta += rDelta; +#endif + pWInfo->bStarUsed = 1; + + /* Compute the maximum cost of any WhereLoop for the + ** fact table plus one epsilon */ + mxRun = LOGEST_MIN; + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( pWLoop->iTab<iFromIdx ) continue; + if( pWLoop->iTab>iFromIdx ) break; + if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun; + } + if( ALWAYS(mxRun<LOGEST_MAX) ) mxRun++; + + /* Increase the cost of table scans for dimension tables to be + ** slightly more than the maximum cost of the fact table */ + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( (pWLoop->maskSelf & mSeen)==0 ) continue; + if( pWLoop->nLTerm ) continue; + if( pWLoop->rRun<mxRun ){ +#ifdef WHERETRACE_ENABLED /* 0x80000 */ + if( sqlite3WhereTrace & 0x80000 ){ + SrcItem *pDim = aFromTabs + pWLoop->iTab; + sqlite3DebugPrintf( + "Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n", + pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab, + pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName, + iFromIdx, mxRun + ); + } + pWLoop->rStarDelta = mxRun - pWLoop->rRun; +#endif /* WHERETRACE_ENABLED */ + pWLoop->rRun = mxRun; + } + } + } +#ifdef WHERETRACE_ENABLED /* 0x80000 */ + if( (sqlite3WhereTrace & 0x80000)!=0 && pWInfo->bStarUsed ){ + sqlite3DebugPrintf("WhereLoops changed by star-query heuristic:\n"); for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - if( pWLoop->maskSelf==m ){ - pWLoop->rRun -= rDelta; - pWLoop->nOut -= rDelta; - pWLoop->rStarDelta = rDelta; + if( pWLoop->rStarDelta ){ + sqlite3WhereLoopPrint(pWLoop, &pWInfo->sWC); } } - } + } +#endif } - return pWInfo->nOutStarDelta>0 ? 18 : 12; + return pWInfo->bStarUsed ? 18 : 12; } /* @@ -5586,8 +5695,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ mxChoice = 1; }else if( nLoop==2 ){ mxChoice = 5; + }else if( pParse->nErr ){ + mxChoice = 1; }else{ - mxChoice = computeMxChoice(pWInfo, nRowEst); + mxChoice = computeMxChoice(pWInfo); } assert( nLoop<=pWInfo->pTabList->nSrc ); @@ -5838,8 +5949,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ if( sqlite3WhereTrace & 0x02 ){ LogEst rMin, rFloor = 0; int nDone = 0; + int nProgress; sqlite3DebugPrintf("---- after round %d ----\n", iLoop); - while( nDone<nTo ){ + do{ + nProgress = 0; rMin = 0x7fff; for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){ if( pTo->rCost>rFloor && pTo->rCost<rMin ) rMin = pTo->rCost; @@ -5855,10 +5968,11 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ sqlite3DebugPrintf("\n"); } nDone++; + nProgress++; } } rFloor = rMin; - } + }while( nDone<nTo && nProgress>0 ); } #endif @@ -5952,7 +6066,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ } } - pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta; + pWInfo->nRowOut = pFrom->nRow; +#ifdef WHERETRACE_ENABLED + pWInfo->rTotalCost = pFrom->rCost; +#endif /* Free temporary memory and return success */ sqlite3StackFreeNN(pParse->db, pSpace); @@ -6350,7 +6467,6 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( } } nSearch += pLoop->nOut; - if( pWInfo->nOutStarDelta ) nSearch += pLoop->rStarDelta; } } @@ -6833,7 +6949,8 @@ WhereInfo *sqlite3WhereBegin( assert( db->mallocFailed==0 ); #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ - sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); + sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d", + pWInfo->rTotalCost, pWInfo->nRowOut); if( pWInfo->nOBSat>0 ){ sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask); } |