aboutsummaryrefslogtreecommitdiff
path: root/src/where.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/where.c')
-rw-r--r--src/where.c229
1 files changed, 173 insertions, 56 deletions
diff --git a/src/where.c b/src/where.c
index 0b8e5acea..5cb52b8ad 100644
--- a/src/where.c
+++ b/src/where.c
@@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){
** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31
*/
void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
+ WhereInfo *pWInfo;
if( pWC ){
- WhereInfo *pWInfo = pWC->pWInfo;
+ pWInfo = pWC->pWInfo;
int nb = 1+(pWInfo->pTabList->nSrc+3)/4;
SrcItem *pItem = pWInfo->pTabList->a + p->iTab;
Table *pTab = pItem->pSTab;
@@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
sqlite3DebugPrintf(" %12s",
pItem->zAlias ? pItem->zAlias : pTab->zName);
}else{
+ pWInfo = 0;
sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d",
p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab);
}
@@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
}else{
sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm);
}
- sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
+ if( pWInfo && pWInfo->bStarUsed && p->rStarDelta!=0 ){
+ sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n",
+ p->rSetup, p->rRun, p->nOut, p->rStarDelta);
+ }else{
+ sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
+ }
if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){
int i;
for(i=0; i<p->nLTerm; i++){
@@ -5441,77 +5448,179 @@ static LogEst whereSortingCost(
** 18 for star queries
** 12 otherwise
**
-** For the purposes of SQLite, a star-query is defined as a query
-** with a large central table that is joined (using an INNER JOIN,
-** not a LEFT JOIN) against four or more smaller tables. The central
-** table is called the "fact" table. The smaller tables that get
-** joined are "dimension tables".
+** For the purposes of this heuristic, a star-query is defined as a query
+** with a large central table that is joined using an INNER JOIN,
+** not CROSS or OUTER JOINs, against four or more smaller tables.
+** The central table is called the "fact" table. The smaller tables
+** that get joined are "dimension tables". Also, any table that is
+** self-joined cannot be a dimension table; we assume that dimension
+** tables may only be joined against fact tables.
**
** SIDE EFFECT: (and really the whole point of this subroutine)
**
-** If pWInfo describes a star-query, then the cost on WhereLoops for the
-** fact table is reduced. This heuristic helps keep fact tables in
-** outer loops. Without this heuristic, paths with fact tables in outer
-** loops tend to get pruned by the mxChoice limit on the number of paths,
-** resulting in poor query plans. The total amount of heuristic cost
-** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment
-** for each WhereLoop is stored in its rStarDelta field.
+** If pWInfo describes a star-query, then the cost for SCANs of dimension
+** WhereLoops is increased to be slightly larger than the cost of a SCAN
+** in the fact table. Only SCAN costs are increased. SEARCH costs are
+** unchanged. This heuristic helps keep fact tables in outer loops. Without
+** this heuristic, paths with fact tables in outer loops tend to get pruned
+** by the mxChoice limit on the number of paths, resulting in poor query
+** plans. See the starschema1.test test module for examples of queries
+** that need this heuristic to find good query plans.
+**
+** This heuristic can be completely disabled, so that no query is
+** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to
+** disable the SQLITE_StarQuery optimization. In the CLI, the command
+** to do that is: ".testctrl opt -starquery".
+**
+** HISTORICAL NOTES:
+**
+** This optimization was first added on 2024-05-09 by check-in 38db9b5c83d.
+** The original optimization reduced the cost and output size estimate for
+** fact tables to help them move to outer loops. But months later (as people
+** started upgrading) performance regression reports started caming in,
+** including:
+**
+** forum post b18ef983e68d06d1 (2024-12-21)
+** forum post 0025389d0860af82 (2025-01-14)
+** forum post d87570a145599033 (2025-01-17)
+**
+** To address these, the criteria for a star-query was tightened to exclude
+** cases where the fact and dimensions are separated by an outer join, and
+** the affect of star-schema detection was changed to increase the rRun cost
+** on just full table scans of dimension tables, rather than reducing costs
+** in the all access methods of the fact table.
*/
-static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){
+static int computeMxChoice(WhereInfo *pWInfo){
int nLoop = pWInfo->nLevel; /* Number of terms in the join */
- if( nRowEst==0
- && nLoop>=5
+ WhereLoop *pWLoop; /* For looping over WhereLoops */
+
+#ifdef SQLITE_DEBUG
+ /* The star-query detection code below makes use of the following
+ ** properties of the WhereLoop list, so verify them before
+ ** continuing:
+ ** (1) .maskSelf is the bitmask corresponding to .iTab
+ ** (2) The WhereLoop list is in ascending .iTab order
+ */
+ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
+ assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) );
+ assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab );
+ }
+#endif /* SQLITE_DEBUG */
+
+ if( nLoop>=5
+ && !pWInfo->bStarDone
&& OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery)
){
- /* Check to see if we are dealing with a star schema and if so, reduce
- ** the cost of fact tables relative to dimension tables, as a heuristic
- ** to help keep the fact tables in outer loops.
+ SrcItem *aFromTabs; /* All terms of the FROM clause */
+ int iFromIdx; /* Term of FROM clause is the candidate fact-table */
+ Bitmask m; /* Bitmask for candidate fact-table */
+ Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */
+ WhereLoop *pStart; /* Where to start searching for dimension-tables */
+
+ pWInfo->bStarDone = 1; /* Only do this computation once */
+
+ /* Look for fact tables with four or more dimensions where the
+ ** dimension tables are not separately from the fact tables by an outer
+ ** or cross join. Adjust cost weights if found.
*/
- int iLoop; /* Counter over join terms */
- Bitmask m; /* Bitmask for current loop */
- assert( pWInfo->nOutStarDelta==0 );
- for(iLoop=0, m=1; iLoop<nLoop; iLoop++, m<<=1){
- WhereLoop *pWLoop; /* For looping over WhereLoops */
+ assert( !pWInfo->bStarUsed );
+ aFromTabs = pWInfo->pTabList->a;
+ pStart = pWInfo->pLoops;
+ for(iFromIdx=0, m=1; iFromIdx<nLoop; iFromIdx++, m<<=1){
int nDep = 0; /* Number of dimension tables */
- LogEst rDelta; /* Heuristic cost adjustment */
+ LogEst mxRun; /* Maximum SCAN cost of a fact table */
Bitmask mSeen = 0; /* Mask of dimension tables */
- for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
- if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */
+ SrcItem *pFactTab; /* The candidate fact table */
+
+ pFactTab = aFromTabs + iFromIdx;
+ if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
+ /* If the candidate fact-table is the right table of an outer join
+ ** restrict the search for dimension-tables to be tables to the right
+ ** of the fact-table. */
+ if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */
+ while( pStart && pStart->iTab<=iFromIdx ){
+ pStart = pStart->pNextLoop;
+ }
+ }
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
+ /* Fact-tables and dimension-tables cannot be separated by an
+ ** outer join (at least for the definition of fact- and dimension-
+ ** used by this heuristic). */
+ break;
+ }
+ if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */
&& (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */
- && (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0
- /* ^- pWInfo isn't a LEFT JOIN */
+ && (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */
){
- nDep++;
- mSeen |= pWLoop->maskSelf;
+ if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){
+ mSelfJoin |= m;
+ }else{
+ nDep++;
+ mSeen |= pWLoop->maskSelf;
+ }
}
}
if( nDep<=3 ) continue;
- rDelta = 15*(nDep-3);
-#ifdef WHERETRACE_ENABLED /* 0x4 */
- if( sqlite3WhereTrace&0x4 ){
- SrcItem *pItem = pWInfo->pTabList->a + iLoop;
- sqlite3DebugPrintf(
- "Fact-table %s(%d): %d dimensions, cost reduced %d\n",
- pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop,
- nDep, rDelta);
- }
-#endif
- if( pWInfo->nOutStarDelta==0 ){
+
+ /* If we reach this point, it means that pFactTab is a fact table
+ ** with four or more dimensions connected by inner joins. Proceed
+ ** to make cost adjustments. */
+
+#ifdef WHERETRACE_ENABLED
+ /* Make sure rStarDelta values are initialized */
+ if( !pWInfo->bStarUsed ){
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
pWLoop->rStarDelta = 0;
}
}
- pWInfo->nOutStarDelta += rDelta;
+#endif
+ pWInfo->bStarUsed = 1;
+
+ /* Compute the maximum cost of any WhereLoop for the
+ ** fact table plus one epsilon */
+ mxRun = LOGEST_MIN;
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( pWLoop->iTab<iFromIdx ) continue;
+ if( pWLoop->iTab>iFromIdx ) break;
+ if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun;
+ }
+ if( ALWAYS(mxRun<LOGEST_MAX) ) mxRun++;
+
+ /* Increase the cost of table scans for dimension tables to be
+ ** slightly more than the maximum cost of the fact table */
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( (pWLoop->maskSelf & mSeen)==0 ) continue;
+ if( pWLoop->nLTerm ) continue;
+ if( pWLoop->rRun<mxRun ){
+#ifdef WHERETRACE_ENABLED /* 0x80000 */
+ if( sqlite3WhereTrace & 0x80000 ){
+ SrcItem *pDim = aFromTabs + pWLoop->iTab;
+ sqlite3DebugPrintf(
+ "Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n",
+ pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab,
+ pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName,
+ iFromIdx, mxRun
+ );
+ }
+ pWLoop->rStarDelta = mxRun - pWLoop->rRun;
+#endif /* WHERETRACE_ENABLED */
+ pWLoop->rRun = mxRun;
+ }
+ }
+ }
+#ifdef WHERETRACE_ENABLED /* 0x80000 */
+ if( (sqlite3WhereTrace & 0x80000)!=0 && pWInfo->bStarUsed ){
+ sqlite3DebugPrintf("WhereLoops changed by star-query heuristic:\n");
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
- if( pWLoop->maskSelf==m ){
- pWLoop->rRun -= rDelta;
- pWLoop->nOut -= rDelta;
- pWLoop->rStarDelta = rDelta;
+ if( pWLoop->rStarDelta ){
+ sqlite3WhereLoopPrint(pWLoop, &pWInfo->sWC);
}
}
- }
+ }
+#endif
}
- return pWInfo->nOutStarDelta>0 ? 18 : 12;
+ return pWInfo->bStarUsed ? 18 : 12;
}
/*
@@ -5586,8 +5695,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
mxChoice = 1;
}else if( nLoop==2 ){
mxChoice = 5;
+ }else if( pParse->nErr ){
+ mxChoice = 1;
}else{
- mxChoice = computeMxChoice(pWInfo, nRowEst);
+ mxChoice = computeMxChoice(pWInfo);
}
assert( nLoop<=pWInfo->pTabList->nSrc );
@@ -5838,8 +5949,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
if( sqlite3WhereTrace & 0x02 ){
LogEst rMin, rFloor = 0;
int nDone = 0;
+ int nProgress;
sqlite3DebugPrintf("---- after round %d ----\n", iLoop);
- while( nDone<nTo ){
+ do{
+ nProgress = 0;
rMin = 0x7fff;
for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){
if( pTo->rCost>rFloor && pTo->rCost<rMin ) rMin = pTo->rCost;
@@ -5855,10 +5968,11 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
sqlite3DebugPrintf("\n");
}
nDone++;
+ nProgress++;
}
}
rFloor = rMin;
- }
+ }while( nDone<nTo && nProgress>0 );
}
#endif
@@ -5952,7 +6066,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
}
}
- pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta;
+ pWInfo->nRowOut = pFrom->nRow;
+#ifdef WHERETRACE_ENABLED
+ pWInfo->rTotalCost = pFrom->rCost;
+#endif
/* Free temporary memory and return success */
sqlite3StackFreeNN(pParse->db, pSpace);
@@ -6350,7 +6467,6 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful(
}
}
nSearch += pLoop->nOut;
- if( pWInfo->nOutStarDelta ) nSearch += pLoop->rStarDelta;
}
}
@@ -6833,7 +6949,8 @@ WhereInfo *sqlite3WhereBegin(
assert( db->mallocFailed==0 );
#ifdef WHERETRACE_ENABLED
if( sqlite3WhereTrace ){
- sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut);
+ sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d",
+ pWInfo->rTotalCost, pWInfo->nRowOut);
if( pWInfo->nOBSat>0 ){
sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask);
}