aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/sqliteInt.h2
-rw-r--r--src/where.c73
-rw-r--r--src/whereInt.h6
3 files changed, 55 insertions, 26 deletions
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index d29b2e393..205f8f3e0 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -881,6 +881,8 @@ typedef u64 tRowcnt;
** 0.5 -> -10 0.1 -> -33 0.0625 -> -40
*/
typedef INT16_TYPE LogEst;
+#define LOGEST_MIN (-32768)
+#define LOGEST_MAX (32767)
/*
** Set the SQLITE_PTRSIZE macro to the number of bytes in a pointer
diff --git a/src/where.c b/src/where.c
index 0db7cbc1e..01f8ff0d9 100644
--- a/src/where.c
+++ b/src/where.c
@@ -5451,7 +5451,7 @@ static LogEst whereSortingCost(
** For the purposes of this heuristic, a star-query is defined as a query
** with a large central table that is joined using an INNER JOIN,
** not CROSS or OUTER JOINs, against four or more smaller tables.
-* The central table is called the "fact" table. The smaller tables
+** The central table is called the "fact" table. The smaller tables
** that get joined are "dimension tables". Also, any table that is
** self-joined cannot be a dimension table; we assume that dimension
** tables may only be joined against fact tables.
@@ -5460,16 +5460,35 @@ static LogEst whereSortingCost(
**
** If pWInfo describes a star-query, then the cost for SCANs of dimension
** WhereLoops is increased to be slightly larger than the cost of a SCAN
-** in the fact table. This heuristic helps keep fact tables in
-** outer loops. Without this heuristic, paths with fact tables in outer
-** loops tend to get pruned by the mxChoice limit on the number of paths,
-** resulting in poor query plans. The cost adjustment for each WhereLoop
-** is stored in its rStarDelta field.
+** in the fact table. Only SCAN costs are increased. SEARCH costs are
+** unchanged. This heuristic helps keep fact tables in outer loops. Without
+** this heuristic, paths with fact tables in outer loops tend to get pruned
+** by the mxChoice limit on the number of paths, resulting in poor query
+** plans. See the starschema1.test test module for examples of queries
+** that need this heuristic to find good query plans.
**
** This heuristic can be completely disabled, so that no query is
** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to
** disable the SQLITE_StarQuery optimization. In the CLI, the command
** to do that is: ".testctrl opt -starquery".
+**
+** HISTORICAL NOTES:
+**
+** This optimization was first added on 2024-05-09 by check-in 38db9b5c83d.
+** The original optimization reduced the cost and output size estimate for
+** fact tables to help them move to outer loops. But months later (as people
+** started upgrading) performance regression reports started caming in,
+** including:
+**
+** forum post b18ef983e68d06d1 (2024-12-21)
+** forum post 0025389d0860af82 (2025-01-14)
+** forum post d87570a145599033 (2025-01-17)
+**
+** To address these, the criteria for a star-query was tightened to exclude
+** cases where the fact and dimensions are separated by an outer join, and
+** the affect of star-schema detection was changed to increase the rRun cost
+** on just full table scans of dimension tables, rather than reducing costs
+** in the all access methods of the fact table.
*/
static int computeMxChoice(WhereInfo *pWInfo){
int nLoop = pWInfo->nLevel; /* Number of terms in the join */
@@ -5477,7 +5496,7 @@ static int computeMxChoice(WhereInfo *pWInfo){
#ifdef SQLITE_DEBUG
/* The star-query detection code below makes use of the following
- ** properties of the WhereLoop list, so verifying them before
+ ** properties of the WhereLoop list, so verify them before
** continuing:
** (1) .maskSelf is the bitmask corresponding to .iTab
** (2) The WhereLoop list is in ascending .iTab order
@@ -5500,9 +5519,10 @@ static int computeMxChoice(WhereInfo *pWInfo){
pWInfo->bStarDone = 1; /* Only do this computation once */
- /* Check to see if we are dealing with a star schema and if so, reduce
- ** the cost of fact tables relative to dimension tables, as a heuristic
- ** to help keep the fact tables in outer loops.
+ /* Check to see if we are dealing with a star schema and if so, adjust
+ ** SCAN cost of dimensino tables so that they are as large as the SCAN
+ ** cost of the fact table. This is a heuristic that helps keep the
+ ** fact tables in outer loops.
*/
assert( !pWInfo->bStarUsed );
aFromTabs = pWInfo->pTabList->a;
@@ -5543,29 +5563,34 @@ static int computeMxChoice(WhereInfo *pWInfo){
}
}
if( nDep<=3 ) continue;
-
- /* Compute the maximum cost of any WhereLoop for the iFromIdx-th term */
- mxRun = -32767;
- for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
- if( pWLoop->iTab<iFromIdx ) continue;
- if( pWLoop->iTab>iFromIdx ) break;
- if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun;
- }
-
+ /* If we reach this point, it means that pFactTab is a fact table */
+
+#ifdef WHERETRACE_ENABLED
/* Make sure rStarDelta values are initialized */
if( !pWInfo->bStarUsed ){
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
pWLoop->rStarDelta = 0;
}
- pWInfo->bStarUsed = 1;
}
+#endif
+ pWInfo->bStarUsed = 1;
+
+ /* Compute one more than the maximum cost of any WhereLoop for the
+ ** fact table */
+ mxRun = LOGEST_MIN;
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( pWLoop->iTab<iFromIdx ) continue;
+ if( pWLoop->iTab>iFromIdx ) break;
+ if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun;
+ }
+ if( ALWAYS(mxRun<LOGEST_MAX) ) mxRun++;
/* Increase the cost of table scans for dimension tables to be slightly
** more than the maximum cost of fact table */
for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
if( (pWLoop->maskSelf & mSeen)==0 ) continue;
if( pWLoop->nLTerm ) continue;
- if( pWLoop->rRun<=mxRun ){
+ if( pWLoop->rRun<mxRun ){
#ifdef WHERETRACE_ENABLED /* 0x80000 */
if( sqlite3WhereTrace & 0x80000 ){
SrcItem *pDim = aFromTabs + pWLoop->iTab;
@@ -5573,12 +5598,12 @@ static int computeMxChoice(WhereInfo *pWInfo){
"Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n",
pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab,
pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName,
- iFromIdx, mxRun+1
+ iFromIdx, mxRun
);
}
+ pWLoop->rStarDelta = mxRun - pWLoop->rRun;
#endif /* WHERETRACE_ENABLED */
- pWLoop->rStarDelta = mxRun+1 - pWLoop->rRun;
- pWLoop->rRun = mxRun+1;
+ pWLoop->rRun = mxRun;
}
}
}
diff --git a/src/whereInt.h b/src/whereInt.h
index 35bfe581d..8ba8a7072 100644
--- a/src/whereInt.h
+++ b/src/whereInt.h
@@ -162,8 +162,10 @@ struct WhereLoop {
/**** whereLoopXfer() copies fields above ***********************/
# define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot)
u16 nLSlot; /* Number of slots allocated for aLTerm[] */
+#ifdef WHERETRACE_ENABLED
LogEst rStarDelta; /* Cost delta due to star-schema heuristic. Not
** initialized unless pWInfo->bStarUsed */
+#endif
WhereTerm **aLTerm; /* WhereTerms used */
WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */
WhereTerm *aLTermSpace[3]; /* Initial aLTerm[] space */
@@ -485,9 +487,9 @@ struct WhereInfo {
unsigned bDeferredSeek :1; /* Uses OP_DeferredSeek */
unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */
unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */
- unsigned sorted :1; /* True if really sorted (not just grouped) */
+ unsigned sorted :1; /* True if really sorted (not just grouped) */
unsigned bStarDone :1; /* True if check for star-query is complete */
- unsigned bStarUsed :1; /* True if cost adjustments for star-query */
+ unsigned bStarUsed :1; /* True if star-query heuristic is used */
LogEst nRowOut; /* Estimated number of output rows */
#ifdef WHERETRACE_ENABLED
LogEst rTotalCost; /* Total cost of the solution */