aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <>2025-01-31 00:54:59 +0000
committerdrh <>2025-01-31 00:54:59 +0000
commit0911f86abf39bfaf0f3b144f07860238baf6a483 (patch)
tree468e924cf458bcffd8948677078a460b66c688ea /src
parentc850c2be757ffbf11423b47f8ef94bd0ce20f048 (diff)
parent49906e8e4bfbac201532e111b5f972129ce7bafe (diff)
downloadsqlite-0911f86abf39bfaf0f3b144f07860238baf6a483.tar.gz
sqlite-0911f86abf39bfaf0f3b144f07860238baf6a483.zip
Merge all the latest trunk changes into the extra-security branch.
FossilOrigin-Name: 86ba57561a8d8c14e401c06b2345a9417053aa3a5f0c84e52460f23f5e6aa8d0
Diffstat (limited to 'src')
-rw-r--r--src/build.c1
-rw-r--r--src/expr.c8
-rw-r--r--src/func.c6
-rw-r--r--src/insert.c28
-rw-r--r--src/sqlite.h.in5
-rw-r--r--src/sqliteInt.h11
-rw-r--r--src/tclsqlite.c8
-rw-r--r--src/treeview.c16
-rw-r--r--src/vdbe.h1
-rw-r--r--src/vdbeapi.c31
-rw-r--r--src/where.c229
-rw-r--r--src/whereInt.h12
12 files changed, 238 insertions, 118 deletions
diff --git a/src/build.c b/src/build.c
index a5deb54fc..cc29610e0 100644
--- a/src/build.c
+++ b/src/build.c
@@ -4691,7 +4691,6 @@ void sqlite3IdListDelete(sqlite3 *db, IdList *pList){
int i;
assert( db!=0 );
if( pList==0 ) return;
- assert( pList->eU4!=EU4_EXPR ); /* EU4_EXPR mode is not currently used */
for(i=0; i<pList->nId; i++){
sqlite3DbFree(db, pList->a[i].zName);
}
diff --git a/src/expr.c b/src/expr.c
index ca5b9092e..8f898a1e3 100644
--- a/src/expr.c
+++ b/src/expr.c
@@ -1932,16 +1932,13 @@ IdList *sqlite3IdListDup(sqlite3 *db, const IdList *p){
int i;
assert( db!=0 );
if( p==0 ) return 0;
- assert( p->eU4!=EU4_EXPR );
pNew = sqlite3DbMallocRawNN(db, sizeof(*pNew)+(p->nId-1)*sizeof(p->a[0]) );
if( pNew==0 ) return 0;
pNew->nId = p->nId;
- pNew->eU4 = p->eU4;
for(i=0; i<p->nId; i++){
struct IdList_item *pNewItem = &pNew->a[i];
const struct IdList_item *pOldItem = &p->a[i];
pNewItem->zName = sqlite3DbStrDup(db, pOldItem->zName);
- pNewItem->u4 = pOldItem->u4;
}
return pNew;
}
@@ -3465,6 +3462,7 @@ static int findCompatibleInRhsSubrtn(
assert( pOp->opcode==OP_BeginSubrtn );
pSig = pOp->p4.pSubrtnSig;
assert( pSig!=0 );
+ if( !pSig->bComplete ) continue;
if( pNewSig->selId!=pSig->selId ) continue;
if( strcmp(pNewSig->zAff,pSig->zAff)!=0 ) continue;
pExpr->y.sub.iAddr = pSig->iAddr;
@@ -3511,6 +3509,7 @@ void sqlite3CodeRhsOfIN(
KeyInfo *pKeyInfo = 0; /* Key information */
int nVal; /* Size of vector pLeft */
Vdbe *v; /* The prepared statement under construction */
+ SubrtnSig *pSig = 0; /* Signature for this subroutine */
v = pParse->pVdbe;
assert( v!=0 );
@@ -3531,7 +3530,6 @@ void sqlite3CodeRhsOfIN(
** Compute a signature for the RHS of the IN operator to facility
** finding and reusing prior instances of the same IN operator.
*/
- SubrtnSig *pSig = 0;
assert( !ExprUseXSelect(pExpr) || pExpr->x.pSelect!=0 );
if( ExprUseXSelect(pExpr) && (pExpr->x.pSelect->selFlags & SF_All)==0 ){
pSig = sqlite3DbMallocRawNN(pParse->db, sizeof(pSig[0]));
@@ -3574,6 +3572,7 @@ void sqlite3CodeRhsOfIN(
pExpr->y.sub.iAddr =
sqlite3VdbeAddOp2(v, OP_BeginSubrtn, 0, pExpr->y.sub.regReturn) + 1;
if( pSig ){
+ pSig->bComplete = 0;
pSig->iAddr = pExpr->y.sub.iAddr;
pSig->regReturn = pExpr->y.sub.regReturn;
pSig->iTable = iTab;
@@ -3709,6 +3708,7 @@ void sqlite3CodeRhsOfIN(
sqlite3ReleaseTempReg(pParse, r1);
sqlite3ReleaseTempReg(pParse, r2);
}
+ if( pSig ) pSig->bComplete = 1;
if( pKeyInfo ){
sqlite3VdbeChangeP4(v, addr, (void *)pKeyInfo, P4_KEYINFO);
}
diff --git a/src/func.c b/src/func.c
index 2bd4be31c..bd25a44d4 100644
--- a/src/func.c
+++ b/src/func.c
@@ -2808,10 +2808,8 @@ void sqlite3RegisterBuiltinFunctions(void){
#endif /* SQLITE_ENABLE_MATH_FUNCTIONS */
FUNCTION(sign, 1, 0, 0, signFunc ),
INLINE_FUNC(coalesce, -4, INLINEFUNC_coalesce, 0 ),
- INLINE_FUNC(iif, 2, INLINEFUNC_iif, 0 ),
- INLINE_FUNC(iif, 3, INLINEFUNC_iif, 0 ),
- INLINE_FUNC(if, 2, INLINEFUNC_iif, 0 ),
- INLINE_FUNC(if, 3, INLINEFUNC_iif, 0 ),
+ INLINE_FUNC(iif, -4, INLINEFUNC_iif, 0 ),
+ INLINE_FUNC(if, -4, INLINEFUNC_iif, 0 ),
};
#ifndef SQLITE_OMIT_ALTERTABLE
sqlite3AlterFunctions();
diff --git a/src/insert.c b/src/insert.c
index d380281be..83baeece6 100644
--- a/src/insert.c
+++ b/src/insert.c
@@ -927,6 +927,7 @@ void sqlite3Insert(
int regRowid; /* registers holding insert rowid */
int regData; /* register holding first column to insert */
int *aRegIdx = 0; /* One register allocated to each index */
+ int *aTabColMap = 0; /* Mapping from pTab columns to pCol entries */
#ifndef SQLITE_OMIT_TRIGGER
int isView; /* True if attempting to insert into a view */
@@ -1071,15 +1072,15 @@ void sqlite3Insert(
*/
bIdListInOrder = (pTab->tabFlags & (TF_OOOHidden|TF_HasStored))==0;
if( pColumn ){
- assert( pColumn->eU4!=EU4_EXPR );
- pColumn->eU4 = EU4_IDX;
- for(i=0; i<pColumn->nId; i++){
- pColumn->a[i].u4.idx = -1;
- }
+ aTabColMap = sqlite3DbMallocZero(db, pTab->nCol*sizeof(int));
+ if( aTabColMap==0 ) goto insert_cleanup;
for(i=0; i<pColumn->nId; i++){
+ const char *zCName = pColumn->a[i].zName;
+ u8 hName = sqlite3StrIHash(zCName);
for(j=0; j<pTab->nCol; j++){
- if( sqlite3StrICmp(pColumn->a[i].zName, pTab->aCol[j].zCnName)==0 ){
- pColumn->a[i].u4.idx = j;
+ if( pTab->aCol[j].hName!=hName ) continue;
+ if( sqlite3StrICmp(zCName, pTab->aCol[j].zCnName)==0 ){
+ if( aTabColMap[j]==0 ) aTabColMap[j] = i+1;
if( i!=j ) bIdListInOrder = 0;
if( j==pTab->iPKey ){
ipkColumn = i; assert( !withoutRowid );
@@ -1401,9 +1402,9 @@ void sqlite3Insert(
}
}
if( pColumn ){
- assert( pColumn->eU4==EU4_IDX );
- for(j=0; j<pColumn->nId && pColumn->a[j].u4.idx!=i; j++){}
- if( j>=pColumn->nId ){
+ j = aTabColMap[i];
+ assert( j>=0 && j<=pColumn->nId );
+ if( j==0 ){
/* A column not named in the insert column list gets its
** default value */
sqlite3ExprCodeFactorable(pParse,
@@ -1411,7 +1412,7 @@ void sqlite3Insert(
iRegStore);
continue;
}
- k = j;
+ k = j - 1;
}else if( nColumn==0 ){
/* This is INSERT INTO ... DEFAULT VALUES. Load the default value. */
sqlite3ExprCodeFactorable(pParse,
@@ -1656,7 +1657,10 @@ insert_cleanup:
sqlite3ExprListDelete(db, pList);
sqlite3UpsertDelete(db, pUpsert);
sqlite3SelectDelete(db, pSelect);
- sqlite3IdListDelete(db, pColumn);
+ if( pColumn ){
+ sqlite3IdListDelete(db, pColumn);
+ sqlite3DbFree(db, aTabColMap);
+ }
if( aRegIdx ) sqlite3DbNNFreeNN(db, aRegIdx);
}
diff --git a/src/sqlite.h.in b/src/sqlite.h.in
index 46336bf3e..b8f4ba3cb 100644
--- a/src/sqlite.h.in
+++ b/src/sqlite.h.in
@@ -10785,8 +10785,9 @@ SQLITE_EXPERIMENTAL int sqlite3_snapshot_recover(sqlite3 *db, const char *zDb);
/*
** CAPI3REF: Serialize a database
**
-** The sqlite3_serialize(D,S,P,F) interface returns a pointer to memory
-** that is a serialization of the S database on [database connection] D.
+** The sqlite3_serialize(D,S,P,F) interface returns a pointer to
+** memory that is a serialization of the S database on
+** [database connection] D. If S is a NULL pointer, the main database is used.
** If P is not a NULL pointer, then the size of the database in bytes
** is written into *P.
**
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index aa7e297e7..8c1be72c1 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -881,6 +881,8 @@ typedef u64 tRowcnt;
** 0.5 -> -10 0.1 -> -33 0.0625 -> -40
*/
typedef INT16_TYPE LogEst;
+#define LOGEST_MIN (-32768)
+#define LOGEST_MAX (32767)
/*
** Set the SQLITE_PTRSIZE macro to the number of bytes in a pointer
@@ -1151,7 +1153,7 @@ extern u32 sqlite3WhereTrace;
** 0xFFFF---- Low-level debug messages
**
** 0x00000001 Code generation
-** 0x00000002 Solver
+** 0x00000002 Solver (Use 0x40000 for less detail)
** 0x00000004 Solver costs
** 0x00000008 WhereLoop inserts
**
@@ -1170,6 +1172,8 @@ extern u32 sqlite3WhereTrace;
**
** 0x00010000 Show more detail when printing WHERE terms
** 0x00020000 Show WHERE terms returned from whereScanNext()
+** 0x00040000 Solver overview messages
+** 0x00080000 Star-query heuristic
*/
@@ -3221,13 +3225,8 @@ struct ExprList {
*/
struct IdList {
int nId; /* Number of identifiers on the list */
- u8 eU4; /* Which element of a.u4 is valid */
struct IdList_item {
char *zName; /* Name of the identifier */
- union {
- int idx; /* Index in some Table.aCol[] of a column named zName */
- Expr *pExpr; /* Expr to implement a USING variable -- NOT USED */
- } u4;
} a[1];
};
diff --git a/src/tclsqlite.c b/src/tclsqlite.c
index 76c9ef75c..824e8c4d3 100644
--- a/src/tclsqlite.c
+++ b/src/tclsqlite.c
@@ -510,7 +510,7 @@ static int createIncrblobChannel(
** or {...} or ; to be seen anywhere. Most callback scripts consist
** of just a single procedure name and they meet this requirement.
*/
-static int safeToUseEvalObjv(Tcl_Interp *interp, Tcl_Obj *pCmd){
+static int safeToUseEvalObjv(Tcl_Obj *pCmd){
/* We could try to do something with Tcl_Parse(). But we will instead
** just do a search for forbidden characters. If any of the forbidden
** characters appear in pCmd, we will report the string as unsafe.
@@ -2993,7 +2993,7 @@ deserialize_error:
}
pFunc->pScript = pScript;
Tcl_IncrRefCount(pScript);
- pFunc->useEvalObjv = safeToUseEvalObjv(interp, pScript);
+ pFunc->useEvalObjv = safeToUseEvalObjv(pScript);
pFunc->eType = eType;
rc = sqlite3_create_function(pDb->db, zName, nArg, flags,
pFunc, tclSqlFunc, 0, 0);
@@ -4021,7 +4021,9 @@ EXTERN int Tclsqlite_Unload(Tcl_Interp *interp, int flags){ return TCL_OK; }
EXTERN int Sqlite_SafeInit(Tcl_Interp *interp){ return TCL_ERROR; }
EXTERN int Sqlite_SafeUnload(Tcl_Interp *interp, int flags){return TCL_ERROR;}
-/* Also variants with a lowercase "s" */
+/* Also variants with a lowercase "s". I'm told that these are
+** deprecated in Tcl9, but they continue to be included for backwards
+** compatibility. */
EXTERN int sqlite3_Init(Tcl_Interp *interp){ return Sqlite3_Init(interp);}
EXTERN int sqlite_Init(Tcl_Interp *interp){ return Sqlite3_Init(interp);}
diff --git a/src/treeview.c b/src/treeview.c
index 2cfcfb69b..832965924 100644
--- a/src/treeview.c
+++ b/src/treeview.c
@@ -978,21 +978,7 @@ void sqlite3TreeViewBareIdList(
if( zName==0 ) zName = "(null)";
sqlite3TreeViewPush(&pView, moreToFollow);
sqlite3TreeViewLine(pView, 0);
- if( pList->eU4==EU4_NONE ){
- fprintf(stdout, "%s\n", zName);
- }else if( pList->eU4==EU4_IDX ){
- fprintf(stdout, "%s (%d)\n", zName, pList->a[i].u4.idx);
- }else{
- assert( pList->eU4==EU4_EXPR );
- if( pList->a[i].u4.pExpr==0 ){
- fprintf(stdout, "%s (pExpr=NULL)\n", zName);
- }else{
- fprintf(stdout, "%s\n", zName);
- sqlite3TreeViewPush(&pView, i<pList->nId-1);
- sqlite3TreeViewExpr(pView, pList->a[i].u4.pExpr, 0);
- sqlite3TreeViewPop(&pView);
- }
- }
+ fprintf(stdout, "%s\n", zName);
sqlite3TreeViewPop(&pView);
}
}
diff --git a/src/vdbe.h b/src/vdbe.h
index 71aae29a0..476f1b4ea 100644
--- a/src/vdbe.h
+++ b/src/vdbe.h
@@ -40,6 +40,7 @@ typedef struct SubrtnSig SubrtnSig;
*/
struct SubrtnSig {
int selId; /* SELECT-id for the SELECT statement on the RHS */
+ u8 bComplete; /* True if fully coded and available for reusable */
char *zAff; /* Affinity of the overall IN expression */
int iTable; /* Ephemeral table generated by the subroutine */
int iAddr; /* Subroutine entry address */
diff --git a/src/vdbeapi.c b/src/vdbeapi.c
index 113b8a9c0..aab7ac8a3 100644
--- a/src/vdbeapi.c
+++ b/src/vdbeapi.c
@@ -2177,6 +2177,7 @@ int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
PreUpdate *p;
Mem *pMem;
int rc = SQLITE_OK;
+ int iStore = 0;
#ifdef SQLITE_ENABLE_API_ARMOR
if( db==0 || ppValue==0 ){
@@ -2191,9 +2192,11 @@ int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
goto preupdate_old_out;
}
if( p->pPk ){
- iIdx = sqlite3TableColumnToIndex(p->pPk, iIdx);
+ iStore = sqlite3TableColumnToIndex(p->pPk, iIdx);
+ }else{
+ iStore = sqlite3TableColumnToStorage(p->pTab, iIdx);
}
- if( iIdx>=p->pCsr->nField || iIdx<0 ){
+ if( iStore>=p->pCsr->nField || iStore<0 ){
rc = SQLITE_RANGE;
goto preupdate_old_out;
}
@@ -2224,8 +2227,8 @@ int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
p->aRecord = aRec;
}
- pMem = *ppValue = &p->pUnpacked->aMem[iIdx];
- if( iIdx>=p->pUnpacked->nField ){
+ pMem = *ppValue = &p->pUnpacked->aMem[iStore];
+ if( iStore>=p->pUnpacked->nField ){
/* This occurs when the table has been extended using ALTER TABLE
** ADD COLUMN. The value to return is the default value of the column. */
Column *pCol = &p->pTab->aCol[iIdx];
@@ -2329,6 +2332,7 @@ int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
PreUpdate *p;
int rc = SQLITE_OK;
Mem *pMem;
+ int iStore = 0;
#ifdef SQLITE_ENABLE_API_ARMOR
if( db==0 || ppValue==0 ){
@@ -2341,9 +2345,12 @@ int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
goto preupdate_new_out;
}
if( p->pPk && p->op!=SQLITE_UPDATE ){
- iIdx = sqlite3TableColumnToIndex(p->pPk, iIdx);
+ iStore = sqlite3TableColumnToIndex(p->pPk, iIdx);
+ }else{
+ iStore = sqlite3TableColumnToStorage(p->pTab, iIdx);
}
- if( iIdx>=p->pCsr->nField || iIdx<0 ){
+
+ if( iStore>=p->pCsr->nField || iStore<0 ){
rc = SQLITE_RANGE;
goto preupdate_new_out;
}
@@ -2363,14 +2370,14 @@ int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
}
p->pNewUnpacked = pUnpack;
}
- pMem = &pUnpack->aMem[iIdx];
+ pMem = &pUnpack->aMem[iStore];
if( iIdx==p->pTab->iPKey ){
sqlite3VdbeMemSetInt64(pMem, p->iKey2);
- }else if( iIdx>=pUnpack->nField ){
+ }else if( iStore>=pUnpack->nField ){
pMem = (sqlite3_value *)columnNullValue();
}
}else{
- /* For an UPDATE, memory cell (p->iNewReg+1+iIdx) contains the required
+ /* For an UPDATE, memory cell (p->iNewReg+1+iStore) contains the required
** value. Make a copy of the cell contents and return a pointer to it.
** It is not safe to return a pointer to the memory cell itself as the
** caller may modify the value text encoding.
@@ -2383,13 +2390,13 @@ int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppValue){
goto preupdate_new_out;
}
}
- assert( iIdx>=0 && iIdx<p->pCsr->nField );
- pMem = &p->aNew[iIdx];
+ assert( iStore>=0 && iStore<p->pCsr->nField );
+ pMem = &p->aNew[iStore];
if( pMem->flags==0 ){
if( iIdx==p->pTab->iPKey ){
sqlite3VdbeMemSetInt64(pMem, p->iKey2);
}else{
- rc = sqlite3VdbeMemCopy(pMem, &p->v->aMem[p->iNewReg+1+iIdx]);
+ rc = sqlite3VdbeMemCopy(pMem, &p->v->aMem[p->iNewReg+1+iStore]);
if( rc!=SQLITE_OK ) goto preupdate_new_out;
}
}
diff --git a/src/where.c b/src/where.c
index 0b8e5acea..5cb52b8ad 100644
--- a/src/where.c
+++ b/src/where.c
@@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){
** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31
*/
void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
+ WhereInfo *pWInfo;
if( pWC ){
- WhereInfo *pWInfo = pWC->pWInfo;
+ pWInfo = pWC->pWInfo;
int nb = 1+(pWInfo->pTabList->nSrc+3)/4;
SrcItem *pItem = pWInfo->pTabList->a + p->iTab;
Table *pTab = pItem->pSTab;
@@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
sqlite3DebugPrintf(" %12s",
pItem->zAlias ? pItem->zAlias : pTab->zName);
}else{
+ pWInfo = 0;
sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d",
p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab);
}
@@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
}else{
sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm);
}
- sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
+ if( pWInfo && pWInfo->bStarUsed && p->rStarDelta!=0 ){
+ sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n",
+ p->rSetup, p->rRun, p->nOut, p->rStarDelta);
+ }else{
+ sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
+ }
if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){
int i;
for(i=0; i<p->nLTerm; i++){
@@ -5441,77 +5448,179 @@ static LogEst whereSortingCost(
** 18 for star queries
** 12 otherwise
**
-** For the purposes of SQLite, a star-query is defined as a query
-** with a large central table that is joined (using an INNER JOIN,
-** not a LEFT JOIN) against four or more smaller tables. The central
-** table is called the "fact" table. The smaller tables that get
-** joined are "dimension tables".
+** For the purposes of this heuristic, a star-query is defined as a query
+** with a large central table that is joined using an INNER JOIN,
+** not CROSS or OUTER JOINs, against four or more smaller tables.
+** The central table is called the "fact" table. The smaller tables
+** that get joined are "dimension tables". Also, any table that is
+** self-joined cannot be a dimension table; we assume that dimension
+** tables may only be joined against fact tables.
**
** SIDE EFFECT: (and really the whole point of this subroutine)
**
-** If pWInfo describes a star-query, then the cost on WhereLoops for the
-** fact table is reduced. This heuristic helps keep fact tables in
-** outer loops. Without this heuristic, paths with fact tables in outer
-** loops tend to get pruned by the mxChoice limit on the number of paths,
-** resulting in poor query plans. The total amount of heuristic cost
-** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment
-** for each WhereLoop is stored in its rStarDelta field.
+** If pWInfo describes a star-query, then the cost for SCANs of dimension
+** WhereLoops is increased to be slightly larger than the cost of a SCAN
+** in the fact table. Only SCAN costs are increased. SEARCH costs are
+** unchanged. This heuristic helps keep fact tables in outer loops. Without
+** this heuristic, paths with fact tables in outer loops tend to get pruned
+** by the mxChoice limit on the number of paths, resulting in poor query
+** plans. See the starschema1.test test module for examples of queries
+** that need this heuristic to find good query plans.
+**
+** This heuristic can be completely disabled, so that no query is
+** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to
+** disable the SQLITE_StarQuery optimization. In the CLI, the command
+** to do that is: ".testctrl opt -starquery".
+**
+** HISTORICAL NOTES:
+**
+** This optimization was first added on 2024-05-09 by check-in 38db9b5c83d.
+** The original optimization reduced the cost and output size estimate for
+** fact tables to help them move to outer loops. But months later (as people
+** started upgrading) performance regression reports started caming in,
+** including:
+**
+** forum post b18ef983e68d06d1 (2024-12-21)
+** forum post 0025389d0860af82 (2025-01-14)
+** forum post d87570a145599033 (2025-01-17)
+**
+** To address these, the criteria for a star-query was tightened to exclude
+** cases where the fact and dimensions are separated by an outer join, and
+** the affect of star-schema detection was changed to increase the rRun cost
+** on just full table scans of dimension tables, rather than reducing costs
+** in the all access methods of the fact table.
*/
-static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){
+static int computeMxChoice(WhereInfo *pWInfo){
int nLoop = pWInfo->nLevel; /* Number of terms in the join */
- if( nRowEst==0
- && nLoop>=5
+ WhereLoop *pWLoop; /* For looping over WhereLoops */
+
+#ifdef SQLITE_DEBUG
+ /* The star-query detection code below makes use of the following
+ ** properties of the WhereLoop list, so verify them before
+ ** continuing:
+ ** (1) .maskSelf is the bitmask corresponding to .iTab
+ ** (2) The WhereLoop list is in ascending .iTab order
+ */
+ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
+ assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) );
+ assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab );
+ }
+#endif /* SQLITE_DEBUG */
+
+ if( nLoop>=5
+ && !pWInfo->bStarDone
&& OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery)
){
- /* Check to see if we are dealing with a star schema and if so, reduce
- ** the cost of fact tables relative to dimension tables, as a heuristic
- ** to help keep the fact tables in outer loops.
+ SrcItem *aFromTabs; /* All terms of the FROM clause */
+ int iFromIdx; /* Term of FROM clause is the candidate fact-table */
+ Bitmask m; /* Bitmask for candidate fact-table */
+ Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */
+ WhereLoop *pStart; /* Where to start searching for dimension-tables */
+
+ pWInfo->bStarDone = 1; /* Only do this computation once */
+
+ /* Look for fact tables with four or more dimensions where the
+ ** dimension tables are not separately from the fact tables by an outer
+ ** or cross join. Adjust cost weights if found.
*/
- int iLoop; /* Counter over join terms */
- Bitmask m; /* Bitmask for current loop */
- assert( pWInfo->nOutStarDelta==0 );
- for(iLoop=0, m=1; iLoop<nLoop; iLoop++, m<<=1){
- WhereLoop *pWLoop; /* For looping over WhereLoops */
+ assert( !pWInfo->bStarUsed );
+ aFromTabs = pWInfo->pTabList->a;
+ pStart = pWInfo->pLoops;
+ for(iFromIdx=0, m=1; iFromIdx<nLoop; iFromIdx++, m<<=1){
int nDep = 0; /* Number of dimension tables */
- LogEst rDelta; /* Heuristic cost adjustment */
+ LogEst mxRun; /* Maximum SCAN cost of a fact table */
Bitmask mSeen = 0; /* Mask of dimension tables */
- for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
- if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */
+ SrcItem *pFactTab; /* The candidate fact table */
+
+ pFactTab = aFromTabs + iFromIdx;
+ if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
+ /* If the candidate fact-table is the right table of an outer join
+ ** restrict the search for dimension-tables to be tables to the right
+ ** of the fact-table. */
+ if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */
+ while( pStart && pStart->iTab<=iFromIdx ){
+ pStart = pStart->pNextLoop;
+ }
+ }
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
+ /* Fact-tables and dimension-tables cannot be separated by an
+ ** outer join (at least for the definition of fact- and dimension-
+ ** used by this heuristic). */
+ break;
+ }
+ if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */
&& (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */
- && (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0
- /* ^- pWInfo isn't a LEFT JOIN */
+ && (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */
){
- nDep++;
- mSeen |= pWLoop->maskSelf;
+ if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){
+ mSelfJoin |= m;
+ }else{
+ nDep++;
+ mSeen |= pWLoop->maskSelf;
+ }
}
}
if( nDep<=3 ) continue;
- rDelta = 15*(nDep-3);
-#ifdef WHERETRACE_ENABLED /* 0x4 */
- if( sqlite3WhereTrace&0x4 ){
- SrcItem *pItem = pWInfo->pTabList->a + iLoop;
- sqlite3DebugPrintf(
- "Fact-table %s(%d): %d dimensions, cost reduced %d\n",
- pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop,
- nDep, rDelta);
- }
-#endif
- if( pWInfo->nOutStarDelta==0 ){
+
+ /* If we reach this point, it means that pFactTab is a fact table
+ ** with four or more dimensions connected by inner joins. Proceed
+ ** to make cost adjustments. */
+
+#ifdef WHERETRACE_ENABLED
+ /* Make sure rStarDelta values are initialized */
+ if( !pWInfo->bStarUsed ){
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
pWLoop->rStarDelta = 0;
}
}
- pWInfo->nOutStarDelta += rDelta;
+#endif
+ pWInfo->bStarUsed = 1;
+
+ /* Compute the maximum cost of any WhereLoop for the
+ ** fact table plus one epsilon */
+ mxRun = LOGEST_MIN;
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( pWLoop->iTab<iFromIdx ) continue;
+ if( pWLoop->iTab>iFromIdx ) break;
+ if( pWLoop->rRun>mxRun ) mxRun = pWLoop->rRun;
+ }
+ if( ALWAYS(mxRun<LOGEST_MAX) ) mxRun++;
+
+ /* Increase the cost of table scans for dimension tables to be
+ ** slightly more than the maximum cost of the fact table */
+ for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
+ if( (pWLoop->maskSelf & mSeen)==0 ) continue;
+ if( pWLoop->nLTerm ) continue;
+ if( pWLoop->rRun<mxRun ){
+#ifdef WHERETRACE_ENABLED /* 0x80000 */
+ if( sqlite3WhereTrace & 0x80000 ){
+ SrcItem *pDim = aFromTabs + pWLoop->iTab;
+ sqlite3DebugPrintf(
+ "Increase SCAN cost of dimension %s(%d) of fact %s(%d) to %d\n",
+ pDim->zAlias ? pDim->zAlias: pDim->pSTab->zName, pWLoop->iTab,
+ pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName,
+ iFromIdx, mxRun
+ );
+ }
+ pWLoop->rStarDelta = mxRun - pWLoop->rRun;
+#endif /* WHERETRACE_ENABLED */
+ pWLoop->rRun = mxRun;
+ }
+ }
+ }
+#ifdef WHERETRACE_ENABLED /* 0x80000 */
+ if( (sqlite3WhereTrace & 0x80000)!=0 && pWInfo->bStarUsed ){
+ sqlite3DebugPrintf("WhereLoops changed by star-query heuristic:\n");
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
- if( pWLoop->maskSelf==m ){
- pWLoop->rRun -= rDelta;
- pWLoop->nOut -= rDelta;
- pWLoop->rStarDelta = rDelta;
+ if( pWLoop->rStarDelta ){
+ sqlite3WhereLoopPrint(pWLoop, &pWInfo->sWC);
}
}
- }
+ }
+#endif
}
- return pWInfo->nOutStarDelta>0 ? 18 : 12;
+ return pWInfo->bStarUsed ? 18 : 12;
}
/*
@@ -5586,8 +5695,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
mxChoice = 1;
}else if( nLoop==2 ){
mxChoice = 5;
+ }else if( pParse->nErr ){
+ mxChoice = 1;
}else{
- mxChoice = computeMxChoice(pWInfo, nRowEst);
+ mxChoice = computeMxChoice(pWInfo);
}
assert( nLoop<=pWInfo->pTabList->nSrc );
@@ -5838,8 +5949,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
if( sqlite3WhereTrace & 0x02 ){
LogEst rMin, rFloor = 0;
int nDone = 0;
+ int nProgress;
sqlite3DebugPrintf("---- after round %d ----\n", iLoop);
- while( nDone<nTo ){
+ do{
+ nProgress = 0;
rMin = 0x7fff;
for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){
if( pTo->rCost>rFloor && pTo->rCost<rMin ) rMin = pTo->rCost;
@@ -5855,10 +5968,11 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
sqlite3DebugPrintf("\n");
}
nDone++;
+ nProgress++;
}
}
rFloor = rMin;
- }
+ }while( nDone<nTo && nProgress>0 );
}
#endif
@@ -5952,7 +6066,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
}
}
- pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta;
+ pWInfo->nRowOut = pFrom->nRow;
+#ifdef WHERETRACE_ENABLED
+ pWInfo->rTotalCost = pFrom->rCost;
+#endif
/* Free temporary memory and return success */
sqlite3StackFreeNN(pParse->db, pSpace);
@@ -6350,7 +6467,6 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful(
}
}
nSearch += pLoop->nOut;
- if( pWInfo->nOutStarDelta ) nSearch += pLoop->rStarDelta;
}
}
@@ -6833,7 +6949,8 @@ WhereInfo *sqlite3WhereBegin(
assert( db->mallocFailed==0 );
#ifdef WHERETRACE_ENABLED
if( sqlite3WhereTrace ){
- sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut);
+ sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d",
+ pWInfo->rTotalCost, pWInfo->nRowOut);
if( pWInfo->nOBSat>0 ){
sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask);
}
diff --git a/src/whereInt.h b/src/whereInt.h
index f44c04041..8ba8a7072 100644
--- a/src/whereInt.h
+++ b/src/whereInt.h
@@ -162,8 +162,10 @@ struct WhereLoop {
/**** whereLoopXfer() copies fields above ***********************/
# define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot)
u16 nLSlot; /* Number of slots allocated for aLTerm[] */
+#ifdef WHERETRACE_ENABLED
LogEst rStarDelta; /* Cost delta due to star-schema heuristic. Not
- ** initialized unless pWInfo->nOutStarDelta>0 */
+ ** initialized unless pWInfo->bStarUsed */
+#endif
WhereTerm **aLTerm; /* WhereTerms used */
WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */
WhereTerm *aLTermSpace[3]; /* Initial aLTerm[] space */
@@ -485,9 +487,13 @@ struct WhereInfo {
unsigned bDeferredSeek :1; /* Uses OP_DeferredSeek */
unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */
unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */
- unsigned sorted :1; /* True if really sorted (not just grouped) */
- LogEst nOutStarDelta; /* Artifical nOut reduction for star-query */
+ unsigned sorted :1; /* True if really sorted (not just grouped) */
+ unsigned bStarDone :1; /* True if check for star-query is complete */
+ unsigned bStarUsed :1; /* True if star-query heuristic is used */
LogEst nRowOut; /* Estimated number of output rows */
+#ifdef WHERETRACE_ENABLED
+ LogEst rTotalCost; /* Total cost of the solution */
+#endif
int iTop; /* The very beginning of the WHERE loop */
int iEndWhere; /* End of the WHERE clause itself */
WhereLoop *pLoops; /* List of all WhereLoop objects */