aboutsummaryrefslogtreecommitdiff
path: root/src/select.c
diff options
context:
space:
mode:
authordrh <>2021-03-24 17:28:11 +0000
committerdrh <>2021-03-24 17:28:11 +0000
commit6db1c9da67d9921bfe7f1186b55dc59a996ab9d2 (patch)
treefb3f60351d1262da990ef9b42d0de4debc40fe56 /src/select.c
parent29821b46f5740ccabf7147aaea1db999a1289ace (diff)
parentb19a5812417f2b276689929a76c087bc54e6cf97 (diff)
downloadsqlite-6db1c9da67d9921bfe7f1186b55dc59a996ab9d2.tar.gz
sqlite-6db1c9da67d9921bfe7f1186b55dc59a996ab9d2.zip
Improvements to distinct aggregates such that they can sometimes avoid
using an ephermeral table to test for duplicates if the column that is distinct is part of an index. FossilOrigin-Name: ef4ac0ddd297bbd38351410c5a387e1628561b3f1bec9e4c2c9d76fbe29f955a
Diffstat (limited to 'src/select.c')
-rw-r--r--src/select.c271
1 files changed, 188 insertions, 83 deletions
diff --git a/src/select.c b/src/select.c
index cab90eb01..b94efd5f3 100644
--- a/src/select.c
+++ b/src/select.c
@@ -741,31 +741,146 @@ static void codeOffset(
}
/*
-** Add code that will check to make sure the N registers starting at iMem
-** form a distinct entry. iTab is a sorting index that holds previously
-** seen combinations of the N values. A new entry is made in iTab
-** if the current N values are new.
-**
-** A jump to addrRepeat is made and the N+1 values are popped from the
-** stack if the top N elements are not distinct.
+** Add code that will check to make sure the array of registers starting at
+** iMem form a distinct entry. This is used by both "SELECT DISTINCT ..." and
+** distinct aggregates ("SELECT count(DISTINCT <expr>) ..."). Three strategies
+** are available. Which is used depends on the value of parameter eTnctType,
+** as follows:
+**
+** WHERE_DISTINCT_UNORDERED/WHERE_DISTINCT_NOOP:
+** Parameter iTab is the cursor number of an ephemeral table that must
+** be opened before the VM code generated by this routine is executed.
+** The ephemeral cursor table is queried for a record identical to the
+** record formed by the current array of registers. If one is found,
+** jump to VM address addrRepeat. Otherwise, insert a new record into
+** the ephemeral cursor and proceed.
+**
+** The returned value in this case is a copy of parameter iTab.
+**
+** WHERE_DISTINCT_ORDERED:
+** In this case rows are being delivered sorted order sorted. The ephermal
+** table is not required in this case. Instead, the current set of
+** registers are compared to previous row. If they match, the new row
+** is not distinct and control jumps to VM address addrRepeat. Otherwise,
+** the VM program proceeds with processing the new row.
+**
+** The returned value in this case is the register number of the first
+** in an array of registers used to store the previous result row so that
+** it can be compared to the next. The caller must ensure that this cell
+** is initialized to NULL and has the "clear" flag set.
+**
+** WHERE_DISTINCT_UNIQUE:
+** In this case it has already been determined that the rows are distinct.
+** No special action is required. The return value is always zero.
+**
+** Parameter pEList is the list of expressions used to generated the
+** contents of each row. It is used by this routine to determine (a)
+** how many elements there are in the array of registers and (b) the
+** collation sequences that should be used for the comparisons if
+** eTnctType is WHERE_DISTINCT_ORDERED.
*/
-static void codeDistinct(
+static int codeDistinct(
Parse *pParse, /* Parsing and code generating context */
+ int eTnctType, /* WHERE_DISTINCT_* value */
int iTab, /* A sorting index used to test for distinctness */
int addrRepeat, /* Jump to here if not distinct */
- int N, /* Number of elements */
- int iMem /* First element */
+ ExprList *pEList, /* Expression for each element */
+ int regElem /* First element */
){
- Vdbe *v;
- int r1;
+ int iRet = 0;
+ int nResultCol = pEList->nExpr;
+ Vdbe *v = pParse->pVdbe;
- v = pParse->pVdbe;
- r1 = sqlite3GetTempReg(pParse);
- sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, iMem, N); VdbeCoverage(v);
- sqlite3VdbeAddOp3(v, OP_MakeRecord, iMem, N, r1);
- sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r1, iMem, N);
- sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
- sqlite3ReleaseTempReg(pParse, r1);
+ switch( eTnctType ){
+ case WHERE_DISTINCT_ORDERED: {
+ int i;
+ int iJump; /* Jump destination */
+ int regPrev; /* Previous row content */
+
+ /* Allocate space for the previous row */
+ iRet = regPrev = pParse->nMem+1;
+ pParse->nMem += nResultCol;
+
+ iJump = sqlite3VdbeCurrentAddr(v) + nResultCol;
+ for(i=0; i<nResultCol; i++){
+ CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[i].pExpr);
+ if( i<nResultCol-1 ){
+ sqlite3VdbeAddOp3(v, OP_Ne, regElem+i, iJump, regPrev+i);
+ VdbeCoverage(v);
+ }else{
+ sqlite3VdbeAddOp3(v, OP_Eq, regElem+i, addrRepeat, regPrev+i);
+ VdbeCoverage(v);
+ }
+ sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
+ sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
+ }
+ assert( sqlite3VdbeCurrentAddr(v)==iJump || pParse->db->mallocFailed );
+ sqlite3VdbeAddOp3(v, OP_Copy, regElem, regPrev, nResultCol-1);
+ break;
+ }
+
+ case WHERE_DISTINCT_UNIQUE: {
+ /* nothing to do */
+ break;
+ }
+
+ default: {
+ int r1 = sqlite3GetTempReg(pParse);
+ sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, regElem, nResultCol);
+ VdbeCoverage(v);
+ sqlite3VdbeAddOp3(v, OP_MakeRecord, regElem, nResultCol, r1);
+ sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r1, regElem, nResultCol);
+ sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+ sqlite3ReleaseTempReg(pParse, r1);
+ iRet = iTab;
+ break;
+ }
+ }
+
+ return iRet;
+}
+
+/*
+** A call to this function must be made for each call to codeDistinct().
+** Parameter is passed the value returned by that call to codeDistinct(),
+** and iOpenEphAddr the address of the instruction used to open the
+** ephemeral table (that may be) used by codeDistinct().
+**
+** Argument eTnctType is passed the strategy to be used for any DISTINCT
+** operation, as returned by sqlite3WhereIsDistinct(). If the strategy
+** is WHERE_DISTINCT_NOOP or WHERE_DISTINCT_UNORDERED, this function is
+** a no-op. Otherwise:
+**
+** WHERE_DISTINCT_UNIQUE:
+** In this case the ephemeral table is not required. So instruction
+** iOpenEphAddr is replaced by an OP_Noop.
+**
+** WHERE_DISTINCT_ORDERED:
+** In this case the ephemeral table is not required. The instruction
+** iOpenEphAddr is replaced by an OP_Null instruction to set register
+** iVal to a NULL value with the "clear" flag set (see comments above
+** codeDistinct() for details).
+*/
+static void fixDistinctOpenEph(
+ Parse *pParse, /* Parsing and code generating context */
+ int eTnctType, /* WHERE_DISTINCT_* value */
+ int iVal, /* Value returned by codeDistinct() */
+ int iOpenEphAddr /* Address of OP_OpenEphemeral instruction for iTab */
+){
+ if( eTnctType==WHERE_DISTINCT_UNIQUE || eTnctType==WHERE_DISTINCT_ORDERED ){
+ Vdbe *v = pParse->pVdbe;
+ sqlite3VdbeChangeToNoop(v, iOpenEphAddr);
+ if( eTnctType==WHERE_DISTINCT_ORDERED ){
+ /* Change the OP_OpenEphemeral to an OP_Null that sets the MEM_Cleared
+ ** bit on the first register of the previous value. This will cause the
+ ** OP_Ne added in codeDistinct() to always fail on the first iteration of
+ ** the loop even if the first row is all NULLs. */
+ VdbeOp *pOp = sqlite3VdbeGetOp(v, iOpenEphAddr);
+ pOp->opcode = OP_Null;
+ pOp->p1 = 1;
+ pOp->p2 = iVal;
+ }
+ }
}
#ifdef SQLITE_ENABLE_SORTER_REFERENCES
@@ -1013,59 +1128,11 @@ static void selectInnerLoop(
** part of the result.
*/
if( hasDistinct ){
- switch( pDistinct->eTnctType ){
- case WHERE_DISTINCT_ORDERED: {
- VdbeOp *pOp; /* No longer required OpenEphemeral instr. */
- int iJump; /* Jump destination */
- int regPrev; /* Previous row content */
-
- /* Allocate space for the previous row */
- regPrev = pParse->nMem+1;
- pParse->nMem += nResultCol;
-
- /* Change the OP_OpenEphemeral coded earlier to an OP_Null
- ** sets the MEM_Cleared bit on the first register of the
- ** previous value. This will cause the OP_Ne below to always
- ** fail on the first iteration of the loop even if the first
- ** row is all NULLs.
- */
- sqlite3VdbeChangeToNoop(v, pDistinct->addrTnct);
- pOp = sqlite3VdbeGetOp(v, pDistinct->addrTnct);
- pOp->opcode = OP_Null;
- pOp->p1 = 1;
- pOp->p2 = regPrev;
- pOp = 0; /* Ensure pOp is not used after sqlite3VdbeAddOp() */
-
- iJump = sqlite3VdbeCurrentAddr(v) + nResultCol;
- for(i=0; i<nResultCol; i++){
- CollSeq *pColl = sqlite3ExprCollSeq(pParse, p->pEList->a[i].pExpr);
- if( i<nResultCol-1 ){
- sqlite3VdbeAddOp3(v, OP_Ne, regResult+i, iJump, regPrev+i);
- VdbeCoverage(v);
- }else{
- sqlite3VdbeAddOp3(v, OP_Eq, regResult+i, iContinue, regPrev+i);
- VdbeCoverage(v);
- }
- sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
- sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
- }
- assert( sqlite3VdbeCurrentAddr(v)==iJump || pParse->db->mallocFailed );
- sqlite3VdbeAddOp3(v, OP_Copy, regResult, regPrev, nResultCol-1);
- break;
- }
-
- case WHERE_DISTINCT_UNIQUE: {
- sqlite3VdbeChangeToNoop(v, pDistinct->addrTnct);
- break;
- }
-
- default: {
- assert( pDistinct->eTnctType==WHERE_DISTINCT_UNORDERED );
- codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol,
- regResult);
- break;
- }
- }
+ int eType = pDistinct->eTnctType;
+ int iTab = pDistinct->tabTnct;
+ assert( nResultCol==p->pEList->nExpr );
+ iTab = codeDistinct(pParse, eType, iTab, iContinue, p->pEList, regResult);
+ fixDistinctOpenEph(pParse, eType, iTab, pDistinct->addrTnct);
if( pSort==0 ){
codeOffset(v, p->iOffset, iContinue);
}
@@ -5645,8 +5712,8 @@ static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){
pFunc->iDistinct = -1;
}else{
KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pE->x.pList,0,0);
- sqlite3VdbeAddOp4(v, OP_OpenEphemeral, pFunc->iDistinct, 0, 0,
- (char*)pKeyInfo, P4_KEYINFO);
+ pFunc->iDistAddr = sqlite3VdbeAddOp4(v, OP_OpenEphemeral,
+ pFunc->iDistinct, 0, 0, (char*)pKeyInfo, P4_KEYINFO);
}
}
}
@@ -5678,7 +5745,12 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){
** registers if register regAcc contains 0. The caller will take care
** of setting and clearing regAcc.
*/
-static void updateAccumulator(Parse *pParse, int regAcc, AggInfo *pAggInfo){
+static void updateAccumulator(
+ Parse *pParse,
+ int regAcc,
+ AggInfo *pAggInfo,
+ int eDistinctType
+){
Vdbe *v = pParse->pVdbe;
int i;
int regHit = 0;
@@ -5724,13 +5796,14 @@ static void updateAccumulator(Parse *pParse, int regAcc, AggInfo *pAggInfo){
nArg = 0;
regAgg = 0;
}
- if( pF->iDistinct>=0 ){
+ if( pF->iDistinct>=0 && pList ){
if( addrNext==0 ){
addrNext = sqlite3VdbeMakeLabel(pParse);
}
testcase( nArg==0 ); /* Error condition */
testcase( nArg>1 ); /* Also an error */
- codeDistinct(pParse, pF->iDistinct, addrNext, 1, regAgg);
+ pF->iDistinct = codeDistinct(pParse, eDistinctType,
+ pF->iDistinct, addrNext, pList, regAgg);
}
if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
CollSeq *pColl = 0;
@@ -6762,6 +6835,20 @@ int sqlite3Select(
int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */
int addrReset; /* Subroutine for resetting the accumulator */
int regReset; /* Return address register for reset subroutine */
+ ExprList *pDistinct = 0;
+ u16 distFlag = 0;
+ int eDist = WHERE_DISTINCT_NOOP;
+
+ if( pAggInfo->nFunc==1
+ && pAggInfo->aFunc[0].iDistinct>=0
+ && pAggInfo->aFunc[0].pFExpr->x.pList
+ ){
+ Expr *pExpr = pAggInfo->aFunc[0].pFExpr->x.pList->a[0].pExpr;
+ pExpr = sqlite3ExprDup(db, pExpr, 0);
+ pDistinct = sqlite3ExprListDup(db, pGroupBy, 0);
+ pDistinct = sqlite3ExprListAppend(pParse, pDistinct, pExpr);
+ distFlag = pDistinct ? WHERE_WANT_DISTINCT : 0;
+ }
/* If there is a GROUP BY clause we might need a sorting index to
** implement it. Allocate that sorting index now. If it turns out
@@ -6798,10 +6885,12 @@ int sqlite3Select(
*/
sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
SELECTTRACE(1,pParse,p,("WhereBegin\n"));
- pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, 0,
- WHERE_GROUPBY | (orderByGrp ? WHERE_SORTBYGROUP : 0), 0
+ pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, pDistinct,
+ WHERE_GROUPBY | (orderByGrp ? WHERE_SORTBYGROUP : 0) | distFlag, 0
);
+ sqlite3ExprListDelete(db, pDistinct);
if( pWInfo==0 ) goto select_end;
+ eDist = sqlite3WhereIsDistinct(pWInfo);
SELECTTRACE(1,pParse,p,("WhereBegin returns\n"));
if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){
/* The optimizer is able to deliver rows in group by order so
@@ -6919,7 +7008,7 @@ int sqlite3Select(
** the current row
*/
sqlite3VdbeJumpHere(v, addr1);
- updateAccumulator(pParse, iUseFlag, pAggInfo);
+ updateAccumulator(pParse, iUseFlag, pAggInfo, eDist);
sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag);
VdbeComment((v, "indicate data in accumulator"));
@@ -6975,7 +7064,11 @@ int sqlite3Select(
sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag);
VdbeComment((v, "indicate accumulator empty"));
sqlite3VdbeAddOp1(v, OP_Return, regReset);
-
+
+ if( eDist!=WHERE_DISTINCT_NOOP ){
+ struct AggInfo_func *pF = &pAggInfo->aFunc[0];
+ fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr);
+ }
} /* endif pGroupBy. Begin aggregate queries without GROUP BY: */
else {
Table *pTab;
@@ -7039,6 +7132,9 @@ int sqlite3Select(
explainSimpleCount(pParse, pTab, pBest);
}else{
int regAcc = 0; /* "populate accumulators" flag */
+ ExprList *pDistinct = 0;
+ u16 distFlag = 0;
+ int eDist;
/* If there are accumulator registers but no min() or max() functions
** without FILTER clauses, allocate register regAcc. Register regAcc
@@ -7062,6 +7158,9 @@ int sqlite3Select(
regAcc = ++pParse->nMem;
sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc);
}
+ }else if( pAggInfo->nFunc==1 && pAggInfo->aFunc[0].iDistinct>=0 ){
+ pDistinct = pAggInfo->aFunc[0].pFExpr->x.pList;
+ distFlag = pDistinct ? WHERE_WANT_DISTINCT : 0;
}
/* This case runs if the aggregate has no GROUP BY clause. The
@@ -7081,12 +7180,18 @@ int sqlite3Select(
SELECTTRACE(1,pParse,p,("WhereBegin\n"));
pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy,
- 0, minMaxFlag, 0);
+ pDistinct, minMaxFlag|distFlag, 0);
if( pWInfo==0 ){
goto select_end;
}
SELECTTRACE(1,pParse,p,("WhereBegin returns\n"));
- updateAccumulator(pParse, regAcc, pAggInfo);
+ eDist = sqlite3WhereIsDistinct(pWInfo);
+ updateAccumulator(pParse, regAcc, pAggInfo, eDist);
+ if( eDist!=WHERE_DISTINCT_NOOP ){
+ struct AggInfo_func *pF = &pAggInfo->aFunc[0];
+ fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr);
+ }
+
if( regAcc ) sqlite3VdbeAddOp2(v, OP_Integer, 1, regAcc);
if( minMaxFlag ){
sqlite3WhereMinMaxOptEarlyOut(v, pWInfo);