diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backup.c | 14 | ||||
-rw-r--r-- | src/btree.c | 2 | ||||
-rw-r--r-- | src/expr.c | 6 | ||||
-rw-r--r-- | src/fkey.c | 16 | ||||
-rw-r--r-- | src/journal.c | 8 | ||||
-rw-r--r-- | src/parse.y | 37 | ||||
-rw-r--r-- | src/pragma.c | 128 | ||||
-rw-r--r-- | src/resolve.c | 124 | ||||
-rw-r--r-- | src/select.c | 139 | ||||
-rw-r--r-- | src/shell.c | 6 | ||||
-rw-r--r-- | src/sqlite.h.in | 2 | ||||
-rw-r--r-- | src/sqliteInt.h | 57 | ||||
-rw-r--r-- | src/tclsqlite.c | 4 | ||||
-rw-r--r-- | src/test_regexp.c | 757 | ||||
-rw-r--r-- | src/vdbe.c | 7 | ||||
-rw-r--r-- | src/where.c | 159 |
16 files changed, 1269 insertions, 197 deletions
diff --git a/src/backup.c b/src/backup.c index b234716d6..d3614b88c 100644 --- a/src/backup.c +++ b/src/backup.c @@ -212,7 +212,12 @@ static int isFatalError(int rc){ ** page iSrcPg from the source database. Copy this data into the ** destination database. */ -static int backupOnePage(sqlite3_backup *p, Pgno iSrcPg, const u8 *zSrcData){ +static int backupOnePage( + sqlite3_backup *p, /* Backup handle */ + Pgno iSrcPg, /* Source database page to backup */ + const u8 *zSrcData, /* Source database page data */ + int bUpdate /* True for an update, false otherwise */ +){ Pager * const pDestPager = sqlite3BtreePager(p->pDest); const int nSrcPgsz = sqlite3BtreeGetPageSize(p->pSrc); int nDestPgsz = sqlite3BtreeGetPageSize(p->pDest); @@ -285,6 +290,9 @@ static int backupOnePage(sqlite3_backup *p, Pgno iSrcPg, const u8 *zSrcData){ */ memcpy(zOut, zIn, nCopy); ((u8 *)sqlite3PagerGetExtra(pDestPg))[0] = 0; + if( iOff==0 && bUpdate==0 ){ + sqlite3Put4byte(&zOut[28], sqlite3BtreeLastPage(p->pSrc)); + } } sqlite3PagerUnref(pDestPg); } @@ -391,7 +399,7 @@ int sqlite3_backup_step(sqlite3_backup *p, int nPage){ DbPage *pSrcPg; /* Source page object */ rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg); if( rc==SQLITE_OK ){ - rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg)); + rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0); sqlite3PagerUnref(pSrcPg); } } @@ -639,7 +647,7 @@ void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ int rc; assert( p->pDestDb ); sqlite3_mutex_enter(p->pDestDb->mutex); - rc = backupOnePage(p, iPage, aData); + rc = backupOnePage(p, iPage, aData, 1); sqlite3_mutex_leave(p->pDestDb->mutex); assert( rc!=SQLITE_BUSY && rc!=SQLITE_LOCKED ); if( rc!=SQLITE_OK ){ diff --git a/src/btree.c b/src/btree.c index 246843b79..bb64e643b 100644 --- a/src/btree.c +++ b/src/btree.c @@ -8026,7 +8026,7 @@ char *sqlite3BtreeIntegrityCheck( } i = PENDING_BYTE_PAGE(pBt); if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); - sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), 20000); + sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); sCheck.errMsg.useMalloc = 2; /* Check the integrity of the freelist diff --git a/src/expr.c b/src/expr.c index 9ca34ec7b..5ba261691 100644 --- a/src/expr.c +++ b/src/expr.c @@ -3281,6 +3281,12 @@ void sqlite3ExplainExprList(Vdbe *pOut, ExprList *pList){ sqlite3ExplainPush(pOut); sqlite3ExplainExpr(pOut, pList->a[i].pExpr); sqlite3ExplainPop(pOut); + if( pList->a[i].zName ){ + sqlite3ExplainPrintf(pOut, " AS %s", pList->a[i].zName); + } + if( pList->a[i].bSpanIsTab ){ + sqlite3ExplainPrintf(pOut, " (%s)", pList->a[i].zSpan); + } if( i<pList->nExpr-1 ){ sqlite3ExplainNL(pOut); } diff --git a/src/fkey.c b/src/fkey.c index 2d01e2524..421311dc3 100644 --- a/src/fkey.c +++ b/src/fkey.c @@ -142,7 +142,7 @@ ** A foreign key constraint requires that the key columns in the parent ** table are collectively subject to a UNIQUE or PRIMARY KEY constraint. ** Given that pParent is the parent table for foreign key constraint pFKey, -** search the schema a unique index on the parent key columns. +** search the schema for a unique index on the parent key columns. ** ** If successful, zero is returned. If the parent key is an INTEGER PRIMARY ** KEY column, then output variable *ppIdx is set to NULL. Otherwise, *ppIdx @@ -178,7 +178,7 @@ ** into pParse. If an OOM error occurs, non-zero is returned and the ** pParse->db->mallocFailed flag is set. */ -static int locateFkeyIndex( +int sqlite3FkLocateIndex( Parse *pParse, /* Parse context to store any error in */ Table *pParent, /* Parent table of FK constraint pFKey */ FKey *pFKey, /* Foreign key to find index for */ @@ -275,7 +275,9 @@ static int locateFkeyIndex( if( !pIdx ){ if( !pParse->disableTriggers ){ - sqlite3ErrorMsg(pParse, "foreign key mismatch"); + sqlite3ErrorMsg(pParse, + "foreign key mismatch - \"%w\" referencing \"%w\"", + pFKey->pFrom->zName, pFKey->zTo); } sqlite3DbFree(pParse->db, aiCol); return 1; @@ -736,7 +738,7 @@ void sqlite3FkCheck( }else{ pTo = sqlite3LocateTable(pParse, 0, pFKey->zTo, zDb); } - if( !pTo || locateFkeyIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){ + if( !pTo || sqlite3FkLocateIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){ assert( isIgnoreErrors==0 || (regOld!=0 && regNew==0) ); if( !isIgnoreErrors || db->mallocFailed ) return; if( pTo==0 ){ @@ -816,7 +818,7 @@ void sqlite3FkCheck( continue; } - if( locateFkeyIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ){ + if( sqlite3FkLocateIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ){ if( !isIgnoreErrors || db->mallocFailed ) return; continue; } @@ -871,7 +873,7 @@ u32 sqlite3FkOldmask( } for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){ Index *pIdx = 0; - locateFkeyIndex(pParse, pTab, p, &pIdx, 0); + sqlite3FkLocateIndex(pParse, pTab, p, &pIdx, 0); if( pIdx ){ for(i=0; i<pIdx->nColumn; i++) mask |= COLUMN_MASK(pIdx->aiColumn[i]); } @@ -997,7 +999,7 @@ static Trigger *fkActionTrigger( int i; /* Iterator variable */ Expr *pWhen = 0; /* WHEN clause for the trigger */ - if( locateFkeyIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ) return 0; + if( sqlite3FkLocateIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ) return 0; assert( aiCol || pFKey->nCol==1 ); for(i=0; i<pFKey->nCol; i++){ diff --git a/src/journal.c b/src/journal.c index 06605cc95..fed27be3e 100644 --- a/src/journal.c +++ b/src/journal.c @@ -59,6 +59,14 @@ static int createFile(JournalFile *p){ assert(p->iSize<=p->nBuf); rc = sqlite3OsWrite(p->pReal, p->zBuf, p->iSize, 0); } + if( rc!=SQLITE_OK ){ + /* If an error occurred while writing to the file, close it before + ** returning. This way, SQLite uses the in-memory journal data to + ** roll back changes made to the internal page-cache before this + ** function was called. */ + sqlite3OsClose(pReal); + p->pReal = 0; + } } } return rc; diff --git a/src/parse.y b/src/parse.y index 0bfe823d4..abc0e7dc0 100644 --- a/src/parse.y +++ b/src/parse.y @@ -435,8 +435,8 @@ oneselect(A) ::= SELECT distinct(D) selcollist(W) from(X) where_opt(Y) // The "distinct" nonterminal is true (1) if the DISTINCT keyword is // present and false (0) if it is not. // -%type distinct {int} -distinct(A) ::= DISTINCT. {A = 1;} +%type distinct {u16} +distinct(A) ::= DISTINCT. {A = SF_Distinct;} distinct(A) ::= ALL. {A = 0;} distinct(A) ::= . {A = 0;} @@ -499,7 +499,8 @@ stl_prefix(A) ::= seltablist(X) joinop(Y). { if( ALWAYS(A && A->nSrc>0) ) A->a[A->nSrc-1].jointype = (u8)Y; } stl_prefix(A) ::= . {A = 0;} -seltablist(A) ::= stl_prefix(X) nm(Y) dbnm(D) as(Z) indexed_opt(I) on_opt(N) using_opt(U). { +seltablist(A) ::= stl_prefix(X) nm(Y) dbnm(D) as(Z) indexed_opt(I) + on_opt(N) using_opt(U). { A = sqlite3SrcListAppendFromTerm(pParse,X,&Y,&D,&Z,0,N,U); sqlite3SrcListIndexedBy(pParse, A, &I); } @@ -512,25 +513,23 @@ seltablist(A) ::= stl_prefix(X) nm(Y) dbnm(D) as(Z) indexed_opt(I) on_opt(N) usi as(Z) on_opt(N) using_opt(U). { if( X==0 && Z.n==0 && N==0 && U==0 ){ A = F; + }else if( F->nSrc==1 ){ + A = sqlite3SrcListAppendFromTerm(pParse,X,0,0,&Z,0,N,U); + if( A ){ + struct SrcList_item *pNew = &A->a[A->nSrc-1]; + struct SrcList_item *pOld = F->a; + pNew->zName = pOld->zName; + pNew->zDatabase = pOld->zDatabase; + pOld->zName = pOld->zDatabase = 0; + } + sqlite3SrcListDelete(pParse->db, F); }else{ Select *pSubquery; sqlite3SrcListShiftJoinType(F); - pSubquery = sqlite3SelectNew(pParse,0,F,0,0,0,0,0,0,0); + pSubquery = sqlite3SelectNew(pParse,0,F,0,0,0,0,SF_NestedFrom,0,0); A = sqlite3SrcListAppendFromTerm(pParse,X,0,0,&Z,pSubquery,N,U); } } - - // A seltablist_paren nonterminal represents anything in a FROM that - // is contained inside parentheses. This can be either a subquery or - // a grouping of table and subqueries. - // -// %type seltablist_paren {Select*} -// %destructor seltablist_paren {sqlite3SelectDelete(pParse->db, $$);} -// seltablist_paren(A) ::= select(S). {A = S;} -// seltablist_paren(A) ::= seltablist(F). { -// sqlite3SrcListShiftJoinType(F); -// A = sqlite3SelectNew(pParse,0,F,0,0,0,0,0,0,0); -// } %endif SQLITE_OMIT_SUBQUERY %type dbnm {Token} @@ -653,7 +652,8 @@ where_opt(A) ::= WHERE expr(X). {A = X.pExpr;} ////////////////////////// The UPDATE command //////////////////////////////// // %ifdef SQLITE_ENABLE_UPDATE_DELETE_LIMIT -cmd ::= UPDATE orconf(R) fullname(X) indexed_opt(I) SET setlist(Y) where_opt(W) orderby_opt(O) limit_opt(L). { +cmd ::= UPDATE orconf(R) fullname(X) indexed_opt(I) SET setlist(Y) where_opt(W) + orderby_opt(O) limit_opt(L). { sqlite3SrcListIndexedBy(pParse, X, &I); sqlite3ExprListCheckLength(pParse,Y,"set list"); W = sqlite3LimitWhere(pParse, X, W, O, L.pLimit, L.pOffset, "UPDATE"); @@ -661,7 +661,8 @@ cmd ::= UPDATE orconf(R) fullname(X) indexed_opt(I) SET setlist(Y) where_opt(W) } %endif %ifndef SQLITE_ENABLE_UPDATE_DELETE_LIMIT -cmd ::= UPDATE orconf(R) fullname(X) indexed_opt(I) SET setlist(Y) where_opt(W). { +cmd ::= UPDATE orconf(R) fullname(X) indexed_opt(I) SET setlist(Y) + where_opt(W). { sqlite3SrcListIndexedBy(pParse, X, &I); sqlite3ExprListCheckLength(pParse,Y,"set list"); sqlite3Update(pParse,X,Y,W,R); diff --git a/src/pragma.c b/src/pragma.c index d3061b234..7e8b59b8e 100644 --- a/src/pragma.c +++ b/src/pragma.c @@ -948,9 +948,11 @@ void sqlite3Pragma( if( sqlite3ReadSchema(pParse) ) goto pragma_out; pTab = sqlite3FindTable(db, zRight, zDb); if( pTab ){ - int i; + int i, k; int nHidden = 0; Column *pCol; + Index *pPk; + for(pPk=pTab->pIndex; pPk && pPk->autoIndex!=2; pPk=pPk->pNext){} sqlite3VdbeSetNumCols(v, 6); pParse->nMem = 6; sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "cid", SQLITE_STATIC); @@ -975,8 +977,14 @@ void sqlite3Pragma( }else{ sqlite3VdbeAddOp2(v, OP_Null, 0, 5); } - sqlite3VdbeAddOp2(v, OP_Integer, - (pCol->colFlags&COLFLAG_PRIMKEY)!=0, 6); + if( (pCol->colFlags & COLFLAG_PRIMKEY)==0 ){ + k = 0; + }else if( pPk==0 ){ + k = 1; + }else{ + for(k=1; ALWAYS(k<=pTab->nCol) && pPk->aiColumn[k-1]!=i; k++){} + } + sqlite3VdbeAddOp2(v, OP_Integer, k, 6); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 6); } } @@ -1114,6 +1122,120 @@ void sqlite3Pragma( }else #endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */ +#ifndef SQLITE_OMIT_FOREIGN_KEY + if( sqlite3StrICmp(zLeft, "foreign_key_check")==0 ){ + FKey *pFK; /* A foreign key constraint */ + Table *pTab; /* Child table contain "REFERENCES" keyword */ + Table *pParent; /* Parent table that child points to */ + Index *pIdx; /* Index in the parent table */ + int i; /* Loop counter: Foreign key number for pTab */ + int j; /* Loop counter: Field of the foreign key */ + HashElem *k; /* Loop counter: Next table in schema */ + int x; /* result variable */ + int regResult; /* 3 registers to hold a result row */ + int regKey; /* Register to hold key for checking the FK */ + int regRow; /* Registers to hold a row from pTab */ + int addrTop; /* Top of a loop checking foreign keys */ + int addrOk; /* Jump here if the key is OK */ + int *aiCols; /* child to parent column mapping */ + + if( sqlite3ReadSchema(pParse) ) goto pragma_out; + regResult = pParse->nMem+1; + pParse->nMem += 4; + regKey = ++pParse->nMem; + regRow = ++pParse->nMem; + v = sqlite3GetVdbe(pParse); + sqlite3VdbeSetNumCols(v, 4); + sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "table", SQLITE_STATIC); + sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "rowid", SQLITE_STATIC); + sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "parent", SQLITE_STATIC); + sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "fkid", SQLITE_STATIC); + sqlite3CodeVerifySchema(pParse, iDb); + k = sqliteHashFirst(&db->aDb[iDb].pSchema->tblHash); + while( k ){ + if( zRight ){ + pTab = sqlite3LocateTable(pParse, 0, zRight, zDb); + k = 0; + }else{ + pTab = (Table*)sqliteHashData(k); + k = sqliteHashNext(k); + } + if( pTab==0 || pTab->pFKey==0 ) continue; + sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); + if( pTab->nCol+regRow>pParse->nMem ) pParse->nMem = pTab->nCol + regRow; + sqlite3OpenTable(pParse, 0, iDb, pTab, OP_OpenRead); + sqlite3VdbeAddOp4(v, OP_String8, 0, regResult, 0, pTab->zName, + P4_TRANSIENT); + for(i=1, pFK=pTab->pFKey; pFK; i++, pFK=pFK->pNextFrom){ + pParent = sqlite3LocateTable(pParse, 0, pFK->zTo, zDb); + if( pParent==0 ) break; + pIdx = 0; + sqlite3TableLock(pParse, iDb, pParent->tnum, 0, pParent->zName); + x = sqlite3FkLocateIndex(pParse, pParent, pFK, &pIdx, 0); + if( x==0 ){ + if( pIdx==0 ){ + sqlite3OpenTable(pParse, i, iDb, pParent, OP_OpenRead); + }else{ + KeyInfo *pKey = sqlite3IndexKeyinfo(pParse, pIdx); + sqlite3VdbeAddOp3(v, OP_OpenRead, i, pIdx->tnum, iDb); + sqlite3VdbeChangeP4(v, -1, (char*)pKey, P4_KEYINFO_HANDOFF); + } + }else{ + k = 0; + break; + } + } + if( pFK ) break; + if( pParse->nTab<i ) pParse->nTab = i; + addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, 0); + for(i=1, pFK=pTab->pFKey; pFK; i++, pFK=pFK->pNextFrom){ + pParent = sqlite3LocateTable(pParse, 0, pFK->zTo, zDb); + assert( pParent!=0 ); + pIdx = 0; + aiCols = 0; + x = sqlite3FkLocateIndex(pParse, pParent, pFK, &pIdx, &aiCols); + assert( x==0 ); + addrOk = sqlite3VdbeMakeLabel(v); + if( pIdx==0 ){ + int iKey = pFK->aCol[0].iFrom; + assert( iKey>=0 && iKey<pTab->nCol ); + if( iKey!=pTab->iPKey ){ + sqlite3VdbeAddOp3(v, OP_Column, 0, iKey, regRow); + sqlite3ColumnDefault(v, pTab, iKey, regRow); + sqlite3VdbeAddOp2(v, OP_IsNull, regRow, addrOk); + sqlite3VdbeAddOp2(v, OP_MustBeInt, regRow, + sqlite3VdbeCurrentAddr(v)+3); + }else{ + sqlite3VdbeAddOp2(v, OP_Rowid, 0, regRow); + } + sqlite3VdbeAddOp3(v, OP_NotExists, i, 0, regRow); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrOk); + sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-2); + }else{ + for(j=0; j<pFK->nCol; j++){ + sqlite3ExprCodeGetColumnOfTable(v, pTab, 0, + aiCols ? aiCols[j] : pFK->aCol[0].iFrom, regRow+j); + sqlite3VdbeAddOp2(v, OP_IsNull, regRow+j, addrOk); + } + sqlite3VdbeAddOp3(v, OP_MakeRecord, regRow, pFK->nCol, regKey); + sqlite3VdbeChangeP4(v, -1, + sqlite3IndexAffinityStr(v,pIdx), P4_TRANSIENT); + sqlite3VdbeAddOp4Int(v, OP_Found, i, addrOk, regKey, 0); + } + sqlite3VdbeAddOp2(v, OP_Rowid, 0, regResult+1); + sqlite3VdbeAddOp4(v, OP_String8, 0, regResult+2, 0, + pFK->zTo, P4_TRANSIENT); + sqlite3VdbeAddOp2(v, OP_Integer, i-1, regResult+3); + sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, 4); + sqlite3VdbeResolveLabel(v, addrOk); + sqlite3DbFree(db, aiCols); + } + sqlite3VdbeAddOp2(v, OP_Next, 0, addrTop+1); + sqlite3VdbeJumpHere(v, addrTop); + } + }else +#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */ + #ifndef NDEBUG if( sqlite3StrICmp(zLeft, "parser_trace")==0 ){ if( zRight ){ diff --git a/src/resolve.c b/src/resolve.c index 51aab7792..944fb5cad 100644 --- a/src/resolve.c +++ b/src/resolve.c @@ -150,6 +150,35 @@ static int nameInUsingClause(IdList *pUsing, const char *zCol){ return 0; } +/* +** Subqueries stores the original database, table and column names for their +** result sets in ExprList.a[].zSpan, in the form "DATABASE.TABLE.COLUMN". +** Check to see if the zSpan given to this routine matches the zDb, zTab, +** and zCol. If any of zDb, zTab, and zCol are NULL then those fields will +** match anything. +*/ +int sqlite3MatchSpanName( + const char *zSpan, + const char *zCol, + const char *zTab, + const char *zDb +){ + int n; + for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){} + if( zDb && sqlite3StrNICmp(zSpan, zDb, n)!=0 ){ + return 0; + } + zSpan += n+1; + for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){} + if( zTab && sqlite3StrNICmp(zSpan, zTab, n)!=0 ){ + return 0; + } + zSpan += n+1; + if( zCol && sqlite3StrICmp(zSpan, zCol)!=0 ){ + return 0; + } + return 1; +} /* ** Given the name of a column of the form X.Y.Z or Y.Z or just Z, look up @@ -206,6 +235,20 @@ static int lookupName( pExpr->pTab = 0; ExprSetIrreducible(pExpr); + /* Translate the schema name in zDb into a pointer to the corresponding + ** schema. If not found, pSchema will remain NULL and nothing will match + ** resulting in an appropriate error message toward the end of this routine + */ + if( zDb ){ + for(i=0; i<db->nDb; i++){ + assert( db->aDb[i].zName ); + if( sqlite3StrICmp(db->aDb[i].zName,zDb)==0 ){ + pSchema = db->aDb[i].pSchema; + break; + } + } + } + /* Start at the inner-most context and move outward until a match is found */ while( pNC && cnt==0 ){ ExprList *pEList; @@ -214,31 +257,36 @@ static int lookupName( if( pSrcList ){ for(i=0, pItem=pSrcList->a; i<pSrcList->nSrc; i++, pItem++){ Table *pTab; - int iDb; Column *pCol; pTab = pItem->pTab; assert( pTab!=0 && pTab->zName!=0 ); - iDb = sqlite3SchemaToIndex(db, pTab->pSchema); assert( pTab->nCol>0 ); - if( zTab ){ - if( pItem->zAlias ){ - char *zTabName = pItem->zAlias; - if( sqlite3StrICmp(zTabName, zTab)!=0 ) continue; - }else{ - char *zTabName = pTab->zName; - if( NEVER(zTabName==0) || sqlite3StrICmp(zTabName, zTab)!=0 ){ - continue; - } - if( zDb!=0 && sqlite3StrICmp(db->aDb[iDb].zName, zDb)!=0 ){ - continue; + if( pItem->pSelect && (pItem->pSelect->selFlags & SF_NestedFrom)!=0 ){ + ExprList *pEList = pItem->pSelect->pEList; + int hit = 0; + for(j=0; j<pEList->nExpr; j++){ + if( sqlite3MatchSpanName(pEList->a[j].zSpan, zCol, zTab, zDb) ){ + cnt++; + cntTab = 2; + pMatch = pItem; + pExpr->iColumn = j; + hit = 1; } } + if( hit || zTab==0 ) continue; + } + if( zDb && pTab->pSchema!=pSchema ){ + continue; + } + if( zTab ){ + const char *zTabName = pItem->zAlias ? pItem->zAlias : pTab->zName; + assert( zTabName!=0 ); + if( sqlite3StrICmp(zTabName, zTab)!=0 ){ + continue; + } } if( 0==(cntTab++) ){ - pExpr->iTable = pItem->iCursor; - pExpr->pTab = pTab; - pSchema = pTab->pSchema; pMatch = pItem; } for(j=0, pCol=pTab->aCol; j<pTab->nCol; j++, pCol++){ @@ -252,17 +300,19 @@ static int lookupName( if( nameInUsingClause(pItem->pUsing, zCol) ) continue; } cnt++; - pExpr->iTable = pItem->iCursor; - pExpr->pTab = pTab; pMatch = pItem; - pSchema = pTab->pSchema; /* Substitute the rowid (column -1) for the INTEGER PRIMARY KEY */ pExpr->iColumn = j==pTab->iPKey ? -1 : (i16)j; break; } } } - } + if( pMatch ){ + pExpr->iTable = pMatch->iCursor; + pExpr->pTab = pMatch->pTab; + pSchema = pExpr->pTab->pSchema; + } + } /* if( pSrcList ) */ #ifndef SQLITE_OMIT_TRIGGER /* If we have not already resolved the name, then maybe @@ -1033,23 +1083,6 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ return WRC_Abort; } - /* Set up the local name-context to pass to sqlite3ResolveExprNames() to - ** resolve the result-set expression list. - */ - sNC.ncFlags = NC_AllowAgg; - sNC.pSrcList = p->pSrc; - sNC.pNext = pOuterNC; - - /* Resolve names in the result set. */ - pEList = p->pEList; - assert( pEList!=0 ); - for(i=0; i<pEList->nExpr; i++){ - Expr *pX = pEList->a[i].pExpr; - if( sqlite3ResolveExprNames(&sNC, pX) ){ - return WRC_Abort; - } - } - /* Recursively resolve names in all subqueries */ for(i=0; i<p->pSrc->nSrc; i++){ @@ -1077,6 +1110,23 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ } } + /* Set up the local name-context to pass to sqlite3ResolveExprNames() to + ** resolve the result-set expression list. + */ + sNC.ncFlags = NC_AllowAgg; + sNC.pSrcList = p->pSrc; + sNC.pNext = pOuterNC; + + /* Resolve names in the result set. */ + pEList = p->pEList; + assert( pEList!=0 ); + for(i=0; i<pEList->nExpr; i++){ + Expr *pX = pEList->a[i].pExpr; + if( sqlite3ResolveExprNames(&sNC, pX) ){ + return WRC_Abort; + } + } + /* If there are no aggregate functions in the result-set, and no GROUP BY ** expression, do not allow aggregates in any of the other expressions. */ diff --git a/src/select.c b/src/select.c index a90877dc6..316889480 100644 --- a/src/select.c +++ b/src/select.c @@ -55,7 +55,7 @@ Select *sqlite3SelectNew( ExprList *pGroupBy, /* the GROUP BY clause */ Expr *pHaving, /* the HAVING clause */ ExprList *pOrderBy, /* the ORDER BY clause */ - int isDistinct, /* true if the DISTINCT keyword is present */ + u16 selFlags, /* Flag parameters, such as SF_Distinct */ Expr *pLimit, /* LIMIT value. NULL means not used */ Expr *pOffset /* OFFSET value. NULL means no offset */ ){ @@ -79,7 +79,7 @@ Select *sqlite3SelectNew( pNew->pGroupBy = pGroupBy; pNew->pHaving = pHaving; pNew->pOrderBy = pOrderBy; - pNew->selFlags = isDistinct ? SF_Distinct : 0; + pNew->selFlags = selFlags; pNew->op = TK_SELECT; pNew->pLimit = pLimit; pNew->pOffset = pOffset; @@ -1336,8 +1336,6 @@ static int selectColumnsFromExprList( /* Get an appropriate name for the column */ p = sqlite3ExprSkipCollate(pEList->a[i].pExpr); - assert( p->pRight==0 || ExprHasProperty(p->pRight, EP_IntValue) - || p->pRight->u.zToken==0 || p->pRight->u.zToken[0]!=0 ); if( (zName = pEList->a[i].zName)!=0 ){ /* If the column contains an "AS <name>" phrase, use <name> as the name */ zName = sqlite3DbStrDup(db, zName); @@ -1375,6 +1373,9 @@ static int selectColumnsFromExprList( for(j=cnt=0; j<i; j++){ if( sqlite3StrICmp(aCol[j].zName, zName)==0 ){ char *zNewName; + int k; + for(k=nName-1; k>1 && sqlite3Isdigit(zName[k]); k--){} + if( zName[k]==':' ) nName = k; zName[nName] = 0; zNewName = sqlite3MPrintf(db, "%s:%d", zName, ++cnt); sqlite3DbFree(db, zName); @@ -3160,34 +3161,43 @@ static int flattenSubquery( #endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ /* -** Analyze the SELECT statement passed as an argument to see if it -** is a min() or max() query. Return WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX if -** it is, or 0 otherwise. At present, a query is considered to be -** a min()/max() query if: +** Based on the contents of the AggInfo structure indicated by the first +** argument, this function checks if the following are true: ** -** 1. There is a single object in the FROM clause. +** * the query contains just a single aggregate function, +** * the aggregate function is either min() or max(), and +** * the argument to the aggregate function is a column value. ** -** 2. There is a single expression in the result set, and it is -** either min(x) or max(x), where x is a column reference. +** If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX +** is returned as appropriate. Also, *ppMinMax is set to point to the +** list of arguments passed to the aggregate before returning. +** +** Or, if the conditions above are not met, *ppMinMax is set to 0 and +** WHERE_ORDERBY_NORMAL is returned. */ -static u8 minMaxQuery(Select *p){ - Expr *pExpr; - ExprList *pEList = p->pEList; - - if( pEList->nExpr!=1 ) return WHERE_ORDERBY_NORMAL; - pExpr = pEList->a[0].pExpr; - if( pExpr->op!=TK_AGG_FUNCTION ) return 0; - if( NEVER(ExprHasProperty(pExpr, EP_xIsSelect)) ) return 0; - pEList = pExpr->x.pList; - if( pEList==0 || pEList->nExpr!=1 ) return 0; - if( pEList->a[0].pExpr->op!=TK_AGG_COLUMN ) return WHERE_ORDERBY_NORMAL; - assert( !ExprHasProperty(pExpr, EP_IntValue) ); - if( sqlite3StrICmp(pExpr->u.zToken,"min")==0 ){ - return WHERE_ORDERBY_MIN; - }else if( sqlite3StrICmp(pExpr->u.zToken,"max")==0 ){ - return WHERE_ORDERBY_MAX; +static u8 minMaxQuery(AggInfo *pAggInfo, ExprList **ppMinMax){ + int eRet = WHERE_ORDERBY_NORMAL; /* Return value */ + + *ppMinMax = 0; + if( pAggInfo->nFunc==1 ){ + Expr *pExpr = pAggInfo->aFunc[0].pExpr; /* Aggregate function */ + ExprList *pEList = pExpr->x.pList; /* Arguments to agg function */ + + assert( pExpr->op==TK_AGG_FUNCTION ); + if( pEList && pEList->nExpr==1 && pEList->a[0].pExpr->op==TK_AGG_COLUMN ){ + const char *zFunc = pExpr->u.zToken; + if( sqlite3StrICmp(zFunc, "min")==0 ){ + eRet = WHERE_ORDERBY_MIN; + *ppMinMax = pEList; + }else if( sqlite3StrICmp(zFunc, "max")==0 ){ + eRet = WHERE_ORDERBY_MAX; + *ppMinMax = pEList; + } + } } - return WHERE_ORDERBY_NORMAL; + + assert( *ppMinMax==0 || (*ppMinMax)->nExpr==1 ); + return eRet; } /* @@ -3282,6 +3292,7 @@ static int selectExpander(Walker *pWalker, Select *p){ ExprList *pEList; struct SrcList_item *pFrom; sqlite3 *db = pParse->db; + Expr *pE, *pRight, *pExpr; if( db->mallocFailed ){ return WRC_Abort; @@ -3367,7 +3378,7 @@ static int selectExpander(Walker *pWalker, Select *p){ ** that need expanding. */ for(k=0; k<pEList->nExpr; k++){ - Expr *pE = pEList->a[k].pExpr; + pE = pEList->a[k].pExpr; if( pE->op==TK_ALL ) break; assert( pE->op!=TK_DOT || pE->pRight!=0 ); assert( pE->op!=TK_DOT || (pE->pLeft!=0 && pE->pLeft->op==TK_ID) ); @@ -3385,10 +3396,18 @@ static int selectExpander(Walker *pWalker, Select *p){ int longNames = (flags & SQLITE_FullColNames)!=0 && (flags & SQLITE_ShortColNames)==0; + /* When processing FROM-clause subqueries, it is always the case + ** that full_column_names=OFF and short_column_names=ON. The + ** sqlite3ResultSetOfSelect() routine makes it so. */ + assert( (p->selFlags & SF_NestedFrom)==0 + || ((flags & SQLITE_FullColNames)==0 && + (flags & SQLITE_ShortColNames)!=0) ); + for(k=0; k<pEList->nExpr; k++){ - Expr *pE = a[k].pExpr; - assert( pE->op!=TK_DOT || pE->pRight!=0 ); - if( pE->op!=TK_ALL && (pE->op!=TK_DOT || pE->pRight->op!=TK_ALL) ){ + pE = a[k].pExpr; + pRight = pE->pRight; + assert( pE->op!=TK_DOT || pRight!=0 ); + if( pE->op!=TK_ALL && (pE->op!=TK_DOT || pRight->op!=TK_ALL) ){ /* This particular expression does not need to be expanded. */ pNew = sqlite3ExprListAppend(pParse, pNew, a[k].pExpr); @@ -3403,32 +3422,43 @@ static int selectExpander(Walker *pWalker, Select *p){ /* This expression is a "*" or a "TABLE.*" and needs to be ** expanded. */ int tableSeen = 0; /* Set to 1 when TABLE matches */ - char *zTName; /* text of name of TABLE */ + char *zTName = 0; /* text of name of TABLE */ if( pE->op==TK_DOT ){ assert( pE->pLeft!=0 ); assert( !ExprHasProperty(pE->pLeft, EP_IntValue) ); zTName = pE->pLeft->u.zToken; - }else{ - zTName = 0; } for(i=0, pFrom=pTabList->a; i<pTabList->nSrc; i++, pFrom++){ Table *pTab = pFrom->pTab; + Select *pSub = pFrom->pSelect; char *zTabName = pFrom->zAlias; + const char *zSchemaName = 0; + int iDb; if( zTabName==0 ){ zTabName = pTab->zName; } if( db->mallocFailed ) break; - if( zTName && sqlite3StrICmp(zTName, zTabName)!=0 ){ - continue; + if( pSub==0 || (pSub->selFlags & SF_NestedFrom)==0 ){ + pSub = 0; + if( zTName && sqlite3StrICmp(zTName, zTabName)!=0 ){ + continue; + } + iDb = sqlite3SchemaToIndex(db, pTab->pSchema); + zSchemaName = iDb>=0 ? db->aDb[iDb].zName : "*"; } - tableSeen = 1; for(j=0; j<pTab->nCol; j++){ - Expr *pExpr, *pRight; char *zName = pTab->aCol[j].zName; char *zColname; /* The computed column name */ char *zToFree; /* Malloced string that needs to be freed */ Token sColname; /* Computed column name as a token */ + assert( zName ); + if( zTName && pSub + && sqlite3MatchSpanName(pSub->pEList->a[j].zSpan, 0, zTName, 0)==0 + ){ + continue; + } + /* If a column is marked as 'hidden' (currently only possible ** for virtual tables), do not include it in the expanded ** result-set list. @@ -3437,6 +3467,7 @@ static int selectExpander(Walker *pWalker, Select *p){ assert(IsVirtual(pTab)); continue; } + tableSeen = 1; if( i>0 && zTName==0 ){ if( (pFrom->jointype & JT_NATURAL)!=0 @@ -3459,6 +3490,10 @@ static int selectExpander(Walker *pWalker, Select *p){ Expr *pLeft; pLeft = sqlite3Expr(db, TK_ID, zTabName); pExpr = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight, 0); + if( zSchemaName ){ + pLeft = sqlite3Expr(db, TK_ID, zSchemaName); + pExpr = sqlite3PExpr(pParse, TK_DOT, pLeft, pExpr, 0); + } if( longNames ){ zColname = sqlite3MPrintf(db, "%s.%s", zTabName, zName); zToFree = zColname; @@ -3470,6 +3505,18 @@ static int selectExpander(Walker *pWalker, Select *p){ sColname.z = zColname; sColname.n = sqlite3Strlen30(zColname); sqlite3ExprListSetName(pParse, pNew, &sColname, 0); + if( pNew && (p->selFlags & SF_NestedFrom)!=0 ){ + struct ExprList_item *pX = &pNew->a[pNew->nExpr-1]; + if( pSub ){ + pX->zSpan = sqlite3DbStrDup(db, pSub->pEList->a[j].zSpan); + testcase( pX->zSpan==0 ); + }else{ + pX->zSpan = sqlite3MPrintf(db, "%s.%s.%s", + zSchemaName, zTabName, zColname); + testcase( pX->zSpan==0 ); + } + pX->bSpanIsTab = 1; + } sqlite3DbFree(db, zToFree); } } @@ -4527,11 +4574,17 @@ int sqlite3Select( ** Refer to code and comments in where.c for details. */ ExprList *pMinMax = 0; - u8 flag = minMaxQuery(p); + u8 flag = WHERE_ORDERBY_NORMAL; + + assert( p->pGroupBy==0 ); + assert( flag==0 ); + if( p->pHaving==0 ){ + flag = minMaxQuery(&sAggInfo, &pMinMax); + } + assert( flag==0 || (pMinMax!=0 && pMinMax->nExpr==1) ); + if( flag ){ - assert( !ExprHasProperty(p->pEList->a[0].pExpr, EP_xIsSelect) ); - assert( p->pEList->a[0].pExpr->x.pList->nExpr==1 ); - pMinMax = sqlite3ExprListDup(db, p->pEList->a[0].pExpr->x.pList,0); + pMinMax = sqlite3ExprListDup(db, pMinMax, 0); pDel = pMinMax; if( pMinMax && !db->mallocFailed ){ pMinMax->a[0].sortOrder = flag!=WHERE_ORDERBY_MIN ?1:0; diff --git a/src/shell.c b/src/shell.c index 7dd741b2d..4c50feb46 100644 --- a/src/shell.c +++ b/src/shell.c @@ -1480,6 +1480,12 @@ static void open_db(struct callback_data *p){ #ifndef SQLITE_OMIT_LOAD_EXTENSION sqlite3_enable_load_extension(p->db, 1); #endif +#ifdef SQLITE_ENABLE_REGEXP + { + extern int sqlite3_add_regexp_func(sqlite3*); + sqlite3_add_regexp_func(db); + } +#endif } } diff --git a/src/sqlite.h.in b/src/sqlite.h.in index f07e225b4..c41e62c47 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -1587,7 +1587,7 @@ struct sqlite3_mem_methods { ** [SQLITE_USE_URI] symbol defined. ** ** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]] <dt>SQLITE_CONFIG_COVERING_INDEX_SCAN -** <dd> This option taks a single integer argument which is interpreted as +** <dd> This option takes a single integer argument which is interpreted as ** a boolean in order to enable or disable the use of covering indices for ** full table scans in the query optimizer. The default setting is determined ** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on" diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 5e1d2d75c..1062612e8 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1492,20 +1492,20 @@ struct UnpackedRecord { ** element. */ struct Index { - char *zName; /* Name of this index */ - int *aiColumn; /* Which columns are used by this index. 1st is 0 */ - tRowcnt *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */ - Table *pTable; /* The SQL table being indexed */ - char *zColAff; /* String defining the affinity of each column */ - Index *pNext; /* The next index associated with the same table */ - Schema *pSchema; /* Schema containing this index */ - u8 *aSortOrder; /* Array of size Index.nColumn. True==DESC, False==ASC */ - char **azColl; /* Array of collation sequence names for index */ - int nColumn; /* Number of columns in the table used by this index */ - int tnum; /* Page containing root of this index in database file */ - u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ - u8 autoIndex; /* True if is automatically created (ex: by UNIQUE) */ - u8 bUnordered; /* Use this index for == or IN queries only */ + char *zName; /* Name of this index */ + int *aiColumn; /* Which columns are used by this index. 1st is 0 */ + tRowcnt *aiRowEst; /* From ANALYZE: Est. rows selected by each column */ + Table *pTable; /* The SQL table being indexed */ + char *zColAff; /* String defining the affinity of each column */ + Index *pNext; /* The next index associated with the same table */ + Schema *pSchema; /* Schema containing this index */ + u8 *aSortOrder; /* for each column: True==DESC, False==ASC */ + char **azColl; /* Array of collation sequence names for index */ + int tnum; /* DB Page containing root of this index */ + u16 nColumn; /* Number of columns in table used by this index */ + u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ + unsigned autoIndex:2; /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */ + unsigned bUnordered:1; /* Use this index for == or IN queries only */ #ifdef SQLITE_ENABLE_STAT3 int nSample; /* Number of elements in aSample[] */ tRowcnt avgEq; /* Average nEq value for key values not in aSample */ @@ -1779,18 +1779,27 @@ struct Expr { ** list of "ID = expr" items in an UPDATE. A list of expressions can ** also be used as the argument to a function, in which case the a.zName ** field is not used. +** +** By default the Expr.zSpan field holds a human-readable description of +** the expression that is used in the generation of error messages and +** column labels. In this case, Expr.zSpan is typically the text of a +** column expression as it exists in a SELECT statement. However, if +** the bSpanIsTab flag is set, then zSpan is overloaded to mean the name +** of the result column in the form: DATABASE.TABLE.COLUMN. This later +** form is used for name resolution with nested FROM clauses. */ struct ExprList { int nExpr; /* Number of expressions on the list */ int iECursor; /* VDBE Cursor associated with this ExprList */ struct ExprList_item { /* For each expression in the list */ - Expr *pExpr; /* The list of expressions */ - char *zName; /* Token associated with this expression */ - char *zSpan; /* Original text of the expression */ - u8 sortOrder; /* 1 for DESC or 0 for ASC */ - u8 done; /* A flag to indicate when processing is finished */ - u16 iOrderByCol; /* For ORDER BY, column number in result set */ - u16 iAlias; /* Index into Parse.aAlias[] for zName */ + Expr *pExpr; /* The list of expressions */ + char *zName; /* Token associated with this expression */ + char *zSpan; /* Original text of the expression */ + u8 sortOrder; /* 1 for DESC or 0 for ASC */ + unsigned done :1; /* A flag to indicate when processing is finished */ + unsigned bSpanIsTab :1; /* zSpan holds DB.TABLE.COLUMN */ + u16 iOrderByCol; /* For ORDER BY, column number in result set */ + u16 iAlias; /* Index into Parse.aAlias[] for zName */ } *a; /* Alloc a power of two greater or equal to nExpr */ }; @@ -2110,6 +2119,7 @@ struct Select { #define SF_UseSorter 0x0040 /* Sort using a sorter */ #define SF_Values 0x0080 /* Synthesized from VALUES clause */ #define SF_Materialize 0x0100 /* Force materialization of views */ +#define SF_NestedFrom 0x0200 /* Part of a parenthesized FROM clause */ /* @@ -2826,7 +2836,7 @@ Index *sqlite3CreateIndex(Parse*,Token*,Token*,SrcList*,ExprList*,int,Token*, void sqlite3DropIndex(Parse*, SrcList*, int); int sqlite3Select(Parse*, Select*, SelectDest*); Select *sqlite3SelectNew(Parse*,ExprList*,SrcList*,Expr*,ExprList*, - Expr*,ExprList*,int,Expr*,Expr*); + Expr*,ExprList*,u16,Expr*,Expr*); void sqlite3SelectDelete(sqlite3*, Select*); Table *sqlite3SrcListLookup(Parse*, SrcList*); int sqlite3IsReadOnly(Parse*, Table*, int); @@ -3083,6 +3093,7 @@ void sqlite3NestedParse(Parse*, const char*, ...); void sqlite3ExpirePreparedStatements(sqlite3*); int sqlite3CodeSubselect(Parse *, Expr *, int, int); void sqlite3SelectPrep(Parse*, Select*, NameContext*); +int sqlite3MatchSpanName(const char*, const char*, const char*, const char*); int sqlite3ResolveExprNames(NameContext*, Expr*); void sqlite3ResolveSelectNames(Parse*, Select*, NameContext*); int sqlite3ResolveOrderGroupBy(Parse*, Select*, ExprList*, const char*); @@ -3221,8 +3232,10 @@ const char *sqlite3JournalModename(int); #endif #ifndef SQLITE_OMIT_FOREIGN_KEY void sqlite3FkDelete(sqlite3 *, Table*); + int sqlite3FkLocateIndex(Parse*,Table*,FKey*,Index**,int**); #else #define sqlite3FkDelete(a,b) + #define sqlite3FkLocateIndex(a,b,c,d,e) #endif diff --git a/src/tclsqlite.c b/src/tclsqlite.c index f1a05c9a6..070c540ec 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -3833,6 +3833,8 @@ static void init_all(Tcl_Interp *interp){ #if defined(SQLITE_ENABLE_SESSION) && defined(SQLITE_ENABLE_PREUPDATE_HOOK) extern int TestSession_Init(Tcl_Interp*); #endif + extern int Sqlitetestregexp_Init(Tcl_Interp*); + #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) extern int Sqlitetestfts3_Init(Tcl_Interp *interp); #endif @@ -3878,6 +3880,8 @@ static void init_all(Tcl_Interp *interp){ #if defined(SQLITE_ENABLE_SESSION) && defined(SQLITE_ENABLE_PREUPDATE_HOOK) TestSession_Init(interp); #endif + Sqlitetestregexp_Init(interp); + #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) Sqlitetestfts3_Init(interp); #endif diff --git a/src/test_regexp.c b/src/test_regexp.c new file mode 100644 index 000000000..87fb3db5b --- /dev/null +++ b/src/test_regexp.c @@ -0,0 +1,757 @@ +/* +** 2012-11-13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** The code in this file implements a compact but reasonably +** efficient regular-expression matcher for posix extended regular +** expressions against UTF8 text. The following syntax is supported: +** +** X* zero or more occurrences of X +** X+ one or more occurrences of X +** X? zero or one occurrences of X +** X{p,q} between p and q occurrences of X +** (X) match X +** X|Y X or Y +** ^X X occurring at the beginning of the string +** X$ X occurring at the end of the string +** . Match any single character +** \c Character c where c is one of \{}()[]|*+?. +** \c C-language escapes for c in afnrtv. ex: \t or \n +** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX +** \xXXX Where XXX is any number of hex digits, unicode value XXX +** [abc] Any single character from the set abc +** [^abc] Any single character not in the set abc +** [a-z] Any single character in the range a-z +** [^a-z] Any single character not in the range a-z +** \b Word boundary +** \w Word character. [A-Za-z0-9_] +** \W Non-word character +** \d Digit +** \D Non-digit +** \s Whitespace character +** \S Non-whitespace character +** +** A nondeterministic finite automaton (NFA) is used for matching, so the +** performance is bounded by O(N*M) where N is the size of the regular +** expression and M is the size of the input string. The matcher never +** exhibits exponential behavior. Note that the X{p,q} operator expands +** to p copies of X following by q-p copies of X? and that the size of the +** regular expression in the O(N*M) performance bound is computed after +** this expansion. +*/ +#include <string.h> +#include <stdlib.h> +#include "sqlite3.h" + +/* The end-of-input character */ +#define RE_EOF 0 /* End of input */ + +/* The NFA is implemented as sequence of opcodes taken from the following +** set. Each opcode has a single integer argument. +*/ +#define RE_OP_MATCH 1 /* Match the one character in the argument */ +#define RE_OP_ANY 2 /* Match any one character. (Implements ".") */ +#define RE_OP_ANYSTAR 3 /* Special optimized version of .* */ +#define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */ +#define RE_OP_GOTO 5 /* Jump to opcode at iArg */ +#define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */ +#define RE_OP_CC_INC 7 /* Beginning of a [...] character class */ +#define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */ +#define RE_OP_CC_VALUE 9 /* Single value in a character class */ +#define RE_OP_CC_RANGE 10 /* Range of values in a character class */ +#define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */ +#define RE_OP_NOTWORD 12 /* Not a perl word character */ +#define RE_OP_DIGIT 13 /* digit: [0-9] */ +#define RE_OP_NOTDIGIT 14 /* Not a digit */ +#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */ +#define RE_OP_NOTSPACE 16 /* Not a digit */ +#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */ + +/* Each opcode is a "state" in the NFA */ +typedef unsigned short ReStateNumber; + +/* Because this is an NFA and not a DFA, multiple states can be active at +** once. An instance of the following object records all active states in +** the NFA. The implementation is optimized for the common case where the +** number of actives states is small. +*/ +typedef struct ReStateSet { + unsigned nState; /* Number of current states */ + ReStateNumber *aState; /* Current states */ +} ReStateSet; + +/* An input string read one character at a time. +*/ +typedef struct ReInput ReInput; +struct ReInput { + const unsigned char *z; /* All text */ + int i; /* Next byte to read */ + int mx; /* EOF when i>=mx */ +}; + +/* A compiled NFA (or an NFA that is in the process of being compiled) is +** an instance of the following object. +*/ +typedef struct ReCompiled ReCompiled; +struct ReCompiled { + ReInput sIn; /* Regular expression text */ + const char *zErr; /* Error message to return */ + char *aOp; /* Operators for the virtual machine */ + int *aArg; /* Arguments to each operator */ + unsigned (*xNextChar)(ReInput*); /* Next character function */ + char zInit[12]; /* Initial text to match */ + int nInit; /* Number of characters in zInit */ + unsigned nState; /* Number of entries in aOp[] and aArg[] */ + unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ +}; + +/* Add a state to the given state set if it is not already there */ +static void re_add_state(ReStateSet *pSet, int newState){ + unsigned i; + for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return; + pSet->aState[pSet->nState++] = newState; +} + +/* Extract the next unicode character from *pzIn and return it. Advance +** *pzIn to the first byte past the end of the character returned. To +** be clear: this routine converts utf8 to unicode. This routine is +** optimized for the common case where the next character is a single byte. +*/ +static unsigned re_next_char(ReInput *p){ + unsigned c; + if( p->i>=p->mx ) return 0; + c = p->z[p->i++]; + if( c>0x80 ){ + if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){ + c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f); + if( c<0x80 ) c = 0xfffd; + }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 + && (p->z[p->i+1]&0xc0)==0x80 ){ + c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); + p->i += 2; + if( c<0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; + }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 + && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ + c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) + | (p->z[p->i+2]&0x3f); + p->i += 3; + if( c<0xffff ) c = 0xfffd; + }else{ + c = 0xfffd; + } + } + return c; +} +static unsigned re_next_char_nocase(ReInput *p){ + unsigned c = re_next_char(p); + if( c>='A' && c<='Z' ) c += 'a' - 'A'; + return c; +} + +/* Return true if c is a perl "word" character: [A-Za-z0-9_] */ +static int re_word_char(int c){ + return (c>='0' && c<='9') || (c>='a' && c<='z') + || (c>='A' && c<='Z') || c=='_'; +} + +/* Return true if c is a "digit" character: [0-9] */ +static int re_digit_char(int c){ + return (c>='0' && c<='9'); +} + +/* Return true if c is a perl "space" character: [ \t\r\n\v\f] */ +static int re_space_char(int c){ + return c==' ' || c=='\t' || c=='\n' || c=='\v' || c=='\f'; +} + +/* Run a compiled regular expression on the zero-terminated input +** string zIn[]. Return true on a match and false if there is no match. +*/ +int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ + ReStateSet aStateSet[2], *pThis, *pNext; + ReStateNumber aSpace[100]; + ReStateNumber *pToFree; + unsigned int i = 0; + unsigned int iSwap = 0; + int c = RE_EOF+1; + int cPrev = 0; + int rc = 0; + ReInput in; + + in.z = zIn; + in.i = 0; + in.mx = nIn>=0 ? nIn : strlen((char*)zIn); + if( pRe->nInit ){ + unsigned char x = pRe->zInit[0]; + while( in.i+pRe->nInit<=in.mx + && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0) + ){ + in.i++; + } + if( in.i+pRe->nInit>in.mx ) return 0; + } + if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ + pToFree = 0; + aStateSet[0].aState = aSpace; + }else{ + pToFree = sqlite3_malloc( sizeof(ReStateNumber)*2*pRe->nState ); + if( pToFree==0 ) return -1; + aStateSet[0].aState = pToFree; + } + aStateSet[1].aState = &aStateSet[0].aState[pRe->nState]; + pNext = &aStateSet[1]; + pNext->nState = 0; + re_add_state(pNext, 0); + while( c!=RE_EOF && pNext->nState>0 ){ + cPrev = c; + c = pRe->xNextChar(&in); + pThis = pNext; + pNext = &aStateSet[iSwap]; + iSwap = 1 - iSwap; + pNext->nState = 0; + for(i=0; i<pThis->nState; i++){ + int x = pThis->aState[i]; + switch( pRe->aOp[x] ){ + case RE_OP_MATCH: { + if( pRe->aArg[x]==c ) re_add_state(pNext, x+1); + break; + } + case RE_OP_ANY: { + re_add_state(pNext, x+1); + break; + } + case RE_OP_WORD: { + if( re_word_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTWORD: { + if( !re_word_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_DIGIT: { + if( re_digit_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTDIGIT: { + if( !re_digit_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_SPACE: { + if( re_space_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTSPACE: { + if( !re_space_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_BOUNDARY: { + if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1); + break; + } + case RE_OP_ANYSTAR: { + re_add_state(pNext, x); + re_add_state(pThis, x+1); + break; + } + case RE_OP_FORK: { + re_add_state(pThis, x+pRe->aArg[x]); + re_add_state(pThis, x+1); + break; + } + case RE_OP_GOTO: { + re_add_state(pThis, x+pRe->aArg[x]); + break; + } + case RE_OP_ACCEPT: { + rc = 1; + goto re_match_end; + } + case RE_OP_CC_INC: + case RE_OP_CC_EXC: { + int j = 1; + int n = pRe->aArg[x]; + int hit = 0; + for(j=1; j>0 && j<n; j++){ + if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){ + if( pRe->aArg[x+j]==c ){ + hit = 1; + j = -1; + } + }else{ + if( pRe->aArg[x+j]<=c && pRe->aArg[x+j+1]>=c ){ + hit = 1; + j = -1; + }else{ + j++; + } + } + } + if( pRe->aOp[x]==RE_OP_CC_EXC ) hit = !hit; + if( hit ) re_add_state(pNext, x+n); + break; + } + } + } + } + for(i=0; i<pNext->nState; i++){ + if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } + } +re_match_end: + sqlite3_free(pToFree); + return rc; +} + +/* Resize the opcode and argument arrays for an RE under construction. +*/ +static int re_resize(ReCompiled *p, int N){ + char *aOp; + int *aArg; + aOp = sqlite3_realloc(p->aOp, N*sizeof(p->aOp[0])); + if( aOp==0 ) return 1; + p->aOp = aOp; + aArg = sqlite3_realloc(p->aArg, N*sizeof(p->aArg[0])); + if( aArg==0 ) return 1; + p->aArg = aArg; + p->nAlloc = N; + return 0; +} + +/* Insert a new opcode and argument into an RE under construction. The +** insertion point is just prior to existing opcode iBefore. +*/ +static int re_insert(ReCompiled *p, int iBefore, int op, int arg){ + int i; + if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0; + for(i=p->nState; i>iBefore; i--){ + p->aOp[i] = p->aOp[i-1]; + p->aArg[i] = p->aArg[i-1]; + } + p->nState++; + p->aOp[iBefore] = op; + p->aArg[iBefore] = arg; + return iBefore; +} + +/* Append a new opcode and argument to the end of the RE under construction. +*/ +static int re_append(ReCompiled *p, int op, int arg){ + return re_insert(p, p->nState, op, arg); +} + +/* Make a copy of N opcodes starting at iStart onto the end of the RE +** under construction. +*/ +static void re_copy(ReCompiled *p, int iStart, int N){ + if( p->nState+N>=p->nAlloc && re_resize(p, p->nAlloc*2+N) ) return; + memcpy(&p->aOp[p->nState], &p->aOp[iStart], N*sizeof(p->aOp[0])); + memcpy(&p->aArg[p->nState], &p->aArg[iStart], N*sizeof(p->aArg[0])); + p->nState += N; +} + +/* Return true if c is a hexadecimal digit character: [0-9a-fA-F] +** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c). If +** c is not a hex digit *pV is unchanged. +*/ +static int re_hex(int c, int *pV){ + if( c>='0' && c<='9' ){ + c -= '0'; + }else if( c>='a' && c<='f' ){ + c -= 'a' - 10; + }else if( c>='A' && c<='F' ){ + c -= 'A' - 10; + }else{ + return 0; + } + *pV = (*pV)*16 + (c & 0xff); + return 1; +} + +/* A backslash character has been seen, read the next character and +** return its intepretation. +*/ +static unsigned re_esc_char(ReCompiled *p){ + static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; + static const char zTrans[] = "\a\f\n\r\t\v"; + int i, v = 0; + char c; + if( p->sIn.i>=p->sIn.mx ) return 0; + c = p->sIn.z[p->sIn.i]; + if( c=='u' && p->sIn.i+5<p->sIn.mx ){ + v = 0; + const unsigned char *zIn = p->sIn.z + p->sIn.i; + if( re_hex(zIn[1],&v) + && re_hex(zIn[2],&v) + && re_hex(zIn[3],&v) + && re_hex(zIn[4],&v) + ){ + p->sIn.i += 5; + return v; + } + } + if( c=='x' ){ + v = 0; + for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){} + if( i>1 ){ + p->sIn.i += i; + return v; + } + } + for(i=0; zEsc[i] && zEsc[i]!=c; i++){} + if( zEsc[i] ){ + if( i<6 ) c = zTrans[i]; + p->sIn.i++; + }else{ + p->zErr = "unknown \\ escape"; + } + return c; +} + +/* Forward declaration */ +static const char *re_subcompile_string(ReCompiled*); + +/* Peek at the next byte of input */ +static unsigned char rePeek(ReCompiled *p){ + return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0; +} + +/* Compile RE text into a sequence of opcodes. Continue up to the +** first unmatched ")" character, then return. If an error is found, +** return a pointer to the error message string. +*/ +static const char *re_subcompile_re(ReCompiled *p){ + const char *zErr; + int iStart, iEnd, iGoto; + iStart = p->nState; + zErr = re_subcompile_string(p); + if( zErr ) return zErr; + while( rePeek(p)=='|' ){ + iEnd = p->nState; + re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart); + iGoto = re_append(p, RE_OP_GOTO, 0); + p->sIn.i++; + zErr = re_subcompile_string(p); + if( zErr ) return zErr; + p->aArg[iGoto] = p->nState - iGoto; + } + return 0; +} + +/* Compile an element of regular expression text (anything that can be +** an operand to the "|" operator). Return NULL on success or a pointer +** to the error message if there is a problem. +*/ +static const char *re_subcompile_string(ReCompiled *p){ + int iPrev = -1; + int iStart; + unsigned c; + const char *zErr; + while( (c = p->xNextChar(&p->sIn))!=0 ){ + iStart = p->nState; + switch( c ){ + case '|': + case '$': + case ')': { + p->sIn.i--; + return 0; + } + case '(': { + zErr = re_subcompile_re(p); + if( zErr ) return zErr; + if( rePeek(p)!=')' ) return "unmatched '('"; + p->sIn.i++; + break; + } + case '.': { + if( rePeek(p)=='*' ){ + re_append(p, RE_OP_ANYSTAR, 0); + p->sIn.i++; + }else{ + re_append(p, RE_OP_ANY, 0); + } + break; + } + case '*': { + if( iPrev<0 ) return "'*' without operand"; + re_insert(p, iPrev, RE_OP_GOTO, p->nState - iPrev + 1); + re_append(p, RE_OP_FORK, iPrev - p->nState + 1); + break; + } + case '+': { + if( iPrev<0 ) return "'+' without operand"; + re_append(p, RE_OP_FORK, iPrev - p->nState); + break; + } + case '?': { + if( iPrev<0 ) return "'?' without operand"; + re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1); + break; + } + case '{': { + int m = 0, n = 0; + int sz, j; + if( iPrev<0 ) return "'{m,n}' without operand"; + while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; } + n = m; + if( c==',' ){ + p->sIn.i++; + n = 0; + while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; } + } + if( c!='}' ) return "unmatched '{'"; + if( n>0 && n<m ) return "n less than m in '{m,n}'"; + p->sIn.i++; + sz = p->nState - iPrev; + if( m==0 ){ + if( n==0 ) return "both m and n are zero in '{m,n}'"; + re_insert(p, iPrev, RE_OP_FORK, sz+1); + n--; + }else{ + for(j=1; j<m; j++) re_copy(p, iPrev, sz); + } + for(j=m; j<n; j++){ + re_append(p, RE_OP_FORK, sz+1); + re_copy(p, iPrev, sz); + } + if( n==0 && m>0 ){ + re_append(p, RE_OP_FORK, -sz); + } + break; + } + case '[': { + int iFirst = p->nState; + if( rePeek(p)=='^' ){ + re_append(p, RE_OP_CC_EXC, 0); + p->sIn.i++; + }else{ + re_append(p, RE_OP_CC_INC, 0); + } + while( (c = p->xNextChar(&p->sIn))!=0 ){ + if( c=='[' && rePeek(p)==':' ){ + return "POSIX character classes not supported"; + } + if( c=='\\' ) c = re_esc_char(p); + if( rePeek(p)=='-' ){ + re_append(p, RE_OP_CC_RANGE, c); + p->sIn.i++; + c = p->xNextChar(&p->sIn); + if( c=='\\' ) c = re_esc_char(p); + re_append(p, RE_OP_CC_RANGE, c); + }else{ + re_append(p, RE_OP_CC_VALUE, c); + } + if( rePeek(p)==']' ){ p->sIn.i++; break; } + } + if( c==0 ) return "unclosed '['"; + p->aArg[iFirst] = p->nState - iFirst; + break; + } + case '\\': { + int specialOp = 0; + switch( rePeek(p) ){ + case 'b': specialOp = RE_OP_BOUNDARY; break; + case 'd': specialOp = RE_OP_DIGIT; break; + case 'D': specialOp = RE_OP_NOTDIGIT; break; + case 's': specialOp = RE_OP_SPACE; break; + case 'S': specialOp = RE_OP_NOTSPACE; break; + case 'w': specialOp = RE_OP_WORD; break; + case 'W': specialOp = RE_OP_NOTWORD; break; + } + if( specialOp ){ + p->sIn.i++; + re_append(p, specialOp, 0); + }else{ + c = re_esc_char(p); + re_append(p, RE_OP_MATCH, c); + } + break; + } + default: { + re_append(p, RE_OP_MATCH, c); + break; + } + } + iPrev = iStart; + } + return 0; +} + +/* Free and reclaim all the memory used by a previously compiled +** regular expression. Applications should invoke this routine once +** for every call to re_compile() to avoid memory leaks. +*/ +void re_free(ReCompiled *pRe){ + if( pRe ){ + sqlite3_free(pRe->aOp); + sqlite3_free(pRe->aArg); + sqlite3_free(pRe); + } +} + +/* +** Compile a textual regular expression in zIn[] into a compiled regular +** expression suitable for us by re_match() and return a pointer to the +** compiled regular expression in *ppRe. Return NULL on success or an +** error message if something goes wrong. +*/ +const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ + ReCompiled *pRe; + const char *zErr; + int i, j; + + *ppRe = 0; + pRe = sqlite3_malloc( sizeof(*pRe) ); + if( pRe==0 ){ + return "out of memory"; + } + memset(pRe, 0, sizeof(*pRe)); + pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char; + if( re_resize(pRe, 30) ){ + re_free(pRe); + return "out of memory"; + } + if( zIn[0]=='^' ){ + zIn++; + }else{ + re_append(pRe, RE_OP_ANYSTAR, 0); + } + pRe->sIn.z = (unsigned char*)zIn; + pRe->sIn.i = 0; + pRe->sIn.mx = strlen((char*)pRe->sIn.z); + zErr = re_subcompile_re(pRe); + if( zErr ){ + re_free(pRe); + return zErr; + } + if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){ + re_append(pRe, RE_OP_MATCH, RE_EOF); + re_append(pRe, RE_OP_ACCEPT, 0); + *ppRe = pRe; + }else if( pRe->sIn.i>=pRe->sIn.mx ){ + re_append(pRe, RE_OP_ACCEPT, 0); + *ppRe = pRe; + }else{ + re_free(pRe); + return "unrecognized character"; + } + if( pRe->aOp[0]==RE_OP_ANYSTAR ){ + for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){ + unsigned x = pRe->aArg[i]; + if( x<=127 ){ + pRe->zInit[j++] = x; + }else if( x<=0xfff ){ + pRe->zInit[j++] = 0xc0 | (x>>6); + pRe->zInit[j++] = 0x80 | (x&0x3f); + }else if( x<=0xffff ){ + pRe->zInit[j++] = 0xd0 | (x>>12); + pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); + pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); + }else{ + break; + } + } + if( j>0 && pRe->zInit[j-1]==0 ) j--; + pRe->nInit = j; + } + return pRe->zErr; +} + +/* +** Implementation of the regexp() SQL function. This function implements +** the build-in REGEXP operator. The first argument to the function is the +** pattern and the second argument is the string. So, the SQL statements: +** +** A REGEXP B +** +** is implemented as regexp(B,A). +*/ +static void re_sql_func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + ReCompiled *pRe; /* Compiled regular expression */ + const char *zPattern; /* The regular expression */ + const unsigned char *zStr;/* String being searched */ + const char *zErr; /* Compile error message */ + + pRe = sqlite3_get_auxdata(context, 0); + if( pRe==0 ){ + zPattern = (const char*)sqlite3_value_text(argv[0]); + if( zPattern==0 ) return; + zErr = re_compile(&pRe, zPattern, 0); + if( zErr ){ + re_free(pRe); + sqlite3_result_error(context, zErr, -1); + return; + } + if( pRe==0 ){ + sqlite3_result_error_nomem(context); + return; + } + sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); + } + zStr = (const unsigned char*)sqlite3_value_text(argv[1]); + if( zStr!=0 ){ + sqlite3_result_int(context, re_match(pRe, zStr, -1)); + } +} + +/* +** Invoke this routine in order to install the REGEXP function in an +** SQLite database connection. +** +** Use: +** +** sqlite3_auto_extension(sqlite3_add_regexp_func); +** +** to cause this extension to be automatically loaded into each new +** database connection. +*/ +int sqlite3_add_regexp_func(sqlite3 *db){ + return sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0, + re_sql_func, 0, 0); +} + + +/***************************** Test Code ***********************************/ +#ifdef SQLITE_TEST +#include <tcl.h> +extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite3 **ppDb); + +/* Implementation of the TCL command: +** +** sqlite3_add_regexp_func $DB +*/ +static int tclSqlite3AddRegexpFunc( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db; + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB"); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; + sqlite3_add_regexp_func(db); + return TCL_OK; +} + +/* Register the sqlite3_add_regexp_func TCL command with the TCL interpreter. +*/ +int Sqlitetestregexp_Init(Tcl_Interp *interp){ + Tcl_CreateObjCommand(interp, "sqlite3_add_regexp_func", + tclSqlite3AddRegexpFunc, 0, 0); + return TCL_OK; +} +#endif /* SQLITE_TEST */ +/**************************** End Of Test Code *******************************/ diff --git a/src/vdbe.c b/src/vdbe.c index df8130386..255fb6338 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -1280,6 +1280,7 @@ case OP_Subtract: /* same as TK_MINUS, in1, in2, out3 */ case OP_Multiply: /* same as TK_STAR, in1, in2, out3 */ case OP_Divide: /* same as TK_SLASH, in1, in2, out3 */ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ + char bIntint; /* Started out as two integer operands */ int flags; /* Combined MEM_* flags from both inputs */ i64 iA; /* Integer value of left operand */ i64 iB; /* Integer value of right operand */ @@ -1296,6 +1297,7 @@ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ if( (pIn1->flags & pIn2->flags & MEM_Int)==MEM_Int ){ iA = pIn1->u.i; iB = pIn2->u.i; + bIntint = 1; switch( pOp->opcode ){ case OP_Add: if( sqlite3AddInt64(&iB,iA) ) goto fp_math; break; case OP_Subtract: if( sqlite3SubInt64(&iB,iA) ) goto fp_math; break; @@ -1316,6 +1318,7 @@ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ pOut->u.i = iB; MemSetTypeFlag(pOut, MEM_Int); }else{ + bIntint = 0; fp_math: rA = sqlite3VdbeRealValue(pIn1); rB = sqlite3VdbeRealValue(pIn2); @@ -1347,7 +1350,7 @@ fp_math: } pOut->r = rB; MemSetTypeFlag(pOut, MEM_Real); - if( (flags & MEM_Real)==0 ){ + if( (flags & MEM_Real)==0 && !bIntint ){ sqlite3VdbeIntegerAffinity(pOut); } #endif @@ -2086,8 +2089,6 @@ case OP_BitNot: { /* same as TK_BITNOT, in1, out2 */ ** ** Check if OP_Once flag P1 is set. If so, jump to instruction P2. Otherwise, ** set the flag and fall through to the next instruction. -** -** See also: JumpOnce */ case OP_Once: { /* jump */ assert( pOp->p1<p->nOnceFlag ); diff --git a/src/where.c b/src/where.c index 98369db6d..3b64b411d 100644 --- a/src/where.c +++ b/src/where.c @@ -253,7 +253,7 @@ struct WhereCost { #define WHERE_COLUMN_NULL 0x00080000 /* x IS NULL */ #define WHERE_INDEXED 0x000f0000 /* Anything that uses an index */ #define WHERE_NOT_FULLSCAN 0x100f3000 /* Does not do a full table scan */ -#define WHERE_IN_ABLE 0x000f1000 /* Able to support an IN operator */ +#define WHERE_IN_ABLE 0x080f1000 /* Able to support an IN operator */ #define WHERE_TOP_LIMIT 0x00100000 /* x<EXPR or x<=EXPR constraint */ #define WHERE_BTM_LIMIT 0x00200000 /* x>EXPR or x>=EXPR constraint */ #define WHERE_BOTH_LIMIT 0x00300000 /* Both x>EXPR and x<EXPR */ @@ -403,7 +403,7 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){ pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); } pTerm = &pWC->a[idx = pWC->nTerm++]; - pTerm->pExpr = p; + pTerm->pExpr = sqlite3ExprSkipCollate(p); pTerm->wtFlags = wtFlags; pTerm->pWC = pWC; pTerm->iParent = -1; @@ -1188,7 +1188,8 @@ static void exprAnalyze( } pTerm = &pWC->a[idxTerm]; pMaskSet = pWC->pMaskSet; - pExpr = sqlite3ExprSkipCollate(pTerm->pExpr); + pExpr = pTerm->pExpr; + assert( pExpr->op!=TK_AS && pExpr->op!=TK_COLLATE ); prereqLeft = exprTableUsage(pMaskSet, pExpr->pLeft); op = pExpr->op; if( op==TK_IN ){ @@ -2056,7 +2057,7 @@ static sqlite3_index_info *allocateIndexInfo(WhereBestIdx *p){ assert( (pTerm->eOperator&(pTerm->eOperator-1))==0 ); testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); - if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue; + if( pTerm->eOperator & (WO_ISNULL) ) continue; if( pTerm->wtFlags & TERM_VNULL ) continue; nTerm++; } @@ -2104,15 +2105,18 @@ static sqlite3_index_info *allocateIndexInfo(WhereBestIdx *p){ pUsage; for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ + u8 op; if( pTerm->leftCursor != pSrc->iCursor ) continue; assert( (pTerm->eOperator&(pTerm->eOperator-1))==0 ); testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); - if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue; + if( pTerm->eOperator & (WO_ISNULL) ) continue; if( pTerm->wtFlags & TERM_VNULL ) continue; pIdxCons[j].iColumn = pTerm->u.leftColumn; pIdxCons[j].iTermOffset = i; - pIdxCons[j].op = (u8)pTerm->eOperator; + op = (u8)pTerm->eOperator; + if( op==WO_IN ) op = WO_EQ; + pIdxCons[j].op = op; /* The direct assignment in the previous line is possible only because ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The ** following asserts verify this fact. */ @@ -2122,7 +2126,7 @@ static sqlite3_index_info *allocateIndexInfo(WhereBestIdx *p){ assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH ); - assert( pTerm->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); + assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); j++; } for(i=0; i<nOrderBy; i++){ @@ -2208,6 +2212,7 @@ static void bestVirtualIndex(WhereBestIdx *p){ WhereTerm *pTerm; int i, j; int nOrderBy; + int bAllowIN; /* Allow IN optimizations */ double rCost; /* Make sure wsFlags is initialized to some sane value. Otherwise, if the @@ -2242,59 +2247,87 @@ static void bestVirtualIndex(WhereBestIdx *p){ assert( pTab->azModuleArg && pTab->azModuleArg[0] ); assert( sqlite3GetVTable(pParse->db, pTab) ); - /* Set the aConstraint[].usable fields and initialize all - ** output variables to zero. - ** - ** aConstraint[].usable is true for constraints where the right-hand - ** side contains only references to tables to the left of the current - ** table. In other words, if the constraint is of the form: - ** - ** column = expr - ** - ** and we are evaluating a join, then the constraint on column is - ** only valid if all tables referenced in expr occur to the left - ** of the table containing column. - ** - ** The aConstraints[] array contains entries for all constraints - ** on the current table. That way we only have to compute it once - ** even though we might try to pick the best index multiple times. - ** For each attempt at picking an index, the order of tables in the - ** join might be different so we have to recompute the usable flag - ** each time. + /* Try once or twice. On the first attempt, allow IN optimizations. + ** If an IN optimization is accepted by the virtual table xBestIndex + ** method, but the pInfo->aConstrainUsage.omit flag is not set, then + ** the query will not work because it might allow duplicate rows in + ** output. In that case, run the xBestIndex method a second time + ** without the IN constraints. Usually this loop only runs once. + ** The loop will exit using a "break" statement. */ - pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; - pUsage = pIdxInfo->aConstraintUsage; - for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ - j = pIdxCons->iTermOffset; - pTerm = &pWC->a[j]; - pIdxCons->usable = (pTerm->prereqRight&p->notReady) ? 0 : 1; - } - memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint); - if( pIdxInfo->needToFreeIdxStr ){ - sqlite3_free(pIdxInfo->idxStr); - } - pIdxInfo->idxStr = 0; - pIdxInfo->idxNum = 0; - pIdxInfo->needToFreeIdxStr = 0; - pIdxInfo->orderByConsumed = 0; - /* ((double)2) In case of SQLITE_OMIT_FLOATING_POINT... */ - pIdxInfo->estimatedCost = SQLITE_BIG_DBL / ((double)2); - nOrderBy = pIdxInfo->nOrderBy; - if( !p->pOrderBy ){ - pIdxInfo->nOrderBy = 0; - } - - if( vtabBestIndex(pParse, pTab, pIdxInfo) ){ - return; - } + for(bAllowIN=1; 1; bAllowIN--){ + assert( bAllowIN==0 || bAllowIN==1 ); - pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; - for(i=0; i<pIdxInfo->nConstraint; i++){ - if( pUsage[i].argvIndex>0 ){ - p->cost.used |= pWC->a[pIdxCons[i].iTermOffset].prereqRight; + /* Set the aConstraint[].usable fields and initialize all + ** output variables to zero. + ** + ** aConstraint[].usable is true for constraints where the right-hand + ** side contains only references to tables to the left of the current + ** table. In other words, if the constraint is of the form: + ** + ** column = expr + ** + ** and we are evaluating a join, then the constraint on column is + ** only valid if all tables referenced in expr occur to the left + ** of the table containing column. + ** + ** The aConstraints[] array contains entries for all constraints + ** on the current table. That way we only have to compute it once + ** even though we might try to pick the best index multiple times. + ** For each attempt at picking an index, the order of tables in the + ** join might be different so we have to recompute the usable flag + ** each time. + */ + pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; + pUsage = pIdxInfo->aConstraintUsage; + for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ + j = pIdxCons->iTermOffset; + pTerm = &pWC->a[j]; + if( (pTerm->prereqRight&p->notReady)==0 + && (bAllowIN || pTerm->eOperator!=WO_IN) + ){ + pIdxCons->usable = 1; + }else{ + pIdxCons->usable = 0; + } } + memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint); + if( pIdxInfo->needToFreeIdxStr ){ + sqlite3_free(pIdxInfo->idxStr); + } + pIdxInfo->idxStr = 0; + pIdxInfo->idxNum = 0; + pIdxInfo->needToFreeIdxStr = 0; + pIdxInfo->orderByConsumed = 0; + /* ((double)2) In case of SQLITE_OMIT_FLOATING_POINT... */ + pIdxInfo->estimatedCost = SQLITE_BIG_DBL / ((double)2); + nOrderBy = pIdxInfo->nOrderBy; + if( !p->pOrderBy ){ + pIdxInfo->nOrderBy = 0; + } + + if( vtabBestIndex(pParse, pTab, pIdxInfo) ){ + return; + } + + pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; + for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ + if( pUsage[i].argvIndex>0 ){ + j = pIdxCons->iTermOffset; + pTerm = &pWC->a[j]; + p->cost.used |= pTerm->prereqRight; + if( pTerm->eOperator==WO_IN && pUsage[i].omit==0 ){ + /* Do not attempt to use an IN constraint if the virtual table + ** says that the equivalent EQ constraint cannot be safely omitted. + ** If we do attempt to use such a constraint, some rows might be + ** repeated in the output. */ + break; + } + } + } + if( i>=pIdxInfo->nConstraint ) break; } - + /* If there is an ORDER BY clause, and the selected virtual table index ** does not satisfy it, increase the cost of the scan accordingly. This ** matches the processing for non-virtual tables in bestBtreeIndex(). @@ -4063,6 +4096,7 @@ static Bitmask codeOneLoopStart( ** to access the data. */ int iReg; /* P3 Value for OP_VFilter */ + int addrNotFound; sqlite3_index_info *pVtabIdx = pLevel->plan.u.pVtabIdx; int nConstraint = pVtabIdx->nConstraint; struct sqlite3_index_constraint_usage *aUsage = @@ -4072,11 +4106,18 @@ static Bitmask codeOneLoopStart( sqlite3ExprCachePush(pParse); iReg = sqlite3GetTempRange(pParse, nConstraint+2); + addrNotFound = pLevel->addrBrk; for(j=1; j<=nConstraint; j++){ for(k=0; k<nConstraint; k++){ if( aUsage[k].argvIndex==j ){ - int iTerm = aConstraint[k].iTermOffset; - sqlite3ExprCode(pParse, pWC->a[iTerm].pExpr->pRight, iReg+j+1); + WhereTerm *pTerm = &pWC->a[aConstraint[k].iTermOffset]; + int iTarget = iReg+j+1; + if( pTerm->eOperator & WO_IN ){ + codeEqualityTerm(pParse, pTerm, pLevel, iTarget); + addrNotFound = pLevel->addrNxt; + }else{ + sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget); + } break; } } @@ -4084,7 +4125,7 @@ static Bitmask codeOneLoopStart( } sqlite3VdbeAddOp2(v, OP_Integer, pVtabIdx->idxNum, iReg); sqlite3VdbeAddOp2(v, OP_Integer, j-1, iReg+1); - sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrBrk, iReg, pVtabIdx->idxStr, + sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, pVtabIdx->idxStr, pVtabIdx->needToFreeIdxStr ? P4_MPRINTF : P4_STATIC); pVtabIdx->needToFreeIdxStr = 0; for(j=0; j<nConstraint; j++){ |