diff options
author | dan <Dan Kennedy> | 2025-01-31 14:52:36 +0000 |
---|---|---|
committer | dan <Dan Kennedy> | 2025-01-31 14:52:36 +0000 |
commit | f6ca35d88cad6521ad06c6c9e1bfe0e3066d8cbb (patch) | |
tree | 3d9bb8c7ade44432f4beb24748cffbe93dbbb469 /src | |
parent | a0337903030a123f09eae41a5146993ff81d83e4 (diff) | |
download | sqlite-f6ca35d88cad6521ad06c6c9e1bfe0e3066d8cbb.tar.gz sqlite-f6ca35d88cad6521ad06c6c9e1bfe0e3066d8cbb.zip |
Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF.
FossilOrigin-Name: 4b4f33d791fe4318c4597bee7d2f9e486ed223e731982af470f5cc0dbdc600fc
Diffstat (limited to 'src')
-rw-r--r-- | src/whereexpr.c | 32 |
1 files changed, 20 insertions, 12 deletions
diff --git a/src/whereexpr.c b/src/whereexpr.c index 2b6eb6a78..0a963f4f9 100644 --- a/src/whereexpr.c +++ b/src/whereexpr.c @@ -219,12 +219,12 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - /* Count the number of prefix bytes prior to the first wildcard. - ** or U+fffd character. If the underlying database has a UTF16LE - ** encoding, then only consider ASCII characters. Note that the - ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in - ** this code, but the database engine itself might be processing - ** content using a different encoding. */ + /* Count the number of prefix bytes prior to the first wildcard, + ** U+fffd character, or malformed utf-8. If the underlying database + ** has a UTF16LE encoding, then only consider ASCII characters. Note that + ** the encoding of z[] is UTF8 - we are dealing with only UTF8 here in this + ** code, but the database engine itself might be processing content using a + ** different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; @@ -232,7 +232,9 @@ static int isLikeOrGlob( cnt++; }else if( c>=0x80 ){ const u8 *z2 = z+cnt-1; - if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ + if( sqlite3Utf8Read(&z2)==0xfffd || c==0xFF /* bad utf-8 */ + || ENC(db)==SQLITE_UTF16LE + ){ cnt--; break; }else{ @@ -1384,9 +1386,8 @@ static void exprAnalyze( } if( !db->mallocFailed ){ - u8 c, *pC; /* Last character before the first wildcard */ + u8 *pC; /* Last character before the first wildcard */ pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; - c = *pC; if( noCase ){ /* The point is to increment the last character before the first ** wildcard. But if we increment '@', that will push it into the @@ -1394,10 +1395,17 @@ static void exprAnalyze( ** inequality. To avoid this, make sure to also run the full ** LIKE on all candidate expressions by clearing the isComplete flag */ - if( c=='A'-1 ) isComplete = 0; - c = sqlite3UpperToLower[c]; + if( *pC=='A'-1 ) isComplete = 0; + *pC = sqlite3UpperToLower[*pC]; + } + + /* Increment the value of the last utf8 character in the prefix. */ + while( *pC==0xBF && pC>(u8*)pStr2->u.zToken ){ + *pC = 0x80; + pC--; } - *pC = c + 1; + assert( *pC!=0xFF ); /* isLikeOrGlob() guarantees this */ + (*pC)++; } zCollSeqName = noCase ? "NOCASE" : sqlite3StrBINARY; pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); |