aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordan <Dan Kennedy>2025-01-31 14:52:36 +0000
committerdan <Dan Kennedy>2025-01-31 14:52:36 +0000
commitf6ca35d88cad6521ad06c6c9e1bfe0e3066d8cbb (patch)
tree3d9bb8c7ade44432f4beb24748cffbe93dbbb469 /src
parenta0337903030a123f09eae41a5146993ff81d83e4 (diff)
downloadsqlite-f6ca35d88cad6521ad06c6c9e1bfe0e3066d8cbb.tar.gz
sqlite-f6ca35d88cad6521ad06c6c9e1bfe0e3066d8cbb.zip
Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF.
FossilOrigin-Name: 4b4f33d791fe4318c4597bee7d2f9e486ed223e731982af470f5cc0dbdc600fc
Diffstat (limited to 'src')
-rw-r--r--src/whereexpr.c32
1 files changed, 20 insertions, 12 deletions
diff --git a/src/whereexpr.c b/src/whereexpr.c
index 2b6eb6a78..0a963f4f9 100644
--- a/src/whereexpr.c
+++ b/src/whereexpr.c
@@ -219,12 +219,12 @@ static int isLikeOrGlob(
z = (u8*)pRight->u.zToken;
}
if( z ){
- /* Count the number of prefix bytes prior to the first wildcard.
- ** or U+fffd character. If the underlying database has a UTF16LE
- ** encoding, then only consider ASCII characters. Note that the
- ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in
- ** this code, but the database engine itself might be processing
- ** content using a different encoding. */
+ /* Count the number of prefix bytes prior to the first wildcard,
+ ** U+fffd character, or malformed utf-8. If the underlying database
+ ** has a UTF16LE encoding, then only consider ASCII characters. Note that
+ ** the encoding of z[] is UTF8 - we are dealing with only UTF8 here in this
+ ** code, but the database engine itself might be processing content using a
+ ** different encoding. */
cnt = 0;
while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
cnt++;
@@ -232,7 +232,9 @@ static int isLikeOrGlob(
cnt++;
}else if( c>=0x80 ){
const u8 *z2 = z+cnt-1;
- if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){
+ if( sqlite3Utf8Read(&z2)==0xfffd || c==0xFF /* bad utf-8 */
+ || ENC(db)==SQLITE_UTF16LE
+ ){
cnt--;
break;
}else{
@@ -1384,9 +1386,8 @@ static void exprAnalyze(
}
if( !db->mallocFailed ){
- u8 c, *pC; /* Last character before the first wildcard */
+ u8 *pC; /* Last character before the first wildcard */
pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1];
- c = *pC;
if( noCase ){
/* The point is to increment the last character before the first
** wildcard. But if we increment '@', that will push it into the
@@ -1394,10 +1395,17 @@ static void exprAnalyze(
** inequality. To avoid this, make sure to also run the full
** LIKE on all candidate expressions by clearing the isComplete flag
*/
- if( c=='A'-1 ) isComplete = 0;
- c = sqlite3UpperToLower[c];
+ if( *pC=='A'-1 ) isComplete = 0;
+ *pC = sqlite3UpperToLower[*pC];
+ }
+
+ /* Increment the value of the last utf8 character in the prefix. */
+ while( *pC==0xBF && pC>(u8*)pStr2->u.zToken ){
+ *pC = 0x80;
+ pC--;
}
- *pC = c + 1;
+ assert( *pC!=0xFF ); /* isLikeOrGlob() guarantees this */
+ (*pC)++;
}
zCollSeqName = noCase ? "NOCASE" : sqlite3StrBINARY;
pNewExpr1 = sqlite3ExprDup(db, pLeft, 0);