Improved robustness to malformed UTF-16 inputs to sqlite3_prepare16_v2().

FossilOrigin-Name: 7b3a517b3e16ea487ca77a2c88a0c11d737de366524fc911aa1bdd6bfb7ad148
author: drh <> 2024-09-19 13:39:06 +0000
committer: drh <> 2024-09-19 13:39:06 +0000
commit: f8305e46169d531fce2f778b1de99b59b7cd2318 (patch)
tree: 7ab84a4889f8d15a45089fdaf8480956c8385d35 /src
parent: 8513eb6ba84b11a645e1a60184cd649a2039c9e2 (diff)
download: sqlite-f8305e46169d531fce2f778b1de99b59b7cd2318.tar.gz
sqlite-f8305e46169d531fce2f778b1de99b59b7cd2318.zip
3 files changed, 21 insertions, 7 deletions
diff --git a/src/prepare.c b/src/prepare.c
index df9c98f74..7aa1e1a02 100644
--- a/src/prepare.c
+++ b/src/prepare.c
@@ -1007,12 +1007,24 @@ static int sqlite3Prepare16(
   if( !sqlite3SafetyCheckOk(db)||zSql==0 ){
     return SQLITE_MISUSE_BKPT;
   }
+
+  /* Make sure nBytes is non-negative and correct.  It should be the
+  ** number of bytes until the end of the input buffer or until the first
+  ** U+0000 character.  If the input nBytes is odd, convert it into
+  ** an even number.  If the input nBytes is negative, then the input
+  ** must be terminated by at least one U+0000 character */
   if( nBytes>=0 ){
     int sz;
     const char *z = (const char*)zSql;
     for(sz=0; sz<nBytes && (z[sz]!=0 || z[sz+1]!=0); sz += 2){}
     nBytes = sz;
+  }else{
+    int sz;
+    const char *z = (const char*)zSql;
+    for(sz=0; z[sz]!=0 || z[sz+1]!=0; sz += 2){}
+    nBytes = sz;
   }
+
   sqlite3_mutex_enter(db->mutex);
   zSql8 = sqlite3Utf16to8(db, zSql, nBytes, SQLITE_UTF16NATIVE);
   if( zSql8 ){
@@ -1026,7 +1038,7 @@ static int sqlite3Prepare16(
     ** the same number of characters into the UTF-16 string.
     */
     int chars_parsed = sqlite3Utf8CharLen(zSql8, (int)(zTail8-zSql8));
-    *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, chars_parsed);
+    *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, nBytes, chars_parsed);
   }
   sqlite3DbFree(db, zSql8); 
   rc = sqlite3ApiExit(db, rc);
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 0dab59f7a..604f7e975 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -5267,7 +5267,7 @@ int sqlite3GetInt32(const char *, int*);
 int sqlite3GetUInt32(const char*, u32*);
 int sqlite3Atoi(const char*);
 #ifndef SQLITE_OMIT_UTF16
-int sqlite3Utf16ByteLen(const void *pData, int nChar);
+int sqlite3Utf16ByteLen(const void *pData, int nByte, int nChar);
 #endif
 int sqlite3Utf8CharLen(const char *pData, int nByte);
 u32 sqlite3Utf8Read(const u8**);
diff --git a/src/utf.c b/src/utf.c
index 216864f5c..083ada788 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -514,20 +514,22 @@ char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){
 }
 
 /*
-** zIn is a UTF-16 encoded unicode string at least nChar characters long.
+** zIn is a UTF-16 encoded unicode string at least nByte bytes long.
 ** Return the number of bytes in the first nChar unicode characters
-** in pZ.  nChar must be non-negative.
+** in pZ.  nChar must be non-negative.  Surrogate pairs count as a single
+** character.
 */
-int sqlite3Utf16ByteLen(const void *zIn, int nChar){
+int sqlite3Utf16ByteLen(const void *zIn, int nByte, int nChar){
   int c;
   unsigned char const *z = zIn;
+  unsigned char const *zEnd = &z[nByte-1];
   int n = 0;
   
   if( SQLITE_UTF16NATIVE==SQLITE_UTF16LE ) z++;
-  while( n<nChar ){
+  while( n<nChar && ALWAYS(z<=zEnd) ){
     c = z[0];
     z += 2;
-    if( c>=0xd8 && c<0xdc && z[0]>=0xdc && z[0]<0xe0 ) z += 2;
+    if( c>=0xd8 && c<0xdc && z<=zEnd && z[0]>=0xdc && z[0]<0xe0 ) z += 2;
     n++;
   }
   return (int)(z-(unsigned char const *)zIn)
author	drh <>	2024-09-19 13:39:06 +0000
committer	drh <>	2024-09-19 13:39:06 +0000
commit	f8305e46169d531fce2f778b1de99b59b7cd2318 (patch)
tree	7ab84a4889f8d15a45089fdaf8480956c8385d35 /src
parent	8513eb6ba84b11a645e1a60184cd649a2039c9e2 (diff)
download	sqlite-f8305e46169d531fce2f778b1de99b59b7cd2318.tar.gz sqlite-f8305e46169d531fce2f778b1de99b59b7cd2318.zip