aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <drh@noemail.net>2009-10-23 18:15:46 +0000
committerdrh <drh@noemail.net>2009-10-23 18:15:46 +0000
commit7c95b0f3dab6f6924826f32ee570e852dc4fc946 (patch)
tree2802c661095c1457df412dcd70b2e6062e534d00 /src
parent9bd1b449442b3e8faeb2da6607cf5a52edc5e96c (diff)
downloadsqlite-7c95b0f3dab6f6924826f32ee570e852dc4fc946.tar.gz
sqlite-7c95b0f3dab6f6924826f32ee570e852dc4fc946.zip
Make sure that UTF16 to UTF8 conversions do not read past the end of the
UTF16 input buffer if the last two bytes of the UTF16 happen to be the first half of a surrogate pair. Ticket [3fe897352e] FossilOrigin-Name: 19064d7cea838e1a93fe63743ed247f440679e97
Diffstat (limited to 'src')
-rw-r--r--src/test_func.c107
-rw-r--r--src/utf.c21
2 files changed, 116 insertions, 12 deletions
diff --git a/src/test_func.c b/src/test_func.c
index d0f47d400..355767461 100644
--- a/src/test_func.c
+++ b/src/test_func.c
@@ -313,6 +313,108 @@ static void test_eval(
}
+/*
+** Convert one hexadecimal digit to its integer value (0 if not a hex digit).
+*/
+static int testHexChar(char c){
+ if( c>='0' && c<='9' ){
+ return c - '0';
+ }else if( c>='a' && c<='f' ){
+ return c - 'a' + 10;
+ }else if( c>='A' && c<='F' ){
+ return c - 'A' + 10;
+ }
+ return 0;
+}
+
+/*
+** Convert hex to binary.
+*/
+static void testHexToBin(const char *zIn, char *zOut){
+ while( zIn[0] && zIn[1] ){
+ *(zOut++) = (testHexChar(zIn[0])<<4) + testHexChar(zIn[1]);
+ zIn += 2;
+ }
+}
+
+/*
+** hex_to_utf16be(HEX)
+**
+** Convert the input string from HEX into binary. Then return the
+** result using sqlite3_result_text16be().
+*/
+static void testHexToUtf16be(
+ sqlite3_context *pCtx,
+ int nArg,
+ sqlite3_value **argv
+){
+ int n;
+ const char *zIn;
+ char *zOut;
+ assert( nArg==1 );
+ n = sqlite3_value_bytes(argv[0]);
+ zIn = (const char*)sqlite3_value_text(argv[0]);
+ zOut = sqlite3_malloc( n/2 );
+ if( zOut==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ }else{
+ testHexToBin(zIn, zOut);
+ sqlite3_result_text16be(pCtx, zOut, n/2, sqlite3_free);
+ }
+}
+
+/*
+** hex_to_utf8(HEX)
+**
+** Convert the input string from HEX into binary. Then return the
+** result using sqlite3_result_text().
+*/
+static void testHexToUtf8(
+ sqlite3_context *pCtx,
+ int nArg,
+ sqlite3_value **argv
+){
+ int n;
+ const char *zIn;
+ char *zOut;
+ assert( nArg==1 );
+ n = sqlite3_value_bytes(argv[0]);
+ zIn = (const char*)sqlite3_value_text(argv[0]);
+ zOut = sqlite3_malloc( n/2 );
+ if( zOut==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ }else{
+ testHexToBin(zIn, zOut);
+ sqlite3_result_text(pCtx, zOut, n/2, sqlite3_free);
+ }
+}
+
+/*
+** hex_to_utf16le(HEX)
+**
+** Convert the input string from HEX into binary. Then return the
+** result using sqlite3_result_text16le().
+*/
+static void testHexToUtf16le(
+ sqlite3_context *pCtx,
+ int nArg,
+ sqlite3_value **argv
+){
+ int n;
+ const char *zIn;
+ char *zOut;
+ assert( nArg==1 );
+ n = sqlite3_value_bytes(argv[0]);
+ zIn = (const char*)sqlite3_value_text(argv[0]);
+ zOut = sqlite3_malloc( n/2 );
+ if( zOut==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ }else{
+ testHexToBin(zIn, zOut);
+ sqlite3_result_text16le(pCtx, zOut, n/2, sqlite3_free);
+ }
+}
+
static int registerTestFunctions(sqlite3 *db){
static const struct {
char *zName;
@@ -324,7 +426,10 @@ static int registerTestFunctions(sqlite3 *db){
{ "test_destructor", 1, SQLITE_UTF8, test_destructor},
#ifndef SQLITE_OMIT_UTF16
{ "test_destructor16", 1, SQLITE_UTF8, test_destructor16},
+ { "hex_to_utf16be", 1, SQLITE_UTF8, testHexToUtf16be},
+ { "hex_to_utf16le", 1, SQLITE_UTF8, testHexToUtf16le},
#endif
+ { "hex_to_utf8", 1, SQLITE_UTF8, testHexToUtf8},
{ "test_destructor_count", 0, SQLITE_UTF8, test_destructor_count},
{ "test_auxdata", -1, SQLITE_UTF8, test_auxdata},
{ "test_error", 1, SQLITE_UTF8, test_error},
@@ -447,8 +552,6 @@ abuse_err:
return TCL_ERROR;
}
-
-
/*
** Register commands with the TCL interpreter.
*/
diff --git a/src/utf.c b/src/utf.c
index c3d07be82..93ce47c48 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -107,20 +107,20 @@ static const unsigned char sqlite3Utf8Trans1[] = {
} \
}
-#define READ_UTF16LE(zIn, c){ \
+#define READ_UTF16LE(zIn, zTerm, c){ \
c = (*zIn++); \
c += ((*zIn++)<<8); \
- if( c>=0xD800 && c<0xE000 ){ \
+ if( c>=0xD800 && c<0xE000 && zIn<zTerm ){ \
int c2 = (*zIn++); \
c2 += ((*zIn++)<<8); \
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
} \
}
-#define READ_UTF16BE(zIn, c){ \
+#define READ_UTF16BE(zIn, zTerm, c){ \
c = ((*zIn++)<<8); \
c += (*zIn++); \
- if( c>=0xD800 && c<0xE000 ){ \
+ if( c>=0xD800 && c<0xE000 && zIn<zTerm ){ \
int c2 = ((*zIn++)<<8); \
c2 += (*zIn++); \
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
@@ -305,13 +305,13 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
if( pMem->enc==SQLITE_UTF16LE ){
/* UTF-16 Little-endian -> UTF-8 */
while( zIn<zTerm ){
- READ_UTF16LE(zIn, c);
+ READ_UTF16LE(zIn, zTerm, c);
WRITE_UTF8(z, c);
}
}else{
/* UTF-16 Big-endian -> UTF-8 */
while( zIn<zTerm ){
- READ_UTF16BE(zIn, c);
+ READ_UTF16BE(zIn, zTerm, c);
WRITE_UTF8(z, c);
}
}
@@ -488,6 +488,7 @@ char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){
int sqlite3Utf16ByteLen(const void *zIn, int nChar){
int c;
unsigned char const *z = zIn;
+ unsigned char const *zTerm = &z[nChar];
int n = 0;
if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
/* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
@@ -500,12 +501,12 @@ int sqlite3Utf16ByteLen(const void *zIn, int nChar){
** penalty is paid for this "if" statement.
*/
while( n<nChar ){
- READ_UTF16BE(z, c);
+ READ_UTF16BE(z, zTerm, c);
n++;
}
}else{
while( n<nChar ){
- READ_UTF16LE(z, c);
+ READ_UTF16LE(z, zTerm, c);
n++;
}
}
@@ -547,7 +548,7 @@ void sqlite3UtfSelfTest(void){
assert( n>0 && n<=4 );
z[0] = 0;
z = zBuf;
- READ_UTF16LE(z, c);
+ READ_UTF16LE(z, &zBuf[n], c);
assert( c==i );
assert( (z-zBuf)==n );
}
@@ -559,7 +560,7 @@ void sqlite3UtfSelfTest(void){
assert( n>0 && n<=4 );
z[0] = 0;
z = zBuf;
- READ_UTF16BE(z, c);
+ READ_UTF16BE(z, &zBuf[n], c);
assert( c==i );
assert( (z-zBuf)==n );
}