diff options
Diffstat (limited to 'src/utf.c')
-rw-r--r-- | src/utf.c | 43 |
1 files changed, 37 insertions, 6 deletions
@@ -106,6 +106,35 @@ static const unsigned char sqlite3Utf8Trans1[] = { } /* +** Write a single UTF8 character whose value is v into the +** buffer starting at zOut. zOut must be sized to hold at +** least four bytes. Return the number of bytes needed +** to encode the new character. +*/ +int sqlite3AppendOneUtf8Character(char *zOut, u32 v){ + if( v<0x00080 ){ + zOut[0] = (u8)(v & 0xff); + return 1; + } + if( v<0x00800 ){ + zOut[0] = 0xc0 + (u8)((v>>6) & 0x1f); + zOut[1] = 0x80 + (u8)(v & 0x3f); + return 2; + } + if( v<0x10000 ){ + zOut[0] = 0xe0 + (u8)((v>>12) & 0x0f); + zOut[1] = 0x80 + (u8)((v>>6) & 0x3f); + zOut[2] = 0x80 + (u8)(v & 0x3f); + return 3; + } + zOut[0] = 0xf0 + (u8)((v>>18) & 0x07); + zOut[1] = 0x80 + (u8)((v>>12) & 0x3f); + zOut[2] = 0x80 + (u8)((v>>6) & 0x3f); + zOut[3] = 0x80 + (u8)(v & 0x3f); + return 4; +} + +/* ** Translate a single UTF-8 character. Return the unicode value. ** ** During translation, assume that the byte that zTerm points @@ -136,7 +165,7 @@ static const unsigned char sqlite3Utf8Trans1[] = { c = *(zIn++); \ if( c>=0xc0 ){ \ c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + while( zIn<zTerm && (*zIn & 0xc0)==0x80 ){ \ c = (c<<6) + (0x3f & *(zIn++)); \ } \ if( c<0x80 \ @@ -514,20 +543,22 @@ char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){ } /* -** zIn is a UTF-16 encoded unicode string at least nChar characters long. +** zIn is a UTF-16 encoded unicode string at least nByte bytes long. ** Return the number of bytes in the first nChar unicode characters -** in pZ. nChar must be non-negative. +** in pZ. nChar must be non-negative. Surrogate pairs count as a single +** character. */ -int sqlite3Utf16ByteLen(const void *zIn, int nChar){ +int sqlite3Utf16ByteLen(const void *zIn, int nByte, int nChar){ int c; unsigned char const *z = zIn; + unsigned char const *zEnd = &z[nByte-1]; int n = 0; if( SQLITE_UTF16NATIVE==SQLITE_UTF16LE ) z++; - while( n<nChar ){ + while( n<nChar && z<=zEnd ){ c = z[0]; z += 2; - if( c>=0xd8 && c<0xdc && z[0]>=0xdc && z[0]<0xe0 ) z += 2; + if( c>=0xd8 && c<0xdc && z<=zEnd && z[0]>=0xdc && z[0]<0xe0 ) z += 2; n++; } return (int)(z-(unsigned char const *)zIn) |