diff options
author | drh <drh@noemail.net> | 2007-05-15 11:55:09 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2007-05-15 11:55:09 +0000 |
commit | 4a919118d3b9a1854c97d3e873a2c92ff37d22b5 (patch) | |
tree | 34060bee4a1c2b41be031e54192e5efbfb52e036 /src/sqliteInt.h | |
parent | eab7f3fe8db327632dbef21188c07e3618410470 (diff) | |
download | sqlite-4a919118d3b9a1854c97d3e873a2c92ff37d22b5.tar.gz sqlite-4a919118d3b9a1854c97d3e873a2c92ff37d22b5.zip |
A new approach for UTF-8 translation. (CVS 4004)
FossilOrigin-Name: 6c8ad2790eaede90b3f1ef62614e667178b2a8c4
Diffstat (limited to 'src/sqliteInt.h')
-rw-r--r-- | src/sqliteInt.h | 61 |
1 files changed, 59 insertions, 2 deletions
diff --git a/src/sqliteInt.h b/src/sqliteInt.h index a550ca8ab..70d26ed1b 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.566 2007/05/12 12:08:51 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.567 2007/05/15 11:55:09 drh Exp $ */ #ifndef _SQLITEINT_H_ #define _SQLITEINT_H_ @@ -1537,6 +1537,63 @@ typedef struct { extern int sqlite3_always_code_trigger_setup; /* +** A lookup table used by the SQLITE_READ_UTF8 macro. The definition +** is in utf.c. +*/ +extern const unsigned char sqlite3UtfTrans1[]; + +/* +** Macros for reading UTF8 characters. +** +** SQLITE_READ_UTF8(x,c) reads a single UTF8 value out of x and writes +** that value into c. The type of x must be unsigned char*. The type +** of c must be unsigned int. +** +** SQLITE_SKIP_UTF8(x) advances x forward by one character. The type of +** x must be unsigned char*. +** +** Notes On Invalid UTF-8: +** +** * These macros never allow a 7-bit character (0x00 through 0x7f) to +** be encoded as a multi-byte character. Any multi-byte character that +** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd. +** +** * These macros never allow a UTF16 surrogate value to be encoded. +** If a multi-byte character attempts to encode a value between +** 0xd800 and 0xe000 then it is rendered as 0xfffd. +** +** * Bytes in the range of 0x80 through 0xbf which occur as the first +** byte of a character are interpreted as single-byte characters +** and rendered as themselves even though they are technically +** invalid characters. +** +** * These routines accept an infinite number of different UTF8 encodings +** for Unicode values 0x80 and greater. They do not change over-length +** encodings to 0xfffd as some systems recommend. 
+** +*/ +#define SQLITE_READ_UTF8(zIn, c) { \ + c = *(zIn++); \ + if( c>=0xc0 ){ \ + c = sqlite3UtfTrans1[c-0xc0]; \ + while( (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } \ +} +#define SQLITE_SKIP_UTF8(zIn) { \ + if( (*(zIn++))>=0xc0 ){ \ + while( (*zIn & 0xc0)==0x80 ){ zIn++; } \ + } \ +} + + + + +/* ** The SQLITE_CORRUPT_BKPT macro can be either a constant (for production ** builds) or a function call (for debugging). If it is a function call, ** it allows the operator to set a breakpoint at the spot where database @@ -1753,7 +1810,7 @@ int sqlite3GetInt32(const char *, int*); int sqlite3FitsIn64Bits(const char *); int sqlite3Utf16ByteLen(const void *pData, int nChar); int sqlite3Utf8CharLen(const char *pData, int nByte); -int sqlite3ReadUtf8(const unsigned char *); +u32 sqlite3ReadUtf8(const unsigned char *); int sqlite3PutVarint(unsigned char *, u64); int sqlite3GetVarint(const unsigned char *, u64 *); int sqlite3GetVarint32(const unsigned char *, u32 *); |