aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author danielk1977 <danielk1977@noemail.net> 2004-06-18 04:24:54 +0000
committer danielk1977 <danielk1977@noemail.net> 2004-06-18 04:24:54 +0000
commit bfd6cce56bbb02a96fd7599ff89e1e807fa2df29 (patch)
tree e0d7c19ec2260b540dc806932b07a75dec67f2d9 /src
parent a2854229224e9e13eab1a9e9031057e6a259c38c (diff)
download sqlite-bfd6cce56bbb02a96fd7599ff89e1e807fa2df29.tar.gz
sqlite-bfd6cce56bbb02a96fd7599ff89e1e807fa2df29.zip
Optimisation for unicode encoding conversion routines. (CVS 1614)
FossilOrigin-Name: 39a415eaa65964742e40b7ea4d471fa04007c6c9
Diffstat (limited to 'src')
-rw-r--r-- src/build.c 23
-rw-r--r-- src/main.c 127
-rw-r--r-- src/sqliteInt.h 21
-rw-r--r-- src/test1.c 6
-rw-r--r-- src/test5.c 284
-rw-r--r-- src/tokenize.c 20
-rw-r--r-- src/utf.c 946
-rw-r--r-- src/util.c 38
-rw-r--r-- src/vdbe.c 39
-rw-r--r-- src/vdbeInt.h 2
-rw-r--r-- src/vdbeapi.c 15
-rw-r--r-- src/vdbemem.c 93
12 files changed, 630 insertions, 984 deletions
diff --git a/src/build.c b/src/build.c
index 1cdc7974f..0114aeb47 100644
--- a/src/build.c
+++ b/src/build.c
@@ -23,7 +23,7 @@
** ROLLBACK
** PRAGMA
**
-** $Id: build.c,v 1.220 2004/06/17 06:13:34 danielk1977 Exp $
+** $Id: build.c,v 1.221 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
@@ -959,15 +959,12 @@ static void callCollNeeded(sqlite *db, const char *zName, int nName){
db->xCollNeeded(db->pCollNeededArg, db, (int)db->enc, zExternal);
}
if( db->xCollNeeded16 ){
- if( SQLITE_BIGENDIAN ){
- zExternal = sqlite3utf8to16be(zName, nName);
- }else{
- zExternal = sqlite3utf8to16le(zName, nName);
- }
+ sqlite3_value *pTmp = sqlite3GetTransientValue(db);
+ sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF8, SQLITE_STATIC);
+ zExternal = sqlite3ValueText(pTmp, SQLITE_UTF16NATIVE);
if( !zExternal ) return;
db->xCollNeeded16(db->pCollNeededArg, db, (int)db->enc, zExternal);
}
- if( zExternal ) sqliteFree(zExternal);
}
static int synthCollSeq(Parse *pParse, CollSeq *pColl){
@@ -2627,3 +2624,15 @@ void sqlite3EndWriteOperation(Parse *pParse){
/* Delete me! */
return;
}
+
+/*
+** Return db->pValue, the sqlite3_value reused for transient encoding
+** conversions during SQL compilation; it is created on first use.
+*/
+sqlite3_value *sqlite3GetTransientValue(sqlite *db){
+ if( !db->pValue ){
+ db->pValue = sqlite3ValueNew();
+ }
+ return db->pValue;
+}
+
diff --git a/src/main.c b/src/main.c
index fa4bcc211..b7022d72e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,7 +14,7 @@
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
-** $Id: main.c,v 1.224 2004/06/16 12:00:56 danielk1977 Exp $
+** $Id: main.c,v 1.225 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
@@ -515,6 +515,12 @@ void sqlite3_close(sqlite *db){
sqlite3HashClear(&db->aFunc);
sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */
+ if( db->pValue ){
+ sqlite3ValueFree(db->pValue);
+ }
+ if( db->pErr ){
+ sqlite3ValueFree(db->pErr);
+ }
sqliteFree(db);
}
@@ -749,14 +755,17 @@ int sqlite3_create_function16(
void (*xFinal)(sqlite3_context*)
){
int rc;
- char *zFunctionName8;
- zFunctionName8 = sqlite3utf16to8(zFunctionName, -1, SQLITE_BIGENDIAN);
- if( !zFunctionName8 ){
+ char const *zFunc8;
+
+ sqlite3_value *pTmp = sqlite3GetTransientValue(db);
+ sqlite3ValueSetStr(pTmp, -1, zFunctionName, SQLITE_UTF16NATIVE,SQLITE_STATIC);
+ zFunc8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
+
+ if( !zFunc8 ){
return SQLITE_NOMEM;
}
- rc = sqlite3_create_function(db, zFunctionName8, nArg, eTextRep,
+ rc = sqlite3_create_function(db, zFunc8, nArg, eTextRep,
iCollateArg, pUserData, xFunc, xStep, xFinal);
- sqliteFree(zFunctionName8);
return rc;
}
@@ -844,16 +853,16 @@ int sqlite3BtreeFactory(
** error.
*/
const char *sqlite3_errmsg(sqlite3 *db){
- if( !db ){
+ if( !db || !db->pErr ){ /* a handle without a pErr value is treated like a NULL handle */
/* If db is NULL, then assume that a malloc() failed during an
** sqlite3_open() call.
*/
return sqlite3ErrStr(SQLITE_NOMEM);
}
- if( db->zErrMsg ){
- return db->zErrMsg;
+ if( !sqlite3_value_text(db->pErr) ){ /* pErr yields no text: use the generic string for errCode */
+ return sqlite3ErrStr(db->errCode);
}
- return sqlite3ErrStr(db->errCode);
+ return sqlite3_value_text(db->pErr);
}
/*
@@ -861,38 +870,32 @@ const char *sqlite3_errmsg(sqlite3 *db){
** error.
*/
const void *sqlite3_errmsg16(sqlite3 *db){
- if( !db ){
- /* If db is NULL, then assume that a malloc() failed during an
- ** sqlite3_open() call. We have a static version of the string
- ** "out of memory" encoded using UTF-16 just for this purpose.
- **
- ** Because all the characters in the string are in the unicode
- ** range 0x00-0xFF, if we pad the big-endian string with a
- ** zero byte, we can obtain the little-endian string with
- ** &big_endian[1].
- */
- static char outOfMemBe[] = {
- 0, 'o', 0, 'u', 0, 't', 0, ' ',
- 0, 'o', 0, 'f', 0, ' ',
- 0, 'm', 0, 'e', 0, 'm', 0, 'o', 0, 'r', 0, 'y', 0, 0, 0
- };
- static char *outOfMemLe = &outOfMemBe[1];
-
- if( SQLITE_BIGENDIAN ){
- return (void *)outOfMemBe;
- }else{
- return (void *)outOfMemLe;
+ /* Because all the characters in the string are in the unicode
+ ** range 0x00-0xFF, if we pad the big-endian string with a
+ ** zero byte, we can obtain the little-endian string with
+ ** &big_endian[1].
+ */
+ static char outOfMemBe[] = {
+ 0, 'o', 0, 'u', 0, 't', 0, ' ',
+ 0, 'o', 0, 'f', 0, ' ',
+ 0, 'm', 0, 'e', 0, 'm', 0, 'o', 0, 'r', 0, 'y', 0, 0, 0
+ };
+
+ if( db && db->pErr ){
+ if( !sqlite3_value_text16(db->pErr) ){
+ sqlite3ValueSetStr(db->pErr, -1, sqlite3ErrStr(db->errCode),
+ SQLITE_UTF8, SQLITE_STATIC);
}
- }
- if( !db->zErrMsg16 ){
- char const *zErr8 = sqlite3_errmsg(db);
- if( SQLITE_BIGENDIAN ){
- db->zErrMsg16 = sqlite3utf8to16be(zErr8, -1);
- }else{
- db->zErrMsg16 = sqlite3utf8to16le(zErr8, -1);
+ if( sqlite3_value_text16(db->pErr) ){
+ return sqlite3_value_text16(db->pErr);
}
- }
- return db->zErrMsg16;
+ }
+
+ /* Reached when db is NULL, db->pErr is NULL, or no UTF-16 text could be
+ ** obtained above. Assume that a malloc() failed; we have a static version
+ ** of the string "out of memory" encoded using UTF-16 for this purpose.
+ */
+ return (void *)(&outOfMemBe[SQLITE_UTF16NATIVE==SQLITE_UTF16LE?1:0]);
}
int sqlite3_errcode(sqlite3 *db){
@@ -1047,11 +1050,14 @@ int sqlite3_prepare16(
** encoded string to UTF-8, then invoking sqlite3_prepare(). The
** tricky bit is figuring out the pointer to return in *pzTail.
*/
- char *zSql8 = 0;
+ char const *zSql8 = 0;
char const *zTail8 = 0;
int rc;
+ sqlite3_value *pTmp;
- zSql8 = sqlite3utf16to8(zSql, nBytes, SQLITE_BIGENDIAN);
+ pTmp = sqlite3GetTransientValue(db);
+ sqlite3ValueSetStr(pTmp, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
+ zSql8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
if( !zSql8 ){
sqlite3Error(db, SQLITE_NOMEM, 0);
return SQLITE_NOMEM;
@@ -1067,7 +1073,6 @@ int sqlite3_prepare16(
int chars_parsed = sqlite3utf8CharLen(zSql8, zTail8-zSql8);
*pzTail = (u8 *)zSql + sqlite3utf16ByteLen(zSql, chars_parsed);
}
- sqliteFree(zSql8);
return rc;
}
@@ -1134,7 +1139,6 @@ static int openDatabase(
}
rc = sqlite3BtreeFactory(db, zFilename, 0, MAX_PAGES, &db->aDb[0].pBt);
if( rc!=SQLITE_OK ){
- /* FIX ME: sqlite3BtreeFactory() should call sqlite3Error(). */
sqlite3Error(db, rc, 0);
db->magic = SQLITE_MAGIC_CLOSED;
goto opendb_out;
@@ -1148,6 +1152,7 @@ static int openDatabase(
*/
sqlite3RegisterBuiltinFunctions(db);
if( rc==SQLITE_OK ){
+ sqlite3Error(db, SQLITE_OK, 0);
db->magic = SQLITE_MAGIC_OPEN;
}else{
sqlite3Error(db, rc, "%s", zErrMsg, 0);
@@ -1177,21 +1182,24 @@ int sqlite3_open16(
const void *zFilename,
sqlite3 **ppDb
){
- char *zFilename8; /* zFilename encoded in UTF-8 instead of UTF-16 */
- int rc;
+ char const *zFilename8; /* zFilename encoded in UTF-8 instead of UTF-16 */
+ int rc = SQLITE_NOMEM;
+ sqlite3_value *pVal;
assert( ppDb );
-
- zFilename8 = sqlite3utf16to8(zFilename, -1, SQLITE_BIGENDIAN);
- if( !zFilename8 ){
- *ppDb = 0;
- return SQLITE_NOMEM;
+ *ppDb = 0;
+ pVal = sqlite3ValueNew();
+ sqlite3ValueSetStr(pVal, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC);
+ zFilename8 = sqlite3ValueText(pVal, SQLITE_UTF8);
+ if( zFilename8 ){
+ rc = openDatabase(zFilename8, ppDb);
+ if( rc==SQLITE_OK && *ppDb ){
+ sqlite3_exec(*ppDb, "PRAGMA encoding = 'UTF-16'", 0, 0, 0);
+ }
}
- rc = openDatabase(zFilename8, ppDb);
- if( rc==SQLITE_OK && *ppDb ){
- sqlite3_exec(*ppDb, "PRAGMA encoding = 'UTF-16'", 0, 0, 0);
+ if( pVal ){
+ sqlite3ValueFree(pVal);
}
- sqliteFree(zFilename8);
return rc;
}
@@ -1273,10 +1281,11 @@ int sqlite3_create_collation16(
int(*xCompare)(void*,int,const void*,int,const void*)
){
int rc;
- char *zName8 = sqlite3utf16to8(zName, -1, SQLITE_BIGENDIAN);
- rc = sqlite3_create_collation(db, zName8, enc, pCtx, xCompare);
- sqliteFree(zName8);
- return rc;
+ char const *zName8;
+ sqlite3_value *pTmp = sqlite3GetTransientValue(db);
+ sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF16NATIVE, SQLITE_STATIC);
+ zName8 = sqlite3ValueText(pTmp, SQLITE_UTF8);
+ return sqlite3_create_collation(db, zName8, enc, pCtx, xCompare);
}
/*
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 416642534..05f51304e 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -11,7 +11,7 @@
*************************************************************************
** Internal interface definitions for SQLite.
**
-** @(#) $Id: sqliteInt.h,v 1.286 2004/06/17 05:36:44 danielk1977 Exp $
+** @(#) $Id: sqliteInt.h,v 1.287 2004/06/18 04:24:54 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
@@ -194,6 +194,7 @@ extern const int sqlite3one;
# define sqliteStrNDup(X,Y) sqlite3StrNDup_(X,Y,__FILE__,__LINE__)
void sqlite3StrRealloc(char**);
#else
+# define sqlite3FreeX sqliteFree
# define sqlite3Realloc_(X,Y) sqliteRealloc(X,Y)
# define sqlite3StrRealloc(X)
#endif
@@ -422,14 +423,17 @@ struct sqlite {
#endif
int errCode; /* Most recent error code (SQLITE_*) */
- char *zErrMsg; /* Most recent error message (UTF-8 encoded) */
- void *zErrMsg16; /* Most recent error message (UTF-16 encoded) */
u8 enc; /* Text encoding for this database. */
u8 autoCommit; /* The auto-commit flag. */
int nMaster; /* Length of master journal name. -1=unknown */
void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*);
void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*);
void *pCollNeededArg;
+ sqlite3_value *pValue; /* Value used for transient conversions */
+ sqlite3_value *pErr; /* Most recent error message */
+
+ char *zErrMsg; /* Most recent error message (UTF-8 encoded) */
+ char *zErrMsg16; /* Most recent error message (UTF-16 encoded) */
};
/*
@@ -1213,6 +1217,7 @@ void sqlite3RealToSortable(double r, char *);
char *sqlite3StrDup_(const char*,char*,int);
char *sqlite3StrNDup_(const char*, int,char*,int);
void sqlite3CheckMemory(void*,int);
+ void sqlite3FreeX(void *p);
#else
void *sqliteMalloc(int);
void *sqliteMallocRaw(int);
@@ -1375,11 +1380,6 @@ char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3GetInt64(const char *, i64*);
int sqlite3FitsIn64Bits(const char *);
-unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian);
-void *sqlite3utf8to16be(const unsigned char *pIn, int N);
-void *sqlite3utf8to16le(const unsigned char *pIn, int N);
-void sqlite3utf16to16le(void *pData, int N);
-void sqlite3utf16to16be(void *pData, int N);
int sqlite3utf16ByteLen(const void *pData, int nChar);
int sqlite3utf8CharLen(const char *pData, int nByte);
int sqlite3utf8LikeCompare(const unsigned char*, const unsigned char*);
@@ -1396,8 +1396,6 @@ int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity);
char sqlite3ExprAffinity(Expr *pExpr);
int sqlite3atoi64(const char*, i64*);
void sqlite3Error(sqlite *, int, const char*,...);
-int sqlite3utfTranslate(const void *, int , u8 , void **, int *, u8);
-u8 sqlite3UtfReadBom(const void *zData, int nData);
void *sqlite3HexToBlob(const char *z);
int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
const char *sqlite3ErrStr(int);
@@ -1412,6 +1410,7 @@ int sqlite3CheckObjectName(Parse *, const char *);
const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
-void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8);
+void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*));
void sqlite3ValueFree(sqlite3_value*);
sqlite3_value *sqlite3ValueNew();
+sqlite3_value *sqlite3GetTransientValue(sqlite *db);
diff --git a/src/test1.c b/src/test1.c
index ce122405f..e5cc0e218 100644
--- a/src/test1.c
+++ b/src/test1.c
@@ -13,7 +13,7 @@
** is not included in the SQLite library. It is used for automated
** testing of the SQLite library.
**
-** $Id: test1.c,v 1.77 2004/06/15 02:44:19 danielk1977 Exp $
+** $Id: test1.c,v 1.78 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
@@ -940,9 +940,9 @@ static int test_collate_func(
}
pVal = sqlite3ValueNew();
- sqlite3ValueSetStr(pVal, nA, zA, encin);
+ sqlite3ValueSetStr(pVal, nA, zA, encin, SQLITE_STATIC);
Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
- sqlite3ValueSetStr(pVal, nB, zB, encin);
+ sqlite3ValueSetStr(pVal, nB, zB, encin, SQLITE_STATIC);
Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
sqlite3ValueFree(pVal);
diff --git a/src/test5.c b/src/test5.c
index 525716be1..8ce005323 100644
--- a/src/test5.c
+++ b/src/test5.c
@@ -15,7 +15,7 @@
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
-** $Id: test5.c,v 1.10 2004/06/12 00:42:35 danielk1977 Exp $
+** $Id: test5.c,v 1.11 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
@@ -25,195 +25,6 @@
#include <string.h>
/*
-** Return the number of bytes up to and including the first pair of
-** 0x00 bytes in *pStr.
-*/
-static int utf16_length(const unsigned char *pZ){
- const unsigned char *pC1 = pZ;
- const unsigned char *pC2 = pZ+1;
- while( *pC1 || *pC2 ){
- pC1 += 2;
- pC2 += 2;
- }
- return (pC1-pZ)+2;
-}
-
-/*
-** tclcmd: sqlite_utf8to16le STRING
-** title: Convert STRING from utf-8 to utf-16le
-**
-** Return the utf-16le encoded string
-*/
-static int sqlite_utf8to16le(
- void * clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
- unsigned char *out;
- unsigned char *in;
- Tcl_Obj *res;
-
- if( objc!=2 ){
- Tcl_AppendResult(interp, "wrong # args: should be \"",
- Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
- return TCL_ERROR;
- }
-
- in = Tcl_GetString(objv[1]);
- out = (unsigned char *)sqlite3utf8to16le(in, -1);
- res = Tcl_NewByteArrayObj(out, utf16_length(out));
- sqliteFree(out);
-
- Tcl_SetObjResult(interp, res);
-
- return TCL_OK;
-}
-
-/*
-** tclcmd: sqlite_utf8to16be STRING
-** title: Convert STRING from utf-8 to utf-16be
-**
-** Return the utf-16be encoded string
-*/
-static int sqlite_utf8to16be(
- void * clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
- unsigned char *out;
- unsigned char *in;
- Tcl_Obj *res;
-
- if( objc!=2 ){
- Tcl_AppendResult(interp, "wrong # args: should be \"",
- Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
- return TCL_ERROR;
- }
-
- in = Tcl_GetByteArrayFromObj(objv[1], 0);
- in = Tcl_GetString(objv[1]);
- out = (unsigned char *)sqlite3utf8to16be(in, -1);
- res = Tcl_NewByteArrayObj(out, utf16_length(out));
- sqliteFree(out);
-
- Tcl_SetObjResult(interp, res);
-
- return TCL_OK;
-}
-
-/*
-** tclcmd: sqlite_utf16to16le STRING
-** title: Convert STRING from utf-16 in native byte order to utf-16le
-**
-** Return the utf-16le encoded string. If the input string contains
-** a byte-order mark, then the byte order mark should override the
-** native byte order.
-*/
-static int sqlite_utf16to16le(
- void * clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
- unsigned char *out;
- unsigned char *in;
- int in_len;
- Tcl_Obj *res;
-
- if( objc!=2 ){
- Tcl_AppendResult(interp, "wrong # args: should be \"",
- Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
- return TCL_ERROR;
- }
-
- in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
- out = (unsigned char *)sqliteMalloc(in_len);
- memcpy(out, in, in_len);
-
- sqlite3utf16to16le(out, -1);
- res = Tcl_NewByteArrayObj(out, utf16_length(out));
- sqliteFree(out);
-
- Tcl_SetObjResult(interp, res);
-
- return TCL_OK;
-}
-
-/*
-** tclcmd: sqlite_utf16to16be STRING
-** title: Convert STRING from utf-16 in native byte order to utf-16be
-**
-** Return the utf-16be encoded string. If the input string contains
-** a byte-order mark, then the byte order mark should override the
-** native byte order.
-*/
-static int sqlite_utf16to16be(
- void * clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
- unsigned char *out;
- unsigned char *in;
- int in_len;
- Tcl_Obj *res;
-
- if( objc!=2 ){
- Tcl_AppendResult(interp, "wrong # args: should be \"",
- Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
- return TCL_ERROR;
- }
-
- in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
- out = (unsigned char *)sqliteMalloc(in_len);
- memcpy(out, in, in_len);
-
- sqlite3utf16to16be(out, -1);
- res = Tcl_NewByteArrayObj(out, utf16_length(out));
- sqliteFree(out);
-
- Tcl_SetObjResult(interp, res);
-
- return TCL_OK;
-}
-
-/*
-** tclcmd: sqlite_utf16to8 STRING
-** title: Convert STRING from utf-16 in native byte order to utf-8
-**
-** Return the utf-8 encoded string. If the input string contains
-** a byte-order mark, then the byte order mark should override the
-** native byte order.
-*/
-static int sqlite_utf16to8(
- void * clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
- unsigned char *out;
- unsigned char *in;
- Tcl_Obj *res;
-
- if( objc!=2 ){
- Tcl_AppendResult(interp, "wrong # args: should be \"",
- Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>", 0);
- return TCL_ERROR;
- }
-
- in = Tcl_GetByteArrayFromObj(objv[1], 0);
- out = sqlite3utf16to8(in, -1, SQLITE_BIGENDIAN);
- res = Tcl_NewByteArrayObj(out, strlen(out)+1);
- sqliteFree(out);
-
- Tcl_SetObjResult(interp, res);
-
- return TCL_OK;
-}
-
-/*
** The first argument is a TCL UTF-8 string. Return the byte array
** object with the encoded representation of the string, including
** the NULL terminator.
@@ -281,6 +92,92 @@ static int test_value_overhead(
return TCL_OK;
}
+static u8 name_to_enc(Tcl_Interp *interp, Tcl_Obj *pObj){ /* Map the encoding name in pObj to its SQLITE_UTF* code */
+ struct EncName {
+ char *zName;
+ u8 enc;
+ } encnames[] = {
+ { "UTF8", SQLITE_UTF8 },
+ { "UTF16LE", SQLITE_UTF16LE },
+ { "UTF16BE", SQLITE_UTF16BE },
+ { "UTF16", SQLITE_UTF16NATIVE },
+ { 0, 0 } /* sentinel: a NULL zName ends the search loop below */
+ };
+ struct EncName *pEnc;
+ char *z = Tcl_GetString(pObj);
+ for(pEnc=&encnames[0]; pEnc->zName; pEnc++){
+ if( 0==sqlite3StrICmp(z, pEnc->zName) ){
+ break;
+ }
+ }
+ if( !pEnc->enc ){ /* loop ended on the sentinel (enc==0): report the unknown name via interp */
+ Tcl_AppendResult(interp, "No such encoding: ", z, 0);
+ }
+ return pEnc->enc; /* 0 when no name matched; callers treat 0 as failure */
+}
+
+static int test_translate( /* Tcl command: <string/blob> <from enc> <to enc>; sets the result to the translated bytes */
+ void * clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+ u8 enc_from;
+ u8 enc_to;
+ sqlite3_value *pVal;
+
+ const char *z;
+ int len;
+
+ if( objc!=4 ){
+ Tcl_AppendResult(interp, "wrong # args: should be \"",
+ Tcl_GetStringFromObj(objv[0], 0),
+ " <string/blob> <from enc> <to enc>", 0
+ );
+ return TCL_ERROR;
+ }
+
+ enc_from = name_to_enc(interp, objv[2]); /* 0 means unknown name; the error text is already in interp */
+ if( !enc_from ) return TCL_ERROR;
+ enc_to = name_to_enc(interp, objv[3]);
+ if( !enc_to ) return TCL_ERROR;
+
+ pVal = sqlite3ValueNew();
+
+ if( enc_from==SQLITE_UTF8 ){
+ z = Tcl_GetString(objv[1]);
+ sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
+ }else{
+ z = Tcl_GetByteArrayFromObj(objv[1], &len); /* NOTE(review): len is fetched but -1 is passed below, so the blob presumably must carry its own terminator - confirm */
+ sqlite3ValueSetStr(pVal, -1, z, enc_from, SQLITE_STATIC);
+ }
+
+ z = sqlite3ValueText(pVal, enc_to);
+ len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2); /* plus 1 (UTF-8) or 2 (UTF-16) bytes, presumably the terminator */
+ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));
+
+ sqlite3ValueFree(pVal);
+
+ return TCL_OK;
+}
+
+/*
+** Usage: translate_selftest
+**
+** Call sqlite3utfSelfTest() to run the internal tests for unicode
+** translation. A problem trips an assert(); otherwise returns TCL_OK.
+*/
+void sqlite3utfSelfTest();
+static int test_translate_selftest(
+ void * clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+ sqlite3utfSelfTest();
+ return TCL_OK;
+}
+
/*
** Register commands with the TCL interpreter.
@@ -290,13 +187,10 @@ int Sqlitetest5_Init(Tcl_Interp *interp){
char *zName;
Tcl_ObjCmdProc *xProc;
} aCmd[] = {
- { "sqlite_utf16to8", (Tcl_ObjCmdProc*)sqlite_utf16to8 },
- { "sqlite_utf8to16le", (Tcl_ObjCmdProc*)sqlite_utf8to16le },
- { "sqlite_utf8to16be", (Tcl_ObjCmdProc*)sqlite_utf8to16be },
- { "sqlite_utf16to16le", (Tcl_ObjCmdProc*)sqlite_utf16to16le },
- { "sqlite_utf16to16be", (Tcl_ObjCmdProc*)sqlite_utf16to16be },
{ "binarize", (Tcl_ObjCmdProc*)binarize },
{ "test_value_overhead", (Tcl_ObjCmdProc*)test_value_overhead },
+ { "test_translate", (Tcl_ObjCmdProc*)test_translate },
+ { "translate_selftest", (Tcl_ObjCmdProc*)test_translate_selftest},
};
int i;
for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
diff --git a/src/tokenize.c b/src/tokenize.c
index aacf745d4..7f7e981fd 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -15,7 +15,7 @@
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
-** $Id: tokenize.c,v 1.76 2004/05/31 23:56:43 danielk1977 Exp $
+** $Id: tokenize.c,v 1.77 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
@@ -701,10 +701,18 @@ int sqlite3_complete(const char *zSql){
** UTF-8.
*/
int sqlite3_complete16(const void *zSql){
- int rc;
- char *zSql8 = sqlite3utf16to8(zSql, -1, SQLITE_BIGENDIAN);
- if( !zSql8 ) return 0;
- rc = sqlite3_complete(zSql8);
- sqliteFree(zSql8);
+ sqlite3_value *pVal;
+ char const *zSql8;
+ int rc = 0;
+
+ pVal = sqlite3ValueNew();
+ sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
+ zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
+ if( zSql8 ){
+ rc = sqlite3_complete(zSql8);
+ /* zSql8 belongs to pVal; it is released by sqlite3ValueFree() below. */
+ }
+ sqlite3ValueFree(pVal);
return rc;
}
+
diff --git a/src/utf.c b/src/utf.c
index d257f3f48..98e13abf4 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -12,7 +12,7 @@
** This file contains routines used to translate between UTF-8,
** UTF-16, UTF-16BE, and UTF-16LE.
**
-** $Id: utf.c,v 1.20 2004/06/17 05:36:44 danielk1977 Exp $
+** $Id: utf.c,v 1.21 2004/06/18 04:24:55 danielk1977 Exp $
**
** Notes on UTF-8:
**
@@ -48,31 +48,19 @@
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD for each pair of bytes that cannot be
** interpeted as part of a valid unicode character.
+**
+** This file contains the following public routines:
+**
+** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
+** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
+** sqlite3utf16ByteLen() - Calculate byte-length of a void* UTF16 string.
+** sqlite3utf8CharLen() - Calculate char-length of a char* UTF8 string.
+** sqlite3utf8LikeCompare() - Do a LIKE match given two UTF8 char* strings.
+**
*/
#include <assert.h>
#include "sqliteInt.h"
-
-typedef struct UtfString UtfString;
-struct UtfString {
- unsigned char *pZ; /* Raw string data */
- int n; /* Allocated length of pZ in bytes */
- int c; /* Number of pZ bytes already read or written */
-};
-
-/*
-** These two macros are used to interpret the first two bytes of the
-** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
-** interpretation, LE16() for little-endian.
-*/
-#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
-#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))
-
-/*
-** READ_16 interprets the first two bytes of the unsigned char array pZ
-** as a 16-bit unsigned int. If big_endian is non-zero the intepretation
-** is big-endian, otherwise little-endian.
-*/
-#define READ_16(pZ,big_endian) (big_endian?BE16(pZ):LE16(pZ))
+#include "vdbeInt.h"
/*
** The following macro, LOWERCASE(x), takes an integer representing a
@@ -96,353 +84,317 @@ static unsigned char UpperToLower[91] = {
};
/*
-** The first parameter, zStr, points at a unicode string. This routine
-** reads a single character from the string and returns the codepoint value
-** of the character read.
-**
-** The value of *pEnc is the string encoding. If *pEnc is SQLITE_UTF16LE or
-** SQLITE_UTF16BE, and the first character read is a byte-order-mark, then
-** the value of *pEnc is modified if necessary. In this case the next
-** character is read and it's code-point value returned.
-**
-** The value of *pOffset is the byte-offset in zStr from which to begin
-** reading. It is incremented by the number of bytes read by this function.
-**
-** If the fourth parameter, fold, is non-zero, then codepoint values are
-** folded to lower-case before being returned. See comments for macro
-** LOWERCASE(x) for details.
+** This table maps from the first byte of a UTF-8 character to the number
+** of trailing bytes expected. A value '255' indicates that the table key
+** is not a legal first byte for a UTF-8 character.
*/
-int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
- int ret = 0;
-
- switch( *pEnc ){
- case SQLITE_UTF8: {
-
-#if 0
- static const int initVal[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 0, 1, 2,
- 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
- 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 254,
- 255,
- };
- ret = initVal[(unsigned char)zStr[(*pOffset)++]];
- while( (0xc0&zStr[*pOffset])==0x80 ){
- ret = (ret<<6) | (0x3f&(zStr[(*pOffset)++]));
- }
-#endif
-
- struct Utf8TblRow {
- u8 b1_mask;
- u8 b1_masked_val;
- u8 b1_value_mask;
- int trailing_bytes;
- };
- static const struct Utf8TblRow utf8tbl[] = {
- { 0x80, 0x00, 0x7F, 0 },
- { 0xE0, 0xC0, 0x1F, 1 },
- { 0xF0, 0xE0, 0x0F, 2 },
- { 0xF8, 0xF0, 0x0E, 3 },
- { 0, 0, 0, 0}
- };
-
- u8 b1; /* First byte of the potentially multi-byte utf-8 character */
- int ii;
- struct Utf8TblRow const *pRow;
-
- pRow = &(utf8tbl[0]);
-
- b1 = zStr[(*pOffset)++];
- while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
- pRow++;
- }
- if( !pRow->b1_mask ){
- return (int)0xFFFD;
- }
-
- ret = (u32)(b1&pRow->b1_value_mask);
- for( ii=0; ii<pRow->trailing_bytes; ii++ ){
- u8 b = zStr[(*pOffset)++];
- if( (b&0xC0)!=0x80 ){
- return (int)0xFFFD;
- }
- ret = (ret<<6) + (u32)(b&0x3F);
- }
- break;
- }
-
- case SQLITE_UTF16LE:
- case SQLITE_UTF16BE: {
- u32 code_point; /* the first code-point in the character */
- u32 code_point2; /* the second code-point in the character, if any */
-
- code_point = READ_16(&zStr[*pOffset], (*pEnc==SQLITE_UTF16BE));
- *pOffset += 2;
-
- /* If this is a non-surrogate code-point, just cast it to an int and
- ** this is the code-point value.
- */
- if( code_point<0xD800 || code_point>0xE000 ){
- ret = code_point;
- break;
- }
-
- /* If this is a trailing surrogate code-point, then the string is
- ** malformed; return the replacement character.
- */
- if( code_point>0xDBFF ){
- return (int)0xFFFD;
- }
-
- /* The code-point just read is a leading surrogate code-point. If their
- ** is not enough data left or the next code-point is not a trailing
- ** surrogate, return the replacement character.
- */
- code_point2 = READ_16(&zStr[*pOffset], (*pEnc==SQLITE_UTF16BE));
- *pOffset += 2;
- if( code_point2<0xDC00 || code_point>0xDFFF ){
- return (int)0xFFFD;
- }
-
- ret = (
- (((code_point&0x03C0)+0x0040)<<16) + /* uuuuu */
- ((code_point&0x003F)<<10) + /* xxxxxx */
- (code_point2&0x03FF) /* yy yyyyyyyy */
- );
- }
- default:
- assert(0);
- }
-
- if( fold ){
- return LOWERCASE(ret);
- }
- return ret;
-}
+static const u8 xtra_utf8_bytes[256] = {
+/* 0xxxxxxx: 0x00-0x7F, no trailing bytes */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+/* 10wwwwww: 0x80-0xBF, not a legal first byte (255) */
+255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+
+/* 110yyyyy: 0xC0-0xDF, one trailing byte */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+/* 1110zzzz: 0xE0-0xEF, two trailing bytes */
+2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+
+/* 11110yyy: 0xF0-0xF7, three trailing bytes; 0xF8-0xFF are not legal (255) */
+3, 3, 3, 3, 3, 3, 3, 3, 255, 255, 255, 255, 255, 255, 255, 255,
+};
/*
-** Read the BOM from the start of *pStr, if one is present. Return zero
-** for little-endian, non-zero for big-endian. If no BOM is present, return
-** the value of the parameter "big_endian".
-**
-** Return values:
-** 1 -> big-endian string
-** 0 -> little-endian string
+** This table maps from the number of trailing bytes in a UTF-8 character
+** to an integer constant that is effectively calculated for each character
+** read by a naive implementation of a UTF-8 character reader. The code
+** in the READ_UTF8 macro explains things best.
*/
-static int readUtf16Bom(UtfString *pStr, int big_endian){
- /* The BOM must be the first thing read from the string */
- assert( pStr->c==0 );
-
- /* If the string data consists of 1 byte or less, the BOM will make no
- ** difference anyway. In this case just fall through to the default case
- ** and return the native byte-order for this machine.
- **
- ** Otherwise, check the first 2 bytes of the string to see if a BOM is
- ** present.
- */
- if( pStr->n>1 ){
- u8 bom = sqlite3UtfReadBom(pStr->pZ, 2);
- if( bom ){
- pStr->c += 2;
- return (bom==SQLITE_UTF16LE)?0:1;
- }
- }
+static const int xtra_utf8_bits[4] = { /* indexed by the trailing-byte count from xtra_utf8_bytes */
+0, /* no trailing bytes; READ_UTF8 has no case for 0, so it never reads this entry */
+12416, /* (0xC0 << 6) + (0x80) */
+925824, /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
+63447168 /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+};
- return big_endian;
+#define READ_UTF8(zIn, c) { /* read one character from zIn into c, advancing zIn past it */ \
+ int xtra; \
+ c = *(zIn)++; \
+ xtra = xtra_utf8_bytes[c]; /* trailing bytes expected, or 255 for an illegal first byte */ \
+ switch( xtra ){ \
+ case 255: c = (int)0xFFFD; break; /* illegal first byte: yield U+FFFD, one byte consumed */ \
+ case 3: c = (c<<6) + *(zIn)++; /* fall through */ \
+ case 2: c = (c<<6) + *(zIn)++; /* fall through */ \
+ case 1: c = (c<<6) + *(zIn)++; /* raw bytes were summed; marker bits still included */ \
+ c -= xtra_utf8_bits[xtra]; /* subtract the accumulated marker bits */ \
+ } \
}
-/*
-** zData is a UTF-16 encoded string, nData bytes in length. This routine
-** checks if there is a byte-order mark at the start of zData. If no
-** byte order mark is found 0 is returned. Otherwise SQLITE_UTF16BE or
-** SQLITE_UTF16LE is returned, depending on whether The BOM indicates that
-** the text is big-endian or little-endian.
-*/
-u8 sqlite3UtfReadBom(const void *zData, int nData){
- if( nData<0 || nData>1 ){
- u8 b1 = *(u8 *)zData;
- u8 b2 = *(((u8 *)zData) + 1);
- if( b1==0xFE && b2==0xFF ){
- return SQLITE_UTF16BE;
- }
- if( b1==0xFF && b2==0xFE ){
- return SQLITE_UTF16LE;
- }
- }
- return 0;
+#define SKIP_UTF8(zIn) { /* advance zIn past one character; an illegal first byte (255) skips 1 byte, as in READ_UTF8 */ \
+ zIn += (xtra_utf8_bytes[*(u8 *)zIn]==255 ? 1 : xtra_utf8_bytes[*(u8 *)zIn] + 1); \
+}
-
-/*
-** Read a single unicode character from the UTF-8 encoded string *pStr. The
-** value returned is a unicode scalar value. In the case of malformed
-** strings, the unicode replacement character U+FFFD may be returned.
-*/
-static u32 readUtf8(UtfString *pStr){
- u8 enc = SQLITE_UTF8;
- return sqlite3ReadUniChar(pStr->pZ, &pStr->c, &enc, 0);
+#define WRITE_UTF8(zOut, c) { \
+ if( c<0x00080 ){ \
+ *zOut++ = (c&0xFF); \
+ } \
+ else if( c<0x00800 ){ \
+ *zOut++ = 0xC0 + ((c>>6)&0x1F); \
+ *zOut++ = 0x80 + (c & 0x3F); \
+ } \
+ else if( c<0x10000 ){ \
+ *zOut++ = 0xE0 + ((c>>12)&0x0F); \
+ *zOut++ = 0x80 + ((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (c & 0x3F); \
+ }else{ \
+ *zOut++ = 0xF0 + ((c>>18) & 0x07); \
+ *zOut++ = 0x80 + ((c>>12) & 0x3F); \
+ *zOut++ = 0x80 + ((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (c & 0x3F); \
+ } \
}
-/*
-** Write the unicode character 'code' to the string pStr using UTF-8
-** encoding. SQLITE_NOMEM may be returned if sqlite3Malloc() fails.
-*/
-static int writeUtf8(UtfString *pStr, u32 code){
- struct Utf8WriteTblRow {
- u32 max_code;
- int trailing_bytes;
- u8 b1_and_mask;
- u8 b1_or_mask;
- };
- static const struct Utf8WriteTblRow utf8tbl[] = {
- {0x0000007F, 0, 0x7F, 0x00},
- {0x000007FF, 1, 0xDF, 0xC0},
- {0x0000FFFF, 2, 0xEF, 0xE0},
- {0x0010FFFF, 3, 0xF7, 0xF0},
- {0x00000000, 0, 0x00, 0x00}
- };
- const struct Utf8WriteTblRow *pRow = &utf8tbl[0];
-
- while( code>pRow->max_code ){
- assert( pRow->max_code );
- pRow++;
- }
+#define WRITE_UTF16LE(zOut, c) { \
+ if( c<=0xFFFF ){ \
+ *zOut++ = (c&0x00FF); \
+ *zOut++ = ((c>>8)&0x00FF); \
+ }else{ \
+ *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \
+ *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03)); \
+ *zOut++ = (c&0x00FF); \
+ *zOut++ = (0x00DC + ((c>>8)&0x03)); \
+ } \
+}
- /* Ensure there is enough room left in the output buffer to write
- ** this UTF-8 character.
- */
- assert( (pStr->n-pStr->c)>=(pRow->trailing_bytes+1) );
+#define WRITE_UTF16BE(zOut, c) { \
+ if( c<=0xFFFF ){ \
+ *zOut++ = ((c>>8)&0x00FF); \
+ *zOut++ = (c&0x00FF); \
+ }else{ \
+ *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03)); \
+ *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \
+ *zOut++ = (0x00DC + ((c>>8)&0x03)); \
+ *zOut++ = (c&0x00FF); \
+ } \
+}
- /* Write the UTF-8 encoded character to pStr. All cases below are
- ** intentionally fall-through.
- */
- switch( pRow->trailing_bytes ){
- case 3:
- pStr->pZ[pStr->c+3] = (((u8)code)&0x3F)|0x80;
- code = code>>6;
- case 2:
- pStr->pZ[pStr->c+2] = (((u8)code)&0x3F)|0x80;
- code = code>>6;
- case 1:
- pStr->pZ[pStr->c+1] = (((u8)code)&0x3F)|0x80;
- code = code>>6;
- case 0:
- pStr->pZ[pStr->c] = (((u8)code)&(pRow->b1_and_mask))|(pRow->b1_or_mask);
- }
- pStr->c += (pRow->trailing_bytes + 1);
+#define READ_UTF16LE(zIn, c){ \
+ c = (*zIn++); \
+ c += ((*zIn++)<<8); \
+ if( c>=0xD800 && c<=0xE000 ){ \
+ int c2 = (*zIn++); \
+ c2 += ((*zIn++)<<8); \
+ c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
+ } \
+}
- return 0;
+#define READ_UTF16BE(zIn, c){ \
+ c = ((*zIn++)<<8); \
+ c += (*zIn++); \
+ if( c>=0xD800 && c<=0xE000 ){ \
+ int c2 = ((*zIn++)<<8); \
+ c2 += (*zIn++); \
+ c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
+ } \
}
/*
-** Read a single unicode character from the UTF-16 encoded string *pStr. The
-** value returned is a unicode scalar value. In the case of malformed
-** strings, the unicode replacement character U+FFFD may be returned.
-**
-** If big_endian is true, the string is assumed to be UTF-16BE encoded.
-** Otherwise, it is UTF-16LE encoded.
-*/
-static u32 readUtf16(UtfString *pStr, int big_endian){
- u32 code_point; /* the first code-point in the character */
+** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
+** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
+*/
+/* #define TRANSLATE_TRACE 1 */
- /* If there is only one byte of data left in the string, return the
- ** replacement character.
- */
- if( (pStr->n-pStr->c)==1 ){
- pStr->c++;
- return (int)0xFFFD;
+/*
+** This routine transforms the internal text encoding used by pMem to
+** desiredEnc. It is an error if the string is already of the desired
+** encoding, or if *pMem does not contain a string value.
+*/
+int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
+ unsigned char zShort[NBFS]; /* Temporary short output buffer */
+ int len; /* Maximum length of output string in bytes */
+ unsigned char *zOut; /* Output buffer */
+ unsigned char *zIn; /* Input iterator */
+ unsigned char *zTerm; /* End of input */
+ unsigned char *z; /* Output iterator */
+ int c;
+
+ assert( pMem->flags&MEM_Str );
+ assert( pMem->enc!=desiredEnc );
+ assert( pMem->enc!=0 );
+ assert( pMem->n>=0 );
+
+#ifdef TRANSLATE_TRACE
+ {
+ char zBuf[100];
+ sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
+ fprintf(stderr, "INPUT: %s\n", zBuf);
}
+#endif
- code_point = READ_16(&(pStr->pZ[pStr->c]), big_endian);
- pStr->c += 2;
-
- /* If this is a non-surrogate code-point, just cast it to an int and
- ** return the code-point value.
+ /* If the translation is between UTF-16 little and big endian, then
+ ** all that is required is to swap the byte order. This case is handled
+ ** differently from the others.
*/
- if( code_point<0xD800 || code_point>0xE000 ){
- return code_point;
+ if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
+ u8 temp;
+ sqlite3VdbeMemMakeWriteable(pMem);
+ zIn = pMem->z;
+ zTerm = &zIn[pMem->n];
+ while( zIn<zTerm ){
+ temp = *zIn;
+ *zIn = *(zIn+1);
+ zIn++;
+ *zIn++ = temp;
+ }
+ pMem->enc = desiredEnc;
+ goto translate_out;
}
- /* If this is a trailing surrogate code-point, then the string is
- ** malformed; return the replacement character.
+ /* Set zIn to point at the start of the input buffer and zTerm to point 1
+ ** byte past the end.
+ **
+ ** Variable zOut is set to point at the output buffer. This may be space
+ ** obtained from malloc(), or Mem.zShort, if it is large enough and not in
+ ** use, or the zShort array on the stack (see above).
*/
- if( code_point>0xDBFF ){
- return 0xFFFD;
+ zIn = pMem->z;
+ zTerm = &zIn[pMem->n];
+ len = pMem->n*2 + 2;
+ if( len>NBFS ){
+ zOut = sqliteMallocRaw(len);
+ if( !zOut ) return SQLITE_NOMEM;
+ }else{
+ if( pMem->z==pMem->zShort ){
+ zOut = zShort;
+ }else{
+ zOut = pMem->zShort;
+ }
}
-
- /* The code-point just read is a leading surrogate code-point. If their
- ** is not enough data left or the next code-point is not a trailing
- ** surrogate, return the replacement character.
- */
- if( (pStr->n-pStr->c)>1 ){
- u32 code_point2 = READ_16(&pStr->pZ[pStr->c], big_endian);
- if( code_point2<0xDC00 || code_point>0xDFFF ){
- return 0xFFFD;
+ z = zOut;
+
+ if( pMem->enc==SQLITE_UTF8 ){
+ if( desiredEnc==SQLITE_UTF16LE ){
+ /* UTF-8 -> UTF-16 Little-endian */
+ while( zIn<zTerm ){
+ READ_UTF8(zIn, c);
+ WRITE_UTF16LE(z, c);
+ }
+ WRITE_UTF16LE(z, 0);
+ pMem->n = (z-zOut)-2;
+ }else if( desiredEnc==SQLITE_UTF16BE ){
+ /* UTF-8 -> UTF-16 Big-endian */
+ while( zIn<zTerm ){
+ READ_UTF8(zIn, c);
+ WRITE_UTF16BE(z, c);
+ }
+ WRITE_UTF16BE(z, 0);
+ pMem->n = (z-zOut)-2;
}
- pStr->c += 2;
-
- return (
- (((code_point&0x03C0)+0x0040)<<16) + /* uuuuu */
- ((code_point&0x003F)<<10) + /* xxxxxx */
- (code_point2&0x03FF) /* yy yyyyyyyy */
- );
+ }else{
+ assert( desiredEnc==SQLITE_UTF8 );
+ if( pMem->enc==SQLITE_UTF16LE ){
+ /* UTF-16 Little-endian -> UTF-8 */
+ while( zIn<zTerm ){
+ READ_UTF16LE(zIn, c);
+ WRITE_UTF8(z, c);
+ }
+ WRITE_UTF8(z, 0);
+ pMem->n = (z-zOut)-1;
+ }else{
+ /* UTF-16 Big-endian -> UTF-8 */
+ while( zIn<zTerm ){
+ READ_UTF16BE(zIn, c);
+ WRITE_UTF8(z, c);
+ }
+ WRITE_UTF8(z, 0);
+ pMem->n = (z-zOut)-1;
+ }
+ }
+ sqlite3VdbeMemRelease(pMem);
+ pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
+ pMem->enc = desiredEnc;
+ if( (char *)zOut==pMem->zShort ){
+ pMem->flags |= (MEM_Term|MEM_Short);
+ }else if( zOut==zShort ){
+ memcpy(pMem->zShort, zOut, len);
+ zOut = pMem->zShort;
+ pMem->flags |= (MEM_Term|MEM_Short);
}else{
- return (int)0xFFFD;
+ pMem->flags |= (MEM_Term|MEM_Dyn);
}
-
- /* not reached */
+ pMem->z = zOut;
+
+translate_out:
+#ifdef TRANSLATE_TRACE
+ {
+ char zBuf[100];
+ sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
+ fprintf(stderr, "OUTPUT: %s\n", zBuf);
+ }
+#endif
+ return SQLITE_OK;
}
-static int writeUtf16(UtfString *pStr, int code, int big_endian){
- int bytes;
- unsigned char *hi_byte;
- unsigned char *lo_byte;
-
- bytes = (code>0x0000FFFF?4:2);
-
- /* Ensure there is enough room left in the output buffer to write
- ** this UTF-8 character.
- */
- assert( (pStr->n-pStr->c)>=bytes );
-
- /* Initialise hi_byte and lo_byte to point at the locations into which
- ** the MSB and LSB of the (first) 16-bit unicode code-point written for
- ** this character.
- */
- hi_byte = (big_endian?&pStr->pZ[pStr->c]:&pStr->pZ[pStr->c+1]);
- lo_byte = (big_endian?&pStr->pZ[pStr->c+1]:&pStr->pZ[pStr->c]);
+/*
+** This routine checks for a byte-order mark at the beginning of the
+** UTF-16 string stored in *pMem. If one is present, it is removed and
+** the encoding of the Mem adjusted. This routine does not do any
+** byte-swapping, it just sets Mem.enc appropriately.
+**
+** The allocation (static, dynamic etc.) and encoding of the Mem may be
+** changed by this function.
+*/
+int sqlite3VdbeMemHandleBom(Mem *pMem){
+ int rc = SQLITE_OK;
+ u8 bom = 0;
- if( bytes==2 ){
- *hi_byte = (u8)((code&0x0000FF00)>>8);
- *lo_byte = (u8)(code&0x000000FF);
- }else{
- u32 wrd;
- wrd = ((((code&0x001F0000)-0x00010000)+(code&0x0000FC00))>>10)|0x0000D800;
- *hi_byte = (u8)((wrd&0x0000FF00)>>8);
- *lo_byte = (u8)(wrd&0x000000FF);
-
- wrd = (code&0x000003FF)|0x0000DC00;
- *(hi_byte+2) = (u8)((wrd&0x0000FF00)>>8);
- *(lo_byte+2) = (u8)(wrd&0x000000FF);
+ if( pMem->n<0 || pMem->n>1 ){
+ u8 b1 = *(u8 *)pMem->z;
+ u8 b2 = *(((u8 *)pMem->z) + 1);
+ if( b1==0xFE && b2==0xFF ){
+ bom = SQLITE_UTF16BE;
+ }
+ if( b1==0xFF && b2==0xFE ){
+ bom = SQLITE_UTF16LE;
+ }
}
-
- pStr->c += bytes;
- return 0;
+ if( bom ){
+ if( pMem->flags & MEM_Short ){
+ memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);
+ pMem->n -= 2;
+ pMem->enc = bom;
+ }
+ else if( pMem->flags & MEM_Dyn ){
+ void (*xDel)(void*) = pMem->xDel;
+ char *z = pMem->z;
+ pMem->z = 0;
+ pMem->xDel = 0;
+ rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
+ if( xDel ){
+ xDel(z);
+ }else{
+ sqliteFree(z);
+ }
+ }else{
+ rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom,
+ SQLITE_TRANSIENT);
+ }
+ }
+ return rc;
}
/*
@@ -452,22 +404,20 @@ static int writeUtf16(UtfString *pStr, int code, int big_endian){
** number of unicode characters in the first nByte of pZ (or up to
** the first 0x00, whichever comes first).
*/
-int sqlite3utf8CharLen(const char *pZ, int nByte){
- UtfString str;
- int ret = 0;
- u32 code = 1;
-
- str.pZ = (char *)pZ;
- str.n = nByte;
- str.c = 0;
-
- while( (nByte<0 || str.c<str.n) && code!=0 ){
- code = readUtf8(&str);
- ret++;
+int sqlite3utf8CharLen(const char *z, int nByte){
+ int r = 0;
+ const char *zTerm;
+ if( nByte>0 ){
+ zTerm = &z[nByte];
+ }else{
+ zTerm = (const char *)(-1);
}
- if( code==0 ) ret--;
-
- return ret;
+ assert( z<=zTerm );
+ while( *z!=0 && z<zTerm ){
+ SKIP_UTF8(z);
+ r++;
+ }
+ return r;
}
/*
@@ -477,242 +427,25 @@ int sqlite3utf8CharLen(const char *pZ, int nByte){
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
*/
-int sqlite3utf16ByteLen(const void *pZ, int nChar){
- if( nChar<0 ){
- const unsigned char *pC1 = (unsigned char *)pZ;
- const unsigned char *pC2 = (unsigned char *)pZ+1;
- while( *pC1 || *pC2 ){
- pC1 += 2;
- pC2 += 2;
+int sqlite3utf16ByteLen(const void *zIn, int nChar){
+ int c = 1;
+ char const *z = zIn;
+ int n = 0;
+ if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
+ while( c && ((nChar<0) || n<nChar) ){
+ READ_UTF16BE(z, c);
+ n++;
}
- return pC1-(unsigned char *)pZ;
}else{
- UtfString str;
- u32 code = 1;
- int big_endian;
- int nRead = 0;
- int ret;
-
- str.pZ = (char *)pZ;
- str.c = 0;
- str.n = -1;
-
- /* Check for a BOM. We just ignore it if there is one, it's only read
- ** so that it is not counted as a character.
- */
- big_endian = readUtf16Bom(&str, 0);
- ret = 0-str.c;
-
- while( code!=0 && nRead<nChar ){
- code = readUtf16(&str, big_endian);
- nRead++;
+ while( c && ((nChar<0) || n<nChar) ){
+ READ_UTF16LE(z, c);
+ n++;
}
- if( code==0 ){
- ret -= 2;
- }
- return str.c + ret;
}
+ return (z-(char const *)zIn)-((c==0)?2:0);
}
/*
-** Convert a string in UTF-16 native byte (or with a Byte-order-mark or
-** "BOM") into a UTF-8 string. The UTF-8 string is written into space
-** obtained from sqlite3Malloc() and must be released by the calling function.
-**
-** The parameter N is the number of bytes in the UTF-16 string. If N is
-** negative, the entire string up to the first \u0000 character is translated.
-**
-** The returned UTF-8 string is always \000 terminated.
-*/
-unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian){
- UtfString in;
- UtfString out;
-
- out.pZ = 0;
-
- in.pZ = (unsigned char *)pData;
- in.n = N;
- in.c = 0;
-
- if( in.n<0 ){
- in.n = sqlite3utf16ByteLen(in.pZ, -1);
- }
-
- /* A UTF-8 encoding of a unicode string can require at most 1.5 times as
- ** much space to store as the same string encoded using UTF-16. Allocate
- ** this now.
- */
- out.n = (in.n*1.5) + 1;
- out.pZ = sqliteMalloc(out.n);
- if( !out.pZ ){
- return 0;
- }
- out.c = 0;
-
- big_endian = readUtf16Bom(&in, big_endian);
- while( in.c<in.n ){
- writeUtf8(&out, readUtf16(&in, big_endian));
- }
-
- /* Add the NULL-terminator character */
- assert( out.c<out.n );
- out.pZ[out.c] = 0x00;
-
- return out.pZ;
-}
-
-static void *utf8toUtf16(const unsigned char *pIn, int N, int big_endian){
- UtfString in;
- UtfString out;
-
- in.pZ = (unsigned char *)pIn;
- in.n = N;
- in.c = 0;
-
- if( in.n<0 ){
- in.n = strlen(in.pZ);
- }
-
- /* A UTF-16 encoding of a unicode string can require at most twice as
- ** much space to store as the same string encoded using UTF-8. Allocate
- ** this now.
- */
- out.n = (in.n*2) + 2;
- out.pZ = sqliteMalloc(out.n);
- if( !out.pZ ){
- return 0;
- }
- out.c = 0;
-
- while( in.c<in.n ){
- writeUtf16(&out, readUtf8(&in), big_endian);
- }
-
- /* Add the NULL-terminator character */
- assert( (out.c+1)<out.n );
- out.pZ[out.c] = 0x00;
- out.pZ[out.c+1] = 0x00;
-
- return out.pZ;
-}
-
-/*
-** Translate UTF-8 to UTF-16BE or UTF-16LE
-*/
-void *sqlite3utf8to16be(const unsigned char *pIn, int N){
- return utf8toUtf16(pIn, N, 1);
-}
-
-void *sqlite3utf8to16le(const unsigned char *pIn, int N){
- return utf8toUtf16(pIn, N, 0);
-}
-
-/*
-** This routine does the work for sqlite3utf16to16le() and
-** sqlite3utf16to16be(). If big_endian is 1 the input string is
-** transformed in place to UTF-16BE encoding. If big_endian is 0 then
-** the input is transformed to UTF-16LE.
-**
-** Unless the first two bytes of the input string is a BOM, the input is
-** assumed to be UTF-16 encoded using the machines native byte ordering.
-*/
-static void utf16to16(void *pData, int N, int big_endian){
- UtfString inout;
- inout.pZ = (unsigned char *)pData;
- inout.c = 0;
- inout.n = N;
-
- if( inout.n<0 ){
- inout.n = sqlite3utf16ByteLen(inout.pZ, -1);
- }
-
- if( readUtf16Bom(&inout, SQLITE_BIGENDIAN)!=big_endian ){
- /* swab(&inout.pZ[inout.c], inout.pZ, inout.n-inout.c); */
- int i;
- for(i=0; i<(inout.n-inout.c); i += 2){
- char c1 = inout.pZ[i+inout.c];
- char c2 = inout.pZ[i+inout.c+1];
- inout.pZ[i] = c2;
- inout.pZ[i+1] = c1;
- }
- }else if( inout.c ){
- memmove(inout.pZ, &inout.pZ[inout.c], inout.n-inout.c);
- }
-
- inout.pZ[inout.n-inout.c] = 0x00;
- inout.pZ[inout.n-inout.c+1] = 0x00;
-}
-
-/*
-** Convert a string in UTF-16 native byte or with a BOM into a UTF-16LE
-** string. The conversion occurs in-place. The output overwrites the
-** input. N bytes are converted. If N is negative everything is converted
-** up to the first \u0000 character.
-**
-** If the native byte order is little-endian and there is no BOM, then
-** this routine is a no-op. If there is a BOM at the start of the string,
-** it is removed.
-**
-** Translation from UTF-16LE to UTF-16BE and back again is accomplished
-** using the library function swab().
-*/
-void sqlite3utf16to16le(void *pData, int N){
- utf16to16(pData, N, 0);
-}
-
-/*
-** Convert a string in UTF-16 native byte or with a BOM into a UTF-16BE
-** string. The conversion occurs in-place. The output overwrites the
-** input. N bytes are converted. If N is negative everything is converted
-** up to the first \u0000 character.
-**
-** If the native byte order is little-endian and there is no BOM, then
-** this routine is a no-op. If there is a BOM at the start of the string,
-** it is removed.
-**
-** Translation from UTF-16LE to UTF-16BE and back again is accomplished
-** using the library function swab().
-*/
-void sqlite3utf16to16be(void *pData, int N){
- utf16to16(pData, N, 1);
-}
-
-/*
-** This function is used to translate between UTF-8 and UTF-16. The
-** result is returned in dynamically allocated memory.
-*/
-int sqlite3utfTranslate(
- const void *zData, int nData, /* Input string */
- u8 enc1, /* Encoding of zData */
- void **zOut, int *nOut, /* Output string */
- u8 enc2 /* Desired encoding of output */
-){
- assert( enc1==SQLITE_UTF8 || enc1==SQLITE_UTF16LE || enc1==SQLITE_UTF16BE );
- assert( enc2==SQLITE_UTF8 || enc2==SQLITE_UTF16LE || enc2==SQLITE_UTF16BE );
- assert(
- (enc1==SQLITE_UTF8 && (enc2==SQLITE_UTF16LE || enc2==SQLITE_UTF16BE)) ||
- (enc2==SQLITE_UTF8 && (enc1==SQLITE_UTF16LE || enc1==SQLITE_UTF16BE))
- );
-
- if( enc1==SQLITE_UTF8 ){
- if( enc2==SQLITE_UTF16LE ){
- *zOut = sqlite3utf8to16le(zData, nData);
- }else{
- *zOut = sqlite3utf8to16be(zData, nData);
- }
- if( !(*zOut) ) return SQLITE_NOMEM;
- *nOut = sqlite3utf16ByteLen(*zOut, -1);
- }else{
- *zOut = sqlite3utf16to8(zData, nData, enc1==SQLITE_UTF16BE);
- if( !(*zOut) ) return SQLITE_NOMEM;
- *nOut = strlen(*zOut);
- }
- return SQLITE_OK;
-}
-
-#define sqliteNextChar(X) while( (0xc0&*++(X))==0x80 ){}
-
-/*
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL. The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character. Case is
@@ -731,7 +464,7 @@ int sqlite3utf8LikeCompare(
while( (c=zPattern[1]) == '%' || c == '_' ){
if( c=='_' ){
if( *zString==0 ) return 0;
- sqliteNextChar(zString);
+ SKIP_UTF8(zString);
}
zPattern++;
}
@@ -744,13 +477,13 @@ int sqlite3utf8LikeCompare(
}
if( c2==0 ) return 0;
if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
- sqliteNextChar(zString);
+ SKIP_UTF8(zString);
}
return 0;
}
case '_': {
if( *zString==0 ) return 0;
- sqliteNextChar(zString);
+ SKIP_UTF8(zString);
zPattern++;
break;
}
@@ -764,3 +497,50 @@ int sqlite3utf8LikeCompare(
}
return *zString==0;
}
+
+#ifndef NDEBUG
+/*
+** This routine is called from the TCL test function "translate_selftest".
+** It checks that the primitives for serializing and deserializing
+** characters in each encoding are inverses of each other.
+*/
+void sqlite3utfSelfTest(){
+ int i;
+ unsigned char zBuf[20];
+ unsigned char *z;
+ int n;
+ int c;
+
+ for(i=0; 0 && i<0x00110000; i++){
+ z = zBuf;
+ WRITE_UTF8(z, i);
+ n = z-zBuf;
+ z = zBuf;
+ READ_UTF8(z, c);
+ assert( c==i );
+ assert( (z-zBuf)==n );
+ }
+ for(i=0; i<0x00110000; i++){
+ if( i>=0xD800 && i<=0xE000 ) continue;
+ z = zBuf;
+ WRITE_UTF16LE(z, i);
+ n = z-zBuf;
+ z = zBuf;
+ READ_UTF16LE(z, c);
+ assert( c==i );
+ assert( (z-zBuf)==n );
+ }
+ for(i=0; i<0x00110000; i++){
+ if( i>=0xD800 && i<=0xE000 ) continue;
+ z = zBuf;
+ WRITE_UTF16BE(z, i);
+ n = z-zBuf;
+ z = zBuf;
+ READ_UTF16BE(z, c);
+ assert( c==i );
+ assert( (z-zBuf)==n );
+ }
+}
+#endif
+
+
diff --git a/src/util.c b/src/util.c
index 002bc2d35..29cd56112 100644
--- a/src/util.c
+++ b/src/util.c
@@ -14,7 +14,7 @@
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
-** $Id: util.c,v 1.102 2004/06/16 07:45:29 danielk1977 Exp $
+** $Id: util.c,v 1.103 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
@@ -256,6 +256,13 @@ char *sqlite3StrNDup_(const char *z, int n, char *zFile, int line){
}
return zNew;
}
+
+/*
+** A version of sqliteFree that is always a function, not a macro.
+*/
+void sqlite3FreeX(void *p){
+ sqliteFree(p);
+}
#endif /* SQLITE_DEBUG */
/*
@@ -446,23 +453,18 @@ void sqlite3SetNString(char **pz, ...){
** to NULL.
*/
void sqlite3Error(sqlite *db, int err_code, const char *zFormat, ...){
- /* Free any existing error message. */
- if( db->zErrMsg ){
- sqliteFree(db->zErrMsg);
- db->zErrMsg = 0;
- }
- if( db->zErrMsg16 ){
- sqliteFree(db->zErrMsg16);
- db->zErrMsg16 = 0;
- }
-
- /* Set the new error code and error message. */
- db->errCode = err_code;
- if( zFormat ){
- va_list ap;
- va_start(ap, zFormat);
- db->zErrMsg = sqlite3VMPrintf(zFormat, ap);
- va_end(ap);
+ if( db && (db->pErr || (db->pErr = sqlite3ValueNew())) ){
+ db->errCode = err_code;
+ if( zFormat ){
+ char *z;
+ va_list ap;
+ va_start(ap, zFormat);
+ z = sqlite3VMPrintf(zFormat, ap);
+ va_end(ap);
+ sqlite3ValueSetStr(db->pErr, -1, z, SQLITE_UTF8, sqlite3FreeX);
+ }else{
+ sqlite3ValueSetStr(db->pErr, 0, 0, SQLITE_UTF8, SQLITE_STATIC);
+ }
}
}
diff --git a/src/vdbe.c b/src/vdbe.c
index 979889d65..ae7a38c9a 100644
--- a/src/vdbe.c
+++ b/src/vdbe.c
@@ -43,7 +43,7 @@
** in this file for details. If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
-** $Id: vdbe.c,v 1.378 2004/06/17 07:53:03 danielk1977 Exp $
+** $Id: vdbe.c,v 1.379 2004/06/18 04:24:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
@@ -361,10 +361,12 @@ static void applyAffinity(Mem *pRec, char affinity, u8 enc){
** Write a nice string representation of the contents of cell pMem
** into buffer zBuf, length nBuf.
*/
-void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){
+void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf, int nBuf){
char *zCsr = zBuf;
int f = pMem->flags;
+ static const char *encnames[] = {"(X)", "(8)", "(16LE)", "(16BE)"};
+
if( f&MEM_Blob ){
int i;
char c;
@@ -414,11 +416,6 @@ void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){
zBuf[k++] = '[';
for(j=0; j<15 && j<pMem->n; j++){
u8 c = pMem->z[j];
-/*
- if( c==0 && j==pMem->n-1 ) break;
- zBuf[k++] = "0123456789ABCDEF"[c>>4];
- zBuf[k++] = "0123456789ABCDEF"[c&0xf];
-*/
if( c>=0x20 && c<0x7f ){
zBuf[k++] = c;
}else{
@@ -426,14 +423,10 @@ void prettyPrintMem(Mem *pMem, char *zBuf, int nBuf){
}
}
zBuf[k++] = ']';
+ k += sprintf(&zBuf[k], encnames[pMem->enc]);
zBuf[k++] = 0;
}
}
-
-/* Temporary - this is useful in conjunction with prettyPrintMem whilst
-** debugging.
-*/
-char zGdbBuf[100];
#endif
@@ -734,20 +727,20 @@ case OP_String8: {
pOp->opcode = OP_String;
if( db->enc!=SQLITE_UTF8 && pOp->p3 ){
- char *z = pOp->p3;
- if( db->enc==SQLITE_UTF16LE ){
- pOp->p3 = sqlite3utf8to16le(z, -1);
- }else{
- pOp->p3 = sqlite3utf8to16be(z, -1);
- }
+ pTos++;
+ sqlite3VdbeMemSetStr(pTos, pOp->p3, -1, SQLITE_UTF8, SQLITE_STATIC);
+ if( SQLITE_OK!=sqlite3VdbeChangeEncoding(pTos, db->enc) ) goto no_mem;
+ if( SQLITE_OK!=sqlite3VdbeMemDynamicify(pTos) ) goto no_mem;
+ pTos->flags &= ~(MEM_Dyn);
+ pTos->flags |= MEM_Static;
if( pOp->p3type==P3_DYNAMIC ){
- sqliteFree(z);
+ sqliteFree(pOp->p3);
}
pOp->p3type = P3_DYNAMIC;
- if( !pOp->p3 ) goto no_mem;
+ pOp->p3 = pTos->z;
+ break;
}
-
- /* Fall through to the next case, OP_String */
+ /* Otherwise fall through to the next case, OP_String */
}
/* Opcode: String * * P3
@@ -4590,7 +4583,7 @@ default: {
fprintf(p->trace, " r:%g", pTos[i].r);
}else{
char zBuf[100];
- prettyPrintMem(&pTos[i], zBuf, 100);
+ sqlite3VdbeMemPrettyPrint(&pTos[i], zBuf, 100);
fprintf(p->trace, " ");
fprintf(p->trace, zBuf);
}
diff --git a/src/vdbeInt.h b/src/vdbeInt.h
index ce5244fda..eed4f5a39 100644
--- a/src/vdbeInt.h
+++ b/src/vdbeInt.h
@@ -390,3 +390,5 @@ void sqlite3VdbeMemRelease(Mem *p);
#ifndef NDEBUG
void sqlite3VdbeMemSanity(Mem*, u8);
#endif
+int sqlite3VdbeMemTranslate(Mem*, u8);
+void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf, int nBuf);
diff --git a/src/vdbeapi.c b/src/vdbeapi.c
index dff0a10ad..fde3b1cd2 100644
--- a/src/vdbeapi.c
+++ b/src/vdbeapi.c
@@ -518,20 +518,7 @@ int sqlite3_bind_text16(
}
pVar = &p->apVar[i-1];
- /* There may or may not be a byte order mark at the start of the UTF-16.
- ** Either way set 'txt_enc' to the SQLITE_UTF16* value indicating the
- ** actual byte order used by this string. If the string does happen
- ** to contain a BOM, then move zData so that it points to the first
- ** byte after the BOM.
- */
- txt_enc = sqlite3UtfReadBom(zData, nData);
- if( txt_enc ){
- zData = (void *)(((u8 *)zData) + 2);
- nData -= 2;
- }else{
- txt_enc = SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE;
- }
- rc = sqlite3VdbeMemSetStr(pVar, zData, nData, txt_enc, xDel);
+ rc = sqlite3VdbeMemSetStr(pVar, zData, nData, SQLITE_UTF16NATIVE, xDel);
if( rc ){
return rc;
}
diff --git a/src/vdbemem.c b/src/vdbemem.c
index 6becf6f87..8c5891dec 100644
--- a/src/vdbemem.c
+++ b/src/vdbemem.c
@@ -21,63 +21,23 @@
#include "vdbeInt.h"
/*
-** If pMem is a string object, this routine sets the encoding of the string
-** (to one of UTF-8 or UTF16) and whether or not the string is
-** nul-terminated. If pMem is not a string object, then this routine is
-** a no-op.
+** If pMem is an object with a valid string representation, this routine
+** ensures the internal encoding for the string representation is
+** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE.
**
-** The second argument, "desiredEnc" is one of TEXT_Utf8, TEXT_Utf16le
-** or TEXT_Utf16be. This routine changes the encoding of pMem to match
-** desiredEnc.
+** If pMem is not a string object, or the encoding of the string
+** representation is already stored using the requested encoding, then this
+** routine is a no-op.
**
** SQLITE_OK is returned if the conversion is successful (or not required).
** SQLITE_NOMEM may be returned if a malloc() fails during conversion
** between formats.
*/
int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){
- /* If this is not a string, or if it is a string but the encoding is
- ** already correct, do nothing. */
if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){
return SQLITE_OK;
}
-
- if( pMem->enc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF8 ){
- /* If the current encoding does not match the desired encoding, then
- ** we will need to do some translation between encodings.
- */
- char *z;
- int n;
- int rc;
-
- rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, (void **)&z,
- &n, desiredEnc);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- sqlite3VdbeMemRelease(pMem);
-
- /* Result of sqlite3utfTranslate is currently always dynamically
- ** allocated and nul terminated. This might be altered as a performance
- ** enhancement later.
- */
- pMem->z = z;
- pMem->n = n;
- pMem->flags &= ~(MEM_Ephem | MEM_Short | MEM_Static);
- pMem->flags |= MEM_Str | MEM_Dyn | MEM_Term;
- pMem->xDel = 0;
- }else{
- /* Must be translating between UTF-16le and UTF-16be. */
- int i;
- u8 *pFrom, *pTo;
- sqlite3VdbeMemMakeWriteable(pMem);
- for(i=0, pFrom=pMem->z, pTo=&pMem->z[1]; i<pMem->n; i+=2, pFrom+=2,pTo+=2){
- u8 temp = *pFrom;
- *pFrom = *pTo;
- *pTo = temp;
- }
- }
- pMem->enc = desiredEnc;
- return SQLITE_OK;
+ return sqlite3VdbeMemTranslate(pMem, desiredEnc);
}
/*
@@ -405,16 +365,19 @@ int sqlite3VdbeMemSetStr(
case SQLITE_UTF16LE:
case SQLITE_UTF16BE:
pMem->flags |= MEM_Str;
- if( n<0 ){
- pMem->n = sqlite3utf16ByteLen(z,-1);
+ if( pMem->n<0 ){
+ pMem->n = sqlite3utf16ByteLen(pMem->z,-1);
pMem->flags |= MEM_Term;
}
+ if( sqlite3VdbeMemHandleBom(pMem) ){
+ return SQLITE_NOMEM;
+ }
break;
default:
assert(0);
}
- if( xDel==SQLITE_TRANSIENT ){
+ if( pMem->flags&MEM_Ephem ){
return sqlite3VdbeMemMakeWriteable(pMem);
}
return SQLITE_OK;
@@ -498,11 +461,9 @@ int sqlite3MemCompare(const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl){
assert( pMem1->enc==SQLITE_UTF8 ||
pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE );
- /* FIX ME: This may fail if the collation sequence is deleted after
- ** this vdbe program is compiled. We cannot just use BINARY in this
- ** case as this may lead to a segfault caused by traversing an index
- ** table incorrectly. We need to return an error to the user in this
- ** case.
+ /* This assert may fail if the collation sequence is deleted after this
+ ** vdbe program is compiled. The documentation defines this as an
+ ** undefined condition. A crash is the usual result.
*/
assert( !pColl || pColl->xCmp );
@@ -645,22 +606,17 @@ void sqlite3VdbeMemSanity(Mem *pMem, u8 db_enc){
** SQLITE_UTF8.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
+ if( !pVal ) return 0;
assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);
+
if( pVal->flags&MEM_Null ){
- /* For a NULL return a NULL Pointer */
return 0;
}
-
if( pVal->flags&MEM_Str ){
- /* If there is already a string representation, make sure it is in
- ** encoded in the required UTF-16 byte order.
- */
sqlite3VdbeChangeEncoding(pVal, enc);
}else if( !(pVal->flags&MEM_Blob) ){
- /* Otherwise, unless this is a blob, convert it to a UTF-16 string */
sqlite3VdbeMemStringify(pVal, enc);
}
-
return (const void *)(pVal->z);
}
@@ -673,12 +629,19 @@ sqlite3_value* sqlite3ValueNew(){
return p;
}
-void sqlite3ValueSetStr(sqlite3_value *v, int n, const void *z, u8 enc){
- sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, SQLITE_STATIC);
+void sqlite3ValueSetStr(
+ sqlite3_value *v,
+ int n,
+ const void *z,
+ u8 enc,
+ void (*xDel)(void*)
+){
+ if( v ) sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, xDel);
}
void sqlite3ValueFree(sqlite3_value *v){
- sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8);
+ if( !v ) return;
+ sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8, SQLITE_STATIC);
sqliteFree(v);
}