aboutsummaryrefslogtreecommitdiff
path: root/src/utf.c
diff options
context:
space:
mode:
author drh <drh@noemail.net> 2004-05-04 15:00:46 +0000
committer drh <drh@noemail.net> 2004-05-04 15:00:46 +0000
commit a5d14fe7c5285cfac5c9a9aa90b7e61664b7be3c (patch)
tree b311c6578084e8b7afa03403c228f9e740bd1f4b /src/utf.c
parent 9102529d8c62d457190d052d07314d4d4dcb56a2 (diff)
download sqlite-a5d14fe7c5285cfac5c9a9aa90b7e61664b7be3c.tar.gz
sqlite-a5d14fe7c5285cfac5c9a9aa90b7e61664b7be3c.zip
Added template for the utf.c file containing conversion routines. (CVS 1313)
FossilOrigin-Name: 89b42c468f437003f74a1785370e75b2585fa9e2
Diffstat (limited to 'src/utf.c')
-rw-r--r-- src/utf.c 77
1 files changed, 77 insertions, 0 deletions
diff --git a/src/utf.c b/src/utf.c
new file mode 100644
index 000000000..6990553e0
--- /dev/null
+++ b/src/utf.c
@@ -0,0 +1,77 @@
+/*
+** 2004 April 13
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains routines used to translate between UTF-8,
+** UTF-16, UTF-16BE, and UTF-16LE.
+**
+** $Id: utf.c,v 1.1 2004/05/04 15:00:47 drh Exp $
+**
+** Notes on UTF-8:
+**
+**   Byte-0    Byte-1    Byte-2    Byte-3    Value
+**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
+**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
+**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
+**  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
+**
+**
+** Notes on UTF-16: (with wwww+1==uuuuu)
+**
+**       Word-0            Word-1            Value
+**  110110wwwwxxxxxx  110111yyyyyyyyyy       000uuuuu xxxxxxyy yyyyyyyy
+**  xxxxxxxxyyyyyyyy                         00000000 xxxxxxxx yyyyyyyy
+**
+** BOM or Byte Order Mark:
+** 0xff 0xfe little-endian utf-16 follows
+** 0xfe 0xff big-endian utf-16 follows
+*/
+
+/*
+** Convert a string in UTF-16 native byte order (or with a Byte-order-mark or
+** "BOM") into a UTF-8 string. The UTF-8 string is written into space
+** obtained from sqlite3Malloc() and must be released by the calling function.
+**
+** The parameter N is the number of bytes in the UTF-16 string. If N is
+** negative, the entire string up to the first \u0000 character is translated.
+**
+** The returned UTF-8 string is always \000 terminated.
+*/
+unsigned char *sqlite3utf16to8(const void *pData, int N){
+ unsigned char *in = (unsigned char *)pData; /* Byte-wise view of the input; note the cast drops const */
+} /* TODO(review): template stub -- nothing is converted and no value is returned yet */
+
+/*
+** Convert a string in UTF-16 native byte order or with a BOM into a UTF-16LE
+** string. The conversion occurs in-place. The output overwrites the
+** input. N bytes are converted. If N is negative everything is converted
+** up to the first \u0000 character.
+**
+** If the native byte order is little-endian and there is no BOM, then
+** this routine is a no-op. If there is a BOM at the start of the string,
+** it is removed.
+*/
+void sqlite3utf16to16le(void *pData, int N){
+} /* TODO(review): template stub -- body is empty, so pData is left untouched for now */
+void sqlite3utf16to16be(void *pData, int N){ /* NOTE(review): presumably the UTF-16BE counterpart of sqlite3utf16to16le -- confirm */
+} /* TODO(review): template stub -- body is empty, so pData is left untouched for now */
+
+/*
+** Translation from UTF-16LE to UTF-16BE and back again is accomplished
+** using the library function swab().
+*/
+
+/*
+** Translate UTF-8 to UTF-16BE or UTF-16LE
+*/
+void *sqlite3utf8to16be(const unsigned char *pIn, int N){
+} /* TODO(review): template stub -- declared to return void* but has no return statement yet */
+void *sqlite3utf8to16le(const unsigned char *pIn, int N){
+} /* TODO(review): template stub -- declared to return void* but has no return statement yet */