aboutsummaryrefslogtreecommitdiff
path: root/src/utf.c
diff options
context:
space:
mode:
authordrh <>2025-02-25 11:47:34 +0000
committerdrh <>2025-02-25 11:47:34 +0000
commita357a90f12e927ec169168cd89e54dc4fa905717 (patch)
tree17d1145c01c157b77094b27a4ae59765ed9b6b2d /src/utf.c
parentc46fbec350c4a0a71410bd32384e59ac27a799d5 (diff)
downloadsqlite-a357a90f12e927ec169168cd89e54dc4fa905717.tar.gz
sqlite-a357a90f12e927ec169168cd89e54dc4fa905717.zip
Consolidate two different UTF8 encoders into a single subroutine.
FossilOrigin-Name: 6208e494858b9d362efc7db4e8aac6f8e93fe51d2e038c94dfa97c55a74688a0
Diffstat (limited to 'src/utf.c')
-rw-r--r--src/utf.c29
1 files changed, 29 insertions, 0 deletions
diff --git a/src/utf.c b/src/utf.c
index c934bb234..57700bf20 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -106,6 +106,35 @@ static const unsigned char sqlite3Utf8Trans1[] = {
}
/*
+** Write a single UTF8 character whose value is v into the
+** buffer starting at zOut. zOut must be sized to hold at
+** least four bytes. Return the number of bytes written
+** to encode the character (1, 2, 3, or 4).
+**
+** The length of the encoding is selected by the magnitude of v:
+**
+**     v < 0x00080      1 byte
+**     v < 0x00800      2 bytes
+**     v < 0x10000      3 bytes
+**     otherwise        4 bytes
+**
+** No validation is performed on v. Values in the surrogate range
+** 0xD800..0xDFFF are encoded using the 3-byte form like any other
+** value below 0x10000. The 4-byte form stores only the low 21 bits
+** of v, so any higher-order bits are silently discarded. No
+** terminator is written after the encoded bytes.
+*/
+int sqlite3AppendOneUtf8Character(char *zOut, u32 v){
+ if( v<0x00080 ){                          /* 1-byte form: value stored as-is */
+ zOut[0] = (u8)(v & 0xff);
+ return 1;
+ }
+ if( v<0x00800 ){                          /* 2-byte form: 110xxxxx 10xxxxxx */
+ zOut[0] = 0xc0 + (u8)((v>>6) & 0x1f);     /* lead byte: bits 6..10 of v */
+ zOut[1] = 0x80 + (u8)(v & 0x3f);          /* continuation: bits 0..5 of v */
+ return 2;
+ }
+ if( v<0x10000 ){                          /* 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
+ zOut[0] = 0xe0 + (u8)((v>>12) & 0x0f);    /* lead byte: bits 12..15 of v */
+ zOut[1] = 0x80 + (u8)((v>>6) & 0x3f);     /* continuation: bits 6..11 of v */
+ zOut[2] = 0x80 + (u8)(v & 0x3f);          /* continuation: bits 0..5 of v */
+ return 3;
+ }
+ zOut[0] = 0xf0 + (u8)((v>>18) & 0x07);    /* 4-byte form, lead byte 11110xxx: bits 18..20 of v; bits above 20 are masked off */
+ zOut[1] = 0x80 + (u8)((v>>12) & 0x3f);    /* continuation: bits 12..17 of v */
+ zOut[2] = 0x80 + (u8)((v>>6) & 0x3f);     /* continuation: bits 6..11 of v */
+ zOut[3] = 0x80 + (u8)(v & 0x3f);          /* continuation: bits 0..5 of v */
+ return 4;
+}
+
+/*
** Translate a single UTF-8 character. Return the unicode value.
**
** During translation, assume that the byte that zTerm points