aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordrh <>2025-02-22 23:18:38 +0000
committerdrh <>2025-02-22 23:18:38 +0000
commit4d70dbad57c2099aac75a46b3dfb646df09587aa (patch)
tree77701ac288691de877235209860b3119f758d89f /src
parentc071c47b51f39e2a186dbb87e1b7347d5590986d (diff)
downloadsqlite-4d70dbad57c2099aac75a46b3dfb646df09587aa.tar.gz
sqlite-4d70dbad57c2099aac75a46b3dfb646df09587aa.zip
Prototype implementation of the unistr() SQL function.
FossilOrigin-Name: 7cc302de05ed2a973372c05f55b048bf99af3d2590dd29f6fd0f379fb451aa0e
Diffstat (limited to 'src')
-rw-r--r--src/func.c127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/func.c b/src/func.c
index 5784b75e2..c80ac915a 100644
--- a/src/func.c
+++ b/src/func.c
@@ -1150,6 +1150,132 @@ void sqlite3QuoteValue(StrAccum *pStr, sqlite3_value *pValue){
}
/*
+** Write a single UTF8 character whose value if v into the
+** buffer starting at zOut. Return the number of bytes needed
+** to encode that character.
+*/
+static int appendOneUtf8Char(char *zOut, u32 v){
+ if( v<0x00080 ){
+ zOut[0] = (u8)(v & 0xff);
+ return 1;
+ }
+ if( v<0x00800 ){
+ zOut[0] = 0xc0 + (u8)((v>>6) & 0x1f);
+ zOut[1] = 0x80 + (u8)(v & 0x3f);
+ return 2;
+ }
+ if( v<0x10000 ){
+ zOut[0] = 0xe0 + (u8)((v>>12) & 0x0f);
+ zOut[1] = 0x80 + (u8)((v>>6) & 0x3f);
+ zOut[2] = 0x80 + (u8)(v & 0x3f);
+ return 3;
+ }
+ zOut[0] = 0xf0 + (u8)((v>>18) & 0x07);
+ zOut[1] = 0x80 + (u8)((v>>12) & 0x3f);
+ zOut[2] = 0x80 + (u8)((v>>6) & 0x3f);
+ zOut[3] = 0x80 + (u8)(v & 0x3f);
+ return 4;
+}
+
+/*
+** Return true if z[] begins with N hexadecimal digits, and write
+** a decoding of those digits into *pVal. Or return false if any
+** one of the first N characters in z[] is not a hexadecimal digit.
+*/
+static int isNHex(const char *z, int N, u32 *pVal){
+ int i;
+ int v = 0;
+ for(i=0; i<N; i++){
+ if( !sqlite3Isxdigit(z[i]) ) return 0;
+ v = (v<<4) + sqlite3HexToInt(z[i]);
+ }
+ *pVal = v;
+ return 1;
+}
+
+/*
+** Implementation of the UNISTR() function.
+**
+** This is intended to be a work-alike of the UNISTR() function in
+** PostgreSQL. Quoting from the PG documentation (PostgreSQL 17 -
+** scraped on 2025-02-22):
+**
+** Evaluate escaped Unicode characters in the argument. Unicode
+** characters can be specified as \XXXX (4 hexadecimal digits),
+** \+XXXXXX (6 hexadecimal digits), \uXXXX (4 hexadecimal digits),
+** or \UXXXXXXXX (8 hexadecimal digits). To specify a backslash,
+** write two backslashes. All other characters are taken literally.
+*/
+static void unistrFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ char *zOut;
+ const char *zIn;
+ int nIn;
+ int i, j, n;
+ u32 v;
+
+ assert( argc==1 );
+ zIn = (const char*)sqlite3_value_text(argv[0]);
+ if( zIn==0 ) return;
+ nIn = sqlite3_value_bytes(argv[0]);
+ zOut = sqlite3_malloc64(nIn+1);
+ if( zOut==0 ){
+ sqlite3_result_error_nomem(context);
+ return;
+ }
+ i = j = 0;
+ while( i<nIn ){
+ char *z = strchr(&zIn[i],'\\');
+ if( z==0 ){
+ n = nIn - i;
+ memmove(&zOut[j], &zIn[i], n);
+ j += n;
+ break;
+ }
+ n = z - &zIn[i];
+ if( n>0 ){
+ memmove(&zOut[j], &zIn[i], n);
+ j += n;
+ i += n;
+ }
+ if( zIn[i+1]=='\\' ){
+ i += 2;
+ zOut[j++] = '\\';
+ }else if( sqlite3Isxdigit(zIn[i+1]) ){
+ if( !isNHex(&zIn[i+1], 4, &v) ) goto unistr_error;
+ i += 5;
+ j += appendOneUtf8Char(&zOut[j], v);
+ }else if( zIn[i+1]=='+' ){
+ if( !isNHex(&zIn[i+2], 6, &v) ) goto unistr_error;
+ i += 8;
+ j += appendOneUtf8Char(&zOut[j], v);
+ }else if( zIn[i+1]=='u' ){
+ if( !isNHex(&zIn[i+2], 4, &v) ) goto unistr_error;
+ i += 6;
+ j += appendOneUtf8Char(&zOut[j], v);
+ }else if( zIn[i+1]=='U' ){
+ if( !isNHex(&zIn[i+2], 8, &v) ) goto unistr_error;
+ i += 10;
+ j += appendOneUtf8Char(&zOut[j], v);
+ }else{
+ goto unistr_error;
+ }
+ }
+ zOut[j] = 0;
+ sqlite3_result_text64(context, zOut, j, sqlite3_free, SQLITE_UTF8);
+ return;
+
+unistr_error:
+ sqlite3_free(zOut);
+ sqlite3_result_error(context, "invalid Unicode escape", -1);
+ return;
+}
+
+
+/*
** Implementation of the QUOTE() function.
**
** The quote(X) function returns the text of an SQL literal which is the
@@ -2736,6 +2862,7 @@ void sqlite3RegisterBuiltinFunctions(void){
DFUNCTION(sqlite_version, 0, 0, 0, versionFunc ),
DFUNCTION(sqlite_source_id, 0, 0, 0, sourceidFunc ),
FUNCTION(sqlite_log, 2, 0, 0, errlogFunc ),
+ FUNCTION(unistr, 1, 0, 0, unistrFunc ),
FUNCTION(quote, 1, 0, 0, quoteFunc ),
VFUNCTION(last_insert_rowid, 0, 0, 0, last_insert_rowid),
VFUNCTION(changes, 0, 0, 0, changes ),