about summary refs log tree commit diff
path: root/test/regexp1.test
diff options
context:
space:
mode:
author: drh <> 2022-07-03 14:25:47 +0000
committer: drh <> 2022-07-03 14:25:47 +0000
commit: f28727f61e270ecbd58e52318d3895990ff66fe5 (patch)
tree: cbad8ba0c9ce4d08c1183835fa4403aa524ce96f /test/regexp1.test
parent: a57ac0a82738d5da8177c276f0904fe0089fad81 (diff)
download: sqlite-f28727f61e270ecbd58e52318d3895990ff66fe5.tar.gz
download: sqlite-f28727f61e270ecbd58e52318d3895990ff66fe5.zip
Fix the initial-prefix optimization for the REGEXP extension such that it
works even if the prefix contains characters that require a 3-byte UTF8
encoding. This should fix the problem reported by
[forum:/forumpost/96692f8ba5|forum post 96692f8ba5].

FossilOrigin-Name: c94595a6e15490b432f099fefbe2429fa19287f7bdc86332cba0fd1e08f65bd6
Diffstat (limited to 'test/regexp1.test')
-rw-r--r-- test/regexp1.test 22
1 files changed, 22 insertions, 0 deletions
diff --git a/test/regexp1.test b/test/regexp1.test
index 1eb56c672..569dd66c2 100644
--- a/test/regexp1.test
+++ b/test/regexp1.test
@@ -239,4 +239,26 @@ do_execsql_test regexp1-2.22 {
SELECT 'abc$¢€xyz' REGEXP '^abc[^\u0025-X][^ -\u007f][^\u20ab]xyz$'
} {1}
+# 2022-07-03
+# https://sqlite.org/forum/forumpost/96692f8ba5
+# The REGEXP extension mishandles the prefix search optimization when
+# the prefix contains 3-byte UTF8 characters.
+#
+reset_db
+load_static_extension db regexp
+do_execsql_test regexp1-3.1 {
+ CREATE TABLE t1(id INTEGER PRIMARY KEY, a TEXT);
+ INSERT INTO t1(id, a) VALUES(1, '日本語');
+ SELECT a, hex(a), length(a) FROM t1;
+} {日本語 E697A5E69CACE8AA9E 3}
+do_execsql_test regexp1-3.2 {
+ SELECT * FROM t1 WHERE a='日本語';
+} {1 日本語}
+do_execsql_test regexp1-3.3 {
+ SELECT * FROM t1 WHERE a LIKE '日本語';
+} {1 日本語}
+do_execsql_test regexp1-3.4 {
+ SELECT * FROM t1 wHERE a REGEXP '日本語';
+} {1 日本語}
+
finish_test