diff options
author | drh <> | 2022-07-03 14:25:47 +0000 |
---|---|---|
committer | drh <> | 2022-07-03 14:25:47 +0000 |
commit | f28727f61e270ecbd58e52318d3895990ff66fe5 (patch) | |
tree | cbad8ba0c9ce4d08c1183835fa4403aa524ce96f /test/regexp1.test | |
parent | a57ac0a82738d5da8177c276f0904fe0089fad81 (diff) | |
download | sqlite-f28727f61e270ecbd58e52318d3895990ff66fe5.tar.gz sqlite-f28727f61e270ecbd58e52318d3895990ff66fe5.zip |
Fix the initial-prefix optimization for the REGEXP extension such that it
works even if the prefix contains characters that require a 3-byte UTF8
encoding. This should fix the problem reported by
[forum:/forumpost/96692f8ba5|forum post 96692f8ba5].
FossilOrigin-Name: c94595a6e15490b432f099fefbe2429fa19287f7bdc86332cba0fd1e08f65bd6
Diffstat (limited to 'test/regexp1.test')
-rw-r--r-- | test/regexp1.test | 22 |
1 file changed, 22 insertions, 0 deletions
diff --git a/test/regexp1.test b/test/regexp1.test
index 1eb56c672..569dd66c2 100644
--- a/test/regexp1.test
+++ b/test/regexp1.test
@@ -239,4 +239,26 @@ do_execsql_test regexp1-2.22 {
   SELECT 'abc$¢€xyz' REGEXP '^abc[^\u0025-X][^ -\u007f][^\u20ab]xyz$'
 } {1}
 
+# 2022-07-03
+# https://sqlite.org/forum/forumpost/96692f8ba5
+# The REGEXP extension mishandles the prefix search optimization when
+# the prefix contains 3-byte UTF8 characters.
+#
+reset_db
+load_static_extension db regexp
+do_execsql_test regexp1-3.1 {
+  CREATE TABLE t1(id INTEGER PRIMARY KEY, a TEXT);
+  INSERT INTO t1(id, a) VALUES(1, '日本語');
+  SELECT a, hex(a), length(a) FROM t1;
+} {日本語 E697A5E69CACE8AA9E 3}
+do_execsql_test regexp1-3.2 {
+  SELECT * FROM t1 WHERE a='日本語';
+} {1 日本語}
+do_execsql_test regexp1-3.3 {
+  SELECT * FROM t1 WHERE a LIKE '日本語';
+} {1 日本語}
+do_execsql_test regexp1-3.4 {
+  SELECT * FROM t1 wHERE a REGEXP '日本語';
+} {1 日本語}
+
 finish_test