aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author inoas <mail@inoas.com> 2022-10-25 23:01:23 +0200
committer Louis Pilfold <louis@lpil.uk> 2022-10-27 15:13:03 +0100
commit ed9405a0eb12061b3ce680f80e6d89b5ff518f21 (patch)
tree 107d7ea2116fef3358c8ecd88ee84e8d8a2e315e
parent c758631b79d594b884a8031182464e9251bd9ee4 (diff)
download gleam_stdlib-ed9405a0eb12061b3ce680f80e6d89b5ff518f21.tar.gz
gleam_stdlib-ed9405a0eb12061b3ce680f80e6d89b5ff518f21.zip
fix regex.scan to work correctly with utf8 strings
-rw-r--r-- src/gleam_stdlib.erl 13
-rw-r--r-- test/gleam/regex_test.gleam 14
2 files changed, 21 insertions, 6 deletions
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index dc3727c..04b73eb 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -200,16 +200,17 @@ regex_check(Regex, String) ->
regex_split(Regex, String) ->
re:split(String, Regex).
-regex_submatches(String, {S, L}) ->
- SubMatch = string:slice(String, S, L),
- case string:is_empty(SubMatch) of
+regex_submatches(String, {Start, Length}) ->
+ Binary = unicode:characters_to_binary(String, unicode, unicode),
+ BinarySlice = binary:part(Binary, {Start, Length}),
+ case string:is_empty(binary_to_list(BinarySlice)) of
true -> none;
- false -> {some, SubMatch}
+ false -> {some, BinarySlice}
end.
-regex_matches(String, [{S, L} | Submatches]) ->
+regex_matches(String, [{Start, Length} | Submatches]) ->
Submatches1 = lists:map(fun(X) -> regex_submatches(String, X) end, Submatches),
- {match, binary:part(String, S, L), Submatches1}.
+ {match, binary:part(String, Start, Length), Submatches1}.
regex_scan(Regex, String) ->
case re:run(String, Regex, [global]) of
diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam
index cdd6e68..30d197f 100644
--- a/test/gleam/regex_test.gleam
+++ b/test/gleam/regex_test.gleam
@@ -64,4 +64,18 @@ pub fn scan_test() {
Match(content: "on a boat", submatches: [None, Some("boat")]),
Match(content: "in a lake", submatches: [None, Some("lake")]),
])
+
+ assert Ok(re) = regex.from_string("answer (\\d+)")
+ regex.scan(re, "Is the answer 42?")
+ |> should.equal([Match(content: "answer 42", submatches: [Some("42")])])
+
+ assert Ok(re) = regex.from_string("(\\d+)")
+ regex.scan(re, "hello 42")
+ |> should.equal([Match(content: "42", submatches: [Some("42")])])
+
+ regex.scan(re, "你好 42")
+ |> should.equal([Match(content: "42", submatches: [Some("42")])])
+
+ regex.scan(re, "你好 42 世界")
+ |> should.equal([Match(content: "42", submatches: [Some("42")])])
}