diff options
author | inoas <mail@inoas.com> | 2022-10-25 23:01:23 +0200 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2022-10-27 15:13:03 +0100 |
commit | ed9405a0eb12061b3ce680f80e6d89b5ff518f21 (patch) | |
tree | 107d7ea2116fef3358c8ecd88ee84e8d8a2e315e | |
parent | c758631b79d594b884a8031182464e9251bd9ee4 (diff) | |
download | gleam_stdlib-ed9405a0eb12061b3ce680f80e6d89b5ff518f21.tar.gz gleam_stdlib-ed9405a0eb12061b3ce680f80e6d89b5ff518f21.zip |
fix regex.scan to work correctly with utf8 strings
-rw-r--r-- | src/gleam_stdlib.erl | 13 | ||||
-rw-r--r-- | test/gleam/regex_test.gleam | 14 |
2 files changed, 21 insertions, 6 deletions
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index dc3727c..04b73eb 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -200,16 +200,17 @@ regex_check(Regex, String) -> regex_split(Regex, String) -> re:split(String, Regex). -regex_submatches(String, {S, L}) -> - SubMatch = string:slice(String, S, L), - case string:is_empty(SubMatch) of +regex_submatches(String, {Start, Length}) -> + Binary = unicode:characters_to_binary(String, unicode, unicode), + BinarySlice = binary:part(Binary, {Start, Length}), + case string:is_empty(binary_to_list(BinarySlice)) of true -> none; - false -> {some, SubMatch} + false -> {some, BinarySlice} end. -regex_matches(String, [{S, L} | Submatches]) -> +regex_matches(String, [{Start, Length} | Submatches]) -> Submatches1 = lists:map(fun(X) -> regex_submatches(String, X) end, Submatches), - {match, binary:part(String, S, L), Submatches1}. + {match, binary:part(String, Start, Length), Submatches1}. regex_scan(Regex, String) -> case re:run(String, Regex, [global]) of diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam index cdd6e68..30d197f 100644 --- a/test/gleam/regex_test.gleam +++ b/test/gleam/regex_test.gleam @@ -64,4 +64,18 @@ pub fn scan_test() { Match(content: "on a boat", submatches: [None, Some("boat")]), Match(content: "in a lake", submatches: [None, Some("lake")]), ]) + + assert Ok(re) = regex.from_string("answer (\\d+)") + regex.scan(re, "Is the answer 42?") + |> should.equal([Match(content: "answer 42", submatches: [Some("42")])]) + + assert Ok(re) = regex.from_string("(\\d+)") + regex.scan(re, "hello 42") + |> should.equal([Match(content: "42", submatches: [Some("42")])]) + + regex.scan(re, "你好 42") + |> should.equal([Match(content: "42", submatches: [Some("42")])]) + + regex.scan(re, "你好 42 世界") + |> should.equal([Match(content: "42", submatches: [Some("42")])]) } |