diff options
author | Erik Terpstra <erterpstra@gmail.com> | 2020-06-23 15:08:06 +0200 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2020-06-25 14:31:57 +0100 |
commit | 70e513cf98a4d6f681e660f97813329d2ea74203 (patch) | |
tree | 86b20f8c3486add04a1f3e1a8a921db9d19eac2f | |
parent | 6b68e5e25006bc734345ce77ede14b6e54214d23 (diff) | |
download | gleam_stdlib-70e513cf98a4d6f681e660f97813329d2ea74203.tar.gz gleam_stdlib-70e513cf98a4d6f681e660f97813329d2ea74203.zip |
regex.split & regex.scan
-rw-r--r-- | CHANGELOG.md | 4 | ||||
-rw-r--r-- | src/gleam/regex.gleam | 53 | ||||
-rw-r--r-- | src/gleam_stdlib.erl | 26 | ||||
-rw-r--r-- | test/gleam/regex_test.gleam | 32 |
4 files changed, 111 insertions, 4 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 518a173..85b8f57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,8 +18,8 @@ - The `option` module gains the the `map`, `flatten`, `then` and `or` functions. - The `result` module gains the the `or` function. -- Created the `regex` module with the `from_string`, `from_string_with`, and - `match` functions. +- Created the `regex` module with the `from_string`, `from_string_with`, + `match`, `split` and `scan` functions. - The `list` module gains the the `pop`, `pop_map` and `key_pop` functions. - `base` module created with `encode64`, `decode64`, `url_encode64` and `url_decode64`. diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam index 3740984..3f49eb1 100644 --- a/src/gleam/regex.gleam +++ b/src/gleam/regex.gleam @@ -3,8 +3,26 @@ //// all of the PCRE library is interfaced and some parts of the library go beyond //// what PCRE offers. Currently PCRE version 8.40 (release date 2017-01-11) is used. +import gleam/option.{Option} + pub external type Regex +/// The details about a particular match: +/// +/// - match — the full string of the match. +/// - index — the index of the match in the original string. +/// - number — each match is numbered, starting from 1. +/// - submatches — a Regex can have subpatterns, sub-parts that are in parentheses. +/// +pub type Match { + Match( + match: String, + index: Int, + number: Int, + submatches: List(Option(String)), + ) +} + /// When a regular expression fails to compile: /// /// - error — a descriptive error message @@ -73,3 +91,38 @@ pub external fn from_string_with( /// pub external fn match(Regex, String) -> Bool = "gleam_stdlib" "regex_match" + +/// Split a string +/// +/// ## Examples +/// +/// > let Ok(re) = from_string(" *, *") +/// > split(re, "foo,32, 4, 9 ,0") +/// ["foo", "32", "4", "9", "0"] +/// +pub external fn split(Regex, String) -> List(String) = + "gleam_stdlib" "regex_split" + +/// Collects all matches of the regular expression. 
+/// +/// ## Examples +/// +/// > let Ok(re) = regex.from_string("[oi]n a (\\w+)") +/// > regex.scan(re, "I am on a boat in a lake.") +/// [ +/// Match( +/// match: "on a boat", +/// index: 5, +/// number: 1, +/// submatches: [Some("boat")] +/// ), +/// Match( +/// match: "in a lake", +/// index: 15, +/// number: 2, +/// submatches: [Some("lake")] +/// ) +/// ] +/// +pub external fn scan(Regex, String) -> List(Match) = + "gleam_stdlib" "regex_scan" diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 47fbdb5..e89458b 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -11,7 +11,7 @@ string_pad/4, decode_tuple2/1, decode_map/1, bit_string_int_to_u32/1, bit_string_int_from_u32/1, bit_string_append/2, bit_string_part_/3, decode_bit_string/1, regex_from_string/1, regex_from_string_with/2, - regex_match/2, base_decoded4/1]). + regex_match/2, regex_split/2, regex_scan/2, base_decoded4/1]). should_equal(Actual, Expected) -> ?assertEqual(Expected, Actual). should_not_equal(Actual, Expected) -> ?assertNotEqual(Expected, Actual). @@ -190,6 +190,30 @@ regex_match(Regex, String) -> _ -> false end. +regex_split(Regex, String) -> + re:split(String, Regex). + +regex_submatches(String, {S, L}) -> + SubMatch = string:slice(String, S, L), + case string:is_empty(SubMatch) of + true -> none; + false -> {some, SubMatch} + end. + +regex_matches(String, [{S, L} | Submatches], Number) -> + {match, string:slice(String, S, L), S, Number, + lists:map(fun(X) -> regex_submatches(String, X) end, Submatches)}. + +regex_captured(_, [], _) -> []; +regex_captured(String, [ H | T ], Number) -> + [ regex_matches(String, H, Number) | regex_captured(String, T, Number + 1) ]. + +regex_scan(Regex, String) -> + case re:run(String, Regex, [global]) of + {match, Captured} -> regex_captured(String, Captured, 1); + _ -> [] + end. 
+ base_decoded4(S) -> try {ok, base64:decode(S)} catch error:badarith -> {error, nil} diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam index d7ffc2d..da98cd8 100644 --- a/test/gleam/regex_test.gleam +++ b/test/gleam/regex_test.gleam @@ -1,4 +1,5 @@ -import gleam/regex.{FromStringError, Options} +import gleam/option.{Some, None} +import gleam/regex.{FromStringError, Match, Options} import gleam/should pub fn from_string_test() { @@ -44,3 +45,32 @@ pub fn match_test() { regex.match(re, "boo") |> should.equal(False) } + +pub fn split_test() { + assert Ok(re) = regex.from_string(" *, *") + + regex.split(re, "foo,32, 4, 9 ,0") + |> should.equal(["foo", "32", "4", "9", "0"]) +} + +pub fn scan_test() { + assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)") + + regex.scan(re, "I am on a boat in a lake.") + |> should.equal( + [ + Match( + match: "on a boat", + index: 5, + number: 1, + submatches: [None, Some("boat")], + ), + Match( + match: "in a lake", + index: 15, + number: 2, + submatches: [None, Some("lake")], + ), + ], + ) +} |