aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErik Terpstra <erterpstra@gmail.com>2020-06-23 15:08:06 +0200
committerLouis Pilfold <louis@lpil.uk>2020-06-25 14:31:57 +0100
commit70e513cf98a4d6f681e660f97813329d2ea74203 (patch)
tree86b20f8c3486add04a1f3e1a8a921db9d19eac2f
parent6b68e5e25006bc734345ce77ede14b6e54214d23 (diff)
downloadgleam_stdlib-70e513cf98a4d6f681e660f97813329d2ea74203.tar.gz
gleam_stdlib-70e513cf98a4d6f681e660f97813329d2ea74203.zip
regex.split & regex.scan
-rw-r--r--CHANGELOG.md4
-rw-r--r--src/gleam/regex.gleam53
-rw-r--r--src/gleam_stdlib.erl26
-rw-r--r--test/gleam/regex_test.gleam32
4 files changed, 111 insertions, 4 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 518a173..85b8f57 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,8 +18,8 @@
- The `option` module gains the the `map`, `flatten`, `then` and `or`
functions.
- The `result` module gains the the `or` function.
-- Created the `regex` module with the `from_string`, `from_string_with`, and
- `match` functions.
+- Created the `regex` module with the `from_string`, `from_string_with`,
+ `match`, `split` and `scan` functions.
- The `list` module gains the the `pop`, `pop_map` and `key_pop` functions.
- `base` module created with `encode64`, `decode64`, `url_encode64` and
`url_decode64`.
diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam
index 3740984..3f49eb1 100644
--- a/src/gleam/regex.gleam
+++ b/src/gleam/regex.gleam
@@ -3,8 +3,26 @@
//// all of the PCRE library is interfaced and some parts of the library go beyond
//// what PCRE offers. Currently PCRE version 8.40 (release date 2017-01-11) is used.
+import gleam/option.{Option}
+
pub external type Regex
+/// The details about a particular match:
+///
+/// - match — the full string of the match.
+/// - index — the index of the match in the original string.
+/// - number — each match is numbered, starting from 1.
+/// - submatches — a Regex can have subpatterns, sub-parts that are in parentheses.
+///
+pub type Match {
+ Match(
+ match: String,
+ index: Int,
+ number: Int,
+ submatches: List(Option(String)),
+ )
+}
+
/// When a regular expression fails to compile:
///
/// - error — a descriptive error message
@@ -73,3 +91,38 @@ pub external fn from_string_with(
///
pub external fn match(Regex, String) -> Bool =
"gleam_stdlib" "regex_match"
+
+/// Splits a string on every match of the regular expression.
+///
+/// ## Examples
+///
+/// > let Ok(re) = from_string(" *, *")
+/// > split(re, "foo,32, 4, 9 ,0")
+/// ["foo", "32", "4", "9", "0"]
+///
+pub external fn split(Regex, String) -> List(String) =
+ "gleam_stdlib" "regex_split"
+
+/// Collects all matches of the regular expression.
+///
+/// ## Examples
+///
+/// > let Ok(re) = regex.from_string("[oi]n a (\\w+)")
+/// > regex.scan(re, "I am on a boat in a lake.")
+/// [
+/// Match(
+/// match: "on a boat",
+/// index: 5,
+/// number: 1,
+/// submatches: [Some("boat")]
+/// ),
+/// Match(
+/// match: "in a lake",
+/// index: 15,
+/// number: 2,
+/// submatches: [Some("lake")]
+/// )
+/// ]
+///
+pub external fn scan(Regex, String) -> List(Match) =
+ "gleam_stdlib" "regex_scan"
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index 47fbdb5..e89458b 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -11,7 +11,7 @@
string_pad/4, decode_tuple2/1, decode_map/1, bit_string_int_to_u32/1,
bit_string_int_from_u32/1, bit_string_append/2, bit_string_part_/3,
decode_bit_string/1, regex_from_string/1, regex_from_string_with/2,
- regex_match/2, base_decoded4/1]).
+ regex_match/2, regex_split/2, regex_scan/2, base_decoded4/1]).
should_equal(Actual, Expected) -> ?assertEqual(Expected, Actual).
should_not_equal(Actual, Expected) -> ?assertNotEqual(Expected, Actual).
@@ -190,6 +190,30 @@ regex_match(Regex, String) ->
_ -> false
end.
+regex_split(Regex, String) ->
+ re:split(String, Regex).
+
+regex_submatches(String, {S, L}) ->
+ SubMatch = string:slice(String, S, L),
+ case string:is_empty(SubMatch) of
+ true -> none;
+ false -> {some, SubMatch}
+ end.
+
+regex_matches(String, [{S, L} | Submatches], Number) ->
+ {match, string:slice(String, S, L), S, Number,
+ lists:map(fun(X) -> regex_submatches(String, X) end, Submatches)}.
+
+regex_captured(_, [], _) -> [];
+regex_captured(String, [ H | T ], Number) ->
+ [ regex_matches(String, H, Number) | regex_captured(String, T, Number + 1) ].
+
+regex_scan(Regex, String) ->
+ case re:run(String, Regex, [global]) of
+ {match, Captured} -> regex_captured(String, Captured, 1);
+ _ -> []
+ end.
+
base_decoded4(S) ->
try {ok, base64:decode(S)} catch
error:badarith -> {error, nil}
diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam
index d7ffc2d..da98cd8 100644
--- a/test/gleam/regex_test.gleam
+++ b/test/gleam/regex_test.gleam
@@ -1,4 +1,5 @@
-import gleam/regex.{FromStringError, Options}
+import gleam/option.{Some, None}
+import gleam/regex.{FromStringError, Match, Options}
import gleam/should
pub fn from_string_test() {
@@ -44,3 +45,32 @@ pub fn match_test() {
regex.match(re, "boo")
|> should.equal(False)
}
+
+pub fn split_test() {
+ assert Ok(re) = regex.from_string(" *, *")
+
+ regex.split(re, "foo,32, 4, 9 ,0")
+ |> should.equal(["foo", "32", "4", "9", "0"])
+}
+
+pub fn scan_test() {
+ assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)")
+
+ regex.scan(re, "I am on a boat in a lake.")
+ |> should.equal(
+ [
+ Match(
+ match: "on a boat",
+ index: 5,
+ number: 1,
+ submatches: [None, Some("boat")],
+ ),
+ Match(
+ match: "in a lake",
+ index: 15,
+ number: 2,
+ submatches: [None, Some("lake")],
+ ),
+ ],
+ )
+}