diff options
author | Louis Pilfold <louis@lpil.uk> | 2020-06-29 20:47:49 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2020-06-30 12:14:27 +0100 |
commit | c8544e558b8b8e2b407ec9650d588401c944d20f (patch) | |
tree | b390bdc5e5807bff7d3f07f22e8ed157584b32f1 | |
parent | 1ae8354d444013117abdde5987f734b3587592c4 (diff) | |
download | gleam_stdlib-c8544e558b8b8e2b407ec9650d588401c944d20f.tar.gz gleam_stdlib-c8544e558b8b8e2b407ec9650d588401c944d20f.zip |
Alter regex API
-rw-r--r-- | CHANGELOG.md | 4 | ||||
-rw-r--r-- | src/gleam/regex.gleam | 99 | ||||
-rw-r--r-- | src/gleam_stdlib.erl | 47 | ||||
-rw-r--r-- | test/gleam/regex_test.gleam | 47 |
4 files changed, 100 insertions, 97 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index fb85cef..56c765c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,8 +22,8 @@ - The `option` module gains the the `map`, `flatten`, `then` and `or` functions. - The `result` module gains the the `or` function. -- Created the `regex` module with the `from_string`, `from_string_with`, - `match`, `split` and `scan` functions. +- Created the `regex` module with the `from_string`, `compile`, `check`, + `split` and `scan` functions. - The `list` module gains the the `pop`, `pop_map` and `key_pop` functions. - `base` module created with `encode64`, `decode64`, `url_encode64` and `url_decode64`. diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam index 5c668eb..4317cd5 100644 --- a/src/gleam/regex.gleam +++ b/src/gleam/regex.gleam @@ -9,23 +9,49 @@ pub external type Regex /// The details about a particular match: /// -/// - match — the full string of the match. -/// - index — the byte index of the match in the original string. -/// - submatches — a Regex can have subpatterns, sup-parts that are in parentheses. -/// pub type Match { - Match(match: String, index: Int, submatches: List(Option(String))) + Match( + /// The full string of the match. + content: String, + /// The byte index of the match in the original string. + byte_index: Int, + /// A Regex can have subpatterns, sup-parts that are in parentheses. + submatches: List(Option(String)), + ) } /// When a regular expression fails to compile: /// -/// - error — a descriptive error message -/// - index — the byte index of the cause in the regex string -/// -pub type FromStringError { - FromStringError(error: String, index: Int) +pub type CompileError { + CompileError( + /// The problem encountered that caused the compilation to fail + error: String, + /// The byte index into the string to where the problem was found + byte_index: Int, + ) } +pub type Options { + Options(case_insensitive: Bool, multi_line: Bool) +} + +/// Create a Regex with some additional options. +/// +/// ## Examples +/// +/// > let options = Options(case_insensitive: False, multi_line: True) +/// > assert Ok(re) = compile("^[0-9]", with: options) +/// > match(re, "abc\n123") +/// True +/// +/// > let options = Options(case_insensitive: True, multi_line: False) +/// > assert Ok(re) = compile("[A-Z]", with: options) +/// > match(re, "abc123") +/// True +/// +pub external fn compile(String, with: Options) -> Result(Regex, CompileError) = + "gleam_stdlib" "compile_regex" + /// Create a new Regex. /// /// ## Examples @@ -39,51 +65,28 @@ pub type FromStringError { /// /// > from_string("[0-9") /// Error( -/// FromStringError( +/// CompileError( /// error: "missing terminating ] for character class", -/// index: 4 +/// byte_index: 4 /// ) /// ) /// -pub external fn from_string(String) -> Result(Regex, FromStringError) = - "gleam_stdlib" "regex_from_string" - -pub type Options { - Options(case_insensitive: Bool, multi_line: Bool) +pub fn from_string(pattern: String) -> Result(Regex, CompileError) { + compile(pattern, Options(case_insensitive: False, multi_line: False)) } -/// Create a Regex with some additional options. -/// -/// ## Examples -/// -/// > let options = Options(case_insensitive: False, multi_line: True) -/// > assert Ok(re) = from_string_with(options, "^[0-9]") -/// > match(re, "abc\n123") -/// True -/// -/// > let options = Options(case_insensitive: True, multi_line: False) -/// > assert Ok(re) = from_string_with(options, "[A-Z]") -/// > match(re, "abc123") -/// True -/// -pub external fn from_string_with( - Options, - String, -) -> Result(Regex, FromStringError) = - "gleam_stdlib" "regex_from_string_with" - /// Returns a boolean indicating whether there was a match or not. /// /// ## Examples /// /// > assert Ok(re) = from_string("^f.o.?") -/// > match(re, "foo") +/// > check(with: re, content: "foo") /// True /// -/// > match(re, "boo") +/// > check(with: re, content: "boo") /// False /// -pub external fn match(Regex, String) -> Bool = +pub external fn check(with: Regex, content: String) -> Bool = "gleam_stdlib" "regex_match" /// Split a string @@ -91,10 +94,10 @@ pub external fn match(Regex, String) -> Bool = /// ## Examples /// /// > assert Ok(re) = from_string(" *, *") -/// > split(re, "foo,32, 4, 9 ,0") +/// > split(with: re, content: "foo,32, 4, 9 ,0") /// ["foo", "32", "4", "9", "0"] /// -pub external fn split(Regex, String) -> List(String) = +pub external fn split(with: Regex, content: String) -> List(String) = "gleam_stdlib" "regex_split" /// Collects all matches of the regular expression. @@ -102,19 +105,19 @@ pub external fn split(Regex, String) -> List(String) = /// ## Examples /// /// > assert Ok(re) = regex.from_string("[oi]n a (\\w+)") -/// > regex.scan(re, "I am on a boat in a lake.") +/// > regex.scan(with: re, content: "I am on a boat in a lake.") /// [ /// Match( -/// match: "on a boat", -/// index: 5, +/// content: "on a boat", +/// byte_index: 5, /// submatches: [Some("boat")] /// ), /// Match( -/// match: "in a lake", -/// index: 15, +/// content: "in a lake", +/// byte_index: 15, /// submatches: [Some("lake")] /// ) /// ] /// -pub external fn scan(Regex, String) -> List(Match) = +pub external fn scan(with: Regex, content: String) -> List(Match) = "gleam_stdlib" "regex_scan" diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 85855d1..db2f6fd 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -10,8 +10,8 @@ string_pop_grapheme/1, string_starts_with/2, string_ends_with/2, string_pad/4, decode_tuple2/1, decode_map/1, bit_string_int_to_u32/1, bit_string_int_from_u32/1, bit_string_append/2, bit_string_part_/3, - decode_bit_string/1, regex_from_string/1, regex_from_string_with/2, - regex_match/2, regex_split/2, regex_scan/2, base_decoded4/1]). + decode_bit_string/1, compile_regex/2, regex_match/2, regex_split/2, + regex_scan/2, base_decoded4/1]). should_equal(Actual, Expected) -> ?assertEqual(Expected, Actual). should_not_equal(Actual, Expected) -> ?assertNotEqual(Expected, Actual). @@ -155,40 +155,31 @@ bit_string_part_(Bin, Pos, Len) -> end. bit_string_int_to_u32(I) when 0 =< I, I < 4294967296 -> - {ok, <<I:32>>}; + {ok, <<I:32>>}; bit_string_int_to_u32(_) -> - {error, nil}. + {error, nil}. bit_string_int_from_u32(<<I:32>>) -> - {ok, I}; + {ok, I}; bit_string_int_from_u32(_) -> - {error, nil}. - -regex_from_string_with_opts(Options, String) -> - case re:compile(String, Options) of + {error, nil}. + +compile_regex(String, Options) -> + {options, Caseless, Multiline} = Options, + OptionsList = [ + unicode, + Caseless andalso caseless, + Multiline andalso multiline + ], + FilteredOptions = [Option || Option <- OptionsList, Option /= false], + case re:compile(String, FilteredOptions) of {ok, MP} -> {ok, MP}; {error, {Str, Pos}} -> - {error, {from_string_error, unicode:characters_to_binary(Str), Pos}} - end. - -regex_from_string(String) -> - regex_from_string_with_opts([unicode], String). - -regex_from_string_with(Options, String) -> - OptList = case Options of - {options, true, _} -> [unicode, caseless]; - _ -> [unicode] - end, - case Options of - {options, _, true} -> regex_from_string_with_opts([multiline | OptList], String); - _ -> regex_from_string_with_opts(OptList, String) + {error, {compile_error, unicode:characters_to_binary(Str), Pos}} end. regex_match(Regex, String) -> - case re:run(String, Regex) of - {match, _} -> true; - _ -> false - end. + re:run(String, Regex) /= nomatch. regex_split(Regex, String) -> re:split(String, Regex). @@ -207,7 +198,7 @@ regex_matches(String, [{S, L} | Submatches]) -> regex_scan(Regex, String) -> case re:run(String, Regex, [global]) of {match, Captured} -> lists:map(fun(X) -> regex_matches(String, X) end, Captured); - _ -> [] + nomatch -> [] end. base_decoded4(S) -> diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam index 8f69fee..3eecccb 100644 --- a/test/gleam/regex_test.gleam +++ b/test/gleam/regex_test.gleam @@ -1,48 +1,49 @@ -import gleam/option.{Some, None} -import gleam/regex.{FromStringError, Match, Options} +import gleam/io +import gleam/option.{None, Some} +import gleam/regex.{CompileError, Match, Options} import gleam/should pub fn from_string_test() { assert Ok(re) = regex.from_string("[0-9]") - regex.match(re, "abc123") + regex.check(re, "abc123") |> should.equal(True) - regex.match(re, "abcxyz") + regex.check(re, "abcxyz") |> should.equal(False) assert Error(from_string_err) = regex.from_string("[0-9") from_string_err |> should.equal( - FromStringError( + CompileError( error: "missing terminating ] for character class", - index: 4, + byte_index: 4, ), ) } -pub fn from_string_with_test() { +pub fn compile_test() { let options = Options(case_insensitive: True, multi_line: False) - assert Ok(re) = regex.from_string_with(options, "[A-B]") + assert Ok(re) = regex.compile("[A-B]", options) - regex.match(re, "abc123") + regex.check(re, "abc123") |> should.equal(True) let options = Options(case_insensitive: False, multi_line: True) - assert Ok(re) = regex.from_string_with(options, "^[0-9]") + assert Ok(re) = regex.compile("^[0-9]", options) - regex.match(re, "abc\n123") + regex.check(re, "abc\n123") |> should.equal(True) } -pub fn match_test() { +pub fn check_test() { assert Ok(re) = regex.from_string("^f.o.?") - regex.match(re, "foo") + regex.check(re, "foo") |> should.equal(True) - regex.match(re, "boo") + regex.check(re, "boo") |> should.equal(False) } @@ -57,21 +58,29 @@ pub fn scan_test() { assert Ok(re) = regex.from_string("Gl\\w+") regex.scan(re, "!Gleam") - |> should.equal([Match(match: "Gleam", index: 1, submatches: [])]) + |> should.equal([Match(content: "Gleam", byte_index: 1, submatches: [])]) regex.scan(re, "हGleam") - |> should.equal([Match(match: "Gleam", index: 3, submatches: [])]) + |> should.equal([Match(content: "Gleam", byte_index: 3, submatches: [])]) regex.scan(re, "𐍈Gleam") - |> should.equal([Match(match: "Gleam", index: 4, submatches: [])]) + |> should.equal([Match(content: "Gleam", byte_index: 4, submatches: [])]) assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)") regex.scan(re, "I am on a boat in a lake.") |> should.equal( [ - Match(match: "on a boat", index: 5, submatches: [None, Some("boat")]), - Match(match: "in a lake", index: 15, submatches: [None, Some("lake")]), + Match( + content: "on a boat", + byte_index: 5, + submatches: [None, Some("boat")], + ), + Match( + content: "in a lake", + byte_index: 15, + submatches: [None, Some("lake")], + ), ], ) } |