diff options
author | Louis Pilfold <louis@lpil.uk> | 2021-08-26 23:13:44 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2021-08-27 00:37:15 +0100 |
commit | 6104a3bcc26a05942b8062ae62a0c7b932ca8cf6 (patch) | |
tree | d32e8a76a710a33b0d204333343bec5f831f9db2 | |
parent | 70f065415469c65dd7f66dd4ef3f8e651f16b66a (diff) | |
download | gleam_stdlib-6104a3bcc26a05942b8062ae62a0c7b932ca8cf6.tar.gz gleam_stdlib-6104a3bcc26a05942b8062ae62a0c7b932ca8cf6.zip |
Regex scan
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | src/gleam/regex.gleam | 49 | ||||
-rw-r--r-- | src/gleam_stdlib.js | 22 | ||||
-rw-r--r-- | test/gleam/regex_test.gleam | 48 |
4 files changed, 64 insertions, 56 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index d8ec189..f2e0b73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ string error description. - The `string` module gains the `to_option` function. - Fixed a bug where `io.print` could crash when printing special characters. +- The `regex.Match` record no longer has the `byte_index` field any more. ## v0.16.0 - 2021-06-17 diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam index 8eddabe..cd717d9 100644 --- a/src/gleam/regex.gleam +++ b/src/gleam/regex.gleam @@ -13,8 +13,6 @@ pub type Match { Match( /// The full string of the match. content: String, - /// The byte index of the match in the original string. - byte_index: Int, /// A Regex can have subpatterns, sup-parts that are in parentheses. submatches: List(Option(String)), ) @@ -141,26 +139,33 @@ if javascript { "../gleam_stdlib.js" "split" } +/// Collects all matches of the regular expression. +/// +/// ## Examples +/// +/// > assert Ok(re) = regex.from_string("[oi]n a (\\w+)") +/// > regex.scan(with: re, content: "I am on a boat in a lake.") +/// [ +/// Match( +/// content: "on a boat", +/// submatches: [Some("boat")] +/// ), +/// Match( +/// content: "in a lake", +/// submatches: [Some("lake")] +/// ) +/// ] +/// +pub fn scan(with regex: Regex, content string: String) -> List(Match) { + do_scan(regex, string) +} + if erlang { - /// Collects all matches of the regular expression. - /// - /// ## Examples - /// - /// > assert Ok(re) = regex.from_string("[oi]n a (\\w+)") - /// > regex.scan(with: re, content: "I am on a boat in a lake.") - /// [ - /// Match( - /// content: "on a boat", - /// byte_index: 5, - /// submatches: [Some("boat")] - /// ), - /// Match( - /// content: "in a lake", - /// byte_index: 15, - /// submatches: [Some("lake")] - /// ) - /// ] - /// - pub external fn scan(with: Regex, content: String) -> List(Match) = + external fn do_scan(Regex, String) -> List(Match) = "gleam_stdlib" "regex_scan" } + +if javascript { + external fn do_scan(Regex, String) -> List(Match) = + "../gleam_stdlib.js" "regex_scan" +} diff --git a/src/gleam_stdlib.js b/src/gleam_stdlib.js index d2454ba..652b7b5 100644 --- a/src/gleam_stdlib.js +++ b/src/gleam_stdlib.js @@ -7,7 +7,11 @@ import { toBitString, stringBits, } from "./gleam.js"; -import { CompileError as RegexCompileError } from "./gleam/regex.js"; +import { + CompileError as RegexCompileError, + Match as RegexMatch, +} from "./gleam/regex.js"; +import { Some, None } from "./gleam/option.js"; const Nil = undefined; @@ -236,13 +240,21 @@ export function regex_check(regex, string) { export function compile_regex(pattern, options) { try { - let flags = ""; + let flags = "gu"; if (options.case_insensitive) flags += "i"; if (options.multi_line) flags += "m"; return new Ok(new RegExp(pattern, flags)); } catch (error) { - return new Error( - new RegexCompileError(error.message, error.columnNumber || 0) - ); + let number = (error.columnNumber || 0) | 0; + return new Error(new RegexCompileError(error.message, number)); } } + +export function regex_scan(regex, string) { + let matches = Array.from(string.matchAll(regex)).map((match) => { + let content = match.shift(); + let submatches = match.map((x) => (x ? new Some(x) : new None())); + return new RegexMatch(content, List.fromArray(submatches)); + }); + return List.fromArray(matches); +} diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam index 87535a0..5916b63 100644 --- a/test/gleam/regex_test.gleam +++ b/test/gleam/regex_test.gleam @@ -46,33 +46,23 @@ pub fn split_test() { |> should.equal(["foo", "32", "4", "9", "0"]) } -if erlang { - pub fn scan_test() { - assert Ok(re) = regex.from_string("Gl\\w+") - - regex.scan(re, "!Gleam") - |> should.equal([Match(content: "Gleam", byte_index: 1, submatches: [])]) - - regex.scan(re, "हGleam") - |> should.equal([Match(content: "Gleam", byte_index: 3, submatches: [])]) - - regex.scan(re, "𐍈Gleam") - |> should.equal([Match(content: "Gleam", byte_index: 4, submatches: [])]) - - assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)") - - regex.scan(re, "I am on a boat in a lake.") - |> should.equal([ - Match( - content: "on a boat", - byte_index: 5, - submatches: [None, Some("boat")], - ), - Match( - content: "in a lake", - byte_index: 15, - submatches: [None, Some("lake")], - ), - ]) - } +pub fn scan_test() { + assert Ok(re) = regex.from_string("Gl\\w+") + + regex.scan(re, "!Gleam") + |> should.equal([Match(content: "Gleam", submatches: [])]) + + regex.scan(re, "हGleam") + |> should.equal([Match(content: "Gleam", submatches: [])]) + + regex.scan(re, "𐍈Gleam") + |> should.equal([Match(content: "Gleam", submatches: [])]) + + assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)") + + regex.scan(re, "I am on a boat in a lake.") + |> should.equal([ + Match(content: "on a boat", submatches: [None, Some("boat")]), + Match(content: "in a lake", submatches: [None, Some("lake")]), + ]) } |