aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Louis Pilfold <louis@lpil.uk> 2021-08-26 23:13:44 +0100
committer Louis Pilfold <louis@lpil.uk> 2021-08-27 00:37:15 +0100
commit 6104a3bcc26a05942b8062ae62a0c7b932ca8cf6 (patch)
tree d32e8a76a710a33b0d204333343bec5f831f9db2
parent 70f065415469c65dd7f66dd4ef3f8e651f16b66a (diff)
download gleam_stdlib-6104a3bcc26a05942b8062ae62a0c7b932ca8cf6.tar.gz
gleam_stdlib-6104a3bcc26a05942b8062ae62a0c7b932ca8cf6.zip
Regex scan
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- src/gleam/regex.gleam 49
-rw-r--r-- src/gleam_stdlib.js 22
-rw-r--r-- test/gleam/regex_test.gleam 48
4 files changed, 64 insertions, 56 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d8ec189..f2e0b73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@
string error description.
- The `string` module gains the `to_option` function.
- Fixed a bug where `io.print` could crash when printing special characters.
+- The `regex.Match` record no longer has the `byte_index` field.
## v0.16.0 - 2021-06-17
diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam
index 8eddabe..cd717d9 100644
--- a/src/gleam/regex.gleam
+++ b/src/gleam/regex.gleam
@@ -13,8 +13,6 @@ pub type Match {
Match(
/// The full string of the match.
content: String,
- /// The byte index of the match in the original string.
- byte_index: Int,
/// A Regex can have subpatterns, sub-parts that are in parentheses.
submatches: List(Option(String)),
)
@@ -141,26 +139,33 @@ if javascript {
"../gleam_stdlib.js" "split"
}
+/// Collects all matches of the regular expression.
+///
+/// ## Examples
+///
+/// > assert Ok(re) = regex.from_string("[oi]n a (\\w+)")
+/// > regex.scan(with: re, content: "I am on a boat in a lake.")
+/// [
+/// Match(
+/// content: "on a boat",
+/// submatches: [Some("boat")]
+/// ),
+/// Match(
+/// content: "in a lake",
+/// submatches: [Some("lake")]
+/// )
+/// ]
+///
+pub fn scan(with regex: Regex, content string: String) -> List(Match) {
+ do_scan(regex, string)
+}
+
if erlang {
- /// Collects all matches of the regular expression.
- ///
- /// ## Examples
- ///
- /// > assert Ok(re) = regex.from_string("[oi]n a (\\w+)")
- /// > regex.scan(with: re, content: "I am on a boat in a lake.")
- /// [
- /// Match(
- /// content: "on a boat",
- /// byte_index: 5,
- /// submatches: [Some("boat")]
- /// ),
- /// Match(
- /// content: "in a lake",
- /// byte_index: 15,
- /// submatches: [Some("lake")]
- /// )
- /// ]
- ///
- pub external fn scan(with: Regex, content: String) -> List(Match) =
+ external fn do_scan(Regex, String) -> List(Match) =
"gleam_stdlib" "regex_scan"
}
+
+if javascript {
+ external fn do_scan(Regex, String) -> List(Match) =
+ "../gleam_stdlib.js" "regex_scan"
+}
diff --git a/src/gleam_stdlib.js b/src/gleam_stdlib.js
index d2454ba..652b7b5 100644
--- a/src/gleam_stdlib.js
+++ b/src/gleam_stdlib.js
@@ -7,7 +7,11 @@ import {
toBitString,
stringBits,
} from "./gleam.js";
-import { CompileError as RegexCompileError } from "./gleam/regex.js";
+import {
+ CompileError as RegexCompileError,
+ Match as RegexMatch,
+} from "./gleam/regex.js";
+import { Some, None } from "./gleam/option.js";
const Nil = undefined;
@@ -236,13 +240,21 @@ export function regex_check(regex, string) {
export function compile_regex(pattern, options) {
try {
- let flags = "";
+ let flags = "gu";
if (options.case_insensitive) flags += "i";
if (options.multi_line) flags += "m";
return new Ok(new RegExp(pattern, flags));
} catch (error) {
- return new Error(
- new RegexCompileError(error.message, error.columnNumber || 0)
- );
+ let number = (error.columnNumber || 0) | 0;
+ return new Error(new RegexCompileError(error.message, number));
}
}
+
+export function regex_scan(regex, string) {
+ let matches = Array.from(string.matchAll(regex)).map((match) => {
+ let content = match.shift();
+ let submatches = match.map((x) => (x ? new Some(x) : new None()));
+ return new RegexMatch(content, List.fromArray(submatches));
+ });
+ return List.fromArray(matches);
+}
diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam
index 87535a0..5916b63 100644
--- a/test/gleam/regex_test.gleam
+++ b/test/gleam/regex_test.gleam
@@ -46,33 +46,23 @@ pub fn split_test() {
|> should.equal(["foo", "32", "4", "9", "0"])
}
-if erlang {
- pub fn scan_test() {
- assert Ok(re) = regex.from_string("Gl\\w+")
-
- regex.scan(re, "!Gleam")
- |> should.equal([Match(content: "Gleam", byte_index: 1, submatches: [])])
-
- regex.scan(re, "हGleam")
- |> should.equal([Match(content: "Gleam", byte_index: 3, submatches: [])])
-
- regex.scan(re, "𐍈Gleam")
- |> should.equal([Match(content: "Gleam", byte_index: 4, submatches: [])])
-
- assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)")
-
- regex.scan(re, "I am on a boat in a lake.")
- |> should.equal([
- Match(
- content: "on a boat",
- byte_index: 5,
- submatches: [None, Some("boat")],
- ),
- Match(
- content: "in a lake",
- byte_index: 15,
- submatches: [None, Some("lake")],
- ),
- ])
- }
+pub fn scan_test() {
+ assert Ok(re) = regex.from_string("Gl\\w+")
+
+ regex.scan(re, "!Gleam")
+ |> should.equal([Match(content: "Gleam", submatches: [])])
+
+ regex.scan(re, "हGleam")
+ |> should.equal([Match(content: "Gleam", submatches: [])])
+
+ regex.scan(re, "𐍈Gleam")
+ |> should.equal([Match(content: "Gleam", submatches: [])])
+
+ assert Ok(re) = regex.from_string("[oi]n a(.?) (\\w+)")
+
+ regex.scan(re, "I am on a boat in a lake.")
+ |> should.equal([
+ Match(content: "on a boat", submatches: [None, Some("boat")]),
+ Match(content: "in a lake", submatches: [None, Some("lake")]),
+ ])
}