aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: shayan javani <massivefermion@protonmail.com> 2023-03-14 16:38:43 +0330
committer: GitHub <noreply@github.com> 2023-03-14 13:08:43 +0000
commit: a4805b21cf42241cc2951947231e76679bd99d07 (patch)
tree: 33524eed2216e1dcf90f19d88fdd511cadd3aef5
parent: a37d5dddbb3a29b896e5d5351830223c9cd9e933 (diff)
download: gleam_stdlib-a4805b21cf42241cc2951947231e76679bd99d07.tar.gz
download: gleam_stdlib-a4805b21cf42241cc2951947231e76679bd99d07.zip
make `regex.scan`'s behavior consistent across targets (#423)
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- src/gleam/regex.gleam 46
-rw-r--r-- src/gleam_stdlib.erl 1
-rw-r--r-- src/gleam_stdlib.mjs 13
-rw-r--r-- test/gleam/regex_test.gleam 44
5 files changed, 102 insertions, 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ec66604..3aed2ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
## Unreleased
+- `regex.scan` now behaves consistently across both targets when a capture group does not capture anything.
- The `Map` type was rewritten as a persistent immutable data structure. This
results in drastically improved performance when constructing or updating
maps, especially with large maps.
diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam
index 307d679..013d55d 100644
--- a/src/gleam/regex.gleam
+++ b/src/gleam/regex.gleam
@@ -160,7 +160,7 @@ if javascript {
/// ## Examples
///
/// ```gleam
-/// > assert Ok(re) = from_string("[oi]n a (\\w+)")
+/// > let assert Ok(re) = from_string("[oi]n a (\\w+)")
/// > scan(with: re, content: "I am on a boat in a lake.")
/// [
/// Match(
@@ -174,6 +174,50 @@ if javascript {
/// ]
/// ```
///
+/// ```gleam
+/// > let assert Ok(re) = regex.from_string("([+|\\-])?(\\d+)(\\w+)?")
+/// > scan(with: re, content: "-36")
+/// [
+/// Match(
+/// content: "-36",
+/// submatches: [Some("-"), Some("36")]
+/// )
+/// ]
+///
+/// > scan(with: re, content: "36")
+/// [
+/// Match(
+/// content: "36",
+/// submatches: [None, Some("36")]
+/// )
+/// ]
+/// ```
+///
+/// ```gleam
+/// > let assert Ok(re) = regex.from_string("var\\s*(\\w+)\\s*(int|string)?\\s*=\\s*(.*)")
+/// > scan(with: re, content: "var age = 32")
+/// [
+/// Match(
+/// content: "var age = 32",
+/// submatches: [Some("age"), None, Some("32")]
+/// )
+/// ]
+/// ```
+///
+/// ```gleam
+/// > let assert Ok(re) = regex.from_string("let (\\w+) = (\\w+)")
+/// > scan(with: re, content: "let age = 32")
+/// [
+/// Match(
+/// content: "let age = 32",
+/// submatches: [Some("age"), Some("32")]
+/// )
+/// ]
+///
+/// > scan(with: re, content: "const age = 32")
+/// []
+/// ```
+///
pub fn scan(with regex: Regex, content string: String) -> List(Match) {
do_scan(regex, string)
}
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index 80c3f2c..e1fdbb1 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -205,6 +205,7 @@ regex_check(Regex, String) ->
regex_split(Regex, String) ->
re:split(String, Regex).
+regex_submatches(_, {-1, 0}) -> none;
regex_submatches(String, {Start, Length}) ->
BinarySlice = binary:part(String, {Start, Length}),
case string:is_empty(binary_to_list(BinarySlice)) of
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 37c3ea2..632c668 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -388,8 +388,17 @@ export function compile_regex(pattern, options) {
export function regex_scan(regex, string) {
let matches = Array.from(string.matchAll(regex)).map((match) => {
- let content = match.shift();
- let submatches = match.map((x) => (x ? new Some(x) : new None()));
+ const content = match[0];
+ const submatches = [];
+ for (let n = match.length - 1; n > 0; n--) {
+ if (match[n]) {
+ submatches[n-1] = new Some(match[n])
+ continue
+ }
+ if(submatches.length > 0) {
+ submatches[n-1] = new None()
+ }
+ }
return new RegexMatch(content, List.fromArray(submatches));
});
return List.fromArray(matches);
diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam
index 87f2aab..eb84013 100644
--- a/test/gleam/regex_test.gleam
+++ b/test/gleam/regex_test.gleam
@@ -84,4 +84,48 @@ pub fn scan_test() {
regex.scan(re, "你好 42 世界")
|> should.equal([Match(content: "42", submatches: [Some("42")])])
+
+ let assert Ok(re) = regex.from_string("([+|\\-])?(\\d+)(\\w+)?")
+ regex.scan(re, "+36kg")
+ |> should.equal([
+ Match(content: "+36kg", submatches: [Some("+"), Some("36"), Some("kg")]),
+ ])
+
+ regex.scan(re, "36kg")
+ |> should.equal([
+ Match(content: "36kg", submatches: [None, Some("36"), Some("kg")]),
+ ])
+
+ regex.scan(re, "36")
+ |> should.equal([Match(content: "36", submatches: [None, Some("36")])])
+
+ regex.scan(re, "-36")
+ |> should.equal([Match(content: "-36", submatches: [Some("-"), Some("36")])])
+
+ regex.scan(re, "-kg")
+ |> should.equal([])
+
+ let assert Ok(re) =
+ regex.from_string("var\\s*(\\w+)\\s*(int|string)?\\s*=\\s*(.*)")
+ regex.scan(re, "var age int = 32")
+ |> should.equal([
+ Match(
+ content: "var age int = 32",
+ submatches: [Some("age"), Some("int"), Some("32")],
+ ),
+ ])
+
+ regex.scan(re, "var age = 32")
+ |> should.equal([
+ Match(content: "var age = 32", submatches: [Some("age"), None, Some("32")]),
+ ])
+
+ let assert Ok(re) = regex.from_string("let (\\w+) = (\\w+)")
+ regex.scan(re, "let age = 32")
+ |> should.equal([
+ Match(content: "let age = 32", submatches: [Some("age"), Some("32")]),
+ ])
+
+ regex.scan(re, "const age = 32")
+ |> should.equal([])
}