From 6104a3bcc26a05942b8062ae62a0c7b932ca8cf6 Mon Sep 17 00:00:00 2001
From: Louis Pilfold
Date: Thu, 26 Aug 2021 23:13:44 +0100
Subject: Regex scan

---
 src/gleam/regex.gleam | 49 +++++++++++++++++++++++++++----------------------
 src/gleam_stdlib.js   | 22 +++++++++++++++++-----
 2 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/gleam/regex.gleam b/src/gleam/regex.gleam
index 8eddabe..cd717d9 100644
--- a/src/gleam/regex.gleam
+++ b/src/gleam/regex.gleam
@@ -13,8 +13,6 @@ pub type Match {
   Match(
     /// The full string of the match.
     content: String,
-    /// The byte index of the match in the original string.
-    byte_index: Int,
     /// A Regex can have subpatterns, sup-parts that are in parentheses.
     submatches: List(Option(String)),
   )
@@ -141,26 +139,33 @@ if javascript {
     "../gleam_stdlib.js" "split"
 }
 
+/// Collects all matches of the regular expression.
+///
+/// ## Examples
+///
+///    > assert Ok(re) = regex.from_string("[oi]n a (\\w+)")
+///    > regex.scan(with: re, content: "I am on a boat in a lake.")
+///    [
+///      Match(
+///        content: "on a boat",
+///        submatches: [Some("boat")]
+///      ),
+///      Match(
+///        content: "in a lake",
+///        submatches: [Some("lake")]
+///      )
+///    ]
+///
+pub fn scan(with regex: Regex, content string: String) -> List(Match) {
+  do_scan(regex, string)
+}
+
 if erlang {
-  /// Collects all matches of the regular expression.
-  ///
-  /// ## Examples
-  ///
-  ///    > assert Ok(re) = regex.from_string("[oi]n a (\\w+)")
-  ///    > regex.scan(with: re, content: "I am on a boat in a lake.")
-  ///    [
-  ///      Match(
-  ///        content: "on a boat",
-  ///        byte_index: 5,
-  ///        submatches: [Some("boat")]
-  ///      ),
-  ///      Match(
-  ///        content: "in a lake",
-  ///        byte_index: 15,
-  ///        submatches: [Some("lake")]
-  ///      )
-  ///    ]
-  ///
-  pub external fn scan(with: Regex, content: String) -> List(Match) =
+  external fn do_scan(Regex, String) -> List(Match) =
     "gleam_stdlib" "regex_scan"
 }
+
+if javascript {
+  external fn do_scan(Regex, String) -> List(Match) =
+    "../gleam_stdlib.js" "regex_scan"
+}
diff --git a/src/gleam_stdlib.js b/src/gleam_stdlib.js
index d2454ba..652b7b5 100644
--- a/src/gleam_stdlib.js
+++ b/src/gleam_stdlib.js
@@ -7,7 +7,11 @@ import {
   toBitString,
   stringBits,
 } from "./gleam.js";
-import { CompileError as RegexCompileError } from "./gleam/regex.js";
+import {
+  CompileError as RegexCompileError,
+  Match as RegexMatch,
+} from "./gleam/regex.js";
+import { Some, None } from "./gleam/option.js";
 
 const Nil = undefined;
 
@@ -236,13 +240,21 @@ export function regex_check(regex, string) {
 
 export function compile_regex(pattern, options) {
   try {
-    let flags = "";
+    let flags = "gu";
     if (options.case_insensitive) flags += "i";
     if (options.multi_line) flags += "m";
     return new Ok(new RegExp(pattern, flags));
   } catch (error) {
-    return new Error(
-      new RegexCompileError(error.message, error.columnNumber || 0)
-    );
+    let number = (error.columnNumber || 0) | 0;
+    return new Error(new RegexCompileError(error.message, number));
   }
 }
+
+export function regex_scan(regex, string) {
+  let matches = Array.from(string.matchAll(regex)).map((match) => {
+    let content = match.shift();
+    let submatches = match.map((x) => (x ? new Some(x) : new None()));
+    return new RegexMatch(content, List.fromArray(submatches));
+  });
+  return List.fromArray(matches);
+}
--
cgit v1.2.3