From 93c3223e8e5d5f9ba2a4020b18bed5802c63bdff Mon Sep 17 00:00:00 2001 From: inoas Date: Thu, 27 Oct 2022 21:20:27 +0000 Subject: Regex use ucp flag because they use unicode flag (#357) --- CHANGELOG.md | 5 ++++- src/gleam_stdlib.erl | 1 + test/gleam/regex_test.gleam | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 215e54b..842c85d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## v0.24.1 - unreleased +- For `regex.compile` unicode character properties are now used when + resolving `\B`, `\b`, `\D`, `\d`, `\S`, `\s`, `\W`, and `\w` on target + Erlang. - `list.sort` is now tail recursive and will no longer exceed the stack size on large inputs on target JavaScript. - `list.sort` is now a "stable" sort, meaning equal elements are sorted in @@ -11,7 +14,7 @@ - Fixed a bug where `regex.scan` would not work correctly on utf8. - The performance of `list.flatten` has been greatly improved. - The `string_builder` module gains the `join` function. -- The `list` module gains the `shuffle` function. +- The `list` module gains the `shuffle` function. ## v0.24.0 - 2022-10-15 diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 690cc34..961d0af 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -184,6 +184,7 @@ compile_regex(String, Options) -> {options, Caseless, Multiline} = Options, OptionsList = [ unicode, + ucp, Caseless andalso caseless, Multiline andalso multiline ], diff --git a/test/gleam/regex_test.gleam b/test/gleam/regex_test.gleam index 30d197f..f2fbd6c 100644 --- a/test/gleam/regex_test.gleam +++ b/test/gleam/regex_test.gleam @@ -26,6 +26,12 @@ pub fn compile_test() { regex.check(re, "abc\n123") |> should.be_true + + // For Erlang: This test only passes if the unicode and ucp flags are set + assert Ok(re) = regex.compile("\\s", options) + // Em space == U+2003 == " " == used below + regex.check(re, " ") + |> should.be_true } pub fn check_test() { -- cgit v1.2.3