diff options
author | Louis Pilfold <louis@lpil.uk> | 2021-07-21 18:34:57 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2021-07-21 18:34:57 +0100 |
commit | 733032f0d8570913f41ccf9498fe82301f2eadc6 (patch) | |
tree | 8ada00ea77a11e3ef8f4cdb0d59ca71b0aa1068f | |
parent | 363d0a0c95fc1f3c91fe5c1101cac9d848c66fde (diff) | |
download | gleam_stdlib-733032f0d8570913f41ccf9498fe82301f2eadc6.tar.gz gleam_stdlib-733032f0d8570913f41ccf9498fe82301f2eadc6.zip |
Convert some string functions
-rw-r--r-- | src/gleam/string.gleam | 267 | ||||
-rw-r--r-- | src/gleam/string_builder.gleam | 126 | ||||
-rw-r--r-- | src/gleam_stdlib.js | 46 | ||||
-rw-r--r-- | test/gleam/string_test.gleam | 60 |
4 files changed, 313 insertions, 186 deletions
diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index a4574dc..4701afd 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -1,119 +1,155 @@ //// Strings in Gleam are UTF-8 binaries. They can be written in your code a //// text surrounded by `"double quotes"`. +import gleam/string_builder +import gleam/iterator +import gleam/list +import gleam/order +import gleam/result + if erlang { - import gleam/string_builder import gleam/dynamic.{Dynamic} - import gleam/iterator - import gleam/list - import gleam/order - import gleam/result - - pub type String = - String +} - /// A UtfCodepoint is the integer representation of a valid UTF codepoint - pub type UtfCodepoint = - UtfCodepoint +pub type String = + String + +/// A UtfCodepoint is the integer representation of a valid UTF codepoint +pub type UtfCodepoint = + UtfCodepoint + +/// Determines if a string is empty. +/// +/// ## Examples +/// +/// > is_empty("") +/// True +/// +/// > is_empty("the world") +/// False +/// +pub fn is_empty(str: String) -> Bool { + str == "" +} - /// Determines if a string is empty. - /// - /// ## Examples - /// - /// > is_empty("") - /// True - /// - /// > is_empty("the world") - /// False - /// - pub fn is_empty(str: String) -> Bool { - str == "" - } +/// Gets the number of grapheme clusters in a given string. +/// +/// This function has to iterate across the whole string to count the number of +/// graphemes, so it runs in linear time. +/// +/// ## Examples +/// +/// > length("Gleam") +/// 5 +/// +/// > length("ß↑e̊") +/// 3 +/// +/// > length("") +/// 0 +/// +pub fn length(string: String) -> Int { + do_length(string) +} - /// Gets the number of grapheme clusters in a given string. - /// - /// This function has to iterate across the whole string to count the number of - /// graphemes, so it runs in linear time. - /// - /// ## Examples - /// - /// > length("Gleam") - /// 5 - /// - /// > length("ß↑e̊") - /// 3 - /// - /// > length("") - /// 0 - /// - pub external fn length(String) -> Int = +if erlang { + external fn do_length(String) -> Int = "string" "length" +} - /// - /// Reverses a string. - /// - /// This function has to iterate across the whole string so it runs in linear - /// time. - /// - /// ## Examples - /// - /// > reverse("stressed") - /// "desserts" - /// - pub fn reverse(string: String) -> String { - string - |> string_builder.from_string - |> string_builder.reverse - |> string_builder.to_string - } +if javascript { + external fn do_length(String) -> Int = + "../gleam_stdlib.js" "string_length" +} - /// Creates a new string by replacing all occurrences of a given substring. - /// - /// ## Examples - /// - /// > replace("www.example.com", each: ".", with: "-") - /// "www-example-com" - /// - /// > replace("a,b,c,d,e", each: ",", with: "/") - /// "a/b/c/d/e" - /// - pub fn replace( - in string: String, - each pattern: String, - with substitute: String, - ) -> String { - string - |> string_builder.from_string - |> string_builder.replace(each: pattern, with: substitute) - |> string_builder.to_string - } +/// +/// Reverses a string. +/// +/// This function has to iterate across the whole string so it runs in linear +/// time. +/// +/// ## Examples +/// +/// > reverse("stressed") +/// "desserts" +/// +pub fn reverse(string: String) -> String { + string + |> string_builder.from_string + |> string_builder.reverse + |> string_builder.to_string +} - /// Creates a new string with all the graphemes in the input string converted to - /// lowercase. - /// - /// Useful for case-insensitive comparisons. - /// - /// ## Examples - /// - /// > lowercase("X-FILES") - /// "x-files" - /// - pub external fn lowercase(String) -> String = +/// Creates a new string by replacing all occurrences of a given substring. +/// +/// ## Examples +/// +/// > replace("www.example.com", each: ".", with: "-") +/// "www-example-com" +/// +/// > replace("a,b,c,d,e", each: ",", with: "/") +/// "a/b/c/d/e" +/// +pub fn replace( + in string: String, + each pattern: String, + with substitute: String, +) -> String { + string + |> string_builder.from_string + |> string_builder.replace(each: pattern, with: substitute) + |> string_builder.to_string +} + +/// Creates a new string with all the graphemes in the input string converted to +/// lowercase. +/// +/// Useful for case-insensitive comparisons. +/// +/// ## Examples +/// +/// > lowercase("X-FILES") +/// "x-files" +/// +pub fn lowercase(string: String) -> String { + do_lowercase(string) +} + +if erlang { + external fn do_lowercase(String) -> String = "string" "lowercase" +} - /// Creates a new string with all the graphemes in the input string converted to - /// uppercase. - /// - /// Useful for case-insensitive comparisons and VIRTUAL YELLING. - /// - /// ## Examples - /// - /// > uppercase("skinner") - /// "SKINNER" - /// - pub external fn uppercase(String) -> String = +if javascript { + external fn do_lowercase(String) -> String = + "../gleam_stdlib.js" "string_lowercase" +} + +/// Creates a new string with all the graphemes in the input string converted to +/// uppercase. +/// +/// Useful for case-insensitive comparisons and VIRTUAL YELLING. +/// +/// ## Examples +/// +/// > uppercase("skinner") +/// "SKINNER" +/// +pub fn uppercase(string: String) -> String { + do_uppercase(string) +} + +if erlang { + external fn do_uppercase(String) -> String = "string" "uppercase" +} + +if javascript { + external fn do_uppercase(String) -> String = + "../gleam_stdlib.js" "string_uppercase" +} +if erlang { /// Compares two strings to see which is "larger" by comparing their graphemes. /// /// This does not compare the size or length of the given strings. @@ -463,20 +499,27 @@ if erlang { _ -> [] } } +} - external fn int_to_utf_codepoint(Int) -> UtfCodepoint = +if erlang { + external fn unsafe_int_to_utf_codepoint(Int) -> UtfCodepoint = "gleam_stdlib" "identity" +} - /// Converts an integer to a UtfCodepoint - /// - /// Returns an error if the integer does not represent a valid UTF codepoint. - /// - pub fn utf_codepoint(value: Int) -> Result(UtfCodepoint, Nil) { - case value { - i if i > 1114111 -> Error(Nil) - 65534 | 65535 -> Error(Nil) - i if i >= 55296 && i <= 57343 -> Error(Nil) - i -> Ok(int_to_utf_codepoint(i)) - } +if javascript { + external fn unsafe_int_to_utf_codepoint(Int) -> UtfCodepoint = + "../gleam_stdlib.js" "identity" +} + +/// Converts an integer to a UtfCodepoint +/// +/// Returns an error if the integer does not represent a valid UTF codepoint. +/// +pub fn utf_codepoint(value: Int) -> Result(UtfCodepoint, Nil) { + case value { + i if i > 1114111 -> Error(Nil) + 65534 | 65535 -> Error(Nil) + i if i >= 55296 && i <= 57343 -> Error(Nil) + i -> Ok(unsafe_int_to_utf_codepoint(i)) } } diff --git a/src/gleam/string_builder.gleam b/src/gleam/string_builder.gleam index c09d60b..707521e 100644 --- a/src/gleam/string_builder.gleam +++ b/src/gleam/string_builder.gleam @@ -1,17 +1,17 @@ -if erlang { - /// StringBuilder is a type used for efficiently building strings. - /// - /// When we append one string to another the strings must be copied to a - /// new location in memory so that they can sit together. This behaviour - /// enables efficient reading of the string but copying can be expensive, - /// especially if we want to join many strings together. - /// - /// StringBuilder is different in that it can be joined together in constant time - /// using minimal memory, and then can be efficiently converted to a string - /// using the `to_string` function. - /// - pub external type StringBuilder +/// StringBuilder is a type used for efficiently building strings. +/// +/// When we append one string to another the strings must be copied to a +/// new location in memory so that they can sit together. This behaviour +/// enables efficient reading of the string but copying can be expensive, +/// especially if we want to join many strings together. +/// +/// StringBuilder is different in that it can be joined together in constant time +/// using minimal memory, and then can be efficiently converted to a string +/// using the `to_string` function. +/// +pub external type StringBuilder +if erlang { /// Prepends a String onto the start of some StringBuilder. /// /// Runs in constant time. @@ -59,22 +59,46 @@ if erlang { /// pub external fn concat(List(StringBuilder)) -> StringBuilder = "gleam_stdlib" "identity" +} - /// Converts a string into a builder. - /// - /// Runs in constant time. - /// - pub external fn from_string(String) -> StringBuilder = +/// Converts a string into a builder. +/// +/// Runs in constant time. +/// +pub fn from_string(string: String) -> StringBuilder { + do_from_string(string) +} + +if erlang { + external fn do_from_string(String) -> StringBuilder = "gleam_stdlib" "identity" +} - /// Turns an `StringBuilder` into a `String` - /// - /// This function is implemented natively by the virtual machine and is highly - /// optimised. - /// - pub external fn to_string(StringBuilder) -> String = +if javascript { + external fn do_from_string(String) -> StringBuilder = + "../gleam_stdlib.js" "identity" +} + +/// Turns an `StringBuilder` into a `String` +/// +/// This function is implemented natively by the virtual machine and is highly +/// optimised. +/// +pub fn to_string(builder: StringBuilder) -> String { + do_to_string(builder) +} + +if erlang { + external fn do_to_string(StringBuilder) -> String = "erlang" "iolist_to_binary" +} +if javascript { + external fn do_to_string(StringBuilder) -> String = + "../gleam_stdlib.js" "identity" +} + +if erlang { /// Returns the size of the StringBuilder in bytes. /// pub external fn byte_size(StringBuilder) -> Int = @@ -96,12 +120,25 @@ if erlang { /// pub external fn uppercase(StringBuilder) -> StringBuilder = "string" "uppercase" +} - /// Converts a builder to a new builder with the contents reversed. - /// - pub external fn reverse(StringBuilder) -> StringBuilder = +/// Converts a builder to a new builder with the contents reversed. +/// +pub fn reverse(builder: StringBuilder) -> StringBuilder { + do_reverse(builder) +} + +if erlang { + external fn do_reverse(StringBuilder) -> StringBuilder = "string" "reverse" +} +if javascript { + external fn do_reverse(StringBuilder) -> StringBuilder = + "../gleam_stdlib.js" "string_reverse" +} + +if erlang { type Direction { All } @@ -114,6 +151,26 @@ if erlang { pub fn split(iodata: StringBuilder, on pattern: String) -> List(StringBuilder) { erl_split(iodata, pattern, All) } +} + +/// Replaces all instances of a pattern with a given string substitute. +/// +pub fn replace( + in builder: StringBuilder, + each pattern: String, + with substitute: String, +) -> StringBuilder { + do_replace(builder, pattern, substitute) +} + +if erlang { + fn do_replace( + iodata: StringBuilder, + pattern: String, + substitute: String, + ) -> StringBuilder { + erl_replace(iodata, pattern, substitute, All) + } external fn erl_replace( StringBuilder, @@ -122,17 +179,14 @@ if erlang { Direction, ) -> StringBuilder = "string" "replace" +} - /// Replaces all instances of a pattern with a given string substitute. - /// - pub fn replace( - in iodata: StringBuilder, - each pattern: String, - with substitute: String, - ) -> StringBuilder { - erl_replace(iodata, pattern, substitute, All) - } +if javascript { + external fn do_replace(StringBuilder, String, String) -> StringBuilder = + "../gleam_stdlib.js" "string_replace" +} +if erlang { /// Compares two builders to determine if they have the same textual content. /// /// Comparing two iodata using the `==` operator may return False even if they diff --git a/src/gleam_stdlib.js b/src/gleam_stdlib.js index 960792b..15c8a09 100644 --- a/src/gleam_stdlib.js +++ b/src/gleam_stdlib.js @@ -1,19 +1,47 @@ export function identity(x) { - return x + return x; } export function parse_int(value) { - if (/^[-+]?(\d+)$/.test(value)) { - return { "type": "Ok", "0": Number(value) } - } else { - return { "type": "Error", "0": null } - } + if (/^[-+]?(\d+)$/.test(value)) { + return { type: "Ok", 0: Number(value) }; + } else { + return { type: "Error", 0: null }; + } } export function int_to_string(int) { - return int.toString() + return int.toString(); } export function int_to_base_string(int, base) { - return int.toString(base) -}
\ No newline at end of file + return int.toString(base); +} + +export function string_replace(string, target, substitute) { + return string.replaceAll(target, substitute); +} + +export function string_reverse(string) { + return string.split("").reverse().join(""); +} + +export function string_length(string) { + if (Intl && Intl.Segmenter) { + let i = 0; + for (let _ of new Intl.Segmenter("en-gb").segment(string)) { + i++; + } + return i; + } else { + return string.match(/./gu).length; + } +} + +export function string_lowercase(string) { + return string.toLowerCase(); +} + +export function string_uppercase(string) { + return string.toUpperCase(); +} diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index 085e2f9..05a4322 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -1,34 +1,34 @@ -if erlang { - import gleam/string - import gleam/should - import gleam/order +import gleam/string +import gleam/should +import gleam/order - pub fn length_test() { - string.length("ß↑e̊") - |> should.equal(3) +pub fn length_test() { + string.length("ß↑e̊") + |> should.equal(3) - string.length("Gleam") - |> should.equal(5) + string.length("Gleam") + |> should.equal(5) - string.length("") - |> should.equal(0) - } + string.length("") + |> should.equal(0) +} - pub fn lowercase_test() { - string.lowercase("Gleam") - |> should.equal("gleam") - } +pub fn lowercase_test() { + string.lowercase("Gleam") + |> should.equal("gleam") +} - pub fn uppercase_test() { - string.uppercase("Gleam") - |> should.equal("GLEAM") - } +pub fn uppercase_test() { + string.uppercase("Gleam") + |> should.equal("GLEAM") +} - pub fn reverse_test() { - string.reverse("Gleam") - |> should.equal("maelG") - } +pub fn reverse_test() { + string.reverse("Gleam") + |> should.equal("maelG") +} +if erlang { pub fn split_test() { "Gleam,Erlang,Elixir" |> string.split(",") @@ -52,13 +52,15 @@ if erlang { |> string.split_once(",") |> should.equal(Error(Nil)) } +} - pub fn replace_test() { - "Gleam,Erlang,Elixir" - |> string.replace(",", "++") - |> should.equal("Gleam++Erlang++Elixir") - } +pub fn replace_test() { + "Gleam,Erlang,Elixir" + |> string.replace(",", "++") + |> should.equal("Gleam++Erlang++Elixir") +} +if erlang { pub fn append_test() { "Test" |> string.append(" Me") |