aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Pilfold <louis@lpil.uk>2021-07-21 18:34:57 +0100
committerLouis Pilfold <louis@lpil.uk>2021-07-21 18:34:57 +0100
commit733032f0d8570913f41ccf9498fe82301f2eadc6 (patch)
tree8ada00ea77a11e3ef8f4cdb0d59ca71b0aa1068f
parent363d0a0c95fc1f3c91fe5c1101cac9d848c66fde (diff)
downloadgleam_stdlib-733032f0d8570913f41ccf9498fe82301f2eadc6.tar.gz
gleam_stdlib-733032f0d8570913f41ccf9498fe82301f2eadc6.zip
Convert some string functions
-rw-r--r--src/gleam/string.gleam267
-rw-r--r--src/gleam/string_builder.gleam126
-rw-r--r--src/gleam_stdlib.js46
-rw-r--r--test/gleam/string_test.gleam60
4 files changed, 313 insertions, 186 deletions
diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam
index a4574dc..4701afd 100644
--- a/src/gleam/string.gleam
+++ b/src/gleam/string.gleam
@@ -1,119 +1,155 @@
//// Strings in Gleam are UTF-8 binaries. They can be written in your code as
//// text surrounded by `"double quotes"`.
+import gleam/string_builder
+import gleam/iterator
+import gleam/list
+import gleam/order
+import gleam/result
+
if erlang {
- import gleam/string_builder
import gleam/dynamic.{Dynamic}
- import gleam/iterator
- import gleam/list
- import gleam/order
- import gleam/result
-
- pub type String =
- String
+}
- /// A UtfCodepoint is the integer representation of a valid UTF codepoint
- pub type UtfCodepoint =
- UtfCodepoint
+pub type String =
+ String
+
+/// A UtfCodepoint is the integer representation of a valid UTF codepoint
+pub type UtfCodepoint =
+ UtfCodepoint
+
+/// Determines if a string is empty.
+///
+/// ## Examples
+///
+/// > is_empty("")
+/// True
+///
+/// > is_empty("the world")
+/// False
+///
+pub fn is_empty(str: String) -> Bool {
+ str == ""
+}
- /// Determines if a string is empty.
- ///
- /// ## Examples
- ///
- /// > is_empty("")
- /// True
- ///
- /// > is_empty("the world")
- /// False
- ///
- pub fn is_empty(str: String) -> Bool {
- str == ""
- }
+/// Gets the number of grapheme clusters in a given string.
+///
+/// This function has to iterate across the whole string to count the number of
+/// graphemes, so it runs in linear time.
+///
+/// ## Examples
+///
+/// > length("Gleam")
+/// 5
+///
+/// > length("ß↑e̊")
+/// 3
+///
+/// > length("")
+/// 0
+///
+pub fn length(string: String) -> Int {
+ do_length(string)
+}
- /// Gets the number of grapheme clusters in a given string.
- ///
- /// This function has to iterate across the whole string to count the number of
- /// graphemes, so it runs in linear time.
- ///
- /// ## Examples
- ///
- /// > length("Gleam")
- /// 5
- ///
- /// > length("ß↑e̊")
- /// 3
- ///
- /// > length("")
- /// 0
- ///
- pub external fn length(String) -> Int =
+if erlang {
+ external fn do_length(String) -> Int =
"string" "length"
+}
- ///
- /// Reverses a string.
- ///
- /// This function has to iterate across the whole string so it runs in linear
- /// time.
- ///
- /// ## Examples
- ///
- /// > reverse("stressed")
- /// "desserts"
- ///
- pub fn reverse(string: String) -> String {
- string
- |> string_builder.from_string
- |> string_builder.reverse
- |> string_builder.to_string
- }
+if javascript {
+ external fn do_length(String) -> Int =
+ "../gleam_stdlib.js" "string_length"
+}
- /// Creates a new string by replacing all occurrences of a given substring.
- ///
- /// ## Examples
- ///
- /// > replace("www.example.com", each: ".", with: "-")
- /// "www-example-com"
- ///
- /// > replace("a,b,c,d,e", each: ",", with: "/")
- /// "a/b/c/d/e"
- ///
- pub fn replace(
- in string: String,
- each pattern: String,
- with substitute: String,
- ) -> String {
- string
- |> string_builder.from_string
- |> string_builder.replace(each: pattern, with: substitute)
- |> string_builder.to_string
- }
+///
+/// Reverses a string.
+///
+/// This function has to iterate across the whole string so it runs in linear
+/// time.
+///
+/// ## Examples
+///
+/// > reverse("stressed")
+/// "desserts"
+///
+pub fn reverse(string: String) -> String {
+ string
+ |> string_builder.from_string
+ |> string_builder.reverse
+ |> string_builder.to_string
+}
- /// Creates a new string with all the graphemes in the input string converted to
- /// lowercase.
- ///
- /// Useful for case-insensitive comparisons.
- ///
- /// ## Examples
- ///
- /// > lowercase("X-FILES")
- /// "x-files"
- ///
- pub external fn lowercase(String) -> String =
+/// Creates a new string by replacing all occurrences of a given substring.
+///
+/// ## Examples
+///
+/// > replace("www.example.com", each: ".", with: "-")
+/// "www-example-com"
+///
+/// > replace("a,b,c,d,e", each: ",", with: "/")
+/// "a/b/c/d/e"
+///
+pub fn replace(
+ in string: String,
+ each pattern: String,
+ with substitute: String,
+) -> String {
+ string
+ |> string_builder.from_string
+ |> string_builder.replace(each: pattern, with: substitute)
+ |> string_builder.to_string
+}
+
+/// Creates a new string with all the graphemes in the input string converted to
+/// lowercase.
+///
+/// Useful for case-insensitive comparisons.
+///
+/// ## Examples
+///
+/// > lowercase("X-FILES")
+/// "x-files"
+///
+pub fn lowercase(string: String) -> String {
+ do_lowercase(string)
+}
+
+if erlang {
+ external fn do_lowercase(String) -> String =
"string" "lowercase"
+}
- /// Creates a new string with all the graphemes in the input string converted to
- /// uppercase.
- ///
- /// Useful for case-insensitive comparisons and VIRTUAL YELLING.
- ///
- /// ## Examples
- ///
- /// > uppercase("skinner")
- /// "SKINNER"
- ///
- pub external fn uppercase(String) -> String =
+if javascript {
+ external fn do_lowercase(String) -> String =
+ "../gleam_stdlib.js" "string_lowercase"
+}
+
+/// Creates a new string with all the graphemes in the input string converted to
+/// uppercase.
+///
+/// Useful for case-insensitive comparisons and VIRTUAL YELLING.
+///
+/// ## Examples
+///
+/// > uppercase("skinner")
+/// "SKINNER"
+///
+pub fn uppercase(string: String) -> String {
+ do_uppercase(string)
+}
+
+if erlang {
+ external fn do_uppercase(String) -> String =
"string" "uppercase"
+}
+
+if javascript {
+ external fn do_uppercase(String) -> String =
+ "../gleam_stdlib.js" "string_uppercase"
+}
+if erlang {
/// Compares two strings to see which is "larger" by comparing their graphemes.
///
/// This does not compare the size or length of the given strings.
@@ -463,20 +499,27 @@ if erlang {
_ -> []
}
}
+}
- external fn int_to_utf_codepoint(Int) -> UtfCodepoint =
+if erlang {
+ external fn unsafe_int_to_utf_codepoint(Int) -> UtfCodepoint =
"gleam_stdlib" "identity"
+}
- /// Converts an integer to a UtfCodepoint
- ///
- /// Returns an error if the integer does not represent a valid UTF codepoint.
- ///
- pub fn utf_codepoint(value: Int) -> Result(UtfCodepoint, Nil) {
- case value {
- i if i > 1114111 -> Error(Nil)
- 65534 | 65535 -> Error(Nil)
- i if i >= 55296 && i <= 57343 -> Error(Nil)
- i -> Ok(int_to_utf_codepoint(i))
- }
+if javascript {
+ external fn unsafe_int_to_utf_codepoint(Int) -> UtfCodepoint =
+ "../gleam_stdlib.js" "identity"
+}
+
+/// Converts an integer to a UtfCodepoint
+///
+/// Returns an error if the integer does not represent a valid UTF codepoint.
+/// Negative integers are rejected along with surrogates and out-of-range values.
+pub fn utf_codepoint(value: Int) -> Result(UtfCodepoint, Nil) {
+ case value {
+ i if i < 0 || i > 1114111 -> Error(Nil)
+ 65534 | 65535 -> Error(Nil)
+ i if i >= 55296 && i <= 57343 -> Error(Nil)
+ i -> Ok(unsafe_int_to_utf_codepoint(i))
}
}
diff --git a/src/gleam/string_builder.gleam b/src/gleam/string_builder.gleam
index c09d60b..707521e 100644
--- a/src/gleam/string_builder.gleam
+++ b/src/gleam/string_builder.gleam
@@ -1,17 +1,17 @@
-if erlang {
- /// StringBuilder is a type used for efficiently building strings.
- ///
- /// When we append one string to another the strings must be copied to a
- /// new location in memory so that they can sit together. This behaviour
- /// enables efficient reading of the string but copying can be expensive,
- /// especially if we want to join many strings together.
- ///
- /// StringBuilder is different in that it can be joined together in constant time
- /// using minimal memory, and then can be efficiently converted to a string
- /// using the `to_string` function.
- ///
- pub external type StringBuilder
+/// StringBuilder is a type used for efficiently building strings.
+///
+/// When we append one string to another the strings must be copied to a
+/// new location in memory so that they can sit together. This behaviour
+/// enables efficient reading of the string but copying can be expensive,
+/// especially if we want to join many strings together.
+///
+/// StringBuilder is different in that it can be joined together in constant time
+/// using minimal memory, and then can be efficiently converted to a string
+/// using the `to_string` function.
+///
+pub external type StringBuilder
+if erlang {
/// Prepends a String onto the start of some StringBuilder.
///
/// Runs in constant time.
@@ -59,22 +59,46 @@ if erlang {
///
pub external fn concat(List(StringBuilder)) -> StringBuilder =
"gleam_stdlib" "identity"
+}
- /// Converts a string into a builder.
- ///
- /// Runs in constant time.
- ///
- pub external fn from_string(String) -> StringBuilder =
+/// Converts a string into a builder.
+///
+/// Runs in constant time.
+///
+pub fn from_string(string: String) -> StringBuilder {
+ do_from_string(string)
+}
+
+if erlang {
+ external fn do_from_string(String) -> StringBuilder =
"gleam_stdlib" "identity"
+}
- /// Turns an `StringBuilder` into a `String`
- ///
- /// This function is implemented natively by the virtual machine and is highly
- /// optimised.
- ///
- pub external fn to_string(StringBuilder) -> String =
+if javascript {
+ external fn do_from_string(String) -> StringBuilder =
+ "../gleam_stdlib.js" "identity"
+}
+
+/// Turns a `StringBuilder` into a `String`
+///
+/// This function is implemented natively by the virtual machine and is highly
+/// optimised.
+///
+pub fn to_string(builder: StringBuilder) -> String {
+ do_to_string(builder)
+}
+
+if erlang {
+ external fn do_to_string(StringBuilder) -> String =
"erlang" "iolist_to_binary"
+}
+if javascript {
+ external fn do_to_string(StringBuilder) -> String =
+ "../gleam_stdlib.js" "identity"
+}
+
+if erlang {
/// Returns the size of the StringBuilder in bytes.
///
pub external fn byte_size(StringBuilder) -> Int =
@@ -96,12 +120,25 @@ if erlang {
///
pub external fn uppercase(StringBuilder) -> StringBuilder =
"string" "uppercase"
+}
- /// Converts a builder to a new builder with the contents reversed.
- ///
- pub external fn reverse(StringBuilder) -> StringBuilder =
+/// Converts a builder to a new builder with the contents reversed.
+///
+pub fn reverse(builder: StringBuilder) -> StringBuilder {
+ do_reverse(builder)
+}
+
+if erlang {
+ external fn do_reverse(StringBuilder) -> StringBuilder =
"string" "reverse"
+}
+if javascript {
+ external fn do_reverse(StringBuilder) -> StringBuilder =
+ "../gleam_stdlib.js" "string_reverse"
+}
+
+if erlang {
type Direction {
All
}
@@ -114,6 +151,26 @@ if erlang {
pub fn split(iodata: StringBuilder, on pattern: String) -> List(StringBuilder) {
erl_split(iodata, pattern, All)
}
+}
+
+/// Replaces all instances of a pattern with a given string substitute.
+///
+pub fn replace(
+ in builder: StringBuilder,
+ each pattern: String,
+ with substitute: String,
+) -> StringBuilder {
+ do_replace(builder, pattern, substitute)
+}
+
+if erlang {
+ fn do_replace(
+ iodata: StringBuilder,
+ pattern: String,
+ substitute: String,
+ ) -> StringBuilder {
+ erl_replace(iodata, pattern, substitute, All)
+ }
external fn erl_replace(
StringBuilder,
@@ -122,17 +179,14 @@ if erlang {
Direction,
) -> StringBuilder =
"string" "replace"
+}
- /// Replaces all instances of a pattern with a given string substitute.
- ///
- pub fn replace(
- in iodata: StringBuilder,
- each pattern: String,
- with substitute: String,
- ) -> StringBuilder {
- erl_replace(iodata, pattern, substitute, All)
- }
+if javascript {
+ external fn do_replace(StringBuilder, String, String) -> StringBuilder =
+ "../gleam_stdlib.js" "string_replace"
+}
+if erlang {
/// Compares two builders to determine if they have the same textual content.
///
/// Comparing two iodata using the `==` operator may return False even if they
diff --git a/src/gleam_stdlib.js b/src/gleam_stdlib.js
index 960792b..15c8a09 100644
--- a/src/gleam_stdlib.js
+++ b/src/gleam_stdlib.js
@@ -1,19 +1,47 @@
export function identity(x) {
- return x
+ return x;
}
export function parse_int(value) {
- if (/^[-+]?(\d+)$/.test(value)) {
- return { "type": "Ok", "0": Number(value) }
- } else {
- return { "type": "Error", "0": null }
- }
+ if (/^[-+]?(\d+)$/.test(value)) {
+ return { type: "Ok", 0: Number(value) };
+ } else {
+ return { type: "Error", 0: null };
+ }
}
export function int_to_string(int) {
- return int.toString()
+ return int.toString();
}
export function int_to_base_string(int, base) {
- return int.toString(base)
-} \ No newline at end of file
+ return int.toString(base);
+}
+
+export function string_replace(string, target, substitute) {
+ return string.replaceAll(target, () => substitute); // replacer function keeps "$&", "$1", "$$" in substitute literal
+}
+
+export function string_reverse(string) {
+ return [...string].reverse().join(""); // spread iterates by code point, so surrogate pairs (e.g. emoji) are not split
+}
+
+export function string_length(string) {
+ if (typeof Intl !== "undefined" && Intl.Segmenter) { // typeof guard: a bare `Intl` would throw where it is undeclared
+ let i = 0;
+ for (let _ of new Intl.Segmenter("en-gb").segment(string)) {
+ i++;
+ }
+ return i;
+ } else {
+ return [...string].length; // code point count; unlike match(/./gu) it handles "" and line terminators
+ }
+}
+
+export function string_lowercase(string) {
+ return string.toLowerCase();
+}
+
+export function string_uppercase(string) {
+ return string.toUpperCase();
+}
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 085e2f9..05a4322 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -1,34 +1,34 @@
-if erlang {
- import gleam/string
- import gleam/should
- import gleam/order
+import gleam/string
+import gleam/should
+import gleam/order
- pub fn length_test() {
- string.length("ß↑e̊")
- |> should.equal(3)
+pub fn length_test() {
+ string.length("ß↑e̊")
+ |> should.equal(3)
- string.length("Gleam")
- |> should.equal(5)
+ string.length("Gleam")
+ |> should.equal(5)
- string.length("")
- |> should.equal(0)
- }
+ string.length("")
+ |> should.equal(0)
+}
- pub fn lowercase_test() {
- string.lowercase("Gleam")
- |> should.equal("gleam")
- }
+pub fn lowercase_test() {
+ string.lowercase("Gleam")
+ |> should.equal("gleam")
+}
- pub fn uppercase_test() {
- string.uppercase("Gleam")
- |> should.equal("GLEAM")
- }
+pub fn uppercase_test() {
+ string.uppercase("Gleam")
+ |> should.equal("GLEAM")
+}
- pub fn reverse_test() {
- string.reverse("Gleam")
- |> should.equal("maelG")
- }
+pub fn reverse_test() {
+ string.reverse("Gleam")
+ |> should.equal("maelG")
+}
+if erlang {
pub fn split_test() {
"Gleam,Erlang,Elixir"
|> string.split(",")
@@ -52,13 +52,15 @@ if erlang {
|> string.split_once(",")
|> should.equal(Error(Nil))
}
+}
- pub fn replace_test() {
- "Gleam,Erlang,Elixir"
- |> string.replace(",", "++")
- |> should.equal("Gleam++Erlang++Elixir")
- }
+pub fn replace_test() {
+ "Gleam,Erlang,Elixir"
+ |> string.replace(",", "++")
+ |> should.equal("Gleam++Erlang++Elixir")
+}
+if erlang {
pub fn append_test() {
"Test"
|> string.append(" Me")