aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiacomo Cavalieri <giacomo.cavalieri@icloud.com>2024-11-12 18:54:54 +0100
committerLouis Pilfold <louis@lpil.uk>2024-11-15 15:22:34 +0000
commit5fced08f04dd7c2dad7c644b1333926a9126d7c9 (patch)
treeea6cca57245fb6022f433ac7b9886a3979ebe2c9
parent32f29ae21f8466f431ac92a264e9b6482bc63c1c (diff)
downloadgleam_stdlib-5fced08f04dd7c2dad7c644b1333926a9126d7c9.tar.gz
gleam_stdlib-5fced08f04dd7c2dad7c644b1333926a9126d7c9.zip
add string and bytes tree modules
-rw-r--r--src/gleam/bytes_tree.gleam186
-rw-r--r--src/gleam/string_tree.gleam248
-rw-r--r--test/gleam/bytes_tree_test.gleam94
-rw-r--r--test/gleam/string_tree_test.gleam149
4 files changed, 677 insertions, 0 deletions
diff --git a/src/gleam/bytes_tree.gleam b/src/gleam/bytes_tree.gleam
new file mode 100644
index 0000000..f5b5f8b
--- /dev/null
+++ b/src/gleam/bytes_tree.gleam
@@ -0,0 +1,186 @@
+//// `BytesTree` is a type used for efficiently building binary content to be
+//// written to a file or a socket. Internally it is represented as tree so to
+//// append or prepend to a bytes tree is a constant time operation that
+//// allocates a new node in the tree without copying any of the content. When
+//// writing to an output stream the tree is traversed and the content is sent
+//// directly rather than copying it into a single buffer beforehand.
+////
+//// If we append one bit array to another the bit arrays must be copied to a
+//// new location in memory so that they can sit together. This behaviour
+//// enables efficient reading of the data but copying can be expensive,
+//// especially if we want to join many bit arrays together.
+////
+//// BytesTree is different in that it can be joined together in constant
+//// time using minimal memory, and then can be efficiently converted to a
+//// bit array using the `to_bit_array` function.
+////
+//// Byte trees are always byte aligned, so that a number of bits that is not
+//// divisible by 8 will be padded with 0s.
+////
+//// On Erlang this type is compatible with Erlang's iolists.
+
+// TODO: pad bit arrays to byte boundaries when adding to a tree.
+import gleam/bit_array
+import gleam/list
+import gleam/string_tree.{type StringTree}
+
+pub opaque type BytesTree {
+ Bytes(BitArray)
+ Text(StringTree)
+ Many(List(BytesTree))
+}
+
+/// Create an empty `BytesTree`. Useful as the start of a pipe chaining many
+/// trees together.
+///
+pub fn new() -> BytesTree {
+ concat([])
+}
+
+/// Prepends a bit array to the start of a bytes tree.
+///
+/// Runs in constant time.
+///
+pub fn prepend(to second: BytesTree, prefix first: BitArray) -> BytesTree {
+ append_tree(from_bit_array(first), second)
+}
+
+/// Appends a bit array to the end of a bytes tree.
+///
+/// Runs in constant time.
+///
+pub fn append(to first: BytesTree, suffix second: BitArray) -> BytesTree {
+ append_tree(first, from_bit_array(second))
+}
+
+/// Prepends a bytes tree onto the start of another.
+///
+/// Runs in constant time.
+///
+pub fn prepend_tree(to second: BytesTree, prefix first: BytesTree) -> BytesTree {
+ append_tree(first, second)
+}
+
+/// Appends a bytes tree onto the end of another.
+///
+/// Runs in constant time.
+///
+@external(erlang, "gleam_stdlib", "iodata_append")
+pub fn append_tree(to first: BytesTree, suffix second: BytesTree) -> BytesTree {
+ case second {
+ Many(trees) -> Many([first, ..trees])
+ _ -> Many([first, second])
+ }
+}
+
+/// Prepends a string onto the start of a bytes tree.
+///
+/// Runs in constant time when running on Erlang.
+/// Runs in linear time with the length of the string otherwise.
+///
+pub fn prepend_string(to second: BytesTree, prefix first: String) -> BytesTree {
+ append_tree(from_string(first), second)
+}
+
+/// Appends a string onto the end of a bytes tree.
+///
+/// Runs in constant time when running on Erlang.
+/// Runs in linear time with the length of the string otherwise.
+///
+pub fn append_string(to first: BytesTree, suffix second: String) -> BytesTree {
+ append_tree(first, from_string(second))
+}
+
+/// Joins a list of bytes trees into a single one.
+///
+/// Runs in constant time.
+///
+@external(erlang, "gleam_stdlib", "identity")
+pub fn concat(trees: List(BytesTree)) -> BytesTree {
+ Many(trees)
+}
+
+/// Joins a list of bit arrays into a single bytes tree.
+///
+/// Runs in constant time.
+///
+@external(erlang, "gleam_stdlib", "identity")
+pub fn concat_bit_arrays(bits: List(BitArray)) -> BytesTree {
+ bits
+ |> list.map(fn(b) { from_bit_array(b) })
+ |> concat()
+}
+
+/// Creates a new bytes tree from a string.
+///
+/// Runs in constant time when running on Erlang.
+/// Runs in linear time otherwise.
+///
+@external(erlang, "gleam_stdlib", "wrap_list")
+pub fn from_string(string: String) -> BytesTree {
+ Text(string_tree.from_string(string))
+}
+
+/// Creates a new bytes tree from a string tree.
+///
+/// Runs in constant time when running on Erlang.
+/// Runs in linear time otherwise.
+///
+@external(erlang, "gleam_stdlib", "wrap_list")
+pub fn from_string_tree(tree: string_tree.StringTree) -> BytesTree {
+ Text(tree)
+}
+
+/// Creates a new bytes tree from a bit array.
+///
+/// Runs in constant time.
+///
+@external(erlang, "gleam_stdlib", "wrap_list")
+pub fn from_bit_array(bits: BitArray) -> BytesTree {
+ Bytes(bits)
+}
+
+/// Turns a bytes tree into a bit array.
+///
+/// Runs in linear time.
+///
+/// When running on Erlang this function is implemented natively by the
+/// virtual machine and is highly optimised.
+///
+@external(erlang, "erlang", "list_to_bitstring")
+pub fn to_bit_array(tree: BytesTree) -> BitArray {
+ [[tree]]
+ |> to_list([])
+ |> list.reverse
+ |> bit_array.concat
+}
+
+fn to_list(stack: List(List(BytesTree)), acc: List(BitArray)) -> List(BitArray) {
+ case stack {
+ [] -> acc
+
+ [[], ..remaining_stack] -> to_list(remaining_stack, acc)
+
+ [[Bytes(bits), ..rest], ..remaining_stack] ->
+ to_list([rest, ..remaining_stack], [bits, ..acc])
+
+ [[Text(tree), ..rest], ..remaining_stack] -> {
+ let bits = bit_array.from_string(string_tree.to_string(tree))
+ to_list([rest, ..remaining_stack], [bits, ..acc])
+ }
+
+ [[Many(trees), ..rest], ..remaining_stack] ->
+ to_list([trees, rest, ..remaining_stack], acc)
+ }
+}
+
+/// Returns the size of the bytes tree's content in bytes.
+///
+/// Runs in linear time.
+///
+@external(erlang, "erlang", "iolist_size")
+pub fn byte_size(tree: BytesTree) -> Int {
+ [[tree]]
+ |> to_list([])
+ |> list.fold(0, fn(acc, bits) { bit_array.byte_size(bits) + acc })
+}
diff --git a/src/gleam/string_tree.gleam b/src/gleam/string_tree.gleam
new file mode 100644
index 0000000..9aeb367
--- /dev/null
+++ b/src/gleam/string_tree.gleam
@@ -0,0 +1,248 @@
+import gleam/list
+
+/// `StringTree` is a type used for efficiently building text content to be
+/// written to a file or a socket. Internally it is represented as tree so to
+/// append or prepend to a string tree is a constant time operation that
+/// allocates a new node in the tree without copying any of the content. When
+/// writing to an output stream the tree is traversed and the content is sent
+/// directly rather than copying it into a single buffer beforehand.
+///
+/// On Erlang this type is compatible with Erlang's iodata. On JavaScript this
+/// type is compatible with normal strings.
+///
+/// The BEAM virtual machine has an optimisation for appending strings, where it
+/// will mutate the string buffer when safe to do so, so if you are looking to
+/// build a string through appending many small strings then you may get better
+/// performance by not using a string tree. Always benchmark your performance
+/// sensitive code.
+///
+pub type StringTree
+
+/// Create an empty `StringTree`. Useful as the start of a pipe chaining many
+/// trees together.
+///
+pub fn new() -> StringTree {
+ do_from_strings([])
+}
+
+/// Prepends a `String` onto the start of some `StringTree`.
+///
+/// Runs in constant time.
+///
+pub fn prepend(to tree: StringTree, prefix prefix: String) -> StringTree {
+ append_tree(from_string(prefix), tree)
+}
+
+/// Appends a `String` onto the end of some `StringTree`.
+///
+/// Runs in constant time.
+///
+pub fn append(to tree: StringTree, suffix second: String) -> StringTree {
+ append_tree(tree, from_string(second))
+}
+
+/// Prepends some `StringTree` onto the start of another.
+///
+/// Runs in constant time.
+///
+pub fn prepend_tree(
+ to tree: StringTree,
+ prefix prefix: StringTree,
+) -> StringTree {
+ do_append(prefix, tree)
+}
+
+/// Appends some `StringTree` onto the end of another.
+///
+/// Runs in constant time.
+///
+pub fn append_tree(to tree: StringTree, suffix suffix: StringTree) -> StringTree {
+ do_append(tree, suffix)
+}
+
+@external(erlang, "gleam_stdlib", "iodata_append")
+@external(javascript, "../gleam_stdlib.mjs", "add")
+fn do_append(a: StringTree, b: StringTree) -> StringTree
+
+/// Converts a list of strings into a `StringTree`.
+///
+/// Runs in constant time.
+///
+pub fn from_strings(strings: List(String)) -> StringTree {
+ do_from_strings(strings)
+}
+
+@external(erlang, "gleam_stdlib", "identity")
+@external(javascript, "../gleam_stdlib.mjs", "concat")
+fn do_from_strings(a: List(String)) -> StringTree
+
+/// Joins a list of trees into a single tree.
+///
+/// Runs in constant time.
+///
+pub fn concat(trees: List(StringTree)) -> StringTree {
+ do_concat(trees)
+}
+
+@external(erlang, "gleam_stdlib", "identity")
+@external(javascript, "../gleam_stdlib.mjs", "concat")
+fn do_concat(trees: List(StringTree)) -> StringTree
+
+/// Converts a string into a `StringTree`.
+///
+/// Runs in constant time.
+///
+pub fn from_string(string: String) -> StringTree {
+ do_from_string(string)
+}
+
+@external(erlang, "gleam_stdlib", "identity")
+@external(javascript, "../gleam_stdlib.mjs", "identity")
+fn do_from_string(string: String) -> StringTree
+
+/// Turns a `StringTree` into a `String`
+///
+/// This function is implemented natively by the virtual machine and is highly
+/// optimised.
+///
+pub fn to_string(tree: StringTree) -> String {
+ do_to_string(tree)
+}
+
+@external(erlang, "unicode", "characters_to_binary")
+@external(javascript, "../gleam_stdlib.mjs", "identity")
+fn do_to_string(tree: StringTree) -> String
+
+/// Returns the size of the `StringTree` in bytes.
+///
+pub fn byte_size(tree: StringTree) -> Int {
+ do_byte_size(tree)
+}
+
+@external(erlang, "erlang", "iolist_size")
+@external(javascript, "../gleam_stdlib.mjs", "length")
+fn do_byte_size(tree: StringTree) -> Int
+
+/// Joins the given trees into a new tree separated with the given string.
+///
+pub fn join(trees: List(StringTree), with sep: String) -> StringTree {
+ trees
+ |> list.intersperse(from_string(sep))
+ |> concat
+}
+
+/// Converts a `StringTree` to a new one where the contents have been
+/// lowercased.
+///
+pub fn lowercase(tree: StringTree) -> StringTree {
+ do_lowercase(tree)
+}
+
+@external(erlang, "string", "lowercase")
+@external(javascript, "../gleam_stdlib.mjs", "lowercase")
+fn do_lowercase(tree: StringTree) -> StringTree
+
+/// Converts a `StringTree` to a new one where the contents have been
+/// uppercased.
+///
+pub fn uppercase(tree: StringTree) -> StringTree {
+ do_uppercase(tree)
+}
+
+@external(erlang, "string", "uppercase")
+@external(javascript, "../gleam_stdlib.mjs", "uppercase")
+fn do_uppercase(tree: StringTree) -> StringTree
+
+/// Converts a `StringTree` to a new one with the contents reversed.
+///
+pub fn reverse(tree: StringTree) -> StringTree {
+ do_reverse(tree)
+}
+
+@external(erlang, "string", "reverse")
+fn do_reverse(tree: StringTree) -> StringTree {
+ tree
+ |> to_string
+ |> do_to_graphemes
+ |> list.reverse
+ |> from_strings
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "graphemes")
+fn do_to_graphemes(string: String) -> List(String)
+
+/// Splits a `StringTree` on a given pattern into a list of trees.
+///
+pub fn split(tree: StringTree, on pattern: String) -> List(StringTree) {
+ do_split(tree, pattern)
+}
+
+type Direction {
+ All
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "split")
+fn do_split(tree: StringTree, pattern: String) -> List(StringTree) {
+ erl_split(tree, pattern, All)
+}
+
+@external(erlang, "string", "split")
+fn erl_split(a: StringTree, b: String, c: Direction) -> List(StringTree)
+
+/// Replaces all instances of a pattern with a given string substitute.
+///
+@external(erlang, "gleam_stdlib", "string_replace")
+@external(javascript, "../gleam_stdlib.mjs", "string_replace")
+pub fn replace(
+ in tree: StringTree,
+ each pattern: String,
+ with substitute: String,
+) -> StringTree
+
+/// Compares two string trees to determine if they have the same textual
+/// content.
+///
+/// Comparing two string trees using the `==` operator may return `False` even
+/// if they have the same content as they may have been build in different ways,
+/// so using this function is often preferred.
+///
+/// ## Examples
+///
+/// ```gleam
+/// from_strings(["a", "b"]) == from_string("ab")
+/// // -> False
+/// ```
+///
+/// ```gleam
+/// is_equal(from_strings(["a", "b"]), from_string("ab"))
+/// // -> True
+/// ```
+///
+@external(erlang, "string", "equal")
+pub fn is_equal(a: StringTree, b: StringTree) -> Bool {
+ a == b
+}
+
+/// Inspects a `StringTree` to determine if it is equivalent to an empty string.
+///
+/// ## Examples
+///
+/// ```gleam
+/// from_string("ok") |> is_empty
+/// // -> False
+/// ```
+///
+/// ```gleam
+/// from_string("") |> is_empty
+/// // -> True
+/// ```
+///
+/// ```gleam
+/// from_strings([]) |> is_empty
+/// // -> True
+/// ```
+///
+@external(erlang, "string", "is_empty")
+pub fn is_empty(tree: StringTree) -> Bool {
+ from_string("") == tree
+}
diff --git a/test/gleam/bytes_tree_test.gleam b/test/gleam/bytes_tree_test.gleam
new file mode 100644
index 0000000..1f7245a
--- /dev/null
+++ b/test/gleam/bytes_tree_test.gleam
@@ -0,0 +1,94 @@
+import gleam/bytes_tree
+import gleam/should
+import gleam/string_tree
+
+pub fn tree_test() {
+ let data =
+ bytes_tree.from_bit_array(<<1>>)
+ |> bytes_tree.append(<<2>>)
+ |> bytes_tree.append(<<3>>)
+ |> bytes_tree.prepend(<<0>>)
+
+ data
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<0, 1, 2, 3>>)
+
+ data
+ |> bytes_tree.byte_size
+ |> should.equal(4)
+}
+
+pub fn tree_with_strings_test() {
+ let data =
+ bytes_tree.from_bit_array(<<1>>)
+ |> bytes_tree.append_string("2")
+ |> bytes_tree.append_string("3")
+ |> bytes_tree.prepend_string("0")
+
+ data
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<"0":utf8, 1, "2":utf8, "3":utf8>>)
+
+ data
+ |> bytes_tree.byte_size
+ |> should.equal(4)
+}
+
+pub fn tree_with_trees_test() {
+ let data =
+ bytes_tree.from_bit_array(<<1>>)
+ |> bytes_tree.append_tree(bytes_tree.from_bit_array(<<2>>))
+ |> bytes_tree.append_tree(bytes_tree.from_bit_array(<<3>>))
+ |> bytes_tree.prepend_tree(bytes_tree.from_bit_array(<<0>>))
+
+ data
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<0, 1, 2, 3>>)
+
+ data
+ |> bytes_tree.byte_size
+ |> should.equal(4)
+}
+
+pub fn concat_test() {
+ [
+ bytes_tree.from_bit_array(<<1, 2>>),
+ bytes_tree.from_bit_array(<<3, 4>>),
+ bytes_tree.from_bit_array(<<5, 6>>),
+ ]
+ |> bytes_tree.concat
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<1, 2, 3, 4, 5, 6>>)
+}
+
+pub fn concat_bit_arrays_test() {
+ bytes_tree.concat_bit_arrays([<<"h":utf8>>, <<"e":utf8>>, <<"y":utf8>>])
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<"hey":utf8>>)
+}
+
+pub fn from_bit_array() {
+ // Regression test: no additional modification of the tree
+ bytes_tree.from_bit_array(<<>>)
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<>>)
+}
+
+pub fn from_string_test() {
+ // Regression test: no additional modification of the tree
+ bytes_tree.from_string("")
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<>>)
+}
+
+pub fn new_test() {
+ bytes_tree.new()
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<>>)
+}
+
+pub fn from_string_tree_test() {
+ string_tree.from_string("hello")
+ |> bytes_tree.from_string_tree
+ |> should.equal(bytes_tree.from_string("hello"))
+}
diff --git a/test/gleam/string_tree_test.gleam b/test/gleam/string_tree_test.gleam
new file mode 100644
index 0000000..f2f14e3
--- /dev/null
+++ b/test/gleam/string_tree_test.gleam
@@ -0,0 +1,149 @@
+import gleam/should
+import gleam/string_tree
+
+pub fn string_tree_test() {
+ let data =
+ string_tree.from_string("ello")
+ |> string_tree.append(",")
+ |> string_tree.append(" world!")
+ |> string_tree.prepend("H")
+
+ data
+ |> string_tree.to_string
+ |> should.equal("Hello, world!")
+
+ data
+ |> string_tree.byte_size
+ |> should.equal(13)
+
+ let data =
+ string_tree.from_string("ello")
+ |> string_tree.append_tree(string_tree.from_string(","))
+ |> string_tree.append_tree(
+ string_tree.concat([
+ string_tree.from_string(" wo"),
+ string_tree.from_string("rld!"),
+ ]),
+ )
+ |> string_tree.prepend_tree(string_tree.from_string("H"))
+
+ data
+ |> string_tree.to_string
+ |> should.equal("Hello, world!")
+
+ data
+ |> string_tree.byte_size
+ |> should.equal(13)
+}
+
+pub fn reverse_test() {
+ "Ĺo͂řȩm̅"
+ |> string_tree.from_string
+ |> string_tree.reverse
+ |> string_tree.reverse
+ |> string_tree.to_string
+ |> should.equal("Ĺo͂řȩm̅")
+
+ "Ĺo͂řȩm̅"
+ |> string_tree.from_string
+ |> string_tree.reverse
+ |> string_tree.to_string
+ |> should.equal("m̅ȩřo͂Ĺ")
+
+ "👶🏿"
+ |> string_tree.from_string
+ |> string_tree.reverse
+ |> string_tree.reverse
+ |> string_tree.to_string
+ |> should.equal("👶🏿")
+
+ "👶🏿"
+ |> string_tree.from_string
+ |> string_tree.reverse
+ |> string_tree.to_string
+ |> should.equal("👶🏿")
+}
+
+pub fn lowercase_test() {
+ ["Gleam", "Gleam"]
+ |> string_tree.from_strings
+ |> string_tree.lowercase
+ |> string_tree.to_string
+ |> should.equal("gleamgleam")
+}
+
+pub fn uppercase_test() {
+ ["Gleam", "Gleam"]
+ |> string_tree.from_strings
+ |> string_tree.uppercase
+ |> string_tree.to_string
+ |> should.equal("GLEAMGLEAM")
+}
+
+pub fn split_test() {
+ "Gleam,Erlang,Elixir"
+ |> string_tree.from_string
+ |> string_tree.split(",")
+ |> should.equal([
+ string_tree.from_string("Gleam"),
+ string_tree.from_string("Erlang"),
+ string_tree.from_string("Elixir"),
+ ])
+
+ ["Gleam, Erl", "ang,Elixir"]
+ |> string_tree.from_strings
+ |> string_tree.split(", ")
+ |> should.equal([
+ string_tree.from_string("Gleam"),
+ string_tree.from_strings(["Erl", "ang,Elixir"]),
+ ])
+}
+
+pub fn is_equal_test() {
+ string_tree.from_string("12")
+ |> string_tree.is_equal(string_tree.from_strings(["1", "2"]))
+ |> should.be_true
+
+ string_tree.from_string("12")
+ |> string_tree.is_equal(string_tree.from_string("12"))
+ |> should.be_true
+
+ string_tree.from_string("12")
+ |> string_tree.is_equal(string_tree.from_string("2"))
+ |> should.be_false
+}
+
+pub fn is_empty_test() {
+ string_tree.from_string("")
+ |> string_tree.is_empty
+ |> should.be_true
+
+ string_tree.from_string("12")
+ |> string_tree.is_empty
+ |> should.be_false
+
+ string_tree.from_strings([])
+ |> string_tree.is_empty
+ |> should.be_true
+
+ string_tree.from_strings(["", ""])
+ |> string_tree.is_empty
+ |> should.be_true
+}
+
+pub fn new_test() {
+ string_tree.new()
+ |> string_tree.to_string
+ |> should.equal("")
+}
+
+pub fn join_test() {
+ [
+ string_tree.from_string("Gleam"),
+ string_tree.from_string("Elixir"),
+ string_tree.from_string("Erlang"),
+ ]
+ |> string_tree.join(", ")
+ |> string_tree.to_string
+ |> should.equal("Gleam, Elixir, Erlang")
+}