diff options
author | Giacomo Cavalieri <giacomo.cavalieri@icloud.com> | 2024-11-12 18:54:54 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2024-11-15 15:22:34 +0000 |
commit | 5fced08f04dd7c2dad7c644b1333926a9126d7c9 (patch) | |
tree | ea6cca57245fb6022f433ac7b9886a3979ebe2c9 /src | |
parent | 32f29ae21f8466f431ac92a264e9b6482bc63c1c (diff) | |
download | gleam_stdlib-5fced08f04dd7c2dad7c644b1333926a9126d7c9.tar.gz gleam_stdlib-5fced08f04dd7c2dad7c644b1333926a9126d7c9.zip |
add string and bytes tree modules
Diffstat (limited to 'src')
-rw-r--r-- | src/gleam/bytes_tree.gleam | 186 | ||||
-rw-r--r-- | src/gleam/string_tree.gleam | 248 |
2 files changed, 434 insertions, 0 deletions
diff --git a/src/gleam/bytes_tree.gleam b/src/gleam/bytes_tree.gleam new file mode 100644 index 0000000..f5b5f8b --- /dev/null +++ b/src/gleam/bytes_tree.gleam @@ -0,0 +1,186 @@ +//// `BytesTree` is a type used for efficiently building binary content to be +//// written to a file or a socket. Internally it is represented as tree so to +//// append or prepend to a bytes tree is a constant time operation that +//// allocates a new node in the tree without copying any of the content. When +//// writing to an output stream the tree is traversed and the content is sent +//// directly rather than copying it into a single buffer beforehand. +//// +//// If we append one bit array to another the bit arrays must be copied to a +//// new location in memory so that they can sit together. This behaviour +//// enables efficient reading of the data but copying can be expensive, +//// especially if we want to join many bit arrays together. +//// +//// BytesTree is different in that it can be joined together in constant +//// time using minimal memory, and then can be efficiently converted to a +//// bit array using the `to_bit_array` function. +//// +//// Byte trees are always byte aligned, so that a number of bits that is not +//// divisible by 8 will be padded with 0s. +//// +//// On Erlang this type is compatible with Erlang's iolists. + +// TODO: pad bit arrays to byte boundaries when adding to a tree. +import gleam/bit_array +import gleam/list +import gleam/string_tree.{type StringTree} + +pub opaque type BytesTree { + Bytes(BitArray) + Text(StringTree) + Many(List(BytesTree)) +} + +/// Create an empty `BytesTree`. Useful as the start of a pipe chaining many +/// trees together. +/// +pub fn new() -> BytesTree { + concat([]) +} + +/// Prepends a bit array to the start of a bytes tree. +/// +/// Runs in constant time. +/// +pub fn prepend(to second: BytesTree, prefix first: BitArray) -> BytesTree { + append_tree(from_bit_array(first), second) +} + +/// Appends a bit array to the end of a bytes tree. +/// +/// Runs in constant time. +/// +pub fn append(to first: BytesTree, suffix second: BitArray) -> BytesTree { + append_tree(first, from_bit_array(second)) +} + +/// Prepends a bytes tree onto the start of another. +/// +/// Runs in constant time. +/// +pub fn prepend_tree(to second: BytesTree, prefix first: BytesTree) -> BytesTree { + append_tree(first, second) +} + +/// Appends a bytes tree onto the end of another. +/// +/// Runs in constant time. +/// +@external(erlang, "gleam_stdlib", "iodata_append") +pub fn append_tree(to first: BytesTree, suffix second: BytesTree) -> BytesTree { + case second { + Many(trees) -> Many([first, ..trees]) + _ -> Many([first, second]) + } +} + +/// Prepends a string onto the start of a bytes tree. +/// +/// Runs in constant time when running on Erlang. +/// Runs in linear time with the length of the string otherwise. +/// +pub fn prepend_string(to second: BytesTree, prefix first: String) -> BytesTree { + append_tree(from_string(first), second) +} + +/// Appends a string onto the end of a bytes tree. +/// +/// Runs in constant time when running on Erlang. +/// Runs in linear time with the length of the string otherwise. +/// +pub fn append_string(to first: BytesTree, suffix second: String) -> BytesTree { + append_tree(first, from_string(second)) +} + +/// Joins a list of bytes trees into a single one. +/// +/// Runs in constant time. +/// +@external(erlang, "gleam_stdlib", "identity") +pub fn concat(trees: List(BytesTree)) -> BytesTree { + Many(trees) +} + +/// Joins a list of bit arrays into a single bytes tree. +/// +/// Runs in constant time. +/// +@external(erlang, "gleam_stdlib", "identity") +pub fn concat_bit_arrays(bits: List(BitArray)) -> BytesTree { + bits + |> list.map(fn(b) { from_bit_array(b) }) + |> concat() +} + +/// Creates a new bytes tree from a string. +/// +/// Runs in constant time when running on Erlang. +/// Runs in linear time otherwise. +/// +@external(erlang, "gleam_stdlib", "wrap_list") +pub fn from_string(string: String) -> BytesTree { + Text(string_tree.from_string(string)) +} + +/// Creates a new bytes tree from a string tree. +/// +/// Runs in constant time when running on Erlang. +/// Runs in linear time otherwise. +/// +@external(erlang, "gleam_stdlib", "wrap_list") +pub fn from_string_tree(tree: string_tree.StringTree) -> BytesTree { + Text(tree) +} + +/// Creates a new bytes tree from a bit array. +/// +/// Runs in constant time. +/// +@external(erlang, "gleam_stdlib", "wrap_list") +pub fn from_bit_array(bits: BitArray) -> BytesTree { + Bytes(bits) +} + +/// Turns a bytes tree into a bit array. +/// +/// Runs in linear time. +/// +/// When running on Erlang this function is implemented natively by the +/// virtual machine and is highly optimised. +/// +@external(erlang, "erlang", "list_to_bitstring") +pub fn to_bit_array(tree: BytesTree) -> BitArray { + [[tree]] + |> to_list([]) + |> list.reverse + |> bit_array.concat +} + +fn to_list(stack: List(List(BytesTree)), acc: List(BitArray)) -> List(BitArray) { + case stack { + [] -> acc + + [[], ..remaining_stack] -> to_list(remaining_stack, acc) + + [[Bytes(bits), ..rest], ..remaining_stack] -> + to_list([rest, ..remaining_stack], [bits, ..acc]) + + [[Text(tree), ..rest], ..remaining_stack] -> { + let bits = bit_array.from_string(string_tree.to_string(tree)) + to_list([rest, ..remaining_stack], [bits, ..acc]) + } + + [[Many(trees), ..rest], ..remaining_stack] -> + to_list([trees, rest, ..remaining_stack], acc) + } +} + +/// Returns the size of the bytes tree's content in bytes. +/// +/// Runs in linear time. +/// +@external(erlang, "erlang", "iolist_size") +pub fn byte_size(tree: BytesTree) -> Int { + [[tree]] + |> to_list([]) + |> list.fold(0, fn(acc, bits) { bit_array.byte_size(bits) + acc }) +} diff --git a/src/gleam/string_tree.gleam b/src/gleam/string_tree.gleam new file mode 100644 index 0000000..9aeb367 --- /dev/null +++ b/src/gleam/string_tree.gleam @@ -0,0 +1,248 @@ +import gleam/list + +/// `StringTree` is a type used for efficiently building text content to be +/// written to a file or a socket. Internally it is represented as tree so to +/// append or prepend to a string tree is a constant time operation that +/// allocates a new node in the tree without copying any of the content. When +/// writing to an output stream the tree is traversed and the content is sent +/// directly rather than copying it into a single buffer beforehand. +/// +/// On Erlang this type is compatible with Erlang's iodata. On JavaScript this +/// type is compatible with normal strings. +/// +/// The BEAM virtual machine has an optimisation for appending strings, where it +/// will mutate the string buffer when safe to do so, so if you are looking to +/// build a string through appending many small strings then you may get better +/// performance by not using a string tree. Always benchmark your performance +/// sensitive code. +/// +pub type StringTree + +/// Create an empty `StringTree`. Useful as the start of a pipe chaining many +/// trees together. +/// +pub fn new() -> StringTree { + do_from_strings([]) +} + +/// Prepends a `String` onto the start of some `StringTree`. +/// +/// Runs in constant time. +/// +pub fn prepend(to tree: StringTree, prefix prefix: String) -> StringTree { + append_tree(from_string(prefix), tree) +} + +/// Appends a `String` onto the end of some `StringTree`. +/// +/// Runs in constant time. +/// +pub fn append(to tree: StringTree, suffix second: String) -> StringTree { + append_tree(tree, from_string(second)) +} + +/// Prepends some `StringTree` onto the start of another. +/// +/// Runs in constant time. +/// +pub fn prepend_tree( + to tree: StringTree, + prefix prefix: StringTree, +) -> StringTree { + do_append(prefix, tree) +} + +/// Appends some `StringTree` onto the end of another. +/// +/// Runs in constant time. +/// +pub fn append_tree(to tree: StringTree, suffix suffix: StringTree) -> StringTree { + do_append(tree, suffix) +} + +@external(erlang, "gleam_stdlib", "iodata_append") +@external(javascript, "../gleam_stdlib.mjs", "add") +fn do_append(a: StringTree, b: StringTree) -> StringTree + +/// Converts a list of strings into a `StringTree`. +/// +/// Runs in constant time. +/// +pub fn from_strings(strings: List(String)) -> StringTree { + do_from_strings(strings) +} + +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../gleam_stdlib.mjs", "concat") +fn do_from_strings(a: List(String)) -> StringTree + +/// Joins a list of trees into a single tree. +/// +/// Runs in constant time. +/// +pub fn concat(trees: List(StringTree)) -> StringTree { + do_concat(trees) +} + +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../gleam_stdlib.mjs", "concat") +fn do_concat(trees: List(StringTree)) -> StringTree + +/// Converts a string into a `StringTree`. +/// +/// Runs in constant time. +/// +pub fn from_string(string: String) -> StringTree { + do_from_string(string) +} + +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../gleam_stdlib.mjs", "identity") +fn do_from_string(string: String) -> StringTree + +/// Turns a `StringTree` into a `String` +/// +/// This function is implemented natively by the virtual machine and is highly +/// optimised. +/// +pub fn to_string(tree: StringTree) -> String { + do_to_string(tree) +} + +@external(erlang, "unicode", "characters_to_binary") +@external(javascript, "../gleam_stdlib.mjs", "identity") +fn do_to_string(tree: StringTree) -> String + +/// Returns the size of the `StringTree` in bytes. +/// +pub fn byte_size(tree: StringTree) -> Int { + do_byte_size(tree) +} + +@external(erlang, "erlang", "iolist_size") +@external(javascript, "../gleam_stdlib.mjs", "length") +fn do_byte_size(tree: StringTree) -> Int + +/// Joins the given trees into a new tree separated with the given string. +/// +pub fn join(trees: List(StringTree), with sep: String) -> StringTree { + trees + |> list.intersperse(from_string(sep)) + |> concat +} + +/// Converts a `StringTree` to a new one where the contents have been +/// lowercased. +/// +pub fn lowercase(tree: StringTree) -> StringTree { + do_lowercase(tree) +} + +@external(erlang, "string", "lowercase") +@external(javascript, "../gleam_stdlib.mjs", "lowercase") +fn do_lowercase(tree: StringTree) -> StringTree + +/// Converts a `StringTree` to a new one where the contents have been +/// uppercased. +/// +pub fn uppercase(tree: StringTree) -> StringTree { + do_uppercase(tree) +} + +@external(erlang, "string", "uppercase") +@external(javascript, "../gleam_stdlib.mjs", "uppercase") +fn do_uppercase(tree: StringTree) -> StringTree + +/// Converts a `StringTree` to a new one with the contents reversed. +/// +pub fn reverse(tree: StringTree) -> StringTree { + do_reverse(tree) +} + +@external(erlang, "string", "reverse") +fn do_reverse(tree: StringTree) -> StringTree { + tree + |> to_string + |> do_to_graphemes + |> list.reverse + |> from_strings +} + +@external(javascript, "../gleam_stdlib.mjs", "graphemes") +fn do_to_graphemes(string: String) -> List(String) + +/// Splits a `StringTree` on a given pattern into a list of trees. +/// +pub fn split(tree: StringTree, on pattern: String) -> List(StringTree) { + do_split(tree, pattern) +} + +type Direction { + All +} + +@external(javascript, "../gleam_stdlib.mjs", "split") +fn do_split(tree: StringTree, pattern: String) -> List(StringTree) { + erl_split(tree, pattern, All) +} + +@external(erlang, "string", "split") +fn erl_split(a: StringTree, b: String, c: Direction) -> List(StringTree) + +/// Replaces all instances of a pattern with a given string substitute. +/// +@external(erlang, "gleam_stdlib", "string_replace") +@external(javascript, "../gleam_stdlib.mjs", "string_replace") +pub fn replace( + in tree: StringTree, + each pattern: String, + with substitute: String, +) -> StringTree + +/// Compares two string trees to determine if they have the same textual +/// content. +/// +/// Comparing two string trees using the `==` operator may return `False` even +/// if they have the same content as they may have been build in different ways, +/// so using this function is often preferred. +/// +/// ## Examples +/// +/// ```gleam +/// from_strings(["a", "b"]) == from_string("ab") +/// // -> False +/// ``` +/// +/// ```gleam +/// is_equal(from_strings(["a", "b"]), from_string("ab")) +/// // -> True +/// ``` +/// +@external(erlang, "string", "equal") +pub fn is_equal(a: StringTree, b: StringTree) -> Bool { + a == b +} + +/// Inspects a `StringTree` to determine if it is equivalent to an empty string. +/// +/// ## Examples +/// +/// ```gleam +/// from_string("ok") |> is_empty +/// // -> False +/// ``` +/// +/// ```gleam +/// from_string("") |> is_empty +/// // -> True +/// ``` +/// +/// ```gleam +/// from_strings([]) |> is_empty +/// // -> True +/// ``` +/// +@external(erlang, "string", "is_empty") +pub fn is_empty(tree: StringTree) -> Bool { + from_string("") == tree +} |