aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Viney <richard.viney@gmail.com> 2024-11-28 11:15:42 +1300
committer: Louis Pilfold <louis@lpil.uk> 2024-11-28 12:20:57 +0000
commit: 406eb387d750191546d9da6f54df50b4b1ee8409 (patch)
tree: 845945efea54c443eaf63eb7bd028ccb8e650f1a
parent: 4411f584ff87f7acdf26676fc085f4b277eff166 (diff)
download: gleam_stdlib-406eb387d750191546d9da6f54df50b4b1ee8409.tar.gz
gleam_stdlib-406eb387d750191546d9da6f54df50b4b1ee8409.zip
Add bit_array.pad_to_bytes. Pad bit arrays when encoding and in bytes_tree.
-rw-r--r-- CHANGELOG.md 7
-rw-r--r-- src/gleam/bit_array.gleam 28
-rw-r--r-- src/gleam/bytes_tree.gleam 10
-rw-r--r-- src/gleam_stdlib.erl 31
-rw-r--r-- test/gleam/bit_array_test.gleam 131
-rw-r--r-- test/gleam/bytes_tree_test.gleam 24
6 files changed, 206 insertions, 25 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16f2acd..193845e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,13 @@
- The performance of `string.trim`, `string.trim_start`, and `string.trim_end`
has been improved on JavaScript.
+- The `base64_encode`, `base64_url_encode`, and `base16_encode` functions in the
+ `bit_array` module no longer throw an exception when called with a bit array
+ which is not a whole number of bytes. Instead, the bit array is now padded
+ with zero bits prior to being encoded.
+- The `bit_array` module gains the `pad_to_bytes` function.
+- The `bytes_tree` module now pads unaligned bit arrays with zeros when they are
+ added to the tree.
## v0.44.0 - 2024-11-25
diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam
index 9d00bbe..173fab8 100644
--- a/src/gleam/bit_array.gleam
+++ b/src/gleam/bit_array.gleam
@@ -23,6 +23,13 @@ pub fn bit_size(x: BitArray) -> Int {
@external(javascript, "../gleam_stdlib.mjs", "length")
pub fn byte_size(x: BitArray) -> Int
+/// Pads a bit array with zeros so that it is a whole number of bytes.
+///
+@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
+pub fn pad_to_bytes(x: BitArray) -> BitArray {
+ x
+}
+
/// Creates a new bit array by joining two bit arrays.
///
/// ## Examples
@@ -104,6 +111,9 @@ fn unsafe_to_string(a: BitArray) -> String
pub fn concat(bit_arrays: List(BitArray)) -> BitArray
/// Encodes a BitArray into a base 64 encoded string.
+///
+/// If the bit array does not contain a whole number of bytes then it is padded
+/// with zero bits prior to being encoded.
///
@external(erlang, "gleam_stdlib", "bit_array_base64_encode")
@external(javascript, "../gleam_stdlib.mjs", "encode64")
@@ -123,7 +133,11 @@ pub fn base64_decode(encoded: String) -> Result(BitArray, Nil) {
@external(javascript, "../gleam_stdlib.mjs", "decode64")
fn decode64(a: String) -> Result(BitArray, Nil)
-/// Encodes a `BitArray` into a base 64 encoded string with URL and filename safe alphabet.
+/// Encodes a `BitArray` into a base 64 encoded string with URL and filename
+/// safe alphabet.
+///
+/// If the bit array does not contain a whole number of bytes then it is padded
+/// with zero bits prior to being encoded.
///
pub fn base64_url_encode(input: BitArray, padding: Bool) -> String {
base64_encode(input, padding)
@@ -131,7 +145,8 @@ pub fn base64_url_encode(input: BitArray, padding: Bool) -> String {
|> string.replace("/", "_")
}
-/// Decodes a base 64 encoded string with URL and filename safe alphabet into a `BitArray`.
+/// Decodes a base 64 encoded string with URL and filename safe alphabet into a
+/// `BitArray`.
///
pub fn base64_url_decode(encoded: String) -> Result(BitArray, Nil) {
encoded
@@ -140,10 +155,17 @@ pub fn base64_url_decode(encoded: String) -> Result(BitArray, Nil) {
|> base64_decode()
}
-@external(erlang, "binary", "encode_hex")
+/// Encodes a `BitArray` into a base 16 encoded string.
+///
+/// If the bit array does not contain a whole number of bytes then it is padded
+/// with zero bits prior to being encoded.
+///
+@external(erlang, "gleam_stdlib", "base16_encode")
@external(javascript, "../gleam_stdlib.mjs", "base16_encode")
pub fn base16_encode(input: BitArray) -> String
+/// Decodes a base 16 encoded string into a `BitArray`.
+///
@external(erlang, "gleam_stdlib", "base16_decode")
@external(javascript, "../gleam_stdlib.mjs", "base16_decode")
pub fn base16_decode(input: String) -> Result(BitArray, Nil)
diff --git a/src/gleam/bytes_tree.gleam b/src/gleam/bytes_tree.gleam
index f5b5f8b..f3ef975 100644
--- a/src/gleam/bytes_tree.gleam
+++ b/src/gleam/bytes_tree.gleam
@@ -19,7 +19,6 @@
////
//// On Erlang this type is compatible with Erlang's iolists.
-// TODO: pad bit arrays to byte boundaries when adding to a tree.
import gleam/bit_array
import gleam/list
import gleam/string_tree.{type StringTree}
@@ -104,7 +103,6 @@ pub fn concat(trees: List(BytesTree)) -> BytesTree {
///
/// Runs in constant time.
///
-@external(erlang, "gleam_stdlib", "identity")
pub fn concat_bit_arrays(bits: List(BitArray)) -> BytesTree {
bits
|> list.map(fn(b) { from_bit_array(b) })
@@ -135,8 +133,14 @@ pub fn from_string_tree(tree: string_tree.StringTree) -> BytesTree {
///
/// Runs in constant time.
///
-@external(erlang, "gleam_stdlib", "wrap_list")
pub fn from_bit_array(bits: BitArray) -> BytesTree {
+ bits
+ |> bit_array.pad_to_bytes
+ |> wrap_list
+}
+
+@external(erlang, "gleam_stdlib", "wrap_list")
+fn wrap_list(bits: BitArray) -> BytesTree {
Bytes(bits)
}
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index ffea232..5c93fd4 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -5,7 +5,7 @@
decode_float/1, decode_list/1, decode_option/2, decode_field/2, parse_int/1,
parse_float/1, less_than/2, string_pop_grapheme/1, string_pop_codeunit/1,
string_starts_with/2, wrap_list/1, string_ends_with/2, string_pad/4,
- decode_map/1, uri_parse/1, bit_array_int_to_u32/1, bit_array_int_from_u32/1,
+ decode_map/1, uri_parse/1,
decode_result/1, bit_array_slice/3, decode_bit_array/1, compile_regex/2,
regex_scan/2, percent_encode/1, percent_decode/1, regex_check/2,
regex_split/2, base_decode64/1, parse_query/1, bit_array_concat/1,
@@ -14,8 +14,8 @@
tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1,
println_error/1, inspect/1, float_to_string/1, int_from_base_string/2,
utf_codepoint_list_to_string/1, contains_string/2, crop_string/2,
- base16_decode/1, string_replace/3, regex_replace/3, slice/3,
- bit_array_to_int_and_size/1
+ base16_encode/1, base16_decode/1, string_replace/3, regex_replace/3,
+ slice/3, bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1
]).
%% Taken from OTP's uri_string module
@@ -207,12 +207,21 @@ string_pop_grapheme(String) ->
string_pop_codeunit(<<Cp/integer, Rest/binary>>) -> {Cp, Rest};
string_pop_codeunit(Binary) -> {0, Binary}.
+bit_array_pad_to_bytes(Bin) ->
+ case erlang:bit_size(Bin) rem 8 of
+ 0 -> Bin;
+ TrailingBits ->
+ PaddingBits = 8 - TrailingBits,
+ <<Bin/bits, 0:PaddingBits>>
+ end.
+
bit_array_concat(BitArrays) ->
list_to_bitstring(BitArrays).
-if(?OTP_RELEASE >= 26).
bit_array_base64_encode(Bin, Padding) ->
- base64:encode(Bin, #{padding => Padding}).
+ PaddedBin = bit_array_pad_to_bytes(Bin),
+ base64:encode(PaddedBin, #{padding => Padding}).
-else.
bit_array_base64_encode(_Bin, _Padding) ->
erlang:error(<<"Erlang OTP/26 or higher is required to use base64:encode">>).
@@ -223,16 +232,6 @@ bit_array_slice(Bin, Pos, Len) ->
catch error:badarg -> {error, nil}
end.
-bit_array_int_to_u32(I) when 0 =< I, I < 4294967296 ->
- {ok, <<I:32>>};
-bit_array_int_to_u32(_) ->
- {error, nil}.
-
-bit_array_int_from_u32(<<I:32>>) ->
- {ok, I};
-bit_array_int_from_u32(_) ->
- {error, nil}.
-
compile_regex(String, Options) ->
{options, Caseless, Multiline} = Options,
OptionsList = [
@@ -552,6 +551,10 @@ crop_string(String, Prefix) ->
contains_string(String, Substring) ->
is_bitstring(string:find(String, Substring)).
+base16_encode(Bin) ->
+ PaddedBin = bit_array_pad_to_bytes(Bin),
+ binary:encode_hex(PaddedBin).
+
base16_decode(String) ->
try
{ok, binary:decode_hex(String)}
diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam
index 555c61a..493e696 100644
--- a/test/gleam/bit_array_test.gleam
+++ b/test/gleam/bit_array_test.gleam
@@ -36,11 +36,50 @@ pub fn bit_size_erlang_only_test() {
}
pub fn byte_size_test() {
- bit_array.byte_size(bit_array.from_string("hello"))
+ bit_array.byte_size(<<>>)
+ |> should.equal(0)
+
+ bit_array.byte_size(<<0, 1, 2, 3, 4>>)
|> should.equal(5)
+}
- bit_array.byte_size(bit_array.from_string(""))
- |> should.equal(0)
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn byte_size_erlang_only_test() {
+ bit_array.byte_size(<<1, 2, 3:6>>)
+ |> should.equal(3)
+}
+
+pub fn pad_to_bytes_test() {
+ <<>>
+ |> bit_array.pad_to_bytes
+ |> should.equal(<<>>)
+
+ <<0xAB>>
+ |> bit_array.pad_to_bytes
+ |> should.equal(<<0xAB>>)
+
+ <<0xAB, 0x12>>
+ |> bit_array.pad_to_bytes
+ |> should.equal(<<0xAB, 0x12>>)
+}
+
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn pad_to_bytes_erlang_only_test() {
+ <<1:1>>
+ |> bit_array.pad_to_bytes
+ |> should.equal(<<0x80>>)
+
+ <<-1:7>>
+ |> bit_array.pad_to_bytes
+ |> should.equal(<<0xFE>>)
+
+ <<0xAB, 0x12, 3:3>>
+ |> bit_array.pad_to_bytes
+ |> should.equal(<<0xAB, 0x12, 0x60>>)
}
pub fn not_equal_test() {
@@ -85,9 +124,25 @@ pub fn concat_test() {
// and those are not supported on the JavaScript target.
@target(erlang)
pub fn concat_erlang_only_test() {
+ [<<-1:32>>, <<0:1>>, <<0:0>>]
+ |> bit_array.concat
+ |> should.equal(<<255, 255, 255, 255, 0:1>>)
+
+ [<<-20:6, 2>>, <<3:4>>, <<7:3>>, <<-1:64>>]
+ |> bit_array.concat
+ |> should.equal(<<176, 8, 255, 255, 255, 255, 255, 255, 255, 255, 31:size(5)>>)
+
[<<1, 2:4>>, <<3>>]
|> bit_array.concat
|> should.equal(<<1, 2:4, 3>>)
+
+ [<<-1:32>>, <<0:1>>, <<0:0>>]
+ |> bit_array.concat
+ |> should.equal(<<255, 255, 255, 255, 0:1>>)
+
+ [<<-20:6, 2>>, <<3:4>>, <<7:3>>, <<-1:64>>]
+ |> bit_array.concat
+ |> should.equal(<<176, 8, 255, 255, 255, 255, 255, 255, 255, 255, 31:size(5)>>)
}
pub fn slice_test() {
@@ -133,6 +188,19 @@ pub fn slice_test() {
|> should.equal(Ok(<<"b":utf8>>))
}
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn slice_erlang_onyl_test() {
+ <<0, 1, 2:7>>
+ |> bit_array.slice(0, 3)
+ |> should.equal(Error(Nil))
+
+ <<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>>
+ |> bit_array.slice(8, 12)
+ |> should.equal(Error(Nil))
+}
+
pub fn to_string_test() {
<<>>
|> bit_array.to_string
@@ -155,6 +223,15 @@ pub fn to_string_test() {
|> should.equal(Error(Nil))
}
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn to_string_erlang_only_test() {
+ <<"ø":utf8, 50:4>>
+ |> bit_array.to_string
+ |> should.equal(Error(Nil))
+}
+
pub fn is_utf8_test() {
<<>>
|> bit_array.is_utf8
@@ -207,6 +284,23 @@ pub fn base64_encode_test() {
))
}
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn base64_erlang_only_encode_test() {
+ <<-1:7>>
+ |> bit_array.base64_encode(True)
+ |> should.equal("/g==")
+
+ <<0xFA, 5:3>>
+ |> bit_array.base64_encode(True)
+ |> should.equal("+qA=")
+
+ <<0xFA, 0xBC, 0x6D, 1:1>>
+ |> bit_array.base64_encode(True)
+ |> should.equal("+rxtgA==")
+}
+
pub fn base64_decode_test() {
"/3/+/A=="
|> bit_array.base64_decode()
@@ -305,6 +399,27 @@ pub fn base16_test() {
|> should.equal("A1B2C3D4E5F67891")
}
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn base16_encode_erlang_only_test() {
+ <<-1:7>>
+ |> bit_array.base16_encode()
+ |> should.equal("FE")
+
+ <<0xFA, 5:3>>
+ |> bit_array.base16_encode()
+ |> should.equal("FAA0")
+
+ <<0xFA, 5:4>>
+ |> bit_array.base16_encode()
+ |> should.equal("FA50")
+
+ <<0xFA, 0xBC, 0x6D, 1:1>>
+ |> bit_array.base16_encode()
+ |> should.equal("FABC6D80")
+}
+
pub fn base16_decode_test() {
bit_array.base16_decode("")
|> should.equal(Ok(<<>>))
@@ -353,7 +468,7 @@ pub fn inspect_test() {
// This test is target specific since it's using non byte-aligned BitArrays
// and those are not supported on the JavaScript target.
@target(erlang)
-pub fn inspect_partial_bytes_test() {
+pub fn inspect_erlang_only_test() {
bit_array.inspect(<<4:5>>)
|> should.equal("<<4:size(5)>>")
@@ -365,7 +480,7 @@ pub fn inspect_partial_bytes_test() {
}
@target(erlang)
-pub fn compare_different_sizes_test() {
+pub fn compare_test() {
bit_array.compare(<<4:5>>, <<4:5>>)
|> should.equal(order.Eq)
@@ -458,4 +573,10 @@ pub fn starts_with_erlang_only_test() {
bit_array.starts_with(<<0:127>>, <<1:127>>)
|> should.be_false
+
+ bit_array.starts_with(<<0xFF, 0x81>>, <<0xFF, 1:1>>)
+ |> should.be_true
+
+ bit_array.starts_with(<<0xFF, 0x81>>, <<0xFF, 0:1>>)
+ |> should.be_false
}
diff --git a/test/gleam/bytes_tree_test.gleam b/test/gleam/bytes_tree_test.gleam
index 1f7245a..3f549f1 100644
--- a/test/gleam/bytes_tree_test.gleam
+++ b/test/gleam/bytes_tree_test.gleam
@@ -18,6 +18,23 @@ pub fn tree_test() {
|> should.equal(4)
}
+@target(erlang)
+pub fn tree_unaligned_bit_arrays_test() {
+ let data =
+ bytes_tree.from_bit_array(<<-1:5>>)
+ |> bytes_tree.append(<<-1:3>>)
+ |> bytes_tree.append(<<-2:2>>)
+ |> bytes_tree.prepend(<<-1:4>>)
+
+ data
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<-1:4, 0:4, -1:5, 0:3, -1:3, 0:5, -2:2, 0:6>>)
+
+ data
+ |> bytes_tree.byte_size
+ |> should.equal(4)
+}
+
pub fn tree_with_strings_test() {
let data =
bytes_tree.from_bit_array(<<1>>)
@@ -67,6 +84,13 @@ pub fn concat_bit_arrays_test() {
|> should.equal(<<"hey":utf8>>)
}
+@target(erlang)
+pub fn concat_unaligned_bit_arrays_test() {
+ bytes_tree.concat_bit_arrays([<<-1:4>>, <<-1:5>>, <<-1:3>>, <<-2:2>>])
+ |> bytes_tree.to_bit_array
+ |> should.equal(<<-1:4, 0:4, -1:5, 0:3, -1:3, 0:5, -2:2, 0:6>>)
+}
+
pub fn from_bit_array() {
// Regression test: no additional modification of the tree
bytes_tree.from_bit_array(<<>>)