diff options
author | Richard Viney <richard.viney@gmail.com> | 2024-11-28 11:15:42 +1300 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2024-11-28 12:20:57 +0000 |
commit | 406eb387d750191546d9da6f54df50b4b1ee8409 (patch) | |
tree | 845945efea54c443eaf63eb7bd028ccb8e650f1a | |
parent | 4411f584ff87f7acdf26676fc085f4b277eff166 (diff) | |
download | gleam_stdlib-406eb387d750191546d9da6f54df50b4b1ee8409.tar.gz gleam_stdlib-406eb387d750191546d9da6f54df50b4b1ee8409.zip |
Add bit_array.pad_to_bytes. Pad bit arrays when encoding and in bytes_tree.
-rw-r--r-- | CHANGELOG.md | 7 | ||||
-rw-r--r-- | src/gleam/bit_array.gleam | 28 | ||||
-rw-r--r-- | src/gleam/bytes_tree.gleam | 10 | ||||
-rw-r--r-- | src/gleam_stdlib.erl | 31 | ||||
-rw-r--r-- | test/gleam/bit_array_test.gleam | 131 | ||||
-rw-r--r-- | test/gleam/bytes_tree_test.gleam | 24 |
6 files changed, 206 insertions, 25 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 16f2acd..193845e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ - The performance of `string.trim`, `string.trim_start`, and `string.trim_end` has been improved on JavaScript. +- The `base64_encode`, `base64_url_encode`, and `base16_encode` functions in the + `bit_array` module no longer throw an exception when called with a bit array + which is not a whole number of bytes. Instead, the bit array is now padded + with zero bits prior to being encoded. +- The `bit_array` module gains the `pad_to_bytes` function. +- The `bytes_tree` module now pads unaligned bit arrays with zeros when they are + added to the tree. ## v0.44.0 - 2024-11-25 diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index 9d00bbe..173fab8 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -23,6 +23,13 @@ pub fn bit_size(x: BitArray) -> Int { @external(javascript, "../gleam_stdlib.mjs", "length") pub fn byte_size(x: BitArray) -> Int +/// Pads a bit array with zeros so that it is a whole number of bytes. +/// +@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes") +pub fn pad_to_bytes(x: BitArray) -> BitArray { + x +} + /// Creates a new bit array by joining two bit arrays. /// /// ## Examples @@ -104,6 +111,9 @@ fn unsafe_to_string(a: BitArray) -> String pub fn concat(bit_arrays: List(BitArray)) -> BitArray /// Encodes a BitArray into a base 64 encoded string. +/// +/// If the bit array does not contain a whole number of bytes then it is padded +/// with zero bits prior to being encoded. /// @external(erlang, "gleam_stdlib", "bit_array_base64_encode") @external(javascript, "../gleam_stdlib.mjs", "encode64") @@ -123,7 +133,11 @@ pub fn base64_decode(encoded: String) -> Result(BitArray, Nil) { @external(javascript, "../gleam_stdlib.mjs", "decode64") fn decode64(a: String) -> Result(BitArray, Nil) -/// Encodes a `BitArray` into a base 64 encoded string with URL and filename safe alphabet. 
+/// Encodes a `BitArray` into a base 64 encoded string with URL and filename +/// safe alphabet. +/// +/// If the bit array does not contain a whole number of bytes then it is padded +/// with zero bits prior to being encoded. /// pub fn base64_url_encode(input: BitArray, padding: Bool) -> String { base64_encode(input, padding) @@ -131,7 +145,8 @@ pub fn base64_url_encode(input: BitArray, padding: Bool) -> String { |> string.replace("/", "_") } -/// Decodes a base 64 encoded string with URL and filename safe alphabet into a `BitArray`. +/// Decodes a base 64 encoded string with URL and filename safe alphabet into a +/// `BitArray`. /// pub fn base64_url_decode(encoded: String) -> Result(BitArray, Nil) { encoded @@ -140,10 +155,17 @@ pub fn base64_url_decode(encoded: String) -> Result(BitArray, Nil) { |> base64_decode() } -@external(erlang, "binary", "encode_hex") +/// Encodes a `BitArray` into a base 16 encoded string. +/// +/// If the bit array does not contain a whole number of bytes then it is padded +/// with zero bits prior to being encoded. +/// +@external(erlang, "gleam_stdlib", "base16_encode") @external(javascript, "../gleam_stdlib.mjs", "base16_encode") pub fn base16_encode(input: BitArray) -> String +/// Decodes a base 16 encoded string into a `BitArray`. +/// @external(erlang, "gleam_stdlib", "base16_decode") @external(javascript, "../gleam_stdlib.mjs", "base16_decode") pub fn base16_decode(input: String) -> Result(BitArray, Nil) diff --git a/src/gleam/bytes_tree.gleam b/src/gleam/bytes_tree.gleam index f5b5f8b..f3ef975 100644 --- a/src/gleam/bytes_tree.gleam +++ b/src/gleam/bytes_tree.gleam @@ -19,7 +19,6 @@ //// //// On Erlang this type is compatible with Erlang's iolists. -// TODO: pad bit arrays to byte boundaries when adding to a tree. import gleam/bit_array import gleam/list import gleam/string_tree.{type StringTree} @@ -104,7 +103,6 @@ pub fn concat(trees: List(BytesTree)) -> BytesTree { /// /// Runs in constant time. 
/// -@external(erlang, "gleam_stdlib", "identity") pub fn concat_bit_arrays(bits: List(BitArray)) -> BytesTree { bits |> list.map(fn(b) { from_bit_array(b) }) @@ -135,8 +133,14 @@ pub fn from_string_tree(tree: string_tree.StringTree) -> BytesTree { /// /// Runs in constant time. /// -@external(erlang, "gleam_stdlib", "wrap_list") pub fn from_bit_array(bits: BitArray) -> BytesTree { + bits + |> bit_array.pad_to_bytes + |> wrap_list +} + +@external(erlang, "gleam_stdlib", "wrap_list") +fn wrap_list(bits: BitArray) -> BytesTree { Bytes(bits) } diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index ffea232..5c93fd4 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -5,7 +5,7 @@ decode_float/1, decode_list/1, decode_option/2, decode_field/2, parse_int/1, parse_float/1, less_than/2, string_pop_grapheme/1, string_pop_codeunit/1, string_starts_with/2, wrap_list/1, string_ends_with/2, string_pad/4, - decode_map/1, uri_parse/1, bit_array_int_to_u32/1, bit_array_int_from_u32/1, + decode_map/1, uri_parse/1, decode_result/1, bit_array_slice/3, decode_bit_array/1, compile_regex/2, regex_scan/2, percent_encode/1, percent_decode/1, regex_check/2, regex_split/2, base_decode64/1, parse_query/1, bit_array_concat/1, @@ -14,8 +14,8 @@ tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1, println_error/1, inspect/1, float_to_string/1, int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2, crop_string/2, - base16_decode/1, string_replace/3, regex_replace/3, slice/3, - bit_array_to_int_and_size/1 + base16_encode/1, base16_decode/1, string_replace/3, regex_replace/3, + slice/3, bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1 ]). %% Taken from OTP's uri_string module @@ -207,12 +207,21 @@ string_pop_grapheme(String) -> string_pop_codeunit(<<Cp/integer, Rest/binary>>) -> {Cp, Rest}; string_pop_codeunit(Binary) -> {0, Binary}. 
+bit_array_pad_to_bytes(Bin) -> + case erlang:bit_size(Bin) rem 8 of + 0 -> Bin; + TrailingBits -> + PaddingBits = 8 - TrailingBits, + <<Bin/bits, 0:PaddingBits>> + end. + bit_array_concat(BitArrays) -> list_to_bitstring(BitArrays). -if(?OTP_RELEASE >= 26). bit_array_base64_encode(Bin, Padding) -> - base64:encode(Bin, #{padding => Padding}). + PaddedBin = bit_array_pad_to_bytes(Bin), + base64:encode(PaddedBin, #{padding => Padding}). -else. bit_array_base64_encode(_Bin, _Padding) -> erlang:error(<<"Erlang OTP/26 or higher is required to use base64:encode">>). @@ -223,16 +232,6 @@ bit_array_slice(Bin, Pos, Len) -> catch error:badarg -> {error, nil} end. -bit_array_int_to_u32(I) when 0 =< I, I < 4294967296 -> - {ok, <<I:32>>}; -bit_array_int_to_u32(_) -> - {error, nil}. - -bit_array_int_from_u32(<<I:32>>) -> - {ok, I}; -bit_array_int_from_u32(_) -> - {error, nil}. - compile_regex(String, Options) -> {options, Caseless, Multiline} = Options, OptionsList = [ @@ -552,6 +551,10 @@ crop_string(String, Prefix) -> contains_string(String, Substring) -> is_bitstring(string:find(String, Substring)). +base16_encode(Bin) -> + PaddedBin = bit_array_pad_to_bytes(Bin), + binary:encode_hex(PaddedBin). + base16_decode(String) -> try {ok, binary:decode_hex(String)} diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 555c61a..493e696 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -36,11 +36,50 @@ pub fn bit_size_erlang_only_test() { } pub fn byte_size_test() { - bit_array.byte_size(bit_array.from_string("hello")) + bit_array.byte_size(<<>>) + |> should.equal(0) + + bit_array.byte_size(<<0, 1, 2, 3, 4>>) |> should.equal(5) +} - bit_array.byte_size(bit_array.from_string("")) - |> should.equal(0) +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. 
+@target(erlang) +pub fn byte_size_erlang_only_test() { + bit_array.byte_size(<<1, 2, 3:6>>) + |> should.equal(3) +} + +pub fn pad_to_bytes_test() { + <<>> + |> bit_array.pad_to_bytes + |> should.equal(<<>>) + + <<0xAB>> + |> bit_array.pad_to_bytes + |> should.equal(<<0xAB>>) + + <<0xAB, 0x12>> + |> bit_array.pad_to_bytes + |> should.equal(<<0xAB, 0x12>>) +} + +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. +@target(erlang) +pub fn pad_to_bytes_erlang_only_test() { + <<1:1>> + |> bit_array.pad_to_bytes + |> should.equal(<<0x80>>) + + <<-1:7>> + |> bit_array.pad_to_bytes + |> should.equal(<<0xFE>>) + + <<0xAB, 0x12, 3:3>> + |> bit_array.pad_to_bytes + |> should.equal(<<0xAB, 0x12, 0x60>>) } pub fn not_equal_test() { @@ -85,9 +124,25 @@ pub fn concat_test() { // and those are not supported on the JavaScript target. @target(erlang) pub fn concat_erlang_only_test() { + [<<-1:32>>, <<0:1>>, <<0:0>>] + |> bit_array.concat + |> should.equal(<<255, 255, 255, 255, 0:1>>) + + [<<-20:6, 2>>, <<3:4>>, <<7:3>>, <<-1:64>>] + |> bit_array.concat + |> should.equal(<<176, 8, 255, 255, 255, 255, 255, 255, 255, 255, 31:size(5)>>) + [<<1, 2:4>>, <<3>>] |> bit_array.concat |> should.equal(<<1, 2:4, 3>>) + + [<<-1:32>>, <<0:1>>, <<0:0>>] + |> bit_array.concat + |> should.equal(<<255, 255, 255, 255, 0:1>>) + + [<<-20:6, 2>>, <<3:4>>, <<7:3>>, <<-1:64>>] + |> bit_array.concat + |> should.equal(<<176, 8, 255, 255, 255, 255, 255, 255, 255, 255, 31:size(5)>>) } pub fn slice_test() { @@ -133,6 +188,19 @@ pub fn slice_test() { |> should.equal(Ok(<<"b":utf8>>)) } +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. 
+@target(erlang) +pub fn slice_erlang_onyl_test() { + <<0, 1, 2:7>> + |> bit_array.slice(0, 3) + |> should.equal(Error(Nil)) + + <<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>> + |> bit_array.slice(8, 12) + |> should.equal(Error(Nil)) +} + pub fn to_string_test() { <<>> |> bit_array.to_string @@ -155,6 +223,15 @@ pub fn to_string_test() { |> should.equal(Error(Nil)) } +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. +@target(erlang) +pub fn to_string_erlang_only_test() { + <<"ø":utf8, 50:4>> + |> bit_array.to_string + |> should.equal(Error(Nil)) +} + pub fn is_utf8_test() { <<>> |> bit_array.is_utf8 @@ -207,6 +284,23 @@ pub fn base64_encode_test() { )) } +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. +@target(erlang) +pub fn base64_erlang_only_encode_test() { + <<-1:7>> + |> bit_array.base64_encode(True) + |> should.equal("/g==") + + <<0xFA, 5:3>> + |> bit_array.base64_encode(True) + |> should.equal("+qA=") + + <<0xFA, 0xBC, 0x6D, 1:1>> + |> bit_array.base64_encode(True) + |> should.equal("+rxtgA==") +} + pub fn base64_decode_test() { "/3/+/A==" |> bit_array.base64_decode() @@ -305,6 +399,27 @@ pub fn base16_test() { |> should.equal("A1B2C3D4E5F67891") } +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. 
+@target(erlang) +pub fn base16_encode_erlang_only_test() { + <<-1:7>> + |> bit_array.base16_encode() + |> should.equal("FE") + + <<0xFA, 5:3>> + |> bit_array.base16_encode() + |> should.equal("FAA0") + + <<0xFA, 5:4>> + |> bit_array.base16_encode() + |> should.equal("FA50") + + <<0xFA, 0xBC, 0x6D, 1:1>> + |> bit_array.base16_encode() + |> should.equal("FABC6D80") +} + pub fn base16_decode_test() { bit_array.base16_decode("") |> should.equal(Ok(<<>>)) @@ -353,7 +468,7 @@ pub fn inspect_test() { // This test is target specific since it's using non byte-aligned BitArrays // and those are not supported on the JavaScript target. @target(erlang) -pub fn inspect_partial_bytes_test() { +pub fn inspect_erlang_only_test() { bit_array.inspect(<<4:5>>) |> should.equal("<<4:size(5)>>") @@ -365,7 +480,7 @@ pub fn inspect_partial_bytes_test() { } @target(erlang) -pub fn compare_different_sizes_test() { +pub fn compare_test() { bit_array.compare(<<4:5>>, <<4:5>>) |> should.equal(order.Eq) @@ -458,4 +573,10 @@ pub fn starts_with_erlang_only_test() { bit_array.starts_with(<<0:127>>, <<1:127>>) |> should.be_false + + bit_array.starts_with(<<0xFF, 0x81>>, <<0xFF, 1:1>>) + |> should.be_true + + bit_array.starts_with(<<0xFF, 0x81>>, <<0xFF, 0:1>>) + |> should.be_false } diff --git a/test/gleam/bytes_tree_test.gleam b/test/gleam/bytes_tree_test.gleam index 1f7245a..3f549f1 100644 --- a/test/gleam/bytes_tree_test.gleam +++ b/test/gleam/bytes_tree_test.gleam @@ -18,6 +18,23 @@ pub fn tree_test() { |> should.equal(4) } +@target(erlang) +pub fn tree_unaligned_bit_arrays_test() { + let data = + bytes_tree.from_bit_array(<<-1:5>>) + |> bytes_tree.append(<<-1:3>>) + |> bytes_tree.append(<<-2:2>>) + |> bytes_tree.prepend(<<-1:4>>) + + data + |> bytes_tree.to_bit_array + |> should.equal(<<-1:4, 0:4, -1:5, 0:3, -1:3, 0:5, -2:2, 0:6>>) + + data + |> bytes_tree.byte_size + |> should.equal(4) +} + pub fn tree_with_strings_test() { let data = bytes_tree.from_bit_array(<<1>>) @@ -67,6 +84,13 @@ pub fn 
concat_bit_arrays_test() { |> should.equal(<<"hey":utf8>>) } +@target(erlang) +pub fn concat_unaligned_bit_arrays_test() { + bytes_tree.concat_bit_arrays([<<-1:4>>, <<-1:5>>, <<-1:3>>, <<-2:2>>]) + |> bytes_tree.to_bit_array + |> should.equal(<<-1:4, 0:4, -1:5, 0:3, -1:3, 0:5, -2:2, 0:6>>) +} + pub fn from_bit_array() { // Regression test: no additional modification of the tree bytes_tree.from_bit_array(<<>>) |