diff options
author | Richard Viney <richard.viney@gmail.com> | 2024-06-21 17:01:37 +1200 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2024-06-21 10:49:14 +0100 |
commit | 6a6d5adf356993031ab2e79012e436dd2567a2f9 (patch) | |
tree | a1da55c581271e0160b47e3be35211f456719099 | |
parent | ddd52834c46a9e8c608cbdc347a71b9ceeb361df (diff) | |
download | gleam_stdlib-6a6d5adf356993031ab2e79012e436dd2567a2f9.tar.gz gleam_stdlib-6a6d5adf356993031ab2e79012e436dd2567a2f9.zip |
Base64 encoding optimisations. Fix encoding of large bit arrays on JS.
This native JS implementation is 13-14x faster in simple benchmarks.
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | src/gleam/bit_array.gleam | 12 | ||||
-rw-r--r-- | src/gleam_stdlib.erl | 6 | ||||
-rw-r--r-- | src/gleam_stdlib.mjs | 45 | ||||
-rw-r--r-- | test/gleam/bit_array_test.gleam | 11 |
5 files changed, 61 insertions, 15 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 8272cca..ea30f7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ - The behaviour of the string trim functions is now consistent across targets. - `iterator.yield` now yields values without waiting for the next one to become available. +- Base64 encoding speed improvements. Encoding of bit arrays over ~100KiB to + Base64 on JavaScript no longer throws an exception. ## v0.38.0 - 2024-05-24 diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index c112eb3..75ac238 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -107,17 +107,9 @@ pub fn concat(bit_arrays: List(BitArray)) -> BitArray /// Encodes a BitArray into a base 64 encoded string. /// -pub fn base64_encode(input: BitArray, padding: Bool) -> String { - let encoded = encode64(input) - case padding { - True -> encoded - False -> string.replace(encoded, "=", "") - } -} - -@external(erlang, "base64", "encode") +@external(erlang, "gleam_stdlib", "bit_array_base64_encode") @external(javascript, "../gleam_stdlib.mjs", "encode64") -fn encode64(a: BitArray) -> String +pub fn base64_encode(input: BitArray, padding: Bool) -> String /// Decodes a base 64 encoded string into a `BitArray`. /// diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index b95d9f8..2596f41 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -8,7 +8,8 @@ bit_array_int_to_u32/1, bit_array_int_from_u32/1, decode_result/1, bit_array_slice/3, decode_bit_array/1, compile_regex/2, regex_scan/2, percent_encode/1, percent_decode/1, regex_check/2, regex_split/2, - base_decode64/1, parse_query/1, bit_array_concat/1, size_of_tuple/1, + base_decode64/1, parse_query/1, bit_array_concat/1, + bit_array_base64_encode/2, size_of_tuple/1, decode_tuple/1, decode_tuple2/1, decode_tuple3/1, decode_tuple4/1, decode_tuple5/1, decode_tuple6/1, tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1, println_error/1, inspect/1, float_to_string/1, @@ -201,6 +202,9 @@ string_pop_grapheme(String) -> bit_array_concat(BitArrays) -> list_to_bitstring(BitArrays). +bit_array_base64_encode(Bin, Padding) -> + base64:encode(Bin, #{padding => Padding}). + bit_array_slice(Bin, Pos, Len) -> try {ok, binary:part(Bin, Pos, Len)} catch error:badarg -> {error, nil} diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index a837e1d..c4111f9 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -504,10 +504,47 @@ export function parse_query(query) { } } -// From https://developer.mozilla.org/en-US/docs/Glossary/Base64 -export function encode64(bit_array) { - const binString = String.fromCodePoint(...bit_array.buffer); - return btoa(binString); +const b64EncodeLookup = [ + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 43, 47, +]; + +let b64TextDecoder; + +// Implementation based on https://github.com/mitschabaude/fast-base64/blob/main/js.js +export function encode64(bit_array, padding) { + b64TextDecoder ??= new TextDecoder(); + + const bytes = bit_array.buffer; + + const m = bytes.length; + const k = m % 3; + const n = Math.floor(m / 3) * 4 + (k && k + 1); + const N = Math.ceil(m / 3) * 4; + const encoded = new Uint8Array(N); + + for (let i = 0, j = 0; j < m; i += 4, j += 3) { + const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0); + encoded[i] = b64EncodeLookup[y >> 18]; + encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f]; + encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f]; + encoded[i + 3] = b64EncodeLookup[y & 0x3f]; + } + + let base64 = b64TextDecoder.decode(new Uint8Array(encoded.buffer, 0, n)); + + if (padding) { + if (k === 1) { + base64 += "=="; + } + else if (k === 2) { + base64 += "="; + } + } + + return base64; } // From https://developer.mozilla.org/en-US/docs/Glossary/Base64 diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 7c4a5f4..1c3e7db 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -1,6 +1,7 @@ import gleam/bit_array import gleam/result import gleam/should +import gleam/string pub fn byte_size_test() { bit_array.byte_size(bit_array.from_string("hello")) @@ -145,6 +146,10 @@ pub fn base64_encode_test() { |> bit_array.base64_encode(True) |> should.equal("/3/+/A==") + <<255, 127, 254, 252, 100>> + |> bit_array.base64_encode(True) + |> should.equal("/3/+/GQ=") + <<255, 127, 254, 252>> |> bit_array.base64_encode(False) |> should.equal("/3/+/A") @@ -156,6 +161,12 @@ pub fn base64_encode_test() { <<>> |> bit_array.base64_encode(True) |> should.equal("") + + string.repeat("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 1024 * 32) + |> bit_array.from_string + |> bit_array.base64_encode(True) + |> string.length + |> should.equal(1_398_104) } pub fn base64_decode_test() { |