aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Viney <richard.viney@gmail.com>2024-06-21 17:01:37 +1200
committerLouis Pilfold <louis@lpil.uk>2024-06-21 10:49:14 +0100
commit6a6d5adf356993031ab2e79012e436dd2567a2f9 (patch)
treea1da55c581271e0160b47e3be35211f456719099
parentddd52834c46a9e8c608cbdc347a71b9ceeb361df (diff)
downloadgleam_stdlib-6a6d5adf356993031ab2e79012e436dd2567a2f9.tar.gz
gleam_stdlib-6a6d5adf356993031ab2e79012e436dd2567a2f9.zip
Base64 encoding optimisations. Fix encoding of large bit arrays on JS.
This native JS implementation is 13-14x faster in simple benchmarks.
-rw-r--r--CHANGELOG.md2
-rw-r--r--src/gleam/bit_array.gleam12
-rw-r--r--src/gleam_stdlib.erl6
-rw-r--r--src/gleam_stdlib.mjs45
-rw-r--r--test/gleam/bit_array_test.gleam11
5 files changed, 61 insertions, 15 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8272cca..ea30f7d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,8 @@
- The behaviour of the string trim functions is now consistent across targets.
- `iterator.yield` now yields values without waiting for the next one to become
available.
+- Base64 encoding speed improvements. Encoding of bit arrays over ~100KiB to
+ Base64 on JavaScript no longer throws an exception.
## v0.38.0 - 2024-05-24
diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam
index c112eb3..75ac238 100644
--- a/src/gleam/bit_array.gleam
+++ b/src/gleam/bit_array.gleam
@@ -107,17 +107,9 @@ pub fn concat(bit_arrays: List(BitArray)) -> BitArray
/// Encodes a BitArray into a base 64 encoded string.
///
-pub fn base64_encode(input: BitArray, padding: Bool) -> String {
- let encoded = encode64(input)
- case padding {
- True -> encoded
- False -> string.replace(encoded, "=", "")
- }
-}
-
-@external(erlang, "base64", "encode")
+@external(erlang, "gleam_stdlib", "bit_array_base64_encode")
@external(javascript, "../gleam_stdlib.mjs", "encode64")
-fn encode64(a: BitArray) -> String
+pub fn base64_encode(input: BitArray, padding: Bool) -> String // No Gleam body: both targets are implemented by the @external functions above, which now receive `padding` directly.
/// Decodes a base 64 encoded string into a `BitArray`.
///
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index b95d9f8..2596f41 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -8,7 +8,8 @@
bit_array_int_to_u32/1, bit_array_int_from_u32/1, decode_result/1,
bit_array_slice/3, decode_bit_array/1, compile_regex/2, regex_scan/2,
percent_encode/1, percent_decode/1, regex_check/2, regex_split/2,
- base_decode64/1, parse_query/1, bit_array_concat/1, size_of_tuple/1,
+ base_decode64/1, parse_query/1, bit_array_concat/1,
+ bit_array_base64_encode/2, size_of_tuple/1,
decode_tuple/1, decode_tuple2/1, decode_tuple3/1, decode_tuple4/1,
decode_tuple5/1, decode_tuple6/1, tuple_get/2, classify_dynamic/1, print/1,
println/1, print_error/1, println_error/1, inspect/1, float_to_string/1,
@@ -201,6 +202,9 @@ string_pop_grapheme(String) ->
bit_array_concat(BitArrays) ->
list_to_bitstring(BitArrays).
+bit_array_base64_encode(Bin, Padding) -> % Base64-encodes Bin; Padding is forwarded unchanged as the encoder's `padding` option.
+ base64:encode(Bin, #{padding => Padding}). % NOTE(review): the options-map form of base64:encode/2 appears to require OTP 26+ - confirm against the minimum supported OTP release.
+
bit_array_slice(Bin, Pos, Len) ->
try {ok, binary:part(Bin, Pos, Len)}
catch error:badarg -> {error, nil}
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index a837e1d..c4111f9 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -504,10 +504,47 @@ export function parse_query(query) {
}
}
-// From https://developer.mozilla.org/en-US/docs/Glossary/Base64
-export function encode64(bit_array) {
- const binString = String.fromCodePoint(...bit_array.buffer);
- return btoa(binString);
+const b64EncodeLookup = [ // ASCII codes of the 64 Base64 alphabet characters, indexed by 6-bit value: A-Z (65-90), a-z (97-122), 0-9 (48-57), '+' (43), '/' (47).
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
+ 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+ 122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 43, 47,
+];
+
+let b64TextDecoder; // Module-level TextDecoder, created by the first encode64 call and reused afterwards.
+
+// Implementation based on https://github.com/mitschabaude/fast-base64/blob/main/js.js
+export function encode64(bit_array, padding) { // Returns the Base64 text for bit_array.buffer; '=' padding is appended only when `padding` is truthy.
+ b64TextDecoder ??= new TextDecoder();
+
+ const bytes = bit_array.buffer; // Presumably a Uint8Array of the bit array's bytes - it is only indexed and measured with .length here; verify against the BitArray class.
+
+ const m = bytes.length; // Input length in bytes.
+ const k = m % 3; // Bytes left over after the last complete 3-byte group (0, 1 or 2).
+ const n = Math.floor(m / 3) * 4 + (k && k + 1); // Significant output characters: 4 per complete group, plus k + 1 for a partial group.
+ const N = Math.ceil(m / 3) * 4; // Buffer size rounded up to whole 4-character groups, so the loop can always write 4 characters.
+ const encoded = new Uint8Array(N);
+
+ for (let i = 0, j = 0; j < m; i += 4, j += 3) {
+ const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0); // Pack up to 3 bytes into 24 bits; reads past the end yield undefined, which the bitwise operators coerce to 0.
+ encoded[i] = b64EncodeLookup[y >> 18]; // Top 6 bits.
+ encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f];
+ encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f];
+ encoded[i + 3] = b64EncodeLookup[y & 0x3f]; // Bottom 6 bits.
+ }
+
+ let base64 = b64TextDecoder.decode(new Uint8Array(encoded.buffer, 0, n)); // Decode only the first n bytes, dropping filler characters written for the missing bytes of a partial group.
+
+ if (padding) {
+ if (k === 1) { // One leftover byte gives 2 significant characters, so 2 padding characters complete the group.
+ base64 += "==";
+ }
+ else if (k === 2) { // Two leftover bytes give 3 significant characters, so 1 padding character completes the group.
+ base64 += "=";
+ }
+ }
+
+ return base64;
}
// From https://developer.mozilla.org/en-US/docs/Glossary/Base64
diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam
index 7c4a5f4..1c3e7db 100644
--- a/test/gleam/bit_array_test.gleam
+++ b/test/gleam/bit_array_test.gleam
@@ -1,6 +1,7 @@
import gleam/bit_array
import gleam/result
import gleam/should
+import gleam/string
pub fn byte_size_test() {
bit_array.byte_size(bit_array.from_string("hello"))
@@ -145,6 +146,10 @@ pub fn base64_encode_test() {
|> bit_array.base64_encode(True)
|> should.equal("/3/+/A==")
+ <<255, 127, 254, 252, 100>>
+ |> bit_array.base64_encode(True)
+ |> should.equal("/3/+/GQ=")
+
<<255, 127, 254, 252>>
|> bit_array.base64_encode(False)
|> should.equal("/3/+/A")
@@ -156,6 +161,12 @@ pub fn base64_encode_test() {
<<>>
|> bit_array.base64_encode(True)
|> should.equal("")
+
+ string.repeat("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 1024 * 32)
+ |> bit_array.from_string
+ |> bit_array.base64_encode(True)
+ |> string.length
+ |> should.equal(1_398_104)
}
pub fn base64_decode_test() {