aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGiacomo Cavalieri <giacomo.cavalieri@icloud.com>2024-06-05 23:40:38 +0200
committerGitHub <noreply@github.com>2024-06-05 22:40:38 +0100
commit578c85bb3db98a88d8ab3533be101469c4eac7a8 (patch)
treefae4cd25bfb177b7df21cef5dbf3cb2a2b432458 /src
parentfbd7c12be933244ee91a94d1c3d665d713428d47 (diff)
downloadlustre-578c85bb3db98a88d8ab3533be101469c4eac7a8.tar.gz
lustre-578c85bb3db98a88d8ab3533be101469c4eac7a8.zip
:zap: Optimise escape function (#133)
Diffstat (limited to 'src')
-rw-r--r--src/lustre-escape.ffi.mjs11
-rw-r--r--src/lustre/internals/escape.gleam291
-rw-r--r--src/lustre/internals/vdom.gleam31
-rw-r--r--src/lustre_escape_ffi.erl4
4 files changed, 313 insertions, 24 deletions
diff --git a/src/lustre-escape.ffi.mjs b/src/lustre-escape.ffi.mjs
new file mode 100644
index 0000000..4ef05be
--- /dev/null
+++ b/src/lustre-escape.ffi.mjs
@@ -0,0 +1,11 @@
+export function slice(string, from, len) {
+ return string.slice(from, from + len);
+}
+
+export function first(string) {
+ return string.slice(0, 1);
+}
+
+export function drop_first(string) {
+ return string.slice(1);
+}
diff --git a/src/lustre/internals/escape.gleam b/src/lustre/internals/escape.gleam
new file mode 100644
index 0000000..e0e3ebd
--- /dev/null
+++ b/src/lustre/internals/escape.gleam
@@ -0,0 +1,291 @@
+import gleam/list
+
+// ERLANG VERSION --------------------------------------------------------------
+
+@target(erlang)
+import gleam/bit_array
+
+@target(erlang)
+pub fn escape(text: String) -> String {
+  // The Erlang implementation looks at the String as a BitArray and only ever
+  // takes slices of it, so that as much as possible is shared with the input.
+  // `do_escape` hands the pieces back in reverse order, hence the reverse
+  // before gluing them together. More details can be found in `do_escape`.
+  let original = <<text:utf8>>
+
+  do_escape(original, 0, original, [])
+  |> list.reverse
+  |> bit_array.concat
+  |> coerce
+}
+
+// Turns a BitArray into a String. The Erlang side (`lustre_escape_ffi:coerce`)
+// is the identity function, so no validation is performed here: callers must
+// only pass bits they know to be a valid UTF-8 string.
+@target(erlang)
+@external(erlang, "lustre_escape_ffi", "coerce")
+fn coerce(bit_array: BitArray) -> String
+
+// A possible way to escape chars would be to split the string into graphemes,
+// traverse those one by one and accumulate them back into a string escaping
+// ">", "<", etc. as we see them.
+//
+// However, we can be a lot more performant by working directly on the
+// `BitArray` representing a Gleam UTF-8 String.
+// This means that, instead of popping a grapheme at a time, we can work
+// directly on BitArray slices: this has the big advantage of making sure we
+// share as much as possible with the original string without having to build
+// a new one from scratch.
+//
+// Arguments:
+// - `bin` is the part of the input that still has to be scanned.
+// - `skip` is the number of bytes of `original` consumed so far, that is the
+//   offset in `original` at which `bin` starts.
+// - `original` is the whole input, kept around so that we can slice it.
+// - `acc` holds the pieces of the output in reverse order (the caller is the
+//   one reversing it).
+//
+@target(erlang)
+fn do_escape(
+ bin: BitArray,
+ skip: Int,
+ original: BitArray,
+ acc: List(BitArray),
+) -> List(BitArray) {
+ case bin {
+ // If we find a char to escape we just advance the `skip` counter so that
+ // it will be ignored in the following slice, then we prepend the escaped
+ // version to the accumulator.
+ <<"<":utf8, rest:bits>> -> {
+ let acc = [<<"&lt;":utf8>>, ..acc]
+ do_escape(rest, skip + 1, original, acc)
+ }
+
+ <<">":utf8, rest:bits>> -> {
+ let acc = [<<"&gt;":utf8>>, ..acc]
+ do_escape(rest, skip + 1, original, acc)
+ }
+
+ <<"&":utf8, rest:bits>> -> {
+ let acc = [<<"&amp;":utf8>>, ..acc]
+ do_escape(rest, skip + 1, original, acc)
+ }
+
+ <<"\"":utf8, rest:bits>> -> {
+ let acc = [<<"&quot;":utf8>>, ..acc]
+ do_escape(rest, skip + 1, original, acc)
+ }
+
+ <<"'":utf8, rest:bits>> -> {
+ let acc = [<<"&#39;":utf8>>, ..acc]
+ do_escape(rest, skip + 1, original, acc)
+ }
+
+ // For any other byte that doesn't need to be escaped we go into an inner
+ // loop, consuming as many "non-escapable" chars as possible. `skip` is left
+ // untouched: it marks the start of the slice, whose length starts at 1.
+ <<_char, rest:bits>> -> do_escape_normal(rest, skip, original, acc, 1)
+
+ // Nothing left to scan, we're done.
+ <<>> -> acc
+
+ // The only remaining possibility is a trailing chunk shorter than a byte.
+ _ -> panic as "non byte aligned string, all strings should be byte aligned"
+ }
+}
+
+// Inner loop of `do_escape`: consumes a run of bytes that don't need escaping.
+//
+// - `bin` is the part of the input that still has to be scanned.
+// - `skip` is the offset in `original` at which the current run starts.
+// - `original` is the whole input, kept around so that we can slice it.
+// - `acc` holds the pieces of the output in reverse order.
+// - `len` is the number of bytes of the current run seen so far.
+//
+@target(erlang)
+fn do_escape_normal(
+ bin: BitArray,
+ skip: Int,
+ original: BitArray,
+ acc: List(BitArray),
+ len: Int,
+) -> List(BitArray) {
+ // Remember, if we're here it means we've found a char that doesn't need to be
+ // escaped, so what we want to do is advance the `len` counter until we reach
+ // a char that _does_ need to be escaped and take the slice going from
+ // `skip` with size `len`.
+ //
+ // Imagine we're escaping this string: "abc<def&ghi" and we've reached 'd':
+ // ```
+ // abc<def&ghi
+ //     ^ `skip` points here
+ // ```
+ // We're going to be increasing `len` until we reach the '&':
+ // ```
+ // abc<def&ghi
+ //     ^^^ len will be 3 when we reach the '&' that needs escaping
+ // ```
+ // So we take the slice corresponding to "def".
+ //
+ case bin {
+ // If we reach a char that has to be escaped we prepend the slice starting
+ // from `skip` with size `len` and the escaped char to the accumulator.
+ // This is what allows us to share as much of the original string as
+ // possible: we only allocate a new BitArray for the escaped chars,
+ // everything else is just a slice of the original String.
+ // We then go back to `do_escape`, skipping both the slice and the escaped
+ // char (that's the `skip + len + 1`).
+ <<"<":utf8, rest:bits>> -> {
+ let assert Ok(slice) = bit_array.slice(original, skip, len)
+ let acc = [<<"&lt;":utf8>>, slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc)
+ }
+
+ <<">":utf8, rest:bits>> -> {
+ let assert Ok(slice) = bit_array.slice(original, skip, len)
+ let acc = [<<"&gt;":utf8>>, slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc)
+ }
+
+ <<"&":utf8, rest:bits>> -> {
+ let assert Ok(slice) = bit_array.slice(original, skip, len)
+ let acc = [<<"&amp;":utf8>>, slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc)
+ }
+
+ <<"\"":utf8, rest:bits>> -> {
+ let assert Ok(slice) = bit_array.slice(original, skip, len)
+ let acc = [<<"&quot;":utf8>>, slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc)
+ }
+
+ <<"'":utf8, rest:bits>> -> {
+ let assert Ok(slice) = bit_array.slice(original, skip, len)
+ let acc = [<<"&#39;":utf8>>, slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc)
+ }
+
+ // If a char doesn't need escaping we keep increasing the length of the
+ // slice we're going to take.
+ <<_char, rest:bits>> -> do_escape_normal(rest, skip, original, acc, len + 1)
+
+ // We've reached the end of the input while still inside a run.
+ <<>> ->
+ case skip {
+ // `skip` only ever grows when a char gets escaped, so if it is still 0
+ // nothing had to be escaped and we can return the input as it is.
+ 0 -> [original]
+ // Otherwise the run we're in is the last piece of the output.
+ _ -> {
+ let assert Ok(slice) = bit_array.slice(original, skip, len)
+ [slice, ..acc]
+ }
+ }
+
+ // The only remaining possibility is a trailing chunk shorter than a byte.
+ _ -> panic as "non byte aligned string, all strings should be byte aligned"
+ }
+}
+
+// JAVASCRIPT VERSION ----------------------------------------------------------
+
+@target(javascript)
+import gleam/string
+
+@target(javascript)
+pub fn escape(text: String) -> String {
+  // `do_escape` hands the pieces back in reverse order, so we have to reverse
+  // them before joining everything into the final string.
+  let pieces = do_escape(text, 0, text, [], 0, False)
+
+  string.join(list.reverse(pieces), with: "")
+}
+
+// The logic behind this function is exactly the same as the Erlang one: we
+// iterate the string one UTF-16 code unit at a time and only ever take slices
+// of it (constant time operation that ensures maximum sharing). However, this
+// implementation is a little more convoluted since we cannot define it as two
+// mutually recursive functions as we did with the Erlang one (or it won't be
+// tail call optimised on the JS target).
+//
+// Arguments:
+// - `string` is the part of the input that still has to be scanned.
+// - `skip` is the index in `original` at which the next slice starts.
+// - `original` is the whole input, kept around so that we can slice it.
+// - `acc` holds the pieces of the output in reverse order (the caller is the
+//   one reversing it).
+// - `len` is the length of the run of chars that don't need escaping we're
+//   currently in; it is 0 when we're not in one.
+// - `found_normal` is `True` while we are inside such a run: it plays the
+//   role of the second function of the Erlang version.
+//
+@target(javascript)
+fn do_escape(
+ string: String,
+ skip: Int,
+ original: String,
+ acc: List(String),
+ len: Int,
+ found_normal: Bool,
+) -> List(String) {
+ case found_normal, first(string) {
+ // We're not inside a run and we find a char to escape: we advance `skip`
+ // so that the char is ignored by the following slice and prepend its
+ // escaped version to the accumulator.
+ False, "<" -> {
+ let rest = drop_first(string)
+ let acc = ["&lt;", ..acc]
+ do_escape(rest, skip + 1, original, acc, 0, False)
+ }
+
+ False, ">" -> {
+ let rest = drop_first(string)
+ let acc = ["&gt;", ..acc]
+ do_escape(rest, skip + 1, original, acc, 0, False)
+ }
+
+ False, "&" -> {
+ let rest = drop_first(string)
+ let acc = ["&amp;", ..acc]
+ do_escape(rest, skip + 1, original, acc, 0, False)
+ }
+
+ False, "\"" -> {
+ let rest = drop_first(string)
+ let acc = ["&quot;", ..acc]
+ do_escape(rest, skip + 1, original, acc, 0, False)
+ }
+
+ False, "'" -> {
+ let rest = drop_first(string)
+ let acc = ["&#39;", ..acc]
+ do_escape(rest, skip + 1, original, acc, 0, False)
+ }
+
+ // `first` returns "" for the empty string: nothing left to scan.
+ False, "" -> acc
+
+ // For any other char that doesn't need to be escaped we go into an inner
+ // loop, consuming as many "non-escapable" chars as possible.
+ False, _ -> {
+ let rest = drop_first(string)
+ do_escape(rest, skip, original, acc, 1, True)
+ }
+
+ // We're inside a run and we reach a char that has to be escaped: we
+ // prepend the slice starting from `skip` with size `len` and the escaped
+ // char, then leave the inner loop skipping both of them.
+ True, "<" -> {
+ let rest = drop_first(string)
+ let slice = slice(original, skip, len)
+ let acc = ["&lt;", slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc, 0, False)
+ }
+
+ True, ">" -> {
+ let rest = drop_first(string)
+ let slice = slice(original, skip, len)
+ let acc = ["&gt;", slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc, 0, False)
+ }
+
+ True, "&" -> {
+ let rest = drop_first(string)
+ let slice = slice(original, skip, len)
+ let acc = ["&amp;", slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc, 0, False)
+ }
+
+ True, "\"" -> {
+ let rest = drop_first(string)
+ let slice = slice(original, skip, len)
+ let acc = ["&quot;", slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc, 0, False)
+ }
+
+ True, "'" -> {
+ let rest = drop_first(string)
+ let slice = slice(original, skip, len)
+ let acc = ["&#39;", slice, ..acc]
+ do_escape(rest, skip + len + 1, original, acc, 0, False)
+ }
+
+ // We've reached the end of the input while still inside a run.
+ True, "" ->
+ case skip {
+ // `skip` only ever grows when a char gets escaped, so if it is still 0
+ // nothing had to be escaped and we can return the input as it is.
+ 0 -> [original]
+ // Otherwise the run we're in is the last piece of the output.
+ _ -> {
+ let slice = slice(original, skip, len)
+ [slice, ..acc]
+ }
+ }
+
+ // If a char doesn't need escaping we keep increasing the length of the
+ // slice we're going to take.
+ True, _ -> {
+ let rest = drop_first(string)
+ do_escape(rest, skip, original, acc, len + 1, True)
+ }
+ }
+}
+
+// Returns the first char of the string, or "" if the string is empty.
+// Implemented by `first` in `lustre-escape.ffi.mjs` as `string.slice(0, 1)`.
+@target(javascript)
+@external(javascript, "../../lustre-escape.ffi.mjs", "first")
+fn first(_string: String) -> String
+
+// Returns the string without its first char.
+// Implemented by `drop_first` in `lustre-escape.ffi.mjs` as `string.slice(1)`.
+@target(javascript)
+@external(javascript, "../../lustre-escape.ffi.mjs", "drop_first")
+fn drop_first(_string: String) -> String
+
+// Returns the slice of `_string` that starts at index `_from` and is `_len`
+// chars long. Note that the third argument is a length and not an end index:
+// `slice` in `lustre-escape.ffi.mjs` is `string.slice(from, from + len)`.
+@target(javascript)
+@external(javascript, "../../lustre-escape.ffi.mjs", "slice")
+fn slice(_string: String, _from: Int, _len: Int) -> String
diff --git a/src/lustre/internals/vdom.gleam b/src/lustre/internals/vdom.gleam
index c4cab69..91672cc 100644
--- a/src/lustre/internals/vdom.gleam
+++ b/src/lustre/internals/vdom.gleam
@@ -8,6 +8,7 @@ import gleam/json.{type Json}
import gleam/list
import gleam/string
import gleam/string_builder.{type StringBuilder}
+import lustre/internals/escape.{escape}
// TYPES -----------------------------------------------------------------------
@@ -198,7 +199,7 @@ fn do_element_to_string_builder(
case element {
Text("") -> string_builder.new()
Text(content) if raw_text -> string_builder.from_string(content)
- Text(content) -> string_builder.from_string(escape("", content))
+ Text(content) -> string_builder.from_string(escape(content))
Map(subtree) -> do_element_to_string_builder(subtree(), raw_text)
@@ -296,26 +297,26 @@ fn attributes_to_string_builder(
)
Ok(#("class", val)) if class == "" -> #(
html,
- escape("", val),
+ escape(val),
style,
inner_html,
)
Ok(#("class", val)) -> #(
html,
- class <> " " <> escape("", val),
+ class <> " " <> escape(val),
style,
inner_html,
)
Ok(#("style", val)) if style == "" -> #(
html,
class,
- escape("", val),
+ escape(val),
inner_html,
)
Ok(#("style", val)) -> #(
html,
class,
- style <> " " <> escape("", val),
+ style <> " " <> escape(val),
inner_html,
)
Ok(#(key, "")) -> #(
@@ -325,10 +326,7 @@ fn attributes_to_string_builder(
inner_html,
)
Ok(#(key, val)) -> #(
- string_builder.append(
- html,
- " " <> key <> "=\"" <> escape("", val) <> "\"",
- ),
+ string_builder.append(html, " " <> key <> "=\"" <> escape(val) <> "\""),
class,
style,
inner_html,
@@ -354,21 +352,6 @@ fn attributes_to_string_builder(
// UTILS -----------------------------------------------------------------------
-fn escape(escaped: String, content: String) -> String {
- case content {
- "<" <> rest -> escape(escaped <> "&lt;", rest)
- ">" <> rest -> escape(escaped <> "&gt;", rest)
- "&" <> rest -> escape(escaped <> "&amp;", rest)
- "\"" <> rest -> escape(escaped <> "&quot;", rest)
- "'" <> rest -> escape(escaped <> "&#39;", rest)
- _ ->
- case string.pop_grapheme(content) {
- Ok(#(x, xs)) -> escape(escaped <> x, xs)
- Error(_) -> escaped
- }
- }
-}
-
fn attribute_to_string_parts(
attr: Attribute(msg),
) -> Result(#(String, String), Nil) {
diff --git a/src/lustre_escape_ffi.erl b/src/lustre_escape_ffi.erl
new file mode 100644
index 0000000..49baec2
--- /dev/null
+++ b/src/lustre_escape_ffi.erl
@@ -0,0 +1,4 @@
+-module(lustre_escape_ffi).
+-export([coerce/1]).
+
+%% Identity function: returns its argument untouched. No check is performed
+%% on the value.
+coerce(Value) ->
+    Value.