diff options
-rw-r--r-- | src/lustre-escape.ffi.mjs | 11 | ||||
-rw-r--r-- | src/lustre/internals/escape.gleam | 291 | ||||
-rw-r--r-- | src/lustre/internals/vdom.gleam | 31 | ||||
-rw-r--r-- | src/lustre_escape_ffi.erl | 4 |
4 files changed, 313 insertions, 24 deletions
diff --git a/src/lustre-escape.ffi.mjs b/src/lustre-escape.ffi.mjs new file mode 100644 index 0000000..4ef05be --- /dev/null +++ b/src/lustre-escape.ffi.mjs @@ -0,0 +1,11 @@ +export function slice(string, from, len) { + return string.slice(from, from + len); +} + +export function first(string) { + return string.slice(0, 1); +} + +export function drop_first(string) { + return string.slice(1); +} diff --git a/src/lustre/internals/escape.gleam b/src/lustre/internals/escape.gleam new file mode 100644 index 0000000..e0e3ebd --- /dev/null +++ b/src/lustre/internals/escape.gleam @@ -0,0 +1,291 @@ +import gleam/list + +// ERLANG VERSION -------------------------------------------------------------- + +@target(erlang) +import gleam/bit_array + +@target(erlang) +pub fn escape(text: String) -> String { + // This version is highly optimised for the Erlang target, it treats Strings + // as BitArrays and slices them to share as much as possible. You can find + // more details in `do_escape`. + let bits = <<text:utf8>> + let acc = do_escape(bits, 0, bits, []) + + list.reverse(acc) + |> bit_array.concat + |> coerce +} + +@target(erlang) +@external(erlang, "lustre_escape_ffi", "coerce") +fn coerce(bit_array: BitArray) -> String + +// A possible way to escape chars would be to split the string into graphemes, +// traverse those one by one and accumulate them back into a string escaping +// ">", "<", etc. as we see them. +// +// However, we can be a lot more performant by working directly on the +// `BitArray` representing a Gleam UTF-8 String. +// This means that, instead of popping a grapheme at a time, we can work +// directly on BitArray slices: this has the big advantage of making sure we +// share as much as possible with the original string without having to build +// a new one from scratch. 
+// +@target(erlang) +fn do_escape( + bin: BitArray, + skip: Int, + original: BitArray, + acc: List(BitArray), +) -> List(BitArray) { + case bin { + // If we find a char to escape we just advance the `skip` counter so that + // it will be ignored in the following slice, then we append the escaped + // version to the accumulator. + <<"<":utf8, rest:bits>> -> { + let acc = [<<"&lt;":utf8>>, ..acc] + do_escape(rest, skip + 1, original, acc) + } + + <<">":utf8, rest:bits>> -> { + let acc = [<<"&gt;":utf8>>, ..acc] + do_escape(rest, skip + 1, original, acc) + } + + <<"&":utf8, rest:bits>> -> { + let acc = [<<"&amp;":utf8>>, ..acc] + do_escape(rest, skip + 1, original, acc) + } + + <<"\"":utf8, rest:bits>> -> { + let acc = [<<"&quot;":utf8>>, ..acc] + do_escape(rest, skip + 1, original, acc) + } + + <<"'":utf8, rest:bits>> -> { + let acc = [<<"&#39;":utf8>>, ..acc] + do_escape(rest, skip + 1, original, acc) + } + + // For any other bit that doesn't need to be escaped we go into an inner + // loop, consuming as much "non-escapable" chars as possible. + <<_char, rest:bits>> -> do_escape_normal(rest, skip, original, acc, 1) + + <<>> -> acc + + _ -> panic as "non byte aligned string, all strings should be byte aligned" + } +} + +@target(erlang) +fn do_escape_normal( + bin: BitArray, + skip: Int, + original: BitArray, + acc: List(BitArray), + len: Int, +) -> List(BitArray) { + // Remember, if we're here it means we've found a char that doesn't need to be + // escaped, so what we want to do is advance the `len` counter until we reach + // a char that _does_ need to be escaped and take the slice going from + // `skip` with size `len`. + // + // Imagine we're escaping this string: "abc<def&ghi" and we've reached 'd': + // ``` + // abc<def&ghi + // ^ `skip` points here + // ``` + // We're going to be increasing `len` until we reach the '&': + // ``` + // abc<def&ghi + // ^^^ len will be 3 when we reach the '&' that needs escaping + // ``` + // So we take the slice corresponding to "def". 
+ // + case bin { + // If we reach a char that has to be escaped we append the slice starting + // from `skip` with size `len` and the escaped char. + // This is what allows us to share as much of the original string as + // possible: we only allocate a new BitArray for the escaped chars, + // everything else is just a slice of the original String. + <<"<":utf8, rest:bits>> -> { + let assert Ok(slice) = bit_array.slice(original, skip, len) + let acc = [<<"&lt;":utf8>>, slice, ..acc] + do_escape(rest, skip + len + 1, original, acc) + } + + <<">":utf8, rest:bits>> -> { + let assert Ok(slice) = bit_array.slice(original, skip, len) + let acc = [<<"&gt;":utf8>>, slice, ..acc] + do_escape(rest, skip + len + 1, original, acc) + } + + <<"&":utf8, rest:bits>> -> { + let assert Ok(slice) = bit_array.slice(original, skip, len) + let acc = [<<"&amp;":utf8>>, slice, ..acc] + do_escape(rest, skip + len + 1, original, acc) + } + + <<"\"":utf8, rest:bits>> -> { + let assert Ok(slice) = bit_array.slice(original, skip, len) + let acc = [<<"&quot;":utf8>>, slice, ..acc] + do_escape(rest, skip + len + 1, original, acc) + } + + <<"'":utf8, rest:bits>> -> { + let assert Ok(slice) = bit_array.slice(original, skip, len) + let acc = [<<"&#39;":utf8>>, slice, ..acc] + do_escape(rest, skip + len + 1, original, acc) + } + + // If a char doesn't need escaping we keep increasing the length of the + // slice we're going to take. 
+ <<_char, rest:bits>> -> do_escape_normal(rest, skip, original, acc, len + 1) + + <<>> -> + case skip { + 0 -> [original] + _ -> { + let assert Ok(slice) = bit_array.slice(original, skip, len) + [slice, ..acc] + } + } + + _ -> panic as "non byte aligned string, all strings should be byte aligned" + } +} + +// JAVASCRIPT VERSION ---------------------------------------------------------- + +@target(javascript) +import gleam/string + +@target(javascript) +pub fn escape(text: String) -> String { + do_escape(text, 0, text, [], 0, False) + |> list.reverse + |> string.join(with: "") +} + +// The logic behind this function is exactly the same as the erlang one: we +// iterate the string byte by byte and only ever take slices of it (constant +// time operation that ensures maximum sharing). However, this implementation is +// a little more convoluted since we cannot define it as two mutually recursive +// functions as we did with the Erlang one (or it won't be tail call optimised +// on the JS target). 
+// +@target(javascript) +fn do_escape( + string: String, + skip: Int, + original: String, + acc: List(String), + len: Int, + found_normal: Bool, +) -> List(String) { + case found_normal, first(string) { + False, "<" -> { + let rest = drop_first(string) + let acc = ["&lt;", ..acc] + do_escape(rest, skip + 1, original, acc, 0, False) + } + + False, ">" -> { + let rest = drop_first(string) + let acc = ["&gt;", ..acc] + do_escape(rest, skip + 1, original, acc, 0, False) + } + + False, "&" -> { + let rest = drop_first(string) + let acc = ["&amp;", ..acc] + do_escape(rest, skip + 1, original, acc, 0, False) + } + + False, "\"" -> { + let rest = drop_first(string) + let acc = ["&quot;", ..acc] + do_escape(rest, skip + 1, original, acc, 0, False) + } + + False, "'" -> { + let rest = drop_first(string) + let acc = ["&#39;", ..acc] + do_escape(rest, skip + 1, original, acc, 0, False) + } + + False, "" -> acc + + // For any other bit that doesn't need to be escaped we go into an inner + // loop, consuming as much "non-escapable" chars as possible. 
+ False, _ -> { + let rest = drop_first(string) + do_escape(rest, skip, original, acc, 1, True) + } + + True, "<" -> { + let rest = drop_first(string) + let slice = slice(original, skip, len) + let acc = ["&lt;", slice, ..acc] + do_escape(rest, skip + len + 1, original, acc, 0, False) + } + + True, ">" -> { + let rest = drop_first(string) + let slice = slice(original, skip, len) + let acc = ["&gt;", slice, ..acc] + do_escape(rest, skip + len + 1, original, acc, 0, False) + } + + True, "&" -> { + let rest = drop_first(string) + let slice = slice(original, skip, len) + let acc = ["&amp;", slice, ..acc] + do_escape(rest, skip + len + 1, original, acc, 0, False) + } + + True, "\"" -> { + let rest = drop_first(string) + let slice = slice(original, skip, len) + let acc = ["&quot;", slice, ..acc] + do_escape(rest, skip + len + 1, original, acc, 0, False) + } + + True, "'" -> { + let rest = drop_first(string) + let slice = slice(original, skip, len) + let acc = ["&#39;", slice, ..acc] + do_escape(rest, skip + len + 1, original, acc, 0, False) + } + + True, "" -> + case skip { + 0 -> [original] + _ -> { + let slice = slice(original, skip, len) + [slice, ..acc] + } + } + + // If a char doesn't need escaping we keep increasing the length of the + // slice we're going to take. 
+ True, _ -> { + let rest = drop_first(string) + do_escape(rest, skip, original, acc, len + 1, True) + } + } +} + +@target(javascript) +@external(javascript, "../../lustre-escape.ffi.mjs", "first") +fn first(_string: String) -> String + +@target(javascript) +@external(javascript, "../../lustre-escape.ffi.mjs", "drop_first") +fn drop_first(_string: String) -> String + +@target(javascript) +@external(javascript, "../../lustre-escape.ffi.mjs", "slice") +fn slice(_string: String, _from: Int, _to: Int) -> String diff --git a/src/lustre/internals/vdom.gleam b/src/lustre/internals/vdom.gleam index c4cab69..91672cc 100644 --- a/src/lustre/internals/vdom.gleam +++ b/src/lustre/internals/vdom.gleam @@ -8,6 +8,7 @@ import gleam/json.{type Json} import gleam/list import gleam/string import gleam/string_builder.{type StringBuilder} +import lustre/internals/escape.{escape} // TYPES ----------------------------------------------------------------------- @@ -198,7 +199,7 @@ fn do_element_to_string_builder( case element { Text("") -> string_builder.new() Text(content) if raw_text -> string_builder.from_string(content) - Text(content) -> string_builder.from_string(escape("", content)) + Text(content) -> string_builder.from_string(escape(content)) Map(subtree) -> do_element_to_string_builder(subtree(), raw_text) @@ -296,26 +297,26 @@ fn attributes_to_string_builder( ) Ok(#("class", val)) if class == "" -> #( html, - escape("", val), + escape(val), style, inner_html, ) Ok(#("class", val)) -> #( html, - class <> " " <> escape("", val), + class <> " " <> escape(val), style, inner_html, ) Ok(#("style", val)) if style == "" -> #( html, class, - escape("", val), + escape(val), inner_html, ) Ok(#("style", val)) -> #( html, class, - style <> " " <> escape("", val), + style <> " " <> escape(val), inner_html, ) Ok(#(key, "")) -> #( @@ -325,10 +326,7 @@ fn attributes_to_string_builder( inner_html, ) Ok(#(key, val)) -> #( - string_builder.append( - html, - " " <> key <> "=\"" <> escape("", val) 
<> "\"", - ), + string_builder.append(html, " " <> key <> "=\"" <> escape(val) <> "\""), class, style, inner_html, @@ -354,21 +352,6 @@ fn attributes_to_string_builder( // UTILS ----------------------------------------------------------------------- -fn escape(escaped: String, content: String) -> String { - case content { - "<" <> rest -> escape(escaped <> "&lt;", rest) - ">" <> rest -> escape(escaped <> "&gt;", rest) - "&" <> rest -> escape(escaped <> "&amp;", rest) - "\"" <> rest -> escape(escaped <> "&quot;", rest) - "'" <> rest -> escape(escaped <> "&#39;", rest) - _ -> - case string.pop_grapheme(content) { - Ok(#(x, xs)) -> escape(escaped <> x, xs) - Error(_) -> escaped - } - } -} - fn attribute_to_string_parts( attr: Attribute(msg), ) -> Result(#(String, String), Nil) { diff --git a/src/lustre_escape_ffi.erl b/src/lustre_escape_ffi.erl new file mode 100644 index 0000000..49baec2 --- /dev/null +++ b/src/lustre_escape_ffi.erl @@ -0,0 +1,4 @@ +-module(lustre_escape_ffi). +-export([coerce/1]). + +coerce(X) -> X. |