diff options
author | inoas <mail@inoas.com> | 2022-11-29 07:17:37 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2022-12-22 20:44:33 +0000 |
commit | 0d7e9c0a6741029d63b78cdab5a6b690ab5a9669 (patch) | |
tree | 8afc3c9af865c202d36fb60f824f7f3766ef218b | |
parent | 91bc303735833f41b615d3ebe8e2606068ed501d (diff) | |
download | gleam_stdlib-0d7e9c0a6741029d63b78cdab5a6b690ab5a9669.tar.gz gleam_stdlib-0d7e9c0a6741029d63b78cdab5a6b690ab5a9669.zip |
Add string.{to_utf_codepoints, from_utf_codepoints}
-rw-r--r-- | src/gleam/string.gleam | 108 |
1 files changed, 107 insertions, 1 deletions
diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index e5c9e63..099d7b1 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -8,8 +8,9 @@ import gleam/order import gleam/string_builder.{StringBuilder} if erlang { - import gleam/result + import gleam/bit_string import gleam/dynamic.{Dynamic} + import gleam/result } /// Determines if a `String` is empty. @@ -775,6 +776,111 @@ if javascript { "../gleam_stdlib.mjs" "codepoint" } +/// Converts a `String` to a `List` of `UtfCodepoint`. +/// +/// Also see: <https://en.wikipedia.org/wiki/Unicode#Codespace_and_CodePoints>. +/// +/// ## Examples +/// +/// ```gleam +/// > "a" |> to_utf_codepoints +/// [UtfCodepoint(97)] +/// ``` +/// +/// ```gleam +/// // aka ["🏳", "️", "", "🌈"] aka [waving_white_flag, variation_selector_16, zero_width_joiner, rainbow] +/// > "🏳️🌈" |> to_utf_codepoints +/// [UtfCodepoint(127987), UtfCodepoint(65039), UtfCodepoint(8205), UtfCodepoint(127752)] +/// ``` +/// +pub fn to_utf_codepoints(string: String) -> List(UtfCodepoint) { + do_to_utf_codepoints(string) +} + +if erlang { + fn do_to_utf_codepoints(string: String) -> List(UtfCodepoint) { + do_to_utf_codepoints_impl(bit_string.from_string(string), []) + |> list.reverse + } + + fn do_to_utf_codepoints_impl( + bit_string: BitString, + acc: List(UtfCodepoint), + ) -> List(UtfCodepoint) { + case bit_string { + <<head:utf8_codepoint, rest:binary>> -> + do_to_utf_codepoints_impl(rest, [head, ..acc]) + <<>> -> acc + } + } +} + +if javascript { + fn do_to_utf_codepoints(string: String) -> List(UtfCodepoint) { + string + |> string_to_codepoint_integer_list + |> list.map(unsafe_int_to_utf_codepoint) + } + + external fn string_to_codepoint_integer_list(String) -> List(Int) = + "../gleam_stdlib.mjs" "string_to_codepoint_integer_list" +} + +/// Converts a `List` of `UtfCodepoint`s to a `String`. +/// +/// Also see: <https://en.wikipedia.org/wiki/Unicode#Codespace_and_CodePoints>. 
+/// +/// ## Examples +/// +/// ```gleam +/// > { +/// > assert #(Ok(a), Ok(b), Ok(c)) = #( +/// > utf_codepoint(97), +/// > utf_codepoint(98), +/// > utf_codepoint(99), +/// > ) +/// > [a, b, c] +/// > } +/// > |> from_utf_codepoints +/// "abc" +/// ``` +/// +pub fn from_utf_codepoints(utf_codepoints: List(UtfCodepoint)) -> String { + do_from_utf_codepoints(utf_codepoints) +} + +if erlang { + fn do_from_utf_codepoints(utf_codepoints: List(UtfCodepoint)) -> String { + assert Ok(string) = + do_from_utf_codepoints_impl(utf_codepoints, bit_string.from_string("")) + |> bit_string.to_string + string + } + + fn do_from_utf_codepoints_impl( + utf_codepoints: List(UtfCodepoint), + acc: BitString, + ) -> BitString { + case utf_codepoints { + [head, ..tail] -> + do_from_utf_codepoints_impl( + tail, + <<acc:bit_string, head:utf8_codepoint>>, + ) + [] -> acc + } + } +} + +if javascript { + fn do_from_utf_codepoints(utf_codepoints: List(UtfCodepoint)) -> String { + utf_codepoint_list_to_string(utf_codepoints) + } + + external fn utf_codepoint_list_to_string(List(UtfCodepoint)) -> String = + "../gleam_stdlib.mjs" "utf_codepoint_list_to_string" +} + /// Converts an integer to a `UtfCodepoint`. /// /// Returns an `Error` if the integer does not represent a valid UTF codepoint. |