fix erlang string.inspect and io.debug to correctly escape ", \, \r, \n, \r\n and \t

author: inoas <mail@inoas.com> 2022-12-23 11:04:30 +0100
committer: Louis Pilfold <louis@lpil.uk> 2022-12-23 12:00:27 +0000
commit: 8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126 (patch)
tree: 3855c5325fb510d5a91dfc1e651b648e62edd41b
parent: 9dc8bc4b3e9b58396ceae3e2bb466c1eb4679f86 (diff)
download: gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.tar.gz
gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.zip
6 files changed, 140 insertions, 35 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3085f68..2a6d930 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@
   `Intl.Segmenter` class.
 - The `string` module gains `to_utf_codepoints`, `from_utf_codepoints`, and
   `utf_codepoint_to_int` functions.
+- Fixed `string.inspect`'s escaping of `"`, `\`, `\n`, `\r`, `\r\n`, and `\t`,
+  which in turn fixes `io.debug`'s output of such strings.
 
 ## v0.25.0 - 2022-11-19
 
diff --git a/src/gleam/bit_string.gleam b/src/gleam/bit_string.gleam
index 410c712..6a67028 100644
--- a/src/gleam/bit_string.gleam
+++ b/src/gleam/bit_string.gleam
@@ -91,7 +91,7 @@ if erlang {
   fn do_is_utf8(bits: BitString) -> Bool {
     case bits {
       <<>> -> True
-      <<_:utf8, rest:binary>> -> is_utf8(rest)
+      <<_:utf8, rest:binary>> -> do_is_utf8(rest)
       _ -> False
     }
   }
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index b5d6ce6..80c3f2c 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -351,13 +351,9 @@ inspect(Any) when is_integer(Any) ->
 inspect(Any) when is_float(Any) ->
     io_lib_format:fwrite_g(Any);
 inspect(Binary) when is_binary(Binary) ->
-    case gleam@bit_string:is_utf8(Binary) of
-        true ->
-            Pattern = [$"],
-            Replacement = [$\\, $\\, $"],
-            Escaped = re:replace(Binary, Pattern, Replacement, [{return, binary}, global]),
-            ["\"", Escaped, "\""];
-        false ->
+    case inspect_maybe_utf8_string(Binary, <<>>) of
+        {ok, InspectedUtf8String} -> InspectedUtf8String;
+        {error, not_a_utf8_string} ->
             Segments = [erlang:integer_to_list(X) || <<X>> <= Binary],
             ["<<", lists:join(", ", Segments), ">>"]
     end;
@@ -390,7 +386,7 @@ inspect(Any) when is_function(Any) ->
 inspect(Any) ->
     ["//erl(", io_lib:format("~p", [Any]), ")"].
 
-inspect_list([])  ->
+inspect_list([]) ->
     {proper, []};
 inspect_list([Head]) ->
     {proper, [inspect(Head)]};
@@ -400,5 +396,21 @@ inspect_list([First | Rest]) when is_list(Rest) ->
 inspect_list([First | ImproperTail]) ->
     {improper, [inspect(First), <<" | ">>, inspect(ImproperTail)]}.
 
+inspect_maybe_utf8_string(Binary, Acc) -> % Acc: escaped output built so far; inspect/1 starts it at <<>>
+    case Binary of
+        <<>> -> {ok, <<$", Acc/binary, $">>}; % input fully consumed: wrap the accumulated text in double quotes
+        <<Head/utf8, Rest/binary>> -> % take one UTF-8 encoded codepoint off the front per step
+            Escaped = case Head of
+                $" -> <<$\\, $">>;
+                $\\ -> <<$\\, $\\>>;
+                $\r -> <<$\\, $r>>;
+                $\n -> <<$\\, $n>>;
+                $\t -> <<$\\, $t>>;
+                Other -> <<Other/utf8>> % any other codepoint is re-encoded as UTF-8 without escaping
+            end,
+            inspect_maybe_utf8_string(Rest, <<Acc/binary, Escaped/binary>>);
+        _ -> {error, not_a_utf8_string} % non-empty remainder that does not match a utf8 segment
+    end.
+
 float_to_string(Float) when is_float(Float) ->
     erlang:iolist_to_binary(io_lib_format:fwrite_g(Float)).
diff --git a/test/gleam/bit_string_test.gleam b/test/gleam/bit_string_test.gleam
index 48f66d6..c52dd85 100644
--- a/test/gleam/bit_string_test.gleam
+++ b/test/gleam/bit_string_test.gleam
@@ -104,7 +104,7 @@ pub fn to_string_test() {
   |> bit_string.to_string
   |> should.equal(Ok("ø"))
 
-  <<65535>>
+  <<65_535>>
   |> bit_string.to_string
   |> should.equal(Error(Nil))
 }
@@ -126,7 +126,7 @@ pub fn is_utf8_test() {
   |> bit_string.is_utf8
   |> should.be_true
 
-  <<65535>>
+  <<65_535>>
   |> bit_string.is_utf8
   |> should.be_false
 }
diff --git a/test/gleam/dynamic_test.gleam b/test/gleam/dynamic_test.gleam
index 8e9d4f7..104c1fa 100644
--- a/test/gleam/dynamic_test.gleam
+++ b/test/gleam/dynamic_test.gleam
@@ -32,10 +32,10 @@ pub fn bit_string_test() {
 
 if erlang {
   pub fn bit_string_erlang_test() {
-    <<65535:16>>
+    <<65_535:16>>
     |> dynamic.from
     |> dynamic.bit_string
-    |> should.equal(Ok(<<65535:16>>))
+    |> should.equal(Ok(<<65_535:16>>))
   }
 }
 
@@ -67,7 +67,7 @@ pub fn string_test() {
 
 if erlang {
   pub fn string_non_utf8_test() {
-    <<65535:16>>
+    <<65_535:16>>
     |> dynamic.from
     |> dynamic.string
     |> should.equal(Error([
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 07bbfb9..f3495ac 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -434,10 +434,10 @@ pub fn to_utf_codepoints_test() {
       Ok(zero_width_joiner),
       Ok(rainbow),
     ) = #(
-      string.utf_codepoint(127987),
-      string.utf_codepoint(65039),
+      string.utf_codepoint(127_987),
+      string.utf_codepoint(65_039),
       string.utf_codepoint(8205),
-      string.utf_codepoint(127752),
+      string.utf_codepoint(127_752),
     )
     [waving_white_flag, variant_selector_16, zero_width_joiner, rainbow]
   })
@@ -472,18 +472,18 @@ pub fn from_utf_codepoints_test() {
 }
 
 pub fn utf_codepoint_test() {
-  string.utf_codepoint(1114444)
+  string.utf_codepoint(1_114_444)
   |> should.be_error
 
-  string.utf_codepoint(65534)
+  string.utf_codepoint(65_534)
   |> should.be_error
 
-  string.utf_codepoint(55296)
+  string.utf_codepoint(55_296)
   |> should.be_error
 }
 
 pub fn bit_string_utf_codepoint_test() {
-  assert Ok(snake) = string.utf_codepoint(128013)
+  assert Ok(snake) = string.utf_codepoint(128_013)
   should.equal(<<snake:utf8_codepoint>>, <<"🐍":utf8>>)
 }
 
@@ -659,17 +659,103 @@ pub fn inspect_test() {
   string.inspect("")
   |> should.equal("\"\"")
 
+  string.inspect("\\")
+  |> should.equal("\"\\\\\"")
+
+  string.inspect("\\\\")
+  |> should.equal("\"\\\\\\\\\"")
+
+  string.inspect("\\\\\\")
+  |> should.equal("\"\\\\\\\\\\\\\"")
+
+  string.inspect("\"")
+  |> should.equal("\"\\\"\"")
+  string.inspect("\"\"")
+  |> should.equal("\"\\\"\\\"\"")
+
+  string.inspect("\r")
+  |> should.equal("\"\\r\"")
+
+  string.inspect("\n")
+  |> should.equal("\"\\n\"")
+
+  string.inspect("\t")
+  |> should.equal("\"\\t\"")
+
+  string.inspect("\r\r")
+  |> should.equal("\"\\r\\r\"")
+
+  string.inspect("\n\n")
+  |> should.equal("\"\\n\\n\"")
+
+  string.inspect("\r\n")
+  |> should.equal("\"\\r\\n\"")
+
+  string.inspect("\n\r")
+  |> should.equal("\"\\n\\r\"")
+
+  string.inspect("\t\t")
+  |> should.equal("\"\\t\\t\"")
+
+  string.inspect("\t\n")
+  |> should.equal("\"\\t\\n\"")
+
+  string.inspect("\n\t")
+  |> should.equal("\"\\n\\t\"")
+
+  string.inspect("\t\r")
+  |> should.equal("\"\\t\\r\"")
+
+  string.inspect("\r\t")
+  |> should.equal("\"\\r\\t\"")
+
+  string.inspect("\\\n\\")
+  |> should.equal("\"\\\\\\n\\\\\"")
+
+  string.inspect("\\\"\\")
+  |> should.equal("\"\\\\\\\"\\\\\"")
+
+  string.inspect("\\\"\"\\")
+  |> should.equal("\"\\\\\\\"\\\"\\\\\"")
+
+  string.inspect("'")
+  |> should.equal("\"'\"")
+
+  string.inspect("''")
+  |> should.equal("\"''\"")
+
+  string.inspect("around-single-quotes'around-single-quotes")
+  |> should.equal("\"around-single-quotes'around-single-quotes\"")
+
+  string.inspect("'between-single-quotes'")
+  |> should.equal("\"'between-single-quotes'\"")
+
+  string.inspect("0")
+  |> should.equal("\"0\"")
+
   string.inspect("1")
   |> should.equal("\"1\"")
 
+  string.inspect("2")
+  |> should.equal("\"2\"")
+
   string.inspect("Hello Joe!")
   |> should.equal("\"Hello Joe!\"")
 
   string.inspect("Hello \"Manuel\"!")
   |> should.equal("\"Hello \\\"Manuel\\\"!\"")
 
-  string.inspect("💜 Gleam")
-  |> should.equal("\"💜 Gleam\"")
+  string.inspect("👨‍👩‍👦‍👦 💜 Gleam")
+  |> should.equal("\"👨‍👩‍👦‍👦 💜 Gleam\"")
+
+  string.inspect("True")
+  |> should.equal("\"True\"")
+
+  string.inspect("False")
+  |> should.equal("\"False\"")
+
+  string.inspect("Nil")
+  |> should.equal("\"Nil\"")
 
   string.inspect(["1"])
   |> should.equal("[\"1\"]")
@@ -689,8 +775,10 @@ pub fn inspect_test() {
   string.inspect([#(1, 2, 3), #(1, 2, 3)])
   |> should.equal("[#(1, 2, 3), #(1, 2, 3)]")
 
-  string.inspect(#([1, 2, 3], "🌈", #(1, "1", True)))
-  |> should.equal("#([1, 2, 3], \"🌈\", #(1, \"1\", True))")
+  string.inspect(#([1, 2, 3], "🌈", "🏳️‍🌈", #(1, "1", True)))
+  |> should.equal(
+    "#([1, 2, 3], \"🌈\", \"🏳️‍🌈\", #(1, \"1\", True))",
+  )
 
   string.inspect(Nil)
   |> should.equal("Nil")
@@ -788,14 +876,14 @@ pub fn inspect_test() {
 
 if javascript {
   pub fn target_inspect_test() {
-    // Due to Erlang's internal representation, on Erlang this will pass, instead:
+    // Due to Erlang's internal representation, on Erlang this passes, instead:
+    // string.inspect(#(InspectTypeZero, InspectTypeZero))
     // |> should.equal("InspectTypeZero(InspectTypeZero)")
-    //
     string.inspect(#(InspectTypeZero, InspectTypeZero))
     |> should.equal("#(InspectTypeZero, InspectTypeZero)")
 
-    // Due to JavaScript's `Number` type `Float`s without digits return as `Int`s.
-    //
+    // Due to JavaScript's `Number` type, `Float`s without a fractional part
+    // are printed like `Int`s.
     string.inspect(-1.0)
     |> should.equal("-1")
 
@@ -811,7 +899,8 @@ if javascript {
     string.inspect(#(1.0))
     |> should.equal("#(1)")
 
-    // Unlike on Erlang, on JavaScript `BitString` and `String` do have a different runtime representation.
+    // Unlike on Erlang, on JavaScript `BitString` and `String` do have a
+    // different runtime representation.
     <<"abc":utf8>>
     |> string.inspect()
     |> should.equal("<<97, 98, 99>>")
@@ -828,14 +917,15 @@ if erlang {
     "erlang" "make_ref"
 
   pub fn target_inspect_test() {
-    // Erlang's internal representation does not allow a correct differentiation.
+    // Erlang's internal representation does not allow a correct
+    // differentiation at runtime and thus this does not pass:
+    // string.inspect(#(InspectTypeZero, InspectTypeZero))
     // |> should.equal("#(InspectTypeZero, InspectTypeZero)")
-    //
     string.inspect(#(InspectTypeZero, InspectTypeZero))
     |> should.equal("InspectTypeZero(InspectTypeZero)")
 
-    // Unlike JavaScript, Erlang correctly differentiates between `1` and `1.0`.
-    //
+    // Unlike JavaScript, Erlang correctly differentiates between `1` and `1.0`
+    // at runtime.
     string.inspect(-1.0)
     |> should.equal("-1.0")
 
@@ -867,7 +957,8 @@ if erlang {
     |> regex.check(regular_expression, _)
     |> should.be_true
 
-    // On Erlang the runtime representation for `String` and `BitString` is indistinguishable.
+    // On Erlang the runtime representations of `String` and `BitString` are
+    // indistinguishable.
     <<"abc":utf8>>
     |> string.inspect()
     |> should.equal("\"abc\"")
author	inoas <mail@inoas.com>	2022-12-23 11:04:30 +0100
committer	Louis Pilfold <louis@lpil.uk>	2022-12-23 12:00:27 +0000
commit	8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126 (patch)
tree	3855c5325fb510d5a91dfc1e651b648e62edd41b
parent	9dc8bc4b3e9b58396ceae3e2bb466c1eb4679f86 (diff)
download	gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.tar.gz gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.zip