diff options
author | inoas <mail@inoas.com> | 2022-12-23 11:04:30 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2022-12-23 12:00:27 +0000 |
commit | 8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126 (patch) | |
tree | 3855c5325fb510d5a91dfc1e651b648e62edd41b | |
parent | 9dc8bc4b3e9b58396ceae3e2bb466c1eb4679f86 (diff) | |
download | gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.tar.gz gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.zip |
Fix Erlang `string.inspect` and `io.debug` to correctly escape `"`, `\`, `\r`, `\n`, `\r\n` and `\t`
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | src/gleam/bit_string.gleam | 2 | ||||
-rw-r--r-- | src/gleam_stdlib.erl | 28 | ||||
-rw-r--r-- | test/gleam/bit_string_test.gleam | 4 | ||||
-rw-r--r-- | test/gleam/dynamic_test.gleam | 6 | ||||
-rw-r--r-- | test/gleam/string_test.gleam | 133 |
6 files changed, 140 insertions, 35 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 3085f68..2a6d930 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ `Intl.Segmenter` class. - The `string` module gains `to_utf_codepoints`, `from_utf_codepoints`, and `utf_codepoint_to_int` functions. +- Fixed `string.inspect`'s escaping of `"`, `\`, `\n`, `\r`, `\r\n`, and `\t`, + which in turn fixes `io.debug`'s output of such strings. ## v0.25.0 - 2022-11-19 diff --git a/src/gleam/bit_string.gleam b/src/gleam/bit_string.gleam index 410c712..6a67028 100644 --- a/src/gleam/bit_string.gleam +++ b/src/gleam/bit_string.gleam @@ -91,7 +91,7 @@ if erlang { fn do_is_utf8(bits: BitString) -> Bool { case bits { <<>> -> True - <<_:utf8, rest:binary>> -> is_utf8(rest) + <<_:utf8, rest:binary>> -> do_is_utf8(rest) _ -> False } } diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index b5d6ce6..80c3f2c 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -351,13 +351,9 @@ inspect(Any) when is_integer(Any) -> inspect(Any) when is_float(Any) -> io_lib_format:fwrite_g(Any); inspect(Binary) when is_binary(Binary) -> - case gleam@bit_string:is_utf8(Binary) of - true -> - Pattern = [$"], - Replacement = [$\\, $\\, $"], - Escaped = re:replace(Binary, Pattern, Replacement, [{return, binary}, global]), - ["\"", Escaped, "\""]; - false -> + case inspect_maybe_utf8_string(Binary, <<>>) of + {ok, InspectedUtf8String} -> InspectedUtf8String; + {error, not_a_utf8_string} -> Segments = [erlang:integer_to_list(X) || <<X>> <= Binary], ["<<", lists:join(", ", Segments), ">>"] end; @@ -390,7 +386,7 @@ inspect(Any) when is_function(Any) -> inspect(Any) -> ["//erl(", io_lib:format("~p", [Any]), ")"]. -inspect_list([]) -> +inspect_list([]) -> {proper, []}; inspect_list([Head]) -> {proper, [inspect(Head)]}; @@ -400,5 +396,21 @@ inspect_list([First | Rest]) when is_list(Rest) -> inspect_list([First | ImproperTail]) -> {improper, [inspect(First), <<" | ">>, inspect(ImproperTail)]}. 
+inspect_maybe_utf8_string(Binary, Acc) -> + case Binary of + <<>> -> {ok, <<$", Acc/binary, $">>}; + <<Head/utf8, Rest/binary>> -> + Escaped = case Head of + $" -> <<$\\, $">>; + $\\ -> <<$\\, $\\>>; + $\r -> <<$\\, $r>>; + $\n -> <<$\\, $n>>; + $\t -> <<$\\, $t>>; + Other -> <<Other/utf8>> + end, + inspect_maybe_utf8_string(Rest, <<Acc/binary, Escaped/binary>>); + _ -> {error, not_a_utf8_string} + end. + float_to_string(Float) when is_float(Float) -> erlang:iolist_to_binary(io_lib_format:fwrite_g(Float)). diff --git a/test/gleam/bit_string_test.gleam b/test/gleam/bit_string_test.gleam index 48f66d6..c52dd85 100644 --- a/test/gleam/bit_string_test.gleam +++ b/test/gleam/bit_string_test.gleam @@ -104,7 +104,7 @@ pub fn to_string_test() { |> bit_string.to_string |> should.equal(Ok("ø")) - <<65535>> + <<65_535>> |> bit_string.to_string |> should.equal(Error(Nil)) } @@ -126,7 +126,7 @@ pub fn is_utf8_test() { |> bit_string.is_utf8 |> should.be_true - <<65535>> + <<65_535>> |> bit_string.is_utf8 |> should.be_false } diff --git a/test/gleam/dynamic_test.gleam b/test/gleam/dynamic_test.gleam index 8e9d4f7..104c1fa 100644 --- a/test/gleam/dynamic_test.gleam +++ b/test/gleam/dynamic_test.gleam @@ -32,10 +32,10 @@ pub fn bit_string_test() { if erlang { pub fn bit_string_erlang_test() { - <<65535:16>> + <<65_535:16>> |> dynamic.from |> dynamic.bit_string - |> should.equal(Ok(<<65535:16>>)) + |> should.equal(Ok(<<65_535:16>>)) } } @@ -67,7 +67,7 @@ pub fn string_test() { if erlang { pub fn string_non_utf8_test() { - <<65535:16>> + <<65_535:16>> |> dynamic.from |> dynamic.string |> should.equal(Error([ diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index 07bbfb9..f3495ac 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -434,10 +434,10 @@ pub fn to_utf_codepoints_test() { Ok(zero_width_joiner), Ok(rainbow), ) = #( - string.utf_codepoint(127987), - string.utf_codepoint(65039), + string.utf_codepoint(127_987), +
string.utf_codepoint(65_039), string.utf_codepoint(8205), - string.utf_codepoint(127752), + string.utf_codepoint(127_752), ) [waving_white_flag, variant_selector_16, zero_width_joiner, rainbow] }) @@ -472,18 +472,18 @@ pub fn from_utf_codepoints_test() { } pub fn utf_codepoint_test() { - string.utf_codepoint(1114444) + string.utf_codepoint(1_114_444) |> should.be_error - string.utf_codepoint(65534) + string.utf_codepoint(65_534) |> should.be_error - string.utf_codepoint(55296) + string.utf_codepoint(55_296) |> should.be_error } pub fn bit_string_utf_codepoint_test() { - assert Ok(snake) = string.utf_codepoint(128013) + assert Ok(snake) = string.utf_codepoint(128_013) should.equal(<<snake:utf8_codepoint>>, <<"🐍":utf8>>) } @@ -659,17 +659,103 @@ pub fn inspect_test() { string.inspect("") |> should.equal("\"\"") + string.inspect("\\") + |> should.equal("\"\\\\\"") + + string.inspect("\\\\") + |> should.equal("\"\\\\\\\\\"") + + string.inspect("\\\\\\") + |> should.equal("\"\\\\\\\\\\\\\"") + + string.inspect("\"") + |> should.equal("\"\\\"\"") + string.inspect("\"\"") + |> should.equal("\"\\\"\\\"\"") + + string.inspect("\r") + |> should.equal("\"\\r\"") + + string.inspect("\n") + |> should.equal("\"\\n\"") + + string.inspect("\t") + |> should.equal("\"\\t\"") + + string.inspect("\r\r") + |> should.equal("\"\\r\\r\"") + + string.inspect("\n\n") + |> should.equal("\"\\n\\n\"") + + string.inspect("\r\n") + |> should.equal("\"\\r\\n\"") + + string.inspect("\n\r") + |> should.equal("\"\\n\\r\"") + + string.inspect("\t\t") + |> should.equal("\"\\t\\t\"") + + string.inspect("\t\n") + |> should.equal("\"\\t\\n\"") + + string.inspect("\n\t") + |> should.equal("\"\\n\\t\"") + + string.inspect("\t\r") + |> should.equal("\"\\t\\r\"") + + string.inspect("\r\t") + |> should.equal("\"\\r\\t\"") + + string.inspect("\\\n\\") + |> should.equal("\"\\\\\\n\\\\\"") + + string.inspect("\\\"\\") + |> should.equal("\"\\\\\\\"\\\\\"") + + string.inspect("\\\"\"\\") + |>
should.equal("\"\\\\\\\"\\\"\\\\\"") + + string.inspect("'") + |> should.equal("\"'\"") + + string.inspect("''") + |> should.equal("\"''\"") + + string.inspect("around-single-quotes'around-single-quotes") + |> should.equal("\"around-single-quotes'around-single-quotes\"") + + string.inspect("'between-single-quotes'") + |> should.equal("\"'between-single-quotes'\"") + + string.inspect("0") + |> should.equal("\"0\"") + string.inspect("1") |> should.equal("\"1\"") + string.inspect("2") + |> should.equal("\"2\"") + string.inspect("Hello Joe!") |> should.equal("\"Hello Joe!\"") string.inspect("Hello \"Manuel\"!") |> should.equal("\"Hello \\\"Manuel\\\"!\"") - string.inspect("💜 Gleam") - |> should.equal("\"💜 Gleam\"") + string.inspect("👨‍👩‍👦‍👦 💜 Gleam") + |> should.equal("\"👨‍👩‍👦‍👦 💜 Gleam\"") + + string.inspect("True") + |> should.equal("\"True\"") + + string.inspect("False") + |> should.equal("\"False\"") + + string.inspect("Nil") + |> should.equal("\"Nil\"") string.inspect(["1"]) |> should.equal("[\"1\"]") @@ -689,8 +775,10 @@ pub fn inspect_test() { string.inspect([#(1, 2, 3), #(1, 2, 3)]) |> should.equal("[#(1, 2, 3), #(1, 2, 3)]") - string.inspect(#([1, 2, 3], "💜", #(1, "1", True))) - |> should.equal("#([1, 2, 3], \"💜\", #(1, \"1\", True))") + string.inspect(#([1, 2, 3], "💜", "🏳️‍🌈", #(1, "1", True))) + |> should.equal( + "#([1, 2, 3], \"💜\", \"🏳️‍🌈\", #(1, \"1\", True))", + ) string.inspect(Nil) |> should.equal("Nil") @@ -788,14 +876,14 @@ pub fn inspect_test() { if javascript { pub fn target_inspect_test() { - // Due to Erlang's internal representation, on Erlang this will pass, instead: + // Due to Erlang's internal representation, on Erlang this passes, instead: + // string.inspect(#(InspectTypeZero, InspectTypeZero)) // |> should.equal("InspectTypeZero(InspectTypeZero)") - // string.inspect(#(InspectTypeZero, InspectTypeZero)) |> should.equal("#(InspectTypeZero, InspectTypeZero)") - // Due to JavaScript's `Number` type `Float`s without digits return
as `Int`s. - // + // Due to JavaScript's `Number` type `Float`s without digits return as + // `Int`s. string.inspect(-1.0) |> should.equal("-1") @@ -811,7 +899,8 @@ if javascript { string.inspect(#(1.0)) |> should.equal("#(1)") - // Unlike on Erlang, on JavaScript `BitString` and `String` do have a different runtime representation. + // Unlike on Erlang, on JavaScript `BitString` and `String` do have a + // different runtime representation. <<"abc":utf8>> |> string.inspect() |> should.equal("<<97, 98, 99>>") @@ -828,14 +917,15 @@ if erlang { "erlang" "make_ref" pub fn target_inspect_test() { - // Erlang's internal representation does not allow a correct differentiation. + // Erlang's internal representation does not allow a correct + // differentiation at runtime and thus this does not pass: + // string.inspect(#(InspectTypeZero, InspectTypeZero)) // |> should.equal("#(InspectTypeZero, InspectTypeZero)") - // string.inspect(#(InspectTypeZero, InspectTypeZero)) |> should.equal("InspectTypeZero(InspectTypeZero)") - // Unlike JavaScript, Erlang correctly differentiates between `1` and `1.0`. - // + // Unlike JavaScript, Erlang correctly differentiates between `1` and `1.0` + // at runtime. string.inspect(-1.0) |> should.equal("-1.0") @@ -867,7 +957,8 @@ if erlang { |> regex.check(regular_expression, _) |> should.be_true - // On Erlang the runtime representation for `String` and `BitString` is indistinguishable. + // On Erlang the representation between `String` and `BitString` is + // indistinguishable at runtime. <<"abc":utf8>> |> string.inspect() |> should.equal("\"abc\"") |