aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorinoas <mail@inoas.com>2022-12-23 11:04:30 +0100
committerLouis Pilfold <louis@lpil.uk>2022-12-23 12:00:27 +0000
commit8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126 (patch)
tree3855c5325fb510d5a91dfc1e651b648e62edd41b
parent9dc8bc4b3e9b58396ceae3e2bb466c1eb4679f86 (diff)
downloadgleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.tar.gz
gleam_stdlib-8ab43ccb44cf9d913d01b1ae6ff780ee08e4e126.zip
fix erlang string.inspect and io.debug to correctly escape ", \, \r, \n, \r\n and \t
-rw-r--r--CHANGELOG.md2
-rw-r--r--src/gleam/bit_string.gleam2
-rw-r--r--src/gleam_stdlib.erl28
-rw-r--r--test/gleam/bit_string_test.gleam4
-rw-r--r--test/gleam/dynamic_test.gleam6
-rw-r--r--test/gleam/string_test.gleam133
6 files changed, 140 insertions, 35 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3085f68..2a6d930 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@
`Intl.Segmenter` class.
- The `string` module gains `to_utf_codepoints`, `from_utf_codepoints`, and
`utf_codepoint_to_int` functions.
+- Fixed `string.inspect`'s escaping of `"`, `\`, `\n`, `\r`, `\r\n`, and `\t`,
+ which in turn fixes `io.debug`'s output of such strings.
## v0.25.0 - 2022-11-19
diff --git a/src/gleam/bit_string.gleam b/src/gleam/bit_string.gleam
index 410c712..6a67028 100644
--- a/src/gleam/bit_string.gleam
+++ b/src/gleam/bit_string.gleam
@@ -91,7 +91,7 @@ if erlang {
fn do_is_utf8(bits: BitString) -> Bool {
case bits {
<<>> -> True
- <<_:utf8, rest:binary>> -> is_utf8(rest)
+ <<_:utf8, rest:binary>> -> do_is_utf8(rest)
_ -> False
}
}
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index b5d6ce6..80c3f2c 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -351,13 +351,9 @@ inspect(Any) when is_integer(Any) ->
inspect(Any) when is_float(Any) ->
io_lib_format:fwrite_g(Any);
inspect(Binary) when is_binary(Binary) ->
- case gleam@bit_string:is_utf8(Binary) of
- true ->
- Pattern = [$"],
- Replacement = [$\\, $\\, $"],
- Escaped = re:replace(Binary, Pattern, Replacement, [{return, binary}, global]),
- ["\"", Escaped, "\""];
- false ->
+ case inspect_maybe_utf8_string(Binary, <<>>) of
+ {ok, InspectedUtf8String} -> InspectedUtf8String;
+ {error, not_a_utf8_string} ->
Segments = [erlang:integer_to_list(X) || <<X>> <= Binary],
["<<", lists:join(", ", Segments), ">>"]
end;
@@ -390,7 +386,7 @@ inspect(Any) when is_function(Any) ->
inspect(Any) ->
["//erl(", io_lib:format("~p", [Any]), ")"].
-inspect_list([]) ->
+inspect_list([]) ->
{proper, []};
inspect_list([Head]) ->
{proper, [inspect(Head)]};
@@ -400,5 +396,21 @@ inspect_list([First | Rest]) when is_list(Rest) ->
inspect_list([First | ImproperTail]) ->
{improper, [inspect(First), <<" | ">>, inspect(ImproperTail)]}.
+inspect_maybe_utf8_string(Binary, Acc) ->
+ case Binary of
+ <<>> -> {ok, <<$", Acc/binary, $">>};
+ <<Head/utf8, Rest/binary>> ->
+ Escaped = case Head of
+ $" -> <<$\\, $">>;
+ $\\ -> <<$\\, $\\>>;
+ $\r -> <<$\\, $r>>;
+ $\n -> <<$\\, $n>>;
+ $\t -> <<$\\, $t>>;
+ Other -> <<Other/utf8>>
+ end,
+ inspect_maybe_utf8_string(Rest, <<Acc/binary, Escaped/binary>>);
+ _ -> {error, not_a_utf8_string}
+ end.
+
float_to_string(Float) when is_float(Float) ->
erlang:iolist_to_binary(io_lib_format:fwrite_g(Float)).
diff --git a/test/gleam/bit_string_test.gleam b/test/gleam/bit_string_test.gleam
index 48f66d6..c52dd85 100644
--- a/test/gleam/bit_string_test.gleam
+++ b/test/gleam/bit_string_test.gleam
@@ -104,7 +104,7 @@ pub fn to_string_test() {
|> bit_string.to_string
|> should.equal(Ok("ø"))
- <<65535>>
+ <<65_535>>
|> bit_string.to_string
|> should.equal(Error(Nil))
}
@@ -126,7 +126,7 @@ pub fn is_utf8_test() {
|> bit_string.is_utf8
|> should.be_true
- <<65535>>
+ <<65_535>>
|> bit_string.is_utf8
|> should.be_false
}
diff --git a/test/gleam/dynamic_test.gleam b/test/gleam/dynamic_test.gleam
index 8e9d4f7..104c1fa 100644
--- a/test/gleam/dynamic_test.gleam
+++ b/test/gleam/dynamic_test.gleam
@@ -32,10 +32,10 @@ pub fn bit_string_test() {
if erlang {
pub fn bit_string_erlang_test() {
- <<65535:16>>
+ <<65_535:16>>
|> dynamic.from
|> dynamic.bit_string
- |> should.equal(Ok(<<65535:16>>))
+ |> should.equal(Ok(<<65_535:16>>))
}
}
@@ -67,7 +67,7 @@ pub fn string_test() {
if erlang {
pub fn string_non_utf8_test() {
- <<65535:16>>
+ <<65_535:16>>
|> dynamic.from
|> dynamic.string
|> should.equal(Error([
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 07bbfb9..f3495ac 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -434,10 +434,10 @@ pub fn to_utf_codepoints_test() {
Ok(zero_width_joiner),
Ok(rainbow),
) = #(
- string.utf_codepoint(127987),
- string.utf_codepoint(65039),
+ string.utf_codepoint(127_987),
+ string.utf_codepoint(65_039),
string.utf_codepoint(8205),
- string.utf_codepoint(127752),
+ string.utf_codepoint(127_752),
)
[waving_white_flag, variant_selector_16, zero_width_joiner, rainbow]
})
@@ -472,18 +472,18 @@ pub fn from_utf_codepoints_test() {
}
pub fn utf_codepoint_test() {
- string.utf_codepoint(1114444)
+ string.utf_codepoint(1_114_444)
|> should.be_error
- string.utf_codepoint(65534)
+ string.utf_codepoint(65_534)
|> should.be_error
- string.utf_codepoint(55296)
+ string.utf_codepoint(55_296)
|> should.be_error
}
pub fn bit_string_utf_codepoint_test() {
- assert Ok(snake) = string.utf_codepoint(128013)
+ assert Ok(snake) = string.utf_codepoint(128_013)
should.equal(<<snake:utf8_codepoint>>, <<"🐍":utf8>>)
}
@@ -659,17 +659,103 @@ pub fn inspect_test() {
string.inspect("")
|> should.equal("\"\"")
+ string.inspect("\\")
+ |> should.equal("\"\\\\\"")
+
+ string.inspect("\\\\")
+ |> should.equal("\"\\\\\\\\\"")
+
+ string.inspect("\\\\\\")
+ |> should.equal("\"\\\\\\\\\\\\\"")
+
+ string.inspect("\"")
+ |> should.equal("\"\\\"\"")
+ string.inspect("\"\"")
+ |> should.equal("\"\\\"\\\"\"")
+
+ string.inspect("\r")
+ |> should.equal("\"\\r\"")
+
+ string.inspect("\n")
+ |> should.equal("\"\\n\"")
+
+ string.inspect("\t")
+ |> should.equal("\"\\t\"")
+
+ string.inspect("\r\r")
+ |> should.equal("\"\\r\\r\"")
+
+ string.inspect("\n\n")
+ |> should.equal("\"\\n\\n\"")
+
+ string.inspect("\r\n")
+ |> should.equal("\"\\r\\n\"")
+
+ string.inspect("\n\r")
+ |> should.equal("\"\\n\\r\"")
+
+ string.inspect("\t\t")
+ |> should.equal("\"\\t\\t\"")
+
+ string.inspect("\t\n")
+ |> should.equal("\"\\t\\n\"")
+
+ string.inspect("\n\t")
+ |> should.equal("\"\\n\\t\"")
+
+ string.inspect("\t\r")
+ |> should.equal("\"\\t\\r\"")
+
+ string.inspect("\r\t")
+ |> should.equal("\"\\r\\t\"")
+
+ string.inspect("\\\n\\")
+ |> should.equal("\"\\\\\\n\\\\\"")
+
+ string.inspect("\\\"\\")
+ |> should.equal("\"\\\\\\\"\\\\\"")
+
+ string.inspect("\\\"\"\\")
+ |> should.equal("\"\\\\\\\"\\\"\\\\\"")
+
+ string.inspect("'")
+ |> should.equal("\"'\"")
+
+ string.inspect("''")
+ |> should.equal("\"''\"")
+
+ string.inspect("around-single-quotes'around-single-quotes")
+ |> should.equal("\"around-single-quotes'around-single-quotes\"")
+
+ string.inspect("'between-single-quotes'")
+ |> should.equal("\"'between-single-quotes'\"")
+
+ string.inspect("0")
+ |> should.equal("\"0\"")
+
string.inspect("1")
|> should.equal("\"1\"")
+ string.inspect("2")
+ |> should.equal("\"2\"")
+
string.inspect("Hello Joe!")
|> should.equal("\"Hello Joe!\"")
string.inspect("Hello \"Manuel\"!")
|> should.equal("\"Hello \\\"Manuel\\\"!\"")
- string.inspect("💜 Gleam")
- |> should.equal("\"💜 Gleam\"")
+ string.inspect("👨‍👩‍👦‍👦 💜 Gleam")
+ |> should.equal("\"👨‍👩‍👦‍👦 💜 Gleam\"")
+
+ string.inspect("True")
+ |> should.equal("\"True\"")
+
+ string.inspect("False")
+ |> should.equal("\"False\"")
+
+ string.inspect("Nil")
+ |> should.equal("\"Nil\"")
string.inspect(["1"])
|> should.equal("[\"1\"]")
@@ -689,8 +775,10 @@ pub fn inspect_test() {
string.inspect([#(1, 2, 3), #(1, 2, 3)])
|> should.equal("[#(1, 2, 3), #(1, 2, 3)]")
- string.inspect(#([1, 2, 3], "🌈", #(1, "1", True)))
- |> should.equal("#([1, 2, 3], \"🌈\", #(1, \"1\", True))")
+ string.inspect(#([1, 2, 3], "🌈", "🏳️‍🌈", #(1, "1", True)))
+ |> should.equal(
+ "#([1, 2, 3], \"🌈\", \"🏳️‍🌈\", #(1, \"1\", True))",
+ )
string.inspect(Nil)
|> should.equal("Nil")
@@ -788,14 +876,14 @@ pub fn inspect_test() {
if javascript {
pub fn target_inspect_test() {
- // Due to Erlang's internal representation, on Erlang this will pass, instead:
+ // Due to Erlang's internal representation, on Erlang this passes, instead:
+ // string.inspect(#(InspectTypeZero, InspectTypeZero))
// |> should.equal("InspectTypeZero(InspectTypeZero)")
- //
string.inspect(#(InspectTypeZero, InspectTypeZero))
|> should.equal("#(InspectTypeZero, InspectTypeZero)")
- // Due to JavaScript's `Number` type `Float`s without digits return as `Int`s.
- //
+ // Due to JavaScript's `Number` type `Float`s without digits return as
+ // `Int`s.
string.inspect(-1.0)
|> should.equal("-1")
@@ -811,7 +899,8 @@ if javascript {
string.inspect(#(1.0))
|> should.equal("#(1)")
- // Unlike on Erlang, on JavaScript `BitString` and `String` do have a different runtime representation.
+ // Unlike on Erlang, on JavaScript `BitString` and `String` do have a
+ // different runtime representation.
<<"abc":utf8>>
|> string.inspect()
|> should.equal("<<97, 98, 99>>")
@@ -828,14 +917,15 @@ if erlang {
"erlang" "make_ref"
pub fn target_inspect_test() {
- // Erlang's internal representation does not allow a correct differentiation.
+ // Erlang's internal representation does not allow a correct
+ // differentiation at runtime and thus this does not pass:
+ // string.inspect(#(InspectTypeZero, InspectTypeZero))
// |> should.equal("#(InspectTypeZero, InspectTypeZero)")
- //
string.inspect(#(InspectTypeZero, InspectTypeZero))
|> should.equal("InspectTypeZero(InspectTypeZero)")
- // Unlike JavaScript, Erlang correctly differentiates between `1` and `1.0`.
- //
+ // Unlike JavaScript, Erlang correctly differentiates between `1` and `1.0`
+ // at runtime.
string.inspect(-1.0)
|> should.equal("-1.0")
@@ -867,7 +957,8 @@ if erlang {
|> regex.check(regular_expression, _)
|> should.be_true
- // On Erlang the runtime representation for `String` and `BitString` is indistinguishable.
+ // On Erlang the representation between `String` and `BitString` is
+ // indistinguishable at runtime.
<<"abc":utf8>>
|> string.inspect()
|> should.equal("\"abc\"")