Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion lib/mix/lib/mix/utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,39 @@ defmodule Mix.Utils do
[" ", parent, quoted(name), edge_info, ?\n]
end

defp quoted(data), do: [?", to_string(data), ?"]
defp quoted(data) do
string = to_string(data)
escaped_data = escape_dot_string(string)
[?", escaped_data, ?"]
end

# Escape a string for DOT format according to GraphViz specification https://graphviz.org/doc/info/lang.html
# - Only quotes need escaping
# - String must not end with an odd number of backslashes (would escape the closing quote)
defp escape_dot_string(string) do
escape_dot_string(string, [], 0)
end

defp escape_dot_string(<<>>, acc, backslash_count) do
if rem(backslash_count, 2) == 1 do
# Odd number of trailing backslashes - add one more to make it even
[acc, "\\"]
else
acc
end
end

defp escape_dot_string(<<?"::utf8, rest::binary>>, acc, _backslash_count) do
escape_dot_string(rest, [acc, "\\\""], 0)
end

defp escape_dot_string(<<"\\"::utf8, rest::binary>>, acc, backslash_count) do
escape_dot_string(rest, [acc, "\\"], backslash_count + 1)
end

defp escape_dot_string(<<char::utf8, rest::binary>>, acc, _backslash_count) do
escape_dot_string(rest, [acc, char], 0)
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
escaped_data = escape_dot_string(string)
[?", escaped_data, ?"]
end
# Escape a string for DOT format according to GraphViz specification https://graphviz.org/doc/info/lang.html
# - Only quotes need escaping
# - String must not end with an odd number of backslashes (would escape the closing quote)
defp escape_dot_string(string) do
escape_dot_string(string, [], 0)
end
defp escape_dot_string(<<>>, acc, backslash_count) do
if rem(backslash_count, 2) == 1 do
# Odd number of trailing backslashes - add one more to make it even
[acc, "\\"]
else
acc
end
end
defp escape_dot_string(<<?"::utf8, rest::binary>>, acc, _backslash_count) do
escape_dot_string(rest, [acc, "\\\""], 0)
end
defp escape_dot_string(<<"\\"::utf8, rest::binary>>, acc, backslash_count) do
escape_dot_string(rest, [acc, "\\"], backslash_count + 1)
end
defp escape_dot_string(<<char::utf8, rest::binary>>, acc, _backslash_count) do
escape_dot_string(rest, [acc, char], 0)
end
escape_dot_string(string, <<?">>)
end
# Escape a string for DOT format according to GraphViz specification https://graphviz.org/doc/info/lang.html
# - Only quotes need escaping
# - The ending quote should not be escaped (which requires an even of trailing backslashes)
defp escape_dot_string(<<?\\, ?\\, rest::binary>>, acc) do
escape_dot_string(rest, <<acc::binary, ?\\, ?\\>>)
end
defp escape_dot_string(<<?", rest::binary>>, acc) do
escape_dot_string(rest, <<acc::binary, ?\\, ?">>)
end
defp escape_dot_string(<<char, rest::binary>>, acc) do
escape_dot_string(rest, <<acc::binary, char>>)
end
defp escape_dot_string(<<?\\>>, acc) do
<<acc::binary, ?\\, ?\\, ?">>
end
defp escape_dot_string(<<>>, acc) do
<<acc::binary, ?">>
end

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above should be more efficient too as it should apply the binary optimizations for traversal.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Won't it create binary copies on each char? The caller already builds an iolist so the iolist was a natural choice.
  2. escape_dot_string(<<char, rest::binary>>, acc) will split multibyt utf8 chars into bytes, it may accidentally match on ?". Shouldn't that be escape_dot_string(<<char::utf8, rest::binary>>, acc)?

Copy link
Member

@josevalim josevalim Jul 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. No, Erlang can optimize it, and mutate the binary in place. It is more efficient than iolists (see below)
  2. utf8 bytes can not have the lower 127 ASCII characters in them, so we are good. There is no way to partially match on something and have ?" as left over.
$ ERL_COMPILER_OPTIONS=bin_opt_info elixir lib/mix/lib/mix/utils.ex
    warning: redefining module Mix.Utils (current version loaded from lib/mix/ebin/Elixir.Mix.Utils.beam)
    │
  8 │ defmodule Mix.Utils do
    │ ~~~~~~~~~~~~~~~~~~~~~~
    │
    └─ lib/mix/lib/mix/utils.ex:8: Mix.Utils (module)

     warning: OPTIMIZED: match context reused
     │
 493 │     escape_dot_string(rest, <<acc::binary, ?\\, ?\\>>)
     │     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     │
     └─ lib/mix/lib/mix/utils.ex:493

     warning: OPTIMIZED: match context reused
     │
 497 │     escape_dot_string(rest, <<acc::binary, ?\\, ?">>)
     │     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     │
     └─ lib/mix/lib/mix/utils.ex:497

     warning: OPTIMIZED: match context reused
     │
 505 │     escape_dot_string(rest, <<acc::binary, char>>)
     │     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     │
     └─ lib/mix/lib/mix/utils.ex:505


@doc false
@deprecated "Use Macro.underscore/1 instead"
Expand Down
120 changes: 120 additions & 0 deletions lib/mix/test/mix/utils_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,126 @@ defmodule Mix.UtilsTest do
end
end

describe "write_dot_graph!/4" do
test "escapes quotes" do
in_tmp("dot_quotes", fn ->
callback = fn node -> {{node, nil}, []} end

Mix.Utils.write_dot_graph!("graph.dot", "graph", ["foo \"bar\""], callback, [])

assert File.read!("graph.dot") == """
digraph "graph" {
"foo \\"bar\\""
}
"""
end)
end

test "preserves newlines and other control characters" do
in_tmp("dot_newlines", fn ->
callback = fn node -> {{node, nil}, []} end

Mix.Utils.write_dot_graph!("graph.dot", "graph", ["foo \nbar\r\nbaz"], callback, [])

assert File.read!("graph.dot") == """
digraph "graph" {
"foo
bar\r
baz"
}
"""
end)
end

test "handles backslashes correctly" do
in_tmp("dot_backslashes", fn ->
callback = fn node -> {{node, nil}, []} end

Mix.Utils.write_dot_graph!("graph.dot", "graph", ["foo\\bar"], callback, [])

assert File.read!("graph.dot") == """
digraph "graph" {
"foo\\bar"
}
"""
end)
end

test "quote and backslash combinations" do
in_tmp("dot_complex", fn ->
callback = fn node -> {{node, nil}, []} end

test_cases = [
# "fo"o" -> "fo\"o"
{"fo\"o", "fo\\\"o"},
# "fo\"o" -> "fo\\\"o"
{"fo\\\"o", "fo\\\\\"o"},
# "fo\o" -> "fo\o"
{"fo\\o", "fo\\o"},
# "fo\\o" -> "fo\\o"
{"fo\\\\o", "fo\\\\o"},
# "fo\\\o" -> "fo\\\o"
{"fo\\\\\\o", "fo\\\\\\o"}
]

Enum.each(test_cases, fn {input, expected} ->
Mix.Utils.write_dot_graph!("graph.dot", "graph", [input], callback, [])
content = File.read!("graph.dot")
assert content == "digraph \"graph\" {\n \"#{expected}\"\n}\n"
end)
end)
end

test "escapes backslash at end of string" do
in_tmp("dot_end_backslash", fn ->
callback = fn node -> {{node, nil}, []} end

test_cases = [
# "fo\" -> "fo\\" (add backslash)
{"fo\\", "fo\\\\"},
# "fo\\" -> "fo\\" (already valid)
{"fo\\\\", "fo\\\\"},
# "fo\\\" -> "fo\\\\" (add backslash)
{"fo\\\\\\", "fo\\\\\\\\"}
]

Enum.each(test_cases, fn {input, expected} ->
Mix.Utils.write_dot_graph!("graph.dot", "graph", [input], callback, [])
content = File.read!("graph.dot")
assert content == "digraph \"graph\" {\n \"#{expected}\"\n}\n"
end)
end)
end

test "handles empty strings" do
in_tmp("dot_empty", fn ->
callback = fn node -> {{node, nil}, []} end

Mix.Utils.write_dot_graph!("graph.dot", "graph", [""], callback, [])

assert File.read!("graph.dot") == """
digraph "graph" {
""
}
"""
end)
end

test "handles edge labels with escaping" do
in_tmp("dot_edge_labels", fn ->
callback = fn node -> {{node, "edge \"label\""}, []} end

Mix.Utils.write_dot_graph!("graph.dot", "graph", ["node"], callback, [])

assert File.read!("graph.dot") == """
digraph "graph" {
"node" [label="edge \\"label\\""]
}
"""
end)
end
end

defp assert_ebin_symlinked_or_copied(result) do
case result do
{:ok, paths} ->
Expand Down