Skip to content

Commit 768fa49

Browse files
committed
Optimize inspection of binaries
We have used this code:

    data = Base.encode64(:crypto.strong_rand_bytes(2_000_000))

    Mix.Tasks.Profile.Tprof.profile(
      fn -> inspect(data, printable_limit: :infinity) end,
      type: :memory
    )

The first result reported this:

    Profile results of #PID<0.106.0>
    #                                     CALLS      %      WORDS PER CALL
    Total                              10666685 100.00 9268308211   868.90
    :code.ensure_loaded/1                     1   0.00          3     3.00
    Inspect.BitString.inspect/2               1   0.00          4     4.00
    Inspect.Algebra.group/2                   1   0.00          4     4.00
    Inspect.Algebra.format/2                  1   0.00          6     6.00
    Inspect.Algebra.format/3                  3   0.00          8     2.67
    :erlang.iolist_to_binary/1                2   0.00         11     5.50
    anonymous fn/2 in Kernel.struct/2         1   0.00         17    17.00
    Inspect.Opts.new/1                        1   0.00         17    17.00
    Code.Identifier.escape/5            2666669   0.06    5333344     2.00
    String.recur_printable?/2           2666669   0.29   26666778    10.00
    Code.Identifier.escape_map/1        2666668  28.72 2662035405   998.26
    Code.Identifier.escape_char/1       2666668  70.93 6574272614  2465.35

As you can see, most of the allocations were in escape_char, which makes sense: it will be invoked a lot for raw binaries and it allocated lists, instead of relying on binary optimizations.

We reimplemented Code.Identifier.escape/5 to rely on binary optimizations, which then yielded this:

    Profile results of #PID<0.112.0>
    #                                     CALLS      %      WORDS PER CALL
    Total                               5333350 100.00   26666864     5.00
    :code.ensure_loaded/1                     1   0.00          3     3.00
    Inspect.BitString.inspect/2               1   0.00          4     4.00
    Inspect.Algebra.group/2                   1   0.00          4     4.00
    Inspect.Algebra.format/2                  1   0.00          6     6.00
    Code.Identifier.escape/4                  1   0.00          8     8.00
    Code.Identifier.escape/5            2666669   0.00          8     0.00
    Inspect.Algebra.format/3                  3   0.00          8     2.67
    :erlang.iolist_to_binary/1                2   0.00         11     5.50
    anonymous fn/2 in Kernel.struct/2         1   0.00         17    17.00
    Inspect.Opts.new/1                        1   0.00         17    17.00
    String.recur_printable?/2           2666669 100.00   26666778    10.00

Now the allocations are all on `recur_printable?/2`. By running this command, we can find out why it is allocating so much:

    ERL_COMPILER_OPTIONS=bin_opt_info elixir lib/elixir/lib/string.ex

It happens that, due to a catch-all clause, new binary contexts had to be allocated.
By addressing that, we now get:

    Profile results of #PID<0.119.0>
    #                                     CALLS      %      WORDS PER CALL
    Total                               5333350 100.00         91     0.00
    :code.ensure_loaded/1                     1   3.30          3     3.00
    Inspect.BitString.inspect/2               1   4.40          4     4.00
    Inspect.Algebra.group/2                   1   4.40          4     4.00
    String.recur_printable?/2           2666669   5.49          5     0.00
    Inspect.Algebra.format/2                  1   6.59          6     6.00
    Code.Identifier.escape/4                  1   8.79          8     8.00
    Code.Identifier.escape/5            2666669   8.79          8     0.00
    Inspect.Algebra.format/3                  3   8.79          8     2.67
    :erlang.iolist_to_binary/1                2  12.09         11     5.50
    anonymous fn/2 in Kernel.struct/2         1  18.68         17    17.00
    Inspect.Opts.new/1                        1  18.68         17    17.00

That is several orders of magnitude fewer words. We basically allocate the new escaped binary and a few extra bytes.

Closes #14029.
1 parent eadbd8b commit 768fa49

File tree

2 files changed

+36
-30
lines changed

2 files changed

+36
-30
lines changed

lib/elixir/lib/code/identifier.ex

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -71,41 +71,45 @@ defmodule Code.Identifier do
7171
Escapes the given identifier.
7272
"""
7373
@spec escape(binary(), char() | nil, :infinity | non_neg_integer, (char() -> iolist() | false)) ::
74-
{escaped :: iolist(), remaining :: binary()}
74+
{escaped :: binary(), remaining :: binary()}
7575
def escape(binary, char, limit \\ :infinity, fun \\ &escape_map/1)
76-
when ((char in 0..0x10FFFF or is_nil(char)) and limit == :infinity) or
76+
when (is_binary(binary) and ((char in 0..0x10FFFF or is_nil(char)) and limit == :infinity)) or
7777
(is_integer(limit) and limit >= 0) do
78-
escape(binary, char, limit, [], fun)
78+
escape(binary, char, limit, <<>>, fun)
7979
end
8080

8181
defp escape(<<_, _::binary>> = binary, _char, 0, acc, _fun) do
8282
{acc, binary}
8383
end
8484

8585
defp escape(<<char, t::binary>>, char, count, acc, fun) do
86-
escape(t, char, decrement(count), [acc | [?\\, char]], fun)
86+
escape(t, char, decrement(count), <<acc::binary, ?\\, char>>, fun)
8787
end
8888

8989
defp escape(<<?#, ?{, t::binary>>, char, count, acc, fun) do
90-
escape(t, char, decrement(count), [acc | [?\\, ?#, ?{]], fun)
90+
escape(t, char, decrement(count), <<acc::binary, ?\\, ?#, ?{>>, fun)
9191
end
9292

9393
defp escape(<<h::utf8, t::binary>>, char, count, acc, fun) do
94-
escaped = if value = fun.(h), do: value, else: escape_char(h)
95-
escape(t, char, decrement(count), [acc | escaped], fun)
94+
if value = fun.(h) do
95+
value = IO.iodata_to_binary(value)
96+
escape(t, char, decrement(count), <<acc::binary, value::binary>>, fun)
97+
else
98+
escape(t, char, decrement(count), escape_char(h, acc), fun)
99+
end
96100
end
97101

98102
defp escape(<<a::4, b::4, t::binary>>, char, count, acc, fun) do
99-
escape(t, char, decrement(count), [acc | [?\\, ?x, to_hex(a), to_hex(b)]], fun)
103+
escape(t, char, decrement(count), <<acc::binary, ?\\, ?x, to_hex(a), to_hex(b)>>, fun)
100104
end
101105

102106
defp escape(<<>>, _char, _count, acc, _fun) do
103107
{acc, <<>>}
104108
end
105109

106-
defp escape_char(0), do: [?\\, ?0]
110+
defp escape_char(0, acc), do: <<acc::binary, ?\\, ?0>>
107111

108-
defp escape_char(char)
112+
defp escape_char(char, acc)
109113
# Some characters that are confusing (zero-width / alternative spaces) are displayed
110114
# using their unicode representation:
111115
# https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Special-purpose_characters
@@ -131,42 +135,44 @@ defmodule Code.Identifier do
131135
when char in 0x2000..0x200A
132136
when char == 0x205F do
133137
<<a::4, b::4, c::4, d::4>> = <<char::16>>
134-
[?\\, ?u, to_hex(a), to_hex(b), to_hex(c), to_hex(d)]
138+
<<acc::binary, ?\\, ?u, to_hex(a), to_hex(b), to_hex(c), to_hex(d)>>
135139
end
136140

137-
defp escape_char(char)
141+
defp escape_char(char, acc)
138142
when char in 0x20..0x7E
139143
when char in 0xA0..0xD7FF
140144
when char in 0xE000..0xFFFD
141145
when char in 0x10000..0x10FFFF do
142-
<<char::utf8>>
146+
<<acc::binary, char::utf8>>
143147
end
144148

145-
defp escape_char(char) when char < 0x100 do
149+
defp escape_char(char, acc) when char < 0x100 do
146150
<<a::4, b::4>> = <<char::8>>
147-
[?\\, ?x, to_hex(a), to_hex(b)]
151+
<<acc::binary, ?\\, ?x, to_hex(a), to_hex(b)>>
148152
end
149153

150-
defp escape_char(char) when char < 0x10000 do
154+
defp escape_char(char, acc) when char < 0x10000 do
151155
<<a::4, b::4, c::4, d::4>> = <<char::16>>
152-
[?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), ?}]
156+
<<acc::binary, ?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), ?}>>
153157
end
154158

155-
defp escape_char(char) when char < 0x1000000 do
159+
defp escape_char(char, acc) when char < 0x1000000 do
156160
<<a::4, b::4, c::4, d::4, e::4, f::4>> = <<char::24>>
157-
[?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), to_hex(e), to_hex(f), ?}]
161+
162+
<<acc::binary, ?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), to_hex(e), to_hex(f),
163+
?}>>
158164
end
159165

160-
defp escape_map(?\a), do: [?\\, ?a]
161-
defp escape_map(?\b), do: [?\\, ?b]
162-
defp escape_map(?\d), do: [?\\, ?d]
163-
defp escape_map(?\e), do: [?\\, ?e]
164-
defp escape_map(?\f), do: [?\\, ?f]
165-
defp escape_map(?\n), do: [?\\, ?n]
166-
defp escape_map(?\r), do: [?\\, ?r]
167-
defp escape_map(?\t), do: [?\\, ?t]
168-
defp escape_map(?\v), do: [?\\, ?v]
169-
defp escape_map(?\\), do: [?\\, ?\\]
166+
defp escape_map(?\a), do: "\\a"
167+
defp escape_map(?\b), do: "\\b"
168+
defp escape_map(?\d), do: "\\d"
169+
defp escape_map(?\e), do: "\\e"
170+
defp escape_map(?\f), do: "\\f"
171+
defp escape_map(?\n), do: "\\n"
172+
defp escape_map(?\r), do: "\\r"
173+
defp escape_map(?\t), do: "\\t"
174+
defp escape_map(?\v), do: "\\v"
175+
defp escape_map(?\\), do: "\\\\"
170176
defp escape_map(_), do: false
171177

172178
@compile {:inline, to_hex: 1, decrement: 1}

lib/elixir/lib/string.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ defmodule String do
326326
recur_printable?(string, character_limit)
327327
end
328328

329-
defp recur_printable?(_string, 0), do: true
329+
defp recur_printable?(<<_::binary>>, 0), do: true
330330
defp recur_printable?(<<>>, _character_limit), do: true
331331

332332
for char <- 0x20..0x7E do

0 commit comments

Comments
 (0)