Skip to content

Commit 4d66ac7

Browse files
committed
Dedicated inspect implementation for regexes. Fix #2250
We no longer share the implementation with BitString because regex literals have slightly different escaping rules
1 parent fb49aa1 commit 4d66ac7

File tree

3 files changed

+105
-5
lines changed

3 files changed

+105
-5
lines changed

lib/elixir/lib/inspect.ex

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,19 +202,42 @@ defimpl Inspect, for: BitString do
202202
else
203203
<< byte :: size(8), h :: binary >> = head
204204
t = << h :: binary, t :: binary >>
205-
escape(t, char, << binary :: binary, octify(byte) :: binary >>)
205+
escape(t, char, << binary :: binary, escape_char(byte) :: binary >>)
206206
end
207207
end
208208
defp escape(<<h, t :: binary>>, char, binary) do
209-
escape(t, char, << binary :: binary, octify(h) :: binary >>)
209+
escape(t, char, << binary :: binary, escape_char(h) :: binary >>)
210210
end
211211
defp escape(<<>>, _char, binary), do: binary
212212

213+
214+
@doc false
# Shared with the Regex implementation of Inspect.
# Code points that fit in one byte are written as a three-digit octal
# escape; everything above that range is written as a \x{...} hex escape.
def escape_char(char) do
  if char in ?\000..?\377 do
    octify(char)
  else
    hexify(char)
  end
end
220+
213221
# Renders a byte as a backslash followed by three octal digits
# (for example, 10 becomes \012).
defp octify(byte) do
214222
# Split the byte into 2 + 3 + 3 bits: one field per octal digit.
<< hi :: size(2), mi :: size(3), lo :: size(3) >> = << byte >>
215223
# Adding each field to ?0 yields the matching ASCII digit.
<< ?\\, ?0 + hi, ?0 + mi, ?0 + lo >>
216224
end
217225

226+
# Renders a code point as a \x{...} escape with lowercase hex digits.
# Values below 0x10000 are written with four digits.
defp hexify(char) when char < 0x10000 do
227+
# Destructure the 16-bit value into four 4-bit nibbles, one per hex digit.
<<a::4, b::4, c::4, d::4>> = <<char::size(16)>>
228+
<<?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), ?}>>
229+
end
230+
231+
# Values from 0x10000 up to (but excluding) 0x1000000 are written with six digits.
# There is no clause for larger values.
defp hexify(char) when char < 0x1000000 do
232+
# Destructure the 24-bit value into six 4-bit nibbles.
<<a::4, b::4, c::4, d::4, e::4, f::4>> = <<char::size(24)>>
233+
<<?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c),
234+
to_hex(d), to_hex(e), to_hex(f), ?}>>
235+
end
236+
237+
# Maps a nibble (0..15) to its ASCII hex digit: ?0..?9 for 0..9 and
# lowercase ?a..?f for 10..15.
defp to_hex(c) when c in 0..9, do: ?0+c
238+
defp to_hex(c) when c in 10..15, do: ?a+c-10
239+
240+
218241
# Copies the first binary onto the end of the second, one byte at a time,
# and returns the accumulated binary once the first one is exhausted.
defp append(<<h, t :: binary>>, binary), do: append(t, << binary :: binary, h >>)
219242
defp append(<<>>, binary), do: binary
220243

@@ -419,9 +442,57 @@ defimpl Inspect, for: Float do
419442
end
420443

421444
defimpl Inspect, for: Regex do
  # Renders a regex as a `~r/source/opts` literal.
  #
  # The source is escaped here rather than through the BitString
  # implementation because regex literals follow slightly different
  # escaping rules: sequences such as \b, \d, \e and \s are meaningful to
  # the regex engine and must not be emitted as raw control characters.
  # The inspect options are not used.
  def inspect(regex, _opts) do
    delim = ?/
    concat ["~r",
            <<delim, escape(regex.source, delim)::binary, delim>>,
            regex.opts]
  end

  # Escapes `bin` so that it can be placed between two `term` delimiters.
  defp escape(bin, term),
    do: escape(bin, <<>>, term)

  # An escaped backslash is consumed as a pair. Otherwise its second byte
  # could be mistaken for the start of an escaped delimiter (source `\\/`),
  # leaving the delimiter unescaped and ending the literal too early.
  defp escape(<<?\\, ?\\>> <> rest, buf, term),
    do: escape(rest, buf <> <<?\\, ?\\>>, term)

  # A delimiter that is already escaped in the source is kept as is.
  defp escape(<<?\\, term>> <> rest, buf, term),
    do: escape(rest, buf <> <<?\\, term>>, term)

  # A bare delimiter gets a backslash in front of it.
  defp escape(<<term>> <> rest, buf, term),
    do: escape(rest, buf <> <<?\\, term>>, term)

  # the list of characters is from `String.printable?` impl
  # minus characters treated specially by regex: \s, \d, \b, \e

  defp escape(<<?\n>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, ?\\, ?n>>, term)

  defp escape(<<?\r>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, ?\\, ?r>>, term)

  defp escape(<<?\t>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, ?\\, ?t>>, term)

  defp escape(<<?\v>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, ?\\, ?v>>, term)

  defp escape(<<?\f>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, ?\\, ?f>>, term)

  defp escape(<<?\a>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, ?\\, ?a>>, term)

  # Any other valid UTF-8 character is copied through when printable;
  # \d, \b and \e as raw characters are always written as numeric escapes.
  defp escape(<<c::utf8>> <> rest, buf, term) do
    charstr = <<c::utf8>>
    if String.printable?(charstr) and not c in [?\d, ?\b, ?\e] do
      escape(rest, buf <> charstr, term)
    else
      escape(rest, buf <> Inspect.BitString.escape_char(c), term)
    end
  end

  # A byte that is not part of valid UTF-8. escape_char/1 returns a binary,
  # so the segment must be typed ::binary; an untyped segment is treated
  # as an integer and raises ArgumentError.
  defp escape(<<c>> <> rest, buf, term),
    do: escape(rest, <<buf::binary, Inspect.BitString.escape_char(c)::binary>>, term)

  defp escape(<<>>, buf, _), do: buf
end
426497

427498
defimpl Inspect, for: Function do

lib/elixir/test/elixir/inspect_test.exs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,8 @@ defmodule Inspect.OthersTest do
335335
end
336336

337337
# Checks the `~r/.../opts` rendering produced by inspect for regexes.
test :regex do
338-
"~r\"foo\"m" = inspect(~r(foo)m)
338+
# The output always uses / as the delimiter, whatever the literal used.
"~r/foo/m" = inspect(~r(foo)m)
339+
# Source built from an interpolated string: the raw \b, \d and \e characters
# are printed as octal escapes, the space is kept, and / is escaped.
"~r/\\a\\010\\177\\033\\f\\n\\r \\t\\v\\//" = inspect(Regex.compile!("\a\b\d\e\f\n\r\s\t\v/"))
340+
# Source written as a sigil literal: \b, \d, \e and \s are printed back as written.
"~r/\\a\\b\\d\\e\\f\\n\\r\\s\\t\\v\\//" = inspect(~r<\a\b\d\e\f\n\r\s\t\v/>)
339341
end
340342
end

lib/elixir/test/elixir/regex_test.exs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,34 @@ defmodule RegexTest do
4242
end
4343

4444
# Regex.source/1 must return the string the regex was built from, both for
# Regex.compile!/1 and for a sigil that interpolates that same string.
test :source do
45+
src = "foo"
46+
assert Regex.source(Regex.compile!(src)) == src
47+
assert Regex.source(~r/#{src}/) == src
48+
49+
# Source made of control characters and a space.
src = "\a\b\d\e\f\n\r\s\t\v"
50+
assert Regex.source(Regex.compile!(src)) == src
51+
assert Regex.source(~r/#{src}/) == src
52+
53+
# Source where b, d, e and s are preceded by a literal backslash.
src = "\a\\b\\d\\e\f\n\r\\s\t\v"
54+
assert Regex.source(Regex.compile!(src)) == src
55+
assert Regex.source(~r/#{src}/) == src
56+
end
57+
58+
# Compares regexes built with Regex.compile!/1 against sigil literals,
# looking at both the stored source and the compiled pattern.
test :literal_source do
4559
assert Regex.source(Regex.compile!("foo")) == "foo"
60+
assert Regex.source(~r"foo") == "foo"
61+
assert Regex.re_pattern(Regex.compile!("foo"))
62+
== Regex.re_pattern(~r"foo")
63+
64+
# In a sigil literal, \b, \d, \e and \s stay in the source as backslash
# sequences, while \a, \f, \n, \r, \t and \v become the actual characters.
assert Regex.source(Regex.compile!("\a\b\d\e\f\n\r\s\t\v")) == "\a\b\d\e\f\n\r\s\t\v"
65+
assert Regex.source(~r<\a\b\d\e\f\n\r\s\t\v>) == "\a\\b\\d\\e\f\n\r\\s\t\v"
66+
assert Regex.re_pattern(Regex.compile!("\a\b\d\e\f\n\r\s\t\v"))
67+
== Regex.re_pattern(~r"\a\010\177\033\f\n\r \t\v")
68+
69+
# A doubled backslash in a sigil literal stays doubled in the source.
assert Regex.source(Regex.compile!("\a\\b\\d\e\f\n\r\\s\t\v")) == "\a\\b\\d\e\f\n\r\\s\t\v"
70+
assert Regex.source(~r<\a\\b\\d\\e\f\n\r\\s\t\v>) == "\a\\\\b\\\\d\\\\e\f\n\r\\\\s\t\v"
71+
assert Regex.re_pattern(Regex.compile!("\a\\b\\d\e\f\n\r\\s\t\v"))
72+
== Regex.re_pattern(~r"\a\b\d\e\f\n\r\s\t\v")
4673
end
4774

4875
test :opts do

0 commit comments

Comments
 (0)