Skip to content

Commit ff21a9d

Browse files
committed
Add __escape__/1 and use it to fix Regex escaping in OTP28.1+ (#14720)
Leverages newly added :re.import/1. erlang/otp#9976
1 parent 1778bf2 commit ff21a9d

File tree

6 files changed

+149
-18
lines changed

6 files changed

+149
-18
lines changed

lib/elixir/lib/kernel.ex

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3814,13 +3814,6 @@ defmodule Kernel do
38143814
{_, doc} when doc_attr? ->
38153815
do_at_escape(name, doc)
38163816

3817-
%{__struct__: Regex, source: source, opts: opts} = regex ->
3818-
# TODO: Automatically deal with exported regexes
3819-
case :erlang.system_info(:otp_release) < [?2, ?8] do
3820-
true -> do_at_escape(name, regex)
3821-
false -> quote(do: Regex.compile!(unquote(source), unquote(opts)))
3822-
end
3823-
38243817
value ->
38253818
do_at_escape(name, value)
38263819
end
@@ -6647,13 +6640,14 @@ defmodule Kernel do
66476640
end
66486641

66496642
defp compile_regex(binary_or_tuple, options) do
6650-
# TODO: Remove this when we require Erlang/OTP 28+
6651-
case is_binary(binary_or_tuple) and :erlang.system_info(:otp_release) < [?2, ?8] do
6643+
bin_opts = :binary.list_to_bin(options)
6644+
6645+
case is_binary(binary_or_tuple) do
66526646
true ->
6653-
Macro.escape(Regex.compile!(binary_or_tuple, :binary.list_to_bin(options)))
6647+
Macro.escape(Regex.compile!(binary_or_tuple, bin_opts))
66546648

66556649
false ->
6656-
quote(do: Regex.compile!(unquote(binary_or_tuple), unquote(:binary.list_to_bin(options))))
6650+
quote(do: Regex.compile!(unquote(binary_or_tuple), unquote(bin_opts)))
66576651
end
66586652
end
66596653

lib/elixir/lib/macro.ex

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,66 @@ defmodule Macro do
843843
`escape/2` is used to escape *values* (either directly passed or variable
844844
bound), while `quote/2` produces syntax trees for
845845
expressions.
846+
847+
## Dealing with references and other runtime values
848+
849+
Macros work at compile-time and therefore `Macro.escape/1` can only escape values
850+
that are valid during compilation, such as numbers, atoms, tuples, maps, binaries,
851+
etc.
852+
853+
However, you may have values at compile-time which cannot be escaped, such as
854+
`reference`s and `pid`s, since the process or memory address they point to will
855+
no longer exist once compilation completes. Attempting to escape said values will
856+
raise an exception. This is a common issue when working with NIFs.
857+
858+
Luckily, Elixir v1.19 introduces a mechanism that allows those values to be escaped,
859+
as long as they are encapsulated by a struct within a module that defines the
860+
`__escape__/1` function. This is possible as long as the reference has a natural
861+
text or binary representation that can be serialized during compilation.
862+
863+
Let's imagine we have the following struct:
864+
865+
defmodule WrapperStruct do
866+
defstruct [:ref]
867+
868+
def new(...), do: %WrapperStruct{ref: ...}
869+
870+
# efficiently dump to / load from binaries
871+
def dump_to_binary(%WrapperStruct{ref: ref}), do: ...
872+
def load_from_binary(binary), do: %WrapperStruct{ref: ...}
873+
end
874+
875+
Such a struct could not be used in module attributes or escaped with `Macro.escape/2`:
876+
877+
defmodule Foo do
878+
@my_struct WrapperStruct.new(...)
879+
def my_struct, do: @my_struct
880+
end
881+
882+
** (ArgumentError) cannot inject attribute @my_struct into function/macro because cannot escape #Reference<...>
883+
884+
To address this, structs can re-define how they should be escaped by defining a custom
885+
`__escape__/1` function which returns the AST. In our example:
886+
887+
defmodule WrapperStruct do
888+
# ...
889+
890+
def __escape__(struct) do
891+
# dump to a binary representation at compile-time
892+
binary = dump_to_binary(struct)
893+
quote do
894+
# load from the binary representation at runtime
895+
WrapperStruct.load_from_binary(unquote(Macro.escape(binary)))
896+
end
897+
end
898+
end
899+
900+
Now, our example above will be expanded as:
901+
902+
def my_struct, do: WrapperStruct.load_from_binary(<<...>>)
903+
904+
When implementing `__escape__/1`, you must ensure that the quoted expression
905+
will evaluate to a struct that represents the one given as argument.
846906
"""
847907
@spec escape(term, escape_opts) :: t()
848908
def escape(expr, opts \\ []) do

lib/elixir/lib/regex.ex

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,4 +1000,45 @@ defmodule Regex do
10001000

10011001
defp translate_options(<<>>, acc), do: acc
10021002
defp translate_options(t, _acc), do: {:error, t}
1003+
1004+
@doc false
1005+
def __escape__(%{__struct__: Regex} = regex) do
1006+
# OTP 28.0 introduced refs in patterns, which can't be used in AST anymore
1007+
# OTP 28.1 introduced :re.import/1 which allows us to work with pre-compiled binaries again
1008+
1009+
pattern_ast =
1010+
cond do
1011+
# TODO: Remove this when we require Erlang/OTP 28+
1012+
# Before OTP 28.0, patterns did not contain any refs and could safely be escaped
1013+
:erlang.system_info(:otp_release) < [?2, ?8] ->
1014+
Macro.escape(regex.re_pattern)
1015+
1016+
# OTP 28.1+ introduced the ability to export and import regexes from compiled binaries
1017+
Code.ensure_loaded?(:re) and function_exported?(:re, :import, 1) ->
1018+
{:ok, exported} = :re.compile(regex.source, [:export] ++ regex.opts)
1019+
1020+
quote do
1021+
:re.import(unquote(Macro.escape(exported)))
1022+
end
1023+
1024+
# TODO: Remove this when we require Erlang/OTP 28.1+
1025+
# OTP 28.0 works in degraded mode performance-wise, because the pattern has to be recompiled from the source at runtime
1026+
true ->
1027+
quote do
1028+
{:ok, pattern} =
1029+
:re.compile(unquote(Macro.escape(regex.source)), unquote(Macro.escape(regex.opts)))
1030+
1031+
pattern
1032+
end
1033+
end
1034+
1035+
quote do
1036+
%{
1037+
__struct__: unquote(Regex),
1038+
re_pattern: unquote(pattern_ast),
1039+
source: unquote(Macro.escape(regex.source)),
1040+
opts: unquote(Macro.escape(regex.opts))
1041+
}
1042+
end
1043+
end
10031044
end

lib/elixir/src/elixir_quote.erl

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
%% SPDX-FileCopyrightText: 2012 Plataformatec
44

55
-module(elixir_quote).
6+
7+
-feature(maybe_expr, enable).
8+
69
-export([escape/3, linify/3, linify_with_context_counter/3, build/7, quote/2, has_unquotes/1, fun_to_quoted/1]).
710
-export([dot/5, tail_list/3, list/2, validate_runtime/2, shallow_validate_ast/1]). %% Quote callbacks
811

@@ -164,8 +167,17 @@ do_escape(BitString, _) when is_bitstring(BitString) ->
164167
end;
165168

166169
do_escape(Map, Q) when is_map(Map) ->
167-
TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))],
168-
{'%{}', [], TT};
170+
maybe
171+
#{'__struct__' := Module} ?= Map,
172+
true ?= is_atom(Module),
173+
{module, Module} ?= code:ensure_loaded(Module),
174+
true ?= erlang:function_exported(Module, '__escape__', 1),
175+
Module:'__escape__'(Map)
176+
else
177+
_ ->
178+
TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))],
179+
{'%{}', [], TT}
180+
end;
169181

170182
do_escape([], _) ->
171183
[];
@@ -211,7 +223,7 @@ escape_map_key_value(K, V, Map, Q) ->
211223
"(it must be defined within a function instead). ", (bad_escape_hint())/binary>>);
212224
true ->
213225
{do_quote(K, Q), do_quote(V, Q)}
214-
end.
226+
end.
215227

216228
find_tuple_ref(Tuple, Index) when Index > tuple_size(Tuple) -> nil;
217229
find_tuple_ref(Tuple, Index) ->

lib/elixir/test/elixir/macro_test.exs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,26 @@ defmodule MacroTest do
141141
assert Macro.escape({:quote, [], [[do: :foo]]}) == {:{}, [], [:quote, [], [[do: :foo]]]}
142142
end
143143

144-
test "inspects container when a reference cannot be escaped" do
145-
assert_raise ArgumentError, ~r"~r/foo/ contains a reference", fn ->
146-
Macro.escape(%{~r/foo/ | re_pattern: {:re_pattern, 0, 0, 0, make_ref()}})
144+
test "escape container when a reference cannot be escaped" do
145+
assert_raise ArgumentError, ~r"contains a reference", fn ->
146+
Macro.escape(%{re_pattern: {:re_pattern, 0, 0, 0, make_ref()}})
147147
end
148148
end
149+
150+
@tag :re_import
151+
test "escape regex will remove references and replace it by a call to :re.import/1" do
152+
assert {
153+
:%{},
154+
[],
155+
[
156+
__struct__: Regex,
157+
re_pattern:
158+
{{:., [], [:re, :import]}, [], [{:{}, [], [:re_exported_pattern | _]}]},
159+
source: "foo",
160+
opts: []
161+
]
162+
} = Macro.escape(~r/foo/)
163+
end
149164
end
150165

151166
describe "expand_once/2" do

lib/elixir/test/elixir/test_helper.exs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,20 @@ cover_exclude =
132132
[]
133133
end
134134

135+
# OTP 28.1+
136+
re_import_exclude =
137+
if Code.ensure_loaded?(:re) and function_exported?(:re, :import, 1) do
138+
[]
139+
else
140+
[:re_import]
141+
end
142+
135143
ExUnit.start(
136144
trace: !!System.get_env("TRACE"),
137145
exclude:
138146
epmd_exclude ++
139-
os_exclude ++ line_exclude ++ distributed_exclude ++ source_exclude ++ cover_exclude,
147+
os_exclude ++
148+
line_exclude ++ distributed_exclude ++ source_exclude ++ cover_exclude ++ re_import_exclude,
140149
include: line_include,
141150
assert_receive_timeout: String.to_integer(System.get_env("ELIXIR_ASSERT_TIMEOUT", "300"))
142151
)

0 commit comments

Comments
 (0)