From f47e748641ceec9d55b9bcc955f31ada5322725e Mon Sep 17 00:00:00 2001 From: sabiwara Date: Thu, 21 Aug 2025 20:58:41 +0900 Subject: [PATCH 1/8] Use new OTP28.1 :re.import in escaped regex AST https://github.com/erlang/otp/pull/9976 --- lib/elixir/lib/kernel.ex | 18 ++++++++++++++---- lib/elixir/src/elixir_quote.erl | 27 +++++++++++++++++++++++++-- lib/elixir/test/elixir/macro_test.exs | 18 +++++++++++++++++- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/lib/elixir/lib/kernel.ex b/lib/elixir/lib/kernel.ex index ed9c05f7046..3ae821ba4bb 100644 --- a/lib/elixir/lib/kernel.ex +++ b/lib/elixir/lib/kernel.ex @@ -6646,16 +6646,26 @@ defmodule Kernel do end defp compile_regex(binary_or_tuple, options) do - # TODO: Remove this when we require Erlang/OTP 28+ - case is_binary(binary_or_tuple) and :erlang.system_info(:otp_release) < [?2, ?8] do + bin_opts = :binary.list_to_bin(options) + + # TODO: Remove this when we require Erlang/OTP 28.1+ + case is_binary(binary_or_tuple) and compile_time_regexes_supported?() do true -> - Macro.escape(Regex.compile!(binary_or_tuple, :binary.list_to_bin(options))) + Macro.escape(Regex.compile!(binary_or_tuple, bin_opts)) false -> - quote(do: Regex.compile!(unquote(binary_or_tuple), unquote(:binary.list_to_bin(options)))) + quote(do: Regex.compile!(unquote(binary_or_tuple), unquote(bin_opts))) end end + defp compile_time_regexes_supported? do + # OTP 28.0 introduced refs in patterns, which can't be used in AST anymore + # OTP 28.1 introduced :re.import/1 which allows us to fix this in Macro.escape + :erlang.system_info(:otp_release) < [?2, ?8] or + (Code.ensure_loaded?(:re) and + function_exported?(:re, :import, 1)) + end + @doc ~S""" Handles the sigil `~D` for dates. diff --git a/lib/elixir/src/elixir_quote.erl b/lib/elixir/src/elixir_quote.erl index 4260985f28b..c1199df636a 100644 --- a/lib/elixir/src/elixir_quote.erl +++ b/lib/elixir/src/elixir_quote.erl @@ -168,9 +168,28 @@ do_escape(BitString, _) when is_bitstring(BitString) -> {'<<>>', [], [{'::', [], [Bits, {size, [], [Size]}]}, {'::', [], [Bytes, {binary, [], nil}]}]} end; +do_escape(#{ + '__struct__' := 'Elixir.Regex', + 're_pattern' := {re_pattern, _, _, _, Ref}, + 'source' := Source, + 'opts' := Opts +} = Map, Q) when is_reference(Ref), is_binary(Source), is_list(Opts) -> + case erlang:function_exported(re, import, 1) of + true -> + {ok, ExportedPattern} = re:compile(Source, [export | Opts]), + PatternAst = {{'.', [], ['re', 'import']}, [], [do_escape(ExportedPattern, Q)]}, + {'%{}', [], [ + {'__struct__', 'Elixir.Regex'}, + {'re_pattern', PatternAst}, + {'source', Source}, + {'opts', do_escape(Opts, Q)} + ]}; + false -> + escape_map(Map, Q) + end; + do_escape(Map, Q) when is_map(Map) -> - TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))], - {'%{}', [], TT}; + escape_map(Map, Q); do_escape([], _) -> []; @@ -203,6 +222,10 @@ do_escape(Fun, _) when is_function(Fun) -> do_escape(Other, _) -> bad_escape(Other). +escape_map(Map, Q) -> + TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))], + {'%{}', [], TT}. + escape_map_key_value(K, V, Map, Q) -> MaybeRef = if is_reference(V) -> V; diff --git a/lib/elixir/test/elixir/macro_test.exs b/lib/elixir/test/elixir/macro_test.exs index 3c8944defde..61a48fcc4df 100644 --- a/lib/elixir/test/elixir/macro_test.exs +++ b/lib/elixir/test/elixir/macro_test.exs @@ -141,11 +141,27 @@ defmodule MacroTest do assert Macro.escape({:quote, [], [[do: :foo]]}) == {:{}, [], [:quote, [], [[do: :foo]]]} end - test "inspects container when a reference cannot be escaped" do + @tag skip: System.otp_release() < "28" or function_exported?(:re, :import, 1) + test "escape container when a reference cannot be escaped" do assert_raise ArgumentError, ~r"~r/foo/ contains a reference", fn -> Macro.escape(%{~r/foo/ | re_pattern: {:re_pattern, 0, 0, 0, make_ref()}}) end end + + @tag skip: not function_exported?(:re, :import, 1) + test "escape regex will remove references and replace it by a call to :re.import/1" do + assert { + :%{}, + [], + [ + __struct__: Regex, + re_pattern: + {{:., [], [:re, :import]}, [], [{:{}, [], [:re_exported_pattern | _]}]}, + source: "foo", + opts: [] + ] + } = Macro.escape(%{~r/foo/ | re_pattern: {:re_pattern, 0, 0, 0, make_ref()}}) + end end describe "expand_once/2" do From bf77434d4de27ccb8cc262e7803b67ce7cb39840 Mon Sep 17 00:00:00 2001 From: sabiwara Date: Sat, 30 Aug 2025 09:35:22 +0900 Subject: [PATCH 2/8] Without cheating: defining __escape__/1 --- lib/elixir/lib/kernel.ex | 11 +------ lib/elixir/lib/regex.ex | 41 +++++++++++++++++++++++++++ lib/elixir/src/elixir_quote.erl | 41 ++++++++++----------------- lib/elixir/test/elixir/macro_test.exs | 7 ++--- 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/lib/elixir/lib/kernel.ex b/lib/elixir/lib/kernel.ex index 3ae821ba4bb..c78e730e9c2 100644 --- a/lib/elixir/lib/kernel.ex +++ b/lib/elixir/lib/kernel.ex @@ -6648,8 +6648,7 @@ defmodule Kernel do defp compile_regex(binary_or_tuple, options) do bin_opts = :binary.list_to_bin(options) - # TODO: Remove this when we require Erlang/OTP 28.1+ - case is_binary(binary_or_tuple) and compile_time_regexes_supported?() do + case is_binary(binary_or_tuple) do true -> Macro.escape(Regex.compile!(binary_or_tuple, bin_opts)) @@ -6658,14 +6657,6 @@ defmodule Kernel do end end - defp compile_time_regexes_supported? do - # OTP 28.0 introduced refs in patterns, which can't be used in AST anymore - # OTP 28.1 introduced :re.import/1 which allows us to fix this in Macro.escape - :erlang.system_info(:otp_release) < [?2, ?8] or - (Code.ensure_loaded?(:re) and - function_exported?(:re, :import, 1)) - end - @doc ~S""" Handles the sigil `~D` for dates. diff --git a/lib/elixir/lib/regex.ex b/lib/elixir/lib/regex.ex index 8ca5a226d34..d07f4479c25 100644 --- a/lib/elixir/lib/regex.ex +++ b/lib/elixir/lib/regex.ex @@ -1000,4 +1000,45 @@ defmodule Regex do defp translate_options(<<>>, acc), do: acc defp translate_options(t, _acc), do: {:error, t} + + @doc false + def __escape__(%{__struct__: Regex} = regex) do + # OTP 28.0 introduced refs in patterns, which can't be used in AST anymore + # OTP 28.1 introduced :re.import/1 which allows us to work with pre-compiled binaries again + + pattern_ast = + cond do + # TODO: Remove this when we require Erlang/OTP 28+ + # Before OTP 28.0, patterns did not contain any refs and could be safely be escaped + :erlang.system_info(:otp_release) < [?2, ?8] -> + Macro.escape(regex.re_pattern) + + # OTP 28.1+ introduced the ability to export and import regexes from compiled binaries + Code.ensure_loaded?(:re) and function_exported?(:re, :import, 1) -> + {:ok, exported} = :re.compile(regex.source, [:export] ++ regex.opts) + + quote do + :re.import(unquote(Macro.escape(exported))) + end + + # TODO: Remove this when we require Erlang/OTP 28.1+ + # OTP 28.0 works in degraded mode performance-wise, we need to recompile from the source + true -> + quote do + {:ok, pattern} = + :re.compile(unquote(Macro.escape(regex.source)), unquote(Macro.escape(regex.opts))) + + pattern + end + end + + quote do + %{ + __struct__: unquote(Regex), + re_pattern: unquote(pattern_ast), + source: unquote(Macro.escape(regex.source)), + opts: unquote(Macro.escape(regex.opts)) + } + end + end end diff --git a/lib/elixir/src/elixir_quote.erl b/lib/elixir/src/elixir_quote.erl index c1199df636a..4c4cf0dacf4 100644 --- a/lib/elixir/src/elixir_quote.erl +++ b/lib/elixir/src/elixir_quote.erl @@ -3,6 +3,9 @@ %% SPDX-FileCopyrightText: 2012 Plataformatec -module(elixir_quote). + +-feature(maybe_expr, enable). + -export([escape/3, linify/3, linify_with_context_counter/3, build/7, quote/2, has_unquotes/1, fun_to_quoted/1]). -export([dot/5, tail_list/3, list/2, validate_runtime/2, shallow_validate_ast/1]). %% Quote callbacks @@ -168,28 +171,18 @@ do_escape(BitString, _) when is_bitstring(BitString) -> {'<<>>', [], [{'::', [], [Bits, {size, [], [Size]}]}, {'::', [], [Bytes, {binary, [], nil}]}]} end; -do_escape(#{ - '__struct__' := 'Elixir.Regex', - 're_pattern' := {re_pattern, _, _, _, Ref}, - 'source' := Source, - 'opts' := Opts -} = Map, Q) when is_reference(Ref), is_binary(Source), is_list(Opts) -> - case erlang:function_exported(re, import, 1) of - true -> - {ok, ExportedPattern} = re:compile(Source, [export | Opts]), - PatternAst = {{'.', [], ['re', 'import']}, [], [do_escape(ExportedPattern, Q)]}, - {'%{}', [], [ - {'__struct__', 'Elixir.Regex'}, - {'re_pattern', PatternAst}, - {'source', Source}, - {'opts', do_escape(Opts, Q)} - ]}; - false -> - escape_map(Map, Q) - end; - do_escape(Map, Q) when is_map(Map) -> - escape_map(Map, Q); + maybe + #{'__struct__' := Module} ?= Map, + true ?= is_atom(Module), + {module, Module} ?= code:ensure_loaded(Module), + true ?= erlang:function_exported(Module, '__escape__', 1), + Module:'__escape__'(Map) + else + _ -> + TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))], + {'%{}', [], TT} + end; do_escape([], _) -> []; @@ -222,10 +215,6 @@ do_escape(Fun, _) when is_function(Fun) -> do_escape(Other, _) -> bad_escape(Other). -escape_map(Map, Q) -> - TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))], - {'%{}', [], TT}. - escape_map_key_value(K, V, Map, Q) -> MaybeRef = if is_reference(V) -> V; @@ -239,7 +228,7 @@ escape_map_key_value(K, V, Map, Q) -> "(it must be defined within a function instead). ", (bad_escape_hint())/binary>>); true -> {do_quote(K, Q), do_quote(V, Q)} - end. + end. find_tuple_ref(Tuple, Index) when Index > tuple_size(Tuple) -> nil; find_tuple_ref(Tuple, Index) -> diff --git a/lib/elixir/test/elixir/macro_test.exs b/lib/elixir/test/elixir/macro_test.exs index 61a48fcc4df..6036e379864 100644 --- a/lib/elixir/test/elixir/macro_test.exs +++ b/lib/elixir/test/elixir/macro_test.exs @@ -141,10 +141,9 @@ defmodule MacroTest do assert Macro.escape({:quote, [], [[do: :foo]]}) == {:{}, [], [:quote, [], [[do: :foo]]]} end - @tag skip: System.otp_release() < "28" or function_exported?(:re, :import, 1) test "escape container when a reference cannot be escaped" do - assert_raise ArgumentError, ~r"~r/foo/ contains a reference", fn -> - Macro.escape(%{~r/foo/ | re_pattern: {:re_pattern, 0, 0, 0, make_ref()}}) + assert_raise ArgumentError, ~r"contains a reference", fn -> + Macro.escape(%{re_pattern: {:re_pattern, 0, 0, 0, make_ref()}}) end end @@ -160,7 +159,7 @@ defmodule MacroTest do source: "foo", opts: [] ] - } = Macro.escape(%{~r/foo/ | re_pattern: {:re_pattern, 0, 0, 0, make_ref()}}) + } = Macro.escape(~r/foo/) end end From 602096f74a8f47a9112e3c8570f1eb7a1bbb1ec7 Mon Sep 17 00:00:00 2001 From: sabiwara Date: Sat, 30 Aug 2025 11:33:38 +0900 Subject: [PATCH 3/8] Remove special escape clause for module attribute --- lib/elixir/lib/kernel.ex | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/elixir/lib/kernel.ex b/lib/elixir/lib/kernel.ex index c78e730e9c2..dd3a4d8320f 100644 --- a/lib/elixir/lib/kernel.ex +++ b/lib/elixir/lib/kernel.ex @@ -3813,13 +3813,6 @@ defmodule Kernel do {_, doc} when doc_attr? -> do_at_escape(name, doc) - %{__struct__: Regex, source: source, opts: opts} = regex -> - # TODO: Automatically deal with exported regexes - case :erlang.system_info(:otp_release) < [?2, ?8] do - true -> do_at_escape(name, regex) - false -> quote(do: Regex.compile!(unquote(source), unquote(opts))) - end - value -> do_at_escape(name, value) end From 6ef94c5658bb83ee6692660938c2208e7482f879 Mon Sep 17 00:00:00 2001 From: sabiwara Date: Sat, 30 Aug 2025 15:36:18 +0900 Subject: [PATCH 4/8] Move exclude tag to test helper --- lib/elixir/test/elixir/macro_test.exs | 2 +- lib/elixir/test/elixir/test_helper.exs | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/elixir/test/elixir/macro_test.exs b/lib/elixir/test/elixir/macro_test.exs index 6036e379864..8f1735190db 100644 --- a/lib/elixir/test/elixir/macro_test.exs +++ b/lib/elixir/test/elixir/macro_test.exs @@ -147,7 +147,7 @@ defmodule MacroTest do end end - @tag skip: not function_exported?(:re, :import, 1) + @tag :re_import test "escape regex will remove references and replace it by a call to :re.import/1" do assert { :%{}, diff --git a/lib/elixir/test/elixir/test_helper.exs b/lib/elixir/test/elixir/test_helper.exs index 16df79ae49f..2ce3cd85236 100644 --- a/lib/elixir/test/elixir/test_helper.exs +++ b/lib/elixir/test/elixir/test_helper.exs @@ -132,11 +132,20 @@ cover_exclude = [] end +# OTP 28.1+ +re_import_exclude = + if Code.ensure_loaded?(:re) and function_exported?(:re, :import, 1) do + [] + else + [:re_import] + end + ExUnit.start( trace: !!System.get_env("TRACE"), exclude: epmd_exclude ++ - os_exclude ++ line_exclude ++ distributed_exclude ++ source_exclude ++ cover_exclude, + os_exclude ++ + line_exclude ++ distributed_exclude ++ source_exclude ++ cover_exclude ++ re_import_exclude, include: line_include, assert_receive_timeout: String.to_integer(System.get_env("ELIXIR_ASSERT_TIMEOUT", "300")) ) From c3be3297de05695159fd978d11eb386ed316e92f Mon Sep 17 00:00:00 2001 From: sabiwara Date: Sat, 30 Aug 2025 17:40:35 +0900 Subject: [PATCH 5/8] Document __escape__/1 in Macro.escape/2 docs --- lib/elixir/lib/macro.ex | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/lib/elixir/lib/macro.ex b/lib/elixir/lib/macro.ex index 0fca503b690..67bbcebfb06 100644 --- a/lib/elixir/lib/macro.ex +++ b/lib/elixir/lib/macro.ex @@ -843,6 +843,56 @@ defmodule Macro do `escape/2` is used to escape *values* (either directly passed or variable bound), while `quote/2` produces syntax trees for expressions. + + ## Customizing struct escapes + + By default, structs are escaped to generate the AST for their internal representation. + + This approach does not work if the internal representation contains references (like + `Regex` structs), because references can't be escaped. + This is a common issue when working with NIFs. + + Let's imagine we have the following struct: + + defmodule WrapperStruct do + defstruct [:ref] + + def new(...), do: %WrapperStruct{ref: ...} + + # efficiently dump to / load from binaries + def dump_to_binary(%WrapperStruct{ref: ref}), do: ... + def load_from_binary(binary), do: %WrapperStruct{ref: ...} + end + + Such a struct could not be used in module attributes or escaped with `Macro.escape/2`: + + defmodule Foo do + @my_struct WrapperStruct.new(...) + def my_struct, do: @my_struct + end + + ** (ArgumentError) cannot inject attribute @my_struct into function/macro because cannot escape #Reference<...> + + To address this, structs can re-define how they should be escaped by defining a custom + `__escape__/1` function which returns the AST. In our example: + + defmodule WrapperStruct do + # ... + + def __escape__(struct) do + # dump to a binary representation at compile-time + binary = dump_to_binary(struct) + quote do + # load from the binary representation at runtime + WrapperStruct.load_from_binary(unquote(Macro.escape(binary))) + end + end + end + + Now, our example above will be expanded as: + + def my_struct, do: WrapperStruct.load_from_binary(<<...>>) + """ @spec escape(term, escape_opts) :: t() def escape(expr, opts \\ []) do From f490780b3bf037ad54344ea9d3b8cc51d8bfb051 Mon Sep 17 00:00:00 2001 From: Jean Klingler Date: Sat, 30 Aug 2025 17:58:46 +0900 Subject: [PATCH 6/8] Rephrase documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Valim --- lib/elixir/lib/macro.ex | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/elixir/lib/macro.ex b/lib/elixir/lib/macro.ex index 67bbcebfb06..303a8e732ab 100644 --- a/lib/elixir/lib/macro.ex +++ b/lib/elixir/lib/macro.ex @@ -844,13 +844,21 @@ defmodule Macro do bound), while `quote/2` produces syntax trees for expressions. - ## Customizing struct escapes + ## Dealing with references and other runtime values - By default, structs are escaped to generate the AST for their internal representation. + Macros work at compile-time and therefore `Macro.escape/1` can only escape + values that are valid during compilation, such as numbers, atoms, tuples, binaries, + etc. - This approach does not work if the internal representation contains references (like - `Regex` structs), because references can't be escaped. - This is a common issue when working with NIFs. + However, you may have values at compile-time which cannot be escaped, such as + `reference`s and `pid`s, since the process or memory address they point to will + no longer exist once compilation completes. Attempting to escape said values will + load to errors. This is a common issue when working with NIFs. + + Luckily, Elixir v1.19 introduces a mechanism that allow those values to be escaped, + as long as they are encapsulated by a struct within a module that defines the + `__escape__/1` function. This is possible as long as the reference has a natural + text or binary representation that can be serialized during compilation. Let's imagine we have the following struct: @@ -893,6 +901,8 @@ defmodule Macro do def my_struct, do: WrapperStruct.load_from_binary(<<...>>) + When implementing `__escape__/1`, you must ensure that the quoted expression + will evaluate to a struct that represents the one given as argument. """ @spec escape(term, escape_opts) :: t() def escape(expr, opts \\ []) do From 5814e033c50781805066a2fd3bcbb9bbb45d7710 Mon Sep 17 00:00:00 2001 From: sabiwara Date: Sat, 30 Aug 2025 18:05:30 +0900 Subject: [PATCH 7/8] Minor doc tweaks --- lib/elixir/lib/macro.ex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/elixir/lib/macro.ex b/lib/elixir/lib/macro.ex index 303a8e732ab..93d90b369e9 100644 --- a/lib/elixir/lib/macro.ex +++ b/lib/elixir/lib/macro.ex @@ -846,16 +846,16 @@ defmodule Macro do ## Dealing with references and other runtime values - Macros work at compile-time and therefore `Macro.escape/1` can only escape - values that are valid during compilation, such as numbers, atoms, tuples, binaries, + Macros work at compile-time and therefore `Macro.escape/1` can only escape values + that are valid during compilation, such as numbers, atoms, tuples, maps, binaries, etc. However, you may have values at compile-time which cannot be escaped, such as `reference`s and `pid`s, since the process or memory address they point to will no longer exist once compilation completes. Attempting to escape said values will - load to errors. This is a common issue when working with NIFs. + raise. This is a common issue when working with NIFs. - Luckily, Elixir v1.19 introduces a mechanism that allow those values to be escaped, + Luckily, Elixir v1.19 introduces a mechanism that allows those values to be escaped, as long as they are encapsulated by a struct within a module that defines the `__escape__/1` function. This is possible as long as the reference has a natural text or binary representation that can be serialized during compilation. From 8b0e498d78ecc8ebd6bbb107f722e5c088a7075e Mon Sep 17 00:00:00 2001 From: Jean Klingler Date: Sat, 30 Aug 2025 18:11:01 +0900 Subject: [PATCH 8/8] Minor doc tweak --- lib/elixir/lib/macro.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elixir/lib/macro.ex b/lib/elixir/lib/macro.ex index 93d90b369e9..1e242c21f4f 100644 --- a/lib/elixir/lib/macro.ex +++ b/lib/elixir/lib/macro.ex @@ -853,7 +853,7 @@ defmodule Macro do However, you may have values at compile-time which cannot be escaped, such as `reference`s and `pid`s, since the process or memory address they point to will no longer exist once compilation completes. Attempting to escape said values will - raise. This is a common issue when working with NIFs. + raise an exception. This is a common issue when working with NIFs. Luckily, Elixir v1.19 introduces a mechanism that allows those values to be escaped, as long as they are encapsulated by a struct within a module that defines the