diff --git a/lib/elixir/lib/regex.ex b/lib/elixir/lib/regex.ex index 6e478d3d05c..429247a311f 100644 --- a/lib/elixir/lib/regex.ex +++ b/lib/elixir/lib/regex.ex @@ -412,6 +412,82 @@ defmodule Regex do opts end + @doc """ + Returns the pattern as an embeddable string. + + If the pattern was compiled with an option which cannot be represented + as an embeddable modifier in the current version of PCRE and `:strict` is true + (the default) then an `ArgumentError` exception will be raised. + + When the `:strict` option is false the pattern will be returned as though + any offending options had not been used and the function will not raise any + exceptions. + + Embeddable modifiers/options are currently: + + * 'i' - `:caseless` + * 'm' - `:multiline` + * 's' - `:dotall, {:newline, :anycrlf}` + * 'x' - `:extended` + + Unembeddable modifiers are: + + * 'f' - `:firstline` + * 'U' - `:ungreedy` + * 'u' - `:unicode, :ucp` + + Any other regex compilation option not listed here is considered unembeddable + and will raise an exception unless the `:strict` option is false. 
+ + ## Examples + iex> Regex.to_embed(~r/foo/) + "(?-imsx:foo)" + + iex> Regex.to_embed(~r/^foo/m) + "(?m-isx:^foo)" + + iex> Regex.to_embed(~r/foo # comment/ix) + "(?ix-ms:foo # comment\\n)" + + iex> Regex.to_embed(~r/foo/iu) + ** (ArgumentError) regex compiled with options [:ucp, :unicode] which cannot be represented as an embedded pattern in this version of PCRE + + iex> Regex.to_embed(~r/foo/imsxu, strict: false) + "(?imsx:foo\\n)" + + """ + @doc since: "1.19.0" + @spec to_embed(t, strict: boolean()) :: String.t() + def to_embed(%Regex{source: source, opts: regex_opts}, embed_opts \\ []) do + strict = Keyword.get(embed_opts, :strict, true) + + modifiers = + case embeddable_modifiers(regex_opts) do + {:ok, modifiers} -> + modifiers + + {:error, modifiers, untranslatable} -> + if strict do + raise ArgumentError, + "regex compiled with options #{inspect(untranslatable)} which cannot be " <> + "represented as an embedded pattern in this version of PCRE" + else + modifiers + end + end + + disabled = [?i, ?m, ?s, ?x] -- modifiers + + disabled = if disabled != [], do: "-#{disabled}", else: "" + + # Future proof option ordering consistency by sorting + modifiers = Enum.sort(modifiers) + + nl = if Enum.member?(regex_opts, :extended), do: "\n", else: "" + + "(?#{modifiers}#{disabled}:#{source}#{nl})" + end + @doc """ Returns a list of names in the regex. 
@@ -845,6 +921,29 @@ defmodule Regex do # Helpers + # translate options to modifiers as required for embedding + defp embeddable_modifiers(list), do: embeddable_modifiers(list, [], []) + + defp embeddable_modifiers([:dotall, {:newline, :anycrlf} | t], acc, err), + do: embeddable_modifiers(t, [?s | acc], err) + + defp embeddable_modifiers([:caseless | t], acc, err), + do: embeddable_modifiers(t, [?i | acc], err) + + defp embeddable_modifiers([:extended | t], acc, err), + do: embeddable_modifiers(t, [?x | acc], err) + + defp embeddable_modifiers([:multiline | t], acc, err), + do: embeddable_modifiers(t, [?m | acc], err) + + defp embeddable_modifiers([option | t], acc, err), + do: embeddable_modifiers(t, acc, [option | err]) + + defp embeddable_modifiers([], acc, []), do: {:ok, acc} + defp embeddable_modifiers([], acc, err), do: {:error, acc, err} + + # translate modifiers to options + defp translate_options(<>, acc), do: translate_options(t, [:dotall, {:newline, :anycrlf} | acc])