Skip to content
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions lib/elixir/lib/regex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,82 @@ defmodule Regex do
opts
end

@doc """
Returns the pattern as an embeddable string.

If the pattern was compiled with an option which cannot be represented
as an embeddable modifier in the current version of PCRE and `:strict` is
true (the default), then an `ArgumentError` exception will be raised.

When the `:strict` option is false, the pattern will be returned as though
any offending options had not been used and the function will not raise any
exceptions.

Embeddable modifiers/options are currently:

  * 'i' - `:caseless`
  * 'm' - `:multiline`
  * 's' - `:dotall, {:newline, :anycrlf}`
  * 'x' - `:extended`

Unembeddable modifiers are:

  * 'f' - `:firstline`
  * 'U' - `:ungreedy`
  * 'u' - `:unicode, :ucp`

Any other regex compilation option not listed here is considered unembeddable
and will raise an exception unless the `:strict` option is false.

## Options

  * `:strict` - when true (the default), raises `ArgumentError` if the regex
    was compiled with any option that has no embeddable modifier. When false,
    such options are dropped from the result instead.

## Examples

    iex> Regex.to_embed(~r/foo/)
    "(?-imsx:foo)"

    iex> Regex.to_embed(~r/^foo/m)
    "(?m-isx:^foo)"

    iex> Regex.to_embed(~r/foo # comment/ix)
    "(?ix-ms:foo # comment\\n)"

    iex> Regex.to_embed(~r/foo/iu)
    ** (ArgumentError) regex compiled with options [:ucp, :unicode] which cannot be represented as an embedded pattern in this version of PCRE

    iex> Regex.to_embed(~r/foo/imsxu, strict: false)
    "(?imsx:foo\\n)"

"""
@doc since: "1.19.0"
@spec to_embed(t, strict: boolean()) :: String.t()
def to_embed(%Regex{source: source, opts: regex_opts}, embed_opts \\ []) do
  strict = Keyword.get(embed_opts, :strict, true)

  modifiers =
    case embeddable_modifiers(regex_opts) do
      {:ok, modifiers} ->
        modifiers

      {:error, modifiers, untranslatable} ->
        if strict do
          raise ArgumentError,
                "regex compiled with options #{inspect(untranslatable)} which cannot be " <>
                  "represented as an embedded pattern in this version of PCRE"
        else
          # Non-strict mode: keep what could be translated, drop the rest.
          modifiers
        end
    end

  # Every embeddable modifier that is not enabled is listed after a "-" so it
  # is explicitly switched off inside the group (presumably so the embedded
  # pattern does not inherit flags from an enclosing pattern).
  disabled =
    case [?i, ?m, ?s, ?x] -- modifiers do
      [] -> ""
      off -> "-#{off}"
    end

  # Future proof option ordering consistency by sorting
  modifiers = Enum.sort(modifiers)

  # Extended patterns get a trailing newline before the closing parenthesis;
  # this looks intended to terminate a trailing "# comment" (see the examples).
  nl = if :extended in regex_opts, do: "\n", else: ""

  "(?#{modifiers}#{disabled}:#{source}#{nl})"
end

@doc """
Returns a list of names in the regex.

Expand Down Expand Up @@ -845,6 +921,29 @@ defmodule Regex do

# Helpers

# Translates compile options into the modifier characters needed for embedding.
#
# Returns `{:ok, modifiers}` when every option could be translated, otherwise
# `{:error, modifiers, rejected}` where `rejected` holds the options that have
# no embeddable modifier. Both lists are built by prepending, so they come out
# in reverse order of appearance.
defp embeddable_modifiers(options), do: embeddable_modifiers(options, [], [])

# End of input: the result tag depends on whether anything was rejected.
defp embeddable_modifiers([], modifiers, []), do: {:ok, modifiers}
defp embeddable_modifiers([], modifiers, rejected), do: {:error, modifiers, rejected}

# 's' is only recognised when `:dotall` is immediately followed by
# `{:newline, :anycrlf}`; either option on its own falls through to the
# generic clause below and is rejected.
defp embeddable_modifiers([:dotall, {:newline, :anycrlf} | rest], modifiers, rejected) do
  embeddable_modifiers(rest, [?s | modifiers], rejected)
end

defp embeddable_modifiers([option | rest], modifiers, rejected) do
  case option do
    :caseless -> embeddable_modifiers(rest, [?i | modifiers], rejected)
    :extended -> embeddable_modifiers(rest, [?x | modifiers], rejected)
    :multiline -> embeddable_modifiers(rest, [?m | modifiers], rejected)
    other -> embeddable_modifiers(rest, modifiers, [other | rejected])
  end
end

# translate modifiers to options

defp translate_options(<<?s, t::binary>>, acc),
do: translate_options(t, [:dotall, {:newline, :anycrlf} | acc])

Expand Down