Skip to content

Commit 0bef2c1

Browse files
author
Yves Orton
committed
regex.ex - add support for to_embed!() and to_embed()
to_embed!(regex,strict) returns an embeddable representation of regex. For instance ~r/foo/i can be represented as ~r/(?i-msx:foo)/. If strict is true (the default) then it will raise an ArgumentError if the regex was compiled with an option/modifier which cannot be represented as an embeddable pattern. If strict is false then it will ignore any unembeddable options. This can be helpful if the pattern was compiled with /u and will be embedded in a pattern also compiled with /u. to_embed(regex) is the same as to_embed!(regex,false).
1 parent 2e3b812 commit 0bef2c1

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed

lib/elixir/lib/regex.ex

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ defmodule Regex do
159159
"Unicode-spaces"
160160
161161
"""
162+
alias DialyxirVendored.Warnings.NoReturn
162163

163164
# Regex struct fields and their defaults: `re_pattern` is nil, `source` is an
# empty string and `opts` is an empty list.
# NOTE(review): `re_pattern` presumably holds the compiled pattern - confirm.
defstruct re_pattern: nil, source: "", opts: []
164165

@@ -412,6 +413,91 @@ defmodule Regex do
412413
opts
413414
end
414415

416+
@doc """
Returns the pattern as an embeddable string.

The source is wrapped in a group that carries its own modifiers, for
example `(?i-msx:foo)`. Modifiers that are in effect are listed first and
the remaining ones of `i`, `m`, `s` and `x` are listed after a `-`.

If the pattern was compiled with an option which cannot be represented
as an embeddable modifier in the current version of PCRE and `strict` is
`true` (the default) then an `ArgumentError` exception will be raised.

When `strict` is `false` the pattern will be returned as though any offending
options had not been used and the function will not raise any exceptions.

Embeddable modifiers/options are currently:

  * 'i' - `:caseless`
  * 'm' - `:multiline`
  * 's' - `:dotall`, `{:newline, :anycrlf}`
  * 'x' - `:extended`

And unembeddable modifiers are:

  * 'f' - `:firstline`
  * 'U' - `:ungreedy`
  * 'u' - `:unicode`, `:ucp`

Any other regex compilation option not listed here is considered unembeddable.

When the pattern was compiled with `:extended`, a newline is appended to the
source before the closing parenthesis.

## Examples

    iex> Regex.to_embed!(~r/foo/m)
    "(?m-isx:foo)"

    iex> Regex.to_embed!(~r/foo # comment/ix)
    "(?ix-ms:foo # comment\\n)"

    iex> Regex.to_embed!(~r/foo/iu)
    ** (ArgumentError) regex compiled with options [:ucp, :unicode] which cannot be represented as an embedded pattern in this version of PCRE

    iex> Regex.to_embed!(~r/foo/imsxu, false)
    "(?imsx:foo\\n)"

"""
# The success type is simply a string; a module alias in a typespec denotes an
# atom, so it must not be used to express "may raise".
@spec to_embed!(t, boolean()) :: String.t()
def to_embed!(%Regex{source: source, opts: opts}, strict \\ true) do
  modifiers =
    case embeddable_modifiers(opts) do
      {:ok, modifiers} ->
        modifiers

      {:error, modifiers, untranslatable} ->
        if strict do
          raise ArgumentError,
                "regex compiled with options #{inspect(untranslatable)} which cannot be " <>
                  "represented as an embedded pattern in this version of PCRE"
        else
          # Lenient mode: keep what can be embedded and drop the rest.
          modifiers
        end
    end

  # Every embeddable modifier that is not switched on is explicitly switched
  # off after a "-"; the "-" is omitted when nothing remains to switch off.
  disabled =
    case Enum.reject([?i, ?m, ?s, ?x], &(&1 in modifiers)) do
      [] -> ""
      off -> "-" <> List.to_string(off)
    end

  enabled =
    modifiers
    |> Enum.sort()
    |> List.to_string()

  # NOTE(review): presumably the newline keeps a trailing `#` comment in an
  # extended pattern from swallowing the closing ")" - confirm.
  nl = if :extended in opts, do: "\n", else: ""

  "(?#{enabled}#{disabled}:#{source}#{nl})"
end
485+
486+
@doc """
Returns the pattern as an embeddable string.

Ignores any options which cannot be represented as an embeddable pattern in
the current version of PCRE. Same as calling `to_embed!/2` with `strict` set
to `false`.

## Examples

    iex> Regex.to_embed(~r/foo/iu)
    "(?i-msx:foo)"

"""
@spec to_embed(t) :: String.t()
def to_embed(%Regex{} = regex) do
  # Non-strict mode drops unembeddable options instead of raising.
  to_embed!(regex, false)
end
500+
415501
@doc """
416502
Returns a list of names in the regex.
417503
@@ -845,6 +931,29 @@ defmodule Regex do
845931

846932
# Helpers
847933

934+
# Maps compile options onto the inline modifier characters that can be used in
# an embedded group.
#
# Returns `{:ok, modifiers}` when every option has an inline equivalent, and
# `{:error, modifiers, rejected}` otherwise, where `rejected` holds the options
# that could not be translated. Both lists are built by prepending, so they end
# up in reverse order of appearance.
defp embeddable_modifiers(options), do: embeddable_modifiers(options, [], [])

defp embeddable_modifiers([], found, []), do: {:ok, found}
defp embeddable_modifiers([], found, rejected), do: {:error, found, rejected}

defp embeddable_modifiers(options, found, rejected) when is_list(options) do
  case options do
    # `s` is only recognised as this adjacent pair, in this order.
    [:dotall, {:newline, :anycrlf} | rest] ->
      embeddable_modifiers(rest, [?s | found], rejected)

    [:caseless | rest] ->
      embeddable_modifiers(rest, [?i | found], rejected)

    [:extended | rest] ->
      embeddable_modifiers(rest, [?x | found], rejected)

    [:multiline | rest] ->
      embeddable_modifiers(rest, [?m | found], rejected)

    # Anything else has no inline modifier and is reported back to the caller.
    [other | rest] ->
      embeddable_modifiers(rest, found, [other | rejected])
  end
end
954+
955+
# translate modifiers to options
956+
848957
defp translate_options(<<?s, t::binary>>, acc),
849958
do: translate_options(t, [:dotall, {:newline, :anycrlf} | acc])
850959

0 commit comments

Comments
 (0)