|
3 | 3 | # SPDX-FileCopyrightText: 2012 Plataformatec |
4 | 4 |
|
5 | 5 | defmodule Regex do |
| 6 | + import Kernel, except: [to_string: 1] |
| 7 | + |
6 | 8 | @moduledoc ~S""" |
7 | 9 | Provides regular expressions for Elixir. |
8 | 10 |
|
@@ -172,6 +174,29 @@ defmodule Regex do |
172 | 174 | defexception message: "regex could not be compiled" |
173 | 175 | end |
174 | 176 |
|
defmodule ModifierError do
  @moduledoc """
  Raised by `Regex.to_string!/1` and `Regex.modifiers!/2` when a regular
  expression was compiled with a modifier or option that has no
  embeddable representation.

  The `:regex` field holds the offending regex. The `:modifiers` field
  holds the modifier letters that could not be embedded, as a binary; it
  is left as `nil` when the options of the regex could not be translated
  into modifier letters at all.

  See `Regex.modifiers/2` for more details on embeddable modifiers.
  """

  defexception [:regex, :modifiers]

  # No modifier letters were reported: the options themselves were unknown.
  def message(%{regex: regex, modifiers: nil}) do
    "regex #{inspect(regex)} compiled with unknown options"
  end

  def message(%{regex: regex, modifiers: modifiers}) do
    # Pluralise "modifier" when more than one letter is reported.
    plural =
      case String.length(modifiers) do
        count when count > 1 -> "s"
        _ -> ""
      end

    "regex #{inspect(regex)} compiled with modifier#{plural}" <>
      " #{inspect(modifiers)} which cannot be embedded"
  end
end
| 199 | + |
175 | 200 | @doc """ |
176 | 201 | Compiles the regular expression. |
177 | 202 |
|
@@ -393,6 +418,71 @@ defmodule Regex do |
393 | 418 | source |
394 | 419 | end |
395 | 420 |
|
# Modifier letters that can be switched on or off inside a `(?...:...)` group.
@embeddable_modifiers [?i, ?m, ?s, ?x]

@doc """
Returns the regex source in an embeddable form.

On success the result is `{:ok, binary}`, where the binary wraps the
source in a `(?flags:...)` group. The modifiers the regex was compiled
with are switched on, and every remaining embeddable modifier is
explicitly switched off after a `-`.

If the regex was compiled with a modifier that cannot be embedded in
such a group, `{:error, letters}` is returned, where `letters` is a
binary holding the offending modifier letters. If the regex carries an
option that cannot be translated into a modifier letter at all, `:error`
is returned.

See `Regex.modifiers/2` for more details on embeddable modifiers.

## Examples

    iex> Regex.to_string(~r/foo/ix)
    {:ok, "(?ix-ms:foo\\n)"}
    iex> Regex.to_string(~r/foo/u)
    {:error, "u"}

"""
@spec to_string(t) :: :error | {:ok, String.t()} | {:error, String.t()}
def to_string(%Regex{source: source, opts: opts}) do
  with {:ok, enabled} <- _modifiers(opts, true) do
    # Every embeddable modifier that is not enabled gets listed after "-".
    switched_off =
      case Enum.filter(@embeddable_modifiers, &(&1 not in enabled)) do
        [] -> ""
        letters -> "-" <> to_sorted_string(letters)
      end

    flags = to_sorted_string(enabled) <> switched_off

    # A newline is placed before the closing parenthesis in extended mode.
    # NOTE(review): presumably this stops a trailing `#` comment in the
    # source from swallowing the `)` - confirm.
    trailer = if :extended in opts, do: "\n", else: ""

    {:ok, "(?#{flags}:#{source}#{trailer})"}
  else
    {:error, rejected} -> {:error, to_sorted_string(rejected)}
    :error -> :error
  end
end
| 460 | + |
@doc """
Returns the regex source as a binary in an embeddable form, wrapped in
a `(?flags:...)` group.

This is the raising counterpart of `Regex.to_string/1`: instead of
returning an error value, it raises `Regex.ModifierError` when the
regex was compiled with an option that cannot be represented inside
such a group.

See `Regex.modifiers/2` for more details on embeddable modifiers.

## Examples

    iex> Regex.to_string!(~r/foo/xism)
    "(?imsx:foo\\n)"
    iex> Regex.to_string!(~r/foo/uf)
    ** (Regex.ModifierError) regex ~r/foo/fu compiled with modifiers "fu" which cannot be embedded

"""
@spec to_string!(t) :: String.t()
def to_string!(%Regex{} = regex) do
  case to_string(regex) do
    {:ok, embedded} ->
      embedded

    {:error, rejected} ->
      # Known modifiers that have no embeddable form.
      raise Regex.ModifierError, regex: regex, modifiers: rejected

    :error ->
      # Options that could not be translated into modifier letters.
      raise Regex.ModifierError, regex: regex
  end
end
| 485 | + |
396 | 486 | @doc """ |
397 | 487 | Returns the regex options. |
398 | 488 |
|
@@ -843,8 +933,107 @@ defmodule Regex do |
843 | 933 | [binary_part(original, 0, length), ?\\, char | escape(rest, 0, rest)] |
844 | 934 | end |
845 | 935 |
|
@doc """
Returns a binary containing the modifier letters that a regex was
compiled with.

When `embed_only` is `true`, a `Regex.ModifierError` is raised if the
regex was compiled with a modifier that cannot be embedded in a
pattern. The same exception is raised, regardless of `embed_only`, when
the options of the regex cannot be translated into modifier letters.

See `Regex.modifiers/2` for more details.

## Examples

    iex> Regex.modifiers!(~r/foo/x)
    "x"
    iex> Regex.modifiers!(~r/foo/u, true)
    ** (Regex.ModifierError) regex ~r/foo/u compiled with modifier "u" which cannot be embedded
"""
@spec modifiers!(t, boolean) :: String.t()
def modifiers!(%Regex{} = regex, embed_only \\ false) do
  case modifiers(regex, embed_only) do
    {:ok, letters} ->
      letters

    {:error, rejected} ->
      # Known modifiers that have no embeddable form.
      raise Regex.ModifierError, regex: regex, modifiers: rejected

    :error ->
      # Options that could not be translated into modifier letters.
      raise Regex.ModifierError, regex: regex
  end
end
| 959 | + |
@doc """
Returns an `{:ok, ...}` tuple containing the regex options as a binary
of modifier letters, sorted by code point.

If `embed_only` is `true`, `{:ok, ...}` is only returned when every
option the regex was compiled with is found in the following list:

  * (m) :multiline
  * (s) :dotall
  * (i) :caseless
  * (x) :extended

If any option is in the following list:

  * (u) :unicode
  * (f) :firstline
  * (U) :ungreedy

then an `{:error, ...}` tuple containing the offending modifiers is
returned instead. When `embed_only` is `false` (the default), these
modifiers are reported in the `{:ok, ...}` tuple like any other.

Returns `:error` if an option that is not listed here is found in the
`%Regex{}` struct.

## Examples

    iex> Regex.modifiers(~r/foo/x)
    {:ok, "x"}
    iex> Regex.modifiers(~r/foo/u)
    {:ok, "u"}
    iex> Regex.modifiers(~r/foo/xsim)
    {:ok, "imsx"}
    iex> Regex.modifiers(~r/foo/u, true)
    {:error, "u"}
"""
# The spec lists all three result shapes; it previously claimed a bare
# `String.t()`, which no branch of this function returns.
@spec modifiers(t, boolean()) :: {:ok, String.t()} | {:error, String.t()} | :error
def modifiers(%Regex{opts: opts}, embed_only \\ false) do
  case _modifiers(opts, embed_only) do
    {:ok, list} -> {:ok, to_sorted_string(list)}
    {:error, list} -> {:error, to_sorted_string(list)}
    :error -> :error
  end
end
| 1003 | + |
846 | 1004 | # Helpers |
847 | 1005 |
|
# Renders a list of modifier code points as a binary, sorted by code point.
defp to_sorted_string(letters) do
  letters
  |> Enum.sort()
  |> List.to_string()
end

# Translates a compiled option list back into modifier letters.
#
# Returns `{:ok, letters}` when every option was translated, or
# `{:error, rejected}` when `embed_only` is `true` and at least one option
# maps to a letter that cannot be embedded (`u`, `f`, `U`). Returns `:error`
# as soon as the head of the option list matches none of the known shapes.
# Both lists hold code points in reverse order of discovery; callers sort.
defp _modifiers(opts, embed_only), do: _modifiers(opts, [], [], embed_only)

# Option list exhausted: succeed only if nothing was rejected.
defp _modifiers([], letters, [], _embed_only), do: {:ok, letters}
defp _modifiers([], _letters, rejected, _embed_only), do: {:error, rejected}

# Embeddable modifiers are always accepted.
defp _modifiers([:caseless | rest], letters, rejected, embed_only) do
  _modifiers(rest, [?i | letters], rejected, embed_only)
end

defp _modifiers([:multiline | rest], letters, rejected, embed_only) do
  _modifiers(rest, [?m | letters], rejected, embed_only)
end

defp _modifiers([:dotall, {:newline, :anycrlf} | rest], letters, rejected, embed_only) do
  _modifiers(rest, [?s | letters], rejected, embed_only)
end

defp _modifiers([:extended | rest], letters, rejected, embed_only) do
  _modifiers(rest, [?x | letters], rejected, embed_only)
end

# Non-embeddable modifiers are rejected when `embed_only` is `true`, and
# accepted when it is `false`. The boolean guard keeps any other value
# falling through to the catch-all clause below.
defp _modifiers([:unicode, :ucp | rest], letters, rejected, embed_only)
     when is_boolean(embed_only) do
  if embed_only do
    _modifiers(rest, letters, [?u | rejected], embed_only)
  else
    _modifiers(rest, [?u | letters], rejected, embed_only)
  end
end

defp _modifiers([:firstline | rest], letters, rejected, embed_only)
     when is_boolean(embed_only) do
  if embed_only do
    _modifiers(rest, letters, [?f | rejected], embed_only)
  else
    _modifiers(rest, [?f | letters], rejected, embed_only)
  end
end

defp _modifiers([:ungreedy | rest], letters, rejected, embed_only)
     when is_boolean(embed_only) do
  if embed_only do
    _modifiers(rest, letters, [?U | rejected], embed_only)
  else
    _modifiers(rest, [?U | letters], rejected, embed_only)
  end
end

# Anything else is an option with no known modifier letter.
defp _modifiers(_opts, _letters, _rejected, _embed_only), do: :error
| 1036 | + |
848 | 1037 | defp translate_options(<<?s, t::binary>>, acc), |
849 | 1038 | do: translate_options(t, [:dotall, {:newline, :anycrlf} | acc]) |
850 | 1039 |
|
|
0 commit comments