Skip to content

Commit 80b3183

Browse files
author
José Valim
committed
Support functions in Regex.replace/4
1 parent d5f35d4 commit 80b3183

File tree

4 files changed

+155
-24
lines changed

4 files changed

+155
-24
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* [Mix] Support application configurations in `config/config.exs` which can be customized by specifying your own `:config_path`
1111
* [Mix] Support user-wide configuration with `~/.mix/config.exs`
1212
* [Mix] `mix help` now uses ANSI formatting to print guides
13+
* [Regex] Support functions in `Regex.replace/4`
1314
* [String] Support `:parts` in `String.split/3`
1415

1516
* Bug fixes

lib/elixir/lib/regex.ex

Lines changed: 143 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ defmodule Regex do
308308
309309
## Options
310310
311-
* `:parts` - when specified, splits the string into the
311+
* `:parts` - when specified, splits the string into the
312312
given number of parts. If not specified, `:parts`
313313
is defaulted to `:infinity`, which will split the
314314
string into the maximum number of parts possible
@@ -362,10 +362,20 @@ defmodule Regex do
362362
Receives a regex, a binary and a replacement, returns a new
363363
binary where all matches are replaced by the replacement.
364364
365-
Inside the replacement, you can either give `&` to access the
366-
whole regular expression or `\N`, where `N` is in integer to access
367-
a specific matching parens. You can also set `:global` to `false`
368-
if you want to replace just the first occurrence.
365+
The replacement can be either a string or a function. The string
366+
is used as a replacement for every match and it allows specific
367+
captures to be accessed via `\N`, where `N` is the capture. In
368+
case `\0` is used, the whole match is inserted.
369+
370+
When the replacement is a function, the function may have arity
371+
N where each argument maps to a capture, with the first argument
372+
being the whole match. If the function expects more arguments
373+
than captures found, the remaining arguments will receive `""`.
374+
375+
## Options
376+
377+
* `:global` - when `false`, replaces only the first occurrence
378+
(defaults to `true`)
369379
370380
## Examples
371381
@@ -375,22 +385,142 @@ defmodule Regex do
375385
iex> Regex.replace(~r/b/, "abc", "d")
376386
"adc"
377387
378-
iex> Regex.replace(~r/b/, "abc", "[&]")
388+
iex> Regex.replace(~r/b/, "abc", "[\\0]")
379389
"a[b]c"
380390
381-
iex> Regex.replace(~r/b/, "abc", "[\\&]")
382-
"a[&]c"
391+
iex> Regex.replace(~r/a(b|d)c/, "abcadc", "[\\1]")
392+
"[b][d]"
383393
384-
iex> Regex.replace(~r/(b)/, "abc", "[\\1]")
385-
"a[b]c"
394+
iex> Regex.replace(~r/a(b|d)c/, "abcadc", fn _, x -> "[#{x}]" end)
395+
"[b][d]"
386396
387397
"""
388398
def replace(regex, string, replacement, options \\ [])
389399

390-
def replace(%Regex{re_pattern: compiled}, string, replacement, options) when is_binary(string) do
400+
# String replacement: the template is parsed once up front into a list of
# literal binaries and integer capture references, then applied to each match.
def replace(regex, string, replacement, options) when is_binary(replacement) do
  template = precompile_replacement(replacement)
  do_replace(regex, string, template, options)
end

# Function replacement: the function is carried together with its arity so
# that each match can be expanded into exactly that many arguments.
def replace(regex, string, replacement, options) when is_function(replacement) do
  {:arity, arg_count} = :erlang.fun_info(replacement, :arity)
  callback = {replacement, arg_count}
  do_replace(regex, string, callback, options)
end
408+
409+
# Runs the compiled pattern against `string`, asking `:re.run/3` for the
# {position, length} index pairs of every capture, and rebuilds the string
# with each match substituted. Returns `string` unchanged when nothing matches.
defp do_replace(%Regex{re_pattern: pattern}, string, replacement, options) do
  # Anything other than an explicit `global: false` replaces every occurrence.
  scope =
    case Keyword.get(options, :global) do
      false -> []
      _ -> [:global]
    end

  run_opts = [{:capture, :all, :index} | scope]

  # Normalise the result to a list of matches: a result whose first element is
  # itself a list is already one entry per match; otherwise it is a single
  # match's captures and gets wrapped.
  matches =
    case :re.run(string, pattern, run_opts) do
      :nomatch -> nil
      {:match, [first | _] = all} when is_list(first) -> all
      {:match, single} -> [single]
    end

  if matches do
    iodata_to_binary(apply_list(string, replacement, matches))
  else
    string
  end
end
end
422+
423+
# Parses a replacement template into a list whose elements are either literal
# binaries or integers. An integer N stands for the `\N` capture reference.
# Adjacent literal bytes are merged into a single binary.
defp precompile_replacement(""), do: []

# Backslash followed by a non-digit: the backslash is dropped and the
# following byte is kept as a literal.
defp precompile_replacement(<<?\\, char, tail :: binary>>) when char < ?0 or char > ?9 do
  push_literal_byte(char, precompile_replacement(tail))
end

# Backslash followed by digits (the clause above already took the non-digit
# case): read the whole run of digits as the capture number.
defp precompile_replacement(<<?\\, tail :: binary>>) when byte_size(tail) > 0 do
  {digits, remaining} = pick_int(tail)
  [list_to_integer(digits) | precompile_replacement(remaining)]
end

# Any other byte (including a lone trailing backslash) is a literal.
defp precompile_replacement(<<char, tail :: binary>>) do
  push_literal_byte(char, precompile_replacement(tail))
end

# Prepends one literal byte to an already-compiled template, extending the
# leading binary when there is one instead of starting a new element.
defp push_literal_byte(byte, [text | others]) when is_binary(text) do
  [<<byte, text :: binary>> | others]
end

defp push_literal_byte(byte, compiled) do
  [<<byte>> | compiled]
end
448+
449+
# Splits off the leading run of ASCII digits from a binary.
# Returns `{digits, rest}` where `digits` is a char list (possibly empty) and
# `rest` is whatever follows the digits.
defp pick_int(bin), do: pick_int(bin, [])

# Digits are collected in reverse and flipped once at the end.
defp pick_int(<<digit, tail :: binary>>, acc) when digit in ?0..?9 do
  pick_int(tail, [digit | acc])
end

defp pick_int(tail, acc) do
  {:lists.reverse(acc), tail}
end
457+
458+
# Walks the match list left to right and produces iodata made of the
# untouched stretches of the subject interleaved with the replacement for
# each match.
defp apply_list(string, replacement, list) do
  apply_list(string, string, 0, replacement, list)
end

# Arguments of the 5-arity worker:
#   whole       - the full original subject, used to extract captures
#   remaining   - the not-yet-consumed suffix of the subject
#   offset      - byte position of `remaining` inside `whole`
#   replacement - compiled template or `{fun, arity}`
#   matches     - remaining matches, each a list of {position, length} pairs
#
# No matches left: whatever remains becomes the tail of the iodata
# (an empty remainder terminates the list properly).
defp apply_list(_whole, remaining, _offset, _replacement, []) do
  if remaining == "", do: [], else: remaining
end

# The next match starts further ahead: emit the gap verbatim and retry the
# same match list from the match's start position.
defp apply_list(whole, remaining, offset, replacement, [[{start, _} | _] | _] = matches) when start > offset do
  gap = start - offset
  <<skipped :: [size(gap), binary], after_gap :: binary>> = remaining
  [skipped | apply_list(whole, after_gap, start, replacement, matches)]
end

# The next match starts exactly here: drop the matched bytes, emit the
# replacement and continue right after the match.
defp apply_list(whole, remaining, offset, replacement, [[{start, width} | _] = captures | later]) when start == offset do
  <<_ :: [size(width), binary], after_match :: binary>> = remaining
  substitute = apply_replace(whole, replacement, captures)
  [substitute | apply_list(whole, after_match, offset + width, replacement, later)]
end
481+
482+
# Produces the replacement for a single match.
#
#   string  - the full subject the capture indexes refer to
#   second  - either `{fun, arity}` or a compiled template (a list of literal
#             binaries and integer capture references)
#   indexes - the match's `{position, length}` pairs, whole match first
#
# Function replacement: call the function with one argument per capture,
# truncated or padded to the function's arity by `get_indexes/3`.
defp apply_replace(string, {fun, arity}, indexes) do
  apply(fun, get_indexes(string, indexes, arity))
end

# Template consisting of a single literal: nothing to interpolate.
defp apply_replace(_, [bin], _) when is_binary(bin) do
  bin
end

# General template: literals are emitted as-is and integer references are
# resolved against the captures of this match.
defp apply_replace(string, repl, indexes) do
  indexes = list_to_tuple(indexes)

  for part <- repl do
    cond do
      is_binary(part) ->
        part
      # `elem/2` is zero-based, so the valid references are
      # 0..tuple_size(indexes) - 1. Using `>=` (not `>`) makes a reference to
      # the first missing group expand to "" instead of raising.
      part >= tuple_size(indexes) ->
        ""
      true ->
        get_index(string, elem(indexes, part))
    end
  end
end
504+
505+
# Extracts the bytes described by a `{position, length}` pair from `string`.
# A negative position yields "".
defp get_index(string, {offset, width}) do
  if offset < 0 do
    ""
  else
    <<_ :: [size(offset), binary], slice :: [size(width), binary], _ :: binary>> = string
    slice
  end
end
513+
514+
# Builds the argument list for a replacement function: exactly `arity`
# strings, taken from the captures in order. Extra captures are ignored and
# missing ones are filled with "".
defp get_indexes(_string, _indexes, 0), do: []

defp get_indexes(string, indexes, arity) do
  {value, remaining} =
    case indexes do
      [] -> {"", []}
      [capture | others] -> {get_index(string, capture), others}
    end

  [value | get_indexes(string, remaining, arity - 1)]
end
395525

396526
{:ok, pattern} = :re.compile(~S"[.^$*+?()[{\\\|\s#]", [:unicode])

lib/elixir/lib/string.ex

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -611,9 +611,7 @@ defmodule String do
611611
@spec replace(t, t, t) :: t
612612
@spec replace(t, t, t, Keyword.t) :: t
613613

614-
def replace(subject, pattern, replacement, options \\ [])
615-
616-
def replace(subject, pattern, replacement, options) do
614+
def replace(subject, pattern, replacement, options \\ []) when is_binary(replacement) do
617615
if Regex.regex?(pattern) do
618616
Regex.replace(pattern, subject, replacement, global: options[:global])
619617
else
@@ -623,9 +621,9 @@ defmodule String do
623621
end
624622

625623
defp translate_replace_options(options) do
626-
opts = if options[:global] != false, do: [:global], else: []
624+
opts = if Keyword.get(options, :global) != false, do: [:global], else: []
627625

628-
if insert = options[:insert_replaced] do
626+
if insert = Keyword.get(options, :insert_replaced) do
629627
opts = [{:insert_replaced, insert}|opts]
630628
end
631629

lib/elixir/test/elixir/regex_test.exs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,17 @@ defmodule RegexTest do
132132
test :replace do
133133
assert Regex.replace(~r(d), "abc", "d") == "abc"
134134
assert Regex.replace(~r(b), "abc", "d") == "adc"
135-
assert Regex.replace(~r(b), "abc", "[&]") == "a[b]c"
136-
assert Regex.replace(~r(b), "abc", "[\\&]") == "a[&]c"
135+
assert Regex.replace(~r(b), "abc", "[\\0]") == "a[b]c"
137136
assert Regex.replace(~r[(b)], "abc", "[\\1]") == "a[b]c"
138137

139-
assert Regex.replace(~r(d), "abcbe", "d") == "abcbe"
140138
assert Regex.replace(~r(b), "abcbe", "d") == "adcde"
141-
assert Regex.replace(~r(b), "abcbe", "[&]") == "a[b]c[b]e"
142-
assert Regex.replace(~r(b), "abcbe", "[\\&]") == "a[&]c[&]e"
143-
assert Regex.replace(~r[(b)], "abcbe", "[\\1]") == "a[b]c[b]e"
139+
assert Regex.replace(~r(b), "abcbe", "d", global: false) == "adcbe"
140+
141+
assert Regex.replace(~r[a(b)c], "abcabc", fn -> "ac" end) == "acac"
142+
assert Regex.replace(~r[a(b)c], "abcabc", fn "abc" -> "ac" end) == "acac"
143+
assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b" -> "ac" end) == "acac"
144+
assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b", "" -> "ac" end) == "acac"
145+
assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b" -> "ac" end, global: false) == "acabc"
144146
end
145147

146148
test :escape do

0 commit comments

Comments
 (0)