Skip to content

Commit 6db5091

Browse files
author
José Valim
committed
Provide :on option for Regex.split/3
1 parent 4e7ba9d commit 6db5091

File tree

2 files changed

+61
-24
lines changed

2 files changed

+61
-24
lines changed

lib/elixir/lib/regex.ex

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,11 @@ defmodule Regex do
327327
328328
* `:trim` - when true, remove blank strings from the result.
329329
330+
* `:on` - specifies which captures to split the string on, and in which
331+
order. Check the moduledoc for `Regex` to see the possible capture
332+
values. Defaults to `:first` which means captures inside the
333+
Regex do not affect the split result.
334+
330335
## Examples
331336
332337
iex> Regex.split(~r/-/, "a-b-c")
@@ -341,21 +346,27 @@ defmodule Regex do
341346
iex> Regex.split(~r//, "abc")
342347
["a", "b", "c", ""]
343348
344-
iex> Regex.split(~r//, "abc", trim: true)
345-
["a", "b", "c"]
349+
iex> Regex.split(~r/a(?<second>b)c/, "abc")
350+
["", ""]
351+
352+
iex> Regex.split(~r/a(?<second>b)c/, "abc", on: [:second])
353+
["a", "c"]
346354
347355
"""
348356

349357
def split(regex, string, options \\ [])
350358

351-
def split(%Regex{}, "", _options), do: [""]
359+
def split(%Regex{}, "", _opts), do: [""]
352360

353-
def split(%Regex{re_pattern: compiled}, string, options) when is_binary(string) do
354-
case :re.run(string, compiled, [:global, capture: :first]) do
361+
def split(%Regex{re_pattern: compiled}, string, opts) when is_binary(string) do
362+
on = Keyword.get(opts, :on, :first)
363+
case :re.run(string, compiled, [:global, capture: on]) do
355364
{:match, matches} ->
356365
do_split(matches, string, 0,
357-
parts_to_index(Keyword.get(options, :parts, :infinity)),
358-
Keyword.get(options, :trim, false))
366+
parts_to_index(Keyword.get(opts, :parts, :infinity)),
367+
Keyword.get(opts, :trim, false))
368+
:match ->
369+
[string]
359370
:nomatch ->
360371
[string]
361372
end
@@ -364,25 +375,30 @@ defmodule Regex do
364375
defp parts_to_index(:infinity), do: 0
365376
defp parts_to_index(n) when is_integer(n) and n > 0, do: n
366377

367-
defp do_split(_, "", _index, _counter, true), do: []
368-
defp do_split(_, string, _index, 1, _trim), do: [string]
369-
defp do_split([], string, _index, _counter, _trim), do: [string]
378+
defp do_split(_, string, offset, _counter, true) when byte_size(string) <= offset,
379+
do: []
370380

371-
defp do_split([[{0, 0}]|t], string, index, counter, trim) do
372-
do_split(t, string, index, counter, trim)
373-
end
381+
defp do_split(_, string, offset, 1, _trim),
382+
do: [binary_part(string, offset, byte_size(string) - offset)]
383+
384+
defp do_split([], string, offset, _counter, _trim),
385+
do: [binary_part(string, offset, byte_size(string) - offset)]
386+
387+
defp do_split([[{pos, _}|h]|t], string, offset, counter, trim) when pos - offset < 0,
388+
do: do_split([h|t], string, offset, counter, trim)
374389

375-
defp do_split([[{pos, length}]|t], string, index, counter, trim) do
376-
first = pos - index
377-
last = first + length
390+
defp do_split([[]|t], string, offset, counter, trim),
391+
do: do_split(t, string, offset, counter, trim)
378392

379-
head = binary_part(string, 0, first)
380-
tail = binary_part(string, last, byte_size(string) - last)
393+
defp do_split([[{pos, length}|h]|t], string, offset, counter, trim) do
394+
new_offset = pos + length
395+
keep = pos - offset
381396

382-
if trim and head == "" do
383-
do_split(t, tail, pos + length, counter, trim)
397+
if keep == 0 and (length == 0 or trim) do
398+
do_split([h|t], string, new_offset, counter, trim)
384399
else
385-
[head|do_split(t, tail, pos + length, counter - 1, trim)]
400+
<<_::binary-size(offset), part::binary-size(keep), _::binary>> = string
401+
[part|do_split([h|t], string, new_offset, counter - 1, trim)]
386402
end
387403
end
388404

lib/elixir/test/elixir/regex_test.exs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,17 +150,38 @@ defmodule RegexTest do
150150
test :split do
151151
assert Regex.split(~r",", "") == [""]
152152
assert Regex.split(~r",", "", trim: true) == [""]
153+
154+
assert Regex.split(~r"=", "key=") == ["key", ""]
155+
assert Regex.split(~r"=", "=value") == ["", "value"]
156+
153157
assert Regex.split(~r" ", "foo bar baz") == ["foo", "bar", "baz"]
154158
assert Regex.split(~r" ", "foo bar baz", parts: :infinity) == ["foo", "bar", "baz"]
155159
assert Regex.split(~r" ", "foo bar baz", parts: 10) == ["foo", "bar", "baz"]
156160
assert Regex.split(~r" ", "foo bar baz", parts: 2) == ["foo", "bar baz"]
157-
assert Regex.split(~r"\s", "foobar") == ["foobar"]
161+
158162
assert Regex.split(~r" ", " foo bar baz ") == ["", "foo", "bar", "baz", ""]
159163
assert Regex.split(~r" ", " foo bar baz ", trim: true) == ["foo", "bar", "baz"]
160164
assert Regex.split(~r" ", " foo bar baz ", parts: 2) == ["", "foo bar baz "]
161165
assert Regex.split(~r" ", " foo bar baz ", trim: true, parts: 2) == ["foo", "bar baz "]
162-
assert Regex.split(~r"=", "key=") == ["key", ""]
163-
assert Regex.split(~r"=", "=value") == ["", "value"]
166+
end
167+
168+
test :split_on do
169+
assert Regex.split(~r/()abc()/, "xabcxabcx", on: :none) ==
170+
["xabcxabcx"]
171+
assert Regex.split(~r/()abc()/, "xabcxabcx", on: :all_but_first) ==
172+
["x", "abc", "x", "abc", "x"]
173+
174+
assert Regex.split(~r/(?<first>)abc(?<last>)/, "xabcxabcx", on: [:first, :last]) ==
175+
["x", "abc", "x", "abc", "x"]
176+
assert Regex.split(~r/(?<first>)abc(?<last>)/, "xabcxabcx", on: [:last, :first]) ==
177+
["xabc", "xabc", "x"]
178+
179+
assert Regex.split(~r/a(?<second>b)c/, "abc", on: [:second]) ==
180+
["a", "c"]
181+
assert Regex.split(~r/a(?<second>b)c|a(?<fourth>d)c/, "abc adc abc", on: [:second]) ==
182+
["a", "c adc a", "c"]
183+
assert Regex.split(~r/a(?<second>b)c|a(?<fourth>d)c/, "abc adc abc", on: [:second, :fourth]) ==
184+
["a", "c a", "c a", "c"]
164185
end
165186

166187
test :replace do

0 commit comments

Comments
 (0)