Skip to content

Commit 4068160

Browse files
author
José Valim
committed
Normalize and allow captures in Regex.scan/3, closes #1526
1 parent 2a672fa commit 4068160

File tree

3 files changed

+48
-38
lines changed

3 files changed

+48
-38
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* [Mix] Add `mix cmd` as a convenience to run a command recursively in child apps in an umbrella application
2020
* [Mix] Support `umbrella: true` in dependencies as a convenience for setting up umbrella path deps
2121
* [Mix] `mix run` now behaves closer to the `elixir` command and properly mangles the ARGV
22+
* [Regex] `Regex.scan/3` now supports capturing groups
2223
* [String] Add `String.reverse/1`
2324

2425
* bug fix
@@ -42,8 +43,10 @@
4243
* [Mix] `:test_coverage` option now expects keyword arguments and the `--cover` flag is now treated as a boolean
4344

4445
* backwards incompatible changes
46+
* [Regex] `Regex.scan/3` now always returns a list of lists, normalizing the result, instead of a list with mixed lists and binaries
4547
* [System] `System.halt/2` was removed since the current Erlang implementation of that function is buggy
4648

49+
4750
# v0.10.0 (2013-07-15)
4851

4952
* enhancements

lib/elixir/lib/regex.ex

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,10 @@ defmodule Regex do
9696

9797
@doc """
9898
Runs the regular expression against the given string.
99-
It returns a list with all matches, `nil` if no match occurred, or `[]`
100-
if it matched, `/g` was specified, but nothing was captured.
99+
It returns a list with all matches or `nil` if no match occurred.
100+
101+
When the option `:capture` is set to `:groups`, it will capture all
102+
the groups in the regex.
101103
102104
## Examples
103105
@@ -128,29 +130,23 @@ defmodule Regex do
128130
end
129131

130132
@doc """
131-
Returns the given captures as a keyword list or `nil` if no captures are found.
132-
Requires the regex to be compiled with the groups option.
133+
Returns the given captures as a keyword list or `nil` if no captures
134+
are found. Requires the regex to be compiled with the groups option.
133135
134136
## Examples
135137
136138
iex> Regex.captures(%r/c(?<foo>d)/g, "abcd")
137139
[foo: "d"]
138140
iex> Regex.captures(%r/a(?<foo>b)c(?<bar>d)/g, "abcd")
139-
[bar: "d", foo: "b"]
141+
[foo: "b", bar: "d"]
140142
iex> Regex.captures(%r/a(?<foo>b)c(?<bar>d)/g, "efgh")
141143
nil
144+
142145
"""
143146
def captures(regex(groups: groups) = regex, string, options // []) do
144-
unless captures = Keyword.get(options, :capture) do
145-
captures = if groups do
146-
Enum.sort(groups)
147-
else
148-
raise ArgumentError, message: "regex was not compiled with g"
149-
end
150-
options = Keyword.put(options, :capture, captures)
151-
end
147+
options = Keyword.put_new(options, :capture, :groups)
152148
results = run(regex, string, options)
153-
if results, do: Enum.zip(captures, results)
149+
if results, do: Enum.zip(groups, results)
154150
end
155151

156152
@doc """
@@ -200,29 +196,42 @@ defmodule Regex do
200196
end
201197

202198
@doc """
203-
Same as run, but scans the target several times collecting all matches of
204-
the regular expression. A list is returned with each match. If the item in
205-
the list is a binary, it means there were no captures. If the item is another
206-
list, each element in this secondary list is a capture.
199+
Same as run, but scans the target several times collecting all
200+
matches of the regular expression. A list of lists is returned,
201+
where each entry in the primary list represents a match and each
202+
entry in the secondary list represents the captured contents.
203+
204+
The captured contents default to `:all`, which includes the whole
205+
regex match and each capture.
206+
207+
When the option `:capture` is set to `:groups`, it will capture all
208+
the groups in the regex.
207209
208210
## Examples
209211
210212
iex> Regex.scan(%r/c(d|e)/, "abcd abce")
211-
[["d"], ["e"]]
213+
[["cd", "d"], ["ce", "e"]]
212214
iex> Regex.scan(%r/c(?:d|e)/, "abcd abce")
213-
["cd", "ce"]
215+
[["cd"], ["ce"]]
214216
iex> Regex.scan(%r/e/, "abcd")
215217
[]
216218
217219
"""
218220
def scan(regex, string, options // [])
219221

220-
def scan(regex(re_pattern: compiled), string, options) do
222+
def scan(regex(re_pattern: compiled, groups: groups), string, options) do
221223
return = Keyword.get(options, :return, return_for(string))
222-
options = [{ :capture, :all, return }, :global]
224+
225+
captures =
226+
case Keyword.get(options, :capture, :all) do
227+
:groups -> groups || raise ArgumentError, message: "regex was not compiled with g"
228+
others -> others
229+
end
230+
231+
options = [{ :capture, captures, return }, :global]
223232
case :re.run(string, compiled, options) do
224233
:nomatch -> []
225-
{ :match, results } -> flatten_result(results)
234+
{ :match, results } -> results
226235
end
227236
end
228237

@@ -330,15 +339,6 @@ defmodule Regex do
330339
defp translate_options(<<?g, t :: binary>>), do: [:groups|translate_options(t)]
331340
defp translate_options(<<>>), do: []
332341

333-
defp flatten_result(results) do
334-
lc result inlist results do
335-
case result do
336-
[t] -> t
337-
[_|t] -> t
338-
end
339-
end
340-
end
341-
342342
{ :ok, pattern } = :re.compile(%B"\(\?<(?<G>[^>]*)>")
343343
@groups_pattern pattern
344344

lib/elixir/test/elixir/regex_test.exs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,17 @@ defmodule Regex.BinaryTest do
9494
end
9595
9696
test :scan do
97-
assert Regex.scan(%r"c(d|e)", "abcd abce") == [["d"], ["e"]]
98-
assert Regex.scan(%r"c(?:d|e)", "abcd abce") == ["cd", "ce"]
97+
assert Regex.scan(%r"c(d|e)", "abcd abce") == [["cd", "d"], ["ce", "e"]]
98+
assert Regex.scan(%r"c(?:d|e)", "abcd abce") == [["cd"], ["ce"]]
9999
assert Regex.scan(%r"e", "abcd") == []
100-
assert Regex.scan(%r"c(d|e)", "abcd abce", return: :list) == [['d'], ['e']]
100+
assert Regex.scan(%r"c(d|e)", "abcd abce", return: :list) == [['cd', 'd'], ['ce', 'e']]
101+
end
102+
103+
test :scan_with_groups do
104+
assert Regex.scan(%r/c(?<foo>d)/g, 'abcd', capture: :groups) == [['d']]
105+
assert Regex.scan(%r/c(?<foo>d)/g, 'no_match', capture: :groups) == []
106+
assert Regex.scan(%r/c(?<foo>d|e)/g, 'abcd abce', capture: :groups) == [['d'], ['e']]
107+
assert Regex.scan(%r/c(?<foo>d)/g, 'abcd', return: :binary, capture: :groups) == [["d"]]
101108
end
102109
103110
test :split do
@@ -185,10 +192,10 @@ defmodule Regex.ListTest do
185192
end
186193

187194
test :scan do
188-
assert Regex.scan(%r'c(d|e)', 'abcd abce') == [['d'], ['e']]
189-
assert Regex.scan(%r'c(?:d|e)', 'abcd abce') == ['cd', 'ce']
195+
assert Regex.scan(%r'c(d|e)', 'abcd abce') == [['cd', 'd'], ['ce', 'e']]
196+
assert Regex.scan(%r'c(?:d|e)', 'abcd abce') == [['cd'], ['ce']]
190197
assert Regex.scan(%r'e', 'abcd') == []
191-
assert Regex.scan(%r'c(d|e)', 'abcd abce', return: :binary) == [["d"], ["e"]]
198+
assert Regex.scan(%r'c(d|e)', 'abcd abce', return: :binary) == [["cd", "d"], ["ce", "e"]]
192199
end
193200

194201
test :split do

0 commit comments

Comments
 (0)