@@ -327,6 +327,11 @@ defmodule Regex do
327
327
328
328
* `:trim` - when true, remove blank strings from the result.
329
329
330
+ * `:on` - specifies which captures to split the string on, and in
331
+ what order. Check the moduledoc for `Regex` to see the possible capture
332
+ values. Defaults to `:first`, which means captures inside the
333
+ regex do not affect the split result.
334
+
330
335
## Examples
331
336
332
337
iex> Regex.split(~r/-/, "a-b-c")
@@ -341,21 +346,27 @@ defmodule Regex do
341
346
iex> Regex.split(~r//, "abc")
342
347
["a", "b", "c", ""]
343
348
344
- iex> Regex.split(~r//, "abc", trim: true)
345
- ["a", "b", "c"]
349
+ iex> Regex.split(~r/a(?<second>b)c/, "abc")
350
+ ["", ""]
351
+
352
+ iex> Regex.split(~r/a(?<second>b)c/, "abc", on: [:second])
353
+ ["a", "c"]
346
354
347
355
"""
348
356
349
357
def split ( regex , string , options \\ [ ] )
350
358
351
- def split ( % Regex { } , "" , _options ) , do: [ "" ]
359
+ def split ( % Regex { } , "" , _opts ) , do: [ "" ]
352
360
353
- def split ( % Regex { re_pattern: compiled } , string , options ) when is_binary ( string ) do
354
- case :re . run ( string , compiled , [ :global , capture: :first ] ) do
361
+ def split ( % Regex { re_pattern: compiled } , string , opts ) when is_binary ( string ) do
362
+ on = Keyword . get ( opts , :on , :first )
363
+ case :re . run ( string , compiled , [ :global , capture: on ] ) do
355
364
{ :match , matches } ->
356
365
do_split ( matches , string , 0 ,
357
- parts_to_index ( Keyword . get ( options , :parts , :infinity ) ) ,
358
- Keyword . get ( options , :trim , false ) )
366
+ parts_to_index ( Keyword . get ( opts , :parts , :infinity ) ) ,
367
+ Keyword . get ( opts , :trim , false ) )
368
+ :match ->
369
+ [ string ]
359
370
:nomatch ->
360
371
[ string ]
361
372
end
@@ -364,25 +375,30 @@ defmodule Regex do
364
375
defp parts_to_index ( :infinity ) , do: 0
365
376
defp parts_to_index ( n ) when is_integer ( n ) and n > 0 , do: n
366
377
367
- defp do_split ( _ , "" , _index , _counter , true ) , do: [ ]
368
- defp do_split ( _ , string , _index , 1 , _trim ) , do: [ string ]
369
- defp do_split ( [ ] , string , _index , _counter , _trim ) , do: [ string ]
378
+ defp do_split ( _ , string , offset , _counter , true ) when byte_size ( string ) <= offset ,
379
+ do: [ ]
370
380
371
- defp do_split ( [ [ { 0 , 0 } ] | t ] , string , index , counter , trim ) do
372
- do_split ( t , string , index , counter , trim )
373
- end
381
+ defp do_split ( _ , string , offset , 1 , _trim ) ,
382
+ do: [ binary_part ( string , offset , byte_size ( string ) - offset ) ]
383
+
384
+ defp do_split ( [ ] , string , offset , _counter , _trim ) ,
385
+ do: [ binary_part ( string , offset , byte_size ( string ) - offset ) ]
386
+
387
+ defp do_split ( [ [ { pos , _ } | h ] | t ] , string , offset , counter , trim ) when pos - offset < 0 ,
388
+ do: do_split ( [ h | t ] , string , offset , counter , trim )
374
389
375
- defp do_split ( [ [ { pos , length } ] | t ] , string , index , counter , trim ) do
376
- first = pos - index
377
- last = first + length
390
+ defp do_split ( [ [ ] | t ] , string , offset , counter , trim ) ,
391
+ do: do_split ( t , string , offset , counter , trim )
378
392
379
- head = binary_part ( string , 0 , first )
380
- tail = binary_part ( string , last , byte_size ( string ) - last )
393
+ defp do_split ( [ [ { pos , length } | h ] | t ] , string , offset , counter , trim ) do
394
+ new_offset = pos + length
395
+ keep = pos - offset
381
396
382
- if trim and head == "" do
383
- do_split ( t , tail , pos + length , counter , trim )
397
+ if keep == 0 and ( length == 0 or trim ) do
398
+ do_split ( [ h | t ] , string , new_offset , counter , trim )
384
399
else
385
- [ head | do_split ( t , tail , pos + length , counter - 1 , trim ) ]
400
+ << _ :: binary - size ( offset ) , part :: binary - size ( keep ) , _ :: binary >> = string
401
+ [ part | do_split ( [ h | t ] , string , new_offset , counter - 1 , trim ) ]
386
402
end
387
403
end
388
404
0 commit comments