Skip to content

Commit afd51e8

Browse files
author
José Valim
committed
Make split operations consistent with replace
Both String.replace/3 and Regex.replace/3 consider there is an empty string before and after the subject when doing replacements:

    String.replace("abc", "", "x") #=> "xaxbxcx"

This change makes String.split/2 and Regex.split/2 consistent with this operation by returning empty strings before and after the split subject:

    String.split("abc", "") #=> ["", "a", "b", "c", ""]

Closes #7023
1 parent 40f4079 commit afd51e8

File tree

4 files changed

+76
-35
lines changed

4 files changed

+76
-35
lines changed

lib/elixir/lib/regex.ex

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ defmodule Regex do
439439
["abc"]
440440
441441
iex> Regex.split(~r{}, "abc")
442-
["a", "b", "c", ""]
442+
["", "a", "b", "c", ""]
443443
444444
iex> Regex.split(~r{a(?<second>b)c}, "abc")
445445
["", ""]
@@ -509,25 +509,21 @@ defmodule Regex do
509509
new_offset = pos + length
510510
keep = pos - offset
511511

512-
if keep == 0 and length == 0 do
513-
do_split([h | t], string, new_offset, counter, trim, true)
514-
else
515-
<<_::binary-size(offset), part::binary-size(keep), match::binary-size(length), _::binary>> =
516-
string
512+
<<_::binary-size(offset), part::binary-size(keep), match::binary-size(length), _::binary>> =
513+
string
517514

518-
if keep == 0 and (length == 0 or trim) do
519-
[match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
520-
else
521-
[part, match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
522-
end
515+
if keep == 0 and trim do
516+
[match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
517+
else
518+
[part, match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
523519
end
524520
end
525521

526522
defp do_split([[{pos, length} | h] | t], string, offset, counter, trim, false) do
527523
new_offset = pos + length
528524
keep = pos - offset
529525

530-
if keep == 0 and (length == 0 or trim) do
526+
if keep == 0 and trim do
531527
do_split([h | t], string, new_offset, counter, trim, false)
532528
else
533529
<<_::binary-size(offset), part::binary-size(keep), _::binary>> = string

lib/elixir/lib/string.ex

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -341,16 +341,19 @@ defmodule String do
341341
iex> String.split("abc", ~r{b}, include_captures: true)
342342
["a", "b", "c"]
343343
344-
Splitting on empty patterns returns graphemes:
344+
Splitting on empty string returns graphemes:
345345
346346
iex> String.split("abc", "")
347-
["a", "b", "c", ""]
347+
["", "a", "b", "c", ""]
348348
349349
iex> String.split("abc", "", trim: true)
350350
["a", "b", "c"]
351351
352-
iex> String.split("abc", "", parts: 2)
353-
["a", "bc"]
352+
iex> String.split("abc", "", parts: 1)
353+
["abc"]
354+
355+
iex> String.split("abc", "", parts: 3)
356+
["", "a", "bc"]
354357
355358
A precompiled pattern can also be given:
356359
@@ -379,7 +382,19 @@ defmodule String do
379382
Regex.split(pattern, string, options)
380383
end
381384

382-
def split(string, pattern, []) when is_binary(string) and pattern != "" do
385+
def split(string, "", options) when is_binary(string) do
386+
parts = Keyword.get(options, :parts, :infinity)
387+
index = parts_to_index(parts)
388+
trim = Keyword.get(options, :trim, false)
389+
390+
if trim == false and index != 1 do
391+
["" | split_empty(string, trim, index - 1)]
392+
else
393+
split_empty(string, trim, index)
394+
end
395+
end
396+
397+
def split(string, pattern, []) when is_tuple(pattern) or is_binary(string) do
383398
:binary.split(string, pattern, [:global])
384399
end
385400

@@ -393,6 +408,16 @@ defmodule String do
393408
defp parts_to_index(:infinity), do: 0
394409
defp parts_to_index(n) when is_integer(n) and n > 0, do: n
395410

411+
defp split_empty("", true, 1), do: []
412+
defp split_empty(string, _, 1), do: [string]
413+
414+
defp split_empty(string, trim, count) do
415+
case next_grapheme(string) do
416+
{h, t} -> [h | split_empty(t, trim, count - 1)]
417+
nil -> split_empty("", trim, 1)
418+
end
419+
end
420+
396421
defp split_each("", _pattern, true, 1), do: []
397422
defp split_each(string, _pattern, _trim, 1) when is_binary(string), do: [string]
398423

@@ -424,26 +449,37 @@ defmodule String do
424449
["1", "2", "3", "4"]
425450
426451
iex> String.splitter("abcd", "") |> Enum.take(10)
427-
["a", "b", "c", "d", ""]
452+
["", "a", "b", "c", "d", ""]
428453
429454
iex> String.splitter("abcd", "", trim: true) |> Enum.take(10)
430455
["a", "b", "c", "d"]
431456
432457
"""
433458
@spec splitter(t, pattern, keyword) :: Enumerable.t()
434-
def splitter(string, pattern, options \\ []) do
459+
def splitter(string, pattern, options \\ [])
460+
461+
def splitter(string, "", options) do
462+
if Keyword.get(options, :trim, false) do
463+
Stream.unfold(string, &next_grapheme/1)
464+
else
465+
Stream.unfold(:match, &do_empty_splitter(&1, string))
466+
end
467+
end
468+
469+
def splitter(string, pattern, options) do
435470
pattern = maybe_compile_pattern(pattern)
436471
trim = Keyword.get(options, :trim, false)
437472
Stream.unfold(string, &do_splitter(&1, pattern, trim))
438473
end
439474

475+
defp do_empty_splitter(:match, string), do: {"", string}
476+
defp do_empty_splitter(:nomatch, _string), do: nil
477+
defp do_empty_splitter("", _), do: {"", :nomatch}
478+
defp do_empty_splitter(string, _), do: next_grapheme(string)
479+
440480
defp do_splitter(:nomatch, _pattern, _), do: nil
441-
defp do_splitter("", _pattern, true), do: nil
442481
defp do_splitter("", _pattern, false), do: {"", :nomatch}
443-
444-
defp do_splitter(bin, "", _trim) do
445-
next_grapheme(bin)
446-
end
482+
defp do_splitter("", _pattern, true), do: nil
447483

448484
defp do_splitter(bin, pattern, trim) do
449485
case :binary.split(bin, pattern) do
@@ -453,7 +489,6 @@ defmodule String do
453489
end
454490
end
455491

456-
defp maybe_compile_pattern(""), do: ""
457492
defp maybe_compile_pattern(pattern) when is_tuple(pattern), do: pattern
458493
defp maybe_compile_pattern(pattern), do: :binary.compile_pattern(pattern)
459494

lib/elixir/test/elixir/regex_test.exs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,17 @@ defmodule RegexTest do
211211
assert Regex.split(~r/([ln])/, "Erlang", include_captures: true) == ["Er", "l", "a", "n", "g"]
212212
assert Regex.split(~r/([kw])/, "Elixir", include_captures: true) == ["Elixir"]
213213

214-
parts = ["Elixir"]
215-
assert Regex.split(~r/([Ee]lixir)/, "Elixir", include_captures: true, trim: true) == parts
214+
assert Regex.split(~r/([Ee]lixir)/, "Elixir", include_captures: true, trim: true) ==
215+
["Elixir"]
216216

217-
parts = ["", "Elixir", ""]
218-
assert Regex.split(~r/([Ee]lixir)/, "Elixir", include_captures: true, trim: false) == parts
217+
assert Regex.split(~r/([Ee]lixir)/, "Elixir", include_captures: true, trim: false) ==
218+
["", "Elixir", ""]
219+
220+
assert Regex.split(~r//, "abc", include_captures: true) ==
221+
["", "", "a", "", "b", "", "c", "", ""]
222+
223+
assert Regex.split(~r/a/, "abc", include_captures: true) == ["", "a", "bc"]
224+
assert Regex.split(~r/c/, "abc", include_captures: true) == ["ab", "c", ""]
219225
end
220226

221227
test "replace/3,4" do

lib/elixir/test/elixir/string_test.exs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,21 @@ defmodule StringTest do
5656
assert String.split(" a b c ", " ", trim: true, parts: 1) == [" a b c "]
5757
assert String.split(" a b c ", " ", trim: true, parts: 2) == ["a", "b c "]
5858

59-
assert String.split("abé", "") == ["a", "b", "é", ""]
60-
assert String.split("abé", "", parts: :infinity) == ["a", "b", "é", ""]
59+
assert String.split("abé", "") == ["", "a", "b", "é", ""]
60+
assert String.split("abé", "", parts: :infinity) == ["", "a", "b", "é", ""]
6161
assert String.split("abé", "", parts: 1) == ["abé"]
62-
assert String.split("abé", "", parts: 2) == ["a", "bé"]
63-
assert String.split("abé", "", parts: 10) == ["a", "b", "é", ""]
62+
assert String.split("abé", "", parts: 2) == ["", "abé"]
63+
assert String.split("abé", "", parts: 3) == ["", "a", "bé"]
64+
assert String.split("abé", "", parts: 4) == ["", "a", "b", "é"]
65+
assert String.split("abé", "", parts: 5) == ["", "a", "b", "é", ""]
66+
assert String.split("abé", "", parts: 10) == ["", "a", "b", "é", ""]
6467
assert String.split("abé", "", trim: true) == ["a", "b", "é"]
6568
assert String.split("abé", "", trim: true, parts: :infinity) == ["a", "b", "é"]
6669
assert String.split("abé", "", trim: true, parts: 2) == ["a", "bé"]
70+
assert String.split("abé", "", trim: true, parts: 3) == ["a", "b", "é"]
71+
assert String.split("abé", "", trim: true, parts: 4) == ["a", "b", "é"]
6772

68-
assert String.split("noël", "") == ["n", "o", "ë", "l", ""]
69-
73+
assert String.split("noël", "") == ["", "n", "o", "ë", "l", ""]
7074
assert String.split("x-", "-", parts: 2, trim: true) == ["x"]
7175
assert String.split("x-x-", "-", parts: 3, trim: true) == ["x", "x"]
7276
end

0 commit comments

Comments
 (0)