Skip to content

Commit 769b69a

Browse files
author
José Valim
committed
Ensure String.split/2 works with unicode
1 parent 0e725fb commit 769b69a

File tree

2 files changed

+15
-12
lines changed

2 files changed

+15
-12
lines changed

lib/elixir/lib/string.ex

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ defmodule String do
155155
@spec split(t) :: [t]
156156
defdelegate split(binary), to: String.Unicode
157157

158-
@doc """
158+
@doc %S"""
159159
Divides a string into substrings based on a pattern,
160160
returning a list of these substrings. The pattern can
161161
be a string, a list of strings or a regular expression.
@@ -168,30 +168,33 @@ defmodule String do
168168
169169
## Examples
170170
171+
Splitting with a string pattern:
172+
171173
iex> String.split("a,b,c", ",")
172174
["a", "b", "c"]
173175
iex> String.split("a,b,c", ",", global: false)
174176
["a", "b,c"]
175177
iex> String.split(" a b c ", " ", trim: true)
176178
["a", "b", "c"]
177179
180+
A list of patterns:
181+
178182
iex> String.split("1,2 3,4", [" ", ","])
179183
["1", "2", "3", "4"]
180184
185+
A regular expression:
186+
181187
iex> String.split("a,b,c", %r{,})
182188
["a", "b", "c"]
183189
iex> String.split("a,b,c", %r{,}, global: false)
184190
["a", "b,c"]
185-
iex> String.split("a,b", %r{\\.})
186-
["a,b"]
191+
iex> String.split(" a b c ", %r{\s}, trim: true)
192+
["a", "b", "c"]
193+
194+
Splitting on empty patterns returns codepoints:
187195
188-
iex> String.split("abc", %r{c})
189-
["ab", ""]
190196
iex> String.split("abc", %r{})
191197
["a", "b", "c", ""]
192-
iex> String.split("abc", %r{}, trim: true)
193-
["a", "b", "c"]
194-
195198
iex> String.split("abc", "")
196199
["a", "b", "c", ""]
197200
iex> String.split("abc", "", trim: true)
@@ -206,7 +209,7 @@ defmodule String do
206209

207210
def split("", _pattern, _options), do: [""]
208211

209-
def split(binary, "", options), do: split(binary, %r"", options)
212+
def split(binary, "", options), do: split(binary, %r""u, options)
210213

211214
def split(binary, pattern, options) when is_regex(pattern) do
212215
Regex.split(pattern, binary, options)

lib/elixir/test/elixir/string_test.exs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ defmodule StringTest do
3737
assert String.split(" a b c ", " ", trim: true) == ["a", "b", "c"]
3838
assert String.split(" a b c ", " ", trim: true, global: false) == ["a b c "]
3939

40-
assert String.split("abc", "") == ["a", "b", "c", ""]
41-
assert String.split("abc", "", global: false) == ["a", "bc"]
42-
assert String.split("abc", "", trim: true) == ["a", "b", "c"]
40+
assert String.split("abé", "") == ["a", "b", "é", ""]
41+
assert String.split("abé", "", global: false) == ["a", "bé"]
42+
assert String.split("abé", "", trim: true) == ["a", "b", "é"]
4343
end
4444

4545
test :split_with_regex do

0 commit comments

Comments
 (0)