Skip to content

Commit c9fd346

Browse files
authored
Document String#split(Regex)'s capture group behavior (#16207)
1 parent 9937b8b commit c9fd346

File tree

2 files changed

+28
-11
lines changed

2 files changed

+28
-11
lines changed

spec/std/string_spec.cr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,8 @@ describe "String" do
14971497
it "keeps groups" do
14981498
s = "split on the word on okay?"
14991499
s.split(/(on)/).should eq(["split ", "on", " the word ", "on", " okay?"])
1500+
s.split(/o(?:(n)|(r))/).should eq(["split ", "n", " the w", "r", "d ", "n", " okay?"])
1501+
s.split(/()/, limit: 4, remove_empty: true).should eq(["s", "", "p", "", "l", "", "it on the word on okay?"])
15001502
end
15011503
end
15021504
end

src/string.cr

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4241,19 +4241,25 @@ class String
42414241
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
42424242
end
42434243

4244-
# Makes an `Array` by splitting the string on *separator* (and removing instances of *separator*).
4244+
# Makes an `Array` by splitting the string on *separator* (and removing
4245+
# instances of *separator*).
42454246
#
4246-
# If *limit* is present, the array will be limited to *limit* items and
4247-
# the final item will contain the remainder of the string.
4247+
# If *separator* is an empty regex (`//`), the string will be separated into
4248+
# one-character strings. If *separator* defines any capture groups, their
4249+
# matches are also included in the result.
42484250
#
4249-
# If *separator* is an empty regex (`//`), the string will be separated into one-character strings.
4251+
# If *limit* is present, *separator* will be matched at most `limit - 1`
4252+
# times, and the final item will contain the remainder of the string. The
4253+
# array may contain more than *limit* items if capture groups are present.
42504254
#
42514255
# If *remove_empty* is `true`, any empty strings are removed from the result.
4256+
# This does not affect matches from *separator*'s capture groups.
42524257
#
42534258
# ```
42544259
# long_river_name = "Mississippi"
4255-
# long_river_name.split(/s+/) # => ["Mi", "i", "ippi"]
4256-
# long_river_name.split(//) # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
4260+
# long_river_name.split(/s+/) # => ["Mi", "i", "ippi"]
4261+
# long_river_name.split(//) # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
4262+
# long_river_name.split(/(i)/) # => ["M", "i", "ss", "i", "ss", "i", "pp", "i", ""]
42574263
# ```
42584264
def split(separator : Regex, limit = nil, *, remove_empty = false, options : Regex::MatchOptions = Regex::MatchOptions::None) : Array(String)
42594265
ary = Array(String).new
@@ -4263,14 +4269,19 @@ class String
42634269
ary
42644270
end
42654271

4266-
# Splits the string after each regex *separator* and yields each part to a block.
4272+
# Splits the string on each regex *separator* and yields each part to a
4273+
# block.
42674274
#
4268-
# If *limit* is present, the array will be limited to *limit* items and
4269-
# the final item will contain the remainder of the string.
4275+
# If *separator* is an empty regex (`//`), the string will be separated into
4276+
# one-character strings. If *separator* defines any capture groups, their
4277+
# matches are also yielded in order.
42704278
#
4271-
# If *separator* is an empty regex (`//`), the string will be separated into one-character strings.
4279+
# If *limit* is present, *separator* will be matched at most `limit - 1`
4280+
# times, and the final item will contain the remainder of the string. More
4281+
# than *limit* items may be yielded in total if capture groups are present.
42724282
#
4273-
# If *remove_empty* is `true`, any empty strings are removed from the result.
4283+
# If *remove_empty* is `true`, any empty strings are not yielded. This does
4284+
# not affect matches from *separator*'s capture groups.
42744285
#
42754286
# ```
42764287
# ary = [] of String
@@ -4282,6 +4293,10 @@ class String
42824293
#
42834294
# long_river_name.split(//) { |s| ary << s }
42844295
# ary # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
4296+
# ary.clear
4297+
#
4298+
# long_river_name.split(/(i)/) { |s| ary << s }
4299+
# ary # => ["M", "i", "ss", "i", "ss", "i", "pp", "i", ""]
42854300
# ```
42864301
def split(separator : Regex, limit = nil, *, remove_empty = false, options : Regex::MatchOptions = Regex::MatchOptions::None, &block : String -> _)
42874302
if empty?

0 commit comments

Comments
 (0)