From 12f58dfcc6d8bc93f0ca03d5ca3ad33e44af7445 Mon Sep 17 00:00:00 2001 From: Andrew Konchin Date: Sun, 19 Oct 2025 01:44:05 +0300 Subject: [PATCH 1/5] Remove empty lines from String#each_line --- spec/std/string_spec.cr | 9 +++++++++ src/string.cr | 16 +++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 0dc744f39659..2465bd00eda4 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -2700,8 +2700,17 @@ describe "String" do lines.should eq(["foo\n", "\n", "bar\r\n", "baz\r\n"]) end + it "gets each_line with remove_empty = true" do + lines = [] of String + "\nfoo\n\nbar\r\nbaz\n\n".each_line(remove_empty: true) do |line| + lines << line + end.should be_nil + lines.should eq(["foo", "bar", "baz"]) + end + it_iterates "#each_line", ["foo", "bar", "baz"], "foo\nbar\r\nbaz\r\n".each_line it_iterates "#each_line(chomp: false)", ["foo\n", "bar\r\n", "baz\r\n"], "foo\nbar\r\nbaz\r\n".each_line(chomp: false) + it_iterates "#each_line(remove_empty: true)", ["foo", "bar", "baz"], "\nfoo\n\nbar\r\nbaz\n\n".each_line(remove_empty: true) it_iterates "#each_codepoint", [97, 98, 9731], "ab☃".each_codepoint diff --git a/src/string.cr b/src/string.cr index 99a26e55fcf1..c0cb449eb9bd 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4395,6 +4395,8 @@ class String # "hello\nworld\r\n".each_line(chomp: false) { } # yields "hello\n", "world\r\n" # ``` # + # If *remove_empty* is `true`, any empty strings are removed from the result. + # # A trailing line feed is not considered starting a final, empty line. The # empty string does not contain any lines. # @@ -4405,7 +4407,7 @@ class String # ``` # # * `#lines` returns an array of lines - def each_line(chomp : Bool = true, & : String ->) : Nil + def each_line(chomp : Bool = true, remove_empty : Bool = false, & : String ->) : Nil return if empty? offset = 0 @@ -4419,7 +4421,7 @@ class String end end - yield unsafe_byte_slice_string(offset, count) + yield unsafe_byte_slice_string(offset, count) unless remove_empty && count == 0 offset = byte_index + 1 end @@ -4429,8 +4431,8 @@ class String end # Returns an `Iterator` which yields each line of this string (see `String#each_line`). - def each_line(chomp = true) - LineIterator.new(self, chomp) + def each_line(chomp = true, remove_empty = false) + LineIterator.new(self, chomp, remove_empty) end # Converts camelcase boundaries to underscores. @@ -5699,7 +5701,7 @@ class String private class LineIterator include Iterator(String) - def initialize(@string : String, @chomp : Bool) + def initialize(@string : String, @chomp : Bool, @remove_empty : Bool) @offset = 0 @end = false end @@ -5728,6 +5730,10 @@ class String @end = true end + if @remove_empty && !@end && value.is_a?(String) && value.as(String).bytesize == 0 + value = self.next + end + value end end From 626e7ef20336639b6b5f7129f399f287f02b27c3 Mon Sep 17 00:00:00 2001 From: Andrew Konchin Date: Mon, 20 Oct 2025 14:37:12 +0300 Subject: [PATCH 2/5] Add type restriction for remove_empty and make it a named parameter --- src/string.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/string.cr b/src/string.cr index c0cb449eb9bd..74c933d8a00b 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4431,7 +4431,7 @@ class String end # Returns an `Iterator` which yields each line of this string (see `String#each_line`). - def each_line(chomp = true, remove_empty = false) + def each_line(chomp = true, *, remove_empty : Bool = false) LineIterator.new(self, chomp, remove_empty) end From 3b2c8b7720fba9ed763d69c9932feacafb465544 Mon Sep 17 00:00:00 2001 From: Andrew Konchin Date: Tue, 21 Oct 2025 16:20:37 +0300 Subject: [PATCH 3/5] Apply remove-empty-line filter to a line ignoring trailing '\n' --- spec/std/string_spec.cr | 11 ++++++++++- src/string.cr | 31 ++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 2465bd00eda4..67b64f7525f0 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -2708,9 +2708,18 @@ describe "String" do lines.should eq(["foo", "bar", "baz"]) end + it "gets each_line with remove_empty = true and chomp = false" do + lines = [] of String + "\nfoo\n\nbar\r\n\r\nbaz".each_line(remove_empty: true, chomp: false) do |line| + lines << line + end.should be_nil + lines.should eq(["foo\n", "bar\r\n", "baz"]) + end + it_iterates "#each_line", ["foo", "bar", "baz"], "foo\nbar\r\nbaz\r\n".each_line it_iterates "#each_line(chomp: false)", ["foo\n", "bar\r\n", "baz\r\n"], "foo\nbar\r\nbaz\r\n".each_line(chomp: false) - it_iterates "#each_line(remove_empty: true)", ["foo", "bar", "baz"], "\nfoo\n\nbar\r\nbaz\n\n".each_line(remove_empty: true) + it_iterates "#each_line(remove_empty: true)", ["foo", "bar", "baz"], "\nfoo\n\nbar\r\n\r\nbaz".each_line(remove_empty: true) + it_iterates "#each_line(remove_empty: true, chomp: false)", ["foo\n", "bar\r\n", "baz"], "\nfoo\n\nbar\r\n\r\nbaz".each_line(remove_empty: true, chomp: false) it_iterates "#each_codepoint", [97, 98, 9731], "ab☃".each_codepoint diff --git a/src/string.cr b/src/string.cr index 74c933d8a00b..e9e8f95535a5 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4414,6 +4414,18 @@ class String while byte_index = byte_index('\n'.ord.to_u8, offset) count = byte_index - offset + 1 + + if remove_empty + line_length = count - 1 + if offset + line_length > 0 && to_unsafe[offset + line_length - 1] === '\r' + line_length -= 1 + end + + skip_line = line_length == 0 + else + skip_line = false + end + if chomp count -= 1 if offset + count > 0 && to_unsafe[offset + count - 1] === '\r' @@ -4421,7 +4433,7 @@ class String end end - yield unsafe_byte_slice_string(offset, count) unless remove_empty && count == 0 + yield unsafe_byte_slice_string(offset, count) unless skip_line offset = byte_index + 1 end @@ -5709,9 +5721,21 @@ class String def next return stop if @end + skip_line = false + byte_index = @string.byte_index('\n'.ord.to_u8, @offset) if byte_index count = byte_index - @offset + 1 + + if @remove_empty + line_length = count - 1 + if @offset + line_length > 0 && @string.to_unsafe[@offset + line_length - 1] === '\r' + line_length -= 1 + end + + skip_line = line_length == 0 + end + if @chomp count -= 1 if @offset + count > 0 && @string.to_unsafe[@offset + count - 1] === '\r' @@ -5730,10 +5754,7 @@ class String @end = true end - if @remove_empty && !@end && value.is_a?(String) && value.as(String).bytesize == 0 - value = self.next - end - + value = self.next if skip_line value end end From f5b7e4b7d028ca14b45cc7d15a8bc22abd82e0b1 Mon Sep 17 00:00:00 2001 From: Andrew Konchin Date: Tue, 21 Oct 2025 16:47:41 +0300 Subject: [PATCH 4/5] Refactor and simplify logic to use next or earlier return --- src/string.cr | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/string.cr b/src/string.cr index e9e8f95535a5..b3676ecd80dd 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4415,15 +4415,9 @@ class String while byte_index = byte_index('\n'.ord.to_u8, offset) count = byte_index - offset + 1 - if remove_empty - line_length = count - 1 - if offset + line_length > 0 && to_unsafe[offset + line_length - 1] === '\r' - line_length -= 1 - end - - skip_line = line_length == 0 - else - skip_line = false + if remove_empty && (byte_index == offset || (byte_index == offset + 1 && to_unsafe[offset] === '\r')) + offset = byte_index + 1 + next end if chomp @@ -4433,7 +4427,7 @@ class String end end - yield unsafe_byte_slice_string(offset, count) unless skip_line + yield unsafe_byte_slice_string(offset, count) offset = byte_index + 1 end @@ -5721,19 +5715,13 @@ class String def next return stop if @end - skip_line = false - byte_index = @string.byte_index('\n'.ord.to_u8, @offset) if byte_index count = byte_index - @offset + 1 - if @remove_empty - line_length = count - 1 - if @offset + line_length > 0 && @string.to_unsafe[@offset + line_length - 1] === '\r' - line_length -= 1 - end - - skip_line = line_length == 0 + if @remove_empty && (byte_index == @offset || (byte_index == @offset + 1 && @string.to_unsafe[@offset] === '\r')) + @offset = byte_index + 1 + return self.next end if @chomp @@ -5754,7 +5742,6 @@ class String @end = true end - value = self.next if skip_line value end end From d6d6affa642dab858e6133a12306f77c03a5b984 Mon Sep 17 00:00:00 2001 From: Andrew Konchin Date: Tue, 21 Oct 2025 16:50:32 +0300 Subject: [PATCH 5/5] Polish a bit comment to emphasise filtering empty lines, not empty strings --- src/string.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/string.cr b/src/string.cr index b3676ecd80dd..4545764da2e4 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4395,7 +4395,7 @@ class String # "hello\nworld\r\n".each_line(chomp: false) { } # yields "hello\n", "world\r\n" # ``` # - # If *remove_empty* is `true`, any empty strings are removed from the result. + # If *remove_empty* is `true`, any empty lines are removed from the result. # # A trailing line feed is not considered starting a final, empty line. The # empty string does not contain any lines.