Skip to content

Commit 2dc5928

Browse files
More efficient chomp for String (#58192)
We can avoid all the computation of valid indices for this simple function, since it only strips CR (0xD) and LF (0xA), which are known to be single bytes. This is approximately four times faster.

---------

Co-authored-by: Dilum Aluthge <[email protected]>
1 parent 555d3ba commit 2dc5928

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

base/strings/util.jl

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -319,14 +319,20 @@ end
319319
"""
320320
chomp(s::AbstractString)::SubString
321321
322-
Remove a single trailing newline from a string.
322+
Remove a single trailing newline (i.e. "\\r\\n" or "\\n") from a string.
323323
324324
See also [`chop`](@ref).
325325
326326
# Examples
327327
```jldoctest
328328
julia> chomp("Hello\\n")
329329
"Hello"
330+
331+
julia> chomp("World\\r\\n")
332+
"World"
333+
334+
julia> chomp("Julia\\r\\n\\n")
335+
"Julia\\r\\n"
330336
```
331337
"""
332338
function chomp(s::AbstractString)
@@ -336,17 +342,22 @@ function chomp(s::AbstractString)
336342
(j < 1 || s[j] != '\r') && (return SubString(s, 1, j))
337343
return SubString(s, 1, prevind(s,j))
338344
end
339-
function chomp(s::String)
340-
i = lastindex(s)
341-
if i < 1 || codeunit(s,i) != 0x0a
342-
return @inbounds SubString(s, 1, i)
343-
elseif i < 2 || codeunit(s,i-1) != 0x0d
344-
return @inbounds SubString(s, 1, prevind(s, i))
345+
346+
@assume_effects :removable :foldable function chomp(s::Union{String, SubString{String}})
347+
cu = codeunits(s)
348+
ncu = length(cu)
349+
len = if iszero(ncu)
350+
0
345351
else
346-
return @inbounds SubString(s, 1, prevind(s, i-1))
352+
has_lf = @inbounds(cu[ncu]) == 0x0a
353+
two_bytes = ncu > 1
354+
has_cr = has_lf & two_bytes & (@inbounds(cu[ncu - two_bytes]) == 0x0d)
355+
ncu - (has_lf + has_cr)
347356
end
357+
off = s isa String ? 0 : s.offset
358+
par = s isa String ? s : s.string
359+
@inbounds @inline SubString{String}(par, off, len, Val{:noshift}())
348360
end
349-
350361
"""
351362
lstrip([pred=isspace,] str::AbstractString)::SubString
352363
lstrip(str::AbstractString, chars)::SubString

0 commit comments

Comments
 (0)