@@ -383,6 +383,83 @@ function rpad(
383383 r == 0 ? string (s, p^ q) : string (s, p^ q, first (p, r))
384384end
385385
"""
    eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
    eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)

Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
substrings.  `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
(i.e. as a string, regular expression or a function), or as a single character or collection
of characters.

If `dlm` is omitted, it defaults to [`isspace`](@ref).

Optional keyword arguments are:
 - `limit`: the maximum number of results the iterator returns. The default of `limit=0`
   implies no maximum.
 - `keepempty`: whether empty fields should be kept in the result. Default is `false`
   without a `dlm` argument, `true` with a `dlm` argument.

See also [`split`](@ref).

# Examples
```jldoctest
julia> a = "Ma.rch"
"Ma.rch"

julia> collect(eachsplit(a, "."))
2-element Vector{SubString{String}}:
 "Ma"
 "rch"
```
"""
function eachsplit end

# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
# and prevents a major invalidation risk (1550 MethodInstances)
struct SplitIterator{S<:AbstractString,F}
    str::S          # the string being split
    splitter::F     # delimiter, passed as the first argument of `findnext`
    limit::Int      # maximum number of pieces to produce; 0 means unlimited
    keepempty::Bool # whether empty pieces are yielded
end

# Report a concrete element type so that `collect` builds a concretely-typed vector.
# Pieces cut from a `SubString{T}` are again `SubString{T}`; pieces cut from any other
# string type `T` are `SubString{T}`.
eltype(::Type{<:SplitIterator{T}}) where {T} = SubString{T}
eltype(::Type{<:SplitIterator{<:SubString{T}}}) where {T} = SubString{T}

IteratorSize(::Type{<:SplitIterator}) = SizeUnknown()
427+
# The iteration state is the tuple `(start, cursor, emitted)`:
#   start:   the starting index of the substring to be extracted
#   cursor:  the index from which the next delimiter search resumes
#   emitted: the number of substrings returned so far
function iterate(iter::SplitIterator,
                 (start, cursor, emitted)=(firstindex(iter.str), firstindex(iter.str), 0))
    str = iter.str
    len = ncodeunits(str)::Int
    # After the final piece the state carries `start == len + 2`, which ends iteration here.
    if start - 1 > len
        return nothing
    end
    hit = findnext(iter.splitter, str, cursor)::Union{Nothing,Int,UnitRange{Int}}
    while hit !== nothing && emitted != iter.limit - 1 && first(hit) <= len
        delim = first(hit)
        cursor = nextind(str, last(hit))::Int
        # When the index after the match does not lie beyond its start, resume the
        # search one character past the match start so the search keeps advancing.
        resume = cursor <= delim ? nextind(str, delim) : cursor
        if start < cursor
            piece = @inbounds SubString(str, start, prevind(str, delim)::Int)
            if iter.keepempty || start < delim
                return (piece, (cursor, resume, emitted + 1))
            end
            start = cursor
        end
        cursor = resume
        hit = findnext(iter.splitter, str, cursor)::Union{Nothing,Int,UnitRange{Int}}
    end
    # No further split point is used: yield the remainder, unless it is empty and empty
    # pieces are being dropped.
    if !iter.keepempty && start > len
        return nothing
    end
    return (@inbounds(SubString(str, start)), (len + 2, cursor, emitted + 1))
end
448+
# General form: the delimiter is stored in the iterator exactly as given.
function eachsplit(str::T, splitter;
                   limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
    return SplitIterator(str, splitter, limit, keepempty)
end

# A tuple, vector or set of characters is turned into the membership predicate `in(splitter)`.
function eachsplit(str::T,
                   splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
                   limit::Integer=0, keepempty=true) where {T<:AbstractString}
    return eachsplit(str, in(splitter); limit=limit, keepempty=keepempty)
end

# A single character is turned into the equality predicate `isequal(splitter)`.
function eachsplit(str::T, splitter::AbstractChar;
                   limit::Integer=0, keepempty=true) where {T<:AbstractString}
    return eachsplit(str, isequal(splitter); limit=limit, keepempty=keepempty)
end

# a bit oddball, but standard behavior in Perl, Ruby & Python:
function eachsplit(str::AbstractString; limit::Integer=0, keepempty=false)
    return eachsplit(str, isspace; limit=limit, keepempty=keepempty)
end
462+
386463"""
387464 split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
388465 split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -412,52 +489,16 @@ julia> split(a, ".")
412489 "rch"
413490```
414491"""
function split(str::T, splitter;
               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
    # When `str` is already a `SubString` the pieces use that same type; otherwise they
    # are collected as `SubString{T}`.
    piecetype = T <: SubString ? T : SubString{T}
    return collect(piecetype, eachsplit(str, splitter; limit=limit, keepempty=keepempty))
end

# a bit oddball, but standard behavior in Perl, Ruby & Python:
function split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
    return split(str, isspace; limit=limit, keepempty=keepempty)
end
462503"""
463504 rsplit(s::AbstractString; limit::Integer=0, keepempty::Bool=false)
0 commit comments