Skip to content

Commit 937c3ad

Browse files
isvalid: return false out of bounds instead of throwing
also: `next` can assume that incoming indices are valid
1 parent 6f10ca2 commit 937c3ad

File tree

5 files changed

+26
-39
lines changed

5 files changed

+26
-39
lines changed

base/strings/basic.jl

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,13 @@ See also: [`ncodeunits`](@ref), [`checkbounds`](@ref)
8787
"""
8888
isvalid(s::AbstractString, i::Integer) -> Bool
8989
90-
Predicate indicating whether the given index is the start of the encoding of
91-
a character in `s` or not. If `isvalid(s, i)` is true then `s[i]` will return
92-
the character whose encoding starts at that index, if it's false, then `s[i]`
93-
will raise an invalid index error. Behavior of `next(s, i)` is similar except
94-
that the character is returned along with the index of the following character.
95-
In order for `isvalid(s, i)` to be an O(1) function, the encoding of `s` must
96-
be [self-synchronizing](https://en.wikipedia.org/wiki/Self-synchronizing_code);
97-
this is a basic assumption of Julia's generic string support.
90+
Predicate indicating whether the given index is the start of the encoding of a
91+
character in `s` or not. If `isvalid(s, i)` is true then `s[i]` will return the
92+
character whose encoding starts at that index, if it's false, then `s[i]` will
93+
raise an invalid index error or a bounds error depending on if `i` is in bounds.
94+
In order for `isvalid(s, i)` to be an O(1) function, the encoding of `s` must be
95+
[self-synchronizing](https://en.wikipedia.org/wiki/Self-synchronizing_code) this
96+
is a basic assumption of Julia's generic string support.
9897
9998
See also: [`getindex`](@ref), [`next`](@ref), [`thisind`](@ref),
10099
[`nextind`](@ref), [`prevind`](@ref), [`length`](@ref)
@@ -128,8 +127,8 @@ Stacktrace:
128127
Return a tuple of the character in `s` at index `i` with the index of the start
129128
of the following character in `s`. This is the key method that allows strings to
130129
be iterated, yielding a sequences of characters. If `i` is out of bounds in `s`
131-
then a bounds error is raised; if `i` is not a valid character index in `s` then
132-
a Unicode index error is raised.
130+
then a bounds error is raised. The `next` function, as part of the iteration
131+
protocoal may assume that `i` is the start of a character in `s`.
133132
134133
See also: [`getindex`](@ref), [`start`](@ref), [`done`](@ref),
135134
[`checkbounds`](@ref)
@@ -145,7 +144,11 @@ eltype(::Type{<:AbstractString}) = Char
145144
sizeof(s::AbstractString) = ncodeunits(s) * sizeof(codeunit(s))
146145
endof(s::AbstractString) = thisind(s, ncodeunits(s))
147146

148-
getindex(s::AbstractString, i::Integer) = next(s, i)[1]
147+
function getindex(s::AbstractString, i::Integer)
148+
@boundscheck checkbounds(s, i)
149+
@inbounds return isvalid(s, i) ? next(s, i)[1] : string_index_err(s, i)
150+
end
151+
149152
getindex(s::AbstractString, i::Colon) = s
150153
# TODO: handle other ranges with stride ±1 specially?
151154
# TODO: add more @propagate_inbounds annotations?

base/strings/search.jl

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ function search(s::String, c::Char, i::Integer = 1)
55
i == sizeof(s) + 1 && return 0
66
throw(BoundsError(s, i))
77
end
8-
@inbounds if is_valid_continuation(codeunit(s,i))
9-
string_index_err(s, i)
10-
end
8+
@inbounds isvalid(s, i) || string_index_err(s, i)
119
c '\x7f' && return search(s, c % UInt8, i)
1210
while true
1311
i = search(s, first_utf8_byte(c), i)
@@ -94,13 +92,10 @@ julia> search("JuliaLang","Julia")
9492
```
9593
"""
9694
function search(s::AbstractString, c::Chars, i::Integer)
97-
if isempty(c)
98-
return 1 <= i <= nextind(s,endof(s)) ? i :
99-
throw(BoundsError(s, i))
100-
end
101-
if i < 1 || i > nextind(s,endof(s))
102-
throw(BoundsError(s, i))
103-
end
95+
z = ncodeunits(s) + 1
96+
isempty(c) && return 1 i z ? i : throw(BoundsError(s, i))
97+
1 i  z || throw(BoundsError(s, i))
98+
@inbounds i == z || isvalid(s, i) || string_index_err(s, i)
10499
while !done(s,i)
105100
d, j = next(s,i)
106101
if d in c

base/strings/string.jl

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,8 @@ is_valid_continuation(c) = c & 0xc0 == 0x80
157157
return next_continued(s, i, u)
158158
end
159159

160-
@noinline function next_continued(s::String, i::Int, u::UInt32)
161-
if u < 0xc0000000
162-
isvalid(s, i) && (i += 1; @goto ret)
163-
string_index_err(s, i)
164-
end
160+
function next_continued(s::String, i::Int, u::UInt32)
161+
u < 0xc0000000 && (i += 1; @goto ret)
165162
n = ncodeunits(s)
166163
# first continuation byte
167164
(i += 1) > n && @goto ret
@@ -281,11 +278,7 @@ first_utf8_byte(c::Char) = (reinterpret(UInt32, c) >> 24) % UInt8
281278

282279
## overload methods for efficiency ##
283280

284-
function isvalid(s::String, i::Int)
285-
@boundscheck checkbounds(s, i)
286-
return thisind(s, i) == i
287-
end
288-
isvalid(s::String, i::Integer) = isvalid(s, Int(i))
281+
isvalid(s::String, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i
289282

290283
## optimized concatenation, reverse, repeat ##
291284

base/strings/substring.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ function getindex(s::SubString, i::Integer)
7373
end
7474

7575
function isvalid(s::SubString, i::Integer)
76-
@boundscheck checkbounds(s, i)
77-
@inbounds return isvalid(s.string, s.offset + i)
76+
ib = true
77+
@boundscheck ib = checkbounds(Bool, s, i)
78+
@inbounds return ib && isvalid(s.string, s.offset + i)
7879
end
7980

8081
thisind(s::SubString, i::Integer) = thisind(s.string, s.offset + i) - s.offset

test/strings/types.jl

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,7 @@ let s = "lorem ipsum", sdict = Dict(
169169
for (ss, s) in sdict
170170
@test ncodeunits(ss) == ncodeunits(s)
171171
for i in -2:13
172-
if 1  i  ncodeunits(ss)
173-
@test isvalid(ss, i) == isvalid(s, i)
174-
else
175-
@test_throws BoundsError isvalid(ss, i)
176-
@test_throws BoundsError isvalid(s, i)
177-
end
172+
@test isvalid(ss, i) == isvalid(s, i)
178173
end
179174
for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
180175
@test length(ss, i, j) == length(s, i, j)

0 commit comments

Comments
 (0)