Skip to content

Commit 1f0c6fa

Browse files
various String performance tweaks
- {next,getindex}_continued don't need to re-check bounds
- short-circuiting is slightly faster in length
1 parent a7face9 commit 1f0c6fa

File tree

1 file changed

+15
-16
lines changed

1 file changed

+15
-16
lines changed

base/strings/string.jl

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,10 @@ is_valid_continuation(c) = c & 0xc0 == 0x80
150150

151151
## required core functionality ##
152152

153-
function next(s::String, i::Int)
154-
@boundscheck checkbounds(s, i)
155-
@inbounds b = codeunit(s, i)
156-
# TODO: check index validity
153+
@propagate_inbounds function next(s::String, i::Int)
154+
b = codeunit(s, i)
157155
u = UInt32(b) << 24
158-
(b < 0x80) | (0xf8 ≤ b) && return reinterpret(Char, u), i+1
156+
between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1
159157
return next_continued(s, i, u)
160158
end
161159

@@ -187,29 +185,30 @@ end
187185
@propagate_inbounds function getindex(s::String, i::Int)
188186
b = codeunit(s, i)
189187
u = UInt32(b) << 24
190-
(b < 0x80) | (0xf8 ≤ b) && return reinterpret(Char, u)
188+
between(b, 0x80, 0xf7) || return reinterpret(Char, u)
191189
return getindex_continued(s, i, u)
192190
end
193191

194-
@noinline function getindex_continued(s::String, i::Int, u::UInt32)
192+
function getindex_continued(s::String, i::Int, u::UInt32)
195193
if u < 0xc0000000
196-
isvalid(s, i) && @goto ret
194+
# called from `getindex` which checks bounds
195+
@inbounds isvalid(s, i) && @goto ret
197196
string_index_err(s, i)
198197
end
199198
n = ncodeunits(s)
200-
# first continuation byte
199+
201200
(i += 1) > n && @goto ret
202-
@inbounds b = codeunit(s, i)
201+
@inbounds b = codeunit(s, i) # cont byte 1
203202
b & 0xc0 == 0x80 || @goto ret
204203
u |= UInt32(b) << 16
205-
# second continuation byte
204+
206205
((i += 1) > n) | (u < 0xe0000000) && @goto ret
207-
@inbounds b = codeunit(s, i)
206+
@inbounds b = codeunit(s, i) # cont byte 2
208207
b & 0xc0 == 0x80 || @goto ret
209208
u |= UInt32(b) << 8
210-
# third continuation byte
209+
211210
((i += 1) > n) | (u < 0xf0000000) && @goto ret
212-
@inbounds b = codeunit(s, i)
211+
@inbounds b = codeunit(s, i) # cont byte 3
213212
b & 0xc0 == 0x80 || @goto ret
214213
u |= UInt32(b)
215214
@label ret
@@ -252,13 +251,13 @@ end
252251

253252
length(s::String) = _length(s, 1, ncodeunits(s), ncodeunits(s))
254253

255-
function _length(s::String, i::Int, n::Int, c::Int)
254+
@inline function _length(s::String, i::Int, n::Int, c::Int)
256255
i < n || return c
257256
@inbounds b = codeunit(s, i)
258257
@inbounds while true
259258
while true
260259
(i += 1) ≤ n || return c
261-
between(b, 0xc0, 0xf7) && break
260+
0xc0 ≤ b ≤ 0xf7 && break
262261
b = codeunit(s, i)
263262
end
264263
l = b

0 commit comments

Comments
 (0)