@@ -88,14 +88,16 @@ function unsafe_checkstring(dat::AbstractVector{UInt8},
88
88
flags:: UInt = 0
89
89
totalchar = num2byte = num3byte = num4byte = 0
90
90
@inbounds while pos <= endpos
91
- ch, pos = next (dat, pos)
91
+ ch = dat[pos]
92
+ pos += 1
92
93
totalchar += 1
93
94
if ch > 0x7f
94
95
# Check UTF-8 encoding
95
96
if ch < 0xe0
96
97
# 2-byte UTF-8 sequence (i.e. characters 0x80-0x7ff)
97
98
(pos > endpos) && throw (UnicodeError (UTF_ERR_SHORT, pos, ch))
98
- byt, pos = next (dat, pos)
99
+ byt = dat[pos]
100
+ pos += 1
99
101
ch = get_continuation (ch & 0x3f , byt, pos)
100
102
if ch > 0x7f
101
103
num2byte += 1
@@ -110,20 +112,25 @@ function unsafe_checkstring(dat::AbstractVector{UInt8},
110
112
elseif ch < 0xf0
111
113
# 3-byte UTF-8 sequence (i.e. characters 0x800-0xffff)
112
114
(pos + 1 > endpos) && throw (UnicodeError (UTF_ERR_SHORT, pos, ch))
113
- byt, pos = next (dat, pos)
115
+ byt = dat[pos]
116
+ pos += 1
114
117
ch = get_continuation (ch & 0x0f , byt, pos)
115
- byt, pos = next (dat, pos)
118
+ byt = dat[pos]
119
+ pos += 1
116
120
ch = get_continuation (ch, byt, pos)
117
121
# check for surrogate pairs, make sure correct
118
122
if is_surrogate_codeunit (ch)
119
123
! is_surrogate_lead (ch) && throw (UnicodeError (UTF_ERR_NOT_LEAD, pos- 2 , ch))
120
124
# next character *must* be a trailing surrogate character
121
125
(pos + 2 > endpos) && throw (UnicodeError (UTF_ERR_MISSING_SURROGATE, pos- 2 , ch))
122
- byt, pos = next (dat, pos)
126
+ byt = dat[pos]
127
+ pos += 1
123
128
(byt != 0xed ) && throw (UnicodeError (UTF_ERR_NOT_TRAIL, pos, byt))
124
- byt, pos = next (dat, pos)
129
+ byt = dat[pos]
130
+ pos += 1
125
131
surr = get_continuation (0x0000d , byt, pos)
126
- byt, pos = next (dat, pos)
132
+ byt = dat[pos]
133
+ pos += 1
127
134
surr = get_continuation (surr, byt, pos)
128
135
! is_surrogate_trail (surr) && throw (UnicodeError (UTF_ERR_NOT_TRAIL, pos- 2 , surr))
129
136
! accept_surrogates && throw (UnicodeError (UTF_ERR_SURROGATE, pos- 2 , surr))
@@ -140,11 +147,14 @@ function unsafe_checkstring(dat::AbstractVector{UInt8},
140
147
elseif ch < 0xf5
141
148
# 4-byte UTF-8 sequence (i.e. characters > 0xffff)
142
149
(pos + 2 > endpos) && throw (UnicodeError (UTF_ERR_SHORT, pos, ch))
143
- byt, pos = next (dat, pos)
150
+ byt = dat[pos]
151
+ pos += 1
144
152
ch = get_continuation (ch & 0x07 , byt, pos)
145
- byt, pos = next (dat, pos)
153
+ byt = dat[pos]
154
+ pos += 1
146
155
ch = get_continuation (ch, byt, pos)
147
- byt, pos = next (dat, pos)
156
+ byt = dat[pos]
157
+ pos += 1
148
158
ch = get_continuation (ch, byt, pos)
149
159
if ch > 0x10ffff
150
160
throw (UnicodeError (UTF_ERR_INVALID, pos- 3 , ch))
@@ -187,7 +197,8 @@ function unsafe_checkstring(
187
197
flags:: UInt = 0
188
198
totalchar = num2byte = num3byte = num4byte = 0
189
199
@inbounds while pos <= endpos
190
- ch, pos = next (dat, pos)
200
+ ch = dat[pos]
201
+ pos = nextind (dat, pos)
191
202
totalchar += 1
192
203
if ch > 0x7f
193
204
if ch < 0x100
@@ -204,7 +215,8 @@ function unsafe_checkstring(
204
215
elseif is_surrogate_lead (ch)
205
216
pos > endpos && throw (UnicodeError (UTF_ERR_MISSING_SURROGATE, pos, ch))
206
217
# next character *must* be a trailing surrogate character
207
- ch, pos = next (dat, pos)
218
+ ch = dat[pos]
219
+ pos = nextind (dat, pos)
208
220
! is_surrogate_trail (ch) && throw (UnicodeError (UTF_ERR_NOT_TRAIL, pos, ch))
209
221
num4byte += 1
210
222
if ! (typeof (dat) <: AbstractVector{UInt16} )
0 commit comments