@@ -155,10 +155,6 @@ public struct UTF8 : UnicodeCodec {
155
155
/// The number of bits in `_decodeBuffer` that are currently filled.
156
156
internal var _bitsInBuffer : UInt8 = 0
157
157
158
- /// Whether we have exhausted the iterator. Note that this doesn't mean
159
- /// we are done decoding, as there might still be bytes left in the buffer.
160
- internal var _didExhaustIterator : Bool = false
161
-
162
158
/// Starts or continues decoding a UTF-8 sequence.
163
159
///
164
160
/// To decode a code unit sequence completely, call this method repeatedly
@@ -202,44 +198,40 @@ public struct UTF8 : UnicodeCodec {
202
198
/// UTF sequence has been fully decoded.
203
199
public mutating func decode<
204
200
I : IteratorProtocol where I. Element == CodeUnit
205
- > ( _ next: inout I ) -> UnicodeDecodingResult {
206
-
207
- refillBuffer: if !_didExhaustIterator {
208
- // Bufferless ASCII fastpath.
209
- if _fastPath ( _bitsInBuffer == 0 ) {
210
- if let codeUnit = next. next ( ) {
211
- if codeUnit & 0x80 == 0 {
212
- return . scalarValue( UnicodeScalar ( _unchecked: UInt32 ( codeUnit) ) )
213
- }
214
- // Non-ASCII, proceed to buffering mode.
215
- _decodeBuffer = UInt32 ( codeUnit)
216
- _bitsInBuffer = 8
217
- } else {
218
- _didExhaustIterator = true
219
- return . emptyInput
220
- }
221
- } else if ( _decodeBuffer & 0x80 == 0 ) {
222
- // ASCII in buffer. We don't refill the buffer so we can return
223
- // to bufferless mode once we've exhausted it.
224
- break refillBuffer
201
+ > ( _ input: inout I ) -> UnicodeDecodingResult {
202
+
203
+ // Bufferless ASCII fastpath.
204
+ if _fastPath ( _bitsInBuffer == 0 ) {
205
+ guard let codeUnit = input. next ( ) else { return . emptyInput }
206
+ // ASCII, return immediately.
207
+ if codeUnit & 0x80 == 0 {
208
+ return . scalarValue( UnicodeScalar ( _unchecked: UInt32 ( codeUnit) ) )
225
209
}
226
- // Buffering mode.
227
- // Fill buffer back to 4 bytes (or as many as are left in the iterator).
228
- _sanityCheck ( _bitsInBuffer < 32 )
229
- repeat {
230
- if let codeUnit = next. next ( ) {
231
- // We use & 0x1f to make the compiler omit a bounds check branch.
232
- _decodeBuffer |= ( UInt32 ( codeUnit) << UInt32 ( _bitsInBuffer & 0x1f ) )
233
- _bitsInBuffer = _bitsInBuffer &+ 8
234
- } else {
235
- _didExhaustIterator = true
236
- if _bitsInBuffer == 0 { return . emptyInput }
237
- break // We still have some bytes left in our buffer.
238
- }
239
- } while _bitsInBuffer < 32
240
- } else if _bitsInBuffer == 0 {
241
- return . emptyInput
210
+ // Non-ASCII, proceed to buffering mode.
211
+ _decodeBuffer = UInt32 ( codeUnit)
212
+ _bitsInBuffer = 8
213
+ } else if ( _decodeBuffer & 0x80 == 0 ) {
214
+ // ASCII in buffer. We don't refill the buffer so we can return
215
+ // to bufferless mode once we've exhausted it.
216
+ let codeUnit = _decodeBuffer & 0xff
217
+ _decodeBuffer >>= 8
218
+ _bitsInBuffer = _bitsInBuffer &- 8
219
+ return . scalarValue( UnicodeScalar ( _unchecked: codeUnit) )
242
220
}
221
+ // Buffering mode.
222
+ // Fill buffer back to 4 bytes (or as many as are left in the iterator).
223
+ _sanityCheck ( _bitsInBuffer < 32 )
224
+ repeat {
225
+ if let codeUnit = input. next ( ) {
226
+ // We know _bitsInBuffer < 32 so we use `& 0x1f` (31) to make the
227
+ // compiler omit a bounds check branch for the bitshift.
228
+ _decodeBuffer |= ( UInt32 ( codeUnit) << UInt32 ( _bitsInBuffer & 0x1f ) )
229
+ _bitsInBuffer = _bitsInBuffer &+ 8
230
+ } else {
231
+ if _bitsInBuffer == 0 { return . emptyInput }
232
+ break // We still have some bytes left in our buffer.
233
+ }
234
+ } while _bitsInBuffer < 32
243
235
244
236
// Decode one unicode scalar.
245
237
// Note our empty bytes are always 0x00, which is required for this call.
@@ -250,16 +242,13 @@ public struct UTF8 : UnicodeCodec {
250
242
_sanityCheck ( 1 ... 4 ~= length && bitsConsumed <= _bitsInBuffer)
251
243
// Swift doesn't allow shifts greater than or equal to the type width.
252
244
// _decodeBuffer >>= UInt32(bitsConsumed) // >>= 32 crashes.
253
- // Mask with 0x3f to let the compiler omit the '>= 64' bounds check.
245
+ // Mask with 0x3f (63) to let the compiler omit the '>= 64' bounds check.
254
246
_decodeBuffer = UInt32 ( truncatingBitPattern:
255
247
UInt64 ( _decodeBuffer) >> ( UInt64 ( bitsConsumed) & 0x3f ) )
256
248
_bitsInBuffer = _bitsInBuffer &- bitsConsumed
257
249
258
- if _fastPath ( result != nil ) {
259
- return . scalarValue( UnicodeScalar ( _unchecked: result!) )
260
- } else {
261
- return . error // Ill-formed UTF-8 code unit sequence.
262
- }
250
+ guard _fastPath ( result != nil ) else { return . error }
251
+ return . scalarValue( UnicodeScalar ( _unchecked: result!) )
263
252
}
264
253
265
254
/// Attempts to decode a single UTF-8 code unit sequence starting at the LSB
0 commit comments