@@ -295,7 +295,7 @@ public protocol UnicodeEncoding {
295
295
296
296
297
297
/// A `UnicodeDecoder` refinement whose non-ASCII parsing step is supplied by
/// the conforming type.
///
/// NOTE(review): this listing is a diff. The `-` line below is the old
/// requirement, which reported a `length`; the `+` line is the replacement,
/// which reports a `bitCount`.
///
/// `_parseNonASCII()` returns whether the parsed sequence is valid together
/// with its size in bits. The shared decoding code in this file sanity-checks
/// that the bit count is a multiple of 8 covering 1 to 4 code units and does
/// not exceed `buffer._bitCount`. For an invalid result it reports the length
/// as the bit count shifted right by 3.
///
/// `buffer` is read-write: the shared decoding code shifts the consumed bits
/// out of `buffer._storage` and subtracts them from `buffer._bitCount`.
public protocol _UTF8Decoder : UnicodeDecoder {
298
- func _parseNonASCII( ) -> ( isValid: Bool , length : UInt8 )
298
+ func _parseNonASCII( ) -> ( isValid: Bool , bitCount : UInt8 )
299
299
var buffer : Buffer { get set }
300
300
}
301
301
@@ -333,23 +333,22 @@ extension _UTF8Decoder where Buffer == _UIntBuffer<UInt32, UInt8> {
333
333
} while buffer. _bitCount < 32
334
334
335
335
// Find one unicode scalar.
336
- let ( isValid, length ) = _parseNonASCII ( )
337
- _sanityCheck ( 1 ... 4 ~= length )
338
- _sanityCheck ( length <= buffer. count )
336
+ let ( isValid, scalarBitCount ) = _parseNonASCII ( )
337
+ _sanityCheck ( scalarBitCount % 8 == 0 && 1 ... 4 ~= scalarBitCount / 8 )
338
+ _sanityCheck ( scalarBitCount <= buffer. _bitCount )
339
339
340
340
// Consume the decoded bytes (or maximal subpart of ill-formed sequence).
341
- let bitsConsumed = length << 3
342
341
var encodedScalar = buffer
343
- encodedScalar. _bitCount = bitsConsumed
342
+ encodedScalar. _bitCount = scalarBitCount
344
343
345
344
buffer. _storage = UInt32 (
346
345
// widen to 64 bits so that we can empty the buffer in the 4-byte case
347
- extendingOrTruncating: UInt64 ( buffer. _storage) &>> bitsConsumed )
346
+ extendingOrTruncating: UInt64 ( buffer. _storage) &>> scalarBitCount )
348
347
349
- buffer. _bitCount = buffer. _bitCount &- bitsConsumed
348
+ buffer. _bitCount = buffer. _bitCount &- scalarBitCount
350
349
351
350
if _fastPath ( isValid) { return . valid( encodedScalar) }
352
- return . invalid( length: Int ( length ) )
351
+ return . invalid( length: Int ( scalarBitCount &>> 3 ) )
353
352
}
354
353
}
355
354
@@ -393,13 +392,13 @@ extension UTF8.ReverseDecoder : _UTF8Decoder {
393
392
}
394
393
395
394
public // @testable
396
- func _parseNonASCII( ) -> ( isValid: Bool , length : UInt8 ) {
395
+ func _parseNonASCII( ) -> ( isValid: Bool , bitCount : UInt8 ) {
397
396
_sanityCheck ( buffer. _storage & 0x80 != 0 ) // this case handled elsewhere
398
397
if buffer. _storage & 0b0__1110_0000__1100_0000
399
398
== 0b0__1100_0000__1000_0000 {
400
399
// 2-byte sequence. At least one of the top 4 bits of the decoded result must be nonzero
401
400
let top4Bits = buffer. _storage & 0b0__0001_1110__0000_0000
402
- if _fastPath ( top4Bits != 0 ) { return ( true , 2 ) }
401
+ if _fastPath ( top4Bits != 0 ) { return ( true , 2 * 8 ) }
403
402
}
404
403
else if buffer. _storage & 0b0__1111_0000__1100_0000__1100_0000
405
404
== 0b0__1110_0000__1000_0000__1000_0000 {
@@ -408,7 +407,7 @@ extension UTF8.ReverseDecoder : _UTF8Decoder {
408
407
let top5Bits = buffer. _storage & 0b0__1111__0010_0000__0000_0000
409
408
if _fastPath (
410
409
top5Bits != 0 && top5Bits != 0b0__1101__0010_0000__0000_0000 ) {
411
- return ( true , 3 )
410
+ return ( true , 3 * 8 )
412
411
}
413
412
}
414
413
else if buffer. _storage & 0b0__1111_1000__1100_0000__1100_0000__1100_0000
@@ -419,9 +418,9 @@ extension UTF8.ReverseDecoder : _UTF8Decoder {
419
418
if _fastPath (
420
419
top5bits != 0
421
420
&& top5bits <= 0b0__0100__0000_0000__0000_0000__0000_0000
422
- ) { return ( true , 4 ) }
421
+ ) { return ( true , 4 * 8 ) }
423
422
}
424
- return ( false , _invalidLength ( ) )
423
+ return ( false , _invalidLength ( ) &* 8 )
425
424
}
426
425
427
426
/// Returns the length of the invalid sequence that ends with the LSB of
@@ -460,22 +459,22 @@ extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder {
460
459
public typealias CodeUnit = UInt8
461
460
462
461
public // @testable
463
- func _parseNonASCII( ) -> ( isValid: Bool , length : UInt8 ) {
462
+ func _parseNonASCII( ) -> ( isValid: Bool , bitCount : UInt8 ) {
464
463
_sanityCheck ( buffer. _storage & 0x80 != 0 ) // this case handled elsewhere
465
464
466
465
if buffer. _storage & 0b0__1100_0000__1110_0000
467
466
== 0b0__1000_0000__1100_0000 {
468
467
// 2-byte sequence. At least one of the top 4 bits of the decoded result
469
468
// must be nonzero.
470
- if _fastPath ( buffer. _storage & 0b0_0001_1110 != 0 ) { return ( true , 2 ) }
469
+ if _fastPath ( buffer. _storage & 0b0_0001_1110 != 0 ) { return ( true , 2 * 8 ) }
471
470
}
472
471
else if buffer. _storage & 0b0__1100_0000__1100_0000__1111_0000
473
472
== 0b0__1000_0000__1000_0000__1110_0000 {
474
473
// 3-byte sequence. The top 5 bits of the decoded result must be nonzero
475
474
// and not a surrogate
476
475
let top5Bits = buffer. _storage & 0b0___0010_0000__0000_1111
477
476
if _fastPath ( top5Bits != 0 && top5Bits != 0b0___0010_0000__0000_1101 ) {
478
- return ( true , 3 )
477
+ return ( true , 3 * 8 )
479
478
}
480
479
}
481
480
else if buffer. _storage & 0b0__1100_0000__1100_0000__1100_0000__1111_1000
@@ -486,9 +485,9 @@ extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder {
486
485
if _fastPath (
487
486
top5bits != 0
488
487
&& top5bits. byteSwapped <= 0b0__0000_0100__0000_0000
489
- ) { return ( true , 4 ) }
488
+ ) { return ( true , 4 * 8 ) }
490
489
}
491
- return ( false , _invalidLength ( ) )
490
+ return ( false , _invalidLength ( ) &* 8 )
492
491
}
493
492
494
493
/// Returns the length of the invalid sequence that starts with the LSB of
0 commit comments