Skip to content

Commit 63a2033

Browse files
author
Dave Abrahams
committed
[stdlib] UnicodeDecoders: separate EncodedScalar from Buffer
New code I'm writing doesn't seem to make sense with the two things collapsed.
1 parent 932e7d3 commit 63a2033

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

test/Prototypes/UnicodeDecoders.swift

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -243,17 +243,20 @@ extension Unicode {
243243

244244
public protocol UnicodeDecoder {
245245
associatedtype CodeUnit : UnsignedInteger, FixedWidthInteger
246-
associatedtype Buffer : Collection where Buffer.Iterator.Element == CodeUnit
246+
associatedtype Buffer : Collection
247+
where Buffer.Iterator.Element == CodeUnit
248+
associatedtype EncodedScalar : Collection
249+
where EncodedScalar.Iterator.Element == CodeUnit
247250

248251
init()
249252

250253
var buffer: Buffer { get }
251254

252255
mutating func parseOne<I : IteratorProtocol>(
253256
_ input: inout I
254-
) -> Unicode.ParseResult<Buffer> where I.Element == CodeUnit
257+
) -> Unicode.ParseResult<EncodedScalar> where I.Element == CodeUnit
255258

256-
static func decodeOne(_ content: Buffer) -> UnicodeScalar
259+
static func decodeOne(_ content: EncodedScalar) -> UnicodeScalar
257260
}
258261

259262
extension UnicodeDecoder {
@@ -294,22 +297,23 @@ public protocol UnicodeEncoding {
294297
}
295298

296299

297-
public protocol _UTF8Decoder : UnicodeDecoder {
300+
public protocol _UTF8Decoder : UnicodeDecoder where Buffer == EncodedScalar {
298301
func _parseNonASCII() -> (isValid: Bool, bitCount: UInt8)
299302
var buffer: Buffer { get set }
300303
}
301304

302305
extension _UTF8Decoder where Buffer == _UIntBuffer<UInt32, UInt8> {
303306
public mutating func parseOne<I : IteratorProtocol>(
304307
_ input: inout I
305-
) -> Unicode.ParseResult<Buffer> where I.Element == Unicode.UTF8.CodeUnit {
308+
) -> Unicode.ParseResult<EncodedScalar>
309+
where I.Element == Unicode.UTF8.CodeUnit {
306310

307311
// Bufferless ASCII fastpath.
308312
if _fastPath(buffer.isEmpty) {
309313
guard let codeUnit = input.next() else { return .emptyInput }
310314
// ASCII, return immediately.
311315
if codeUnit & 0x80 == 0 {
312-
return .valid(Buffer(containing: codeUnit))
316+
return .valid(EncodedScalar(containing: codeUnit))
313317
}
314318
// Non-ASCII, proceed to buffering mode.
315319
buffer.append(codeUnit)
@@ -318,7 +322,7 @@ extension _UTF8Decoder where Buffer == _UIntBuffer<UInt32, UInt8> {
318322
// to bufferless mode once we've exhausted it.
319323
let codeUnit = UInt8(extendingOrTruncating: buffer._storage)
320324
buffer.remove(at: buffer.startIndex)
321-
return .valid(Buffer(containing: codeUnit))
325+
return .valid(EncodedScalar(containing: codeUnit))
322326
}
323327
// Buffering mode.
324328
// Fill buffer back to 4 bytes (or as many as are left in the iterator).
@@ -355,11 +359,14 @@ extension _UTF8Decoder where Buffer == _UIntBuffer<UInt32, UInt8> {
355359
extension Unicode.UTF8 : UnicodeEncoding {
356360
public struct ForwardDecoder {
357361
public typealias Buffer = _UIntBuffer<UInt32, UInt8>
362+
public typealias EncodedScalar = Buffer
358363
public init() { buffer = Buffer() }
359364
public var buffer: Buffer
360365
}
366+
361367
public struct ReverseDecoder {
362368
public typealias Buffer = _UIntBuffer<UInt32, UInt8>
369+
public typealias EncodedScalar = Buffer
363370
public init() { buffer = Buffer() }
364371
public var buffer: Buffer
365372
}
@@ -368,9 +375,9 @@ extension Unicode.UTF8 : UnicodeEncoding {
368375
extension UTF8.ReverseDecoder : _UTF8Decoder {
369376
public typealias CodeUnit = UInt8
370377

371-
public static func decodeOne(_ encodedScalar: Buffer) -> UnicodeScalar {
372-
let bits = encodedScalar._storage
373-
switch encodedScalar._bitCount {
378+
public static func decodeOne(_ source: EncodedScalar) -> UnicodeScalar {
379+
let bits = source._storage
380+
switch source._bitCount {
374381
case 8: return UnicodeScalar(_unchecked: bits)
375382
case 16:
376383
var value = bits & 0b0______________________11_1111
@@ -382,7 +389,7 @@ extension UTF8.ReverseDecoder : _UTF8Decoder {
382389
value |= bits &>> 4 & 0b0_________1111_0000__0000_0000
383390
return UnicodeScalar(_unchecked: value)
384391
default:
385-
_sanityCheck(encodedScalar._bitCount == 32)
392+
_sanityCheck(source._bitCount == 32)
386393
var value = bits & 0b0______________________11_1111
387394
value |= bits &>> 2 & 0b0______________1111__1100_0000
388395
value |= bits &>> 4 & 0b0_____11__1111_0000__0000_0000
@@ -515,9 +522,9 @@ extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder {
515522
return 1
516523
}
517524

518-
public static func decodeOne(_ encodedScalar: Buffer) -> UnicodeScalar {
519-
let bits = encodedScalar._storage
520-
switch encodedScalar._bitCount {
525+
public static func decodeOne(_ source: EncodedScalar) -> UnicodeScalar {
526+
let bits = source._storage
527+
switch source._bitCount {
521528
case 8:
522529
return UnicodeScalar(_unchecked: bits)
523530
case 16:
@@ -530,7 +537,7 @@ extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder {
530537
value |= (bits & 0b0________________________________0000_1111) &<< 12
531538
return UnicodeScalar(_unchecked: value)
532539
default:
533-
_sanityCheck(encodedScalar.count == 4)
540+
_sanityCheck(source.count == 4)
534541
var value = (bits & 0b0_11_1111__0000_0000__0000_0000__0000_0000) &>> 24
535542
value |= (bits & 0b0____________11_1111__0000_0000__0000_0000) &>> 10
536543
value |= (bits & 0b0_______________________11_1111__0000_0000) &<< 4

0 commit comments

Comments
 (0)