Skip to content

Commit 16e42d3

Browse files
author
Dave Abrahams
committed
[stdlib] UnicodeDecoders: dumber is faster
A really simpleminded Collection view is faster by far than trying to cache all that information. I believe we could do even better by storing a simpler cache in the indices of the view, but performance is already in decent shape, so we can put off further optimization. Timings (three runs each):
BASELINE: user 0m2.864s, user 0m2.775s, user 0m2.763s
SEQUENCE: user 0m2.632s, user 0m2.571s, user 0m2.553s
COLLECTION: user 0m3.553s, user 0m3.567s, user 0m3.475s
1 parent 367a8a3 commit 16e42d3

File tree

1 file changed

+24
-63
lines changed

1 file changed

+24
-63
lines changed

test/Prototypes/UnicodeDecoders.swift

Lines changed: 24 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,6 @@ extension Unicode {
361361
}
362362
extension Unicode.DefaultScalarView {
363363
struct Index {
364-
var parsedLength: UInt8
365-
var impl: Unicode.IndexImpl<Encoding>
366364
var codeUnitIndex: CodeUnits.Index
367365
}
368366
}
@@ -384,78 +382,41 @@ extension Unicode.DefaultScalarView.Index : Comparable {
384382
}
385383

386384
extension Unicode.DefaultScalarView : Collection {
387-
func _forwardIndex(atCodeUnit i: CodeUnits.Index) -> Index {
388-
return index(
389-
after: Index(
390-
parsedLength: 0,
391-
impl: .forward(
392-
Encoding.ForwardDecoder(),
393-
Encoding.ForwardDecoder.replacement),
394-
codeUnitIndex: i
395-
))
396-
}
397-
398385
var startIndex: Index {
399-
return codeUnits.isEmpty ? endIndex
400-
: _forwardIndex(atCodeUnit: codeUnits.startIndex)
386+
return Index(codeUnitIndex: codeUnits.startIndex)
401387
}
402388

403389
var endIndex: Index {
404-
return Index(
405-
parsedLength: 0,
406-
impl: .reverse(
407-
Encoding.ReverseDecoder(),
408-
Encoding.ReverseDecoder.replacement),
409-
codeUnitIndex: codeUnits.endIndex
410-
)
390+
return Index(codeUnitIndex: codeUnits.endIndex)
411391
}
412392

413393
subscript(i: Index) -> UnicodeScalar {
414-
switch i.impl {
415-
case .forward(_, let s):
416-
return Encoding.ForwardDecoder.decodeOne(s)
417-
case .reverse(_, let s):
418-
return Encoding.ReverseDecoder.decodeOne(s)
394+
var d = Encoding.ForwardDecoder()
395+
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
396+
switch d.parseOne(&input) {
397+
case .valid(let scalarContent):
398+
return Encoding.ForwardDecoder.decodeOne(scalarContent)
399+
case .invalid:
400+
return UnicodeScalar(_unchecked: 0xFFFD)
401+
case .emptyInput:
402+
fatalError("subscripting at endIndex")
419403
}
420404
}
421405

422406
func index(after i: Index) -> Index {
423-
switch i.impl {
424-
case .forward(var d, _):
425-
let stride = i.parsedLength
426-
427-
// position of the code unit after the last one we've processed
428-
let i0 = codeUnits.index(
429-
i.codeUnitIndex,
430-
offsetBy: CodeUnits.IndexDistance(d.buffer.count) + numericCast(stride))
431-
432-
var tail = codeUnits[i0..<codeUnits.endIndex].makeIterator()
433-
switch d.parseOne(&tail) {
434-
435-
case .valid(let s):
436-
return Index(
437-
parsedLength: UInt8(extendingOrTruncating: s.count),
438-
impl: .forward(d, s),
439-
codeUnitIndex:
440-
codeUnits.index(i.codeUnitIndex, offsetBy: numericCast(stride)))
441-
442-
case .invalid(let l):
443-
return Index(
444-
parsedLength: UInt8(extendingOrTruncating: l),
445-
impl: .forward(d, Encoding.ForwardDecoder.replacement),
446-
codeUnitIndex:
447-
codeUnits.index(i.codeUnitIndex, offsetBy: numericCast(stride)))
448-
449-
case .emptyInput:
450-
return endIndex
451-
}
452-
453-
case .reverse(_,_):
454-
fatalError("implement me")
455-
// The following has the right semantics but kills inlining. Needs a
456-
// refactor to be right.
457-
//
458-
// return index(after: _forwardIndex(atCodeUnit: i.codeUnitIndex))
407+
var d = Encoding.ForwardDecoder()
408+
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
409+
switch d.parseOne(&input) {
410+
case .valid(let scalarContent):
411+
return Index(
412+
codeUnitIndex: codeUnits.index(
413+
i.codeUnitIndex, offsetBy: numericCast(scalarContent.count)))
414+
case .invalid(let l):
415+
return Index(
416+
codeUnitIndex: codeUnits.index(
417+
i.codeUnitIndex, offsetBy: numericCast(l)))
418+
case .emptyInput:
419+
fatalError("advancing past endIndex")
459420
}
460421
}
461422
}

0 commit comments

Comments
 (0)