Skip to content

Commit 5ad4ef4

Browse files
author
Dave Abrahams
committed
[stdlib] UnicodeDecoders: bidirectional UnicodeScalar view
Reverse iteration over a collection is significantly slower than forward iteration. Benchmark timings (user time, three runs each):
REVERSE_COLLECTION: 0m4.609s, 0m4.587s, 0m4.585s
COLLECTION: 0m3.423s, 0m3.517s, 0m3.492s
1 parent 16e42d3 commit 5ad4ef4

File tree

2 files changed

+98
-25
lines changed

2 files changed

+98
-25
lines changed

stdlib/public/core/Collection.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,14 @@ public struct IndexingIterator<
386386
self._position = _elements.startIndex
387387
}
388388

389+
@_inlineable
390+
/// Creates an iterator over the given collection, starting at the given position.
391+
public /// @testable
392+
init(_elements: Elements, _position: Elements.Index) {
393+
self._elements = _elements
394+
self._position = _position
395+
}
396+
389397
/// Advances to the next element and returns it, or `nil` if no next element
390398
/// exists.
391399
///

test/Prototypes/UnicodeDecoders.swift

Lines changed: 90 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
// The BASELINE timings come from the existing standard library Codecs
1818

1919
/*
20-
for x in BASELINE FORWARD REVERSE SEQUENCE COLLECTION ; do
20+
for x in BASELINE FORWARD REVERSE SEQUENCE COLLECTION REVERSE_COLLECTION ; do
2121
echo $x
2222
swiftc -DBENCHMARK -D$x -O -swift-version 4 UnicodeDecoders.swift -o /tmp/u3-$x
2323
for i in {1..3}; do
@@ -326,6 +326,11 @@ extension Unicode {
326326
Encoding: UnicodeEncoding
327327
> where CodeUnits.Iterator.Element == Encoding.CodeUnit {
328328
var codeUnits: CodeUnits
329+
init(
330+
_ codeUnits: CodeUnits,
331+
fromEncoding _: Encoding.Type = Encoding.self) {
332+
self.codeUnits = codeUnits
333+
}
329334
}
330335
}
331336

@@ -353,27 +358,23 @@ extension Unicode.DefaultScalarView.Iterator : IteratorProtocol, Sequence {
353358
}
354359
}
355360

356-
extension Unicode {
357-
enum IndexImpl<E: UnicodeEncoding> {
358-
case forward(E.ForwardDecoder, E.ForwardDecoder.EncodedScalar)
359-
case reverse(E.ReverseDecoder, E.ReverseDecoder.EncodedScalar)
360-
}
361-
}
362361
extension Unicode.DefaultScalarView {
363362
struct Index {
364363
var codeUnitIndex: CodeUnits.Index
365364
}
366365
}
367366

368367
extension Unicode.DefaultScalarView.Index : Comparable {
369-
static func < (
368+
@inline(__always)
369+
public static func < (
370370
lhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index,
371371
rhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index
372372
) -> Bool {
373373
return lhs.codeUnitIndex < rhs.codeUnitIndex
374374
}
375375

376-
static func == (
376+
@inline(__always)
377+
public static func == (
377378
lhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index,
378379
rhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index
379380
) -> Bool {
@@ -382,41 +383,97 @@ extension Unicode.DefaultScalarView.Index : Comparable {
382383
}
383384

384385
extension Unicode.DefaultScalarView : Collection {
385-
var startIndex: Index {
386+
public var startIndex: Index {
386387
return Index(codeUnitIndex: codeUnits.startIndex)
387388
}
388389

389-
var endIndex: Index {
390+
public var endIndex: Index {
390391
return Index(codeUnitIndex: codeUnits.endIndex)
391392
}
392393

393-
subscript(i: Index) -> UnicodeScalar {
394+
public subscript(i: Index) -> UnicodeScalar {
395+
@inline(__always)
396+
get {
397+
var d = Encoding.ForwardDecoder()
398+
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
399+
switch d.parseOne(&input) {
400+
case .valid(let scalarContent):
401+
return Encoding.ForwardDecoder.decodeOne(scalarContent)
402+
case .invalid:
403+
return UnicodeScalar(_unchecked: 0xFFFD)
404+
case .emptyInput:
405+
fatalError("subscripting at endIndex")
406+
}
407+
}
408+
}
409+
410+
@inline(__always)
411+
public func index(after i: Index) -> Index {
394412
var d = Encoding.ForwardDecoder()
395413
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
396414
switch d.parseOne(&input) {
397415
case .valid(let scalarContent):
398-
return Encoding.ForwardDecoder.decodeOne(scalarContent)
399-
case .invalid:
400-
return UnicodeScalar(_unchecked: 0xFFFD)
416+
return Index(
417+
codeUnitIndex: codeUnits.index(
418+
i.codeUnitIndex, offsetBy: numericCast(scalarContent.count)))
419+
case .invalid(let l):
420+
return Index(
421+
codeUnitIndex: codeUnits.index(
422+
i.codeUnitIndex, offsetBy: numericCast(l)))
401423
case .emptyInput:
402-
fatalError("subscripting at endIndex")
424+
fatalError("indexing past endIndex")
403425
}
404426
}
427+
}
405428

406-
func index(after i: Index) -> Index {
407-
var d = Encoding.ForwardDecoder()
408-
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
429+
// This should go in the standard library; see
430+
// https://github.com/apple/swift/pull/9074 and
431+
// https://bugs.swift.org/browse/SR-4721
432+
@_fixed_layout
433+
public struct ReverseIndexingIterator<
434+
Elements : BidirectionalCollection
435+
> : IteratorProtocol, Sequence {
436+
437+
@_inlineable
438+
@inline(__always)
439+
/// Creates an iterator that traverses the given collection in reverse, starting just before the given position.
440+
public /// @testable
441+
init(_elements: Elements, _position: Elements.Index) {
442+
self._elements = _elements
443+
self._position = _position
444+
}
445+
446+
@_inlineable
447+
@inline(__always)
448+
public mutating func next() -> Elements._Element? {
449+
guard _fastPath(_position != _elements.startIndex) else { return nil }
450+
_position = _elements.index(before: _position)
451+
return _elements[_position]
452+
}
453+
454+
@_versioned
455+
internal let _elements: Elements
456+
@_versioned
457+
internal var _position: Elements.Index
458+
}
459+
460+
extension Unicode.DefaultScalarView : BidirectionalCollection {
461+
@inline(__always)
462+
public func index(before i: Index) -> Index {
463+
var d = Encoding.ReverseDecoder()
464+
var input = ReverseIndexingIterator(
465+
_elements: codeUnits, _position: i.codeUnitIndex)
409466
switch d.parseOne(&input) {
410467
case .valid(let scalarContent):
411468
return Index(
412469
codeUnitIndex: codeUnits.index(
413-
i.codeUnitIndex, offsetBy: numericCast(scalarContent.count)))
470+
i.codeUnitIndex, offsetBy: -numericCast(scalarContent.count)))
414471
case .invalid(let l):
415472
return Index(
416473
codeUnitIndex: codeUnits.index(
417-
i.codeUnitIndex, offsetBy: numericCast(l)))
474+
i.codeUnitIndex, offsetBy: -numericCast(l)))
418475
case .emptyInput:
419-
fatalError("advancing past endIndex")
476+
fatalError("indexing past startIndex")
420477
}
421478
}
422479
}
@@ -793,8 +850,9 @@ func checkDecodeUTF8(
793850
}
794851
}
795852

796-
let scalars = Unicode.DefaultScalarView<[UInt8], UTF8>(codeUnits: utf8Str)
853+
let scalars = Unicode.DefaultScalarView(utf8Str, fromEncoding: UTF8.self)
797854
expectEqualSequence(expected, scalars)
855+
expectEqualSequence(expected.reversed(), scalars.reversed())
798856

799857
do {
800858
var x = scalars.makeIterator()
@@ -2435,16 +2493,23 @@ public func run_UTF8Decode(_ N: Int) {
24352493
typealias D = UTF8.ReverseDecoder
24362494
D.decode(&it, repairingIllFormedSequences: true) { total = total &+ $0.value }
24372495
#elseif SEQUENCE
2438-
for s in Unicode.DefaultScalarView<[UInt8], UTF8>(codeUnits: string) {
2496+
for s in Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) {
24392497
total = total &+ s.value
24402498
}
24412499
#elseif COLLECTION
2442-
let scalars = Unicode.DefaultScalarView<[UInt8], UTF8>(codeUnits: string)
2500+
let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self)
24432501
var i = scalars.startIndex
24442502
while i != scalars.endIndex {
24452503
total = total &+ scalars[i].value
24462504
i = scalars.index(after: i)
24472505
}
2506+
#elseif REVERSE_COLLECTION
2507+
let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self)
2508+
var i = scalars.endIndex
2509+
while i != scalars.startIndex {
2510+
i = scalars.index(before: i)
2511+
total = total &+ scalars[i].value
2512+
}
24482513
#else
24492514
Error_Unknown_Benchmark()
24502515
#endif

0 commit comments

Comments
 (0)