Skip to content

Commit df2d07d

Browse files
committed
Improve CSVWriter encoding performance (enhancing #29)
1 parent bef29ba commit df2d07d

File tree

3 files changed

+54
-42
lines changed

3 files changed

+54
-42
lines changed

sources/declarative/encodable/internal/SinkBuffer.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ extension ShadowEncoder.Sink.Buffer {
8282
/// The location for the first CSV field stored within this structure.
8383
var firstIndex: (row: Int, field: Int)? {
8484
guard let row = self._inverseSort.last else { return nil }
85-
guard let fieldIndex = row.value.keys.sorted().first else { fatalError() }
85+
let fieldIndex = row.value.keys.sorted().first!
8686
return (row.key, fieldIndex)
8787
}
8888

sources/imperative/writer/internal/WriterEncoder.swift

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,31 +5,48 @@ internal extension CSVWriter {
55
typealias ScalarEncoder = (Unicode.Scalar) throws -> Void
66

77
/// Creates an encoder that takes a `Unicode.Scalar` and stores the correct byte representation in the appropriate place.
8-
/// - parameter stream: Output stream receiving the encoded data.
8+
/// - parameter outputStream: Output stream receiving the encoded data.
99
/// - parameter encoding: The string encoding being used for the external representation.
1010
/// - parameter firstBytes: Bytes to be prepended at the beginning of the stream.
1111
/// - throws: `CSVError<CSVWriter>` exclusively.
1212
/// - returns: An encoder closure writing bytes in the provided stream with the given string encoding.
13-
static func makeEncoder(from stream: OutputStream, encoding: String.Encoding, firstBytes: [UInt8]) throws -> ScalarEncoder {
14-
guard case .open = stream.streamStatus else { throw Error._unopenStream(status: stream.streamStatus, error: stream.streamError) }
13+
static func makeEncoder(from outputStream: OutputStream, encoding: String.Encoding, firstBytes: [UInt8]) throws -> ScalarEncoder {
14+
guard case .open = outputStream.streamStatus else {
15+
throw Error._unopenStream(status: outputStream.streamStatus, error: outputStream.streamError)
16+
}
17+
18+
let stream = Unmanaged<OutputStream>.passUnretained(outputStream)
1519

1620
if !firstBytes.isEmpty {
1721
try CSVWriter._streamWrite(on: stream, bytes: firstBytes, count: firstBytes.count)
1822
}
1923

2024
switch encoding {
2125
case .ascii:
22-
return { [unowned stream] (scalar) in
26+
return { (scalar) in
2327
guard var byte = Unicode.ASCII.encode(scalar)?.first else { throw Error._invalidASCII(scalar: scalar) }
2428
try CSVWriter._streamWrite(on: stream, bytes: &byte, count: 1)
2529
}
2630
case .utf8:
27-
return { [unowned stream] (scalar) in
28-
guard let bytes = Unicode.UTF8.encode(scalar) else { throw Error._invalidUTF8(scalar: scalar) }
29-
try CSVWriter._streamWrite(on: stream, bytes: Array(bytes), count: bytes.count)
31+
return { (scalar) in
32+
guard var iterator = Unicode.UTF8.encode(scalar)?.makeIterator() else { throw Error._invalidUTF8(scalar: scalar) }
33+
34+
var bytes = (iterator.next()!, UInt8.zero, UInt8.zero, UInt8.zero)
35+
var count: Int = 1
36+
37+
try withUnsafeMutableBytes(of: &bytes) {
38+
let ptr = $0.baseAddress.unsafelyUnwrapped.assumingMemoryBound(to: UInt8.self)
39+
40+
while let byte = iterator.next() {
41+
(ptr + count).pointee = byte
42+
count &+= 1
43+
}
44+
45+
try CSVWriter._streamWrite(on: stream, bytes: ptr, count: count)
46+
}
3047
}
3148
case .utf16BigEndian, .utf16, .unicode: // UTF16 & Unicode imply: follow the BOM and if it is not there, assume big endian.
32-
return { [unowned stream] (scalar) in
49+
return { (scalar) in
3350
guard let tmp = Unicode.UTF16.encode(scalar) else { throw Error._invalidUTF16(scalar: scalar) }
3451
let bytes = tmp.flatMap {
3552
[UInt8(truncatingIfNeeded: $0 >> 8),
@@ -38,7 +55,7 @@ internal extension CSVWriter {
3855
try CSVWriter._streamWrite(on: stream, bytes: bytes, count: bytes.count)
3956
}
4057
case .utf16LittleEndian:
41-
return { [unowned stream] (scalar) in
58+
return { (scalar) in
4259
guard let tmp = Unicode.UTF16.encode(scalar) else { throw Error._invalidUTF16(scalar: scalar) }
4360
let bytes = tmp.flatMap {
4461
[UInt8(truncatingIfNeeded: $0),
@@ -47,7 +64,7 @@ internal extension CSVWriter {
4764
try CSVWriter._streamWrite(on: stream, bytes: bytes, count: bytes.count)
4865
}
4966
case .utf32BigEndian, .utf32:
50-
return { [unowned stream] (scalar) in
67+
return { (scalar) in
5168
guard let tmp = Unicode.UTF32.encode(scalar) else { throw Error._invalidUTF32(scalar: scalar) }
5269
let bytes = tmp.flatMap {
5370
[UInt8(truncatingIfNeeded: $0 >> 24),
@@ -58,7 +75,7 @@ internal extension CSVWriter {
5875
try CSVWriter._streamWrite(on: stream, bytes: bytes, count: bytes.count)
5976
}
6077
case .utf32LittleEndian:
61-
return { [unowned stream] (scalar) in
78+
return { (scalar) in
6279
guard let tmp = Unicode.UTF32.encode(scalar) else { throw Error._invalidUTF32(scalar: scalar) }
6380
let bytes = tmp.flatMap {
6481
[UInt8(truncatingIfNeeded: $0),
@@ -69,53 +86,48 @@ internal extension CSVWriter {
6986
try CSVWriter._streamWrite(on: stream, bytes: bytes, count: bytes.count)
7087
}
7188
case .shiftJIS:
72-
return { [unowned stream] (scalar) in
89+
return { (scalar) in
90+
// - todo: Performance for Shift JIS is pretty bad. Figure out how to encode a Unicode scalar to Shift JIS directly without going through String -> Data -> [UInt8]
7391
guard let tmp = String(scalar).data(using: .shiftJIS) else { throw Error._invalidShiftJIS(scalar: scalar) }
74-
guard let bytes = tmp.encodedHexadecimals else { throw Error._invalidShiftJIS(scalar: scalar) }
75-
try CSVWriter._streamWrite(on: stream, bytes: bytes, count: bytes.count)
92+
try tmp.withUnsafeBytes {
93+
let count = $0.count
94+
let ptr = $0.baseAddress.unsafelyUnwrapped.assumingMemoryBound(to: UInt8.self)
95+
try CSVWriter._streamWrite(on: stream, bytes: ptr, count: count)
96+
}
7697
}
7798
default: throw Error._unsupported(encoding: encoding)
7899
}
79100
}
80101
}
81102

82-
extension Data {
83-
var encodedHexadecimals: [UInt8]? {
84-
let responseValues = self.withUnsafeBytes({ (pointer: UnsafeRawBufferPointer) -> [UInt8] in
85-
let unsafeBufferPointer = pointer.bindMemory(to: UInt8.self)
86-
let unsafePointer = unsafeBufferPointer.baseAddress!
87-
return [UInt8](UnsafeBufferPointer(start: unsafePointer, count: self.count))
88-
})
89-
return responseValues
90-
}
91-
}
92-
93103
fileprivate extension CSVWriter {
94104
/// Writes the given bytes to the stream.
95105
/// - precondition: `count` is always greater than zero.
96106
/// - parameter stream: The output stream accepting the writes.
97107
/// - parameter bytes: The actual bytes to be written.
98108
/// - parameter count: The number of bytes within `bytes`.
99109
/// - throws: `CSVError<CSVWriter>` exclusively.
100-
private static func _streamWrite(on stream: OutputStream, bytes: UnsafePointer<UInt8>, count: Int) throws {
110+
private static func _streamWrite(on stream: Unmanaged<OutputStream>, bytes: UnsafePointer<UInt8>, count: Int) throws {
101111
let attempts = 2
102112
var (distance, remainingAttempts) = (0, attempts)
103113

104-
repeat {
105-
let written = stream.write(bytes.advanced(by: distance), maxLength: count - distance)
106-
107-
if written > 0 {
108-
distance += written
109-
} else if written == 0 {
110-
remainingAttempts -= 1
111-
guard remainingAttempts > 0 else {
112-
throw Error._streamEmptyWrite(error: stream.streamError, status: stream.streamStatus, numAttempts: attempts)
114+
try stream._withUnsafeGuaranteedRef {
115+
repeat {
116+
let written = $0.write(bytes.advanced(by: distance), maxLength: count - distance)
117+
118+
if written > 0 {
119+
distance += written
120+
} else if written == 0 {
121+
remainingAttempts -= 1
122+
guard remainingAttempts > 0 else {
123+
throw Error._streamEmptyWrite(error: $0.streamError, status: $0.streamStatus, numAttempts: attempts)
124+
}
125+
continue
126+
} else {
127+
throw Error._streamFailed(error: $0.streamError, status: $0.streamStatus)
113128
}
114-
continue
115-
} else {
116-
throw Error._streamFailed(error: stream.streamError, status: stream.streamStatus)
117-
}
118-
} while distance < count
129+
} while distance < count
130+
}
119131
}
120132
}
121133

sources/imperative/writer/internal/WriterEncoding.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ internal extension Strategy.BOM {
2929
case (.always, .utf32LittleEndian): return BOM.UTF32.littleEndian
3030
case (.always, .utf32BigEndian),
3131
(.always, .utf32), (.convention, .utf32): return BOM.UTF32.bigEndian
32-
default: return .init()
32+
default: return Array()
3333
}
3434
}
3535
}

0 commit comments

Comments
 (0)