Skip to content

Commit 389ff77

Browse files
committed
CSVWriter overhaul
1 parent fabe684 commit 389ff77

18 files changed

+393
-482
lines changed

README.md

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,39 +36,39 @@ The _active entities_ provide imperative control on how to read or write CSV dat
3636
<ul>
3737
<details><summary><code>CSVReader</code>.</summary><p>
3838

39-
A `CSVReadder` parses CSV data from an input and returns CSV row as an array of strings.
39+
A `CSVReader` parses CSV data from a given input (a `String`, `Data`, or a file) and returns CSV rows as arrays of `String`s. `CSVReader` can be used at a "high-level", in which case it parses an input completely; or at a lower level, in which case each row is decoded when requested.
40+
41+
- Complete input parsing.
42+
43+
```swift
44+
let file = try CSVReader.parse(input: ...)
45+
// file is of type: CSVReader.Output
46+
```
47+
48+
This type of parsing returns a simple structure containing the CSV headers and CSV rows. Additionally it lets you access each field through the header name or the field index.
4049

4150
- Row-by-row parsing.
4251

4352
```swift
44-
let reader = try CSVReader(data: ...)
53+
let reader = try CSVReader(input: "...")
4554
while let row = try reader.parseRow() {
4655
// Do something with the row: [String]
4756
}
4857
```
4958

50-
Alternatively you can use the `parseRecord()` function which also returns the next CSV row, but it wraps the result in a convenience structure. This structure lets you access each field with the header name (as long as the `headerStrategy` is market with `.firstLine`).
59+
Alternatively you can use the `parseRecord()` function which also returns the next CSV row, but it wraps the result in a convenience structure. This structure lets you access each field with the header name (as long as the `headerStrategy` is marked with `.firstLine`).
5160

5261
- `Sequence` syntax parsing.
5362

5463
```swift
55-
let reader = try CSVReader(fileURL: ...)
64+
let reader = try CSVReader(input: URL(...), configuration: ...)
5665
for row in reader {
5766
// Do something with the row: [String]
5867
}
5968
```
6069

6170
Please note the `Sequence` syntax (i.e. `IteratorProtocol`) doesn't throw errors; therefore if the CSV data is invalid, the previous code will crash. If you don't control the CSV data origin, use `parseRow()` instead.
6271

63-
- Whole input parsing.
64-
65-
```swift
66-
let file = try CSVReader.parse(string: ..., configuration: ...)
67-
// file is of type: CSVReader.Output
68-
```
69-
70-
This type of parsing returns a simple structure containing the CSV headers and CSV rows. Additionally it lets you access each field through the header name or the field index.
71-
7272
### Reader Configuration
7373

7474
`CSVReader` accepts the following configuration properties:
@@ -96,7 +96,7 @@ A `CSVReadder` parses CSV data from an input and returns CSV row as an array of
9696
The configuration values are set during initialization and can be passed to the `CSVReader` instance through a structure or with a convenience closure syntax:
9797

9898
```swift
99-
let reader = CSVReader(data: ...) {
99+
let reader = CSVReader(input: ...) {
100100
$0.encoding = .utf8
101101
$0.delimiters.row = "\r\n"
102102
$0.headerStrategy = .firstLine
@@ -108,6 +108,31 @@ let reader = CSVReader(data: ...) {
108108

109109
<details><summary><code>CSVWriter</code>.</summary><p>
110110

111+
A `CSVWriter` encodes CSV information into a specified target (i.e. a `String`, or `Data`, or a file). It can be used at a "high-level", by completely encoding a prepared set of information; or at a lower level, in which case rows or fields can be written individually.
112+
113+
- Full encoding.
114+
115+
```swift
116+
let data = try CSVWriter.serialize(rows: [...], into: Data.self)
117+
```
118+
119+
- Row-by-row encoding.
120+
121+
```swift
122+
let writer = try CSVWriter()
123+
for row in customData {
124+
try writer.write(row: row)
125+
}
126+
let outcome = writer.data()
127+
```
128+
129+
- Field-by-field encoding.
130+
131+
```swift
132+
let writer = try CSVWriter(fileURL: ...)
133+
try writer.write(field: ...)
134+
```
135+
111136
#warning("TODO:")
112137

113138
</p></details>

Sources/Active/Reader/Reader.swift

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public final class CSVReader: IteratorProtocol, Sequence {
4646
self.isFieldDelimiter = CSVReader.makeMatcher(delimiter: self.settings.delimiters.field, buffer: self.buffer, decoder: self.decoder)
4747
self.isRowDelimiter = CSVReader.makeMatcher(delimiter: self.settings.delimiters.row, buffer: self.buffer, decoder: self.decoder)
4848
self.count = (0, 0)
49-
self.status = .reading
49+
self.status = .active
5050

5151
switch configuration.headerStrategy {
5252
case .none: break
@@ -55,8 +55,7 @@ public final class CSVReader: IteratorProtocol, Sequence {
5555
guard !headers.isEmpty else { throw Error.invalidEmptyHeader() }
5656
self.headers = headers
5757
self.count = (rows: 1, fields: headers.count)
58-
// case .unknown:
59-
// #warning("TODO:")
58+
// case .unknown: #warning("TODO")
6059
}
6160
}
6261
}
@@ -96,7 +95,7 @@ extension CSVReader {
9695
/// - returns: The row's fields or `nil` if there isn't anything else to parse. The row will never be an empty array.
9796
public func parseRow() throws -> [String]? {
9897
switch self.status {
99-
case .reading: break
98+
case .active: break
10099
case .finished: return nil
101100
case .failed(let e): throw e
102101
}
@@ -126,6 +125,8 @@ extension CSVReader {
126125
}
127126
}
128127

128+
// MARK: -
129+
129130
extension CSVReader {
130131
/// Creates the lookup dictionary from the headers row.
131132
internal func makeHeaderLookup() throws -> [Int:Int] {
@@ -268,6 +269,8 @@ extension CSVReader {
268269
}
269270
}
270271

272+
// MARK: -
273+
271274
fileprivate extension CSVReader.Error {
272275
/// Error raised when a header was required, but the line was empty.
273276
static func invalidEmptyHeader() -> CSVError<CSVReader> {

Sources/Active/Reader/ReaderAPI.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ extension CSVReader {
66
/// - parameter configuration: Recipe detailing how to parse the CSV data (i.e. encoding, delimiters, etc.).
77
/// - throws: `CSVError<CSVReader>` exclusively.
88
@_specialize(exported: true, where S==String)
9+
@_specialize(exported: true, where S==Substring)
910
public convenience init<S>(input: S, configuration: Configuration = .init()) throws where S:StringProtocol {
1011
let buffer = ScalarBuffer(reservingCapacity: 8)
1112
let decoder = CSVReader.makeDecoder(from: input.unicodeScalars.makeIterator())
@@ -30,7 +31,7 @@ extension CSVReader {
3031
var dataIterator = input.makeIterator()
3132
let (inferredEncoding, unusedBytes) = String.Encoding.infer(from: &dataIterator)
3233
// B.2. Select the appropriate encoding depending on the user-provided encoding (if any), and the BOM encoding (if any).
33-
let encoding = try String.Encoding.selectFrom(provided: configuration.encoding, inferred: inferredEncoding)
34+
let encoding = try CSVReader.selectEncodingFrom(provided: configuration.encoding, inferred: inferredEncoding)
3435
// B.3. Create the scalar iterator producing all `Unicode.Scalar`s from the data bytes.
3536
let decoder = try CSVReader.makeDecoder(from: dataIterator, encoding: encoding, firstBytes: unusedBytes)
3637
try self.init(configuration: configuration, buffer: buffer, decoder: decoder)
@@ -59,7 +60,7 @@ extension CSVReader {
5960
// B.2. Check whether the input data has a BOM.
6061
let inferred = try String.Encoding.infer(from: stream)
6162
// B.3. Select the appropriate encoding depending on the user-provided encoding (if any), and the BOM encoding (if any).
62-
encoding = try String.Encoding.selectFrom(provided: configuration.encoding, inferred: inferred.encoding)
63+
encoding = try CSVReader.selectEncodingFrom(provided: configuration.encoding, inferred: inferred.encoding)
6364
unusedBytes = inferred.unusedBytes
6465
} catch let error {
6566
if stream.streamStatus != .closed { stream.close() }
@@ -118,6 +119,7 @@ extension CSVReader {
118119
/// - throws: `CSVError<CSVReader>` exclusively.
119120
/// - returns: Tuple with the CSV headers (empty if none) and all records within the CSV file.
120121
@_specialize(exported: true, where S==String)
122+
@_specialize(exported: true, where S==Substring)
121123
public static func parse<S>(input: S, configuration: Configuration = .init()) throws -> Output where S:StringProtocol {
122124
let reader = try CSVReader(input: input, configuration: configuration)
123125
let lookup = try reader.makeHeaderLookup()
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
extension CSVReader {
2+
/// Buffer used to store previously read unicode scalars.
3+
internal final class ScalarBuffer: IteratorProtocol {
4+
/// Unicode scalars read while inferring configuration variables that were unknown.
5+
///
6+
/// This buffer is reversed to make it efficient to remove elements.
7+
private var readScalars: [Unicode.Scalar]
8+
9+
/// Creates the buffer with a given capacity value.
10+
init(reservingCapacity capacity: Int) {
11+
self.readScalars = []
12+
self.readScalars.reserveCapacity(capacity)
13+
}
14+
15+
func next() -> Unicode.Scalar? {
16+
guard !self.readScalars.isEmpty else { return nil }
17+
return self.readScalars.removeLast()
18+
}
19+
20+
/// Inserts a single unicode scalar at the beginning of the buffer.
21+
func preppend(scalar: Unicode.Scalar) {
22+
self.readScalars.append(scalar)
23+
}
24+
25+
/// Inserts a sequence of scalars at the beginning of the buffer.
26+
func preppend<S:Sequence>(scalars: S) where S.Element == Unicode.Scalar {
27+
self.readScalars.append(contentsOf: scalars.reversed())
28+
}
29+
30+
/// Appends a single unicode scalar to the buffer.
31+
func append(scalar: Unicode.Scalar) {
32+
self.readScalars.insert(scalar, at: self.readScalars.startIndex)
33+
}
34+
35+
/// Appends a sequence of unicode scalars to the buffer.
36+
func append<S:Sequence>(scalars: S) where S.Element == Unicode.Scalar {
37+
self.readScalars.insert(contentsOf: scalars.reversed(), at: self.readScalars.startIndex)
38+
}
39+
40+
/// Removes all scalars in the buffer.
41+
func removeAll() {
42+
self.readScalars.removeAll()
43+
}
44+
}
45+
}

Sources/Active/Reader/ReaderConfiguration.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ extension CSVReader {
5555
case (let field, nil):
5656
self.delimiters = try CSVReader.inferRowDelimiter(fieldDelimiter: field, decoder: decoder, buffer: buffer)
5757
case (let field, let row) where !field.elementsEqual(row):
58-
self.delimiters = (field, row)
58+
self.delimiters = (.init(field), .init(row))
5959
case (let delimiter, _):
6060
throw Error.invalidDelimiters(delimiter)
6161
}

Sources/Active/Reader/ReaderDecoder.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ fileprivate extension CSVReader.Error {
345345
.init(.streamFailure, underlying: underlyingError,
346346
reason: "The input stream encountered an error while trying to read input bytes.",
347347
help: "Review the internal error and make sure you have access to the input data.",
348-
userInfo: ["Status": status])
348+
userInfo: ["Stream status": status])
349349
}
350350
/// Error raised when trying to retrieve two bytes from the input data, but only one was available.
351351
static func incompleteUTF16() -> CSVError<CSVReader> {

Sources/Active/Reader/ReaderEncodings.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,15 @@ internal extension String.Encoding {
3636
assert(unusedBytes != nil)
3737
return (encoding, unusedBytes!)
3838
}
39-
39+
}
40+
41+
internal extension CSVReader {
4042
/// Select the appropriate encoding depending on the `String` encoding provided by the user and the encoding inferred from the Byte Order Marker.
4143
/// - parameter provided: The user provided `String` encoding.
4244
/// - parameter inferred: The `String` encoding inferred from the data Byte Order Marker.
4345
/// - throws: `CSVError<CSVReader>` exclusively.
4446
/// - returns: The appropriate `String.Encoding` matching from the provided and inferred values.
45-
static func selectFrom(provided: String.Encoding?, inferred: String.Encoding?) throws -> String.Encoding {
47+
static func selectEncodingFrom(provided: String.Encoding?, inferred: String.Encoding?) throws -> String.Encoding {
4648
switch (provided, inferred) {
4749
case (nil, nil): return .utf8
4850
case (nil, let rhs?): return rhs

Sources/Active/Reader/ReaderInference.swift

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,23 @@ internal extension CSVReader {
33
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
44
/// - throws: `CSVError<CSVReader>` exclusively.
55
static func inferFieldDelimiter(rowDelimiter: String.UnicodeScalarView, decoder: ScalarDecoder, buffer: ScalarBuffer) throws -> Delimiter.RawPair {
6+
//#warning("TODO:")
67
throw Error.unsupportedInference()
78
}
89

910
/// Tries to infer the row delimiter given the field delimiter.
1011
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
1112
/// - throws: `CSVError<CSVReader>` exclusively.
1213
static func inferRowDelimiter(fieldDelimiter: String.UnicodeScalarView, decoder: ScalarDecoder, buffer: ScalarBuffer) throws -> Delimiter.RawPair {
14+
//#warning("TODO:")
1315
throw Error.unsupportedInference()
1416
}
1517

1618
/// Tries to infer both the field and row delimiter from the raw data.
1719
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
1820
/// - throws: `CSVError<CSVReader>` exclusively.
1921
static func inferDelimiters(decoder: ScalarDecoder, buffer: ScalarBuffer) throws -> Delimiter.RawPair {
22+
//#warning("TODO:")
2023
throw Error.unsupportedInference()
2124
}
2225
}
@@ -30,22 +33,22 @@ internal extension CSVReader {
3033
typealias DelimiterChecker = (_ scalar: Unicode.Scalar) throws -> Bool
3134

3235
/// Creates a delimiter identifier closure.
33-
/// - parameter view: The unicode characters forming a targeted delimiter.
36+
/// - parameter delimiter: The unicode characters forming a targeted delimiter.
3437
/// - parameter buffer: A unicode character buffer containing further characters to parse.
3538
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
3639
/// - returns: A closure which, given the targeted unicode character, the buffer, and the iterator, returns a Boolean indicating whether there is a delimiter.
37-
static func makeMatcher(delimiter view: String.UnicodeScalarView, buffer: ScalarBuffer, decoder: @escaping CSVReader.ScalarDecoder) -> CSVReader.DelimiterChecker {
40+
static func makeMatcher(delimiter: [Unicode.Scalar], buffer: ScalarBuffer, decoder: @escaping CSVReader.ScalarDecoder) -> CSVReader.DelimiterChecker {
3841
// This should never be triggered.
39-
assert(!view.isEmpty)
42+
assert(!delimiter.isEmpty)
4043

4144
// For optimizations sake, a delimiter proofer is built for a single unicode scalar.
42-
if view.count == 1 {
43-
let delimiter: Unicode.Scalar = view.first!
45+
if delimiter.count == 1 {
46+
let delimiter: Unicode.Scalar = delimiter.first!
4447
return { delimiter == $0 }
4548
// For optimizations sake, a delimiter proofer is built for two unicode scalars.
46-
} else if view.count == 2 {
47-
let firstDelimiter = view.first!
48-
let secondDelimiter = view[view.index(after: view.startIndex)]
49+
} else if delimiter.count == 2 {
50+
let firstDelimiter = delimiter.first!
51+
let secondDelimiter = delimiter[delimiter.index(after: delimiter.startIndex)]
4952

5053
return { [unowned buffer] in
5154
guard firstDelimiter == $0, let secondScalar = try buffer.next() ?? decoder() else {
@@ -63,17 +66,17 @@ internal extension CSVReader {
6366
} else {
6467
return { [unowned buffer] (firstScalar) -> Bool in
6568
var scalar = firstScalar
66-
var index = view.startIndex
69+
var index = delimiter.startIndex
6770
var toIncludeInBuffer: [Unicode.Scalar] = .init()
6871

6972
while true {
70-
guard scalar == view[index] else {
73+
guard scalar == delimiter[index] else {
7174
buffer.preppend(scalars: toIncludeInBuffer)
7275
return false
7376
}
7477

75-
index = view.index(after: index)
76-
guard index < view.endIndex else { return true }
78+
index = delimiter.index(after: index)
79+
guard index < delimiter.endIndex else { return true }
7780

7881
guard let nextScalar = try buffer.next() ?? decoder() else {
7982
buffer.preppend(scalars: toIncludeInBuffer)

Sources/Active/Reader/ReaderInternals.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ extension CSVReader {
22
/// Reader status indicating whether there are remaining lines to read, the CSV has been completely parsed, or an error occurred and no further operation shall be performed.
33
public enum Status {
44
/// The CSV file hasn't been completely parsed.
5-
case reading
5+
case active
66
/// There are no more rows to read. The EOF has been reached.
77
case finished
88
/// An error has occurred and no further operations shall be performed with the reader instance.

Sources/Active/ScalarBuffer.swift

Lines changed: 0 additions & 43 deletions
This file was deleted.

0 commit comments

Comments
 (0)