Skip to content

Commit e2dcb01

Browse files
committed
Encoding buffering strategy .sequential added
1 parent b183ecf commit e2dcb01

29 files changed

+750
-514
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,11 @@ let result = try decoder.decode(CustomType.self, from: data)
315315
`CSVDecoder` can decode CSVs represented as a `Data` blob, a `String`, or an actual file in the file system.
316316

317317
```swift
318-
let decoder = CSVDecoder { $0.bufferingStrategy = .unfulfilled }
318+
let decoder = CSVDecoder { $0.bufferingStrategy = .fulfilled }
319319
let content: [Student] = try decoder.decode([Student].self, from: URL("~/Desktop/Student.csv"))
320320
```
321321

322-
If you are dealing with a big CSV file, it is preferred to used direct file decoding and a `.sequential` or `.unfulfilled` buffering strategy, since then memory usage is drastically reduced.
322+
If you are dealing with a big CSV file, it is preferred to use direct file decoding and a `.sequential` or `.fulfilled` buffering strategy, and to set *presampling* to false, since memory usage is then drastically reduced.
323323

324324
### Decoder configuration
325325

@@ -371,7 +371,7 @@ let encoder = CSVEncoder { $0.bufferingStrategy = .sequential }
371371
try encoder.encode(value, into: URL("~/Desktop/Students.csv"))
372372
```
373373

374-
If you are dealing with a big CSV content, it is preferred to use direct file encoding and a `.sequential` or `.unfulfilled` buffering strategy, since then memory usage is drastically reduced.
374+
If you are dealing with large CSV content, it is preferred to use direct file encoding and a `.sequential` or `.fulfilled` buffering strategy, since memory usage is then drastically reduced.
375375

376376
### Encoder configuration
377377

@@ -393,7 +393,7 @@ The configuration values can be set during `CSVEncoder` initialization or at any
393393

394394
```swift
395395
let encoder = CSVEncoder {
396-
$0.header = ["name", "age", "hasPet"]
396+
$0.headers = ["name", "age", "hasPet"]
397397
$0.delimiters = (field: ";", row: "\r\n")
398398
$0.dateStrategy = .iso8601
399399
$0.bufferingStrategy = .sequential
@@ -407,6 +407,8 @@ encoder.dataStrategy = .custom { (data, encoder) in
407407
}
408408
```
409409

410+
> The `.headers` configuration is required if you are using a keyed encoding container.
411+
410412
</p></details>
411413
</ul>
412414

File renamed without changes.
File renamed without changes.
File renamed without changes.

sources/Active/Reader/ReaderInternals.swift renamed to sources/Active/Reader/Internal/ReaderInference.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
internal extension CSVReader {
1+
extension CSVReader {
22
/// Closure accepting a scalar and returning a Boolean indicating whether the scalar (and subsequent unicode scalars) form a delimiter.
33
/// - parameter scalar: The scalar that may start a delimiter.
44
/// - throws: `CSVError<CSVReader>` exclusively.
5-
typealias DelimiterChecker = (_ scalar: Unicode.Scalar) throws -> Bool
5+
internal typealias DelimiterChecker = (_ scalar: Unicode.Scalar) throws -> Bool
66

77
/// Creates a delimiter identifier closure.
88
/// - parameter delimiter: The unicode characters forming a targeted delimiter.
99
/// - parameter buffer: A unicode character buffer containing further characters to parse.
1010
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
1111
/// - returns: A closure which, given the targeted unicode character, the buffer, and the iterator, returns a Boolean indicating whether there is a delimiter.
12-
static func makeMatcher(delimiter: [Unicode.Scalar], buffer: ScalarBuffer, decoder: @escaping CSVReader.ScalarDecoder) -> CSVReader.DelimiterChecker {
12+
internal static func makeMatcher(delimiter: [Unicode.Scalar], buffer: ScalarBuffer, decoder: @escaping CSVReader.ScalarDecoder) -> CSVReader.DelimiterChecker {
1313
// This should never be triggered.
1414
assert(!delimiter.isEmpty)
1515

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import Foundation
2+
3+
extension CSVReader: Failable {
4+
/// Reader status indicating whether there are remaining lines to read, the CSV has been completely parsed, or an error occurred and no further operation shall be performed.
5+
public enum Status {
6+
/// The CSV file hasn't been completely parsed.
7+
case active
8+
/// There are no more rows to read. The EOF has been reached.
9+
case finished
10+
/// An error has occurred and no further operations shall be performed with the reader instance.
11+
case failed(CSVError<CSVReader>)
12+
}
13+
14+
/// The type of error raised by the CSV reader.
15+
public enum Error: Int {
16+
/// Some of the configuration values provided are invalid.
17+
case invalidConfiguration = 1
18+
/// The CSV data is invalid.
19+
case invalidInput = 2
20+
// /// The inferral process to figure out delimiters or header row status was unsuccessful.
21+
// case inferenceFailure = 3
22+
/// The input stream failed.
23+
case streamFailure = 4
24+
}
25+
26+
public static var errorDomain: String {
27+
"Reader"
28+
}
29+
30+
public static func errorDescription(for failure: Error) -> String {
31+
switch failure {
32+
case .invalidConfiguration: return "Invalid configuration"
33+
// case .inferenceFailure: return "Inference failure"
34+
case .invalidInput: return "Invalid input"
35+
case .streamFailure: return "Stream failure"
36+
}
37+
}
38+
}
39+
40+
extension CSVReader {
41+
/// Private configuration variables for the CSV reader.
42+
internal struct Settings {
43+
/// The unicode scalar delimiters for fields and rows.
44+
let delimiters: Delimiter.RawPair
45+
/// The unicode scalar used as encapsulator and escaping character (when printed two times).
46+
let escapingScalar: Unicode.Scalar?
47+
/// The characters set to be trimmed at the beginning and ending of each field.
48+
let trimCharacters: CharacterSet
49+
50+
/// Creates the immutable reader settings from the user provided configuration values.
51+
/// - parameter configuration: The configuration values provided by the API user.
52+
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
53+
/// - parameter buffer: Small buffer used to store `Unicode.Scalar` values that have been read from the input, but haven't yet been processed.
54+
/// - throws: `CSVError<CSVReader>` exclusively.
55+
init(configuration: Configuration, decoder: ScalarDecoder, buffer: ScalarBuffer) throws {
56+
// 1. Figure out the field and row delimiters.
57+
switch (configuration.delimiters.field.rawValue, configuration.delimiters.row.rawValue) {
58+
case (nil, nil):
59+
self.delimiters = try CSVReader.inferDelimiters(decoder: decoder, buffer: buffer)
60+
case (nil, let row):
61+
self.delimiters = try CSVReader.inferFieldDelimiter(rowDelimiter: row, decoder: decoder, buffer: buffer)
62+
case (let field, nil):
63+
self.delimiters = try CSVReader.inferRowDelimiter(fieldDelimiter: field, decoder: decoder, buffer: buffer)
64+
case (let field, let row) where !field.elementsEqual(row):
65+
self.delimiters = (.init(field), .init(row))
66+
case (let delimiter, _):
67+
throw Error.invalidDelimiters(delimiter)
68+
}
69+
// 2. Set the escaping scalar.
70+
self.escapingScalar = configuration.escapingStrategy.scalar
71+
// 3. Set the trim characters set.
72+
self.trimCharacters = configuration.trimStrategry
73+
// 4. Ensure trim character set doesn't contain the field delimiter.
74+
guard delimiters.field.allSatisfy({ !self.trimCharacters.contains($0) }) else {
75+
throw Error.invalidTrimCharacters(self.trimCharacters, delimiter: configuration.delimiters.field.rawValue)
76+
}
77+
// 5. Ensure trim character set doesn't contain the row delimiter.
78+
guard delimiters.row.allSatisfy({ !self.trimCharacters.contains($0) }) else {
79+
throw Error.invalidTrimCharacters(self.trimCharacters, delimiter: configuration.delimiters.row.rawValue)
80+
}
81+
// 6. Ensure trim character set does not include escaping scalar
82+
if let escapingScalar = self.escapingScalar, self.trimCharacters.contains(escapingScalar) {
83+
throw Error.invalidTrimCharacters(self.trimCharacters, escapingScalar: escapingScalar)
84+
}
85+
}
86+
}
87+
}
88+
89+
fileprivate extension CSVReader.Error {
90+
/// Error raised when the field and row delimiters are the same.
91+
/// - parameter delimiter: The indicated field and row delimiters.
92+
static func invalidDelimiters(_ delimiter: String.UnicodeScalarView) -> CSVError<CSVReader> {
93+
.init(.invalidConfiguration,
94+
reason: "The field and row delimiters cannot be the same.",
95+
help: "Set different delimiters for field and rows.",
96+
userInfo: ["Delimiter": delimiter])
97+
}
98+
/// Error raised when a delimiter (whether row or field) is included in the trim character set.
99+
static func invalidTrimCharacters(_ trimCharacters: CharacterSet, delimiter: String.UnicodeScalarView) -> CSVError<CSVReader> {
100+
.init(.invalidConfiguration,
101+
reason: "The trim character set includes delimiter characters.",
102+
help: "Remove the delimiter scalars from the trim character set.",
103+
userInfo: ["Delimiter": delimiter, "Trim characters": trimCharacters])
104+
}
105+
/// Error raised when the escaping scalar has been included in the trim character set.
106+
/// - parameter escapingScalar: The selected escaping scalar.
107+
/// - parameter trimCharacters: The character set selected for trimming.
108+
static func invalidTrimCharacters(_ trimCharacters: CharacterSet, escapingScalar: Unicode.Scalar) -> CSVError<CSVReader> {
109+
.init(.invalidConfiguration,
110+
reason: "The trim characters set includes the escaping scalar.",
111+
help: "Remove the escaping scalar from the trim characters set.",
112+
userInfo: ["Escaping scalar": escapingScalar, "Trim characters": trimCharacters])
113+
}
114+
}

sources/Active/Reader/ReaderConfiguration.swift

Lines changed: 17 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -33,78 +33,24 @@ extension CSVReader {
3333
}
3434
}
3535

36-
extension CSVReader {
37-
/// Private configuration variables for the CSV reader.
38-
internal struct Settings {
39-
/// The unicode scalar delimiters for fields and rows.
40-
let delimiters: Delimiter.RawPair
41-
/// The unicode scalar used as encapsulator and escaping character (when printed two times).
42-
let escapingScalar: Unicode.Scalar?
43-
/// The characters set to be trimmed at the beginning and ending of each field.
44-
let trimCharacters: CharacterSet
36+
// MARK: -
37+
38+
extension Strategy {
39+
/// Indication on whether the CSV file contains headers or not.
40+
public enum Header: ExpressibleByNilLiteral, ExpressibleByBooleanLiteral {
41+
/// The CSV contains no header row.
42+
case none
43+
/// The CSV contains a single header row.
44+
case firstLine
45+
// /// It is not known whether the CSV contains a header row. Try to infer it!
46+
// case unknown
4547

46-
/// Creates the immutable reader settings from the user provided configuration values.
47-
/// - parameter configuration: The configuration values provided by the API user.
48-
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
49-
/// - parameter buffer: Small buffer used to store `Unicode.Scalar` values that have been read from the input, but haven't yet been processed.
50-
/// - throws: `CSVError<CSVReader>` exclusively.
51-
init(configuration: Configuration, decoder: ScalarDecoder, buffer: ScalarBuffer) throws {
52-
// 1. Figure out the field and row delimiters.
53-
switch (configuration.delimiters.field.rawValue, configuration.delimiters.row.rawValue) {
54-
case (nil, nil):
55-
self.delimiters = try CSVReader.inferDelimiters(decoder: decoder, buffer: buffer)
56-
case (nil, let row):
57-
self.delimiters = try CSVReader.inferFieldDelimiter(rowDelimiter: row, decoder: decoder, buffer: buffer)
58-
case (let field, nil):
59-
self.delimiters = try CSVReader.inferRowDelimiter(fieldDelimiter: field, decoder: decoder, buffer: buffer)
60-
case (let field, let row) where !field.elementsEqual(row):
61-
self.delimiters = (.init(field), .init(row))
62-
case (let delimiter, _):
63-
throw Error.invalidDelimiters(delimiter)
64-
}
65-
// 2. Set the escaping scalar.
66-
self.escapingScalar = configuration.escapingStrategy.scalar
67-
// 3. Set the trim characters set.
68-
self.trimCharacters = configuration.trimStrategry
69-
// 4. Ensure trim character set doesn't contain the field delimiter.
70-
guard delimiters.field.allSatisfy({ !self.trimCharacters.contains($0) }) else {
71-
throw Error.invalidTrimCharacters(self.trimCharacters, delimiter: configuration.delimiters.field.rawValue)
72-
}
73-
// 5. Ensure trim character set doesn't contain the row delimiter.
74-
guard delimiters.row.allSatisfy({ !self.trimCharacters.contains($0) }) else {
75-
throw Error.invalidTrimCharacters(self.trimCharacters, delimiter: configuration.delimiters.row.rawValue)
76-
}
77-
// 6. Ensure trim character set does not include escaping scalar
78-
if let escapingScalar = self.escapingScalar, self.trimCharacters.contains(escapingScalar) {
79-
throw Error.invalidTrimCharacters(self.trimCharacters, escapingScalar: escapingScalar)
80-
}
48+
public init(nilLiteral: ()) {
49+
self = .none
50+
}
51+
52+
public init(booleanLiteral value: BooleanLiteralType) {
53+
self = (value) ? .firstLine : .none
8154
}
82-
}
83-
}
84-
85-
fileprivate extension CSVReader.Error {
86-
/// Error raised when the field and row delimiters are the same.
87-
/// - parameter delimiter: The indicated field and row delimiters.
88-
static func invalidDelimiters(_ delimiter: String.UnicodeScalarView) -> CSVError<CSVReader> {
89-
.init(.invalidConfiguration,
90-
reason: "The field and row delimiters cannot be the same.",
91-
help: "Set different delimiters for field and rows.",
92-
userInfo: ["Delimiter": delimiter])
93-
}
94-
/// Error raised when a delimiter (whether row or field) is included in the trim character set.
95-
static func invalidTrimCharacters(_ trimCharacters: CharacterSet, delimiter: String.UnicodeScalarView) -> CSVError<CSVReader> {
96-
.init(.invalidConfiguration,
97-
reason: "The trim character set includes delimiter characters.",
98-
help: "Remove the delimiter scalars from the trim character set.",
99-
userInfo: ["Delimiter": delimiter, "Trim characters": trimCharacters])
100-
}
101-
/// Error raised when the escaping scalar has been included in the trim character set.
102-
/// - parameter escapingScalar: The selected escaping scalar.
103-
/// - parameter trimCharacters: The character set selected for trimming.
104-
static func invalidTrimCharacters(_ trimCharacters: CharacterSet, escapingScalar: Unicode.Scalar) -> CSVError<CSVReader> {
105-
.init(.invalidConfiguration,
106-
reason: "The trim characters set includes the escaping scalar.",
107-
help: "Remove the escaping scalar from the trim characters set.",
108-
userInfo: ["Escaping scalar": escapingScalar, "Trim characters": trimCharacters])
10955
}
11056
}

sources/Active/Reader/ReaderStrategy.swift renamed to sources/Active/Reader/ReaderResults.swift

Lines changed: 3 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,4 @@
1-
extension Strategy {
2-
/// Indication on whether the CSV file contains headers or not.
3-
public enum Header: ExpressibleByNilLiteral, ExpressibleByBooleanLiteral {
4-
/// The CSV contains no header row.
5-
case none
6-
/// The CSV contains a single header row.
7-
case firstLine
8-
// /// It is not known whether the CSV contains a header row. Try to infer it!
9-
// case unknown
10-
11-
public init(nilLiteral: ()) {
12-
self = .none
13-
}
14-
15-
public init(booleanLiteral value: BooleanLiteralType) {
16-
self = (value) ? .firstLine : .none
17-
}
18-
}
19-
}
20-
21-
extension CSVReader: Failable {
22-
/// The type of error raised by the CSV reader.
23-
public enum Error: Int {
24-
/// Some of the configuration values provided are invalid.
25-
case invalidConfiguration = 1
26-
/// The CSV data is invalid.
27-
case invalidInput = 2
28-
// /// The inferral process to figure out delimiters or header row status was unsuccessful.
29-
// case inferenceFailure = 3
30-
/// The input stream failed.
31-
case streamFailure = 4
32-
}
33-
34-
public static var errorDomain: String { "Reader" }
35-
36-
public static func errorDescription(for failure: Error) -> String {
37-
switch failure {
38-
case .invalidConfiguration: return "Invalid configuration"
39-
// case .inferenceFailure: return "Inference failure"
40-
case .invalidInput: return "Invalid input"
41-
case .streamFailure: return "Stream failure"
42-
}
43-
}
44-
}
45-
461
extension CSVReader {
47-
/// Reader status indicating whether there are remaining lines to read, the CSV has been completely parsed, or an error occurred and no further operation shall be performed.
48-
public enum Status {
49-
/// The CSV file hasn't been completely parsed.
50-
case active
51-
/// There are no more rows to read. The EOF has been reached.
52-
case finished
53-
/// An error has occurred and no further operations shall be performed with the reader instance.
54-
case failed(CSVError<CSVReader>)
55-
}
56-
572
/// A record is a convenience structure on top of a CSV row (i.e. an array of strings) letting you efficiently access each field through its header title/name.
583
public struct Record: RandomAccessCollection, Hashable {
594
/// A CSV row content.
@@ -108,7 +53,9 @@ extension CSVReader {
10853
lhs.row == rhs
10954
}
11055
}
111-
56+
}
57+
58+
extension CSVReader {
11259
/// Structure wrapping over the result of a CSV file.
11360
public struct Output: RandomAccessCollection, Equatable {
11461
/// A row representing the field titles/names.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)