Skip to content

Commit 61c26b1

Browse files
committed
ShadowEncoder.Sink implemented
1 parent dde52b1 commit 61c26b1

17 files changed

+235
-86
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ You can choose to add the library through SPM or Cocoapods:
3737
let package = Package(
3838
/* Your package name, supported platforms, and generated products go here */
3939
dependencies: [
40-
.package(url: "https://github.com/dehesa/CodableCSV.git", .upToNextMinor(from: "0.5.1"))
40+
.package(url: "https://github.com/dehesa/CodableCSV.git", .upToNextMinor(from: "0.5.2"))
4141
],
4242
targets: [
4343
.target(name: /* Your target name here */, dependencies: ["CodableCSV"])
@@ -48,7 +48,7 @@ You can choose to add the library through SPM or Cocoapods:
4848
- [Cocoapods](https://cocoapods.org).
4949

5050
```
51-
pod 'CodableCSV', '~> 0.5.1'
51+
pod 'CodableCSV', '~> 0.5.2'
5252
```
5353

5454
</p></details>
@@ -324,9 +324,9 @@ The decoding process can be tweaked by specifying configuration values at initia
324324

325325
- `dataStrategy` (default: `.base64`) specify the strategy to use when decoding data blobs.
326326

327-
- `bufferingStrategy` (default: `.keepAll`) tells the decoder how to cache previously decoded CSV rows.
327+
- `bufferingStrategy` (default: `.keepAll`) tells the decoder how to cache CSV rows.
328328

329-
Caching rows allow random access through `KeyedDecodingContainer`s.
329+
Caching rows allows random access through `KeyedDecodingContainer`s. For more information, check the `DecodingBuffer` strategy definition.
330330

331331
The configuration values can be set during `CSVDecoder` initialization or at any point before the `decode` function is called.
332332

@@ -335,7 +335,7 @@ let decoder = CSVDecoder {
335335
$0.encoding = .utf8
336336
$0.delimiters.field = "\t"
337337
$0.headerStrategy = .firstLine
338-
$0.bufferingStrategy = .ordered
338+
$0.bufferingStrategy = .keepAll
339339
}
340340

341341
decoder.decimalStrategy = .custom {

sources/Active/Reader/Reader.swift

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ extension CSVReader {
8484
if let l = self.headerLookup {
8585
lookup = l
8686
} else {
87-
lookup = try self.makeHeaderLookup()
87+
lookup = try self.headers.lookupDictionary(onCollision: Error.invalidHashableHeader)
8888
self.headerLookup = lookup
8989
}
9090

@@ -132,21 +132,6 @@ extension CSVReader {
132132
// MARK: -
133133

134134
extension CSVReader {
135-
/// Creates the lookup dictionary from the headers row.
136-
///
137-
/// Although it is officially allowed that two CSV headers have the same value, this method will throw an error if that is the case.
138-
/// - throws: `CSVError<CSVReader>` exclusively.
139-
internal func makeHeaderLookup() throws -> [Int:Int] {
140-
var result: [Int:Int] = .init(minimumCapacity: self.headers.count)
141-
for (index, header) in self.headers.enumerated() {
142-
let hash = header.hashValue
143-
guard case .none = result.updateValue(index, forKey: hash) else {
144-
throw Error.invalidHashableHeader()
145-
}
146-
}
147-
return result
148-
}
149-
150135
/// Parses a CSV row.
151136
/// - parameter rowIndex: The current index location.
152137
/// - throws: `CSVError<CSVReader>` exclusively.

sources/Active/Reader/ReaderAPI.swift

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ extension CSVReader {
118118
/// - returns: Tuple with the CSV headers (empty if none) and all records within the CSV file.
119119
public static func parse<S>(input: S, configuration: Configuration = .init()) throws -> Output where S:StringProtocol {
120120
let reader = try CSVReader(input: input, configuration: configuration)
121-
let lookup = try reader.makeHeaderLookup()
121+
let lookup = try reader.headers.lookupDictionary(onCollision: Error.invalidHashableHeader)
122122

123123
var result: [[String]] = .init()
124124
while let row = try reader.parseRow() {
@@ -135,7 +135,7 @@ extension CSVReader {
135135
/// - returns: Tuple with the CSV headers (empty if none) and all records within the CSV file.
136136
public static func parse(input: Data, configuration: Configuration = .init()) throws -> Output {
137137
let reader = try CSVReader(input: input, configuration: configuration)
138-
let lookup = try reader.makeHeaderLookup()
138+
let lookup = try reader.headers.lookupDictionary(onCollision: Error.invalidHashableHeader)
139139

140140
var result: [[String]] = .init()
141141
while let row = try reader.parseRow() {
@@ -152,7 +152,7 @@ extension CSVReader {
152152
/// - returns: Tuple with the CSV headers (empty if none) and all records within the CSV file.
153153
public static func parse(input: URL, configuration: Configuration = .init()) throws -> Output {
154154
let reader = try CSVReader(input: input, configuration: configuration)
155-
let lookup = try reader.makeHeaderLookup()
155+
let lookup = try reader.headers.lookupDictionary(onCollision: Error.invalidHashableHeader)
156156

157157
var result: [[String]] = .init()
158158
while let row = try reader.parseRow() {
@@ -220,4 +220,10 @@ fileprivate extension CSVReader.Error {
220220
help: "Make sure the URL is valid and you are allowed to access the file. Alternatively set the configuration's presample or load the file in a data blob and use the reader's data initializer.",
221221
userInfo: ["File URL": url])
222222
}
223+
/// Error raised when a record is fetched, but there are header names which have the same hash value (i.e. they have the same name).
224+
static func invalidHashableHeader() -> CSVError<CSVReader> {
225+
.init(.invalidInput,
226+
reason: "The header row contain two fields with the same value.",
227+
help: "Request a row instead of a record.")
228+
}
223229
}

sources/Active/Writer/Writer.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public final class CSVWriter {
2222
/// The field to write next.
2323
public private(set) var fieldIndex: Int
2424
/// The number of fields per row that are expected.
25-
private var expectedFields: Int
25+
private(set) internal var expectedFields: Int
2626

2727
/// Designated initializer for the CSV writer.
2828
/// - parameter configuration: Recipe detailing how to parse the CSV data (i.e. encoding, delimiters, etc.).

sources/Codable/Decodable/Containers/KeyedDecodingContainer.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ extension ShadowDecoder {
4848
guard let numRows = self.decoder.source.numRows, numRows > 0 else { return [] }
4949
return (0..<numRows).compactMap { Key(intValue: $0) }
5050
case .row:
51-
let numFields = self.decoder.source.numFields
51+
let numFields = self.decoder.source.numExpectedFields
5252
guard numFields > 0 else { return [] }
5353

5454
let numberKeys = (0..<numFields).compactMap { Key(intValue: $0) }
@@ -65,7 +65,7 @@ extension ShadowDecoder {
6565
return self.decoder.source.contains(rowIndex: index)
6666
case .row:
6767
if let index = key.intValue {
68-
return index >= 0 && index < self.decoder.source.numFields
68+
return index >= 0 && index < self.decoder.source.numExpectedFields
6969
} else {
7070
return self.decoder.source.headers.contains(key.stringValue)
7171
}
@@ -283,7 +283,7 @@ extension ShadowDecoder.KeyedContainer {
283283
case .file:
284284
guard let rowIndex = key.intValue else { throw DecodingError.invalidKey(forRow: key, codingPath: codingPath) }
285285
// Values are only allowed to be decoded directly from a nested container in "file level" if the CSV rows have a single column.
286-
guard self.decoder.source.numFields == 1 else { throw DecodingError.invalidNestedRequired(codingPath: self.codingPath) }
286+
guard self.decoder.source.numExpectedFields == 1 else { throw DecodingError.invalidNestedRequired(codingPath: self.codingPath) }
287287
index = (rowIndex, 0)
288288
codingPath.append(IndexKey(index.field))
289289
}

sources/Codable/Decodable/Containers/SingleValueDecodingContainer.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,12 @@ private extension ShadowDecoder.SingleValueContainer {
241241
return try transform(string) ?! DecodingError.invalid(type: T.self, string: string, codingPath: self.codingPath)
242242
case .row(let rowIndex):
243243
// Values are only allowed to be decoded directly from a single value container in "row level" if the CSV has single column rows.
244-
guard source.numFields == 1 else { throw DecodingError.invalidNestedRequired(codingPath: self.codingPath) }
244+
guard source.numExpectedFields == 1 else { throw DecodingError.invalidNestedRequired(codingPath: self.codingPath) }
245245
let string = try source.field(at: rowIndex, 0)
246246
return try transform(string) ?! DecodingError.invalid(type: T.self, string: string, codingPath: self.codingPath + [IndexKey(0)])
247247
case .file:
248248
// Values are only allowed to be decoded directly from a single value container in "file level" if the CSV file has a single row with a single column.
249-
if source.isRowAtEnd(index: 1), source.numFields == 1 {
249+
if source.isRowAtEnd(index: 1), source.numExpectedFields == 1 {
250250
let string = try self.decoder.source.field(at: 0, 0)
251251
return try transform(string) ?! DecodingError.invalid(type: T.self, string: string, codingPath: self.codingPath + [IndexKey(0), IndexKey(0)])
252252
} else {

sources/Codable/Decodable/Containers/UnkeyedDecodingContainer.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ extension ShadowDecoder {
4848
var count: Int? {
4949
switch self.focus {
5050
case .file: return self.decoder.source.numRows
51-
case .row: return self.decoder.source.numFields
51+
case .row: return self.decoder.source.numExpectedFields
5252
}
5353
}
5454

@@ -298,7 +298,7 @@ extension ShadowDecoder.UnkeyedContainer {
298298
index = (rowIndex, self.currentIndex)
299299
case .file:
300300
// Values are only allowed to be decoded directly from a nested container in "file level" if the CSV rows have a single column.
301-
guard self.decoder.source.numFields == 1 else { throw DecodingError.invalidNestedRequired(codingPath: self.codingPath) }
301+
guard self.decoder.source.numExpectedFields == 1 else { throw DecodingError.invalidNestedRequired(codingPath: self.codingPath) }
302302
index = (self.currentIndex, 0)
303303
codingPath.append(IndexKey(index.field))
304304
}

sources/Codable/Decodable/DecoderConfiguration.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ extension CSVDecoder {
1212
/// The strategy to use when decoding binary data.
1313
public var dataStrategy: Strategy.DataDecoding
1414
/// The amount of CSV rows kept in memory after decoding to allow the random-order jumping exposed by keyed containers.
15-
public var bufferingStrategy: Strategy.Buffering
15+
public var bufferingStrategy: Strategy.DecodingBuffer
1616

1717
/// Designated initializer setting the default values.
1818
public init() {

sources/Codable/Decodable/DecodingStrategy.swift

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,26 @@ extension Strategy {
3535
case custom((_ decoder: Decoder) throws -> Data)
3636
}
3737

38-
/// Strategy indicating how many rows are cached for reuse by the decoder.
38+
/// Indication of how many rows are cached for reuse by the decoder.
3939
///
40-
/// The `Decodable` protocol allows CSV rows to be decoded in random-order through the keyed containers. For example, a user can ask for a row at position 24 and then ask for the CSV row at index 1.
41-
/// Since it is impossible to foresee how the user will decode the rows, this library allows the user to set the buffering mechanism.
40+
/// CSV decoding is an inherently sequential operation; i.e. row 2 must be decoded after row 1. This is due to the string encoding, the field/row delimiter usage, and the underlying row width not being set.
41+
/// On the other hand, the `Decodable` protocol allows CSV rows to be decoded in random-order through the keyed containers. For example, a user can ask for a row at position 24 and then ask for the CSV row at index 3.
4242
///
43-
/// Setting the buffering strategy lets you tweak the memory usage and whether an error will be thrown when previous rows are requested:
44-
/// - `keepAll` will impose no restrictions and will make the decoder cache every decoded row. Setting this strategy will double the memory usage, but the user is free to request rows in any order.
45-
/// - `ordered` discard decoded rows after usage, only keeping records when a jump forward have been requested through a keyed container.
46-
public enum Buffering {
43+
/// A buffer is used to marry the sequential needs of the CSV decoder and `Decodable`'s *random* nature. This buffer stores all decoded CSV rows (starts with none and gets filled as more rows are being decoded).
44+
/// The `DecodingBuffer` strategy gives you the option to control the buffer's memory usage and whether rows are being discarded after being decoded.
45+
public enum DecodingBuffer {
4746
/// All decoded CSV rows are cached.
47+
/// Forward/Backwards decoding jumps are allowed. A row that has been previously decoded can be decoded again.
48+
///
49+
/// Setting this strategy will double the memory usage, but the user is free to request rows in any order.
4850
case keepAll
49-
/// Rows are only cached when there are holes between the decoded row indices.
50-
// case ordered
51+
/// Only CSV rows that have been decoded but not requested by the user are being kept in memory.
52+
/// Forward/Backwards decoding jumps are allowed. However, previously requested rows cannot be requested again or an error will be thrown.
53+
///
54+
/// This strategy will massively reduce the memory usage, but it will throw an error if a CSV row that was previously decoded is requested from a keyed container.
55+
case unfulfilled
56+
/// No rows are kept in memory (except for the CSV row being decoded at the moment).
57+
/// Forward jumps are allowed, but the rows in-between the jump cannot be decoded.
58+
case sequential
5159
}
5260
}

sources/Codable/Decodable/Shadow/Source.swift

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ extension ShadowDecoder {
99
let configuration: CSVDecoder.Configuration
1010
/// Any contextual information set by the user for decoding.
1111
let userInfo: [CodingUserInfoKey:Any]
12-
/// The header record with the field names.
12+
/// The header row with the field names.
1313
let headers: [String]
1414
/// Lookup dictionary providing fast index discovery for header names.
15-
private let headerLookup: [Int:Int]
15+
private var headerLookup: [Int:Int]
1616

1717
/// Creates the unique data source for a decoding process.
1818
/// - parameter reader: The instance actually reading the input bytes.
@@ -24,7 +24,7 @@ extension ShadowDecoder {
2424
self.configuration = configuration
2525
self.userInfo = userInfo
2626
self.headers = reader.headers
27-
self.headerLookup = (self.headers.isEmpty) ? .init() : try! reader.makeHeaderLookup()
27+
self.headerLookup = .init()
2828
}
2929
}
3030
}
@@ -77,8 +77,10 @@ extension ShadowDecoder.Source {
7777
}
7878

7979
extension ShadowDecoder.Source {
80-
/// Returns the number of fields that there is per record.
81-
var numFields: Int {
80+
/// Returns the number of fields that there is per row.
81+
///
82+
/// If the number is not known at call time, a row is decoded to figure out how many fields there are.
83+
var numExpectedFields: Int {
8284
let (numRows, numFields) = self.reader.count
8385
guard numRows <= 0 else { return numFields }
8486

@@ -90,7 +92,7 @@ extension ShadowDecoder.Source {
9092
/// Boolean indicating whether the given field index is out of bounds (i.e. there are no more elements left to be decoded in the row).
9193
/// - parameter index: The field index being checked.
9294
func isFieldAtEnd(index: Int) -> Bool {
93-
return index >= self.numFields
95+
return index >= self.numExpectedFields
9496
}
9597

9698
/// Returns the field index for the given coding key.
@@ -103,7 +105,11 @@ extension ShadowDecoder.Source {
103105
if let index = key.intValue { return index }
104106

105107
let name = key.stringValue
106-
guard !self.headerLookup.isEmpty else { throw DecodingError.emptyHeader(key: key, codingPath: codingPath) }
108+
if self.headerLookup.isEmpty {
109+
guard !self.headers.isEmpty else { throw DecodingError.emptyHeader(key: key, codingPath: codingPath) }
110+
self.headerLookup = try self.headers.lookupDictionary(onCollision: { DecodingError.invalidHashableHeader(codingPath: codingPath) })
111+
}
112+
107113
return try self.headerLookup[name.hashValue] ?! DecodingError.unmatchedHeader(forKey: key, codingPath: codingPath)
108114
}
109115

@@ -134,6 +140,13 @@ extension ShadowDecoder.Source {
134140
}
135141

136142
fileprivate extension DecodingError {
143+
/// Error raised when a record is fetched, but there are header names which have the same hash value (i.e. they have the same name).
144+
static func invalidHashableHeader(codingPath: [CodingKey]) -> DecodingError {
145+
DecodingError.dataCorrupted(
146+
Context(codingPath: codingPath,
147+
debugDescription: "The header row contain two fields with the same value.")
148+
)
149+
}
137150
/// The provided coding key couldn't be mapped into a concrete index since there is no CSV header.
138151
static func emptyHeader(key: CodingKey, codingPath: [CodingKey]) -> DecodingError {
139152
DecodingError.keyNotFound(key, .init(codingPath: codingPath,

0 commit comments

Comments
 (0)