Skip to content

Commit c3c3393

Browse files
committed
First complete Encodable implementation (.keepAll strategy)
1 parent db4e4b0 commit c3c3393

File tree

6 files changed

+273
-149
lines changed

6 files changed

+273
-149
lines changed

sources/Codable/Encodable/Containers/SingleValueEncodingContainer.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ extension ShadowEncoder.SingleValueContainer {
225225
}
226226

227227
let string = try transform()
228-
try sink.field(value: string, at: rowIndex, fieldIndex)
228+
try sink.fieldValue(string, rowIndex, fieldIndex)
229229
}
230230
}
231231

sources/Codable/Encodable/EncoderConfiguration.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ extension CSVEncoder {
2121
self.decimalStrategy = .locale(nil)
2222
self.dateStrategy = .deferredToDate
2323
self.dataStrategy = .base64
24-
self.bufferingStrategy = .unfulfilled
24+
self.bufferingStrategy = .keepAll
2525
}
2626
}
2727
}

sources/Codable/Encodable/EncodingStrategy.swift

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,27 @@ extension Strategy {
4141
case custom((Data, Encoder) throws -> Void)
4242
}
4343

44-
/// Indication on how encoded CSV rows are cached and actually written to the output target (file, data blocb, or string).
44+
/// Indication of how encoded CSV rows are cached and written to the output target (file, data blob, or string).
4545
///
46-
/// CSV encoding is an inherently sequential operation, i.e. row 2 must be encoded after row 1. On the other hand, the `Encodable` protocol allows CSV rows to be encoded in a random-order
46+
/// CSV encoding is an inherently sequential operation, i.e. row 2 must be encoded after row 1. On the other hand, the `Encodable` protocol allows CSV rows to be encoded in random order through *keyed containers*. Selecting the appropriate buffering strategy lets you pick your encoding style and minimize memory usage.
4747
public enum EncodingBuffer {
48-
/// Encoded rows are being kept in memory till it is their turn to be written to the targeted output.
48+
/// All encoded rows/fields are cached and the *writing* only occurs at the end of the encodable process.
4949
///
50-
/// Foward encoding jumps are allowed and the user may jump backward to continue encoding.
50+
/// *Keyed containers* can be used to encode rows/fields unordered. That means a row at position 5 may be encoded before the row at position 3. Similar behavior is supported for fields within a row.
51+
/// - attention: This strategy consumes the largest amount of memory from all the supported options.
52+
case keepAll
53+
/// Encoded rows may be cached, but the encoder will keep the buffer as small as possible by writing completed ordered rows.
54+
///
55+
/// *Keyed containers* can be used to encode rows/fields unordered. The writer will however consume rows in order.
56+
///
57+
/// For example, an encoder starts encoding row 1 and it gets all its fields. The row will get written and no cache for the row is kept. Same situation occurs when the row 2 is encoded.
58+
/// However, the user may decide to jump to row 5 and encode it. This row will be kept in the cache until rows 3 and 4 are encoded, at which time rows 3, 4, 5, and any subsequent rows will be written.
59+
/// - attention: This strategy tries to keep the cache to a minimum, but memory usage may grow large if there are gaps while encoding rows. Those gaps are filled with empty rows at the end of the encoding process.
5160
case unfulfilled
5261
/// No rows are kept in memory and writes are performed sequentially.
5362
///
54-
/// If a keyed container is used to encode rows and a jump forward is requested all the in-between rows are filled with empty fields.
63+
/// *Keyed containers* can be used, however when forward jumps are performed any in-between rows will be filled with empty fields.
64+
/// - attention: This strategy provides the smallest usage of memory from all.
5565
case sequential
5666
}
5767
}

sources/Codable/Encodable/Shadow/Sink.swift

Lines changed: 76 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ extension ShadowEncoder {
1111
let userInfo: [CodingUserInfoKey:Any]
1212
/// Lookup dictionary providing fast index discovery for header names.
1313
private var headerLookup: [Int:Int]
14+
/// Encodes the given field in the given position.
15+
let fieldValue: (_ value: String, _ rowIndex: Int, _ fieldIndex: Int) throws -> Void
1416

1517
/// Creates the unique data sink for the encoding process.
1618
init(writer: CSVWriter, configuration: CSVEncoder.Configuration, userInfo: [CodingUserInfoKey:Any]) throws {
@@ -19,10 +21,54 @@ extension ShadowEncoder {
1921
self.configuration = configuration
2022
self.userInfo = userInfo
2123
self.headerLookup = .init()
24+
25+
switch configuration.bufferingStrategy {
26+
case .keepAll:
27+
self.fieldValue = { [unowned buffer = self.buffer] in buffer.store(value: $0, at: $1, $2) }
28+
case .unfulfilled:
29+
fatalError()
30+
case .sequential:
31+
fatalError()
32+
}
2233
}
2334
}
2435
}
2536

37+
//func field(value: String, at rowIndex: Int, _ fieldIndex: Int) throws {
38+
// #warning("How to deal with intended field gaps?")
39+
// // When the next row is written, check the previous row.
40+
// // What happens when there are several empty rows?
41+
//
42+
// // 1. Is the requested row the same as the writer's row focus?
43+
// guard self.writer.rowIndex == rowIndex else {
44+
// // 1.1. If not, the row must not have been written yet (otherwise an error is thrown).
45+
// guard self.writer.rowIndex > rowIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
46+
// // 1.2. If the row hasn't been written yet, store it in the buffer.
47+
// return self.buffer.store(value: value, at: rowIndex, fieldIndex)
48+
// }
49+
// // 2. Is the requested field the same as the writer's field focus?
50+
// guard self.writer.fieldIndex == fieldIndex else {
51+
// // 2.1 If not, the field must not have been written yet (otherwise an error is thrown).
52+
// guard self.writer.fieldIndex > fieldIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
53+
// // 2.2 If the field hasn't been written yet, store it in the buffer.
54+
// return self.buffer.store(value: value, at: rowIndex, fieldIndex)
55+
// }
56+
// // 3. Write the provided field since it is the same as the writer's row/field.
57+
// try self.writer.write(field: value)
58+
// // 4. How many fields per row there are? If unknown, stop.
59+
// guard self.writer.expectedFields > 0 else { return }
60+
// #warning("How to deal with the first ever row when no headers are given?")
61+
// while true {
62+
// // 5. If is not the end of the row, check the buffer and see whether the following fields are already cached.
63+
// while self.writer.fieldIndex < self.writer.expectedFields {
64+
// guard let field = self.buffer.retrieveField(at: self.writer.rowIndex, self.writer.fieldIndex) else { return }
65+
// try self.writer.write(field: field)
66+
// }
67+
// // 6. If it is the end of the row, write the row delimiter and pass to the next row.
68+
// try self.writer.endRow()
69+
// }
70+
//}
71+
2672
extension ShadowEncoder.Sink {
2773
/// The number of fields expected per row.
2874
///
@@ -42,10 +88,13 @@ extension ShadowEncoder.Sink {
4288
///
4389
/// The fields might not yet be fully encoded (i.e. written in their binary format).
4490
func numEncodedFields(at rowIndex: Int) -> Int {
91+
// 1. If the requested row has already been written, it can be safely assumed that all the fields were written.
4592
if rowIndex < self.writer.rowIndex {
4693
return self.writer.expectedFields
94+
// 2. If the row index is the same as the one being targeted by the writer, the number is the sum of the writer and the buffer.
4795
} else if rowIndex == self.writer.rowIndex {
48-
return max(self.writer.fieldIndex, self.buffer.fieldCount(for: rowIndex))
96+
return self.writer.fieldIndex + self.buffer.fieldCount(for: rowIndex)
97+
// 3. If the row hasn't been written yet, query the buffer.
4998
} else {
5099
return self.buffer.fieldCount(for: rowIndex)
51100
}
@@ -57,80 +106,49 @@ extension ShadowEncoder.Sink {
57106
/// - parameter key: The coding key representing the field's position within a row, or the field's name within the headers row.
58107
/// - returns: The position of the field within the row.
59108
func fieldIndex(forKey key: CodingKey, codingPath: [CodingKey]) throws -> Int {
109+
// 1. If the key can be transformed into an integer, prefer that.
60110
if let index = key.intValue { return index }
61-
111+
// 2. If not, extract the header name from the key.
62112
let name = key.stringValue
113+
// 3. Get the header lookup dictionary (building it if it is the first time accessing it).
63114
if self.headerLookup.isEmpty {
64115
guard !self.configuration.headers.isEmpty else { throw CSVEncoder.Error.emptyHeader(key: key, codingPath: codingPath) }
65116
self.headerLookup = try self.configuration.headers.lookupDictionary(onCollision: { CSVEncoder.Error.invalidHashableHeader() })
66117
}
67-
118+
// 4. Get the index from the header lookup up and the header name.
68119
return try self.headerLookup[name.hashValue] ?! CSVEncoder.Error.unmatchedHeader(forKey: key, codingPath: codingPath)
69120
}
70121

71-
/// Encodes the given field in the given position.
72-
func field(value: String, at rowIndex: Int, _ fieldIndex: Int) throws {
73-
#warning("How to deal with intended field gaps?")
74-
// When the next row is writen, check the previous row.
75-
// Although, what happens when there are several empty rows?
76-
77-
// 1. Is the requested row the same position as the writer's row?
78-
guard self.writer.rowIndex == rowIndex else {
79-
// 1.1. If not, the row must not have been written yet (otherwise an error is thrown).
80-
guard self.writer.rowIndex > rowIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
81-
// 1.2. If the row hasn't been writen yet, store it in the buffer.
82-
return self.buffer.store(value: value, at: rowIndex, fieldIndex)
83-
}
84-
// 2. Is the requested field the same as the writer's field?
85-
guard self.writer.fieldIndex == fieldIndex else {
86-
// 2.1 If not, the field must not have been written yet (otherwise an error is thrown).
87-
guard self.writer.fieldIndex > fieldIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
88-
// 2.2 If the field hasn't been writen yet, store it in the buffer.
89-
return self.buffer.store(value: value, at: rowIndex, fieldIndex)
90-
}
91-
// 3. Write the provided field since it is the same as the writer's row/field.
92-
try self.writer.write(field: value)
93-
// 4. How many fields per row there are? If unknown, stop.
94-
guard self.writer.expectedFields > 0 else { return }
95-
#warning("How to deal with the first ever row when no headers are given?")
96-
while true {
97-
// 5. If is not the end of the row, check the buffer and see whether the following fields are already cached.
98-
while self.writer.fieldIndex < self.writer.expectedFields {
99-
guard let field = self.buffer.retrieveField(at: self.writer.rowIndex, self.writer.fieldIndex) else { return }
100-
try self.writer.write(field: field)
101-
}
102-
// 6. If it is the end of the row, write the row delimiter and pass to the next row.
103-
try self.writer.endRow()
104-
}
105-
}
106-
107122
/// Finishes the whole encoding operation by committing to the writer any remaining row/field in the buffer.
108123
///
109124
/// This function works even when the number of fields per row are unknown.
110125
func completeEncoding() throws {
111126
// 1. Remove from the buffer the rows/fields from the writer point.
112-
var remainings = self.buffer.retrieveSequence(from: self.writer.rowIndex, fieldIndex: self.writer.fieldIndex)
113-
// 2. After the removal there should be any more rows/fields in the buffer.
114-
guard self.buffer.isEmpty else { throw CSVEncoder.Error.corruptedBuffer() }
115-
// 3. Iterate through all the remaining rows.
116-
while let row = remainings.next() {
117-
// 4. If the writer is further back from the next remaining row. Fill the writer with empty rows.
118-
while self.writer.rowIndex < row.index {
119-
try self.writer.endRow()
120-
}
121-
// 5. Iterate through all the fields in the row.
122-
for field in row.fields {
123-
// 6. If the row is further back from the next remaining field. Fill the writer with empty fields.
124-
while self.writer.fieldIndex < field.index {
125-
try self.writer.write(field: "")
127+
var remainings = self.buffer.retrieveAll()
128+
// 2. Check whether there is any remaining row whatsoever.
129+
if let firstIndex = remainings.firstIndex {
130+
// 3. The first indeces must be the same or greater than the writer ones.
131+
guard firstIndex.row >= self.writer.rowIndex, firstIndex.field >= self.writer.fieldIndex else { throw CSVEncoder.Error.corruptedBuffer() }
132+
// 4. Iterate through all the remaining rows.
133+
while let row = remainings.next() {
134+
// 5. If the writer is further back from the next remaining row. Fill the writer with empty rows.
135+
while self.writer.rowIndex < row.index {
136+
try self.writer.endRow()
137+
}
138+
// 6. Iterate through all the fields in the row.
139+
for field in row.fields {
140+
// 7. If the row is further back from the next remaining field. Fill the writer with empty fields.
141+
while self.writer.fieldIndex < field.index {
142+
try self.writer.write(field: "")
143+
}
144+
// 8. Write the targeted field.
145+
try self.writer.write(field: field.value)
126146
}
127-
// 7. Write the targeted field.
128-
try self.writer.write(field: field.value)
147+
// 9. Finish the targeted row.
148+
try self.writer.endRow()
129149
}
130-
// 8. Finish the targeted row.
131-
try self.writer.endRow()
132150
}
133-
// 9. Finish the file.
151+
// 10. Finish the file.
134152
try self.writer.endFile()
135153
}
136154
}

sources/Codable/Encodable/Shadow/SinkBuffer.swift

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,69 +3,86 @@ extension ShadowEncoder.Sink {
33
internal final class Buffer {
44
/// The buffering strategy.
55
let strategy: Strategy.EncodingBuffer
6+
/// The underlying storage.
7+
private var storage: [Int: [Int:String]]
68

79
/// Designated initializer.
810
init(strategy: Strategy.EncodingBuffer) {
911
self.strategy = strategy
10-
#warning("TODO: EncodingBuffer strategy")
12+
13+
let capacity: Int
14+
switch strategy {
15+
case .keepAll: capacity = 32
16+
case .unfulfilled: capacity = 8
17+
case .sequential: capacity = 1
18+
}
19+
self.storage = .init(minimumCapacity: capacity)
1120
}
1221
}
1322
}
1423

1524
extension ShadowEncoder.Sink.Buffer {
16-
///
25+
/// A Boolean value indicating whether the buffer currently holds no rows.
1726
var isEmpty: Bool {
18-
#warning("TODO")
19-
fatalError()
27+
self.storage.isEmpty
2028
}
2129

2230
/// The number of rows being hold by the receiving buffer.
2331
var count: Int {
24-
#warning("TODO")
25-
fatalError()
32+
self.storage.count
2633
}
2734

2835
/// Returns the number of fields that have been received for the given row.
2936
///
3037
/// If none, it returns *zero*.
3138
func fieldCount(for rowIndex: Int) -> Int {
32-
#warning("TODO")
33-
fatalError()
39+
self.storage[rowIndex]?.count ?? 0
3440
}
3541

42+
/// Stores the provided `value` into the temporary storage associating its position as `rowIndex` and `fieldIndex`.
3643
///
44+
/// If there was a value at that position, the value is overwritten.
3745
func store(value: String, at rowIndex: Int, _ fieldIndex: Int) {
38-
#warning("TODO")
39-
fatalError()
46+
var row = self.storage[rowIndex] ?? .init()
47+
row[fieldIndex] = value
48+
self.storage[rowIndex] = row
4049
}
4150

4251
/// Retrieves and removes from the buffer the indicated value.
4352
func retrieveField(at rowIndex: Int, _ fieldIndex: Int) -> String? {
44-
#warning("TODO")
45-
fatalError()
53+
self.storage[rowIndex]?.removeValue(forKey: fieldIndex)
4654
}
4755

48-
/// Retrieves and removes from the buffer all rows/fields from the given indices.
49-
///
50-
/// This function never returns rows at an index smaller than the passed `rowIndex`. Also, for the `rowIndex`, it doesn't return the fields previous the `fieldIndex`.
51-
func retrieveSequence(from rowIndex: Int, fieldIndex: Int) -> RowSequence {
52-
#warning("TODO")
53-
fatalError()
56+
/// Retrieves and removes from the buffer all rows/fields.
57+
func retrieveAll() -> RowSequence {
58+
let sequence = RowSequence(self.storage)
59+
self.storage.removeAll(keepingCapacity: false)
60+
return sequence
5461
}
5562
}
5663

5764
extension ShadowEncoder.Sink.Buffer {
5865
///
5966
struct RowSequence: Sequence, IteratorProtocol {
67+
///
68+
private var inverseSort: [(key: Int, value: [Int:String])]
69+
///
70+
init(_ storage: [Int:[Int:String]]) {
71+
self.inverseSort = storage.sorted { $0.key > $1.key }
72+
}
6073
///
6174
mutating func next() -> Row? {
62-
#warning("TODO")
63-
fatalError()
75+
guard !self.inverseSort.isEmpty else { return nil }
76+
let element = self.inverseSort.removeLast()
77+
var fields = element.value.map { Field(index: $0.key, value: $0.value) }
78+
fields.sort { $0.index < $1.index }
79+
return Row(index: element.key, fields: fields)
6480
}
65-
66-
var isEmpty: Bool {
67-
#warning("TODO")
68-
fatalError()
81+
///
82+
var firstIndex: (row: Int, field: Int)? {
83+
guard let row = self.inverseSort.last else { return nil }
84+
guard let fieldIndex = row.value.keys.sorted().first else { fatalError() }
85+
return (row.key, fieldIndex)
6986
}
7087
}
7188
}

0 commit comments

Comments
 (0)