Skip to content

Commit c3c3393

Browse files
committed
First complete Encodable implementation (.keepAll strategy)
1 parent db4e4b0 commit c3c3393

File tree

6 files changed

+273
-149
lines changed

6 files changed

+273
-149
lines changed

sources/Codable/Encodable/Containers/SingleValueEncodingContainer.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ extension ShadowEncoder.SingleValueContainer {
225225
}
226226

227227
let string = try transform()
228-
try sink.field(value: string, at: rowIndex, fieldIndex)
228+
try sink.fieldValue(string, rowIndex, fieldIndex)
229229
}
230230
}
231231

sources/Codable/Encodable/EncoderConfiguration.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ extension CSVEncoder {
2121
self.decimalStrategy = .locale(nil)
2222
self.dateStrategy = .deferredToDate
2323
self.dataStrategy = .base64
24-
self.bufferingStrategy = .unfulfilled
24+
self.bufferingStrategy = .keepAll
2525
}
2626
}
2727
}

sources/Codable/Encodable/EncodingStrategy.swift

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,27 @@ extension Strategy {
4141
case custom((Data, Encoder) throws -> Void)
4242
}
4343

44-
/// Indication on how encoded CSV rows are cached and actually written to the output target (file, data blocb, or string).
44+
/// Indication of how encoded CSV rows are cached and written to the output target (file, data blob, or string).
4545
///
46-
/// CSV encoding is an inherently sequential operation, i.e. row 2 must be encoded after row 1. On the other hand, the `Encodable` protocol allows CSV rows to be encoded in a random-order
46+
/// CSV encoding is an inherently sequential operation, i.e. row 2 must be encoded after row 1. On the other hand, the `Encodable` protocol allows CSV rows to be encoded in random order through *keyed containers*. Selecting the appropriate buffering strategy lets you pick your encoding style and minimize memory usage.
4747
public enum EncodingBuffer {
48-
/// Encoded rows are being kept in memory till it is their turn to be written to the targeted output.
48+
/// All encoded rows/fields are cached and the *writing* only occurs at the end of the encodable process.
4949
///
50-
/// Foward encoding jumps are allowed and the user may jump backward to continue encoding.
50+
/// *Keyed containers* can be used to encode rows/fields unordered. That means a row at position 5 may be encoded before the row at position 3. Similar behavior is supported for fields within a row.
51+
/// - attention: This strategy consumes the largest amount of memory from all the supported options.
52+
case keepAll
53+
/// Encoded rows may be cached, but the encoder will keep the buffer as small as possible by writing completed ordered rows.
54+
///
55+
/// *Keyed containers* can be used to encode rows/fields unordered. The writer will however consume rows in order.
56+
///
57+
/// For example, an encoder starts encoding row 1 and it gets all its fields. The row will get written and no cache for the row is kept. Same situation occurs when the row 2 is encoded.
58+
/// However, the user may decide to jump to row 5 and encode it. This row will be kept in the cache until rows 3 and 4 are encoded, at which time rows 3, 4, 5, and any subsequent rows will be written.
59+
/// - attention: This strategy tries to keep the cache to a minimum, but memory usage may grow large if there are gaps while encoding rows. Those gaps are filled with empty rows at the end of the encoding process.
5160
case unfulfilled
5261
/// No rows are kept in memory and writes are performed sequentially.
5362
///
54-
/// If a keyed container is used to encode rows and a jump forward is requested all the in-between rows are filled with empty fields.
63+
/// *Keyed containers* can be used, however when forward jumps are performed any in-between rows will be filled with empty fields.
64+
/// - attention: This strategy provides the smallest usage of memory from all.
5565
case sequential
5666
}
5767
}

sources/Codable/Encodable/Shadow/Sink.swift

Lines changed: 76 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ extension ShadowEncoder {
1111
let userInfo: [CodingUserInfoKey:Any]
1212
/// Lookup dictionary providing fast index discovery for header names.
1313
private var headerLookup: [Int:Int]
14+
/// Encodes the given field in the given position.
15+
let fieldValue: (_ value: String, _ rowIndex: Int, _ fieldIndex: Int) throws -> Void
1416

1517
/// Creates the unique data sink for the encoding process.
1618
init(writer: CSVWriter, configuration: CSVEncoder.Configuration, userInfo: [CodingUserInfoKey:Any]) throws {
@@ -19,10 +21,54 @@ extension ShadowEncoder {
1921
self.configuration = configuration
2022
self.userInfo = userInfo
2123
self.headerLookup = .init()
24+
25+
switch configuration.bufferingStrategy {
26+
case .keepAll:
27+
self.fieldValue = { [unowned buffer = self.buffer] in buffer.store(value: $0, at: $1, $2) }
28+
case .unfulfilled:
29+
fatalError()
30+
case .sequential:
31+
fatalError()
32+
}
2233
}
2334
}
2435
}
2536

37+
//func field(value: String, at rowIndex: Int, _ fieldIndex: Int) throws {
38+
// #warning("How to deal with intended field gaps?")
39+
// // When the next row is written, check the previous row.
40+
// // What happens when there are several empty rows?
41+
//
42+
// // 1. Is the requested row the same as the writer's row focus?
43+
// guard self.writer.rowIndex == rowIndex else {
44+
// // 1.1. If not, the row must not have been written yet (otherwise an error is thrown).
45+
// guard self.writer.rowIndex > rowIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
46+
// // 1.2. If the row hasn't been written yet, store it in the buffer.
47+
// return self.buffer.store(value: value, at: rowIndex, fieldIndex)
48+
// }
49+
// // 2. Is the requested field the same as the writer's field focus?
50+
// guard self.writer.fieldIndex == fieldIndex else {
51+
// // 2.1 If not, the field must not have been written yet (otherwise an error is thrown).
52+
// guard self.writer.fieldIndex > fieldIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
53+
// // 2.2 If the field hasn't been written yet, store it in the buffer.
54+
// return self.buffer.store(value: value, at: rowIndex, fieldIndex)
55+
// }
56+
// // 3. Write the provided field since it is the same as the writer's row/field.
57+
// try self.writer.write(field: value)
58+
// // 4. How many fields per row there are? If unknown, stop.
59+
// guard self.writer.expectedFields > 0 else { return }
60+
// #warning("How to deal with the first ever row when no headers are given?")
61+
// while true {
62+
// // 5. If is not the end of the row, check the buffer and see whether the following fields are already cached.
63+
// while self.writer.fieldIndex < self.writer.expectedFields {
64+
// guard let field = self.buffer.retrieveField(at: self.writer.rowIndex, self.writer.fieldIndex) else { return }
65+
// try self.writer.write(field: field)
66+
// }
67+
// // 6. If it is the end of the row, write the row delimiter and pass to the next row.
68+
// try self.writer.endRow()
69+
// }
70+
//}
71+
2672
extension ShadowEncoder.Sink {
2773
/// The number of fields expected per row.
2874
///
@@ -42,10 +88,13 @@ extension ShadowEncoder.Sink {
4288
///
4389
/// The fields might not yet be fully encoded (i.e. written in their binary format).
4490
func numEncodedFields(at rowIndex: Int) -> Int {
91+
// 1. If the requested row has already been written, it can be safely assumed that all the fields were written.
4592
if rowIndex < self.writer.rowIndex {
4693
return self.writer.expectedFields
94+
// 2. If the row index is the same as the one being targeted by the writer, the number is the sum of the writer and the buffer.
4795
} else if rowIndex == self.writer.rowIndex {
48-
return max(self.writer.fieldIndex, self.buffer.fieldCount(for: rowIndex))
96+
return self.writer.fieldIndex + self.buffer.fieldCount(for: rowIndex)
97+
// 3. If the row hasn't been written yet, query the buffer.
4998
} else {
5099
return self.buffer.fieldCount(for: rowIndex)
51100
}
@@ -57,80 +106,49 @@ extension ShadowEncoder.Sink {
57106
/// - parameter key: The coding key representing the field's position within a row, or the field's name within the headers row.
58107
/// - returns: The position of the field within the row.
59108
func fieldIndex(forKey key: CodingKey, codingPath: [CodingKey]) throws -> Int {
109+
// 1. If the key can be transformed into an integer, prefer that.
60110
if let index = key.intValue { return index }
61-
111+
// 2. If not, extract the header name from the key.
62112
let name = key.stringValue
113+
// 3. Get the header lookup dictionary (building it if it is the first time accessing it).
63114
if self.headerLookup.isEmpty {
64115
guard !self.configuration.headers.isEmpty else { throw CSVEncoder.Error.emptyHeader(key: key, codingPath: codingPath) }
65116
self.headerLookup = try self.configuration.headers.lookupDictionary(onCollision: { CSVEncoder.Error.invalidHashableHeader() })
66117
}
67-
118+
// 4. Get the index from the header lookup up and the header name.
68119
return try self.headerLookup[name.hashValue] ?! CSVEncoder.Error.unmatchedHeader(forKey: key, codingPath: codingPath)
69120
}
70121

71-
/// Encodes the given field in the given position.
72-
func field(value: String, at rowIndex: Int, _ fieldIndex: Int) throws {
73-
#warning("How to deal with intended field gaps?")
74-
// When the next row is writen, check the previous row.
75-
// Although, what happens when there are several empty rows?
76-
77-
// 1. Is the requested row the same position as the writer's row?
78-
guard self.writer.rowIndex == rowIndex else {
79-
// 1.1. If not, the row must not have been written yet (otherwise an error is thrown).
80-
guard self.writer.rowIndex > rowIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
81-
// 1.2. If the row hasn't been writen yet, store it in the buffer.
82-
return self.buffer.store(value: value, at: rowIndex, fieldIndex)
83-
}
84-
// 2. Is the requested field the same as the writer's field?
85-
guard self.writer.fieldIndex == fieldIndex else {
86-
// 2.1 If not, the field must not have been written yet (otherwise an error is thrown).
87-
guard self.writer.fieldIndex > fieldIndex else { throw CSVEncoder.Error.writingSurpassed(rowIndex: rowIndex, fieldIndex: fieldIndex, value: value) }
88-
// 2.2 If the field hasn't been writen yet, store it in the buffer.
89-
return self.buffer.store(value: value, at: rowIndex, fieldIndex)
90-
}
91-
// 3. Write the provided field since it is the same as the writer's row/field.
92-
try self.writer.write(field: value)
93-
// 4. How many fields per row there are? If unknown, stop.
94-
guard self.writer.expectedFields > 0 else { return }
95-
#warning("How to deal with the first ever row when no headers are given?")
96-
while true {
97-
// 5. If is not the end of the row, check the buffer and see whether the following fields are already cached.
98-
while self.writer.fieldIndex < self.writer.expectedFields {
99-
guard let field = self.buffer.retrieveField(at: self.writer.rowIndex, self.writer.fieldIndex) else { return }
100-
try self.writer.write(field: field)
101-
}
102-
// 6. If it is the end of the row, write the row delimiter and pass to the next row.
103-
try self.writer.endRow()
104-
}
105-
}
106-
107122
/// Finishes the whole encoding operation by committing to the writer any remaining row/field in the buffer.
108123
///
109124
/// This function works even when the number of fields per row are unknown.
110125
func completeEncoding() throws {
111126
// 1. Remove from the buffer the rows/fields from the writer point.
112-
var remainings = self.buffer.retrieveSequence(from: self.writer.rowIndex, fieldIndex: self.writer.fieldIndex)
113-
// 2. After the removal there should be any more rows/fields in the buffer.
114-
guard self.buffer.isEmpty else { throw CSVEncoder.Error.corruptedBuffer() }
115-
// 3. Iterate through all the remaining rows.
116-
while let row = remainings.next() {
117-
// 4. If the writer is further back from the next remaining row. Fill the writer with empty rows.
118-
while self.writer.rowIndex < row.index {
119-
try self.writer.endRow()
120-
}
121-
// 5. Iterate through all the fields in the row.
122-
for field in row.fields {
123-
// 6. If the row is further back from the next remaining field. Fill the writer with empty fields.
124-
while self.writer.fieldIndex < field.index {
125-
try self.writer.write(field: "")
127+
var remainings = self.buffer.retrieveAll()
128+
// 2. Check whether there is any remaining row whatsoever.
129+
if let firstIndex = remainings.firstIndex {
130+
// 3. The first indeces must be the same or greater than the writer ones.
131+
guard firstIndex.row >= self.writer.rowIndex, firstIndex.field >= self.writer.fieldIndex else { throw CSVEncoder.Error.corruptedBuffer() }
132+
// 4. Iterate through all the remaining rows.
133+
while let row = remainings.next() {
134+
// 5. If the writer is further back from the next remaining row. Fill the writer with empty rows.
135+
while self.writer.rowIndex < row.index {
136+
try self.writer.endRow()
137+
}
138+
// 6. Iterate through all the fields in the row.
139+
for field in row.fields {
140+
// 7. If the row is further back from the next remaining field. Fill the writer with empty fields.
141+
while self.writer.fieldIndex < field.index {
142+
try self.writer.write(field: "")
143+
}
144+
// 8. Write the targeted field.
145+
try self.writer.write(field: field.value)
126146
}
127-
// 7. Write the targeted field.
128-
try self.writer.write(field: field.value)
147+
// 9. Finish the targeted row.
148+
try self.writer.endRow()
129149
}
130-
// 8. Finish the targeted row.
131-
try self.writer.endRow()
132150
}
133-
// 9. Finish the file.
151+
// 10. Finish the file.
134152
try self.writer.endFile()
135153
}
136154
}

sources/Codable/Encodable/Shadow/SinkBuffer.swift

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,69 +3,86 @@ extension ShadowEncoder.Sink {
33
internal final class Buffer {
44
/// The buffering strategy.
55
let strategy: Strategy.EncodingBuffer
6+
/// The underlying storage.
7+
private var storage: [Int: [Int:String]]
68

79
/// Designated initializer.
810
init(strategy: Strategy.EncodingBuffer) {
911
self.strategy = strategy
10-
#warning("TODO: EncodingBuffer strategy")
12+
13+
let capacity: Int
14+
switch strategy {
15+
case .keepAll: capacity = 32
16+
case .unfulfilled: capacity = 8
17+
case .sequential: capacity = 1
18+
}
19+
self.storage = .init(minimumCapacity: capacity)
1120
}
1221
}
1322
}
1423

1524
extension ShadowEncoder.Sink.Buffer {
16-
///
25+
/// A Boolean value indicating whether the buffer currently holds no rows.
1726
var isEmpty: Bool {
18-
#warning("TODO")
19-
fatalError()
27+
self.storage.isEmpty
2028
}
2129

2230
/// The number of rows being hold by the receiving buffer.
2331
var count: Int {
24-
#warning("TODO")
25-
fatalError()
32+
self.storage.count
2633
}
2734

2835
/// Returns the number of fields that have been received for the given row.
2936
///
3037
/// If none, it returns *zero*.
3138
func fieldCount(for rowIndex: Int) -> Int {
32-
#warning("TODO")
33-
fatalError()
39+
self.storage[rowIndex]?.count ?? 0
3440
}
3541

42+
/// Stores the provided `value` into the temporary storage associating its position as `rowIndex` and `fieldIndex`.
3643
///
44+
/// If there was a value at that position, the value is overwritten.
3745
func store(value: String, at rowIndex: Int, _ fieldIndex: Int) {
38-
#warning("TODO")
39-
fatalError()
46+
var row = self.storage[rowIndex] ?? .init()
47+
row[fieldIndex] = value
48+
self.storage[rowIndex] = row
4049
}
4150

4251
/// Retrieves and removes from the buffer the indicated value.
4352
func retrieveField(at rowIndex: Int, _ fieldIndex: Int) -> String? {
44-
#warning("TODO")
45-
fatalError()
53+
self.storage[rowIndex]?.removeValue(forKey: fieldIndex)
4654
}
4755

48-
/// Retrieves and removes from the buffer all rows/fields from the given indices.
49-
///
50-
/// This function never returns rows at an index smaller than the passed `rowIndex`. Also, for the `rowIndex`, it doesn't return the fields previous the `fieldIndex`.
51-
func retrieveSequence(from rowIndex: Int, fieldIndex: Int) -> RowSequence {
52-
#warning("TODO")
53-
fatalError()
56+
/// Retrieves and removes from the buffer all rows/fields.
57+
func retrieveAll() -> RowSequence {
58+
let sequence = RowSequence(self.storage)
59+
self.storage.removeAll(keepingCapacity: false)
60+
return sequence
5461
}
5562
}
5663

5764
extension ShadowEncoder.Sink.Buffer {
5865
///
5966
struct RowSequence: Sequence, IteratorProtocol {
67+
///
68+
private var inverseSort: [(key: Int, value: [Int:String])]
69+
///
70+
init(_ storage: [Int:[Int:String]]) {
71+
self.inverseSort = storage.sorted { $0.key > $1.key }
72+
}
6073
///
6174
mutating func next() -> Row? {
62-
#warning("TODO")
63-
fatalError()
75+
guard !self.inverseSort.isEmpty else { return nil }
76+
let element = self.inverseSort.removeLast()
77+
var fields = element.value.map { Field(index: $0.key, value: $0.value) }
78+
fields.sort { $0.index < $1.index }
79+
return Row(index: element.key, fields: fields)
6480
}
65-
66-
var isEmpty: Bool {
67-
#warning("TODO")
68-
fatalError()
81+
///
82+
var firstIndex: (row: Int, field: Int)? {
83+
guard let row = self.inverseSort.last else { return nil }
84+
guard let fieldIndex = row.value.keys.sorted().first else { fatalError() }
85+
return (row.key, fieldIndex)
6986
}
7087
}
7188
}

0 commit comments

Comments
 (0)