Skip to content

Commit 14f9a32

Browse files
committed
Handle both LF and CRLF #37
1 parent f0c72b3 commit 14f9a32

14 files changed

+268
-197
lines changed

sources/Delimiter.swift

Lines changed: 92 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,60 +2,132 @@
22
public enum Delimiter {
33
/// The CSV pair of delimiters (field & row delimiters).
44
public typealias Pair = (field: Self.Field, row: Self.Row)
5-
/// The CSV pair of delimiter in string format.
6-
internal typealias RawPair = (field: [Unicode.Scalar], row: [Unicode.Scalar])
75
}
86

97
extension Delimiter {
108
/// The delimiter between fields/values.
11-
public struct Field: ExpressibleByNilLiteral, ExpressibleByStringLiteral, RawRepresentable {
12-
public let rawValue: String.UnicodeScalarView
9+
///
10+
/// If the delimiter is initialized with `nil`, it implies the field delimiter is unknown and the system should try to figure it out.
11+
public struct Field: ExpressibleByNilLiteral, ExpressibleByStringLiteral, CustomStringConvertible {
12+
/// The accepted field delimiter. Usually a comma `,`.
13+
///
14+
/// If it's empty, the field delimiter is unknown.
15+
internal let scalars: [Unicode.Scalar]
1316

1417
public init(nilLiteral: ()) {
15-
self.rawValue = .init()
18+
self.scalars = Array()
1619
}
1720

1821
public init(unicodeScalarLiteral value: Unicode.Scalar) {
19-
self.rawValue = .init(repeating: value, count: 1)
22+
self.scalars = [value]
2023
}
2124

2225
public init(stringLiteral value: String) {
23-
self.rawValue = value.unicodeScalars
26+
precondition(!value.isEmpty)
27+
self.scalars = Array(value.unicodeScalars)
2428
}
2529

26-
public init?(rawValue: String.UnicodeScalarView) {
27-
self.rawValue = rawValue
30+
/// The field delimiter is represented by the given `String`-like type.
31+
/// - parameter delimiter: The exact composition of the field delimiter. If empty, the initializer fails returning `nil`.
32+
public init?<S:StringProtocol>(_ delimiter: S) {
33+
guard !delimiter.isEmpty else { return nil }
34+
self.scalars = Array(delimiter.unicodeScalars)
2835
}
2936

30-
public init<S:StringProtocol>(_ value: S) {
31-
self.rawValue = String.UnicodeScalarView(value.unicodeScalars)
37+
/// Boolean indicating if the exact unicode scalar composition for the field delimiter is known or unknown.
38+
internal var isKnown: Bool {
39+
!self.scalars.isEmpty
40+
}
41+
42+
/// Returns the `String` representation of the field delimiter.
43+
public var description: String {
44+
String(String.UnicodeScalarView(self.scalars))
3245
}
3346
}
3447
}
3548

3649
extension Delimiter {
3750
/// The delimiter between rows.
38-
public struct Row: ExpressibleByNilLiteral, ExpressibleByStringLiteral, RawRepresentable {
39-
public let rawValue: String.UnicodeScalarView
51+
///
52+
/// If the delimiter is initialized with `nil`, it implies the row delimiter is unknown and the system should try to figure it out.
53+
public struct Row: ExpressibleByStringLiteral, ExpressibleByNilLiteral, CustomStringConvertible {
54+
/// All the accepted row delimiters. Usually, it is only one.
55+
/// - invariant: The elements of the set (i.e. the arrays) always contain at least one element.
56+
internal let scalars: Set<[Unicode.Scalar]>
57+
58+
/// Specifies two row delimiters: CR (Carriage Return) LF (Line Feed) `\r\n` and a single line feed `\n`.
59+
///
60+
/// This delimiter is intended to be used with CSVs where the end of the row may be marked with a CRLF sometimes and other times with LF.
61+
public static var standard: Self {
62+
self.init("\n", "\r\n")!
63+
}
4064

4165
public init(nilLiteral: ()) {
42-
self.rawValue = .init()
66+
self.scalars = Set()
4367
}
4468

4569
public init(unicodeScalarLiteral value: Unicode.Scalar) {
46-
self.rawValue = .init(repeating: value, count: 1)
70+
var delimiters = Set<[Unicode.Scalar]>(minimumCapacity: 1)
71+
delimiters.insert([value])
72+
self.scalars = delimiters
4773
}
4874

4975
public init(stringLiteral value: String) {
50-
self.rawValue = value.unicodeScalars
76+
precondition(!value.isEmpty)
77+
78+
var delimiters = Set<[Unicode.Scalar]>(minimumCapacity: 1)
79+
delimiters.insert(Array(value.unicodeScalars))
80+
self.scalars = delimiters
81+
}
82+
83+
/// Creates one or more possible row delimiters.
84+
/// - parameter delimiters: The exact composition of the row delimiters. Empty `delimiters` are discarded; if no non-empty delimiter remains (or none is passed), the initializer fails returning `nil`.
85+
public init?<S:StringProtocol>(_ delimiters: S...) {
86+
let scalars: [[Unicode.Scalar]] = delimiters.compactMap {
87+
guard !$0.isEmpty else { return nil }
88+
return Array($0.unicodeScalars)
89+
}
90+
guard !scalars.isEmpty else { return nil }
91+
self.scalars = Set(scalars)
5192
}
5293

53-
public init?(rawValue: String.UnicodeScalarView) {
54-
self.rawValue = rawValue
94+
/// Boolean indicating if the exact unicode scalar composition for the row delimiter is known or unknown.
95+
internal var isKnown: Bool {
96+
!self.scalars.isEmpty
5597
}
5698

57-
public init<S:StringProtocol>(_ value: S) {
58-
self.rawValue = String.UnicodeScalarView(value.unicodeScalars)
99+
/// Returns the `String` representation of the row delimiter.
///
/// If more than one row delimiter has been provided, the one with the fewest unicode scalars is selected; ties are broken by picking the delimiter whose scalars sort first lexicographically (i.e. lower scalar values first).
/// If the row delimiter is unknown (no delimiter is stored), an empty `String` is returned.
public var description: String {
  // `min(by:)` yields `nil` for an empty set (unknown delimiter); that case is mapped to an empty string instead of being force-unwrapped.
  let preferred = self.scalars.min { (lhs, rhs) in
    guard lhs.count == rhs.count else { return lhs.count < rhs.count }
    // Strict ordering: an element never precedes an equal one, as `min(by:)` requires.
    return lhs.lexicographicallyPrecedes(rhs)
  }
  guard let scalars = preferred else { return "" }
  return String(String.UnicodeScalarView(scalars))
}
109+
}
110+
}
111+
112+
internal extension Delimiter {
  /// The validated, exact unicode scalar composition of a CSV field delimiter and of its row delimiters.
  struct Scalars {
    /// The unicode scalars identifying the field delimiter.
    /// - invariant: The array always contains at least one element.
    let field: [Unicode.Scalar]
    /// Every possible row delimiter, each given as its exact composition of unicode scalars.
    /// - invariant: The set always contains at least one element and every set element contains at least one scalar.
    let row: Set<[Unicode.Scalar]>

    /// Designated initializer validating that no delimiter is empty and that no row delimiter is identical to the field delimiter.
    /// - parameter field: The exact composition of the field delimiter. If empty, `nil` is returned.
    /// - parameter row: The exact composition of all possible row delimiters. If the set is empty, or any of its elements is empty or equal to `field`, `nil` is returned.
    init?(field: [Unicode.Scalar], row: Set<[Unicode.Scalar]>) {
      // Reject missing delimiters up front.
      if field.isEmpty || row.isEmpty { return nil }
      // Every row delimiter must be non-empty and must differ from the field delimiter.
      for candidate in row {
        if candidate.isEmpty || candidate == field { return nil }
      }
      self.field = field
      self.row = row
    }
  }
}

sources/Deprecated.swift

Lines changed: 0 additions & 22 deletions
This file was deleted.

sources/imperative/reader/Reader.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ public final class CSVReader: IteratorProtocol, Sequence {
1919
/// The unicode scalar decoder providing all input data.
2020
private let _decoder: ScalarDecoder
2121
/// Check whether the given unicode scalar is part of the field delimiter sequence.
22-
private let _isFieldDelimiter: DelimiterChecker
22+
private let _isFieldDelimiter: Delimiter.Scalars.Checker
2323
/// Check whether the given unicode scalar is part of the row delimiter sequence.
24-
private let _isRowDelimiter: DelimiterChecker
24+
private let _isRowDelimiter: Delimiter.Scalars.Checker
2525
/// The amount of rows (counting the header row) that have been read and the amount of fields that should be in each row.
2626
internal private(set) var count: (rows: Int, fields: Int)
2727
/// The reader status indicating whether there are remaining lines to read, the CSV has been completely parsed, or an error occurred and no further operation shall be performed.
@@ -40,8 +40,8 @@ public final class CSVReader: IteratorProtocol, Sequence {
4040
self._fieldBuffer = .init()
4141
self._fieldBuffer.reserveCapacity(128)
4242
self._decoder = decoder
43-
self._isFieldDelimiter = CSVReader.makeMatcher(delimiter: self._settings.delimiters.field, buffer: self._scalarBuffer, decoder: self._decoder)
44-
self._isRowDelimiter = CSVReader.makeMatcher(delimiter: self._settings.delimiters.row, buffer: self._scalarBuffer, decoder: self._decoder)
43+
self._isFieldDelimiter = self._settings.delimiters.makeFieldMatcher(buffer: self._scalarBuffer, decoder: self._decoder)
44+
self._isRowDelimiter = self._settings.delimiters.makeRowMatcher(buffer: self._scalarBuffer, decoder: self._decoder)
4545
self.count = (0, 0)
4646
self.status = .active
4747

@@ -328,7 +328,7 @@ fileprivate extension CSVReader.Error {
328328
static func _invalidUnescapedField(rowIndex: Int) -> CSVError<CSVReader> {
329329
.init(.invalidInput,
330330
reason: "The escaping scalar (double quotes by default) is not allowed within fields which aren't already escaped.",
331-
help: "Add the escaping scalar add the very beginning and the very end of the field and escape the escaping scalar found within the field.",
331+
help: "Add the escaping scalar at the very beginning and the very end of the field and escape the escaping scalar found within the field.",
332332
userInfo: ["Row index": rowIndex])
333333
}
334334
/// Error raised when an EOF has been received but the last CSV field was not finalized.

0 commit comments

Comments
 (0)