Skip to content

Commit fabe684

Browse files
committed
Unicode scalar encoder added
1 parent 7052679 commit fabe684

File tree

9 files changed

+287
-186
lines changed

9 files changed

+287
-186
lines changed

Sources/Active/Reader/ReaderDecoder.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import Foundation
22

33
internal extension CSVReader {
4-
/// Function where each time that is executed it generates a new scalar (from the input data), it throws an error, or returns `nil` indicating the end of the file.
4+
/// Closure that, each time it is executed, generates a new scalar (from the input data), throws an error, or returns `nil` to indicate the end of the file.
55
typealias ScalarDecoder = () throws -> Unicode.Scalar?
66

77
/// Creates a custom `Unicode.Scalar` iterator wrapping a simple scalar iterator (usually a `String.UnicodeScalarView.Iterator`).

Sources/Active/Writer/Writer.swift

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ public final class CSVWriter {
66
public let configuration: Configuration
77
/// Internal writer settings extracted from the public `configuration` and other values inferred during initialization.
88
private let settings: Settings
9-
// /// Encoder used to transform unicode scalars into a bunch of bytes.
10-
// private let encoder: Unicode.Scalar.Encoder
9+
/// Encoder used to transform Unicode scalars into bytes and store them in the output.
10+
private let encoder: ScalarEncoder
1111
// /// Unicode scalar buffer to keep scalars that haven't yet been analysed.
1212
// private let buffer: ScalarBuffer
1313
// /// Check whether the given unicode scalar is part of the field delimiter sequence.
@@ -23,26 +23,28 @@ public final class CSVWriter {
2323
// /// The writer state indicating whether it has already begun working or it is idle.
2424
// private var state: (file: State.File, row: State.Row)
2525

26-
init() {
27-
fatalError()
26+
/// Designated initializer that will set up the CSV writer.
27+
///
28+
/// To start "writing", call `beginFile()` after this initializer.
29+
/// ```swift
30+
/// let writer = try CSVWriter(configuration: config, encoder: transformer)
31+
/// try writer.beginFile(bom: ..., writeHeaders: true)
32+
/// try writer.write(field: "Coco")
33+
/// try writer.write(field: "Dog")
34+
/// try writer.write(field: "2")
35+
/// try writer.endRow()
36+
/// try writer.endFile()
37+
/// ```
38+
/// - note: The output destination is not passed to this initializer; it is expected to be captured by the `encoder` closure.
39+
/// - parameter configuration: The configurations for the CSV writer.
40+
/// - parameter encoder: The function transforming unicode scalars into the desired binary representation.
41+
/// - throws: `CSVError<CSVWriter>` exclusively.
42+
init(configuration: Configuration, encoder: @escaping ScalarEncoder) throws {
43+
self.configuration = configuration
44+
self.settings = try Settings(configuration: configuration)
45+
self.encoder = encoder
2846
}
2947

30-
// /// Designated initializer that will set up the CSV writer.
31-
// ///
32-
// /// To start "writing", call `beginFile()` after this initializer.
33-
// /// ```swift
34-
// /// let writer = try CSVWriter(output: (stream, true), configuration: config, encoder: transformer)
35-
// /// try writer.beginFile(bom: ..., writeHeaders: true)
36-
// /// try writer.write(field: "Coco")
37-
// /// try writer.write(field: "Dog")
38-
// /// try writer.write(field: "2")
39-
// /// try writer.endRow()
40-
// /// try writer.endFile()
41-
// /// ```
42-
// /// - parameter output: The output stream on where to write the encoded rows/fields.
43-
// /// - parameter configuration: The configurations for the CSV writer.
44-
// /// - parameter encoder: The function transforming unicode scalars into the desired binary representation.
45-
// /// - throws: `CSVWriter.Error` exclusively.
4648
// internal init(output: (stream: OutputStream, closeAtEnd: Bool), configuration: Configuration, encoder: @escaping Unicode.Scalar.Encoder) throws {
4749
// self.settings = try Settings(configuration: configuration)
4850
//

Sources/Active/Writer/WriterAPI.swift

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,14 @@ extension CSVWriter {
66
/// - parameter rows: A sequence of rows whose elements are sequences of `String`-like elements.
77
/// - parameter configuration: Configuration values specifying how the CSV output should look like.
88
/// - throws: `CSVError<CSVWriter>` exclusively.
9-
@inlinable public convenience init<S:Sequence,Sub:Sequence>(output type: String.Type, rows: S, configuration: Configuration = .init()) throws where S.Element==Sub, Sub.Element:StringProtocol {
9+
public convenience init<S:Sequence,Sub:Sequence>(output type: String.Type, rows: S, configuration: Configuration = .init()) throws where S.Element==Sub, Sub.Element:StringProtocol {
10+
let (encoding, bom) = try String.Encoding.selectFrom(provided: configuration.encoding, inferred: nil, serializeBOM: configuration.serializeBOM)
11+
12+
let stream = OutputStream(toMemory: ())
13+
#warning("Open the stream")
14+
15+
let encoder = try CSVWriter.makeEncoder(from: stream, encoding: encoding, firstBytes: bom)
16+
let writer = try CSVWriter(configuration: configuration, encoder: encoder)
1017
fatalError()
1118
}
1219

Sources/Active/Writer/WriterConfiguration.swift

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,12 @@ extension CSVWriter {
3535
let headers: [String]
3636
/// The unicode scalar used as encapsulator and escaping character (when printed two times).
3737
let escapingScalar: Unicode.Scalar = "\""
38-
/// The bytes representing the BOM encoding. If empty, no bytes will be written.
39-
let bom: [UInt8]
4038

4139
/// Designated initializer taking generic CSV configuration (with possible unknown data) and making it specific to a CSV writer instance.
4240
/// - parameter configuration: The public CSV writer configuration variables.
4341
/// - throws: `CSVWriter.Error` exclusively.
44-
init(configuration: CSVWriter.Configuration, fileEncoding: String.Encoding?) throws {
45-
// 1. Copy headers.
46-
self.headers = configuration.headers
47-
// 2. Validate the delimiters.
42+
init(configuration: CSVWriter.Configuration) throws {
43+
// 1. Validate the delimiters.
4844
let (field, row) = (configuration.delimiters.field.rawValue, configuration.delimiters.row.rawValue)
4945
if field.isEmpty || row.isEmpty {
5046
throw Error.invalidEmptyDelimiter()
@@ -53,41 +49,13 @@ extension CSVWriter {
5349
} else {
5450
self.delimiters = (field, row)
5551
}
56-
// 3. Set up the right BOM.
57-
let encoding: String.Encoding
58-
switch (configuration.encoding, fileEncoding) {
59-
case (let e?, nil): encoding = e
60-
case (nil, let e?): encoding = e
61-
case (nil, nil): encoding = .utf8
62-
case (let lhs?, let rhs?) where lhs == rhs: encoding = lhs
63-
case (let lhs?, let rhs?): throw Error.invalidEncoding(provided: lhs, file: rhs)
64-
}
65-
66-
switch (configuration.serializeBOM, encoding) {
67-
case (.always, .utf8): self.bom = BOM.UTF8
68-
case (.always, .utf16LittleEndian): self.bom = BOM.UTF16.littleEndian
69-
case (.always, .utf16BigEndian),
70-
(.always, .utf16), (.standard, .utf16),
71-
(.always, .unicode), (.standard, .unicode): self.bom = BOM.UTF16.bigEndian
72-
case (.always, .utf32LittleEndian): self.bom = BOM.UTF32.littleEndian
73-
case (.always, .utf32BigEndian),
74-
(.always, .utf32), (.standard, .utf32): self.bom = BOM.UTF32.bigEndian
75-
default: self.bom = .init()
76-
}
52+
// 2. Copy headers.
53+
self.headers = configuration.headers
7754
}
7855
}
7956
}
8057

8158
fileprivate extension CSVWriter.Error {
82-
/// Error raised when the provided string encoding is different than the inferred file encoding.
83-
/// - parameter provided: The string encoding provided by the user.
84-
/// - parameter file: The string encoding in the targeted file.
85-
static func invalidEncoding(provided: String.Encoding, file: String.Encoding) -> CSVError<CSVWriter> {
86-
.init(.invalidConfiguration,
87-
reason: "The encoding provided was different than the encoding detected on the file.",
88-
help: "Set the configuration encoding to nil or to the file encoding.",
89-
userInfo: ["Provided encoding": provided, "File encoding": file])
90-
}
9159
/// Error raised when the field and/or row delimiters are empty.
9260
/// - parameter delimiter: The indicated field and row delimiters.
9361
static func invalidEmptyDelimiter() -> CSVError<CSVWriter> {
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import Foundation
2+
3+
internal extension CSVWriter {
    /// Closure that, each time it is executed, encodes the given scalar and stores the resulting bytes on the final output.
    typealias ScalarEncoder = (Unicode.Scalar) throws -> Void

    /// Creates an encoder that takes a `Unicode.Scalar` and writes its byte representation on the given stream.
    ///
    /// `firstBytes` are written right away (before the closure is returned), so the stream presumably has to be open when this function is called.
    /// The returned closure keeps a strong reference to `stream`; the stream therefore stays alive for as long as the encoder does.
    /// - parameter stream: Output stream receiving the encoded data.
    /// - parameter encoding: The string encoding being used for the external representation.
    /// - parameter firstBytes: Bytes to be prepended at the beginning of the stream (e.g. a BOM). Nothing is written when empty.
    /// - throws: An error when the encoding is not supported or when `firstBytes` cannot be written on the stream.
    /// - returns: A closure that throws when the scalar cannot be represented in `encoding` or when the stream doesn't accept the bytes.
    static func makeEncoder(from stream: OutputStream, encoding: String.Encoding, firstBytes: [UInt8]) throws -> ScalarEncoder {
        if !firstBytes.isEmpty {
            try CSVWriter.lowlevelWriter(on: stream, bytes: firstBytes, count: firstBytes.count)
        }

        // The closures capture `stream` strongly on purpose: the encoder escapes this function and it may well be the
        // only owner of the stream. An `unowned` capture would crash as soon as the caller released its reference.
        switch encoding {
        case .ascii:
            return { (scalar) in
                guard var byte = Unicode.ASCII.encode(scalar)?.first else { throw Error.invalidASCII(scalar: scalar) }
                try CSVWriter.lowlevelWriter(on: stream, bytes: &byte, count: 1)
            }
        case .utf8:
            return { (scalar) in
                guard let encoded = Unicode.UTF8.encode(scalar) else { throw Error.invalidUTF8(scalar: scalar) }
                // The encoded scalar is a small value-type buffer without contiguous storage, so
                // `withContiguousStorageIfAvailable` would return `nil`. Copy the (1 to 4) code units into an array instead.
                let bytes = Array(encoded)
                try CSVWriter.lowlevelWriter(on: stream, bytes: bytes, count: bytes.count)
            }
        case .utf16BigEndian, .utf16, .unicode: // UTF16 & Unicode imply: follow the BOM and if it is not there, assume big endian.
            return { (scalar) in
                guard let units = Unicode.UTF16.encode(scalar) else { throw Error.invalidUTF16(scalar: scalar) }
                // Most significant byte first.
                let bytes = units.flatMap {
                    [UInt8(truncatingIfNeeded: $0 >> 8),
                     UInt8(truncatingIfNeeded: $0)]
                }
                try CSVWriter.lowlevelWriter(on: stream, bytes: bytes, count: bytes.count)
            }
        case .utf16LittleEndian:
            return { (scalar) in
                guard let units = Unicode.UTF16.encode(scalar) else { throw Error.invalidUTF16(scalar: scalar) }
                // Least significant byte first.
                let bytes = units.flatMap {
                    [UInt8(truncatingIfNeeded: $0),
                     UInt8(truncatingIfNeeded: $0 >> 8)]
                }
                try CSVWriter.lowlevelWriter(on: stream, bytes: bytes, count: bytes.count)
            }
        case .utf32BigEndian, .utf32:
            return { (scalar) in
                guard let units = Unicode.UTF32.encode(scalar) else { throw Error.invalidUTF32(scalar: scalar) }
                // Most significant byte first.
                let bytes = units.flatMap {
                    [UInt8(truncatingIfNeeded: $0 >> 24),
                     UInt8(truncatingIfNeeded: $0 >> 16),
                     UInt8(truncatingIfNeeded: $0 >> 8),
                     UInt8(truncatingIfNeeded: $0)]
                }
                try CSVWriter.lowlevelWriter(on: stream, bytes: bytes, count: bytes.count)
            }
        case .utf32LittleEndian:
            return { (scalar) in
                guard let units = Unicode.UTF32.encode(scalar) else { throw Error.invalidUTF32(scalar: scalar) }
                // Least significant byte first.
                let bytes = units.flatMap {
                    [UInt8(truncatingIfNeeded: $0),
                     UInt8(truncatingIfNeeded: $0 >> 8),
                     UInt8(truncatingIfNeeded: $0 >> 16),
                     UInt8(truncatingIfNeeded: $0 >> 24)]
                }
                try CSVWriter.lowlevelWriter(on: stream, bytes: bytes, count: bytes.count)
            }
        default: throw Error.unsupported(encoding: encoding)
        }
    }
}
73+
74+
fileprivate extension CSVWriter {
    /// Writes the given bytes on the stream, looping until all of them have been accepted.
    ///
    /// A single `write` call may accept fewer bytes than requested; in that case the remaining bytes are submitted again.
    /// - parameter stream: The output stream receiving the bytes.
    /// - parameter bytes: Pointer to the first byte to write. It must reference at least `count` readable bytes.
    /// - parameter count: The number of bytes to write. Nothing is written (and the stream is not touched) when it is zero or negative.
    /// - parameter attempts: The operation is abandoned once this many writes (in total, not consecutively) have accepted zero bytes.
    /// - throws: `CSVError<CSVWriter>` when the stream reports a failure or when it repeatedly accepts no bytes.
    static func lowlevelWriter(on stream: OutputStream, bytes: UnsafePointer<UInt8>, count: Int, attempts: Int = 2) throws {
        // An empty payload is trivially written; a zero-length `write` would otherwise be counted as a failed attempt.
        guard count > 0 else { return }

        var (distance, remainingAttempts) = (0, attempts)

        while distance < count {
            let written = stream.write(bytes.advanced(by: distance), maxLength: count - distance)

            if written > 0 {
                distance += written
            } else if written == 0 {
                // The stream accepted nothing: retry until the attempts are exhausted.
                remainingAttempts -= 1
                guard remainingAttempts > 0 else {
                    throw Error.streamEmptyWrite(underlyingError: stream.streamError, status: stream.streamStatus, numAttempts: attempts)
                }
            } else {
                // A negative value signals a stream error.
                throw Error.streamFailed(underlyingError: stream.streamError, status: stream.streamStatus)
            }
        }
    }
}
95+
96+
fileprivate extension CSVWriter.Error {
    // NOTE(review): every factory returns `CSVError<CSVWriter>` so the writer only throws its own error type.
    // This assumes the writer's error kinds include `.invalidInput` — confirm against the `CSVWriter.Error` declaration.

    /// The given `String.Encoding` is not yet supported by the library.
    /// - parameter encoding: The desired byte representation.
    static func unsupported(encoding: String.Encoding) -> CSVError<CSVWriter> {
        .init(.invalidConfiguration,
              reason: "The given encoding is not yet supported by this library",
              help: "Contact the library maintainer",
              userInfo: ["Encoding": encoding])
    }
    /// Error raised when a Unicode scalar is not an ASCII character.
    /// - parameter scalar: The Unicode scalar that couldn't be encoded.
    static func invalidASCII(scalar: Unicode.Scalar) -> CSVError<CSVWriter> {
        .init(.invalidInput,
              reason: "The Unicode Scalar is not an ASCII character.",
              help: "Make sure the CSV only contains ASCII characters or select a different encoding (e.g. UTF8).",
              userInfo: ["Unicode scalar": scalar])
    }
    /// Error raised when a UTF8 character cannot be constructed from a Unicode scalar value.
    /// - parameter scalar: The Unicode scalar that couldn't be encoded.
    static func invalidUTF8(scalar: Unicode.Scalar) -> CSVError<CSVWriter> {
        .init(.invalidInput,
              reason: "The Unicode Scalar couldn't be encoded as UTF8 characters",
              help: "Make sure the CSV only contains UTF8 characters or select a different encoding.",
              userInfo: ["Unicode scalar": scalar])
    }
    /// Error raised when a UTF16 character cannot be constructed from a Unicode scalar value.
    /// - parameter scalar: The Unicode scalar that couldn't be encoded.
    static func invalidUTF16(scalar: Unicode.Scalar) -> CSVError<CSVWriter> {
        .init(.invalidInput,
              reason: "The Unicode Scalar couldn't be encoded as multibyte UTF16",
              help: "Make sure the CSV only contains UTF16 characters.",
              userInfo: ["Unicode scalar": scalar])
    }
    /// Error raised when a UTF32 character cannot be constructed from a Unicode scalar value.
    /// - parameter scalar: The Unicode scalar that couldn't be encoded.
    static func invalidUTF32(scalar: Unicode.Scalar) -> CSVError<CSVWriter> {
        .init(.invalidInput,
              reason: "The Unicode Scalar couldn't be encoded as multibyte UTF32",
              help: "Make sure the CSV only contains UTF32 characters.",
              userInfo: ["Unicode scalar": scalar])
    }
    /// Error raised when the output stream reports a failure while writing.
    /// - parameter underlyingError: The error reported by the stream (if any).
    /// - parameter status: The status of the stream at the time of the failure.
    static func streamFailed(underlyingError: Swift.Error?, status: Stream.Status) -> CSVError<CSVWriter> {
        .init(.streamFailure, underlying: underlyingError,
              reason: "The output stream encountered an error while trying to write encoded bytes",
              help: "Review the underlying error and make sure you have access to the output data (if it is a file)",
              userInfo: ["Status": status])
    }
    /// Error raised when the output stream repeatedly accepts no bytes.
    /// - parameter underlyingError: The error reported by the stream (if any).
    /// - parameter status: The status of the stream at the time of the failure.
    /// - parameter numAttempts: The number of write attempts allowed before giving up.
    static func streamEmptyWrite(underlyingError: Swift.Error?, status: Stream.Status, numAttempts: Int) -> CSVError<CSVWriter> {
        .init(.streamFailure, underlying: underlyingError,
              reason: "Several attempts were made to write on the stream, but they were unsuccessful.",
              help: "Review the underlying error (if any) and try again.",
              userInfo: ["Status": status, "Attempts": numAttempts])
    }
}

0 commit comments

Comments
 (0)