Skip to content

Commit 9deef23

Browse files
committed
Add support for multiple delimiters with more than two scalars per delimiter
1 parent 63cac55 commit 9deef23

File tree

1 file changed

+72
-34
lines changed

1 file changed

+72
-34
lines changed

sources/imperative/reader/internal/ReaderInference.swift

Lines changed: 72 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ extension Delimiter.Scalars {
2424
let delimiters = self.row.sorted { $0.count < $1.count }
2525
let maxScalars = delimiters.last!.count
2626

27-
// For optimization sake, a delimiter proofer is built for s single value scalar.
27+
// For optimization sake, a delimiter proofer is built for a single value scalar.
2828
if maxScalars == 1 {
2929
return { [dels = delimiters.map { $0.first! }] in dels.contains($0) }
3030
// For optimization sake, a delimiter proofer is built for two unicode scalars.
@@ -41,9 +41,35 @@ extension Delimiter.Scalars {
4141
}
4242
}
4343
} else {
44-
return { [storage = Unmanaged.passUnretained(buffer), decoder] (scalar) in
45-
#warning("Finish edge-case implementation")
46-
fatalError()
44+
return { [storage = Unmanaged.passUnretained(buffer), decoder] (firstScalar) in
45+
try storage._withUnsafeGuaranteedRef {
46+
var tmp: [Unicode.Scalar] = Array()
47+
48+
loop: for del in delimiters {
49+
var iterator = del.makeIterator()
50+
guard firstScalar == iterator.next().unsafelyUnwrapped else { continue loop }
51+
52+
var b = 0
53+
while let delimiterScalar = iterator.next() {
54+
let scalar: UnicodeScalar
55+
if tmp.endIndex > b {
56+
scalar = tmp[b]
57+
} else if let decodedScalar = try $0.next() ?? decoder() {
58+
scalar = decodedScalar
59+
tmp.append(scalar)
60+
} else {
61+
break loop
62+
}
63+
64+
guard scalar == delimiterScalar else { continue loop }
65+
b &+= 1
66+
}
67+
return true
68+
}
69+
70+
$0.preppend(scalars: tmp)
71+
return false
72+
}
4773
}
4874
}
4975
}
@@ -55,45 +81,57 @@ private extension Delimiter.Scalars {
5581
/// - parameter buffer: A unicode character buffer containing further characters to parse.
5682
/// - parameter decoder: The instance providing the input `Unicode.Scalar`s.
5783
static func _makeMatcher(delimiter: [Unicode.Scalar], buffer: CSVReader.ScalarBuffer, decoder: @escaping CSVReader.ScalarDecoder) -> Self.Checker {
84+
assert(!delimiter.isEmpty)
85+
let count = delimiter.count
86+
let first = delimiter.first.unsafelyUnwrapped
87+
5888
// For optimization's sake, a delimiter proofer is built for a single unicode scalar.
59-
if delimiter.count == 1 {
60-
return { [s = delimiter[0]] in $0 == s }
61-
// For optimizations sake, a delimiter proofer is built for two unicode scalars.
62-
} else if delimiter.count == 2 {
63-
return { [storage = Unmanaged.passUnretained(buffer), decoder, first = delimiter[0], second = delimiter[1]] in
64-
guard $0 == first else { return false }
89+
if count == 1 {
90+
return { $0 == first }
91+
}
92+
93+
let storage = Unmanaged.passUnretained(buffer)
94+
let second = delimiter[1]
95+
96+
// For optimization's sake, a delimiter proofer is built for two unicode scalars.
97+
if count == 2 {
98+
return { [decoder] in
99+
guard first == $0 else { return false }
65100
return try storage._withUnsafeGuaranteedRef {
66-
guard let nextScalar = try $0.next() ?? decoder() else { return false }
67-
if second == nextScalar { return true }
68-
else { $0.preppend(scalar: nextScalar); return false }
101+
guard let scalar = try $0.next() ?? decoder() else { return false }
102+
guard second == scalar else {
103+
$0.preppend(scalar: scalar)
104+
return false
105+
}
106+
return true
69107
}
70108
}
71-
// For completion sake, a delimiter proofer is build for +2 unicode scalars. CSV files with multiscalar delimiters are very very rare.
72-
} else {
73-
return { [storage = Unmanaged.passUnretained(buffer), decoder] (firstScalar) -> Bool in
74-
try storage._withUnsafeGuaranteedRef {
75-
var scalar = firstScalar
76-
var index = delimiter.startIndex
77-
var toIncludeInBuffer: [Unicode.Scalar] = Array()
78-
79-
while true {
80-
guard scalar == delimiter[index] else {
81-
$0.preppend(scalars: toIncludeInBuffer)
82-
return false
83-
}
109+
}
84110

85-
index = delimiter.index(after: index)
86-
guard index < delimiter.endIndex else { return true }
111+
// For completeness' sake, a delimiter proofer is built for more than two unicode scalars. CSV files with multiscalar delimiters are very, very rare.
112+
let delimiterIterator = delimiter.makeIterator()
113+
return { [decoder] in
114+
var iterator = delimiterIterator
115+
guard iterator.next().unsafelyUnwrapped == $0 else { return false }
87116

88-
guard let nextScalar = try $0.next() ?? decoder() else {
89-
$0.preppend(scalars: toIncludeInBuffer)
90-
return false
91-
}
117+
return try storage._withUnsafeGuaranteedRef {
118+
var tmp: [Unicode.Scalar] = Array()
119+
tmp.reserveCapacity(count)
92120

93-
toIncludeInBuffer.append(nextScalar)
94-
scalar = nextScalar
121+
while let delimiterScalar = iterator.next() {
122+
guard let scalar = try $0.next() ?? decoder() else {
123+
storage._withUnsafeGuaranteedRef { $0.preppend(scalars: tmp) }
124+
return false
125+
}
126+
127+
tmp.append(scalar)
128+
guard scalar == delimiterScalar else {
129+
storage._withUnsafeGuaranteedRef { $0.preppend(scalars: tmp) }
130+
return false
95131
}
96132
}
133+
134+
return true
97135
}
98136
}
99137
}

0 commit comments

Comments
 (0)