Skip to content

Commit 0529475

Browse files
Merge pull request #25 from willtemperley/main
First Arrow integration tests done with Gold files.
2 parents 26a2bac + 5999858 commit 0529475

File tree

119 files changed

+1290
-388
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+1290
-388
lines changed

Sources/Arrow/Array/Array.swift

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,19 @@
1414

1515
import Foundation
1616

17-
public protocol ArrowArrayProtocol {
18-
associatedtype ItemType
19-
subscript(_ index: Int) -> ItemType? { get }
17+
/// Interface common to Arrow arrays that does not depend on an element type
/// (it declares no associated type).
public protocol AnyArrowArrayProtocol {
2018
/// Offset of this array. NOTE(review): presumably an element offset into the
/// underlying buffers, as slices are built by adding to it — confirm.
var offset: Int { get }
2119
/// Length of this array. NOTE(review): presumably counted in elements — confirm.
var length: Int { get }
2220
/// Null count; conforming types in this file forward to their null buffer's `nullCount`.
var nullCount: Int { get }
2321
/// Returns an array of the same concrete type for the given `offset` and `length`.
func slice(offset: Int, length: Int) -> Self
2422
/// Returns the element at `index` as an untyped `Any?`.
func any(at index: Int) -> Any?
2523
/// Sizes of the array's buffers; conforming types in this file report each
/// buffer's `length`.
var bufferSizes: [Int] { get }
24+
/// The buffers belonging to this array.
var buffers: [ArrowBufferProtocol] { get }
25+
}
26+
27+
/// Refines `AnyArrowArrayProtocol` with a typed element and a typed subscript.
internal protocol ArrowArrayProtocol: AnyArrowArrayProtocol {
28+
/// The element type produced by the subscript.
associatedtype ItemType
29+
/// Returns the element at `index` as an optional `ItemType`.
/// NOTE(review): conforming types in this file return `nil` when their null
/// buffer reports the slot as not set — confirm this holds for all conformers.
subscript(_ index: Int) -> ItemType? { get }
2630
}
2731

2832
// This exists to support type-erased struct arrays.
@@ -32,12 +36,26 @@ extension ArrowArrayProtocol {
3236
}
3337
}
3438

39+
// MARK: Capability protocols.
40+
41+
/// Capability protocol for arrays whose elements can be read as `String?`.
public protocol ArrowArrayOfString {
42+
/// Returns the element at `index` as an optional `String`.
subscript(index: Int) -> String? { get }
43+
}
44+
// Variable-length arrays whose item type is `String` gain the capability.
extension ArrowArrayVariable: ArrowArrayOfString where ItemType == String {}
45+
46+
/// Capability protocol for arrays whose elements can be read as `Data?`.
public protocol ArrowArrayOfData {
47+
/// Returns the element at `index` as an optional `Data`.
subscript(index: Int) -> Data? { get }
48+
}
49+
// Fixed-size binary arrays whose item type is `Data` gain the capability.
extension ArrowArrayFixedSizeBinary: ArrowArrayOfData where ItemType == Data {}
50+
// Variable-length arrays whose item type is `Data` gain the capability.
extension ArrowArrayVariable: ArrowArrayOfData where ItemType == Data {}
51+
3552
/// An Arrow array of booleans using the three-valued logical model (true / false / null).
3653
public struct ArrowArrayBoolean: ArrowArrayProtocol {
3754
public typealias ItemType = Bool
3855
public let offset: Int
3956
public let length: Int
4057
public var bufferSizes: [Int] { [nullBuffer.length, valueBuffer.length] }
58+
public var buffers: [ArrowBufferProtocol] { [nullBuffer, valueBuffer] }
4159
public var nullCount: Int { nullBuffer.nullCount }
4260
let nullBuffer: NullBuffer
4361
let valueBuffer: NullBuffer
@@ -84,6 +102,7 @@ where
84102
public let offset: Int
85103
public let length: Int
86104
public var bufferSizes: [Int] { [nullBuffer.length, valueBuffer.length] }
105+
public var buffers: [ArrowBufferProtocol] { [nullBuffer, valueBuffer] }
87106
public var nullCount: Int { nullBuffer.nullCount }
88107
let nullBuffer: NullBuffer
89108
let valueBuffer: ValueBuffer
@@ -119,6 +138,54 @@ where
119138
}
120139
}
121140

141+
/// An Arrow array of fixed-size binary values: every element occupies exactly
/// `byteWidth` bytes of the value buffer.
public struct ArrowArrayFixedSizeBinary<ValueBuffer>: ArrowArrayProtocol
where
  ValueBuffer: VariableLengthBufferProtocol<Data>
{
  public typealias ItemType = Data

  /// Offset of the first element of this array within the underlying buffers.
  public let offset: Int
  /// Length of this array.
  public let length: Int
  /// Number of bytes occupied by each element.
  public let byteWidth: Int

  public var bufferSizes: [Int] { [nullBuffer.length, valueBuffer.length] }
  public var buffers: [ArrowBufferProtocol] { [nullBuffer, valueBuffer] }

  public var nullCount: Int { nullBuffer.nullCount }

  let nullBuffer: NullBuffer
  let valueBuffer: ValueBuffer

  /// Creates an array over the given buffers.
  ///
  /// - Parameters:
  ///   - offset: Offset of the first element within the buffers.
  ///   - length: Length of the array.
  ///   - byteWidth: Number of bytes occupied by each element.
  ///   - nullBuffer: Buffer queried to decide whether an element is null.
  ///   - valueBuffer: Buffer the element bytes are loaded from.
  public init(
    offset: Int = 0,
    length: Int,
    byteWidth: Int,
    nullBuffer: NullBuffer,
    valueBuffer: ValueBuffer
  ) {
    self.offset = offset
    self.length = length
    self.byteWidth = byteWidth
    self.nullBuffer = nullBuffer
    self.valueBuffer = valueBuffer
  }

  /// Returns the element at `index`, or `nil` when the null buffer reports the
  /// slot as not set.
  ///
  /// `index` is relative to this array. Slices share the parent's buffers and
  /// differ only in `offset`, so the offset must be applied to both the null
  /// lookup and the byte position.
  public subscript(index: Int) -> ValueBuffer.ElementType? {
    let physicalIndex = offset + index
    guard nullBuffer.isSet(physicalIndex) else { return nil }
    let startIndex = physicalIndex * byteWidth
    return valueBuffer.loadVariable(at: startIndex, arrayLength: byteWidth)
  }

  /// Returns a view of `length` elements starting `offset` elements into this
  /// array. The buffers are passed on unchanged.
  public func slice(offset: Int, length: Int) -> Self {
    .init(
      offset: self.offset + offset,  // relative to current offset
      length: length,
      byteWidth: byteWidth,
      nullBuffer: nullBuffer,
      valueBuffer: valueBuffer
    )
  }
}
188+
122189
/// An Arrow array of variable-length types.
123190
public struct ArrowArrayVariable<OffsetsBuffer, ValueBuffer>:
124191
ArrowArrayProtocol
@@ -133,6 +200,9 @@ where
133200
public var bufferSizes: [Int] {
134201
[nullBuffer.length, offsetsBuffer.length, valueBuffer.length]
135202
}
203+
public var buffers: [ArrowBufferProtocol] {
204+
[nullBuffer, offsetsBuffer, valueBuffer]
205+
}
136206
public var nullCount: Int { nullBuffer.nullCount }
137207
let nullBuffer: NullBuffer
138208
let offsetsBuffer: OffsetsBuffer
@@ -183,6 +253,7 @@ where
183253
{
184254
public typealias ItemType = Date
185255
public var bufferSizes: [Int] { array.bufferSizes }
256+
public var buffers: [ArrowBufferProtocol] { array.buffers }
186257
public var nullCount: Int { array.nullCount }
187258
public var offset: Int { array.offset }
188259
public var length: Int { array.length }
@@ -212,6 +283,7 @@ where
212283
{
213284
public typealias ItemType = Date
214285
public var bufferSizes: [Int] { array.bufferSizes }
286+
public var buffers: [ArrowBufferProtocol] { array.buffers }
215287
public var nullCount: Int { array.nullCount }
216288
public var offset: Int { array.offset }
217289
public var length: Int { array.length }
@@ -238,13 +310,16 @@ where
238310
public struct ArrowListArray<Element, OffsetsBuffer>: ArrowArrayProtocol
239311
where
240312
OffsetsBuffer: FixedWidthBufferProtocol<Int32>,
241-
Element: ArrowArrayProtocol
313+
Element: AnyArrowArrayProtocol
242314
{
243315
public typealias ItemType = Element
244316
public let offset: Int
245317
public let length: Int
246318
public var bufferSizes: [Int] {
247-
[nullBuffer.length, offsetsBuffer.length, values.length]
319+
[nullBuffer.length, offsetsBuffer.length]
320+
}
321+
public var buffers: [ArrowBufferProtocol] {
322+
[nullBuffer, offsetsBuffer]
248323
}
249324
public var nullCount: Int { nullBuffer.nullCount }
250325
let nullBuffer: NullBuffer
@@ -273,7 +348,6 @@ where
273348
}
274349
let startIndex = offsetsBuffer[offsetIndex]
275350
let endIndex = offsetsBuffer[offsetIndex + 1]
276-
277351
let length = endIndex - startIndex
278352
return values.slice(offset: Int(startIndex), length: Int(length))
279353
}
@@ -292,20 +366,23 @@ where
292366
/// A type-erased wrapper for an Arrow list array.
293367
public struct AnyArrowListArray: ArrowArrayProtocol {
294368

295-
public typealias ItemType = any ArrowArrayProtocol
369+
public typealias ItemType = AnyArrowArrayProtocol
296370
public var bufferSizes: [Int] {
297371
_base.bufferSizes
298372
}
373+
public var buffers: [ArrowBufferProtocol] {
374+
_base.buffers
375+
}
299376

300377
private let _base: any ArrowArrayProtocol
301-
private let _subscriptImpl: (Int) -> (any ArrowArrayProtocol)?
378+
private let _subscriptImpl: (Int) -> AnyArrowArrayProtocol?
302379
private let _sliceImpl: (Int, Int) -> AnyArrowListArray
303380

304381
public let offset: Int
305382
public let length: Int
306383
public var nullCount: Int { _base.nullCount }
307384

308-
public init<Element, OffsetsBuffer>(
385+
init<Element, OffsetsBuffer>(
309386
_ list: ArrowListArray<Element, OffsetsBuffer>
310387
)
311388
where
@@ -319,7 +396,7 @@ public struct AnyArrowListArray: ArrowArrayProtocol {
319396
self._sliceImpl = { AnyArrowListArray(list.slice(offset: $0, length: $1)) }
320397
}
321398

322-
public subscript(index: Int) -> (any ArrowArrayProtocol)? {
399+
public subscript(index: Int) -> AnyArrowArrayProtocol? {
323400
_subscriptImpl(index)
324401
}
325402

@@ -333,16 +410,17 @@ public struct ArrowStructArray: ArrowArrayProtocol {
333410
public typealias ItemType = [String: Any]
334411
public let offset: Int
335412
public let length: Int
336-
public let fields: [(name: String, array: any ArrowArrayProtocol)]
413+
public let fields: [(name: String, array: AnyArrowArrayProtocol)]
337414
public var bufferSizes: [Int] { [nullBuffer.length] }
415+
public var buffers: [ArrowBufferProtocol] { [nullBuffer] }
338416
public var nullCount: Int { nullBuffer.nullCount }
339417
let nullBuffer: NullBuffer
340418

341419
public init(
342420
offset: Int = 0,
343421
length: Int,
344422
nullBuffer: NullBuffer,
345-
fields: [(name: String, array: any ArrowArrayProtocol)]
423+
fields: [(name: String, array: AnyArrowArrayProtocol)]
346424
) {
347425
self.offset = offset
348426
self.length = length
@@ -352,7 +430,6 @@ public struct ArrowStructArray: ArrowArrayProtocol {
352430

353431
public subscript(index: Int) -> ItemType? {
354432
guard nullBuffer.isSet(offset + index) else { return nil }
355-
356433
var result: [String: Any] = [:]
357434
for (name, array) in fields {
358435
result[name] = array.any(at: index)

Sources/Arrow/Array/Builder.swift

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public class ArrayBuilderBoolean: AnyArrayBuilder {
5858
public func appendNull() {
5959
length += 1
6060
nullBuilder.appendValid(false)
61+
valueBuilder.appendValid(false)
6162
}
6263

6364
public func finish() -> ArrayType {
@@ -113,6 +114,62 @@ public class ArrayBuilderFixedWidth<T: Numeric>: AnyArrayBuilder {
113114
}
114115
}
115116

117+
/// A builder for Arrow arrays holding fixed-size binary values.
///
/// Every appended value must be exactly `byteWidth` bytes long. A null still
/// appends `byteWidth` zero bytes to the value buffer.
public class ArrayBuilderFixedSizedBinary:
  AnyArrayBuilder
{
  public typealias ArrayType = ArrowArrayFixedSizeBinary<
    VariableLengthTypeBuffer<Data>
  >

  var length: Int
  let byteWidth: Int
  let nullBuilder: NullBufferBuilder
  let valueBuilder: VariableLengthTypeBufferBuilder<Data>
  /// Zero-filled placeholder of `byteWidth` bytes written for null slots.
  let nullValue: Data

  /// Creates an empty builder.
  ///
  /// - Parameter byteWidth: Number of bytes occupied by each element.
  public init(byteWidth: Int) {
    self.length = 0
    self.byteWidth = byteWidth
    self.nullBuilder = NullBufferBuilder()
    self.valueBuilder = VariableLengthTypeBufferBuilder<Data>()
    self.nullValue = Data(repeating: 0, count: byteWidth)
  }

  /// Appends a non-null value.
  ///
  /// - Parameter value: The bytes to append.
  /// - Precondition: `value.count == byteWidth`.
  public func append(_ value: Data) {
    // Validate before touching any builder state.
    precondition(value.count == byteWidth, "Incorrect byte width.")
    length += 1
    nullBuilder.appendValid(true)
    growValueStorage(toFit: value.count)
    valueBuilder.append(value)
  }

  /// Appends a null, writing the zero-filled placeholder to the value buffer.
  public func appendNull() {
    length += 1
    nullBuilder.appendValid(false)
    // Placeholder bytes take up value storage just like a real value does.
    growValueStorage(toFit: nullValue.count)
    valueBuilder.append(nullValue)
  }

  /// Finishes the null and value builders and returns the array, with offset 0.
  public func finish() -> ArrayType {
    let nullBuffer = nullBuilder.finish()
    let valueBuffer = valueBuilder.finish()
    return .init(
      offset: 0,
      length: length,
      byteWidth: byteWidth,
      nullBuffer: nullBuffer,
      valueBuffer: valueBuffer
    )
  }

  /// Doubles the value builder's capacity until `additionalBytes` more bytes fit.
  private func growValueStorage(toFit additionalBytes: Int) {
    let requiredCapacity = valueBuilder.length + additionalBytes
    guard requiredCapacity > valueBuilder.capacity else { return }
    // Start from at least 1: doubling a zero capacity would never terminate.
    var newCapacity = max(valueBuilder.capacity, 1)
    while newCapacity < requiredCapacity {
      newCapacity *= 2
    }
    valueBuilder.increaseCapacity(to: newCapacity)
  }
}
172+
116173
/// A builder for Arrow arrays holding variable length types.
117174
public class ArrayBuilderVariableLength<Element: VariableLength>:
118175
AnyArrayBuilder

Sources/Arrow/ArrowWriterX.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ public class ArrowWriter {
153153
withUnsafeBytes(of: continuationMarker.littleEndian) {
154154
writer.append(Data($0))
155155
}
156-
withUnsafeBytes(of: rbResult.1.o.littleEndian) {
156+
withUnsafeBytes(of: UInt32(rbResult.count).littleEndian) {
157157
writer.append(Data($0))
158158
}
159-
writer.append(rbResult.0)
159+
writer.append(rbResult)
160160
addPadForAlignment(&writer)
161161
let metadataLength = writer.count - startIndex
162162
let bodyStart = writer.count
@@ -250,7 +250,7 @@ public class ArrowWriter {
250250

251251
private func writeRecordBatch(
252252
batch: RecordBatchX
253-
) -> Result<(Data, Offset), ArrowError> {
253+
) -> Result<Data, ArrowError> {
254254
let schema = batch.schema
255255
var fbb = FlatBufferBuilder()
256256

@@ -296,7 +296,7 @@ public class ArrowWriter {
296296
FMessage.add(header: recordBatchOffset, &fbb)
297297
let messageOffset = FMessage.endMessage(&fbb, start: startMessage)
298298
fbb.finish(offset: messageOffset)
299-
return .success((fbb.data, Offset(offset: UInt32(fbb.data.count))))
299+
return .success(fbb.data)
300300
}
301301

302302
private func writeRecordBatchData(
@@ -374,7 +374,7 @@ public class ArrowWriter {
374374
case .success(let rbBlocks):
375375
switch writeFooter(schema: info.schema, rbBlocks: rbBlocks) {
376376
case .success(let footerData):
377-
fbb.finish(offset: Offset(offset: fbb.buffer.size))
377+
// fbb.finish(offset: Offset(offset: fbb.buffer.size))
378378
let footerOffset = writer.count
379379
writer.append(footerData)
380380
addPadForAlignment(&writer)
@@ -486,7 +486,7 @@ public class ArrowWriter {
486486
var writer: any DataWriter = InMemDataWriter()
487487
switch writeRecordBatch(batch: batch) {
488488
case .success(let message):
489-
writer.append(message.0)
489+
writer.append(message)
490490
addPadForAlignment(&writer)
491491
var dataWriter: any DataWriter = InMemDataWriter()
492492
switch writeRecordBatchData(

0 commit comments

Comments
 (0)