Skip to content

Commit c4938e3

Browse files
Merge pull request #27 from willtemperley/main
Use JSON testing approach for Arrow gold files.
2 parents 5d97e84 + 8724026 commit c4938e3

23 files changed

+1392
-327
lines changed

Sources/Arrow/Array/Array.swift

Lines changed: 110 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,30 @@ extension ArrowArrayProtocol {
3838

3939
// MARK: Capability protocols.
4040

41-
public protocol ArrowArrayOfString {
41+
public protocol StringArrayProtocol {
42+
var length: Int { get }
4243
subscript(index: Int) -> String? { get }
4344
}
44-
extension ArrowArrayVariable: ArrowArrayOfString where ItemType == String {}
45+
extension ArrowArrayVariable: StringArrayProtocol where ItemType == String {}
4546

46-
public protocol ArrowArrayOfData {
47-
subscript(index: Int) -> Data? { get }
48-
}
49-
extension ArrowArrayFixedSizeBinary: ArrowArrayOfData where ItemType == Data {}
50-
extension ArrowArrayVariable: ArrowArrayOfData where ItemType == Data {}
47+
protocol BinaryArrayProtocol: ArrowArrayProtocol where ItemType == Data {}
48+
extension ArrowArrayFixedSizeBinary: BinaryArrayProtocol {}
49+
extension ArrowArrayVariable: BinaryArrayProtocol
50+
where ItemType == Data, OffsetType: FixedWidthInteger & SignedInteger {}
5151

52-
public protocol ArrowArrayOfInt8 {
53-
subscript(index: Int) -> Int8? { get }
54-
}
55-
extension ArrowArrayFixed: ArrowArrayOfInt8 where ItemType == Int8 {}
52+
protocol Utf8ArrayProtocol: ArrowArrayProtocol where ItemType == String {}
53+
extension ArrowArrayVariable: Utf8ArrayProtocol
54+
where ItemType == String, OffsetType: FixedWidthInteger & SignedInteger {}
5655

57-
public protocol ArrowArrayOfInt32 {
58-
subscript(index: Int) -> Int32? { get }
56+
public protocol ListArrayProtocol {
57+
var length: Int { get }
58+
var values: AnyArrowArrayProtocol { get }
59+
subscript(index: Int) -> AnyArrowArrayProtocol? { get }
5960
}
60-
extension ArrowArrayFixed: ArrowArrayOfInt32 where ItemType == Int32 {}
61+
extension ArrowListArray: ListArrayProtocol {}
62+
extension ArrowFixedSizeListArray: ListArrayProtocol {}
63+
64+
// MARK: Array implementations.
6165

6266
/// An Arrow array of booleans using the three-valued logical model (true / false / null).
6367
public struct ArrowArrayBoolean: ArrowArrayProtocol {
@@ -102,34 +106,32 @@ public struct ArrowArrayBoolean: ArrowArrayProtocol {
102106
}
103107

104108
/// An Arrow array of fixed-width numeric types.
105-
public struct ArrowArrayFixed<ValueBuffer>: ArrowArrayProtocol
106-
where
107-
ValueBuffer: FixedWidthBufferProtocol,
108-
ValueBuffer.ElementType: Numeric
109+
public struct ArrowArrayNumeric<ItemType: Numeric & BitwiseCopyable>:
110+
ArrowArrayProtocol
109111
{
110-
111-
public typealias ItemType = ValueBuffer.ElementType
112112
public let offset: Int
113113
public let length: Int
114+
public var nullCount: Int { nullBuffer.nullCount }
114115
public var bufferSizes: [Int] { [nullBuffer.length, valueBuffer.length] }
115116
public var buffers: [ArrowBufferProtocol] { [nullBuffer, valueBuffer] }
116-
public var nullCount: Int { nullBuffer.nullCount }
117+
117118
let nullBuffer: NullBuffer
118-
let valueBuffer: ValueBuffer
119+
private let valueBuffer: any FixedWidthBufferProtocol<ItemType>
119120

120-
public init(
121+
// Initialize from concrete buffer type
122+
public init<ValueBuffer: FixedWidthBufferProtocol>(
121123
offset: Int = 0,
122124
length: Int,
123125
nullBuffer: NullBuffer,
124126
valueBuffer: ValueBuffer
125-
) {
127+
) where ValueBuffer.ElementType == ItemType {
126128
self.offset = offset
127129
self.length = length
128130
self.nullBuffer = nullBuffer
129131
self.valueBuffer = valueBuffer
130132
}
131133

132-
public subscript(index: Int) -> ValueBuffer.ElementType? {
134+
public subscript(index: Int) -> ItemType? {
133135
precondition(index >= 0 && index < length, "Invalid index.")
134136
let offsetIndex = self.offset + index
135137
if !self.nullBuffer.isSet(offsetIndex) {
@@ -148,10 +150,7 @@ where
148150
}
149151
}
150152

151-
public struct ArrowArrayFixedSizeBinary<ValueBuffer>: ArrowArrayProtocol
152-
where
153-
ValueBuffer: VariableLengthBufferProtocol<Data>
154-
{
153+
public struct ArrowArrayFixedSizeBinary: ArrowArrayProtocol {
155154
public typealias ItemType = Data
156155
public let offset: Int
157156
public let length: Int
@@ -163,14 +162,14 @@ where
163162
public var nullCount: Int { nullBuffer.nullCount }
164163

165164
let nullBuffer: NullBuffer
166-
let valueBuffer: ValueBuffer
165+
let valueBuffer: any VariableLengthBufferProtocol<Data>
167166

168167
public init(
169168
offset: Int = 0,
170169
length: Int,
171170
byteWidth: Int,
172171
nullBuffer: NullBuffer,
173-
valueBuffer: ValueBuffer
172+
valueBuffer: any VariableLengthBufferProtocol<Data>
174173
) {
175174
self.offset = offset
176175
self.length = length
@@ -179,7 +178,7 @@ where
179178
self.valueBuffer = valueBuffer
180179
}
181180

182-
public subscript(index: Int) -> ValueBuffer.ElementType? {
181+
public subscript(index: Int) -> ItemType? {
183182
guard nullBuffer.isSet(index) else { return nil }
184183
let startIndex = index * byteWidth
185184
return valueBuffer.loadVariable(at: startIndex, arrayLength: byteWidth)
@@ -197,51 +196,56 @@ where
197196
}
198197

199198
/// An Arrow array of variable-length types.
200-
public struct ArrowArrayVariable<OffsetsBuffer, ValueBuffer>:
201-
ArrowArrayProtocol
202-
where
203-
OffsetsBuffer: FixedWidthBufferProtocol<Int32>,
204-
ValueBuffer: VariableLengthBufferProtocol<ValueBuffer.ElementType>,
205-
ValueBuffer.ElementType: VariableLength
206-
{
207-
public typealias ItemType = ValueBuffer.ElementType
199+
public struct ArrowArrayVariable<
200+
ItemType: VariableLength,
201+
OffsetType: FixedWidthInteger & SignedInteger
202+
>: ArrowArrayProtocol {
208203
public let offset: Int
209204
public let length: Int
205+
private let nullBuffer: NullBuffer
206+
private let offsetsBuffer: any FixedWidthBufferProtocol<OffsetType>
207+
private let valueBuffer: any VariableLengthBufferProtocol<ItemType>
208+
210209
public var bufferSizes: [Int] {
211210
[nullBuffer.length, offsetsBuffer.length, valueBuffer.length]
212211
}
212+
213213
public var buffers: [ArrowBufferProtocol] {
214214
[nullBuffer, offsetsBuffer, valueBuffer]
215215
}
216+
216217
public var nullCount: Int { nullBuffer.nullCount }
217-
let nullBuffer: NullBuffer
218-
let offsetsBuffer: OffsetsBuffer
219-
let valueBuffer: ValueBuffer
220218

221-
public init(
219+
public init<
220+
Offsets: FixedWidthBufferProtocol<OffsetType>,
221+
Values: VariableLengthBufferProtocol
222+
>(
222223
offset: Int = 0,
223224
length: Int,
224225
nullBuffer: NullBuffer,
225-
offsetsBuffer: OffsetsBuffer,
226-
valueBuffer: ValueBuffer
227-
) {
226+
offsetsBuffer: Offsets,
227+
valueBuffer: Values
228+
) where Values.ElementType == ItemType {
228229
self.offset = offset
229230
self.length = length
230231
self.nullBuffer = nullBuffer
231232
self.offsetsBuffer = offsetsBuffer
232233
self.valueBuffer = valueBuffer
233234
}
234235

235-
public subscript(index: Int) -> ValueBuffer.ElementType? {
236+
public subscript(index: Int) -> ItemType? {
236237
let offsetIndex = self.offset + index
237-
if !self.nullBuffer.isSet(offsetIndex) {
238+
guard self.nullBuffer.isSet(offsetIndex) else {
238239
return nil
239240
}
240-
let startIndex = offsetsBuffer[offsetIndex]
241-
let endIndex = offsetsBuffer[offsetIndex + 1]
241+
242+
// Use runtime dispatch through the existential
243+
let startOffset = offsetsBuffer[offsetIndex]
244+
let endOffset = offsetsBuffer[offsetIndex + 1]
245+
242246
return valueBuffer.loadVariable(
243-
at: Int(startIndex),
244-
arrayLength: Int(endIndex - startIndex)
247+
at: Int(startOffset),
248+
arrayLength: Int(endOffset - startOffset)
245249
)
246250
}
247251

@@ -257,17 +261,14 @@ where
257261
}
258262

259263
/// An Arrow array of `Date`s with a resolution of 1 day.
260-
public struct ArrowArrayDate32<ValueBuffer>: ArrowArrayProtocol
261-
where
262-
ValueBuffer: FixedWidthBufferProtocol<Int32>
263-
{
264+
public struct ArrowArrayDate32: ArrowArrayProtocol {
264265
public typealias ItemType = Date
265266
public var bufferSizes: [Int] { array.bufferSizes }
266267
public var buffers: [ArrowBufferProtocol] { array.buffers }
267268
public var nullCount: Int { array.nullCount }
268269
public var offset: Int { array.offset }
269270
public var length: Int { array.length }
270-
let array: ArrowArrayFixed<ValueBuffer>
271+
let array: ArrowArrayNumeric<Date32>
271272

272273
public subscript(index: Int) -> Date? {
273274
precondition(index >= 0 && index < length, "Invalid index.")
@@ -287,17 +288,14 @@ where
287288
}
288289

289290
/// An Arrow array of `Date`s with a resolution of 1 second.
290-
public struct ArrowArrayDate64<ValueBuffer>: ArrowArrayProtocol
291-
where
292-
ValueBuffer: FixedWidthBufferProtocol<Date64>
293-
{
291+
public struct ArrowArrayDate64: ArrowArrayProtocol {
294292
public typealias ItemType = Date
295293
public var bufferSizes: [Int] { array.bufferSizes }
296294
public var buffers: [ArrowBufferProtocol] { array.buffers }
297295
public var nullCount: Int { array.nullCount }
298296
public var offset: Int { array.offset }
299297
public var length: Int { array.length }
300-
let array: ArrowArrayFixed<ValueBuffer>
298+
let array: ArrowArrayNumeric<Date64>
301299

302300
public subscript(index: Int) -> Date? {
303301
precondition(index >= 0 && index < length, "Invalid index.")
@@ -316,13 +314,12 @@ where
316314
}
317315
}
318316

319-
/// A strongly-typed Arrow list array which may be nested arbitrarily.
320-
public struct ArrowListArray<Element, OffsetsBuffer>: ArrowArrayProtocol
317+
/// An Arrow list array which may be nested arbitrarily.
318+
public struct ArrowListArray<OffsetsBuffer>: ArrowArrayProtocol
321319
where
322-
OffsetsBuffer: FixedWidthBufferProtocol<Int32>,
323-
Element: AnyArrowArrayProtocol
320+
OffsetsBuffer: FixedWidthBufferProtocol,
321+
OffsetsBuffer.ElementType: FixedWidthInteger & SignedInteger
324322
{
325-
public typealias ItemType = Element
326323
public let offset: Int
327324
public let length: Int
328325
public var bufferSizes: [Int] {
@@ -332,16 +329,17 @@ where
332329
[nullBuffer, offsetsBuffer]
333330
}
334331
public var nullCount: Int { nullBuffer.nullCount }
332+
335333
let nullBuffer: NullBuffer
336334
let offsetsBuffer: OffsetsBuffer
337-
let values: Element
335+
public let values: AnyArrowArrayProtocol
338336

339337
public init(
340338
offset: Int = 0,
341339
length: Int,
342340
nullBuffer: NullBuffer,
343341
offsetsBuffer: OffsetsBuffer,
344-
values: Element
342+
values: AnyArrowArrayProtocol
345343
) {
346344
self.offset = offset
347345
self.length = length
@@ -350,7 +348,7 @@ where
350348
self.values = values
351349
}
352350

353-
public subscript(index: Int) -> Element? {
351+
public subscript(index: Int) -> AnyArrowArrayProtocol? {
354352
precondition(index >= 0 && index < length, "Invalid index.")
355353
let offsetIndex = self.offset + index
356354
if !self.nullBuffer.isSet(offsetIndex) {
@@ -373,45 +371,59 @@ where
373371
}
374372
}
375373

376-
/// A type-erased wrapper for an Arrow list array.
377-
public struct AnyArrowListArray: ArrowArrayProtocol {
374+
/// An Arrow list array in which every list spans the same fixed number (`listSize`) of child values.
375+
public struct ArrowFixedSizeListArray: ArrowArrayProtocol {
376+
public let offset: Int
377+
public let length: Int
378+
public let listSize: Int
378379

379-
public typealias ItemType = AnyArrowArrayProtocol
380380
public var bufferSizes: [Int] {
381-
_base.bufferSizes
381+
[nullBuffer.length]
382382
}
383+
383384
public var buffers: [ArrowBufferProtocol] {
384-
_base.buffers
385+
[nullBuffer]
385386
}
386387

387-
private let _base: any ArrowArrayProtocol
388-
private let _subscriptImpl: (Int) -> AnyArrowArrayProtocol?
389-
private let _sliceImpl: (Int, Int) -> AnyArrowListArray
388+
public var nullCount: Int { nullBuffer.nullCount }
389+
390+
let nullBuffer: NullBuffer
391+
public let values: AnyArrowArrayProtocol
390392

391-
public let offset: Int
392-
public let length: Int
393-
public var nullCount: Int { _base.nullCount }
394-
395-
public init<Element, OffsetsBuffer>(
396-
_ list: ArrowListArray<Element, OffsetsBuffer>
397-
)
398-
where
399-
OffsetsBuffer: FixedWidthBufferProtocol<Int32>,
400-
Element: AnyArrowArrayProtocol
401-
{
402-
self._base = list
403-
self.offset = list.offset
404-
self.length = list.length
405-
self._subscriptImpl = { list[$0] }
406-
self._sliceImpl = { AnyArrowListArray(list.slice(offset: $0, length: $1)) }
393+
public init(
394+
offset: Int = 0,
395+
length: Int,
396+
listSize: Int,
397+
nullBuffer: NullBuffer,
398+
values: AnyArrowArrayProtocol
399+
) {
400+
self.offset = offset
401+
self.length = length
402+
self.listSize = listSize
403+
self.nullBuffer = nullBuffer
404+
self.values = values
407405
}
408406

409407
public subscript(index: Int) -> AnyArrowArrayProtocol? {
410-
_subscriptImpl(index)
408+
precondition(index >= 0 && index < length, "Invalid index.")
409+
let offsetIndex = self.offset + index
410+
411+
if !self.nullBuffer.isSet(offsetIndex) {
412+
return nil
413+
}
414+
415+
let startIndex = offsetIndex * listSize
416+
return values.slice(offset: startIndex, length: listSize)
411417
}
412418

413-
public func slice(offset: Int, length: Int) -> AnyArrowListArray {
414-
_sliceImpl(offset, length)
419+
public func slice(offset: Int, length: Int) -> Self {
420+
.init(
421+
offset: self.offset + offset,
422+
length: length,
423+
listSize: listSize,
424+
nullBuffer: nullBuffer,
425+
values: values
426+
)
415427
}
416428
}
417429

0 commit comments

Comments
 (0)