Skip to content

Commit 0f11175

Browse files
author
Marco
committed
Replacing ListArray and StructArray with NestedArray
1 parent c4f7ef0 commit 0f11175

File tree

6 files changed

+152
-154
lines changed

6 files changed

+152
-154
lines changed

Arrow/Sources/Arrow/ArrowArray.swift

Lines changed: 94 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -114,9 +114,9 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
114114
case .binary:
115115
return try ArrowArrayHolderImpl(BinaryArray(with))
116116
case .strct:
117-
return try ArrowArrayHolderImpl(StructArray(with))
117+
return try ArrowArrayHolderImpl(NestedArray(with))
118118
case .list:
119-
return try ArrowArrayHolderImpl(ListArray(with))
119+
return try ArrowArrayHolderImpl(NestedArray(with))
120120
default:
121121
throw ArrowError.invalid("Array not found for type: \(arrowType)")
122122
}
@@ -357,119 +357,121 @@ public class BinaryArray: ArrowArray<Data> {
357357
}
358358
}
359359

360-
public class StructArray: ArrowArray<[Any?]> {
361-
public private(set) var arrowFields: [ArrowArrayHolder]?
360+
public class NestedArray: ArrowArray<[Any?]> {
361+
private var children: [ArrowArrayHolder]?
362+
362363
public required init(_ arrowData: ArrowData) throws {
363364
try super.init(arrowData)
364-
var fields = [ArrowArrayHolder]()
365-
for child in arrowData.children {
366-
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
365+
366+
if let listType = arrowData.type as? ArrowTypeList {
367+
guard arrowData.children.count == 1 else {
368+
throw ArrowError.invalid("List array must have exactly one child")
369+
}
370+
371+
self.children = [try ArrowArrayHolderImpl.loadArray(
372+
listType.elementType,
373+
with: arrowData.children[0]
374+
)]
375+
} else {
376+
var fields = [ArrowArrayHolder]()
377+
for child in arrowData.children {
378+
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
379+
}
380+
self.children = fields
367381
}
368-
369-
self.arrowFields = fields
370382
}
371-
383+
372384
public override subscript(_ index: UInt) -> [Any?]? {
373385
if self.arrowData.isNull(index) {
374386
return nil
375387
}
376-
377-
if let fields = arrowFields {
388+
389+
guard let children = self.children else {
390+
return nil
391+
}
392+
393+
if arrowData.type is ArrowTypeList {
394+
guard let values = children.first else { return nil }
395+
396+
let offsets = self.arrowData.buffers[1]
397+
let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
398+
399+
let startOffset = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
400+
let endOffset = offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).load(as: Int32.self)
401+
402+
var items = [Any?]()
403+
for i in startOffset..<endOffset {
404+
items.append(values.array.asAny(UInt(i)))
405+
}
406+
407+
return items
408+
} else {
378409
var result = [Any?]()
379-
for field in fields {
410+
for field in children {
380411
result.append(field.array.asAny(index))
381412
}
382-
383413
return result
384414
}
385-
386-
return nil
387415
}
388-
416+
389417
public override func asString(_ index: UInt) -> String {
418+
let isListType = arrowData.type is ArrowTypeList
419+
390420
if self.arrowData.isNull(index) {
391-
return ""
421+
return isListType ? "null" : ""
392422
}
393-
394-
var output = "{"
395-
if let fields = arrowFields {
396-
for fieldIndex in 0..<fields.count {
397-
let asStr = fields[fieldIndex].array as? AsString
398-
if fieldIndex == 0 {
399-
output.append("\(asStr!.asString(index))")
423+
424+
if isListType {
425+
guard let list = self[index] else {
426+
return "null"
427+
}
428+
429+
var output = "["
430+
for (i, item) in list.enumerated() {
431+
if i > 0 {
432+
output.append(",")
433+
}
434+
435+
if item == nil {
436+
output.append("null")
437+
} else if let asStringItem = item as? AsString {
438+
output.append(asStringItem.asString(0))
400439
} else {
401-
output.append(",\(asStr!.asString(index))")
440+
output.append("\(item!)")
402441
}
403442
}
443+
output.append("]")
444+
return output
445+
} else {
446+
var output = "{"
447+
if let children = self.children {
448+
for fieldIndex in 0..<children.count {
449+
let asStr = children[fieldIndex].array as? AsString
450+
if fieldIndex == 0 {
451+
output.append("\(asStr!.asString(index))")
452+
} else {
453+
output.append(",\(asStr!.asString(index))")
454+
}
455+
}
456+
}
457+
output += "}"
458+
return output
404459
}
405-
406-
output += "}"
407-
return output
408460
}
409-
}
410-
411-
public class ListArray: ArrowArray<[Any?]> {
412-
public private(set) var values: ArrowArrayHolder?
413-
414-
public required init(_ arrowData: ArrowData) throws {
415-
try super.init(arrowData)
416-
guard arrowData.children.count == 1 else {
417-
throw ArrowError.invalid("List array must have exactly one child")
418-
}
419-
420-
guard let listType = arrowData.type as? ArrowTypeList else {
421-
throw ArrowError.invalid("Expected ArrowTypeList")
422-
}
423-
424-
self.values = try ArrowArrayHolderImpl.loadArray(
425-
listType.elementType,
426-
with: arrowData.children[0]
427-
)
461+
462+
public var isListArray: Bool {
463+
return arrowData.type is ArrowTypeList
428464
}
429-
430-
public override subscript(_ index: UInt) -> [Any?]? {
431-
guard let values = self.values else { return nil }
432-
433-
if self.arrowData.isNull(index) {
434-
return nil
435-
}
436-
437-
let offsets = self.arrowData.buffers[1]
438-
let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
439-
440-
let startOffset = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
441-
let endOffset = offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).load(as: Int32.self)
442-
443-
var items = [Any?]()
444-
for i in startOffset..<endOffset {
445-
items.append(values.array.asAny(UInt(i)))
446-
}
447-
448-
return items
465+
466+
public var isStructArray: Bool {
467+
return arrowData.type is ArrowTypeStruct
449468
}
450-
451-
public override func asString(_ index: UInt) -> String {
452-
guard let list = self[index] else {
453-
return "null"
454-
}
455-
456-
var output = "["
457-
458-
for (i, item) in list.enumerated() {
459-
if i > 0 {
460-
output.append(",")
461-
}
462-
463-
if item == nil {
464-
output.append("null")
465-
} else if let asStringItem = item as? AsString {
466-
output.append(asStringItem.asString(0))
467-
} else {
468-
output.append("\(item!)")
469-
}
470-
}
471-
472-
output.append("]")
473-
return output
469+
470+
public var fields: [ArrowArrayHolder]? {
471+
return isStructArray ? children : nil
472+
}
473+
474+
public var values: ArrowArrayHolder? {
475+
return isListArray ? children?.first : nil
474476
}
475477
}

Arrow/Sources/Arrow/ArrowArrayBuilder.swift

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ public class TimestampArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Int64>,
125125
}
126126
}
127127

128-
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
128+
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, NestedArray> {
129129
let builders: [any ArrowArrayHolderBuilder]
130130
let fields: [ArrowField]
131131
public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
@@ -159,7 +159,7 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
159159
}
160160
}
161161

162-
public override func finish() throws -> StructArray {
162+
public override func finish() throws -> NestedArray {
163163
let buffers = self.bufferBuilder.finish()
164164
var childData = [ArrowData]()
165165
for builder in self.builders {
@@ -169,12 +169,12 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
169169
let arrowData = try ArrowData(self.type, buffers: buffers,
170170
children: childData, nullCount: self.nullCount,
171171
length: self.length)
172-
let structArray = try StructArray(arrowData)
172+
let structArray = try NestedArray(arrowData)
173173
return structArray
174174
}
175175
}
176176

177-
public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, ListArray> {
177+
public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, NestedArray> {
178178
let valueBuilder: any ArrowArrayHolderBuilder
179179

180180
public override init(_ elementType: ArrowType) throws {
@@ -191,11 +191,11 @@ public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, ListArray> {
191191
}
192192
}
193193

194-
public override func finish() throws -> ListArray {
194+
public override func finish() throws -> NestedArray {
195195
let buffers = self.bufferBuilder.finish()
196196
let childData = try valueBuilder.toHolder().array.arrowData
197197
let arrowData = try ArrowData(self.type, buffers: buffers, children: [childData], nullCount: self.nullCount, length: self.length)
198-
return try ListArray(arrowData)
198+
return try NestedArray(arrowData)
199199
}
200200
}
201201

Arrow/Sources/Arrow/ArrowReaderHelper.swift

Lines changed: 11 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -135,35 +135,22 @@ private func makeFixedHolder<T>(
135135
}
136136
}
137137

138-
func makeStructHolder(
138+
func makeNestedHolder(
139139
_ field: ArrowField,
140140
buffers: [ArrowBuffer],
141141
nullCount: UInt,
142142
children: [ArrowData],
143143
rbLength: UInt
144144
) -> Result<ArrowArrayHolder, ArrowError> {
145145
do {
146-
let arrowData = try ArrowData(field.type,
147-
buffers: buffers, children: children,
148-
nullCount: nullCount, length: rbLength)
149-
return .success(ArrowArrayHolderImpl(try StructArray(arrowData)))
150-
} catch let error as ArrowError {
151-
return .failure(error)
152-
} catch {
153-
return .failure(.unknownError("\(error)"))
154-
}
155-
}
156-
157-
func makeListHolder(
158-
_ field: ArrowField,
159-
buffers: [ArrowBuffer],
160-
nullCount: UInt,
161-
children: [ArrowData],
162-
rbLength: UInt
163-
) -> Result<ArrowArrayHolder, ArrowError> {
164-
do {
165-
let arrowData = try ArrowData(field.type, buffers: buffers, children: children, nullCount: nullCount, length: rbLength)
166-
return .success(ArrowArrayHolderImpl(try ListArray(arrowData)))
146+
let arrowData = try ArrowData(
147+
field.type,
148+
buffers: buffers,
149+
children: children,
150+
nullCount: nullCount,
151+
length: rbLength
152+
)
153+
return .success(ArrowArrayHolderImpl(try NestedArray(arrowData)))
167154
} catch let error as ArrowError {
168155
return .failure(error)
169156
} catch {
@@ -224,9 +211,9 @@ func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity
224211
case .timestamp:
225212
return makeTimestampHolder(field, buffers: buffers, nullCount: nullCount)
226213
case .strct:
227-
return makeStructHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
214+
return makeNestedHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
228215
case .list:
229-
return makeListHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
216+
return makeNestedHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
230217
default:
231218
return .failure(.unknownType("Type \(typeId) currently not supported"))
232219
}

Arrow/Sources/Arrow/ArrowWriter.swift

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -170,8 +170,10 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
170170
nullCount: Int64(column.nullCount))
171171
offsets.append(fbb.create(struct: fieldNode))
172172
if let nestedType = column.type as? ArrowTypeStruct {
173-
let structArray = column.array as? StructArray
174-
writeFieldNodes(nestedType.fields, columns: structArray!.arrowFields!, offsets: &offsets, fbb: &fbb)
173+
let nestedArray = column.array as? NestedArray
174+
if let nestedFields = nestedArray?.fields {
175+
writeFieldNodes(nestedType.fields, columns: nestedFields, offsets: &offsets, fbb: &fbb)
176+
}
175177
}
176178
}
177179
}
@@ -190,9 +192,11 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
190192
buffers.append(buffer)
191193
bufferOffset += bufferDataSize
192194
if let nestedType = column.type as? ArrowTypeStruct {
193-
let structArray = column.array as? StructArray
194-
writeBufferInfo(nestedType.fields, columns: structArray!.arrowFields!,
195-
bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
195+
let nestedArray = column.array as? NestedArray
196+
if let nestedFields = nestedArray?.fields {
197+
writeBufferInfo(nestedType.fields, columns: nestedFields,
198+
bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
199+
}
196200
}
197201
}
198202
}
@@ -236,22 +240,24 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
236240
return .success((fbb.data, Offset(offset: UInt32(fbb.data.count))))
237241
}
238242

243+
239244
private func writeRecordBatchData(
240245
_ writer: inout DataWriter, fields: [ArrowField],
241-
columns: [ArrowArrayHolder])
242-
-> Result<Bool, ArrowError> {
246+
columns: [ArrowArrayHolder]
247+
) -> Result<Bool, ArrowError> {
243248
for index in 0 ..< fields.count {
244249
let column = columns[index]
245250
let colBufferData = column.getBufferData()
246251
for var bufferData in colBufferData {
247252
addPadForAlignment(&bufferData)
248253
writer.append(bufferData)
249254
if let nestedType = column.type as? ArrowTypeStruct {
250-
guard let structArray = column.array as? StructArray else {
255+
guard let nestedArray = column.array as? NestedArray,
256+
let nestedFields = nestedArray.fields else {
251257
return .failure(.invalid("Struct type array expected for nested type"))
252258
}
253259

254-
switch writeRecordBatchData(&writer, fields: nestedType.fields, columns: structArray.arrowFields!) {
260+
switch writeRecordBatchData(&writer, fields: nestedType.fields, columns: nestedFields) {
255261
case .success:
256262
continue
257263
case .failure(let error):

Arrow/Tests/ArrowTests/ArrayTests.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -320,8 +320,8 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
320320
let structArray = try structBuilder.finish()
321321
XCTAssertEqual(structArray.length, 3)
322322
XCTAssertNil(structArray[1])
323-
XCTAssertEqual(structArray.arrowFields![0].length, 3)
324-
XCTAssertNil(structArray.arrowFields![0].array.asAny(1))
323+
XCTAssertEqual(structArray.fields![0].length, 3)
324+
XCTAssertNil(structArray.fields![0].array.asAny(1))
325325
XCTAssertEqual(structArray[0]![STIndex.bool.rawValue] as? Bool, true)
326326
XCTAssertEqual(structArray[0]![STIndex.int8.rawValue] as? Int8, 1)
327327
XCTAssertEqual(structArray[0]![STIndex.int16.rawValue] as? Int16, 2)

0 commit comments

Comments
 (0)