Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 116 additions & 25 deletions Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
case .binary:
return try ArrowArrayHolderImpl(BinaryArray(with))
case .strct:
return try ArrowArrayHolderImpl(StructArray(with))
return try ArrowArrayHolderImpl(NestedArray(with))
case .list:
return try ArrowArrayHolderImpl(NestedArray(with))
default:
throw ArrowError.invalid("Array not found for type: \(arrowType)")
}
Expand Down Expand Up @@ -355,53 +357,142 @@ public class BinaryArray: ArrowArray<Data> {
}
}

public class StructArray: ArrowArray<[Any?]> {
public private(set) var arrowFields: [ArrowArrayHolder]?
public class NestedArray: ArrowArray<[Any?]> {
private var children: [ArrowArrayHolder]?

public required init(_ arrowData: ArrowData) throws {
try super.init(arrowData)
var fields = [ArrowArrayHolder]()
for child in arrowData.children {
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
}

self.arrowFields = fields
switch arrowData.type.id {
case .list:
guard arrowData.children.count == 1 else {
throw ArrowError.invalid("List array must have exactly one child")
}

guard let listType = arrowData.type as? ArrowTypeList else {
throw ArrowError.invalid("Expected ArrowTypeList for list type ID")
}

self.children = [try ArrowArrayHolderImpl.loadArray(
listType.elementType,
with: arrowData.children[0]
)]

case .strct:
var fields = [ArrowArrayHolder]()
for child in arrowData.children {
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
}
self.children = fields

default:
throw ArrowError.invalid("NestedArray only supports list and struct types, got: \(arrowData.type.id)")
}
}

public override subscript(_ index: UInt) -> [Any?]? {
if self.arrowData.isNull(index) {
return nil
}

if let fields = arrowFields {
guard let children = self.children else {
return nil
}

switch arrowData.type.id {
case .list:
guard let values = children.first else { return nil }

let offsets = self.arrowData.buffers[1]
let offsetIndex = Int(index) * MemoryLayout<Int32>.stride

let startOffset = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
let endOffset = offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).load(as: Int32.self)

var items = [Any?]()
for i in startOffset..<endOffset {
items.append(values.array.asAny(UInt(i)))
}

return items

case .strct:
var result = [Any?]()
for field in fields {
for field in children {
result.append(field.array.asAny(index))
}

return result
}

return nil
default:
return nil
}
}

public override func asString(_ index: UInt) -> String {
if self.arrowData.isNull(index) {
return ""
}
switch arrowData.type.id {
case .list:
if self.arrowData.isNull(index) {
return "null"
}

guard let list = self[index] else {
return "null"
}

var output = "{"
if let fields = arrowFields {
for fieldIndex in 0..<fields.count {
let asStr = fields[fieldIndex].array as? AsString
if fieldIndex == 0 {
output.append("\(asStr!.asString(index))")
var output = "["
for (i, item) in list.enumerated() {
if i > 0 {
output.append(",")
}

if item == nil {
output.append("null")
} else if let asStringItem = item as? AsString {
output.append(asStringItem.asString(0))
} else {
output.append(",\(asStr!.asString(index))")
output.append("\(item!)")
}
}
output.append("]")
return output

case .strct:
if self.arrowData.isNull(index) {
return ""
}

var output = "{"
if let children = self.children {
for fieldIndex in 0..<children.count {
let asStr = children[fieldIndex].array as? AsString
if fieldIndex == 0 {
output.append("\(asStr!.asString(index))")
} else {
output.append(",\(asStr!.asString(index))")
}
}
}
output += "}"
return output

default:
return ""
}
}

/// True when the underlying data's type id is `.list`.
public var isListArray: Bool {
    arrowData.type.id == .list
}

/// True when the underlying data's type id is `.strct`.
public var isStructArray: Bool {
    arrowData.type.id == .strct
}

/// The child array holders when this array is a struct; nil for any other type id.
public var fields: [ArrowArrayHolder]? {
    guard arrowData.type.id == .strct else {
        return nil
    }
    return children
}

output += "}"
return output
/// The first child array holder when this array is a list; nil for any other type id
/// (or when no children are loaded).
public var values: ArrowArrayHolder? {
    guard arrowData.type.id == .list else {
        return nil
    }
    return children?.first
}
}
53 changes: 48 additions & 5 deletions Arrow/Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@ public class TimestampArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Int64>,
}
}

public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, NestedArray> {
let builders: [any ArrowArrayHolderBuilder]
let fields: [ArrowField]
public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
self.fields = fields
self.builders = builders
try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
self.bufferBuilder.initializeTypeInfo(fields)
}

Expand All @@ -143,7 +143,7 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
}

self.builders = builders
try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
}

public override func append(_ values: [Any?]?) {
Expand All @@ -159,7 +159,7 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
}
}

public override func finish() throws -> StructArray {
public override func finish() throws -> NestedArray {
let buffers = self.bufferBuilder.finish()
var childData = [ArrowData]()
for builder in self.builders {
Expand All @@ -169,11 +169,36 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
let arrowData = try ArrowData(self.type, buffers: buffers,
children: childData, nullCount: self.nullCount,
length: self.length)
let structArray = try StructArray(arrowData)
let structArray = try NestedArray(arrowData)
return structArray
}
}

/// Builds list arrays as `NestedArray` instances.
///
/// List boundaries and validity go to the `ListBufferBuilder`, while the individual
/// elements of every appended list are forwarded to a single child `valueBuilder`.
public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, NestedArray> {
    /// Child builder that receives the elements of every appended list.
    let valueBuilder: any ArrowArrayHolderBuilder

    /// Creates a list builder whose child builder is loaded for `elementType`.
    ///
    /// - Parameter elementType: Arrow type of the list elements.
    /// - Throws: Whatever `ArrowArrayBuilders.loadBuilder(arrowType:)` or the
    ///   superclass initializer throws.
    public override init(_ elementType: ArrowType) throws {
        self.valueBuilder = try ArrowArrayBuilders.loadBuilder(arrowType: elementType)
        try super.init(ArrowTypeList(elementType))
    }

    /// Appends one list (or a nil entry) to the array.
    ///
    /// The buffer builder is always given `values`; the elements are pushed to the
    /// child builder only when `values` is non-nil.
    public override func append(_ values: [Any?]?) {
        self.bufferBuilder.append(values)
        for element in values ?? [] {
            self.valueBuilder.appendAny(element)
        }
    }

    /// Finalizes the buffers and the child values into a `NestedArray`.
    ///
    /// - Throws: Errors from building the child holder, `ArrowData`, or `NestedArray`.
    public override func finish() throws -> NestedArray {
        let finishedBuffers = self.bufferBuilder.finish()
        let elementData = try valueBuilder.toHolder().array.arrowData
        let listData = try ArrowData(
            self.type,
            buffers: finishedBuffers,
            children: [elementData],
            nullCount: self.nullCount,
            length: self.length
        )
        return try NestedArray(listData)
    }
}

public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
Expand Down Expand Up @@ -290,6 +315,16 @@ public class ArrowArrayBuilders {
throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
}
return try TimestampArrayBuilder(timestampType.unit)
case .strct:
guard let structType = arrowType as? ArrowTypeStruct else {
throw ArrowError.invalid("Expected ArrowStructType for \(arrowType.id)")
}
return try StructArrayBuilder(structType.fields)
case .list:
guard let listType = arrowType as? ArrowTypeList else {
throw ArrowError.invalid("Expected ArrowTypeList for \(arrowType.id)")
}
return try ListArrayBuilder(listType.elementType)
default:
throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
}
Expand Down Expand Up @@ -353,4 +388,12 @@ public class ArrowArrayBuilders {
/// Creates a `TimestampArrayBuilder` for the given unit and optional timezone.
///
/// - Throws: Whatever the `TimestampArrayBuilder` initializer throws.
public static func loadTimestampArrayBuilder(
    _ unit: ArrowTimestampUnit,
    timezone: String? = nil
) throws -> TimestampArrayBuilder {
    try TimestampArrayBuilder(unit, timezone: timezone)
}

/// Creates a `StructArrayBuilder` for the given fields.
///
/// - Throws: Whatever the `StructArrayBuilder` initializer throws.
public static func loadStructArrayBuilder(_ fields: [ArrowField]) throws -> StructArrayBuilder {
    try StructArrayBuilder(fields)
}

/// Creates a `ListArrayBuilder` whose elements have the given Arrow type.
///
/// - Throws: Whatever the `ListArrayBuilder` initializer throws.
public static func loadListArrayBuilder(_ elementType: ArrowType) throws -> ListArrayBuilder {
    try ListArrayBuilder(elementType)
}
}
81 changes: 78 additions & 3 deletions Arrow/Sources/Arrow/ArrowBufferBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -338,20 +338,20 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder<Date, Int64> {

public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
public typealias ItemType = [Any?]
var info: ArrowNestedType?
var info: ArrowTypeStruct?
public init() throws {
let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
super.init(nulls)
}

public func initializeTypeInfo(_ fields: [ArrowField]) {
info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
info = ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}

public func append(_ newValue: [Any?]?) {
let index = UInt(self.length)
self.length += 1
if length > self.nulls.length {
if self.length > self.nulls.length {
self.resize(length)
}

Expand Down Expand Up @@ -379,3 +379,78 @@ public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
return [nulls]
}
}

/// Accumulates the validity bitmap and the Int32 offsets buffer for a list array.
///
/// Entry `i` of the offsets buffer is where list `i` starts in the child values and
/// entry `i + 1` is where it ends. Only these boundaries are recorded here; the child
/// values themselves are not stored by this builder.
public class ListBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
    public typealias ItemType = [Any?]
    var offsets: ArrowBuffer

    /// Creates an empty builder whose offsets buffer starts with the single offset `0`.
    public required init() throws {
        self.offsets = ArrowBuffer.createBuffer(1, size: UInt(MemoryLayout<Int32>.stride))
        let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
        super.init(nulls)
        self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
    }

    /// Appends a non-null list slot spanning `count` child elements.
    ///
    /// - Parameter count: Number of child elements that belong to the new list.
    public func append(_ count: Int) {
        self.appendSlot(elementCount: count)
    }

    /// Appends a list slot, or a null slot when `newValue` is nil.
    ///
    /// Only the element count of `newValue` is recorded; a null slot repeats the
    /// previous offset so it spans zero child elements.
    public func append(_ newValue: [Any?]?) {
        self.appendSlot(elementCount: newValue?.count)
    }

    /// Shared bookkeeping for both `append` overloads: bumps the length, grows the
    /// buffers if needed, updates the validity bit and writes the next offset.
    ///
    /// - Parameter elementCount: Child element count of the new list, or nil for a null slot.
    private func appendSlot(elementCount: Int?) {
        let index = UInt(self.length)
        self.length += 1

        // The offsets buffer needs one more entry than there are lists.
        if self.length >= self.offsets.length {
            self.resize(self.length + 1)
        }

        let stride = MemoryLayout<Int32>.stride
        let offsetIndex = Int(index) * stride
        let currentOffset = self.offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
        let nextSlot = self.offsets.rawPointer.advanced(by: offsetIndex + stride)

        if let elementCount = elementCount {
            BitUtility.setBit(index + self.offset, buffer: self.nulls)
            let newOffset = currentOffset + Int32(elementCount)
            nextSlot.storeBytes(of: newOffset, as: Int32.self)
        } else {
            self.nullCount += 1
            BitUtility.clearBit(index + self.offset, buffer: self.nulls)
            nextSlot.storeBytes(of: currentOffset, as: Int32.self)
        }
    }

    /// Returns true when the validity bit for `index` is not set.
    public override func isNull(_ index: UInt) -> Bool {
        return !BitUtility.isSet(index + self.offset, buffer: self.nulls)
    }

    /// Grows the offsets and validity buffers when `length` exceeds the current
    /// offsets length, copying the existing contents into the new buffers.
    public func resize(_ length: UInt) {
        guard length > self.offsets.length else {
            return
        }

        // Distinct local name so the `resizeLength` helper is not shadowed.
        let newLength = resizeLength(self.offsets)
        var offsets = ArrowBuffer.createBuffer(newLength, size: UInt(MemoryLayout<Int32>.size))
        var nulls = ArrowBuffer.createBuffer(newLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity)
        ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
        self.offsets = offsets
        self.nulls = nulls
    }

    /// Copies the accumulated state into fresh buffers sized for the current length.
    ///
    /// - Returns: `[validity bitmap, offsets]`, in that order.
    public func finish() -> [ArrowBuffer] {
        let length = self.length
        var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        // One more offset than lists: the final entry closes the last list.
        var offsets = ArrowBuffer.createBuffer(length + 1, size: UInt(MemoryLayout<Int32>.size))
        ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
        ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: offsets.capacity)
        return [nulls, offsets]
    }
}
Loading