From 77dda93a89da93571c4752a739c0493e957bb876 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Fri, 31 Oct 2025 14:43:41 +0800 Subject: [PATCH 1/6] AsString merged into AnyArrowArray --- Sources/Arrow/ArrowArray.swift | 7 +++-- Sources/Arrow/ChunkedArray.swift | 6 +--- Tests/ArrowTests/IPCTests.swift | 50 +++++++++++++++----------------- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/Sources/Arrow/ArrowArray.swift b/Sources/Arrow/ArrowArray.swift index f9de931..559c124 100644 --- a/Sources/Arrow/ArrowArray.swift +++ b/Sources/Arrow/ArrowArray.swift @@ -24,12 +24,13 @@ public protocol AnyArrowArray { var bufferData: [Data] { get } var bufferDataSizes: [Int] { get } func asAny(_ index: UInt) -> Any? + func asString(_ index: UInt) -> String } // MARK: - Core Protocol /// The interface for Arrow array types. -public protocol ArrowArray: AsString, AnyArrowArray { +public protocol ArrowArray: AnyArrowArray { associatedtype ItemType var arrowData: ArrowData { get } init(_ arrowData: ArrowData) throws(ArrowError) @@ -451,7 +452,7 @@ public struct NestedArray: ArrowArray, AnyArrowArray { switch item { case nil: output.append("null") - case let asStringItem as AsString: + case let asStringItem as AnyArrowArray: output.append(asStringItem.asString(0)) case let someItem?: output.append("\(someItem)") @@ -466,7 +467,7 @@ public struct NestedArray: ArrowArray, AnyArrowArray { var output = "{" if let children = self.children { let parts = children.compactMap { child in - (child as? 
AsString)?.asString(index) + child.asString(index) } output.append(parts.joined(separator: ",")) } diff --git a/Sources/Arrow/ChunkedArray.swift b/Sources/Arrow/ChunkedArray.swift index bc8fde0..182caf0 100644 --- a/Sources/Arrow/ChunkedArray.swift +++ b/Sources/Arrow/ChunkedArray.swift @@ -14,10 +14,6 @@ import Foundation -public protocol AsString { - func asString(_ index: UInt) -> String -} - public class ChunkedArrayHolder { public let type: ArrowType public let length: UInt @@ -85,7 +81,7 @@ public class ChunkedArrayHolder { } } -public class ChunkedArray: AsString { +public class ChunkedArray { public let arrays: [any ArrowArray] public let type: ArrowType public let nullCount: UInt diff --git a/Tests/ArrowTests/IPCTests.swift b/Tests/ArrowTests/IPCTests.swift index 6614627..83abf3a 100644 --- a/Tests/ArrowTests/IPCTests.swift +++ b/Tests/ArrowTests/IPCTests.swift @@ -71,10 +71,10 @@ func checkBoolRecordBatch( #expect(recordBatch.schema.fields[1].type == .utf8) for index in 0.. Date: Fri, 31 Oct 2025 17:08:06 +0800 Subject: [PATCH 2/6] Gather FlatBuffers utilities into one place. 
--- Sources/Arrow/ArrowArray.swift | 7 +- ...rrowArray.swift => ArrowArrayLoader.swift} | 1 - Sources/Arrow/ArrowReader.swift | 7 + Sources/Arrow/ArrowReaderHelper.swift | 189 ++---------------- Sources/Arrow/FlatBuffersHelpers.swift | 165 +++++++++++++++ 5 files changed, 189 insertions(+), 180 deletions(-) rename Sources/Arrow/{AnyArrowArray.swift => ArrowArrayLoader.swift} (97%) create mode 100644 Sources/Arrow/FlatBuffersHelpers.swift diff --git a/Sources/Arrow/ArrowArray.swift b/Sources/Arrow/ArrowArray.swift index 559c124..0405e85 100644 --- a/Sources/Arrow/ArrowArray.swift +++ b/Sources/Arrow/ArrowArray.swift @@ -38,6 +38,7 @@ public protocol ArrowArray: AnyArrowArray { } // MARK: - Default Implementations + extension ArrowArray { public var nullCount: UInt { @@ -189,8 +190,7 @@ public struct Date32Array: ArrowArray { let byteOffset = self.arrowData.stride * Int(index) let milliseconds = self.arrowData.buffers[1].rawPointer.advanced( by: byteOffset - ).load( - as: UInt32.self) + ).load(as: UInt32.self) return Date(timeIntervalSince1970: TimeInterval(milliseconds * 86400)) } } @@ -210,8 +210,7 @@ public struct Date64Array: ArrowArray { let byteOffset = self.arrowData.stride * Int(index) let milliseconds = self.arrowData.buffers[1].rawPointer.advanced( by: byteOffset - ).load( - as: UInt64.self) + ).load(as: UInt64.self) return Date(timeIntervalSince1970: TimeInterval(milliseconds / 1000)) } } diff --git a/Sources/Arrow/AnyArrowArray.swift b/Sources/Arrow/ArrowArrayLoader.swift similarity index 97% rename from Sources/Arrow/AnyArrowArray.swift rename to Sources/Arrow/ArrowArrayLoader.swift index b27c33b..56dbf43 100644 --- a/Sources/Arrow/AnyArrowArray.swift +++ b/Sources/Arrow/ArrowArrayLoader.swift @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// FIXME: Temporary to support holder refactor struct ArrowArrayLoader { public static func loadArray( _ arrowType: ArrowType, diff --git a/Sources/Arrow/ArrowReader.swift b/Sources/Arrow/ArrowReader.swift index 8891ec1..006c165 100644 --- a/Sources/Arrow/ArrowReader.swift +++ b/Sources/Arrow/ArrowReader.swift @@ -544,4 +544,11 @@ public struct ArrowReader: Sendable { .unknownError("Unhandled header type: \(message.headerType)")) } } + + func validateFileData(_ data: Data) -> Bool { + let markerLength = fileMarker.count + let startString = data[.. Result { - if field.type == .date32 { - let arrowData = ArrowData( - field.type, - buffers: buffers, - nullCount: nullCount - ) - return .success(Date32Array(arrowData)) - } - let arrowData = ArrowData( - field.type, - buffers: buffers, - nullCount: nullCount - ) - return .success(Date64Array(arrowData)) -} - private func makeBoolHolder( _ buffers: [ArrowBuffer], nullCount: UInt @@ -193,8 +172,20 @@ func makeArrayHolder( return makeStringHolder(buffers, nullCount: nullCount) case .binary: return makeBinaryHolder(buffers, nullCount: nullCount) - case .date32, .date64: - return makeDateHolder(field, buffers: buffers, nullCount: nullCount) + case .date32: + let arrowData = ArrowData( + field.type, + buffers: buffers, + nullCount: nullCount + ) + return .success(Date32Array(arrowData)) + case .date64: + let arrowData = ArrowData( + field.type, + buffers: buffers, + nullCount: nullCount + ) + return .success(Date64Array(arrowData)) case .time32: let arrowData = ArrowData( field.type, buffers: buffers, nullCount: nullCount) @@ -226,159 +217,7 @@ func makeArrayHolder( } } -func makeBuffer( - _ buffer: Buffer, fileData: Data, - length: UInt, messageOffset: Int64 -) -> ArrowBuffer { - let startOffset = messageOffset + buffer.offset - let endOffset = startOffset + buffer.length - let bufferData = [UInt8](fileData[startOffset.. 
Bool { - switch type { - case .int, .bool, .floatingpoint, .date, .time, .timestamp: - return true - default: - return false - } -} - -func findArrowType(_ field: FlatField) throws(ArrowError) -> ArrowType { - let type = field.typeType - switch type { - case .int: - guard let intType = field.type(type: FlatInt.self) else { - throw .invalid("Could not get integer type from \(field)") - } - let bitWidth = intType.bitWidth - if bitWidth == 8 { - if intType.isSigned { - return .int8 - } else { - return .uint8 - } - } - if bitWidth == 16 { - return intType.isSigned ? .int16 : .uint16 - } - if bitWidth == 32 { - return intType.isSigned ? .int32 : .uint32 - } - if bitWidth == 64 { - return intType.isSigned ? .int64 : .uint64 - } - throw .invalid("Unhandled integer bit width: \(bitWidth)") - case .bool: - return .boolean - case .floatingpoint: - guard let floatType = field.type(type: FloatingPoint.self) else { - throw .invalid("Could not get floating point type from field") - } - switch floatType.precision { - case .half: - return .float16 - case .single: - return .float32 - case .double: - return .float64 - } - case .utf8: - return .utf8 - case .binary: - return .binary - case .date: - guard let dateType = field.type(type: FlatDate.self) else { - throw .invalid("Could not get date type from field") - } - if dateType.unit == .day { - return .date32 - } - return .date64 - case .time: - guard let timeType = field.type(type: FlatTime.self) else { - throw .invalid("Could not get time type from field") - } - if timeType.unit == .second || timeType.unit == .millisecond { - return .time32( - timeType.unit == .second ? .second : .millisecond - ) - } - return .time64( - timeType.unit == .microsecond ? 
.microsecond : .nanosecond - ) - case .timestamp: - guard let timestampType = field.type(type: FlatTimestamp.self) else { - throw .invalid("Could not get timestamp type from field") - } - let arrowUnit: TimeUnit - switch timestampType.unit { - case .second: - arrowUnit = .second - case .millisecond: - arrowUnit = .millisecond - case .microsecond: - arrowUnit = .microsecond - case .nanosecond: - arrowUnit = .nanosecond - } - let timezone = timestampType.timezone - return .timestamp(arrowUnit, timezone) - case .struct_: - guard field.type(type: FlatStruct.self) != nil else { - throw .invalid("Could not get struct type from field") - } - var fields: [ArrowField] = [] - for index in 0..= recordBatch.buffersCount { - throw ArrowError.outOfBounds(index: Int64(index)) - } -} - -func validateFileData(_ data: Data) -> Bool { - let markerLength = fileMarker.count - let startString = data[.. UInt32 { let token = data.withUnsafeBytes { rawBuffer in diff --git a/Sources/Arrow/FlatBuffersHelpers.swift b/Sources/Arrow/FlatBuffersHelpers.swift new file mode 100644 index 0000000..6fcd3a1 --- /dev/null +++ b/Sources/Arrow/FlatBuffersHelpers.swift @@ -0,0 +1,165 @@ +// Copyright 2025 The Apache Software Foundation +// Copyright 2025 The Columnar-Swift Contributors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import Foundation + +func makeBuffer( + _ buffer: Buffer, + fileData: Data, + length: UInt, + messageOffset: Int64 +) -> ArrowBuffer { + let startOffset = messageOffset + buffer.offset + let endOffset = startOffset + buffer.length + let bufferData = [UInt8](fileData[startOffset.. Bool { + switch type { + case .int, .bool, .floatingpoint, .date, .time, .timestamp: + return true + default: + return false + } +} + +func findArrowType(_ field: FlatField) throws(ArrowError) -> ArrowType { + let type = field.typeType + switch type { + case .int: + guard let intType = field.type(type: FlatInt.self) else { + throw .invalid("Could not get integer type from \(field)") + } + let bitWidth = intType.bitWidth + if bitWidth == 8 { + if intType.isSigned { + return .int8 + } else { + return .uint8 + } + } + if bitWidth == 16 { + return intType.isSigned ? .int16 : .uint16 + } + if bitWidth == 32 { + return intType.isSigned ? .int32 : .uint32 + } + if bitWidth == 64 { + return intType.isSigned ? .int64 : .uint64 + } + throw .invalid("Unhandled integer bit width: \(bitWidth)") + case .bool: + return .boolean + case .floatingpoint: + guard let floatType = field.type(type: FloatingPoint.self) else { + throw .invalid("Could not get floating point type from field") + } + switch floatType.precision { + case .half: + return .float16 + case .single: + return .float32 + case .double: + return .float64 + } + case .utf8: + return .utf8 + case .binary: + return .binary + case .date: + guard let dateType = field.type(type: FlatDate.self) else { + throw .invalid("Could not get date type from field") + } + if dateType.unit == .day { + return .date32 + } + return .date64 + case .time: + guard let timeType = field.type(type: FlatTime.self) else { + throw .invalid("Could not get time type from field") + } + if timeType.unit == .second || timeType.unit == .millisecond { + return .time32( + timeType.unit == .second ? .second : .millisecond + ) + } + return .time64( + timeType.unit == .microsecond ? 
.microsecond : .nanosecond + ) + case .timestamp: + guard let timestampType = field.type(type: FlatTimestamp.self) else { + throw .invalid("Could not get timestamp type from field") + } + let arrowUnit: TimeUnit + switch timestampType.unit { + case .second: + arrowUnit = .second + case .millisecond: + arrowUnit = .millisecond + case .microsecond: + arrowUnit = .microsecond + case .nanosecond: + arrowUnit = .nanosecond + } + let timezone = timestampType.timezone + return .timestamp(arrowUnit, timezone) + case .struct_: + guard field.type(type: FlatStruct.self) != nil else { + throw .invalid("Could not get struct type from field") + } + var fields: [ArrowField] = [] + for index in 0..= recordBatch.buffersCount { + throw ArrowError.outOfBounds(index: Int64(index)) + } +} From 4c4e54bd394491b9fb1c9a489a52771ceb154471 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Sat, 1 Nov 2025 14:18:01 +0800 Subject: [PATCH 3/6] Rename ArrowArrayHolderBuilder to reflect change from ArrowArrayHolder to AnyArrowArray --- Sources/Arrow/ArrowArray.swift | 2 - Sources/Arrow/ArrowArrayBuilder.swift | 65 +++++++++++++----------- Sources/Arrow/ArrowArrayLoader.swift | 1 + Sources/Arrow/ArrowEncoder.swift | 15 +++--- Sources/Arrow/ArrowReader.swift | 2 +- Sources/Arrow/ArrowReaderHelper.swift | 2 - Tests/ArrowTests/ArrayBuilderTests.swift | 56 ++++++++++---------- Tests/ArrowTests/ArrayTests.swift | 8 +-- Tests/ArrowTests/CodableTests.swift | 45 ++++++++-------- Tests/ArrowTests/IPCTests.swift | 12 ++--- 10 files changed, 106 insertions(+), 102 deletions(-) diff --git a/Sources/Arrow/ArrowArray.swift b/Sources/Arrow/ArrowArray.swift index 0405e85..9e47915 100644 --- a/Sources/Arrow/ArrowArray.swift +++ b/Sources/Arrow/ArrowArray.swift @@ -38,9 +38,7 @@ public protocol ArrowArray: AnyArrowArray { } // MARK: - Default Implementations - extension ArrowArray { - public var nullCount: UInt { arrowData.nullCount } diff --git a/Sources/Arrow/ArrowArrayBuilder.swift 
b/Sources/Arrow/ArrowArrayBuilder.swift index dae440f..ad50329 100644 --- a/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Sources/Arrow/ArrowArrayBuilder.swift @@ -15,17 +15,15 @@ import Foundation -// FIXME: Rename or remove -public protocol ArrowArrayHolderBuilder { - func toHolder() throws(ArrowError) -> AnyArrowArray +public protocol AnyArrowArrayBuilder { + func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray func appendAny(_ val: Any?) } public class ArrowArrayBuilder< - T: ArrowBufferBuilder, U: ArrowArray ->: - ArrowArrayHolderBuilder -{ + T: ArrowBufferBuilder, + U: ArrowArray +>: AnyArrowArrayBuilder { let type: ArrowType let bufferBuilder: T public var length: UInt { self.bufferBuilder.length } @@ -73,13 +71,14 @@ public class ArrowArrayBuilder< self.type.getStride() } - public func toHolder() throws(ArrowError) -> AnyArrowArray { + public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray { try self.finish() } } public class NumberArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, FixedArray + FixedBufferBuilder, +FixedArray > where T: Numeric, T: BitwiseCopyable { fileprivate convenience init() throws(ArrowError) { @@ -88,7 +87,8 @@ where T: Numeric, T: BitwiseCopyable { } public class StringArrayBuilder: ArrowArrayBuilder< - VariableBufferBuilder, StringArray + VariableBufferBuilder, + StringArray > { fileprivate convenience init() throws(ArrowError) { @@ -97,7 +97,8 @@ public class StringArrayBuilder: ArrowArrayBuilder< } public class BinaryArrayBuilder: ArrowArrayBuilder< - VariableBufferBuilder, BinaryArray + VariableBufferBuilder, + BinaryArray > { fileprivate convenience init() throws(ArrowError) { @@ -112,7 +113,8 @@ public class BoolArrayBuilder: ArrowArrayBuilder { } public class Date32ArrayBuilder: ArrowArrayBuilder< - Date32BufferBuilder, Date32Array + Date32BufferBuilder, + Date32Array > { fileprivate convenience init() throws(ArrowError) { @@ -121,7 +123,8 @@ public class Date32ArrayBuilder: ArrowArrayBuilder< } public 
class Date64ArrayBuilder: ArrowArrayBuilder< - Date64BufferBuilder, Date64Array + Date64BufferBuilder, + Date64Array > { fileprivate convenience init() throws(ArrowError) { @@ -130,7 +133,8 @@ public class Date64ArrayBuilder: ArrowArrayBuilder< } public class Time32ArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, Time32Array + FixedBufferBuilder, + Time32Array > { fileprivate convenience init(_ unit: TimeUnit) throws(ArrowError) { @@ -139,7 +143,8 @@ public class Time32ArrayBuilder: ArrowArrayBuilder< } public class Time64ArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, Time64Array + FixedBufferBuilder, + Time64Array > { fileprivate convenience init(_ unit: TimeUnit) throws(ArrowError) { @@ -148,7 +153,8 @@ public class Time64ArrayBuilder: ArrowArrayBuilder< } public class TimestampArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, TimestampArray + FixedBufferBuilder, + TimestampArray > { fileprivate convenience init( @@ -159,12 +165,13 @@ public class TimestampArrayBuilder: ArrowArrayBuilder< } public class StructArrayBuilder: ArrowArrayBuilder< - StructBufferBuilder, NestedArray + StructBufferBuilder, +NestedArray > { - let builders: [any ArrowArrayHolderBuilder] + let builders: [any AnyArrowArrayBuilder] let fields: [ArrowField] - public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) + public init(_ fields: [ArrowField], builders: [any AnyArrowArrayBuilder]) throws(ArrowError) { self.fields = fields @@ -175,7 +182,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< public init(_ fields: [ArrowField]) throws(ArrowError) { self.fields = fields - var builders: [any ArrowArrayHolderBuilder] = [] + var builders: [any AnyArrowArrayBuilder] = [] for field in fields { builders.append( try ArrowArrayBuilders.loadBuilder(arrowType: field.type)) @@ -201,7 +208,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< let buffers = self.bufferBuilder.finish() var childData: [ArrowData] = [] for builder in self.builders { - 
childData.append(try builder.toHolder().arrowData) + childData.append(try builder.toAnyArrowArray().arrowData) } let arrowData = ArrowData( self.type, buffers: buffers, @@ -215,7 +222,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< public class ListArrayBuilder: ArrowArrayBuilder { - let valueBuilder: any ArrowArrayHolderBuilder + let valueBuilder: any AnyArrowArrayBuilder public override init(_ elementType: ArrowType) throws(ArrowError) { @@ -240,7 +247,7 @@ public class ListArrayBuilder: ArrowArrayBuilder public override func finish() throws(ArrowError) -> any ArrowArray<[Any?]> { let buffers = self.bufferBuilder.finish() - let childData = try valueBuilder.toHolder().arrowData + let childData = try valueBuilder.toAnyArrowArray().arrowData let arrowData = ArrowData( self.type, buffers: buffers, @@ -252,10 +259,10 @@ public class ListArrayBuilder: ArrowArrayBuilder } } -public class ArrowArrayBuilders { - public static func loadBuilder( - _ builderType: Any.Type - ) throws(ArrowError) -> ArrowArrayHolderBuilder { +public enum ArrowArrayBuilders { + public static func builder( + for builderType: Any.Type + ) throws(ArrowError) -> AnyArrowArrayBuilder { if builderType == Int8.self || builderType == Int8?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder @@ -317,7 +324,7 @@ public class ArrowArrayBuilders { _ obj: T ) throws -> StructArrayBuilder { let mirror = Mirror(reflecting: obj) - var builders: [ArrowArrayHolderBuilder] = [] + var builders: [AnyArrowArrayBuilder] = [] var fields: [ArrowField] = [] for (property, value) in mirror.children { guard let propertyName = property else { @@ -339,7 +346,7 @@ public class ArrowArrayBuilders { public static func loadBuilder( arrowType: ArrowType - ) throws(ArrowError) -> ArrowArrayHolderBuilder { + ) throws(ArrowError) -> AnyArrowArrayBuilder { switch arrowType { case .uint8: return try loadNumberArrayBuilder() as NumberArrayBuilder diff --git 
a/Sources/Arrow/ArrowArrayLoader.swift b/Sources/Arrow/ArrowArrayLoader.swift index 56dbf43..b3b799b 100644 --- a/Sources/Arrow/ArrowArrayLoader.swift +++ b/Sources/Arrow/ArrowArrayLoader.swift @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +// TODO: Duplicated struct ArrowArrayLoader { public static func loadArray( _ arrowType: ArrowType, diff --git a/Sources/Arrow/ArrowEncoder.swift b/Sources/Arrow/ArrowEncoder.swift index a935891..f15573d 100644 --- a/Sources/Arrow/ArrowEncoder.swift +++ b/Sources/Arrow/ArrowEncoder.swift @@ -15,7 +15,7 @@ import Foundation public class ArrowEncoder: Encoder { - public private(set) var builders: [String: ArrowArrayHolderBuilder] = [:] + public private(set) var builders: [String: AnyArrowArrayBuilder] = [:] private var byIndex: [String] = [] public var codingPath: [CodingKey] = [] public var userInfo: [CodingUserInfoKey: Any] = [:] @@ -29,9 +29,7 @@ public class ArrowEncoder: Encoder { public init() {} - public init( - _ builders: [String: ArrowArrayHolderBuilder], byIndex: [String] - ) { + public init(_ builders: [String: AnyArrowArrayBuilder], byIndex: [String]) { self.builders = builders self.byIndex = byIndex } @@ -59,14 +57,13 @@ public class ArrowEncoder: Encoder { // this will check if T is a simple built in type // (UInt, Int, Int8, String, Date, etc...). 
if ArrowArrayBuilders.isValidBuilderType(T.self) { - let builders = ["col0": try ArrowArrayBuilders.loadBuilder(T.self)] + let builders = ["col0": try ArrowArrayBuilders.builder(for: T.self)] return ArrowEncoder(builders, byIndex: ["col0"]) } else { let encoder = ArrowEncoder() if data is [AnyHashable: Any] { encoder.modForIndex = 2 } - return encoder } } @@ -78,7 +75,7 @@ public class ArrowEncoder: Encoder { guard let builder = builders[key] else { throw .invalid("Missing builder for \(key)") } - batchBuilder.addColumn(key, arrowArray: try builder.toHolder()) + batchBuilder.addColumn(key, arrowArray: try builder.toAnyArrowArray()) } return try batchBuilder.finish().get() } @@ -116,12 +113,12 @@ public class ArrowEncoder: Encoder { func ensureColumnExists( _ value: T, key: String - ) throws(ArrowError) -> ArrowArrayHolderBuilder { + ) throws(ArrowError) -> AnyArrowArrayBuilder { try throwIfInvalid() if let builder = builders[key] { return builder } - let builder = try ArrowArrayBuilders.loadBuilder(T.self) + let builder = try ArrowArrayBuilders.builder(for: T.self) builders[key] = builder byIndex.append(key) return builder diff --git a/Sources/Arrow/ArrowReader.swift b/Sources/Arrow/ArrowReader.swift index 006c165..f3985b1 100644 --- a/Sources/Arrow/ArrowReader.swift +++ b/Sources/Arrow/ArrowReader.swift @@ -544,7 +544,7 @@ public struct ArrowReader: Sendable { .unknownError("Unhandled header type: \(message.headerType)")) } } - + func validateFileData(_ data: Data) -> Bool { let markerLength = fileMarker.count let startString = data[.. 
UInt32 { let token = data.withUnsafeBytes { rawBuffer in rawBuffer.loadUnaligned(fromByteOffset: offset, as: UInt32.self) diff --git a/Tests/ArrowTests/ArrayBuilderTests.swift b/Tests/ArrowTests/ArrayBuilderTests.swift index 4a648c4..c7555f6 100644 --- a/Tests/ArrowTests/ArrayBuilderTests.swift +++ b/Tests/ArrowTests/ArrayBuilderTests.swift @@ -53,36 +53,36 @@ struct ArrayBuilderTests { @Test func loadArrayBuilders() throws { #expect(throws: Never.self) { - let _ = try ArrowArrayBuilders.loadBuilder(Int8.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int16.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int32.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int64.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt8.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt16.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt32.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt64.self) - let _ = try ArrowArrayBuilders.loadBuilder(Float.self) - let _ = try ArrowArrayBuilders.loadBuilder(Double.self) - let _ = try ArrowArrayBuilders.loadBuilder(Date.self) - let _ = try ArrowArrayBuilders.loadBuilder(Bool.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int8?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int16?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int32?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int64?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt8?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt16?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt32?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt64?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Float?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Double?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Date?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Bool?.self) + let _ = try ArrowArrayBuilders.builder(for: Int8.self) + let _ = try ArrowArrayBuilders.builder(for: Int16.self) + let _ = try 
ArrowArrayBuilders.builder(for: Int32.self) + let _ = try ArrowArrayBuilders.builder(for: Int64.self) + let _ = try ArrowArrayBuilders.builder(for: UInt8.self) + let _ = try ArrowArrayBuilders.builder(for: UInt16.self) + let _ = try ArrowArrayBuilders.builder(for: UInt32.self) + let _ = try ArrowArrayBuilders.builder(for: UInt64.self) + let _ = try ArrowArrayBuilders.builder(for: Float.self) + let _ = try ArrowArrayBuilders.builder(for: Double.self) + let _ = try ArrowArrayBuilders.builder(for: Date.self) + let _ = try ArrowArrayBuilders.builder(for: Bool.self) + let _ = try ArrowArrayBuilders.builder(for: Int8?.self) + let _ = try ArrowArrayBuilders.builder(for: Int16?.self) + let _ = try ArrowArrayBuilders.builder(for: Int32?.self) + let _ = try ArrowArrayBuilders.builder(for: Int64?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt8?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt16?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt32?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt64?.self) + let _ = try ArrowArrayBuilders.builder(for: Float?.self) + let _ = try ArrowArrayBuilders.builder(for: Double?.self) + let _ = try ArrowArrayBuilders.builder(for: Date?.self) + let _ = try ArrowArrayBuilders.builder(for: Bool?.self) } #expect(throws: ArrowError.self) { - let _ = try ArrowArrayBuilders.loadBuilder(Int.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt?.self) + let _ = try ArrowArrayBuilders.builder(for: Int.self) + let _ = try ArrowArrayBuilders.builder(for: UInt.self) + let _ = try ArrowArrayBuilders.builder(for: Int?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt?.self) } } } diff --git a/Tests/ArrowTests/ArrayTests.swift b/Tests/ArrowTests/ArrayTests.swift index 553bec1..ef40a22 100644 --- a/Tests/ArrowTests/ArrayTests.swift +++ b/Tests/ArrowTests/ArrayTests.swift @@ -452,18 +452,18 @@ struct 
ArrayTests { } @Test func arrowArrayHolderBuilder() throws { - let uint8HBuilder: ArrowArrayHolderBuilder = + let uint8HBuilder: AnyArrowArrayBuilder = (try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder) for index in 0..<100 { uint8HBuilder.appendAny(UInt8(index)) } - let uint8Holder = try uint8HBuilder.toHolder() + let uint8Holder = try uint8HBuilder.toAnyArrowArray() #expect(uint8Holder.nullCount == 0) #expect(uint8Holder.length == 100) - let stringHBuilder: ArrowArrayHolderBuilder = + let stringHBuilder: AnyArrowArrayBuilder = (try ArrowArrayBuilders.loadStringArrayBuilder()) for index in 0..<100 { if index % 10 == 9 { @@ -473,7 +473,7 @@ struct ArrayTests { } } - let stringHolder = try stringHBuilder.toHolder() + let stringHolder = try stringHBuilder.toAnyArrowArray() #expect(stringHolder.nullCount == 10) #expect(stringHolder.length == 100) } diff --git a/Tests/ArrowTests/CodableTests.swift b/Tests/ArrowTests/CodableTests.swift index 67ae61d..8ae2f0f 100644 --- a/Tests/ArrowTests/CodableTests.swift +++ b/Tests/ArrowTests/CodableTests.swift @@ -91,19 +91,19 @@ struct CodableTests { stringBuilder.append("test0", "test1", "test2") dateBuilder.append(date1, date1, date1) let result = RecordBatch.Builder() - .addColumn("propBool", arrowArray: try boolBuilder.toHolder()) - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) - .addColumn("propInt16", arrowArray: try int16Builder.toHolder()) - .addColumn("propInt32", arrowArray: try int32Builder.toHolder()) - .addColumn("propInt64", arrowArray: try int64Builder.toHolder()) - .addColumn("propUInt8", arrowArray: try uint8Builder.toHolder()) - .addColumn("propUInt16", arrowArray: try uint16Builder.toHolder()) - .addColumn("propUInt32", arrowArray: try uint32Builder.toHolder()) - .addColumn("propUInt64", arrowArray: try uint64Builder.toHolder()) - .addColumn("propFloat", arrowArray: try floatBuilder.toHolder()) - .addColumn("propDouble", arrowArray: try doubleBuilder.toHolder()) - 
.addColumn("propString", arrowArray: try stringBuilder.toHolder()) - .addColumn("propDate", arrowArray: try dateBuilder.toHolder()) + .addColumn("propBool", arrowArray: try boolBuilder.finish()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) + .addColumn("propInt16", arrowArray: try int16Builder.finish()) + .addColumn("propInt32", arrowArray: try int32Builder.finish()) + .addColumn("propInt64", arrowArray: try int64Builder.finish()) + .addColumn("propUInt8", arrowArray: try uint8Builder.finish()) + .addColumn("propUInt16", arrowArray: try uint16Builder.finish()) + .addColumn("propUInt32", arrowArray: try uint32Builder.finish()) + .addColumn("propUInt64", arrowArray: try uint64Builder.finish()) + .addColumn("propFloat", arrowArray: try floatBuilder.finish()) + .addColumn("propDouble", arrowArray: try doubleBuilder.finish()) + .addColumn("propString", arrowArray: try stringBuilder.finish()) + .addColumn("propDate", arrowArray: try dateBuilder.finish()) .finish() switch result { case .success(let rb): @@ -139,7 +139,7 @@ struct CodableTests { try ArrowArrayBuilders.loadNumberArrayBuilder() int8Builder.append(10, 11, 12) let result = RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) .finish() switch result { case .success(let rb): @@ -159,7 +159,10 @@ struct CodableTests { try ArrowArrayBuilders.loadNumberArrayBuilder() int8WNilBuilder.append(10, nil, 12, nil) let resultWNil = RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8WNilBuilder.toHolder()) + .addColumn( + "propInt8", + arrowArray: try int8WNilBuilder.finish() + ) .finish() switch resultWNil { case .success(let rb): @@ -185,8 +188,8 @@ struct CodableTests { int8Builder.append(10, 11, 12, 13) stringBuilder.append("test10", "test11", "test12", "test13") switch RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) - .addColumn("propString", 
arrowArray: try stringBuilder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) + .addColumn("propString", arrowArray: try stringBuilder.finish()) .finish() { case .success(let rb): @@ -200,8 +203,8 @@ struct CodableTests { } switch RecordBatch.Builder() - .addColumn("propString", arrowArray: try stringBuilder.toHolder()) - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propString", arrowArray: try stringBuilder.finish()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) .finish() { case .success(let rb): @@ -222,8 +225,8 @@ struct CodableTests { int8Builder.append(10, 11, 12, 13) stringWNilBuilder.append(nil, "test11", nil, "test13") let resultWNil = RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) - .addColumn("propString", arrowArray: try stringWNilBuilder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) + .addColumn("propString", arrowArray: try stringWNilBuilder.finish()) .finish() switch resultWNil { case .success(let rb): diff --git a/Tests/ArrowTests/IPCTests.swift b/Tests/ArrowTests/IPCTests.swift index 83abf3a..f8ded1a 100644 --- a/Tests/ArrowTests/IPCTests.swift +++ b/Tests/ArrowTests/IPCTests.swift @@ -71,9 +71,9 @@ func checkBoolRecordBatch( #expect(recordBatch.schema.fields[1].type == .utf8) for index in 0.. Date: Sat, 1 Nov 2025 15:59:00 +0800 Subject: [PATCH 4/6] Clearly name the type parameters on ArrowArrayBuilder. 
--- Sources/Arrow/ArrowArrayBuilder.swift | 38 ++++++++++++++------------- Sources/Arrow/ArrowTable.swift | 7 +++++ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/Sources/Arrow/ArrowArrayBuilder.swift b/Sources/Arrow/ArrowArrayBuilder.swift index ad50329..280604f 100644 --- a/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Sources/Arrow/ArrowArrayBuilder.swift @@ -21,54 +21,56 @@ public protocol AnyArrowArrayBuilder { } public class ArrowArrayBuilder< - T: ArrowBufferBuilder, - U: ArrowArray + BufferBuilder: ArrowBufferBuilder, + ArrayType: ArrowArray >: AnyArrowArrayBuilder { - let type: ArrowType - let bufferBuilder: T + let arrowType: ArrowType + let bufferBuilder: BufferBuilder public var length: UInt { self.bufferBuilder.length } public var capacity: UInt { self.bufferBuilder.capacity } public var nullCount: UInt { self.bufferBuilder.nullCount } public var offset: UInt { self.bufferBuilder.offset } fileprivate init(_ type: ArrowType) throws(ArrowError) { - self.type = type - self.bufferBuilder = T() + self.arrowType = type + self.bufferBuilder = BufferBuilder() } - public func append(_ vals: T.ItemType?...) { + public func append(_ vals: BufferBuilder.ItemType?...) { for val in vals { self.bufferBuilder.append(val) } } - public func append(_ vals: [T.ItemType?]) { + public func append(_ vals: [BufferBuilder.ItemType?]) { for val in vals { self.bufferBuilder.append(val) } } - public func append(_ val: T.ItemType?) { + public func append(_ val: BufferBuilder.ItemType?) { self.bufferBuilder.append(val) } public func appendAny(_ val: Any?) { - self.bufferBuilder.append(val as? T.ItemType) + self.bufferBuilder.append(val as? 
BufferBuilder.ItemType) } - public func finish() throws(ArrowError) -> any ArrowArray { + public func finish() throws(ArrowError) -> any ArrowArray< + BufferBuilder.ItemType + > { let buffers = self.bufferBuilder.finish() let arrowData = ArrowData( - self.type, + self.arrowType, buffers: buffers, nullCount: self.nullCount ) - let array = try U(arrowData) + let array = try ArrayType(arrowData) return array } public func getStride() -> Int { - self.type.getStride() + self.arrowType.getStride() } public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray { @@ -78,7 +80,7 @@ public class ArrowArrayBuilder< public class NumberArrayBuilder: ArrowArrayBuilder< FixedBufferBuilder, -FixedArray + FixedArray > where T: Numeric, T: BitwiseCopyable { fileprivate convenience init() throws(ArrowError) { @@ -166,7 +168,7 @@ public class TimestampArrayBuilder: ArrowArrayBuilder< public class StructArrayBuilder: ArrowArrayBuilder< StructBufferBuilder, -NestedArray + NestedArray > { let builders: [any AnyArrowArrayBuilder] @@ -211,7 +213,7 @@ NestedArray childData.append(try builder.toAnyArrowArray().arrowData) } let arrowData = ArrowData( - self.type, buffers: buffers, + self.arrowType, buffers: buffers, children: childData, nullCount: self.nullCount, length: self.length) @@ -249,7 +251,7 @@ public class ListArrayBuilder: ArrowArrayBuilder let buffers = self.bufferBuilder.finish() let childData = try valueBuilder.toAnyArrowArray().arrowData let arrowData = ArrowData( - self.type, + self.arrowType, buffers: buffers, children: [childData], nullCount: self.nullCount, diff --git a/Sources/Arrow/ArrowTable.swift b/Sources/Arrow/ArrowTable.swift index fdfcba4..d1daebd 100644 --- a/Sources/Arrow/ArrowTable.swift +++ b/Sources/Arrow/ArrowTable.swift @@ -210,6 +210,7 @@ public class RecordBatch { public var columnCount: UInt { UInt(self.columns.count) } public let columns: [AnyArrowArray] public let length: UInt + public init(_ schema: ArrowSchema, columns: [AnyArrowArray]) { 
self.schema = schema self.columns = columns @@ -222,6 +223,12 @@ public class RecordBatch { public init() {} + /// Add a column to the `RecordBatch` builder. + /// - Parameters: + /// - fieldName: The field name. + /// - arrowArray: The array to add to the record batch. + /// - Returns: The `RecordBatch.Builder` with the array appended and the field added to + /// the schema. If the array contains zero nulls, the field is defined as non-null. @discardableResult public func addColumn( _ fieldName: String, From 604a822a9c0ef9abcb36653e44593d03d5bc6824 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Sat, 1 Nov 2025 16:16:32 +0800 Subject: [PATCH 5/6] Introduce ArrowArrayBuilderProtocol which will become the ArrowArrayBuilder base. --- Sources/Arrow/ArrowArrayBuilder.swift | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Sources/Arrow/ArrowArrayBuilder.swift b/Sources/Arrow/ArrowArrayBuilder.swift index 280604f..4372a94 100644 --- a/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Sources/Arrow/ArrowArrayBuilder.swift @@ -1,5 +1,5 @@ // Copyright 2025 The Apache Software Foundation -// Copyright 2025 The Columnar-Swift Contributors +// Copyright 2025 The Columnar Swift Contributors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,10 +20,20 @@ public protocol AnyArrowArrayBuilder { func appendAny(_ val: Any?) } +/// A type which can build an `ArrowArray`of `ItemType`. +public protocol ArrowArrayBuilderProtocol { + associatedtype BufferBuilder: ArrowBufferBuilder + associatedtype ArrayType: ArrowArray + where ArrayType.ItemType == BufferBuilder.ItemType + + mutating func append(_ value: BufferBuilder.ItemType?) 
throws(ArrowError) + func finish() throws(ArrowError) -> ArrayType +} + public class ArrowArrayBuilder< BufferBuilder: ArrowBufferBuilder, ArrayType: ArrowArray ->: AnyArrowArrayBuilder { +>: AnyArrowArrayBuilder, ArrowArrayBuilderProtocol { let arrowType: ArrowType let bufferBuilder: BufferBuilder public var length: UInt { self.bufferBuilder.length } @@ -56,9 +66,7 @@ public class ArrowArrayBuilder< self.bufferBuilder.append(val as? BufferBuilder.ItemType) } - public func finish() throws(ArrowError) -> any ArrowArray< - BufferBuilder.ItemType - > { + public func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() let arrowData = ArrowData( self.arrowType, @@ -206,7 +214,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< } } - public override func finish() throws(ArrowError) -> any ArrowArray<[Any?]> { + public override func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() var childData: [ArrowData] = [] for builder in self.builders { @@ -247,7 +255,7 @@ public class ListArrayBuilder: ArrowArrayBuilder } } - public override func finish() throws(ArrowError) -> any ArrowArray<[Any?]> { + public override func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() let childData = try valueBuilder.toAnyArrowArray().arrowData let arrowData = ArrowData( From ee6e143e55d86bbdc8e257c8554e527908ea2d2b Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Sat, 1 Nov 2025 20:51:51 +0800 Subject: [PATCH 6/6] Migrate most ArrowArrayBuilder functions to protocol extensions. 
--- Sources/Arrow/ArrowArrayBuilder.swift | 127 +++++++++++++++++--------- Tests/ArrowTests/ArrayTests.swift | 2 +- Tests/ArrowTests/IPCTests.swift | 2 +- 3 files changed, 84 insertions(+), 47 deletions(-) diff --git a/Sources/Arrow/ArrowArrayBuilder.swift b/Sources/Arrow/ArrowArrayBuilder.swift index 4372a94..e7ef607 100644 --- a/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Sources/Arrow/ArrowArrayBuilder.swift @@ -15,36 +15,38 @@ import Foundation +// MARK: Array builder interface. + +/// A type which builds a type-erased `ArrowArray`. public protocol AnyArrowArrayBuilder { func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray func appendAny(_ val: Any?) } /// A type which can build an `ArrowArray`of `ItemType`. -public protocol ArrowArrayBuilderProtocol { +public protocol ArrowArrayBuilder { associatedtype BufferBuilder: ArrowBufferBuilder associatedtype ArrayType: ArrowArray where ArrayType.ItemType == BufferBuilder.ItemType - mutating func append(_ value: BufferBuilder.ItemType?) throws(ArrowError) + func append(_ vals: BufferBuilder.ItemType?...) + func append(_ vals: [BufferBuilder.ItemType?]) + func append(_ val: BufferBuilder.ItemType?) + func appendAny(_ val: Any?) 
func finish() throws(ArrowError) -> ArrayType } -public class ArrowArrayBuilder< - BufferBuilder: ArrowBufferBuilder, - ArrayType: ArrowArray ->: AnyArrowArrayBuilder, ArrowArrayBuilderProtocol { - let arrowType: ArrowType - let bufferBuilder: BufferBuilder - public var length: UInt { self.bufferBuilder.length } - public var capacity: UInt { self.bufferBuilder.capacity } - public var nullCount: UInt { self.bufferBuilder.nullCount } - public var offset: UInt { self.bufferBuilder.offset } +internal protocol ArrowArrayBuilderInternal: ArrowArrayBuilder { + var arrowType: ArrowType { get } + var bufferBuilder: BufferBuilder { get } +} - fileprivate init(_ type: ArrowType) throws(ArrowError) { - self.arrowType = type - self.bufferBuilder = BufferBuilder() - } +extension ArrowArrayBuilderInternal { + + var length: UInt { self.bufferBuilder.length } + var capacity: UInt { self.bufferBuilder.capacity } + var nullCount: UInt { self.bufferBuilder.nullCount } + var offset: UInt { self.bufferBuilder.offset } public func append(_ vals: BufferBuilder.ItemType?...) { for val in vals { @@ -66,6 +68,35 @@ public class ArrowArrayBuilder< self.bufferBuilder.append(val as? BufferBuilder.ItemType) } + /// Returns the byte width of this type if it is a primitive type. + public func stride() -> Int { + self.arrowType.getStride() + } + + /// Returns an unparameterised `ArrowArray`. + /// - Returns: The type-erased Arrow array. + public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray { + try self.finish() + } +} + +// MARK: Base implementation. + +// Note: It would be preferable to move all of this to a protocol, however +// ListArrayBuilder overrides finish. 
This is delicate because protocol +// extension method dispatching means the +public class ArrowArrayBuilderBase< + BufferBuilder: ArrowBufferBuilder, + ArrayType: ArrowArray +>: AnyArrowArrayBuilder, ArrowArrayBuilderInternal { + let arrowType: ArrowType + let bufferBuilder: BufferBuilder + + fileprivate init(_ type: ArrowType) throws(ArrowError) { + self.arrowType = type + self.bufferBuilder = BufferBuilder() + } + public func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() let arrowData = ArrowData( @@ -76,27 +107,20 @@ public class ArrowArrayBuilder< let array = try ArrayType(arrowData) return array } - - public func getStride() -> Int { - self.arrowType.getStride() - } - - public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray { - try self.finish() - } } -public class NumberArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, - FixedArray +/// A type which builds an `ArrowArray` with a numeric `ItemType`. +public class NumberArrayBuilder: ArrowArrayBuilderBase< + FixedBufferBuilder, + FixedArray > -where T: Numeric, T: BitwiseCopyable { +where ItemType: Numeric, ItemType: BitwiseCopyable { fileprivate convenience init() throws(ArrowError) { - try self.init(try ArrowTypeConverter.infoForNumericType(T.self)) + try self.init(try ArrowTypeConverter.infoForNumericType(ItemType.self)) } } -public class StringArrayBuilder: ArrowArrayBuilder< +public class StringArrayBuilder: ArrowArrayBuilderBase< VariableBufferBuilder, StringArray > @@ -106,7 +130,7 @@ public class StringArrayBuilder: ArrowArrayBuilder< } } -public class BinaryArrayBuilder: ArrowArrayBuilder< +public class BinaryArrayBuilder: ArrowArrayBuilderBase< VariableBufferBuilder, BinaryArray > @@ -116,13 +140,16 @@ public class BinaryArrayBuilder: ArrowArrayBuilder< } } -public class BoolArrayBuilder: ArrowArrayBuilder { +public class BoolArrayBuilder: ArrowArrayBuilderBase< + BoolBufferBuilder, BoolArray +> +{ fileprivate convenience init() 
throws(ArrowError) { try self.init(.boolean) } } -public class Date32ArrayBuilder: ArrowArrayBuilder< +public class Date32ArrayBuilder: ArrowArrayBuilderBase< Date32BufferBuilder, Date32Array > @@ -132,7 +159,7 @@ public class Date32ArrayBuilder: ArrowArrayBuilder< } } -public class Date64ArrayBuilder: ArrowArrayBuilder< +public class Date64ArrayBuilder: ArrowArrayBuilderBase< Date64BufferBuilder, Date64Array > @@ -142,7 +169,7 @@ public class Date64ArrayBuilder: ArrowArrayBuilder< } } -public class Time32ArrayBuilder: ArrowArrayBuilder< +public class Time32ArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, Time32Array > @@ -152,7 +179,7 @@ public class Time32ArrayBuilder: ArrowArrayBuilder< } } -public class Time64ArrayBuilder: ArrowArrayBuilder< +public class Time64ArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, Time64Array > @@ -162,7 +189,7 @@ public class Time64ArrayBuilder: ArrowArrayBuilder< } } -public class TimestampArrayBuilder: ArrowArrayBuilder< +public class TimestampArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, TimestampArray > @@ -174,16 +201,19 @@ public class TimestampArrayBuilder: ArrowArrayBuilder< } } -public class StructArrayBuilder: ArrowArrayBuilder< +// MARK: Struct array builder. + +public class StructArrayBuilder: ArrowArrayBuilderBase< StructBufferBuilder, NestedArray > { let builders: [any AnyArrowArrayBuilder] let fields: [ArrowField] - public init(_ fields: [ArrowField], builders: [any AnyArrowArrayBuilder]) - throws(ArrowError) - { + public init( + _ fields: [ArrowField], + builders: [any AnyArrowArrayBuilder] + ) throws(ArrowError) { self.fields = fields self.builders = builders try super.init(.strct(fields)) @@ -201,7 +231,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< try super.init(.strct(fields)) } - public override func append(_ values: [Any?]?) { + public func append(_ values: [Any?]?) { self.bufferBuilder.append(values) if let anyValues = values { for index in 0.. 
+// MARK: List array builder. + +/// A type which can build a `NestedArray` containing exactly `ItemType`. +public class ListArrayBuilder: ArrowArrayBuilderBase< + ListBufferBuilder, + NestedArray +> { let valueBuilder: any AnyArrowArrayBuilder @@ -246,7 +282,8 @@ public class ListArrayBuilder: ArrowArrayBuilder try super.init(elementType) } - public override func append(_ values: [Any?]?) { + // Overrides the default + public func append(_ values: [Any?]?) { self.bufferBuilder.append(values) if let vals = values { for val in vals { @@ -330,7 +367,7 @@ public enum ArrowArrayBuilders { || type == Float.self || type == Date.self } - public static func loadStructArrayBuilderForType( + public static func structArrayBuilderForType( _ obj: T ) throws -> StructArrayBuilder { let mirror = Mirror(reflecting: obj) diff --git a/Tests/ArrowTests/ArrayTests.swift b/Tests/ArrowTests/ArrayTests.swift index ef40a22..4bf0755 100644 --- a/Tests/ArrowTests/ArrayTests.swift +++ b/Tests/ArrowTests/ArrayTests.swift @@ -361,7 +361,7 @@ struct ArrayTests { let testData = StructTest() let dateNow = Date.now - let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType( + let structBuilder = try ArrowArrayBuilders.structArrayBuilderForType( testData) structBuilder.append([ true, Int8(1), Int16(2), Int32(3), Int64(4), diff --git a/Tests/ArrowTests/IPCTests.swift b/Tests/ArrowTests/IPCTests.swift index f8ded1a..8b0b9ea 100644 --- a/Tests/ArrowTests/IPCTests.swift +++ b/Tests/ArrowTests/IPCTests.swift @@ -168,7 +168,7 @@ func makeStructSchema() throws -> ArrowSchema { func makeStructRecordBatch() throws -> RecordBatch { let testData = StructTest() let dateNow = Date.now - let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType( + let structBuilder = try ArrowArrayBuilders.structArrayBuilderForType( testData ) structBuilder.append([