diff --git a/Sources/Arrow/ArrowArray.swift b/Sources/Arrow/ArrowArray.swift index f9de931..9e47915 100644 --- a/Sources/Arrow/ArrowArray.swift +++ b/Sources/Arrow/ArrowArray.swift @@ -24,12 +24,13 @@ public protocol AnyArrowArray { var bufferData: [Data] { get } var bufferDataSizes: [Int] { get } func asAny(_ index: UInt) -> Any? + func asString(_ index: UInt) -> String } // MARK: - Core Protocol /// The interface for Arrow array types. -public protocol ArrowArray: AsString, AnyArrowArray { +public protocol ArrowArray: AnyArrowArray { associatedtype ItemType var arrowData: ArrowData { get } init(_ arrowData: ArrowData) throws(ArrowError) @@ -38,7 +39,6 @@ public protocol ArrowArray: AsString, AnyArrowArray { // MARK: - Default Implementations extension ArrowArray { - public var nullCount: UInt { arrowData.nullCount } @@ -188,8 +188,7 @@ public struct Date32Array: ArrowArray { let byteOffset = self.arrowData.stride * Int(index) let milliseconds = self.arrowData.buffers[1].rawPointer.advanced( by: byteOffset - ).load( - as: UInt32.self) + ).load(as: UInt32.self) return Date(timeIntervalSince1970: TimeInterval(milliseconds * 86400)) } } @@ -209,8 +208,7 @@ public struct Date64Array: ArrowArray { let byteOffset = self.arrowData.stride * Int(index) let milliseconds = self.arrowData.buffers[1].rawPointer.advanced( by: byteOffset - ).load( - as: UInt64.self) + ).load(as: UInt64.self) return Date(timeIntervalSince1970: TimeInterval(milliseconds / 1000)) } } @@ -451,7 +449,7 @@ public struct NestedArray: ArrowArray, AnyArrowArray { switch item { case nil: output.append("null") - case let asStringItem as AsString: + case let asStringItem as AnyArrowArray: output.append(asStringItem.asString(0)) case let someItem?: output.append("\(someItem)") @@ -466,7 +464,7 @@ public struct NestedArray: ArrowArray, AnyArrowArray { var output = "{" if let children = self.children { let parts = children.compactMap { child in - (child as? 
AsString)?.asString(index) + child.asString(index) } output.append(parts.joined(separator: ",")) } diff --git a/Sources/Arrow/ArrowArrayBuilder.swift b/Sources/Arrow/ArrowArrayBuilder.swift index dae440f..e7ef607 100644 --- a/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Sources/Arrow/ArrowArrayBuilder.swift @@ -1,5 +1,5 @@ // Copyright 2025 The Apache Software Foundation -// Copyright 2025 The Columnar-Swift Contributors +// Copyright 2025 The Columnar Swift Contributors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,80 +15,114 @@ import Foundation -// FIXME: Rename or remove -public protocol ArrowArrayHolderBuilder { - func toHolder() throws(ArrowError) -> AnyArrowArray +// MARK: Array builder interface. + +/// A type which builds a type-erased `ArrowArray`. +public protocol AnyArrowArrayBuilder { + func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray func appendAny(_ val: Any?) } -public class ArrowArrayBuilder< - T: ArrowBufferBuilder, U: ArrowArray ->: - ArrowArrayHolderBuilder -{ - let type: ArrowType - let bufferBuilder: T - public var length: UInt { self.bufferBuilder.length } - public var capacity: UInt { self.bufferBuilder.capacity } - public var nullCount: UInt { self.bufferBuilder.nullCount } - public var offset: UInt { self.bufferBuilder.offset } +/// A type which can build an `ArrowArray` of `ItemType`. +public protocol ArrowArrayBuilder { + associatedtype BufferBuilder: ArrowBufferBuilder + associatedtype ArrayType: ArrowArray + where ArrayType.ItemType == BufferBuilder.ItemType - fileprivate init(_ type: ArrowType) throws(ArrowError) { - self.type = type - self.bufferBuilder = T() - } + func append(_ vals: BufferBuilder.ItemType?...) + func append(_ vals: [BufferBuilder.ItemType?]) + func append(_ val: BufferBuilder.ItemType?) + func appendAny(_ val: Any?) 
+ func finish() throws(ArrowError) -> ArrayType +} + +internal protocol ArrowArrayBuilderInternal: ArrowArrayBuilder { + var arrowType: ArrowType { get } + var bufferBuilder: BufferBuilder { get } +} + +extension ArrowArrayBuilderInternal { - public func append(_ vals: T.ItemType?...) { + var length: UInt { self.bufferBuilder.length } + var capacity: UInt { self.bufferBuilder.capacity } + var nullCount: UInt { self.bufferBuilder.nullCount } + var offset: UInt { self.bufferBuilder.offset } + + public func append(_ vals: BufferBuilder.ItemType?...) { for val in vals { self.bufferBuilder.append(val) } } - public func append(_ vals: [T.ItemType?]) { + public func append(_ vals: [BufferBuilder.ItemType?]) { for val in vals { self.bufferBuilder.append(val) } } - public func append(_ val: T.ItemType?) { + public func append(_ val: BufferBuilder.ItemType?) { self.bufferBuilder.append(val) } public func appendAny(_ val: Any?) { - self.bufferBuilder.append(val as? T.ItemType) + self.bufferBuilder.append(val as? BufferBuilder.ItemType) + } + + /// Returns the byte width of this type if it is a primitive type. + public func stride() -> Int { + self.arrowType.getStride() + } + + /// Returns an unparameterised `ArrowArray`. + /// - Returns: The type-erased Arrow array. + public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray { + try self.finish() + } +} + +// MARK: Base implementation. + +// Note: It would be preferable to move all of this to a protocol, however +// ListArrayBuilder overrides finish. 
This is delicate because protocol +// extension method dispatching means the subclass override can be bypassed. +public class ArrowArrayBuilderBase< + BufferBuilder: ArrowBufferBuilder, + ArrayType: ArrowArray +>: AnyArrowArrayBuilder, ArrowArrayBuilderInternal { + let arrowType: ArrowType + let bufferBuilder: BufferBuilder + + fileprivate init(_ type: ArrowType) throws(ArrowError) { + self.arrowType = type + self.bufferBuilder = BufferBuilder() } - public func finish() throws(ArrowError) -> any ArrowArray { + public func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() let arrowData = ArrowData( - self.type, + self.arrowType, buffers: buffers, nullCount: self.nullCount ) - let array = try U(arrowData) + let array = try ArrayType(arrowData) return array } - - public func getStride() -> Int { - self.type.getStride() - } - - public func toHolder() throws(ArrowError) -> AnyArrowArray { - try self.finish() - } } -public class NumberArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, FixedArray +/// A type which builds an `ArrowArray` with a numeric `ItemType`. 
+public class NumberArrayBuilder: ArrowArrayBuilderBase< + FixedBufferBuilder, + FixedArray > -where T: Numeric, T: BitwiseCopyable { +where ItemType: Numeric, ItemType: BitwiseCopyable { fileprivate convenience init() throws(ArrowError) { - try self.init(try ArrowTypeConverter.infoForNumericType(T.self)) + try self.init(try ArrowTypeConverter.infoForNumericType(ItemType.self)) } } -public class StringArrayBuilder: ArrowArrayBuilder< - VariableBufferBuilder, StringArray +public class StringArrayBuilder: ArrowArrayBuilderBase< + VariableBufferBuilder, + StringArray > { fileprivate convenience init() throws(ArrowError) { @@ -96,8 +130,9 @@ public class StringArrayBuilder: ArrowArrayBuilder< } } -public class BinaryArrayBuilder: ArrowArrayBuilder< - VariableBufferBuilder, BinaryArray +public class BinaryArrayBuilder: ArrowArrayBuilderBase< + VariableBufferBuilder, + BinaryArray > { fileprivate convenience init() throws(ArrowError) { @@ -105,14 +140,18 @@ public class BinaryArrayBuilder: ArrowArrayBuilder< } } -public class BoolArrayBuilder: ArrowArrayBuilder { +public class BoolArrayBuilder: ArrowArrayBuilderBase< + BoolBufferBuilder, BoolArray +> +{ fileprivate convenience init() throws(ArrowError) { try self.init(.boolean) } } -public class Date32ArrayBuilder: ArrowArrayBuilder< - Date32BufferBuilder, Date32Array +public class Date32ArrayBuilder: ArrowArrayBuilderBase< + Date32BufferBuilder, + Date32Array > { fileprivate convenience init() throws(ArrowError) { @@ -120,8 +159,9 @@ public class Date32ArrayBuilder: ArrowArrayBuilder< } } -public class Date64ArrayBuilder: ArrowArrayBuilder< - Date64BufferBuilder, Date64Array +public class Date64ArrayBuilder: ArrowArrayBuilderBase< + Date64BufferBuilder, + Date64Array > { fileprivate convenience init() throws(ArrowError) { @@ -129,8 +169,9 @@ public class Date64ArrayBuilder: ArrowArrayBuilder< } } -public class Time32ArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, Time32Array +public class Time32ArrayBuilder: 
ArrowArrayBuilderBase< + FixedBufferBuilder, + Time32Array > { fileprivate convenience init(_ unit: TimeUnit) throws(ArrowError) { @@ -138,8 +179,9 @@ public class Time32ArrayBuilder: ArrowArrayBuilder< } } -public class Time64ArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, Time64Array +public class Time64ArrayBuilder: ArrowArrayBuilderBase< + FixedBufferBuilder, + Time64Array > { fileprivate convenience init(_ unit: TimeUnit) throws(ArrowError) { @@ -147,8 +189,9 @@ public class Time64ArrayBuilder: ArrowArrayBuilder< } } -public class TimestampArrayBuilder: ArrowArrayBuilder< - FixedBufferBuilder, TimestampArray +public class TimestampArrayBuilder: ArrowArrayBuilderBase< + FixedBufferBuilder, + TimestampArray > { fileprivate convenience init( @@ -158,15 +201,19 @@ public class TimestampArrayBuilder: ArrowArrayBuilder< } } -public class StructArrayBuilder: ArrowArrayBuilder< - StructBufferBuilder, NestedArray +// MARK: Struct array builder. + +public class StructArrayBuilder: ArrowArrayBuilderBase< + StructBufferBuilder, + NestedArray > { - let builders: [any ArrowArrayHolderBuilder] + let builders: [any AnyArrowArrayBuilder] let fields: [ArrowField] - public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) - throws(ArrowError) - { + public init( + _ fields: [ArrowField], + builders: [any AnyArrowArrayBuilder] + ) throws(ArrowError) { self.fields = fields self.builders = builders try super.init(.strct(fields)) @@ -175,7 +222,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< public init(_ fields: [ArrowField]) throws(ArrowError) { self.fields = fields - var builders: [any ArrowArrayHolderBuilder] = [] + var builders: [any AnyArrowArrayBuilder] = [] for field in fields { builders.append( try ArrowArrayBuilders.loadBuilder(arrowType: field.type)) @@ -184,7 +231,7 @@ public class StructArrayBuilder: ArrowArrayBuilder< try super.init(.strct(fields)) } - public override func append(_ values: [Any?]?) 
{ + public func append(_ values: [Any?]?) { self.bufferBuilder.append(values) if let anyValues = values { for index in 0.. any ArrowArray<[Any?]> { + public override func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() var childData: [ArrowData] = [] for builder in self.builders { - childData.append(try builder.toHolder().arrowData) + childData.append(try builder.toAnyArrowArray().arrowData) } let arrowData = ArrowData( - self.type, buffers: buffers, + self.arrowType, buffers: buffers, children: childData, nullCount: self.nullCount, length: self.length) @@ -213,9 +260,15 @@ public class StructArrayBuilder: ArrowArrayBuilder< } } -public class ListArrayBuilder: ArrowArrayBuilder +// MARK: List array builder. + +/// A type which can build a `NestedArray` containing exactly `ItemType`. +public class ListArrayBuilder: ArrowArrayBuilderBase< + ListBufferBuilder, + NestedArray +> { - let valueBuilder: any ArrowArrayHolderBuilder + let valueBuilder: any AnyArrowArrayBuilder public override init(_ elementType: ArrowType) throws(ArrowError) { @@ -229,7 +282,8 @@ public class ListArrayBuilder: ArrowArrayBuilder try super.init(elementType) } - public override func append(_ values: [Any?]?) { + // Overrides the default + public func append(_ values: [Any?]?) 
{ self.bufferBuilder.append(values) if let vals = values { for val in vals { @@ -238,11 +292,11 @@ public class ListArrayBuilder: ArrowArrayBuilder } } - public override func finish() throws(ArrowError) -> any ArrowArray<[Any?]> { + public override func finish() throws(ArrowError) -> ArrayType { let buffers = self.bufferBuilder.finish() - let childData = try valueBuilder.toHolder().arrowData + let childData = try valueBuilder.toAnyArrowArray().arrowData let arrowData = ArrowData( - self.type, + self.arrowType, buffers: buffers, children: [childData], nullCount: self.nullCount, @@ -252,10 +306,10 @@ public class ListArrayBuilder: ArrowArrayBuilder } } -public class ArrowArrayBuilders { - public static func loadBuilder( - _ builderType: Any.Type - ) throws(ArrowError) -> ArrowArrayHolderBuilder { +public enum ArrowArrayBuilders { + public static func builder( + for builderType: Any.Type + ) throws(ArrowError) -> AnyArrowArrayBuilder { if builderType == Int8.self || builderType == Int8?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder @@ -313,11 +367,11 @@ public class ArrowArrayBuilders { || type == Float.self || type == Date.self } - public static func loadStructArrayBuilderForType( + public static func structArrayBuilderForType( _ obj: T ) throws -> StructArrayBuilder { let mirror = Mirror(reflecting: obj) - var builders: [ArrowArrayHolderBuilder] = [] + var builders: [AnyArrowArrayBuilder] = [] var fields: [ArrowField] = [] for (property, value) in mirror.children { guard let propertyName = property else { @@ -339,7 +393,7 @@ public class ArrowArrayBuilders { public static func loadBuilder( arrowType: ArrowType - ) throws(ArrowError) -> ArrowArrayHolderBuilder { + ) throws(ArrowError) -> AnyArrowArrayBuilder { switch arrowType { case .uint8: return try loadNumberArrayBuilder() as NumberArrayBuilder diff --git a/Sources/Arrow/AnyArrowArray.swift b/Sources/Arrow/ArrowArrayLoader.swift similarity index 97% rename from 
Sources/Arrow/AnyArrowArray.swift rename to Sources/Arrow/ArrowArrayLoader.swift index b27c33b..b3b799b 100644 --- a/Sources/Arrow/AnyArrowArray.swift +++ b/Sources/Arrow/ArrowArrayLoader.swift @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// FIXME: Temporary to support holder refactor +// TODO: Duplicated struct ArrowArrayLoader { public static func loadArray( _ arrowType: ArrowType, diff --git a/Sources/Arrow/ArrowEncoder.swift b/Sources/Arrow/ArrowEncoder.swift index a935891..f15573d 100644 --- a/Sources/Arrow/ArrowEncoder.swift +++ b/Sources/Arrow/ArrowEncoder.swift @@ -15,7 +15,7 @@ import Foundation public class ArrowEncoder: Encoder { - public private(set) var builders: [String: ArrowArrayHolderBuilder] = [:] + public private(set) var builders: [String: AnyArrowArrayBuilder] = [:] private var byIndex: [String] = [] public var codingPath: [CodingKey] = [] public var userInfo: [CodingUserInfoKey: Any] = [:] @@ -29,9 +29,7 @@ public class ArrowEncoder: Encoder { public init() {} - public init( - _ builders: [String: ArrowArrayHolderBuilder], byIndex: [String] - ) { + public init(_ builders: [String: AnyArrowArrayBuilder], byIndex: [String]) { self.builders = builders self.byIndex = byIndex } @@ -59,14 +57,13 @@ public class ArrowEncoder: Encoder { // this will check if T is a simple built in type // (UInt, Int, Int8, String, Date, etc...). 
if ArrowArrayBuilders.isValidBuilderType(T.self) { - let builders = ["col0": try ArrowArrayBuilders.loadBuilder(T.self)] + let builders = ["col0": try ArrowArrayBuilders.builder(for: T.self)] return ArrowEncoder(builders, byIndex: ["col0"]) } else { let encoder = ArrowEncoder() if data is [AnyHashable: Any] { encoder.modForIndex = 2 } - return encoder } } @@ -78,7 +75,7 @@ public class ArrowEncoder: Encoder { guard let builder = builders[key] else { throw .invalid("Missing builder for \(key)") } - batchBuilder.addColumn(key, arrowArray: try builder.toHolder()) + batchBuilder.addColumn(key, arrowArray: try builder.toAnyArrowArray()) } return try batchBuilder.finish().get() } @@ -116,12 +113,12 @@ public class ArrowEncoder: Encoder { func ensureColumnExists( _ value: T, key: String - ) throws(ArrowError) -> ArrowArrayHolderBuilder { + ) throws(ArrowError) -> AnyArrowArrayBuilder { try throwIfInvalid() if let builder = builders[key] { return builder } - let builder = try ArrowArrayBuilders.loadBuilder(T.self) + let builder = try ArrowArrayBuilders.builder(for: T.self) builders[key] = builder byIndex.append(key) return builder diff --git a/Sources/Arrow/ArrowReader.swift b/Sources/Arrow/ArrowReader.swift index 8891ec1..f3985b1 100644 --- a/Sources/Arrow/ArrowReader.swift +++ b/Sources/Arrow/ArrowReader.swift @@ -544,4 +544,11 @@ public struct ArrowReader: Sendable { .unknownError("Unhandled header type: \(message.headerType)")) } } + + func validateFileData(_ data: Data) -> Bool { + let markerLength = fileMarker.count + let startString = data[.. 
Result { - if field.type == .date32 { - let arrowData = ArrowData( - field.type, - buffers: buffers, - nullCount: nullCount - ) - return .success(Date32Array(arrowData)) - } - let arrowData = ArrowData( - field.type, - buffers: buffers, - nullCount: nullCount - ) - return .success(Date64Array(arrowData)) -} - private func makeBoolHolder( _ buffers: [ArrowBuffer], nullCount: UInt @@ -193,8 +172,20 @@ func makeArrayHolder( return makeStringHolder(buffers, nullCount: nullCount) case .binary: return makeBinaryHolder(buffers, nullCount: nullCount) - case .date32, .date64: - return makeDateHolder(field, buffers: buffers, nullCount: nullCount) + case .date32: + let arrowData = ArrowData( + field.type, + buffers: buffers, + nullCount: nullCount + ) + return .success(Date32Array(arrowData)) + case .date64: + let arrowData = ArrowData( + field.type, + buffers: buffers, + nullCount: nullCount + ) + return .success(Date64Array(arrowData)) case .time32: let arrowData = ArrowData( field.type, buffers: buffers, nullCount: nullCount) @@ -226,160 +217,6 @@ func makeArrayHolder( } } -func makeBuffer( - _ buffer: Buffer, fileData: Data, - length: UInt, messageOffset: Int64 -) -> ArrowBuffer { - let startOffset = messageOffset + buffer.offset - let endOffset = startOffset + buffer.length - let bufferData = [UInt8](fileData[startOffset.. Bool { - switch type { - case .int, .bool, .floatingpoint, .date, .time, .timestamp: - return true - default: - return false - } -} - -func findArrowType(_ field: FlatField) throws(ArrowError) -> ArrowType { - let type = field.typeType - switch type { - case .int: - guard let intType = field.type(type: FlatInt.self) else { - throw .invalid("Could not get integer type from \(field)") - } - let bitWidth = intType.bitWidth - if bitWidth == 8 { - if intType.isSigned { - return .int8 - } else { - return .uint8 - } - } - if bitWidth == 16 { - return intType.isSigned ? .int16 : .uint16 - } - if bitWidth == 32 { - return intType.isSigned ? 
.int32 : .uint32 - } - if bitWidth == 64 { - return intType.isSigned ? .int64 : .uint64 - } - throw .invalid("Unhandled integer bit width: \(bitWidth)") - case .bool: - return .boolean - case .floatingpoint: - guard let floatType = field.type(type: FloatingPoint.self) else { - throw .invalid("Could not get floating point type from field") - } - switch floatType.precision { - case .half: - return .float16 - case .single: - return .float32 - case .double: - return .float64 - } - case .utf8: - return .utf8 - case .binary: - return .binary - case .date: - guard let dateType = field.type(type: FlatDate.self) else { - throw .invalid("Could not get date type from field") - } - if dateType.unit == .day { - return .date32 - } - return .date64 - case .time: - guard let timeType = field.type(type: FlatTime.self) else { - throw .invalid("Could not get time type from field") - } - if timeType.unit == .second || timeType.unit == .millisecond { - return .time32( - timeType.unit == .second ? .second : .millisecond - ) - } - return .time64( - timeType.unit == .microsecond ? .microsecond : .nanosecond - ) - case .timestamp: - guard let timestampType = field.type(type: FlatTimestamp.self) else { - throw .invalid("Could not get timestamp type from field") - } - let arrowUnit: TimeUnit - switch timestampType.unit { - case .second: - arrowUnit = .second - case .millisecond: - arrowUnit = .millisecond - case .microsecond: - arrowUnit = .microsecond - case .nanosecond: - arrowUnit = .nanosecond - } - let timezone = timestampType.timezone - return .timestamp(arrowUnit, timezone) - case .struct_: - guard field.type(type: FlatStruct.self) != nil else { - throw .invalid("Could not get struct type from field") - } - var fields: [ArrowField] = [] - for index in 0..= recordBatch.buffersCount { - throw ArrowError.outOfBounds(index: Int64(index)) - } -} - -func validateFileData(_ data: Data) -> Bool { - let markerLength = fileMarker.count - let startString = data[.. 
UInt32 { let token = data.withUnsafeBytes { rawBuffer in rawBuffer.loadUnaligned(fromByteOffset: offset, as: UInt32.self) diff --git a/Sources/Arrow/ArrowTable.swift b/Sources/Arrow/ArrowTable.swift index fdfcba4..d1daebd 100644 --- a/Sources/Arrow/ArrowTable.swift +++ b/Sources/Arrow/ArrowTable.swift @@ -210,6 +210,7 @@ public class RecordBatch { public var columnCount: UInt { UInt(self.columns.count) } public let columns: [AnyArrowArray] public let length: UInt + public init(_ schema: ArrowSchema, columns: [AnyArrowArray]) { self.schema = schema self.columns = columns @@ -222,6 +223,12 @@ public class RecordBatch { public init() {} + /// Add a column to the `RecordBatch` builder. + /// - Parameters: + /// - fieldName: The field name. + /// - arrowArray: The array to add to the record batch. + /// - Returns: The `RecordBatch.Builder` with the array appended and the field added to + /// the schema. If the array contains zero nulls, the field is defined as non-null. @discardableResult public func addColumn( _ fieldName: String, diff --git a/Sources/Arrow/ChunkedArray.swift b/Sources/Arrow/ChunkedArray.swift index bc8fde0..182caf0 100644 --- a/Sources/Arrow/ChunkedArray.swift +++ b/Sources/Arrow/ChunkedArray.swift @@ -14,10 +14,6 @@ import Foundation -public protocol AsString { - func asString(_ index: UInt) -> String -} - public class ChunkedArrayHolder { public let type: ArrowType public let length: UInt @@ -85,7 +81,7 @@ public class ChunkedArrayHolder { } } -public class ChunkedArray: AsString { +public class ChunkedArray { public let arrays: [any ArrowArray] public let type: ArrowType public let nullCount: UInt diff --git a/Sources/Arrow/FlatBuffersHelpers.swift b/Sources/Arrow/FlatBuffersHelpers.swift new file mode 100644 index 0000000..6fcd3a1 --- /dev/null +++ b/Sources/Arrow/FlatBuffersHelpers.swift @@ -0,0 +1,165 @@ +// Copyright 2025 The Apache Software Foundation +// Copyright 2025 The Columnar-Swift Contributors +// +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +func makeBuffer( + _ buffer: Buffer, + fileData: Data, + length: UInt, + messageOffset: Int64 +) -> ArrowBuffer { + let startOffset = messageOffset + buffer.offset + let endOffset = startOffset + buffer.length + let bufferData = [UInt8](fileData[startOffset.. Bool { + switch type { + case .int, .bool, .floatingpoint, .date, .time, .timestamp: + return true + default: + return false + } +} + +func findArrowType(_ field: FlatField) throws(ArrowError) -> ArrowType { + let type = field.typeType + switch type { + case .int: + guard let intType = field.type(type: FlatInt.self) else { + throw .invalid("Could not get integer type from \(field)") + } + let bitWidth = intType.bitWidth + if bitWidth == 8 { + if intType.isSigned { + return .int8 + } else { + return .uint8 + } + } + if bitWidth == 16 { + return intType.isSigned ? .int16 : .uint16 + } + if bitWidth == 32 { + return intType.isSigned ? .int32 : .uint32 + } + if bitWidth == 64 { + return intType.isSigned ? 
.int64 : .uint64 + } + throw .invalid("Unhandled integer bit width: \(bitWidth)") + case .bool: + return .boolean + case .floatingpoint: + guard let floatType = field.type(type: FloatingPoint.self) else { + throw .invalid("Could not get floating point type from field") + } + switch floatType.precision { + case .half: + return .float16 + case .single: + return .float32 + case .double: + return .float64 + } + case .utf8: + return .utf8 + case .binary: + return .binary + case .date: + guard let dateType = field.type(type: FlatDate.self) else { + throw .invalid("Could not get date type from field") + } + if dateType.unit == .day { + return .date32 + } + return .date64 + case .time: + guard let timeType = field.type(type: FlatTime.self) else { + throw .invalid("Could not get time type from field") + } + if timeType.unit == .second || timeType.unit == .millisecond { + return .time32( + timeType.unit == .second ? .second : .millisecond + ) + } + return .time64( + timeType.unit == .microsecond ? 
.microsecond : .nanosecond + ) + case .timestamp: + guard let timestampType = field.type(type: FlatTimestamp.self) else { + throw .invalid("Could not get timestamp type from field") + } + let arrowUnit: TimeUnit + switch timestampType.unit { + case .second: + arrowUnit = .second + case .millisecond: + arrowUnit = .millisecond + case .microsecond: + arrowUnit = .microsecond + case .nanosecond: + arrowUnit = .nanosecond + } + let timezone = timestampType.timezone + return .timestamp(arrowUnit, timezone) + case .struct_: + guard field.type(type: FlatStruct.self) != nil else { + throw .invalid("Could not get struct type from field") + } + var fields: [ArrowField] = [] + for index in 0..= recordBatch.buffersCount { + throw ArrowError.outOfBounds(index: Int64(index)) + } +} diff --git a/Tests/ArrowTests/ArrayBuilderTests.swift b/Tests/ArrowTests/ArrayBuilderTests.swift index 4a648c4..c7555f6 100644 --- a/Tests/ArrowTests/ArrayBuilderTests.swift +++ b/Tests/ArrowTests/ArrayBuilderTests.swift @@ -53,36 +53,36 @@ struct ArrayBuilderTests { @Test func loadArrayBuilders() throws { #expect(throws: Never.self) { - let _ = try ArrowArrayBuilders.loadBuilder(Int8.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int16.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int32.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int64.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt8.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt16.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt32.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt64.self) - let _ = try ArrowArrayBuilders.loadBuilder(Float.self) - let _ = try ArrowArrayBuilders.loadBuilder(Double.self) - let _ = try ArrowArrayBuilders.loadBuilder(Date.self) - let _ = try ArrowArrayBuilders.loadBuilder(Bool.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int8?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int16?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int32?.self) - let _ = try 
ArrowArrayBuilders.loadBuilder(Int64?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt8?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt16?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt32?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt64?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Float?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Double?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Date?.self) - let _ = try ArrowArrayBuilders.loadBuilder(Bool?.self) + let _ = try ArrowArrayBuilders.builder(for: Int8.self) + let _ = try ArrowArrayBuilders.builder(for: Int16.self) + let _ = try ArrowArrayBuilders.builder(for: Int32.self) + let _ = try ArrowArrayBuilders.builder(for: Int64.self) + let _ = try ArrowArrayBuilders.builder(for: UInt8.self) + let _ = try ArrowArrayBuilders.builder(for: UInt16.self) + let _ = try ArrowArrayBuilders.builder(for: UInt32.self) + let _ = try ArrowArrayBuilders.builder(for: UInt64.self) + let _ = try ArrowArrayBuilders.builder(for: Float.self) + let _ = try ArrowArrayBuilders.builder(for: Double.self) + let _ = try ArrowArrayBuilders.builder(for: Date.self) + let _ = try ArrowArrayBuilders.builder(for: Bool.self) + let _ = try ArrowArrayBuilders.builder(for: Int8?.self) + let _ = try ArrowArrayBuilders.builder(for: Int16?.self) + let _ = try ArrowArrayBuilders.builder(for: Int32?.self) + let _ = try ArrowArrayBuilders.builder(for: Int64?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt8?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt16?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt32?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt64?.self) + let _ = try ArrowArrayBuilders.builder(for: Float?.self) + let _ = try ArrowArrayBuilders.builder(for: Double?.self) + let _ = try ArrowArrayBuilders.builder(for: Date?.self) + let _ = try ArrowArrayBuilders.builder(for: Bool?.self) } #expect(throws: ArrowError.self) { - let _ = try 
ArrowArrayBuilders.loadBuilder(Int.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt.self) - let _ = try ArrowArrayBuilders.loadBuilder(Int?.self) - let _ = try ArrowArrayBuilders.loadBuilder(UInt?.self) + let _ = try ArrowArrayBuilders.builder(for: Int.self) + let _ = try ArrowArrayBuilders.builder(for: UInt.self) + let _ = try ArrowArrayBuilders.builder(for: Int?.self) + let _ = try ArrowArrayBuilders.builder(for: UInt?.self) } } } diff --git a/Tests/ArrowTests/ArrayTests.swift b/Tests/ArrowTests/ArrayTests.swift index 553bec1..4bf0755 100644 --- a/Tests/ArrowTests/ArrayTests.swift +++ b/Tests/ArrowTests/ArrayTests.swift @@ -361,7 +361,7 @@ struct ArrayTests { let testData = StructTest() let dateNow = Date.now - let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType( + let structBuilder = try ArrowArrayBuilders.structArrayBuilderForType( testData) structBuilder.append([ true, Int8(1), Int16(2), Int32(3), Int64(4), @@ -452,18 +452,18 @@ struct ArrayTests { } @Test func arrowArrayHolderBuilder() throws { - let uint8HBuilder: ArrowArrayHolderBuilder = + let uint8HBuilder: AnyArrowArrayBuilder = (try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder) for index in 0..<100 { uint8HBuilder.appendAny(UInt8(index)) } - let uint8Holder = try uint8HBuilder.toHolder() + let uint8Holder = try uint8HBuilder.toAnyArrowArray() #expect(uint8Holder.nullCount == 0) #expect(uint8Holder.length == 100) - let stringHBuilder: ArrowArrayHolderBuilder = + let stringHBuilder: AnyArrowArrayBuilder = (try ArrowArrayBuilders.loadStringArrayBuilder()) for index in 0..<100 { if index % 10 == 9 { @@ -473,7 +473,7 @@ struct ArrayTests { } } - let stringHolder = try stringHBuilder.toHolder() + let stringHolder = try stringHBuilder.toAnyArrowArray() #expect(stringHolder.nullCount == 10) #expect(stringHolder.length == 100) } diff --git a/Tests/ArrowTests/CodableTests.swift b/Tests/ArrowTests/CodableTests.swift index 67ae61d..8ae2f0f 100644 --- 
a/Tests/ArrowTests/CodableTests.swift +++ b/Tests/ArrowTests/CodableTests.swift @@ -91,19 +91,19 @@ struct CodableTests { stringBuilder.append("test0", "test1", "test2") dateBuilder.append(date1, date1, date1) let result = RecordBatch.Builder() - .addColumn("propBool", arrowArray: try boolBuilder.toHolder()) - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) - .addColumn("propInt16", arrowArray: try int16Builder.toHolder()) - .addColumn("propInt32", arrowArray: try int32Builder.toHolder()) - .addColumn("propInt64", arrowArray: try int64Builder.toHolder()) - .addColumn("propUInt8", arrowArray: try uint8Builder.toHolder()) - .addColumn("propUInt16", arrowArray: try uint16Builder.toHolder()) - .addColumn("propUInt32", arrowArray: try uint32Builder.toHolder()) - .addColumn("propUInt64", arrowArray: try uint64Builder.toHolder()) - .addColumn("propFloat", arrowArray: try floatBuilder.toHolder()) - .addColumn("propDouble", arrowArray: try doubleBuilder.toHolder()) - .addColumn("propString", arrowArray: try stringBuilder.toHolder()) - .addColumn("propDate", arrowArray: try dateBuilder.toHolder()) + .addColumn("propBool", arrowArray: try boolBuilder.finish()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) + .addColumn("propInt16", arrowArray: try int16Builder.finish()) + .addColumn("propInt32", arrowArray: try int32Builder.finish()) + .addColumn("propInt64", arrowArray: try int64Builder.finish()) + .addColumn("propUInt8", arrowArray: try uint8Builder.finish()) + .addColumn("propUInt16", arrowArray: try uint16Builder.finish()) + .addColumn("propUInt32", arrowArray: try uint32Builder.finish()) + .addColumn("propUInt64", arrowArray: try uint64Builder.finish()) + .addColumn("propFloat", arrowArray: try floatBuilder.finish()) + .addColumn("propDouble", arrowArray: try doubleBuilder.finish()) + .addColumn("propString", arrowArray: try stringBuilder.finish()) + .addColumn("propDate", arrowArray: try dateBuilder.finish()) .finish() switch result { case 
.success(let rb): @@ -139,7 +139,7 @@ struct CodableTests { try ArrowArrayBuilders.loadNumberArrayBuilder() int8Builder.append(10, 11, 12) let result = RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) .finish() switch result { case .success(let rb): @@ -159,7 +159,10 @@ struct CodableTests { try ArrowArrayBuilders.loadNumberArrayBuilder() int8WNilBuilder.append(10, nil, 12, nil) let resultWNil = RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8WNilBuilder.toHolder()) + .addColumn( + "propInt8", + arrowArray: try int8WNilBuilder.finish() + ) .finish() switch resultWNil { case .success(let rb): @@ -185,8 +188,8 @@ struct CodableTests { int8Builder.append(10, 11, 12, 13) stringBuilder.append("test10", "test11", "test12", "test13") switch RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) - .addColumn("propString", arrowArray: try stringBuilder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) + .addColumn("propString", arrowArray: try stringBuilder.finish()) .finish() { case .success(let rb): @@ -200,8 +203,8 @@ struct CodableTests { } switch RecordBatch.Builder() - .addColumn("propString", arrowArray: try stringBuilder.toHolder()) - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propString", arrowArray: try stringBuilder.finish()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) .finish() { case .success(let rb): @@ -222,8 +225,8 @@ struct CodableTests { int8Builder.append(10, 11, 12, 13) stringWNilBuilder.append(nil, "test11", nil, "test13") let resultWNil = RecordBatch.Builder() - .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) - .addColumn("propString", arrowArray: try stringWNilBuilder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.finish()) + .addColumn("propString", arrowArray: try stringWNilBuilder.finish()) 
.finish() switch resultWNil { case .success(let rb): diff --git a/Tests/ArrowTests/IPCTests.swift b/Tests/ArrowTests/IPCTests.swift index 6614627..8b0b9ea 100644 --- a/Tests/ArrowTests/IPCTests.swift +++ b/Tests/ArrowTests/IPCTests.swift @@ -71,10 +71,10 @@ func checkBoolRecordBatch( #expect(recordBatch.schema.fields[1].type == .utf8) for index in 0.. ArrowSchema { func makeStructRecordBatch() throws -> RecordBatch { let testData = StructTest() let dateNow = Date.now - let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType( + let structBuilder = try ArrowArrayBuilders.structArrayBuilderForType( testData ) structBuilder.append([ @@ -277,15 +277,15 @@ struct IPCStreamReaderTests { #expect(recordBatch.schema.fields[4].type == .float32) let columns = recordBatch.columns #expect(columns[0].nullCount == 2) - let dateVal = "\((columns[2] as! AsString).asString(0))" + let dateVal = "\((columns[2]).asString(0))" #expect(dateVal == "2014-09-10 00:00:00 +0000") - let stringVal = "\((columns[1] as! AsString).asString(1))" + let stringVal = "\((columns[1]).asString(1))" #expect(stringVal == "test22") - let uintVal = "\((columns[0] as! AsString).asString(0))" + let uintVal = "\((columns[0]).asString(0))" #expect(uintVal == "10") - let stringVal2 = "\((columns[1] as! AsString).asString(3))" + let stringVal2 = "\((columns[1]).asString(3))" #expect(stringVal2 == "test44") - let uintVal2 = "\((columns[0] as! AsString).asString(3))" + let uintVal2 = "\((columns[0]).asString(3))" #expect(uintVal2 == "44") } case .failure(let error): @@ -323,8 +323,7 @@ struct IPCFileReaderTests { recordBatch.schema.fields[1].type == .utf8) for index in 0..