Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 71 additions & 96 deletions Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import ArrowC
import Foundation

/// A type-erased ArrowArray.
Expand All @@ -25,9 +26,10 @@ public protocol AnyArrowArray {
var bufferDataSizes: [Int] { get }
func asAny(_ index: UInt) -> Any?
func asString(_ index: UInt) -> String
func setCArrayPtr(_ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>?)
}

// MARK: - Core Protocol
// MARK: Core Protocol

/// The interface for Arrow array types.
public protocol ArrowArray<ItemType>: AnyArrowArray {
Expand All @@ -37,8 +39,42 @@ public protocol ArrowArray<ItemType>: AnyArrowArray {
subscript(_ index: UInt) -> ItemType? { get }
}

// MARK: - Default Implementations
extension ArrowArray {
/// Abstract base class for Arrow arrays with element type `T`.
///
/// Stores the backing `ArrowData` and an optional pointer to a C
/// `ArrowArray`, and supplies default `asString(_:)` / `asAny(_:)`
/// implementations built on the element subscript. Subclasses must
/// override the subscript; the base implementation traps.
public class ArrowArrayBase<T>: ArrowArray {

  /// The data backing this array.
  public var arrowData: ArrowData

  /// Pointer to an associated C array, if any. When non-nil at
  /// deinitialization it is passed to `ArrowCImporter.release`.
  public var cArrayPtr: UnsafePointer<ArrowC.ArrowArray>?

  /// Creates an array over `arrowData`.
  ///
  /// - Parameter arrowData: The data to wrap.
  /// - Throws: Declared as `ArrowError` so subclasses may fail; this base
  ///   implementation itself does not throw.
  public required init(_ arrowData: ArrowData) throws(ArrowError) {
    self.arrowData = arrowData
  }

  /// Returns the element at `index`.
  ///
  /// Always traps in the base class; concrete subclasses override it.
  public subscript(_ index: UInt) -> T? {
    fatalError("Base class is abstract.")
  }

  /// Returns the element at `index` rendered via string interpolation,
  /// or the empty string when the subscript yields `nil`.
  public func asString(_ index: UInt) -> String {
    self[index].map { "\($0)" } ?? ""
  }

  /// Returns the element at `index` as a type-erased optional.
  public func asAny(_ index: UInt) -> Any? {
    return self[index]
  }

  /// Stores `cArrayPtr` on this array.
  ///
  /// NOTE(review): a previously stored pointer is overwritten without
  /// being released here — confirm callers set this at most once.
  public func setCArrayPtr(_ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>?) {
    self.cArrayPtr = cArrayPtr
  }

  deinit {
    // Nothing to hand back when no C array was ever attached.
    guard let pointer = cArrayPtr else { return }
    ArrowCImporter.release(pointer)
  }
}

extension ArrowArrayBase {
public var nullCount: UInt {
arrowData.nullCount
}
Expand All @@ -63,32 +99,19 @@ extension ArrowArray {
arrowData.buffers.map { Int($0.capacity) }
}

public func isNull(at index: UInt) throws -> Bool {
public func isNull(at index: UInt) throws(ArrowError) -> Bool {
if index >= self.length {
throw ArrowError.outOfBounds(index: Int64(index))
throw .outOfBounds(index: Int64(index))
}
return arrowData.isNull(index)
}

public func asString(_ index: UInt) -> String {
guard let value = self[index] else {
return ""
}
return "\(value)"
}

public func asAny(_ index: UInt) -> Any? {
self[index]
}
}

// MARK: Fixed Arrays

public protocol FixedArrayProtocol: ArrowArray where ItemType: BitwiseCopyable {
}
public class FixedArray<T>: ArrowArrayBase<T> where T: BitwiseCopyable {

extension FixedArrayProtocol {
public subscript(_ index: UInt) -> ItemType? {
public override subscript(_ index: UInt) -> ItemType? {
if arrowData.isNull(index) {
return nil
}
Expand All @@ -106,28 +129,9 @@ extension FixedArrayProtocol {
}
}

public struct FixedArray<T>: FixedArrayProtocol where T: BitwiseCopyable {
public typealias ItemType = T
public let arrowData: ArrowData
public class StringArray: ArrowArrayBase<String> {

public init(arrowData: ArrowData) {
self.arrowData = arrowData
}

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}
}

public struct StringArray: ArrowArray {
public typealias ItemType = String
public let arrowData: ArrowData

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}

public subscript(_ index: UInt) -> String? {
public override subscript(_ index: UInt) -> String? {
let offsetIndex = MemoryLayout<Int32>.stride * Int(index)
if self.arrowData.isNull(index) {
return nil
Expand All @@ -138,8 +142,8 @@ public struct StringArray: ArrowArray {

var startIndex: Int32 = 0
if index > 0 {
startIndex = offsets.rawPointer.advanced(by: offsetIndex).load(
as: Int32.self)
startIndex = offsets.rawPointer.advanced(by: offsetIndex)
.load(as: Int32.self)
}

let endIndex = offsets.rawPointer.advanced(
Expand All @@ -150,21 +154,16 @@ public struct StringArray: ArrowArray {
let rawPointer = values.rawPointer.advanced(by: Int(startIndex))
.bindMemory(to: UInt8.self, capacity: arrayLength)
let buffer = UnsafeBufferPointer<UInt8>(
start: rawPointer, count: arrayLength)
let byteArray = Array(buffer)
return String(data: Data(byteArray), encoding: .utf8)
start: rawPointer,
count: arrayLength
)
return String(bytes: buffer, encoding: .utf8)
}
}

public struct BoolArray: ArrowArray {
public typealias ItemType = Bool
public let arrowData: ArrowData
public class BoolArray: ArrowArrayBase<Bool> {

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}

public subscript(_ index: UInt) -> Bool? {
public override subscript(_ index: UInt) -> Bool? {
if self.arrowData.isNull(index) {
return nil
}
Expand All @@ -173,15 +172,9 @@ public struct BoolArray: ArrowArray {
}
}

public struct Date32Array: ArrowArray {
public typealias ItemType = Date
public let arrowData: ArrowData
public class Date32Array: ArrowArrayBase<Date> {

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}

public subscript(_ index: UInt) -> Date? {
public override subscript(_ index: UInt) -> Date? {
if self.arrowData.isNull(index) {
return nil
}
Expand All @@ -193,15 +186,9 @@ public struct Date32Array: ArrowArray {
}
}

public struct Date64Array: ArrowArray {
public typealias ItemType = Date
public let arrowData: ArrowData
public class Date64Array: ArrowArrayBase<Date> {

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}

public subscript(_ index: UInt) -> Date? {
public override subscript(_ index: UInt) -> Date? {
if self.arrowData.isNull(index) {
return nil
}
Expand All @@ -217,13 +204,7 @@ public typealias Time64Array = FixedArray<Time64>

public typealias Time32Array = FixedArray<Time32>

public struct TimestampArray: FixedArrayProtocol {
public typealias ItemType = Timestamp
public let arrowData: ArrowData

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}
public class TimestampArray: FixedArray<Timestamp> {

public struct FormattingOptions: Equatable {
public var dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS"
Expand Down Expand Up @@ -256,7 +237,7 @@ public struct TimestampArray: FixedArrayProtocol {
private var cachedFormatter: DateFormatter?
private var cachedOptions: FormattingOptions?

public mutating func formattedDate(
public func formattedDate(
at index: UInt,
options: FormattingOptions = FormattingOptions()
) -> String? {
Expand Down Expand Up @@ -300,8 +281,7 @@ public struct TimestampArray: FixedArrayProtocol {
return Date(timeIntervalSince1970: timeInterval)
}

// TODO: Mutating function to hack around cached formatter
public mutating func asString(_ index: UInt) -> String {
public override func asString(_ index: UInt) -> String {
if let formatted = formattedDate(at: index) {
return formatted
} else {
Expand All @@ -310,13 +290,7 @@ public struct TimestampArray: FixedArrayProtocol {
}
}

public struct BinaryArray: ArrowArray {
public typealias ItemType = Data
public let arrowData: ArrowData

public init(_ arrowData: ArrowData) {
self.arrowData = arrowData
}
public class BinaryArray: ArrowArrayBase<Data> {

public struct Options {
public var printAsHex = false
Expand All @@ -325,7 +299,7 @@ public struct BinaryArray: ArrowArray {

public var options = Options()

public subscript(_ index: UInt) -> Data? {
public override subscript(_ index: UInt) -> Data? {
let offsetIndex = MemoryLayout<Int32>.stride * Int(index)
if self.arrowData.isNull(index) {
return nil
Expand All @@ -350,7 +324,7 @@ public struct BinaryArray: ArrowArray {
return Data(byteArray)
}

public func asString(_ index: UInt) -> String {
public override func asString(_ index: UInt) -> String {
guard let data = self[index] else { return "" }
if options.printAsHex {
return data.hexEncodedString()
Expand All @@ -364,13 +338,14 @@ public struct BinaryArray: ArrowArray {
}
}

public struct NestedArray: ArrowArray, AnyArrowArray {
public typealias ItemType = [Any?]
public let arrowData: ArrowData
public class NestedArray: ArrowArrayBase<[Any?]> {

private var children: [AnyArrowArray]?

public init(_ arrowData: ArrowData) throws(ArrowError) {
self.arrowData = arrowData
public required init(
_ arrowData: ArrowData
) throws(ArrowError) {
try super.init(arrowData)

switch arrowData.type {
case .list(let field):
Expand Down Expand Up @@ -398,7 +373,7 @@ public struct NestedArray: ArrowArray, AnyArrowArray {
}
}

public subscript(_ index: UInt) -> [Any?]? {
public override subscript(_ index: UInt) -> [Any?]? {
if self.arrowData.isNull(index) {
return nil
}
Expand Down Expand Up @@ -432,7 +407,7 @@ public struct NestedArray: ArrowArray, AnyArrowArray {
}
}

public func asString(_ index: UInt) -> String {
public override func asString(_ index: UInt) -> String {
switch arrowData.type {
case .list(let _):
if self.arrowData.isNull(index) {
Expand Down
5 changes: 3 additions & 2 deletions Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,10 @@ extension ArrowArrayBuilderInternal {

// MARK: Base implementation.

// Note: It would be preferable to move all of this to a protocol, however
// Note: It would be preferable to move this to a protocol, however
// ListArrayBuilder overrides finish. This is delicate because protocol
// extension method dispatching means the
// extension methods are statically dispatched, so overrides are not
// called when type erasure is used in nested types.
public class ArrowArrayBuilderBase<
BufferBuilder: ArrowBufferBuilder,
ArrayType: ArrowArray<BufferBuilder.ItemType>
Expand Down
36 changes: 18 additions & 18 deletions Sources/Arrow/ArrowArrayLoader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,41 +21,41 @@ struct ArrowArrayLoader {
) throws(ArrowError) -> any AnyArrowArray {
switch arrowType {
case .int8:
return FixedArray<Int8>(arrowData)
return try FixedArray<Int8>(arrowData)
case .int16:
return FixedArray<Int16>(arrowData)
return try FixedArray<Int16>(arrowData)
case .int32:
return FixedArray<Int32>(arrowData)
return try FixedArray<Int32>(arrowData)
case .int64:
return FixedArray<Int64>(arrowData)
return try FixedArray<Int64>(arrowData)
case .uint8:
return FixedArray<UInt8>(arrowData)
return try FixedArray<UInt8>(arrowData)
case .uint16:
return FixedArray<UInt16>(arrowData)
return try FixedArray<UInt16>(arrowData)
case .uint32:
return FixedArray<UInt32>(arrowData)
return try FixedArray<UInt32>(arrowData)
case .uint64:
return FixedArray<UInt64>(arrowData)
return try FixedArray<UInt64>(arrowData)
case .float64:
return FixedArray<Double>(arrowData)
return try FixedArray<Double>(arrowData)
case .float32:
return FixedArray<Float>(arrowData)
return try FixedArray<Float>(arrowData)
case .date32:
return Date32Array(arrowData)
return try Date32Array(arrowData)
case .date64:
return Date64Array(arrowData)
return try Date64Array(arrowData)
case .time32:
return Time32Array(arrowData)
return try Time32Array(arrowData)
case .time64:
return Time64Array(arrowData)
return try Time64Array(arrowData)
case .timestamp:
return TimestampArray(arrowData)
return try TimestampArray(arrowData)
case .utf8:
return StringArray(arrowData)
return try StringArray(arrowData)
case .boolean:
return BoolArray(arrowData)
return try BoolArray(arrowData)
case .binary:
return BinaryArray(arrowData)
return try BinaryArray(arrowData)
case .strct(let _):
return try NestedArray(arrowData)
case .list(let _):
Expand Down
Loading