Skip to content

Commit 1e67aa5

Browse files
committed
[SPARK-51465] Use Apache Arrow Swift 19.0.1
1 parent 0aaeba2 commit 1e67aa5

23 files changed

+8388
-0
lines changed
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
import Foundation
19+
20+
/// Type-erased handle to an Arrow array.
///
/// A holder exposes an array's metadata together with closures for operations
/// that depend on the element type of the array it was created from.
public protocol ArrowArrayHolder {
  // MARK: Wrapped array

  /// The wrapped array, type-erased.
  var array: AnyArray { get }
  /// The `ArrowData` associated with the wrapped array.
  var data: ArrowData { get }

  // MARK: Metadata

  /// The Arrow type of the wrapped array.
  var type: ArrowType { get }
  /// The length reported for the wrapped array.
  var length: UInt { get }
  /// The null count reported for the wrapped array.
  var nullCount: UInt { get }

  // MARK: Type-dependent operations

  /// Returns one `Data` value per buffer of the wrapped array.
  var getBufferData: () -> [Data] { get }
  /// Returns one size per buffer of the wrapped array.
  var getBufferDataSizes: () -> [Int] { get }
  /// Builds an `ArrowColumn` for a field from a list of holders.
  var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn { get }
}
30+
31+
/// Default `ArrowArrayHolder` that wraps a concrete `ArrowArray<T>`.
///
/// The element type `T` is captured by the closures created in `init`, so a holder
/// can be used without knowing `T` at the call site.
public class ArrowArrayHolderImpl: ArrowArrayHolder {
  public let data: ArrowData
  public let type: ArrowType
  public let length: UInt
  public let nullCount: UInt
  public let array: AnyArray
  public let getBufferData: () -> [Data]
  public let getBufferDataSizes: () -> [Int]
  public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn

  /// Wraps `arrowArray`, copying its metadata and building the type-dependent closures.
  ///
  /// - Parameter arrowArray: The typed array to wrap.
  public init<T>(_ arrowArray: ArrowArray<T>) {
    self.array = arrowArray
    self.data = arrowArray.arrowData
    self.type = arrowArray.arrowData.type
    self.length = arrowArray.length
    self.nullCount = arrowArray.nullCount

    // Each buffer appends its own bytes into a fresh `Data` value.
    self.getBufferData = {
      arrowArray.arrowData.buffers.map { buffer -> Data in
        var bytes = Data()
        buffer.append(to: &bytes)
        return bytes
      }
    }

    // Sizes are taken from each buffer's `capacity`.
    self.getBufferDataSizes = {
      arrowArray.arrowData.buffers.map { Int($0.capacity) }
    }

    self.getArrowColumn = { field, arrayHolders in
      // Holders whose array is not an `ArrowArray<T>` are left out of the column.
      let typedArrays = arrayHolders.compactMap { $0.array as? ArrowArray<T> }
      let chunked = try ChunkedArray<T>(typedArrays)
      return ArrowColumn(field, chunked: ChunkedArrayHolder(chunked))
    }
  }

  /// Creates a holder around the array class that matches `arrowType`.
  ///
  /// - Parameters:
  ///   - arrowType: The Arrow type whose `id` selects the array class.
  ///   - with: The `ArrowData` handed to the selected array class.
  /// - Returns: A holder wrapping the newly created array.
  /// - Throws: `ArrowError.invalid` when no array class is mapped to the type id, or any
  ///   error thrown by the selected array's initializer.
  public static func loadArray(  // swiftlint:disable:this cyclomatic_complexity
    _ arrowType: ArrowType, with: ArrowData
  ) throws -> ArrowArrayHolder {
    let holder: ArrowArrayHolder
    switch arrowType.id {
    case .int8:
      holder = try ArrowArrayHolderImpl(FixedArray<Int8>(with))
    case .int16:
      holder = try ArrowArrayHolderImpl(FixedArray<Int16>(with))
    case .int32:
      holder = try ArrowArrayHolderImpl(FixedArray<Int32>(with))
    case .int64:
      holder = try ArrowArrayHolderImpl(FixedArray<Int64>(with))
    case .uint8:
      holder = try ArrowArrayHolderImpl(FixedArray<UInt8>(with))
    case .uint16:
      holder = try ArrowArrayHolderImpl(FixedArray<UInt16>(with))
    case .uint32:
      holder = try ArrowArrayHolderImpl(FixedArray<UInt32>(with))
    case .uint64:
      holder = try ArrowArrayHolderImpl(FixedArray<UInt64>(with))
    case .double:
      holder = try ArrowArrayHolderImpl(FixedArray<Double>(with))
    case .float:
      holder = try ArrowArrayHolderImpl(FixedArray<Float>(with))
    case .date32:
      holder = try ArrowArrayHolderImpl(Date32Array(with))
    case .date64:
      holder = try ArrowArrayHolderImpl(Date64Array(with))
    case .time32:
      holder = try ArrowArrayHolderImpl(Time32Array(with))
    case .time64:
      holder = try ArrowArrayHolderImpl(Time64Array(with))
    case .string:
      holder = try ArrowArrayHolderImpl(StringArray(with))
    case .boolean:
      holder = try ArrowArrayHolderImpl(BoolArray(with))
    case .binary:
      holder = try ArrowArrayHolderImpl(BinaryArray(with))
    case .strct:
      holder = try ArrowArrayHolderImpl(StructArray(with))
    default:
      throw ArrowError.invalid("Array not found for type: \(arrowType)")
    }
    return holder
  }
}
123+
124+
/// Base class for typed Arrow arrays backed by an `ArrowData`.
///
/// Subclasses override the subscript to decode the element at an index; the base
/// implementation of the subscript traps.
public class ArrowArray<T>: AsString, AnyArray {
  public typealias ItemType = T
  /// The data this array reads from.
  public let arrowData: ArrowData
  /// Null count as reported by `arrowData`.
  public var nullCount: UInt { return self.arrowData.nullCount }
  /// Length as reported by `arrowData`.
  public var length: UInt { return self.arrowData.length }

  /// Creates an array over `arrowData`.
  public required init(_ arrowData: ArrowData) throws {
    self.arrowData = arrowData
  }

  /// Reports whether the element at `at` is null.
  ///
  /// - Throws: `ArrowError.outOfBounds` when `at` is not less than `length`.
  public func isNull(_ at: UInt) throws -> Bool {
    guard at < self.length else {
      throw ArrowError.outOfBounds(index: Int64(at))
    }

    return self.arrowData.isNull(at)
  }

  /// Returns the element at `index`, or `nil` for a null element.
  ///
  /// Must be overridden; the base implementation traps.
  public subscript(_ index: UInt) -> T? {
    fatalError("subscript() has not been implemented")
  }

  /// Returns the element at `index` rendered through string interpolation, or an
  /// empty string when the element is `nil`.
  public func asString(_ index: UInt) -> String {
    // Decode once: subclass subscripts may copy the element's bytes on every access.
    guard let value = self[index] else { return "" }
    return "\(value)"
  }

  /// Returns the element at `index` as `Any`, or `nil` when the element is `nil`.
  public func asAny(_ index: UInt) -> Any? {
    // Decode once and avoid force-unwrapping a second subscript call.
    guard let value = self[index] else { return nil }
    return value
  }
}
162+
163+
/// Array whose elements are loaded directly as `T` from the second buffer.
public class FixedArray<T>: ArrowArray<T> {
  /// Returns the element at `index`, or `nil` when `arrowData` reports it as null.
  public override subscript(_ index: UInt) -> T? {
    guard !self.arrowData.isNull(index) else { return nil }

    // Elements sit `arrowData.stride` bytes apart in buffers[1].
    let position = self.arrowData.stride * Int(index)
    let valuesPointer = self.arrowData.buffers[1].rawPointer
    return valuesPointer.advanced(by: position).load(as: T.self)
  }
}
173+
174+
/// Array of strings decoded as UTF-8 from an offsets buffer and a values buffer.
public class StringArray: ArrowArray<String> {
  /// Returns the string at `index`, or `nil` when the element is null or its bytes
  /// cannot be decoded as UTF-8.
  public override subscript(_ index: UInt) -> String? {
    let offsetWidth = MemoryLayout<Int32>.stride
    let offsetPosition = offsetWidth * Int(index)
    guard !self.arrowData.isNull(index) else { return nil }

    let offsetsPointer = self.arrowData.buffers[1].rawPointer
    let valuesPointer = self.arrowData.buffers[2].rawPointer

    // The first element is taken to start at byte 0; later elements read their
    // start from the offsets buffer.
    let lower: Int32 =
      index > 0 ? offsetsPointer.advanced(by: offsetPosition).load(as: Int32.self) : 0
    let upper = offsetsPointer.advanced(by: offsetPosition + offsetWidth).load(as: Int32.self)

    let byteCount = Int(upper - lower)
    let bytesStart = valuesPointer.advanced(by: Int(lower))
      .bindMemory(to: UInt8.self, capacity: byteCount)
    let bytes = UnsafeBufferPointer<UInt8>(start: bytesStart, count: byteCount)
    return String(data: Data(buffer: bytes), encoding: .utf8)
  }
}
199+
200+
/// Array of booleans read bit-by-bit from the second buffer.
public class BoolArray: ArrowArray<Bool> {
  /// Returns the boolean at `index`, or `nil` when `arrowData` reports it as null.
  public override subscript(_ index: UInt) -> Bool? {
    guard !self.arrowData.isNull(index) else { return nil }

    // `BitUtility.isSet` tests the bit for `index` in buffers[1].
    return BitUtility.isSet(index, buffer: self.arrowData.buffers[1])
  }
}
210+
211+
/// Array of dates stored as a 32-bit count of days since the UNIX epoch.
public class Date32Array: ArrowArray<Date> {
  /// Returns the date at `index`, or `nil` when `arrowData` reports it as null.
  public override subscript(_ index: UInt) -> Date? {
    if self.arrowData.isNull(index) {
      return nil
    }

    let byteOffset = self.arrowData.stride * Int(index)
    // Arrow date32 is a *signed* day count, so load it as Int32; reading it as
    // UInt32 turns every pre-1970 date into a huge positive value.
    let days = self.arrowData.buffers[1].rawPointer.advanced(by: byteOffset).load(
      as: Int32.self)
    // Convert to seconds in floating point: 86_400 seconds per day. Multiplying in
    // 32-bit integer arithmetic traps on overflow for distant dates.
    return Date(timeIntervalSince1970: TimeInterval(days) * 86_400)
  }
}
223+
224+
/// Array of dates stored as a 64-bit count of milliseconds since the UNIX epoch.
public class Date64Array: ArrowArray<Date> {
  /// Returns the date at `index`, or `nil` when `arrowData` reports it as null.
  public override subscript(_ index: UInt) -> Date? {
    if self.arrowData.isNull(index) {
      return nil
    }

    let byteOffset = self.arrowData.stride * Int(index)
    // Arrow date64 is a *signed* millisecond count, so load it as Int64; reading it
    // as UInt64 misinterprets every pre-1970 value.
    let milliseconds = self.arrowData.buffers[1].rawPointer.advanced(by: byteOffset).load(
      as: Int64.self)
    // Divide in floating point so the sub-second part is kept instead of being
    // discarded by integer division.
    return Date(timeIntervalSince1970: TimeInterval(milliseconds) / 1000)
  }
}
236+
237+
/// Array of `Time32` values; adds nothing of its own to `FixedArray<Time32>`.
public class Time32Array: FixedArray<Time32> {}
238+
/// Array of `Time64` values; adds nothing of its own to `FixedArray<Time64>`.
public class Time64Array: FixedArray<Time64> {}
239+
240+
/// Array of variable-length byte strings read from an offsets buffer and a values buffer.
public class BinaryArray: ArrowArray<Data> {
  /// Controls how `asString(_:)` renders an element.
  public struct Options {
    /// When `true`, elements are rendered with `hexEncodedString()`.
    public var printAsHex = false
    /// Text encoding used to render elements when `printAsHex` is `false`.
    public var printEncoding: String.Encoding = .utf8
  }

  /// Rendering options used by `asString(_:)`.
  public var options = Options()

  /// Returns a copy of the bytes at `index`, or `nil` when `arrowData` reports the
  /// element as null.
  public override subscript(_ index: UInt) -> Data? {
    let offsetIndex = MemoryLayout<Int32>.stride * Int(index)
    if self.arrowData.isNull(index) {
      return nil
    }

    let offsets = self.arrowData.buffers[1]
    let values = self.arrowData.buffers[2]
    // The first element is taken to start at byte 0; later elements read their
    // start from the offsets buffer.
    var startIndex: Int32 = 0
    if index > 0 {
      startIndex = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
    }

    let endIndex = offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride)
      .load(as: Int32.self)
    let arrayLength = Int(endIndex - startIndex)
    let rawPointer = values.rawPointer.advanced(by: Int(startIndex))
      .bindMemory(to: UInt8.self, capacity: arrayLength)
    let buffer = UnsafeBufferPointer<UInt8>(start: rawPointer, count: arrayLength)
    // Copy straight into `Data`; no intermediate array is needed.
    return Data(buffer: buffer)
  }

  /// Renders the element at `index` as text.
  ///
  /// Returns an empty string for a null element. Otherwise returns the hex form when
  /// `options.printAsHex` is set, or the bytes decoded with `options.printEncoding`.
  /// Bytes that are not valid in that encoding are rendered as hex rather than
  /// crashing.
  public override func asString(_ index: UInt) -> String {
    guard let data = self[index] else { return "" }
    if options.printAsHex {
      return data.hexEncodedString()
    }

    // Arbitrary binary data is frequently not valid text, so decoding can fail.
    return String(data: data, encoding: options.printEncoding) ?? data.hexEncodedString()
  }
}
281+
282+
/// Array of structs; each element is the list of its child arrays' values at that index.
public class StructArray: ArrowArray<[Any?]> {
  /// One holder per child of `arrowData`, in child order.
  public private(set) var arrowFields: [ArrowArrayHolder]?

  /// Creates the array and loads a holder for every child of `arrowData`.
  ///
  /// - Throws: Any error thrown by `ArrowArrayHolderImpl.loadArray` for a child.
  public required init(_ arrowData: ArrowData) throws {
    try super.init(arrowData)
    self.arrowFields = try arrowData.children.map { child in
      try ArrowArrayHolderImpl.loadArray(child.type, with: child)
    }
  }

  /// Returns the child values at `index`, or `nil` when the struct element is null
  /// or no child holders are available.
  public override subscript(_ index: UInt) -> [Any?]? {
    if self.arrowData.isNull(index) {
      return nil
    }

    guard let fields = self.arrowFields else { return nil }
    return fields.map { $0.array.asAny(index) }
  }

  /// Renders the element at `index` as `{v0,v1,...}`, or an empty string when the
  /// struct element is null.
  public override func asString(_ index: UInt) -> String {
    if self.arrowData.isNull(index) {
      return ""
    }

    let renderedFields = (self.arrowFields ?? []).map { field -> String in
      if let printable = field.array as? AsString {
        return printable.asString(index)
      }
      // A child that cannot render itself is interpolated from its raw value
      // instead of force-unwrapping the failed cast.
      return field.array.asAny(index).map { "\($0)" } ?? ""
    }
    return "{" + renderedFields.joined(separator: ",") + "}"
  }
}

0 commit comments

Comments
 (0)