@@ -19,7 +19,7 @@ import FlatBuffers
19
19
import Foundation
20
20
21
21
let FILEMARKER = " ARROW1 "
22
- let CONTINUATIONMARKER = - 1
22
+ let CONTINUATIONMARKER = UInt32 ( 0xFFFF_FFFF )
23
23
24
24
/// @nodoc
25
25
public class ArrowReader { // swiftlint:disable:this type_body_length
@@ -240,7 +240,78 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
240
240
return . success( RecordBatch ( arrowSchema, columns: columns) )
241
241
}
242
242
243
- public func fromStream( // swiftlint:disable:this function_body_length
243
/// Reads a buffer encoded in the Arrow streaming format.
///
/// The Arrow streaming format differs slightly from the Arrow file format:
/// it does not contain a header and footer. The stream is a sequence of
/// messages, each preceded by a 4-byte length (optionally preceded itself by
/// the 4-byte continuation marker). A schema message populates
/// `result.schema`; every record batch message is decoded against that schema
/// and appended to `result.batches`.
///
/// Reading stops successfully when a zero length is read or when the input is
/// exhausted exactly on a message boundary.
///
/// - Parameters:
///   - input: The raw bytes of the stream.
///     NOTE(review): offsets are computed from 0, so this presumably expects a
///     `Data` whose `startIndex` is 0 — confirm against callers.
///   - useUnalignedBuffers: Currently not consulted by this method; the
///     FlatBuffers `ByteBuffer` is always created with
///     `allowReadingUnalignedBuffers: true` because message metadata starts at
///     arbitrary offsets inside `input`.
/// - Returns: `.success` with the schema and batches read so far, or
///   `.failure` when the stream is truncated, a record batch arrives before
///   any schema, a message header cannot be decoded, or an unsupported header
///   type is encountered.
public func readStreaming( // swiftlint:disable:this function_body_length
    _ input: Data,
    useUnalignedBuffers: Bool = false
) -> Result<ArrowReaderResult, ArrowError> {
    let result = ArrowReaderResult()
    let prefixSize = MemoryLayout<UInt32>.size
    var schemaMessage: org_apache_arrow_flatbuf_Schema?
    var offset = 0

    while true {
        // Running out of bytes exactly on a message boundary is treated as a
        // normal end of stream (a writer may close the stream without
        // emitting the explicit end-of-stream marker).
        if offset == input.count {
            return .success(result)
        }
        guard offset + prefixSize <= input.count else {
            return .failure(.unknownError(
                "Truncated stream: incomplete message length at offset \(offset)"))
        }

        var length = getUInt32(input, offset: offset)
        if length == CONTINUATIONMARKER {
            // The real metadata length follows the continuation marker.
            offset += prefixSize
            guard offset + prefixSize <= input.count else {
                return .failure(.unknownError(
                    "Truncated stream: missing message length after continuation marker"))
            }
            length = getUInt32(input, offset: offset)
        }

        // A zero length marks the end of the stream.
        if length == 0 {
            return .success(result)
        }

        // Skip the length prefix; `offset` now points at the message metadata.
        offset += prefixSize
        let metadataEnd = offset + Int(length)
        guard metadataEnd <= input.count else {
            return .failure(.unknownError(
                "Truncated stream: message metadata extends past end of input"))
        }

        let dataBuffer = ByteBuffer(
            data: input[offset...],
            allowReadingUnalignedBuffers: true)
        let message = org_apache_arrow_flatbuf_Message.getRootAsMessage(bb: dataBuffer)

        // Reject a body length that would move the cursor backwards or past
        // the end of the buffer; this also guarantees the loop makes progress.
        let bodyLength = message.bodyLength
        guard bodyLength >= 0, bodyLength <= Int64(input.count - metadataEnd) else {
            return .failure(.unknownError(
                "Invalid message body length: \(bodyLength)"))
        }

        switch message.headerType {
        case .recordbatch:
            guard let rbMessage = message.header(
                type: org_apache_arrow_flatbuf_RecordBatch.self) else {
                return .failure(.unknownError("Unable to read record batch message header"))
            }
            // A record batch can only be decoded against a previously read schema.
            guard let schema = schemaMessage, let arrowSchema = result.schema else {
                return .failure(.unknownError("Record batch encountered before schema message"))
            }
            do {
                let recordBatch = try loadRecordBatch(
                    rbMessage,
                    schema: schema,
                    arrowSchema: arrowSchema,
                    data: input,
                    messageEndOffset: Int64(metadataEnd)
                ).get()
                result.batches.append(recordBatch)
            } catch let error as ArrowError {
                return .failure(error)
            } catch {
                return .failure(.unknownError("Unexpected error: \(error)"))
            }
        case .schema:
            guard let header = message.header(
                type: org_apache_arrow_flatbuf_Schema.self) else {
                return .failure(.unknownError("Unable to read schema message header"))
            }
            schemaMessage = header
            switch loadSchema(header) {
            case .success(let schema):
                result.schema = schema
            case .failure(let error):
                return .failure(error)
            }
        default:
            return .failure(.unknownError("Unhandled header type: \(message.headerType)"))
        }

        // Advance past this message's metadata and body to the next length prefix.
        offset = metadataEnd + Int(bodyLength)
    }
}
308
+
309
+ /*
310
+ This is for reading the Arrow file format. The Arrow file format supports
311
+ random accessing the data. The Arrow file format contains a header and
312
+ footer around the Arrow streaming format.
313
+ */
314
+ public func readFile( // swiftlint:disable:this function_body_length
244
315
_ fileData: Data ,
245
316
useUnalignedBuffers: Bool = false
246
317
) -> Result < ArrowReaderResult , ArrowError > {
@@ -266,7 +337,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
266
337
for index in 0 ..< footer. recordBatchesCount {
267
338
let recordBatch = footer. recordBatches ( at: index) !
268
339
var messageLength = fileData. withUnsafeBytes { rawBuffer in
269
- rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: Int32 . self)
340
+ rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: UInt32 . self)
270
341
}
271
342
272
343
var messageOffset : Int64 = 1
@@ -275,7 +346,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
275
346
messageLength = fileData. withUnsafeBytes { rawBuffer in
276
347
rawBuffer. loadUnaligned (
277
348
fromByteOffset: Int ( recordBatch. offset + Int64( MemoryLayout< Int32> . size) ) ,
278
- as: Int32 . self)
349
+ as: UInt32 . self)
279
350
}
280
351
}
281
352
@@ -299,8 +370,10 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
299
370
messageEndOffset: messageEndOffset
300
371
) . get ( )
301
372
result. batches. append ( recordBatch)
302
- } catch let error {
373
+ } catch let error as ArrowError {
303
374
return . failure( error)
375
+ } catch {
376
+ return . failure( . unknownError( " Unexpected error: \( error) " ) )
304
377
}
305
378
default :
306
379
return . failure( . unknownError( " Unhandled header type: \( message. headerType) " ) )
@@ -320,7 +393,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
320
393
let markerLength = FILEMARKER . utf8. count
321
394
let footerLengthEnd = Int ( fileData. count - markerLength)
322
395
let data = fileData [ ..< ( footerLengthEnd) ]
323
- return fromStream ( data)
396
+ return readFile ( data)
324
397
} catch {
325
398
return . failure( . unknownError( " Error loading file: \( error) " ) )
326
399
}
@@ -360,13 +433,15 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
360
433
) . get ( )
361
434
result. batches. append ( recordBatch)
362
435
return . success( ( ) )
363
- } catch let error {
436
+ } catch let error as ArrowError {
364
437
return . failure( error)
438
+ } catch {
439
+ return . failure( . unknownError( " Unexpected error: \( error) " ) )
365
440
}
366
-
367
441
default :
368
442
return . failure( . unknownError( " Unhandled header type: \( message. headerType) " ) )
369
443
}
370
444
}
371
445
372
446
}
447
+ // swiftlint:disable:this file_length
0 commit comments