@@ -19,7 +19,7 @@ import FlatBuffers
1919import Foundation
2020
2121let FILEMARKER = " ARROW1 "
22- let CONTINUATIONMARKER = - 1
22+ let CONTINUATIONMARKER = UInt32 ( 0xFFFF_FFFF )
2323
2424/// @nodoc
2525public class ArrowReader { // swiftlint:disable:this type_body_length
@@ -240,7 +240,78 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
240240 return . success( RecordBatch ( arrowSchema, columns: columns) )
241241 }
242242
243- public func fromStream( // swiftlint:disable:this function_body_length
/// Reads Arrow data encoded in the Arrow streaming format.
///
/// The streaming format differs slightly from the Arrow file format in that
/// it has no header and footer. The input is walked as a sequence of
/// messages, each preceded by a 4-byte length that may itself be preceded
/// by `CONTINUATIONMARKER`. A schema message sets `schema` on the result and
/// every record batch message appends one entry to `batches`. Reading stops
/// at a zero length or when the input is exhausted.
///
/// - Parameters:
///   - input: The bytes of the stream.
///   - useUnalignedBuffers: Currently not consulted by this method; the
///     FlatBuffers buffer is always created with unaligned reads allowed.
/// - Returns: `.success` with the schema and batches decoded so far, or
///   `.failure` when the stream is truncated, a record batch precedes the
///   schema, a message header cannot be decoded, or the header type is not
///   handled.
public func readStreaming( // swiftlint:disable:this function_body_length
    _ input: Data,
    useUnalignedBuffers: Bool = false
) -> Result<ArrowReaderResult, ArrowError> {
    let result = ArrowReaderResult()
    let prefixSize = MemoryLayout<UInt32>.size
    var offset = 0
    var schemaMessage: org_apache_arrow_flatbuf_Schema?

    // Reads the length prefix at the current offset, or returns nil when
    // fewer than `prefixSize` bytes remain.
    // NOTE(review): assumes `getUInt32` does not bounds-check on its own and
    // that `input` is indexed from zero - confirm against its definition.
    func readPrefix() -> UInt32? {
        guard offset >= 0, input.count - offset >= prefixSize else {
            return nil
        }
        return getUInt32(input, offset: offset)
    }

    // A stream may simply end after its last message instead of carrying an
    // explicit zero-length terminator, so running out of bytes exactly on a
    // message boundary is treated as a normal end of stream.
    while offset != input.count {
        guard var length = readPrefix() else {
            return .failure(.unknownError(
                "Truncated stream: incomplete message length at offset \(offset)"))
        }

        // The continuation marker is optional; when present, the real
        // metadata length follows it.
        if length == CONTINUATIONMARKER {
            offset += prefixSize
            guard let metadataLength = readPrefix() else {
                return .failure(.unknownError(
                    "Truncated stream: incomplete message length at offset \(offset)"))
            }
            length = metadataLength
        }

        // A zero length marks the end of the stream.
        if length == 0 {
            return .success(result)
        }

        offset += prefixSize
        guard Int64(input.count - offset) >= Int64(length) else {
            return .failure(.unknownError(
                "Truncated stream: message metadata at offset \(offset) exceeds the input"))
        }

        let dataBuffer = ByteBuffer(
            data: input[offset...],
            allowReadingUnalignedBuffers: true)
        let message = org_apache_arrow_flatbuf_Message.getRootAsMessage(bb: dataBuffer)
        guard message.bodyLength >= 0 else {
            return .failure(.unknownError(
                "Invalid message body length: \(message.bodyLength)"))
        }

        switch message.headerType {
        case .recordbatch:
            guard let rbMessage = message.header(
                type: org_apache_arrow_flatbuf_RecordBatch.self) else {
                return .failure(.unknownError("Unable to read record batch header"))
            }
            // A record batch can only be decoded against a schema that was
            // seen earlier in the stream.
            guard let schemaHeader = schemaMessage, let arrowSchema = result.schema else {
                return .failure(.unknownError(
                    "Record batch encountered before schema message"))
            }
            do {
                let recordBatch = try loadRecordBatch(
                    rbMessage,
                    schema: schemaHeader,
                    arrowSchema: arrowSchema,
                    data: input,
                    messageEndOffset: (Int64(offset) + Int64(length))
                ).get()
                result.batches.append(recordBatch)
            } catch let error as ArrowError {
                return .failure(error)
            } catch {
                return .failure(.unknownError("Unexpected error: \(error)"))
            }
        case .schema:
            guard let schemaHeader = message.header(
                type: org_apache_arrow_flatbuf_Schema.self) else {
                return .failure(.unknownError("Unable to read schema header"))
            }
            schemaMessage = schemaHeader
            switch loadSchema(schemaHeader) {
            case .success(let schema):
                result.schema = schema
            case .failure(let error):
                return .failure(error)
            }
        default:
            return .failure(.unknownError("Unhandled header type: \(message.headerType)"))
        }

        // Skip the metadata and the message body to reach the next prefix.
        offset += Int(message.bodyLength + Int64(length))
    }
    return .success(result)
}
308+
309+ /*
310+ This is for reading the Arrow file format. The Arrow file format supports
311+ random access to the data. The Arrow file format contains a header and
312+ footer around the Arrow streaming format.
313+ */
314+ public func readFile( // swiftlint:disable:this function_body_length
244315 _ fileData: Data ,
245316 useUnalignedBuffers: Bool = false
246317 ) -> Result < ArrowReaderResult , ArrowError > {
@@ -266,7 +337,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
266337 for index in 0 ..< footer. recordBatchesCount {
267338 let recordBatch = footer. recordBatches ( at: index) !
268339 var messageLength = fileData. withUnsafeBytes { rawBuffer in
269- rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: Int32 . self)
340+ rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: UInt32 . self)
270341 }
271342
272343 var messageOffset : Int64 = 1
@@ -275,7 +346,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
275346 messageLength = fileData. withUnsafeBytes { rawBuffer in
276347 rawBuffer. loadUnaligned (
277348 fromByteOffset: Int ( recordBatch. offset + Int64( MemoryLayout< Int32> . size) ) ,
278- as: Int32 . self)
349+ as: UInt32 . self)
279350 }
280351 }
281352
@@ -299,8 +370,10 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
299370 messageEndOffset: messageEndOffset
300371 ) . get ( )
301372 result. batches. append ( recordBatch)
302- } catch let error {
373+ } catch let error as ArrowError {
303374 return . failure( error)
375+ } catch {
376+ return . failure( . unknownError( " Unexpected error: \( error) " ) )
304377 }
305378 default :
306379 return . failure( . unknownError( " Unhandled header type: \( message. headerType) " ) )
@@ -320,7 +393,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
320393 let markerLength = FILEMARKER . utf8. count
321394 let footerLengthEnd = Int ( fileData. count - markerLength)
322395 let data = fileData [ ..< ( footerLengthEnd) ]
323- return fromStream ( data)
396+ return readFile ( data)
324397 } catch {
325398 return . failure( . unknownError( " Error loading file: \( error) " ) )
326399 }
@@ -360,13 +433,15 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
360433 ) . get ( )
361434 result. batches. append ( recordBatch)
362435 return . success( ( ) )
363- } catch let error {
436+ } catch let error as ArrowError {
364437 return . failure( error)
438+ } catch {
439+ return . failure( . unknownError( " Unexpected error: \( error) " ) )
365440 }
366-
367441 default :
368442 return . failure( . unknownError( " Unhandled header type: \( message. headerType) " ) )
369443 }
370444 }
371445
372446}
447+ // swiftlint:disable:this file_length
0 commit comments