@@ -19,7 +19,7 @@ import FlatBuffers
1919import Foundation
2020
2121let FILEMARKER = " ARROW1 "
22- let CONTINUATIONMARKER = - 1
22+ let CONTINUATIONMARKER = UInt32 ( 0xFFFF_FFFF )
2323
2424/// @nodoc
2525public class ArrowReader { // swiftlint:disable:this type_body_length
@@ -240,7 +240,78 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
240240 return . success( RecordBatch ( arrowSchema, columns: columns) )
241241 }
242242
243- public func fromStream( // swiftlint:disable:this function_body_length
/// Reads a buffer encoded in the Arrow IPC streaming format.
///
/// The streaming format differs from the Arrow file format in that it has no
/// header and footer: it is a plain sequence of messages. Each message is a
/// 32-bit metadata length (optionally preceded by `CONTINUATIONMARKER`),
/// the FlatBuffers-encoded metadata, and then `bodyLength` bytes of body.
/// The stream ends at a zero length or when the data ends exactly on a
/// message boundary.
///
/// - Parameters:
///   - fileData: The bytes of the stream to decode.
///   - useUnalignedBuffers: Currently not consulted by this method. Message
///     metadata is always parsed with unaligned reads allowed, because each
///     message is sliced out of `fileData` at an arbitrary byte offset.
/// - Returns: `.success` with the schema and every record batch that was read,
///   or `.failure` when the stream is truncated, a record batch appears before
///   any schema, a message header cannot be decoded, or the header type is
///   not handled.
public func readStreaming( // swiftlint:disable:this function_body_length
    _ fileData: Data,
    useUnalignedBuffers: Bool = false
) -> Result<ArrowReaderResult, ArrowError> {
    let prefixSize = MemoryLayout<UInt32>.size
    let result = ArrowReaderResult()
    var schemaMessage: org_apache_arrow_flatbuf_Schema?
    var offset = 0

    // Running out of data exactly on a message boundary is treated as the end
    // of the stream, so the explicit end-of-stream marker is optional.
    while offset < fileData.count {
        guard fileData.count - offset >= prefixSize else {
            return .failure(.unknownError("Truncated stream: incomplete message length at offset \(offset)"))
        }
        var length = getUInt32(fileData, offset: offset)
        if length == CONTINUATIONMARKER {
            // The real metadata length follows the continuation marker.
            offset += prefixSize
            guard fileData.count - offset >= prefixSize else {
                return .failure(.unknownError("Truncated stream: missing message length at offset \(offset)"))
            }
            length = getUInt32(fileData, offset: offset)
        }
        if length == 0 {
            // A zero length signals end-of-stream.
            break
        }

        // Step over the length prefix; `offset` now points at the metadata.
        offset += prefixSize
        let metadataLength = Int64(length)
        guard metadataLength <= Int64(fileData.count - offset) else {
            return .failure(.unknownError("Truncated stream: message metadata at offset \(offset) exceeds the data"))
        }

        let dataBuffer = ByteBuffer(
            data: fileData[offset...],
            allowReadingUnalignedBuffers: true)
        let message = org_apache_arrow_flatbuf_Message.getRootAsMessage(bb: dataBuffer)

        // Absolute position in `fileData` where the metadata ends and the
        // message body begins.
        let messageEndOffset = Int64(offset) + metadataLength
        let bodyLength = message.bodyLength
        // Reject body lengths that would move the cursor backwards or past the
        // end of the data (also avoids overflow when advancing below).
        guard bodyLength >= 0, bodyLength <= Int64(fileData.count) - messageEndOffset else {
            return .failure(.unknownError("Invalid message body length \(bodyLength) at offset \(offset)"))
        }

        switch message.headerType {
        case .recordbatch:
            guard let rbMessage = message.header(type: org_apache_arrow_flatbuf_RecordBatch.self) else {
                return .failure(.unknownError("Unable to read record batch header at offset \(offset)"))
            }
            guard let schema = schemaMessage, let arrowSchema = result.schema else {
                return .failure(.unknownError("Record batch encountered before schema at offset \(offset)"))
            }
            do {
                let recordBatch = try loadRecordBatch(
                    rbMessage,
                    schema: schema,
                    arrowSchema: arrowSchema,
                    data: fileData,
                    messageEndOffset: messageEndOffset
                ).get()
                result.batches.append(recordBatch)
            } catch let error as ArrowError {
                return .failure(error)
            } catch {
                return .failure(.unknownError("Unexpected error: \(error)"))
            }
        case .schema:
            guard let header = message.header(type: org_apache_arrow_flatbuf_Schema.self) else {
                return .failure(.unknownError("Unable to read schema header at offset \(offset)"))
            }
            schemaMessage = header
            switch loadSchema(header) {
            case .success(let schema):
                result.schema = schema
            case .failure(let error):
                return .failure(error)
            }
        default:
            return .failure(.unknownError("Unhandled header type: \(message.headerType)"))
        }

        // Advance past the metadata and body to the next message's prefix.
        offset = Int(messageEndOffset + bodyLength)
    }
    return .success(result)
}
308+
309+ /*
310+ This is for reading the Arrow file format. The Arrow file format supports
311+ random access to the data. The Arrow file format contains a header and
312+ footer around the Arrow streaming format.
313+ */
314+ public func readFile( // swiftlint:disable:this function_body_length
244315 _ fileData: Data ,
245316 useUnalignedBuffers: Bool = false
246317 ) -> Result < ArrowReaderResult , ArrowError > {
@@ -266,7 +337,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
266337 for index in 0 ..< footer. recordBatchesCount {
267338 let recordBatch = footer. recordBatches ( at: index) !
268339 var messageLength = fileData. withUnsafeBytes { rawBuffer in
269- rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: Int32 . self)
340+ rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: UInt32 . self)
270341 }
271342
272343 var messageOffset : Int64 = 1
@@ -275,7 +346,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
275346 messageLength = fileData. withUnsafeBytes { rawBuffer in
276347 rawBuffer. loadUnaligned (
277348 fromByteOffset: Int ( recordBatch. offset + Int64( MemoryLayout< Int32> . size) ) ,
278- as: Int32 . self)
349+ as: UInt32 . self)
279350 }
280351 }
281352
@@ -320,7 +391,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
320391 let markerLength = FILEMARKER . utf8. count
321392 let footerLengthEnd = Int ( fileData. count - markerLength)
322393 let data = fileData [ ..< ( footerLengthEnd) ]
323- return fromStream ( data)
394+ return readFile ( data)
324395 } catch {
325396 return . failure( . unknownError( " Error loading file: \( error) " ) )
326397 }
0 commit comments