@@ -71,11 +71,30 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
7171 public init ( ) { }
7272
7373 private func writeField( _ fbb: inout FlatBufferBuilder , field: ArrowField ) -> Result < Offset , ArrowError > {
74+ var fieldsOffset : Offset ?
75+ if let nestedField = field. type as? ArrowNestedType {
76+ var offsets = [ Offset] ( )
77+ for field in nestedField. fields {
78+ switch writeField ( & fbb, field: field) {
79+ case . success( let offset) :
80+ offsets. append ( offset)
81+ case . failure( let error) :
82+ return . failure( error)
83+ }
84+ }
85+
86+ fieldsOffset = fbb. createVector ( ofOffsets: offsets)
87+ }
88+
7489 let nameOffset = fbb. create ( string: field. name)
7590 let fieldTypeOffsetResult = toFBType ( & fbb, arrowType: field. type)
7691 let startOffset = org_apache_arrow_flatbuf_Field. startField ( & fbb)
7792 org_apache_arrow_flatbuf_Field. add ( name: nameOffset, & fbb)
7893 org_apache_arrow_flatbuf_Field. add ( nullable: field. isNullable, & fbb)
94+ if let childrenOffset = fieldsOffset {
95+ org_apache_arrow_flatbuf_Field. addVectorOf ( children: childrenOffset, & fbb)
96+ }
97+
7998 switch toFBTypeEnum ( field. type) {
8099 case . success( let type) :
81100 org_apache_arrow_flatbuf_Field. add ( typeType: type, & fbb)
@@ -101,7 +120,6 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
101120 case . failure( let error) :
102121 return . failure( error)
103122 }
104-
105123 }
106124
107125 let fieldsOffset : Offset = fbb. createVector ( ofOffsets: fieldOffsets)
@@ -126,7 +144,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
126144 withUnsafeBytes ( of: CONTINUATIONMARKER . littleEndian) { writer. append ( Data ( $0) ) }
127145 withUnsafeBytes ( of: rbResult. 1 . o. littleEndian) { writer. append ( Data ( $0) ) }
128146 writer. append ( rbResult. 0 )
129- switch writeRecordBatchData ( & writer, batch: batch) {
147+ switch writeRecordBatchData ( & writer, fields : batch. schema . fields , columns : batch. columns ) {
130148 case . success:
131149 rbBlocks. append (
132150 org_apache_arrow_flatbuf_Block ( offset: Int64 ( startIndex) ,
@@ -143,37 +161,59 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
143161 return . success( rbBlocks)
144162 }
145163
146- private func writeRecordBatch( batch: RecordBatch ) -> Result < ( Data , Offset ) , ArrowError > {
147- let schema = batch. schema
148- var fbb = FlatBufferBuilder ( )
149-
150- // write out field nodes
151- var fieldNodeOffsets = [ Offset] ( )
152- fbb. startVector ( schema. fields. count, elementSize: MemoryLayout< org_apache_arrow_flatbuf_FieldNode> . size)
153- for index in ( 0 ..< schema. fields. count) . reversed ( ) {
154- let column = batch. column ( index)
/// Appends one FlatBuffers `FieldNode` per column to `offsets`, descending into nested columns.
///
/// Columns are visited from last to first. For each column a `FieldNode` carrying the column's
/// `length` and `nullCount` is created in `fbb` and its offset is appended to `offsets`. When the
/// column's type is an `ArrowNestedType`, the nodes of its child columns are appended right after
/// the parent's node by a recursive call.
///
/// - Parameters:
///   - fields: Fields of the current level. Only `fields.count` is read here; it decides how many
///     entries of `columns` are visited.
///   - columns: Column holders of the current level, indexed in parallel with `fields`.
///   - offsets: Accumulator receiving the offset of every node created, in creation order.
///   - fbb: Builder the node structs are written into.
/// - Precondition: A column whose type is an `ArrowNestedType` must be backed by a `StructArray`
///   whose `arrowFields` is non-nil; otherwise this function traps.
private func writeFieldNodes(_ fields: [ArrowField], columns: [ArrowArrayHolder], offsets: inout [Offset],
                             fbb: inout FlatBufferBuilder) {
    for index in fields.indices.reversed() {
        let column = columns[index]
        let fieldNode = org_apache_arrow_flatbuf_FieldNode(length: Int64(column.length),
                                                           nullCount: Int64(column.nullCount))
        offsets.append(fbb.create(struct: fieldNode))

        guard let nestedType = column.type as? ArrowNestedType else {
            continue
        }

        // This function has no error channel, so a malformed nested column is treated as a
        // programmer error. Trap with an explicit message instead of an anonymous force-unwrap.
        guard let structArray = column.array as? StructArray,
              let childColumns = structArray.arrowFields else {
            fatalError("Struct type array expected for nested type")
        }
        writeFieldNodes(nestedType.fields, columns: childColumns, offsets: &offsets, fbb: &fbb)
    }
}
160178
161- let nodeOffset = fbb . endVector ( len : schema . fields. count )
162-
163- // write out buffers
164- var buffers = [ org_apache_arrow_flatbuf_Buffer] ( )
165- var bufferOffset = Int ( 0 )
166- for index in 0 ..< batch . schema . fields. count {
167- let column = batch . column ( index)
/// Appends a FlatBuffers `Buffer` descriptor for every buffer of every column to `buffers`.
///
/// For each column, every size reported by `getBufferDataSizes()` is passed through
/// `getPadForAlignment`; the result becomes the descriptor's `length`, the current `bufferOffset`
/// becomes its `offset`, and `bufferOffset` is then advanced by that same length. When the
/// column's type is an `ArrowNestedType`, the descriptors of its child columns are appended by a
/// recursive call.
///
/// - Parameters:
///   - fields: Fields of the current level. Only `fields.count` is read here; it decides how many
///     entries of `columns` are visited.
///   - columns: Column holders of the current level, indexed in parallel with `fields`.
///   - bufferOffset: Running offset shared across all levels of recursion; updated in place.
///   - buffers: Accumulator receiving the descriptors in the order they are produced.
///   - fbb: Not used directly; only forwarded to the recursive call.
/// - Precondition: A column whose type is an `ArrowNestedType` must be backed by a `StructArray`
///   whose `arrowFields` is non-nil; otherwise this function traps.
private func writeBufferInfo(_ fields: [ArrowField],
                             columns: [ArrowArrayHolder],
                             bufferOffset: inout Int,
                             buffers: inout [org_apache_arrow_flatbuf_Buffer],
                             fbb: inout FlatBufferBuilder) {
    for index in fields.indices {
        let column = columns[index]
        for rawSize in column.getBufferDataSizes() {
            let alignedSize = getPadForAlignment(rawSize)
            let descriptor = org_apache_arrow_flatbuf_Buffer(offset: Int64(bufferOffset),
                                                             length: Int64(alignedSize))
            buffers.append(descriptor)
            bufferOffset += alignedSize

            // NOTE(review): the descent into child columns runs once per buffer of the nested
            // column, so children are described exactly once only when that column reports a
            // single buffer. Kept as-is because writeRecordBatchData emits the buffer bytes with
            // the same loop shape and the two must stay in step; confirm this is intended.
            guard let nestedType = column.type as? ArrowNestedType else {
                continue
            }

            // This function has no error channel, so a malformed nested column is treated as a
            // programmer error. Trap with an explicit message instead of an anonymous force-unwrap.
            guard let structArray = column.array as? StructArray,
                  let childColumns = structArray.arrowFields else {
                fatalError("Struct type array expected for nested type")
            }
            writeBufferInfo(nestedType.fields, columns: childColumns,
                            bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
        }
    }
}
176200
201+ private func writeRecordBatch( batch: RecordBatch ) -> Result < ( Data , Offset ) , ArrowError > {
202+ let schema = batch. schema
203+ var fbb = FlatBufferBuilder ( )
204+
205+ // write out field nodes
206+ var fieldNodeOffsets = [ Offset] ( )
207+ fbb. startVector ( schema. fields. count, elementSize: MemoryLayout< org_apache_arrow_flatbuf_FieldNode> . size)
208+ writeFieldNodes ( schema. fields, columns: batch. columns, offsets: & fieldNodeOffsets, fbb: & fbb)
209+ let nodeOffset = fbb. endVector ( len: fieldNodeOffsets. count)
210+
211+ // write out buffers
212+ var buffers = [ org_apache_arrow_flatbuf_Buffer] ( )
213+ var bufferOffset = Int ( 0 )
214+ writeBufferInfo ( schema. fields, columns: batch. columns,
215+ bufferOffset: & bufferOffset, buffers: & buffers,
216+ fbb: & fbb)
177217 org_apache_arrow_flatbuf_RecordBatch. startVectorOfBuffers ( batch. schema. fields. count, in: & fbb)
178218 for buffer in buffers. reversed ( ) {
179219 fbb. create ( struct: buffer)
@@ -196,13 +236,28 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
196236 return . success( ( fbb. data, Offset ( offset: UInt32 ( fbb. data. count) ) ) )
197237 }
198238
199- private func writeRecordBatchData( _ writer: inout DataWriter , batch: RecordBatch ) -> Result < Bool , ArrowError > {
200- for index in 0 ..< batch. schema. fields. count {
201- let column = batch. column ( index)
239+ private func writeRecordBatchData(
240+ _ writer: inout DataWriter , fields: [ ArrowField ] ,
241+ columns: [ ArrowArrayHolder ] )
242+ -> Result < Bool , ArrowError > {
243+ for index in 0 ..< fields. count {
244+ let column = columns [ index]
202245 let colBufferData = column. getBufferData ( )
203246 for var bufferData in colBufferData {
204247 addPadForAlignment ( & bufferData)
205248 writer. append ( bufferData)
249+ if let nestedType = column. type as? ArrowNestedType {
250+ guard let structArray = column. array as? StructArray else {
251+ return . failure( . invalid( " Struct type array expected for nested type " ) )
252+ }
253+
254+ switch writeRecordBatchData ( & writer, fields: nestedType. fields, columns: structArray. arrowFields!) {
255+ case . success:
256+ continue
257+ case . failure( let error) :
258+ return . failure( error)
259+ }
260+ }
206261 }
207262 }
208263
@@ -226,11 +281,10 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
226281 org_apache_arrow_flatbuf_Footer. addVectorOf ( recordBatches: rbBlkEnd, & fbb)
227282 let footerOffset = org_apache_arrow_flatbuf_Footer. endFooter ( & fbb, start: footerStartOffset)
228283 fbb. finish ( offset: footerOffset)
284+ return . success( fbb. data)
229285 case . failure( let error) :
230286 return . failure( error)
231287 }
232-
233- return . success( fbb. data)
234288 }
235289
236290 private func writeFile( _ writer: inout DataWriter , info: ArrowWriter . Info ) -> Result < Bool , ArrowError > {
@@ -265,7 +319,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
265319 return . success( true )
266320 }
267321
268- public func writeSteaming ( _ info: ArrowWriter . Info ) -> Result < Data , ArrowError > {
322+ public func writeStreaming ( _ info: ArrowWriter . Info ) -> Result < Data , ArrowError > {
269323 let writer : any DataWriter = InMemDataWriter ( )
270324 switch toMessage ( info. schema) {
271325 case . success( let schemaData) :
@@ -343,7 +397,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
343397 writer. append ( message. 0 )
344398 addPadForAlignment ( & writer)
345399 var dataWriter : any DataWriter = InMemDataWriter ( )
346- switch writeRecordBatchData ( & dataWriter, batch: batch) {
400+ switch writeRecordBatchData ( & dataWriter, fields : batch. schema . fields , columns : batch. columns ) {
347401 case . success:
348402 return . success( [
349403 ( writer as! InMemDataWriter ) . data, // swiftlint:disable:this force_cast
@@ -377,3 +431,4 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
377431 return . success( fbb. data)
378432 }
379433}
434+ // swiftlint:disable:this file_length
0 commit comments