18
18
import FlatBuffers
19
19
import Foundation
20
20
21
- /// @nodoc
22
21
/// A sink that receives the chunks of `Data` produced by the writer.
public protocol DataWriter {
  /// Size reported by the writer.
  /// NOTE(review): presumably the number of bytes appended so far; confirm against the
  /// conforming types, whose implementations are not visible here.
  var count: Int { get }

  /// Hands the next chunk of output to the writer.
  ///
  /// - Parameter data: The bytes to add to the writer's output.
  func append(_ data: Data)
}
26
25
27
- /// @nodoc
28
26
public class ArrowWriter { // swiftlint:disable:this type_body_length
29
27
public class InMemDataWriter : DataWriter {
30
28
public private( set) var data : Data
@@ -77,11 +75,30 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
77
75
private func writeField( _ fbb: inout FlatBufferBuilder , field: ArrowField ) -> Result <
78
76
Offset , ArrowError
79
77
> {
78
+ var fieldsOffset : Offset ?
79
+ if let nestedField = field. type as? ArrowNestedType {
80
+ var offsets = [ Offset] ( )
81
+ for field in nestedField. fields {
82
+ switch writeField ( & fbb, field: field) {
83
+ case . success( let offset) :
84
+ offsets. append ( offset)
85
+ case . failure( let error) :
86
+ return . failure( error)
87
+ }
88
+ }
89
+
90
+ fieldsOffset = fbb. createVector ( ofOffsets: offsets)
91
+ }
92
+
80
93
let nameOffset = fbb. create ( string: field. name)
81
94
let fieldTypeOffsetResult = toFBType ( & fbb, arrowType: field. type)
82
95
let startOffset = org_apache_arrow_flatbuf_Field. startField ( & fbb)
83
96
org_apache_arrow_flatbuf_Field. add ( name: nameOffset, & fbb)
84
97
org_apache_arrow_flatbuf_Field. add ( nullable: field. isNullable, & fbb)
98
+ if let childrenOffset = fieldsOffset {
99
+ org_apache_arrow_flatbuf_Field. addVectorOf ( children: childrenOffset, & fbb)
100
+ }
101
+
85
102
switch toFBTypeEnum ( field. type) {
86
103
case . success( let type) :
87
104
org_apache_arrow_flatbuf_Field. add ( typeType: type, & fbb)
@@ -109,7 +126,6 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
109
126
case . failure( let error) :
110
127
return . failure( error)
111
128
}
112
-
113
129
}
114
130
115
131
let fieldsOffset : Offset = fbb. createVector ( ofOffsets: fieldOffsets)
@@ -135,7 +151,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
135
151
withUnsafeBytes ( of: CONTINUATIONMARKER . littleEndian) { writer. append ( Data ( $0) ) }
136
152
withUnsafeBytes ( of: rbResult. 1 . o. littleEndian) { writer. append ( Data ( $0) ) }
137
153
writer. append ( rbResult. 0 )
138
- switch writeRecordBatchData ( & writer, batch: batch) {
154
+ switch writeRecordBatchData ( & writer, fields : batch. schema . fields , columns : batch. columns ) {
139
155
case . success:
140
156
rbBlocks. append (
141
157
org_apache_arrow_flatbuf_Block (
@@ -153,40 +169,69 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
153
169
return . success( rbBlocks)
154
170
}
155
171
156
- private func writeRecordBatch( batch: RecordBatch ) -> Result < ( Data , Offset ) , ArrowError > {
157
- let schema = batch. schema
158
- var fbb = FlatBufferBuilder ( )
159
-
160
- // write out field nodes
161
- var fieldNodeOffsets = [ Offset] ( )
162
- fbb. startVector (
163
- schema. fields. count, elementSize: MemoryLayout< org_apache_arrow_flatbuf_FieldNode> . size)
164
- for index in ( 0 ..< schema. fields. count) . reversed ( ) {
165
- let column = batch. column ( index)
172
/// Creates one `org_apache_arrow_flatbuf_FieldNode` per column and records its offset.
///
/// Columns are visited from last to first. Each node carries the column's `length` and
/// `nullCount`. When a column's type is an `ArrowNestedType`, the nested fields are processed
/// recursively straight after the parent's node has been created.
///
/// Traps if a nested column's array is not a `StructArray`, or if that array's `arrowFields`
/// is `nil`.
///
/// - Parameters:
///   - fields: Fields of the current level. Only the count is used, so `columns` must hold at
///     least as many entries.
///   - columns: Array holders that supply the node values, indexed in parallel with `fields`.
///   - offsets: Receives the offset returned by `fbb.create(struct:)` for every node, nested
///     ones included.
///   - fbb: Builder the nodes are written into.
private func writeFieldNodes(
  _ fields: [ArrowField], columns: [ArrowArrayHolder], offsets: inout [Offset],
  fbb: inout FlatBufferBuilder
) {
  for position in fields.indices.reversed() {
    let holder = columns[position]
    let node = org_apache_arrow_flatbuf_FieldNode(
      length: Int64(holder.length),
      nullCount: Int64(holder.nullCount))
    let nodeOffset = fbb.create(struct: node)
    offsets.append(nodeOffset)

    guard let nested = holder.type as? ArrowNestedType else { continue }

    // NOTE(review): children are created after their parent while the loop runs in reverse;
    // confirm the resulting node order is the one the reader expects for nested columns.
    let childFields = nested.fields
    let childColumns = (holder.array as? StructArray)!.arrowFields!
    writeFieldNodes(childFields, columns: childColumns, offsets: &offsets, fbb: &fbb)
  }
}
172
190
173
- let nodeOffset = fbb. endVector ( len: schema. fields. count)
174
-
175
- // write out buffers
176
- var buffers = [ org_apache_arrow_flatbuf_Buffer] ( )
177
- var bufferOffset = Int ( 0 )
178
- for index in 0 ..< batch. schema. fields. count {
179
- let column = batch. column ( index)
191
/// Collects an `org_apache_arrow_flatbuf_Buffer` descriptor for every buffer of every column.
///
/// Each size reported by `getBufferDataSizes()` is passed through `getPadForAlignment`. The
/// result becomes the descriptor's length and is added to the running `bufferOffset`. When a
/// column's type is an `ArrowNestedType`, the nested fields are processed recursively.
///
/// Traps if a nested column's array is not a `StructArray`, or if that array's `arrowFields`
/// is `nil`.
///
/// - Parameters:
///   - fields: Fields of the current level. Only the count is used, so `columns` must hold at
///     least as many entries.
///   - columns: Array holders whose buffer sizes are described, indexed in parallel with
///     `fields`.
///   - bufferOffset: Running offset; advanced by each adjusted buffer size.
///   - buffers: Receives the descriptors in the order they are produced.
///   - fbb: Only forwarded to the recursive call; nothing is written to it here.
private func writeBufferInfo(
  _ fields: [ArrowField],
  columns: [ArrowArrayHolder],
  bufferOffset: inout Int,
  buffers: inout [org_apache_arrow_flatbuf_Buffer],
  fbb: inout FlatBufferBuilder
) {
  for position in fields.indices {
    let holder = columns[position]
    for rawSize in holder.getBufferDataSizes() {
      let alignedSize = getPadForAlignment(rawSize)
      let descriptor = org_apache_arrow_flatbuf_Buffer(
        offset: Int64(bufferOffset), length: Int64(alignedSize))
      buffers.append(descriptor)
      bufferOffset += alignedSize

      guard let nested = holder.type as? ArrowNestedType else { continue }

      // NOTE(review): the recursion runs once per parent buffer, not once per column. That is
      // only equivalent when a nested column reports exactly one buffer - confirm, and keep
      // this in step with the loop that writes the buffer data.
      let childFields = nested.fields
      let childColumns = (holder.array as? StructArray)!.arrowFields!
      writeBufferInfo(
        childFields, columns: childColumns,
        bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
    }
  }
}
189
216
217
+ private func writeRecordBatch( batch: RecordBatch ) -> Result < ( Data , Offset ) , ArrowError > {
218
+ let schema = batch. schema
219
+ var fbb = FlatBufferBuilder ( )
220
+
221
+ // write out field nodes
222
+ var fieldNodeOffsets = [ Offset] ( )
223
+ fbb. startVector (
224
+ schema. fields. count, elementSize: MemoryLayout< org_apache_arrow_flatbuf_FieldNode> . size)
225
+ writeFieldNodes ( schema. fields, columns: batch. columns, offsets: & fieldNodeOffsets, fbb: & fbb)
226
+ let nodeOffset = fbb. endVector ( len: fieldNodeOffsets. count)
227
+
228
+ // write out buffers
229
+ var buffers = [ org_apache_arrow_flatbuf_Buffer] ( )
230
+ var bufferOffset = Int ( 0 )
231
+ writeBufferInfo (
232
+ schema. fields, columns: batch. columns,
233
+ bufferOffset: & bufferOffset, buffers: & buffers,
234
+ fbb: & fbb)
190
235
org_apache_arrow_flatbuf_RecordBatch. startVectorOfBuffers ( batch. schema. fields. count, in: & fbb)
191
236
for buffer in buffers. reversed ( ) {
192
237
fbb. create ( struct: buffer)
@@ -210,15 +255,32 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
210
255
return . success( ( fbb. data, Offset ( offset: UInt32 ( fbb. data. count) ) ) )
211
256
}
212
257
213
- private func writeRecordBatchData( _ writer: inout DataWriter , batch: RecordBatch ) -> Result <
214
- Bool , ArrowError
215
- > {
216
- for index in 0 ..< batch. schema. fields. count {
217
- let column = batch. column ( index)
258
+ private func writeRecordBatchData(
259
+ _ writer: inout DataWriter , fields: [ ArrowField ] ,
260
+ columns: [ ArrowArrayHolder ]
261
+ )
262
+ -> Result < Bool , ArrowError >
263
+ {
264
+ for index in 0 ..< fields. count {
265
+ let column = columns [ index]
218
266
let colBufferData = column. getBufferData ( )
219
267
for var bufferData in colBufferData {
220
268
addPadForAlignment ( & bufferData)
221
269
writer. append ( bufferData)
270
+ if let nestedType = column. type as? ArrowNestedType {
271
+ guard let structArray = column. array as? StructArray else {
272
+ return . failure( . invalid( " Struct type array expected for nested type " ) )
273
+ }
274
+
275
+ switch writeRecordBatchData (
276
+ & writer, fields: nestedType. fields, columns: structArray. arrowFields!)
277
+ {
278
+ case . success:
279
+ continue
280
+ case . failure( let error) :
281
+ return . failure( error)
282
+ }
283
+ }
222
284
}
223
285
}
224
286
@@ -244,11 +306,10 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
244
306
org_apache_arrow_flatbuf_Footer. addVectorOf ( recordBatches: rbBlkEnd, & fbb)
245
307
let footerOffset = org_apache_arrow_flatbuf_Footer. endFooter ( & fbb, start: footerStartOffset)
246
308
fbb. finish ( offset: footerOffset)
309
+ return . success( fbb. data)
247
310
case . failure( let error) :
248
311
return . failure( error)
249
312
}
250
-
251
- return . success( fbb. data)
252
313
}
253
314
254
315
private func writeFile( _ writer: inout DataWriter , info: ArrowWriter . Info ) -> Result <
@@ -285,7 +346,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
285
346
return . success( true )
286
347
}
287
348
288
- public func writeSteaming ( _ info: ArrowWriter . Info ) -> Result < Data , ArrowError > {
349
+ public func writeStreaming ( _ info: ArrowWriter . Info ) -> Result < Data , ArrowError > {
289
350
let writer : any DataWriter = InMemDataWriter ( )
290
351
switch toMessage ( info. schema) {
291
352
case . success( let schemaData) :
@@ -363,7 +424,8 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
363
424
writer. append ( message. 0 )
364
425
addPadForAlignment ( & writer)
365
426
var dataWriter : any DataWriter = InMemDataWriter ( )
366
- switch writeRecordBatchData ( & dataWriter, batch: batch) {
427
+ switch writeRecordBatchData ( & dataWriter, fields: batch. schema. fields, columns: batch. columns)
428
+ {
367
429
case . success:
368
430
return . success( [
369
431
( writer as! InMemDataWriter ) . data, // swiftlint:disable:this force_cast
@@ -397,3 +459,4 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
397
459
return . success( fbb. data)
398
460
}
399
461
}
462
+ // swiftlint:disable:this file_length
0 commit comments