@@ -56,25 +56,38 @@ namespace sparrow_ipc
5656 {
5757 std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> children_vec;
5858 children_vec.reserve (arrow_schema.n_children );
59- for (int i = 0 ; i < arrow_schema.n_children ; ++i)
59+ for (size_t i = 0 ; i < arrow_schema.n_children ; ++i)
6060 {
6161 if (arrow_schema.children [i] == nullptr )
6262 {
63- throw std::invalid_argument (" ArrowSchema has null child at index " + std::to_string (i) );
63+ throw std::invalid_argument (" ArrowSchema has null child pointer " );
6464 }
65- flatbuffers::Offset<org::apache::arrow::flatbuf::Field> field = create_field (
66- builder,
67- *(arrow_schema.children [i])
68- );
65+ const auto & child = *arrow_schema.children [i];
66+ flatbuffers::Offset<org::apache::arrow::flatbuf::Field> field = create_field (builder, child);
6967 children_vec.emplace_back (field);
7068 }
7169 return children_vec.empty () ? 0 : builder.CreateVector (children_vec);
7270 }
7371
74- flatbuffers::FlatBufferBuilder get_schema_message_builder (const ArrowSchema& arrow_schema)
72+ ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
73+ create_children (flatbuffers::FlatBufferBuilder& builder, sparrow::record_batch::column_range columns)
74+ {
75+ std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> children_vec;
76+ children_vec.reserve (columns.size ());
77+ for (const auto & column : columns)
78+ {
79+ const auto & arrow_schema = sparrow::detail::array_access::get_arrow_proxy (column).schema ();
80+ flatbuffers::Offset<org::apache::arrow::flatbuf::Field> field = create_field (builder, arrow_schema);
81+ children_vec.emplace_back (field);
82+ }
83+ return children_vec.empty () ? 0 : builder.CreateVector (children_vec);
84+ }
85+
86+ flatbuffers::FlatBufferBuilder get_schema_message_builder (const sparrow::record_batch& record_batch)
7587 {
7688 flatbuffers::FlatBufferBuilder schema_builder;
77- const auto fields_vec = create_children (schema_builder, arrow_schema);
89+ record_batch.columns ();
90+ const auto fields_vec = create_children (schema_builder, record_batch.columns ());
7891 const auto schema_offset = org::apache::arrow::flatbuf::CreateSchema (
7992 schema_builder,
8093 org::apache::arrow::flatbuf::Endianness::Little, // TODO: make configurable
@@ -92,12 +105,11 @@ namespace sparrow_ipc
92105 return schema_builder;
93106 }
94107
95- std::vector<uint8_t > serialize_schema_message (const ArrowSchema& arrow_schema )
108+ std::vector<uint8_t > serialize_schema_message (const sparrow::record_batch& record_batch )
96109 {
97110 std::vector<uint8_t > schema_buffer;
98-
99111 schema_buffer.insert (schema_buffer.end (), continuation.begin (), continuation.end ());
100- flatbuffers::FlatBufferBuilder schema_builder = get_schema_message_builder (arrow_schema );
112+ flatbuffers::FlatBufferBuilder schema_builder = get_schema_message_builder (record_batch );
101113 const flatbuffers::uoffset_t schema_len = schema_builder.GetSize ();
102114 schema_buffer.reserve (schema_buffer.size () + sizeof (uint32_t ) + schema_len);
103115 // Write the 4-byte length prefix after the continuation bytes
@@ -162,8 +174,7 @@ namespace sparrow_ipc
162174 }
163175 for (const auto & child : arrow_proxy.children ())
164176 {
165- const auto & child_arrow_proxy = sparrow::detail::array_access::get_arrow_proxy (child);
166- fill_buffers (child_arrow_proxy, flatbuf_buffers, offset);
177+ fill_buffers (child, flatbuf_buffers, offset);
167178 }
168179 }
169180
@@ -190,8 +201,7 @@ namespace sparrow_ipc
190201 }
191202 for (const auto & child : arrow_proxy.children ())
192203 {
193- const auto & child_arrow_proxy = sparrow::detail::array_access::get_arrow_proxy (child);
194- fill_body (child_arrow_proxy, body);
204+ fill_body (child, body);
195205 }
196206 }
197207
@@ -215,8 +225,7 @@ namespace sparrow_ipc
215225 }
216226 for (const auto & child : arrow_proxy.children ())
217227 {
218- const auto & child_arrow_proxy = sparrow::detail::array_access::get_arrow_proxy (child);
219- total_size += calculate_body_size (child_arrow_proxy);
228+ total_size += calculate_body_size (child);
220229 }
221230 return total_size;
222231 }
@@ -270,14 +279,11 @@ namespace sparrow_ipc
270279 {
271280 std::vector<org::apache::arrow::flatbuf::FieldNode> nodes = create_fieldnodes (record_batch);
272281 std::vector<org::apache::arrow::flatbuf::Buffer> flatbuf_buffers = get_buffers (record_batch);
273- flatbuffers::FlatBufferBuilder record_batch_builder;
274- ::flatbuffers::Offset<org::apache::arrow::flatbuf::RecordBatch>
275- record_batch_offset = org::apache::arrow::flatbuf::CreateRecordBatchDirect (
276- record_batch_builder,
277- static_cast <int64_t >(record_batch.nb_rows ()),
278- &nodes,
279- &flatbuf_buffers
280- );
282+ flatbuffers::FlatBufferBuilder record_batch_builder = get_record_batch_message_builder (
283+ record_batch,
284+ nodes,
285+ flatbuf_buffers
286+ );
281287 std::vector<uint8_t > output;
282288 output.insert (output.end (), continuation.begin (), continuation.end ());
283289 const flatbuffers::uoffset_t record_batch_len = record_batch_builder.GetSize ();
@@ -302,40 +308,4 @@ namespace sparrow_ipc
302308 return output;
303309 }
304310
305- template <std::ranges::input_range R>
306- requires std::same_as<std::ranges::range_value_t <R>, sparrow::record_batch>
307- std::vector<uint8_t > serialize_record_batches (const R& record_batches)
308- {
309- std::vector<uint8_t > output;
310- for (const auto & record_batch : record_batches)
311- {
312- const auto rb_serialized = serialize_record_batch (record_batch);
313- output.insert (
314- output.end (),
315- std::make_move_iterator (rb_serialized.begin ()),
316- std::make_move_iterator (rb_serialized.end ())
317- );
318- }
319- return output;
320- }
321-
322- template <std::ranges::input_range R>
323- requires std::same_as<std::ranges::range_value_t <R>, sparrow::record_batch>
324- std::vector<uint8_t > serialize (const R& record_batches)
325- {
326- if (check_record_batches_consistency (record_batches))
327- {
328- throw std::invalid_argument (
329- " All record batches must have the same schema to be serialized together."
330- );
331- }
332- std::vector<uint8_t > serialized_schema = serialize_schema_message (record_batches[0 ].schema ());
333- std::vector<uint8_t > serialized_record_batches = serialize_record_batches (record_batches);
334- serialized_schema.insert (
335- serialized_schema.end (),
336- std::make_move_iterator (serialized_record_batches.begin ()),
337- std::make_move_iterator (serialized_record_batches.end ())
338- );
339- return serialized_schema;
340- }
341311}
0 commit comments