|
16 | 16 |
|
17 | 17 | namespace sparrow_ipc |
18 | 18 | { |
/**
 * @brief Represents a block entry in the Arrow IPC file footer.
 *
 * Each block describes the location and size of a record batch in the file.
 *
 * All members carry a default initializer so that a default-constructed block
 * holds well-defined zeros instead of indeterminate values. The type remains an
 * aggregate (C++14 and later), so it can still be brace-initialized in member
 * order: `{offset, metadata_length, body_length}`.
 */
struct record_batch_block
{
    int64_t offset{0};           ///< Offset from the start of the file to the record batch message
    int32_t metadata_length{0};  ///< Length of the metadata (FlatBuffer message)
    int64_t body_length{0};      ///< Length of the record batch body (data buffers)
};
| 30 | + |
19 | 31 | /**
20 | 32 | * @brief Writes the Arrow IPC file footer to the given output stream.
21 | 33 | *
22 | 34 | * @param record_batch A record batch containing the schema for the footer
| 35 | + * @param record_batch_blocks Block entries (offset, metadata length, body length), one for each record batch
23 | 36 | * @param stream The output stream to write the footer to
24 | 37 | * @return The size of the footer in bytes
25 | 38 | */
26 | 39 | SPARROW_IPC_API size_t write_footer(
27 | 40 | const sparrow::record_batch& record_batch,
| 41 | + const std::vector<record_batch_block>& record_batch_blocks,
28 | 42 | any_output_stream& stream
29 | 43 | );
30 | 44 |
|
@@ -184,7 +198,14 @@ namespace sparrow_ipc |
184 | 198 | { |
185 | 199 | throw std::invalid_argument("Record batch schema does not match file serializer schema"); |
186 | 200 | } |
187 | | - serialize_record_batch(rb, m_stream, m_compression, compressed_buffers_cache); |
| 201 | + |
| 202 | + // Offset is from the start of the file to the record batch message |
| 203 | + const int64_t offset = static_cast<int64_t>(m_stream.size()); |
| 204 | + |
| 205 | + // Serialize and get block info |
| 206 | + const auto info = serialize_record_batch(rb, m_stream, m_compression, compressed_buffers_cache); |
| 207 | + |
| 208 | + m_record_batch_blocks.push_back({offset, info.metadata_length, info.body_length}); |
188 | 209 | } |
189 | 210 | } |
190 | 211 |
|
@@ -280,6 +301,7 @@ namespace sparrow_ipc |
280 | 301 | any_output_stream m_stream; |
281 | 302 | bool m_ended{false}; |
282 | 303 | std::optional<CompressionType> m_compression; |
| 304 | + std::vector<record_batch_block> m_record_batch_blocks; |
283 | 305 | }; |
284 | 306 |
|
285 | 307 | /** |
|
0 commit comments