66#include < sparrow/record_batch.hpp>
77
88#include " sparrow_ipc/compression.hpp"
9+ #include " sparrow_ipc/utils.hpp"
910
1011namespace sparrow_ipc
1112{
@@ -166,6 +167,42 @@ namespace sparrow_ipc
166167 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::FieldNode>
167168 create_fieldnodes (const sparrow::record_batch& record_batch);
168169
170+ namespace details
171+ {
172+ template <typename Func>
173+ void fill_buffers_impl (
174+ const sparrow::arrow_proxy& arrow_proxy,
175+ std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
176+ int64_t & offset,
177+ Func&& get_buffer_size
178+ )
179+ {
180+ const auto & buffers = arrow_proxy.buffers ();
181+ for (const auto & buffer : buffers)
182+ {
183+ int64_t size = get_buffer_size (buffer);
184+ flatbuf_buffers.emplace_back (offset, size);
185+ offset += utils::align_to_8 (size);
186+ }
187+ for (const auto & child : arrow_proxy.children ())
188+ {
189+ fill_buffers_impl (child, flatbuf_buffers, offset, get_buffer_size);
190+ }
191+ }
192+
193+ template <typename Func>
194+ std::vector<org::apache::arrow::flatbuf::Buffer> get_buffers_impl (const sparrow::record_batch& record_batch, Func&& fill_buffers_func)
195+ {
196+ std::vector<org::apache::arrow::flatbuf::Buffer> buffers;
197+ int64_t offset = 0 ;
198+ for (const auto & column : record_batch.columns ())
199+ {
200+ const auto & arrow_proxy = sparrow::detail::array_access::get_arrow_proxy (column);
201+ fill_buffers_func (arrow_proxy, buffers, offset);
202+ }
203+ return buffers;
204+ }
205+ } // namespace details
169206
170207 /* *
171208 * @brief Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow
@@ -208,20 +245,39 @@ namespace sparrow_ipc
208245 get_buffers (const sparrow::record_batch& record_batch);
209246
210247 /* *
211- * @brief Generates the compressed message body and buffer metadata for a record batch .
248+ * @brief Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
212249 *
213- * This function traverses the record batch, compresses each buffer using the specified
214- * compression algorithm, and constructs the message body. For each compressed buffer,
215- * it is prefixed by its 8-byte uncompressed size. Padding is added after each
216- * compressed buffer to ensure 8-byte alignment.
250+ * This function traverses the Arrow proxy and its children, compressing each buffer and recording
251+ * its metadata (offset and size) in the provided vector. The offset is updated to ensure proper
252+ * alignment for each subsequent buffer.
217253 *
218- * @param record_batch The record batch to serialize .
219- * @param compression_type The compression algorithm to use (e.g., LZ4_FRAME, ZSTD) .
220- * @return A vector of FlatBuffer Buffer objects describing the offset and
221- * size of each buffer within the compressed body .
254+ * @param arrow_proxy The Arrow proxy containing the buffers to be compressed.
255+ * @param flatbuf_compressed_buffers A vector to store the resulting compressed buffer metadata.
256+ * @param offset The current offset in the buffer layout, which will be updated by the function.
257+ * @param compression_type The compression algorithm to use.
222258 */
223- [[nodiscard]] SPARROW_IPC_API std::vector<org::apache::arrow::flatbuf::Buffer>
224- generate_compressed_buffers (const sparrow::record_batch& record_batch, const CompressionType compression_type);
259+ void fill_compressed_buffers (
260+ const sparrow::arrow_proxy& arrow_proxy,
261+ std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_compressed_buffers,
262+ int64_t & offset,
263+ const CompressionType compression_type
264+ );
265+
266+ /* *
267+ * @brief Retrieves metadata describing the layout of compressed buffers within a record batch.
268+ *
269+ * This function processes a record batch to determine the metadata (offset and size)
270+ * for each of its buffers, assuming they are compressed using the specified algorithm.
271+ * This metadata accounts for each compressed buffer being prefixed by its 8-byte
272+ * uncompressed size and padded to ensure 8-byte alignment.
273+ *
274+ * @param record_batch The record batch whose buffers' compressed metadata is to be retrieved.
275+ * @param compression_type The compression algorithm that would be applied (e.g., LZ4_FRAME, ZSTD).
276+ * @return A vector of FlatBuffer Buffer objects, each describing the offset and
277+ * size of a corresponding compressed buffer within a larger message body.
278+ */
279+ [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer>
280+ get_compressed_buffers (const sparrow::record_batch& record_batch, const CompressionType compression_type);
225281
226282 /* *
227283 * @brief Calculates the total size of the body section for an Arrow array.
@@ -234,7 +290,7 @@ namespace sparrow_ipc
234290 * @param compression The compression type to use when serializing
235291 * @return int64_t The total aligned size in bytes of all buffers in the array hierarchy
236292 */
237- [[nodiscard]] SPARROW_IPC_API int64_t calculate_body_size (const sparrow::arrow_proxy& arrow_proxy, std::optional<CompressionType> compression = std::nullopt );
293+ [[nodiscard]] int64_t calculate_body_size (const sparrow::arrow_proxy& arrow_proxy, std::optional<CompressionType> compression = std::nullopt );
238294
239295 /* *
240296 * @brief Calculates the total body size of a record batch by summing the body sizes of all its columns.
@@ -247,7 +303,7 @@ namespace sparrow_ipc
247303 * @param compression The compression type to use when serializing
248304 * @return int64_t The total body size in bytes of all columns in the record batch
249305 */
250- [[nodiscard]] SPARROW_IPC_API int64_t calculate_body_size (const sparrow::record_batch& record_batch, std::optional<CompressionType> compression = std::nullopt );
306+ [[nodiscard]] int64_t calculate_body_size (const sparrow::record_batch& record_batch, std::optional<CompressionType> compression = std::nullopt );
251307
252308 /* *
253309 * @brief Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
0 commit comments