Skip to content

Commit 85569fd

Browse files
committed
wip
1 parent f30c862 commit 85569fd

File tree

3 files changed

+7
-37
lines changed

3 files changed

+7
-37
lines changed

include/sparrow_ipc/serialize.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ namespace sparrow_ipc
8989
* includes both metadata and data portions of the record batch
9090
*/
9191

92-
SPARROW_IPC_API void
92+
SPARROW_IPC_API serialized_record_batch_info
9393
serialize_record_batch(const sparrow::record_batch& record_batch,
9494
any_output_stream& stream,
9595
std::optional<CompressionType> compression,

include/sparrow_ipc/serialize_utils.hpp

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,7 @@ namespace sparrow_ipc
2424
*/
2525
SPARROW_IPC_API void
2626
serialize_schema_message(const sparrow::record_batch& record_batch, any_output_stream& stream);
27-
28-
/**
29-
* @brief Serializes a record batch into a binary format following the Arrow IPC specification.
30-
*
31-
* This function converts a sparrow record batch into a serialized byte vector that includes:
32-
* - A continuation marker at the beginning
33-
* - The record batch metadata length (4 bytes)
34-
* - The FlatBuffer-encoded record batch metadata containing field nodes and buffer information
35-
* - Padding to ensure 8-byte alignment
36-
* - The actual data body containing the record batch buffers
37-
*
38-
* The serialization follows the Arrow IPC stream format where each record batch message
39-
* consists of a metadata section followed by a body section containing the actual data.
40-
*
41-
* @param record_batch The sparrow record batch to be serialized.
42-
* @param stream The output stream where the serialized record batch will be written.
43-
* @param compression Optional: The compression type to use when serializing.
44-
* @param cache Optional: A cache for compressed buffers to avoid recompression if compression is enabled.
45-
* If compression is given, cache should be set as well.
46-
*/
47-
SPARROW_IPC_API void
48-
serialize_record_batch(const sparrow::record_batch& record_batch, any_output_stream& stream,
49-
std::optional<CompressionType> compression,
50-
std::optional<std::reference_wrapper<CompressionCache>> cache);
51-
27+
5228
/**
5329
* @brief Calculates the total serialized size of a schema message.
5430
*

src/serialize.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,29 +30,23 @@ namespace sparrow_ipc
3030
serialized_record_batch_info serialize_record_batch(
3131
const sparrow::record_batch& record_batch,
3232
any_output_stream& stream,
33-
34-
std::optional<CompressionType> compression
35-
,
36-
std::optional<std::reference_wrapper<CompressionCache>> cache)
33+
std::optional<CompressionType> compression,
34+
std::optional<std::reference_wrapper<CompressionCache>> cache
35+
)
3736
{
3837
// Build and serialize metadata
3938
flatbuffers::FlatBufferBuilder builder = get_record_batch_message_builder(record_batch, compression, cache);
40-
const flatbuffers::uoffset_t flatbuffer_size = builder.GetSize();
41-
39+
4240
// Calculate metadata length for the Block in the footer
4341
// According to Arrow spec, metadata_length must be a multiple of 8.
4442
// The encapsulated message format is:
4543
// - continuation (4 bytes)
4644
// - size prefix (4 bytes)
4745
// - flatbuffer metadata (flatbuffer_size bytes)
4846
// - padding to 8-byte boundary
49-
//
47+
//
5048
// Arrow's WriteMessage returns metadata_length = align_to_8(8 + flatbuffer_size)
5149
// which INCLUDES the continuation bytes.
52-
const size_t prefix_size = continuation.size() + sizeof(uint32_t); // 8 bytes
53-
const int32_t metadata_length = static_cast<int32_t>(
54-
utils::align_to_8(prefix_size + flatbuffer_size)
55-
);
5650

5751
// Write metadata
5852
common_serialize(builder, stream);

0 commit comments

Comments
 (0)