Skip to content

Commit b49b6eb

Browse files
committed
Move definitions to serialize_null_array.cpp
1 parent eff8892 commit b49b6eb

File tree

3 files changed

+47
-44
lines changed

3 files changed

+47
-44
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ set(SPARROW_IPC_HEADERS
5151

5252
set(SPARROW_IPC_SRC
5353
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
54+
${SPARROW_IPC_SOURCE_DIR}/serialize_null_array.cpp
5455
${SPARROW_IPC_SOURCE_DIR}/utils.cpp
5556
)
5657

include/serialize_null_array.hpp

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,10 @@
11
#pragma once
22

3+
#include "config/config.hpp"
34
#include "serialize.hpp"
45

56
namespace sparrow_ipc
67
{
7-
// TODO move to cpp if not templated
8-
9-
// This function serializes a sparrow::null_array into a byte vector compliant
10-
// with the Apache Arrow IPC Streaming Format. It mirrors the structure of
11-
// serialize_primitive_array but is optimized for null_array's properties.
12-
// A null_array is represented by metadata only (Schema, RecordBatch) and has no data buffers,
13-
// making its message body zero-length.
14-
std::vector<uint8_t> serialize_null_array(sparrow::null_array& arr)
15-
{
16-
auto [arrow_arr_ptr, arrow_schema_ptr] = sparrow::get_arrow_structures(arr);
17-
auto& arrow_arr = *arrow_arr_ptr;
18-
auto& arrow_schema = *arrow_schema_ptr;
19-
20-
std::vector<uint8_t> final_buffer;
21-
// I - Serialize the Schema message
22-
details::serialize_schema_message(arrow_schema, arr.metadata(), final_buffer);
23-
24-
// II - Serialize the RecordBatch message
25-
details::serialize_record_batch_message(arrow_arr, {}, final_buffer);
26-
27-
// Return the final buffer containing the complete IPC stream
28-
return final_buffer;
29-
}
30-
31-
// This function deserializes a byte vector into a sparrow::null_array.
32-
// It reads the Schema and RecordBatch messages to extract the array's length,
33-
// name, and metadata, then constructs a null_array.
34-
sparrow::null_array deserialize_null_array(const std::vector<uint8_t>& buffer)
35-
{
36-
const uint8_t* buf_ptr = buffer.data();
37-
size_t current_offset = 0;
38-
39-
// I - Deserialize the Schema message
40-
std::optional<std::string> name;
41-
std::optional<std::vector<sparrow::metadata_pair>> metadata;
42-
details::deserialize_schema_message(buf_ptr, current_offset, name, metadata);
43-
44-
// II - Deserialize the RecordBatch message
45-
const auto* record_batch = details::deserialize_record_batch_message(buf_ptr, current_offset);
46-
47-
// The body is empty, so we don't need to read any further.
48-
// Construct the null_array from the deserialized metadata.
49-
return sparrow::null_array(record_batch->length(), name, metadata);
50-
}
8+
SPARROW_IPC_API std::vector<uint8_t> serialize_null_array(sparrow::null_array& arr);
9+
SPARROW_IPC_API sparrow::null_array deserialize_null_array(const std::vector<uint8_t>& buffer);
5110
}

src/serialize_null_array.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#include "serialize_null_array.hpp"
2+
3+
namespace sparrow_ipc
4+
{
5+
// A null_array is represented by metadata only (Schema, RecordBatch) and has no data buffers,
6+
// making its message body zero-length.
7+
std::vector<uint8_t> serialize_null_array(sparrow::null_array& arr)
8+
{
9+
auto [arrow_arr_ptr, arrow_schema_ptr] = sparrow::get_arrow_structures(arr);
10+
auto& arrow_arr = *arrow_arr_ptr;
11+
auto& arrow_schema = *arrow_schema_ptr;
12+
13+
std::vector<uint8_t> final_buffer;
14+
// I - Serialize the Schema message
15+
details::serialize_schema_message(arrow_schema, arr.metadata(), final_buffer);
16+
17+
// II - Serialize the RecordBatch message
18+
details::serialize_record_batch_message(arrow_arr, {}, final_buffer);
19+
20+
// Return the final buffer containing the complete IPC stream
21+
return final_buffer;
22+
}
23+
24+
// This reads the Schema and RecordBatch messages to extract the array's length,
25+
// name, and metadata, then constructs a null_array.
26+
sparrow::null_array deserialize_null_array(const std::vector<uint8_t>& buffer)
27+
{
28+
const uint8_t* buf_ptr = buffer.data();
29+
size_t current_offset = 0;
30+
31+
// I - Deserialize the Schema message
32+
std::optional<std::string> name;
33+
std::optional<std::vector<sparrow::metadata_pair>> metadata;
34+
details::deserialize_schema_message(buf_ptr, current_offset, name, metadata);
35+
36+
// II - Deserialize the RecordBatch message
37+
const auto* record_batch = details::deserialize_record_batch_message(buf_ptr, current_offset);
38+
39+
// The body is empty, so we don't need to read any further.
40+
// Construct the null_array from the deserialized metadata.
41+
return sparrow::null_array(record_batch->length(), name, metadata);
42+
}
43+
}

0 commit comments

Comments
 (0)