Skip to content

Commit 2e19675

Browse files
committed
Use function to handle flatbuffers format
1 parent 2bfa9da commit 2e19675

File tree

3 files changed

+35
-24
lines changed

3 files changed

+35
-24
lines changed

CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,14 @@ set(SPARROW_IPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
2929
set(SPARROW_IPC_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
3030

3131
set(SPARROW_IPC_HEADERS
32-
#TODO split serialize/deserialize fcts in two different files or just rename the current one?
3332
${SPARROW_IPC_INCLUDE_DIR}/config/config.hpp
3433
${SPARROW_IPC_INCLUDE_DIR}/serialize.hpp
3534
${SPARROW_IPC_INCLUDE_DIR}/sparrow-ipc.hpp
3635
)
3736

3837
set(SPARROW_IPC_SRC
39-
${SPARROW_IPC_SOURCE_DIR}/sparrow-ipc.cpp
4038
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
39+
${SPARROW_IPC_SOURCE_DIR}/sparrow-ipc.cpp
4140
)
4241

4342
set(SCHEMA_DIR ${CMAKE_BINARY_DIR}/format)

include/serialize.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <vector>
44
#include "sparrow/sparrow.hpp"
55

6+
// TODO: split the serialize/deserialize functions into two separate files, or just rename the current one?
67
template <typename T>
78
std::vector<uint8_t> serialize_primitive_array(const sparrow::primitive_array<T>& arr);
89

src/serialize.cpp

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#include <cstdint>
22
#include <cstring>
3+
#include <optional>
4+
#include <stdexcept>
5+
#include <string>
6+
#include <string_view>
37
#include <vector>
48

59
#include "Message_generated.h"
@@ -14,6 +18,32 @@ namespace
1418
{
1519
return (n + 7) & -8;
1620
}
21+
22+
// Maps an Arrow C-schema format string to the matching Flatbuffers type description.
//
// Creates the type table for `format_str` inside `builder` and returns a pair of
// {Type enum tag, offset of the created table converted to a union offset}.
//
// Recognized format strings:
//   "i" -> Type::Int, built with CreateInt(builder, 32, true)
//          (presumably bit width 32 and signed — confirm against the generated header)
//   "f" -> Type::FloatingPoint with Precision::SINGLE
//   "g" -> Type::FloatingPoint with Precision::DOUBLE
//
// Throws std::runtime_error for any other format string.
//
// NOTE(review): format_str is passed straight to strcmp, so it must be a non-null,
// NUL-terminated string — confirm that callers guarantee this.
// NOTE(review): std::pair is used here, but <utility> is not among the includes visible
// in this diff; presumably it is pulled in transitively — confirm.
std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
23+
get_flatbuffer_type(flatbuffers::FlatBufferBuilder& builder, const char* format_str)
24+
{
25+
if (strcmp(format_str, "i") == 0)
26+
{
27+
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 32, true);
28+
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
29+
}
30+
else if (strcmp(format_str, "f") == 0)
31+
{
32+
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(
33+
builder, org::apache::arrow::flatbuf::Precision::SINGLE);
34+
return {org::apache::arrow::flatbuf::Type::FloatingPoint, fp_type.Union()};
35+
}
36+
else if (strcmp(format_str, "g") == 0)
37+
{
38+
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(
39+
builder, org::apache::arrow::flatbuf::Precision::DOUBLE);
40+
return {org::apache::arrow::flatbuf::Type::FloatingPoint, fp_type.Union()};
41+
}
42+
else
43+
{
44+
// Fail loudly rather than returning a Type::NONE / empty offset for unknown formats.
45+
throw std::runtime_error("Unsupported data type for serialization");
46+
}
47+
}
1747
}
1848

1949
template <typename T>
@@ -51,27 +81,7 @@ std::vector<uint8_t> serialize_primitive_array(const sparrow::primitive_array<T>
5181
}
5282

5383
// Determine the Flatbuffer type information from the C schema's format string
54-
org::apache::arrow::flatbuf::Type type_enum = org::apache::arrow::flatbuf::Type::NONE;
55-
flatbuffers::Offset<void> type_offset;
56-
// TODO not sure about the way we should handle this, maybe use some utility fct from sparrow or define one to handle all possible formats?
57-
if (strcmp(arrow_schema.format, "i") == 0)
58-
{
59-
type_enum = org::apache::arrow::flatbuf::Type::Int;
60-
auto int_type = org::apache::arrow::flatbuf::CreateInt(schema_builder, 32, true);
61-
type_offset = int_type.Union();
62-
}
63-
else if (strcmp(arrow_schema.format, "f") == 0)
64-
{
65-
type_enum = org::apache::arrow::flatbuf::Type::FloatingPoint;
66-
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(schema_builder, org::apache::arrow::flatbuf::Precision::SINGLE);
67-
type_offset = fp_type.Union();
68-
}
69-
else if (strcmp(arrow_schema.format, "g") == 0)
70-
{
71-
type_enum = org::apache::arrow::flatbuf::Type::FloatingPoint;
72-
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(schema_builder, org::apache::arrow::flatbuf::Precision::DOUBLE);
73-
type_offset = fp_type.Union();
74-
}
84+
auto [type_enum, type_offset] = get_flatbuffer_type(schema_builder, arrow_schema.format);
7585

7686
// Handle metadata
7787
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>>
@@ -274,9 +284,10 @@ sparrow::primitive_array<T> deserialize_primitive_array(const std::vector<uint8_
274284
// Handle metadata
275285
std::optional<std::vector<sparrow::metadata_pair>> metadata;
276286
auto fb_metadata = fields->Get(0)->custom_metadata();
277-
if (fb_metadata && fb_metadata->size() > 0)
287+
if (fb_metadata && !fb_metadata->empty())
278288
{
279289
metadata = std::vector<sparrow::metadata_pair>();
290+
metadata->reserve(fb_metadata->size());
280291
for (const auto& kv : *fb_metadata)
281292
{
282293
metadata->emplace_back(kv->key()->c_str(), kv->value()->c_str());

0 commit comments

Comments
 (0)