Skip to content

Commit 9347b29

Browse files
committed
wip
1 parent 309e5a9 commit 9347b29

20 files changed

+172
-137
lines changed

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ set(SPARROW_IPC_SRC
110110
${SPARROW_IPC_SOURCE_DIR}/serialize_null_array.cpp
111111
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
112112
${SPARROW_IPC_SOURCE_DIR}/utils.cpp
113-
${SPARROW_IPC_SOURCE_DIR}/magic_values.cpp
114113
${SPARROW_IPC_SOURCE_DIR}/metadata.cpp
115114
${SPARROW_IPC_SOURCE_DIR}/deserialize_utils.cpp
116115
)

include/sparrow_ipc/arrow_interface/arrow_array.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
namespace sparrow_ipc
99
{
10-
[[nodiscard]] ArrowArray make_arrow_array(
10+
[[nodiscard]] ArrowArray make_non_owning_arrow_array(
1111
int64_t length,
1212
int64_t null_count,
1313
int64_t offset,
@@ -17,9 +17,9 @@ namespace sparrow_ipc
1717
ArrowArray* dictionary
1818
);
1919

20-
void release_arrow_array(ArrowArray* array);
20+
void release_non_owning_arrow_array(ArrowArray* array);
2121

22-
void fill_arrow_array(
22+
void fill_non_owning_arrow_array(
2323
ArrowArray& array,
2424
int64_t length,
2525
int64_t null_count,

include/sparrow_ipc/arrow_interface/arrow_array/private_data.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55
namespace sparrow_ipc
66
{
7-
class arrow_array_private_data
7+
class non_owning_arrow_array_private_data
88
{
99
public:
1010

11-
explicit constexpr arrow_array_private_data(std::vector<std::uint8_t*>&& buffers_pointers)
11+
explicit constexpr non_owning_arrow_array_private_data(std::vector<std::uint8_t*>&& buffers_pointers)
1212
: m_buffers_pointers(std::move(buffers_pointers))
1313
{
1414
}

include/sparrow_ipc/arrow_interface/arrow_array_schema_common_release.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ namespace sparrow_ipc
1616
*/
1717
template <class T>
1818
requires std::same_as<T, ArrowArray> || std::same_as<T, ArrowSchema>
19-
void release_common_arrow(T& t)
19+
void release_common_non_owning_arrow(T& t)
2020
{
21-
using private_data_type = std::
22-
conditional_t<std::same_as<T, ArrowArray>, arrow_array_private_data, arrow_schema_private_data>;
21+
using private_data_type = std::conditional_t<
22+
std::same_as<T, ArrowArray>,
23+
non_owning_arrow_array_private_data,
24+
non_owning_arrow_schema_private_data>;
2325
if (t.release == nullptr)
2426
{
2527
return;

include/sparrow_ipc/arrow_interface/arrow_schema.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111

1212
namespace sparrow_ipc
1313
{
14-
void release_arrow_schema(ArrowSchema* schema);
14+
void release_non_owning_arrow_schema(ArrowSchema* schema);
1515

1616
template <sparrow::input_metadata_container M = std::vector<sparrow::metadata_pair>>
17-
void fill_arrow_schema(
17+
void fill_non_owning_arrow_schema(
1818
ArrowSchema& schema,
1919
std::string_view format,
2020
const char* name,
@@ -41,19 +41,19 @@ namespace sparrow_ipc
4141
)
4242
: std::nullopt;
4343

44-
schema.private_data = new arrow_schema_private_data(format, name, std::move(metadata_str));
44+
schema.private_data = new non_owning_arrow_schema_private_data(format, name, std::move(metadata_str));
4545

46-
const auto private_data = static_cast<arrow_schema_private_data*>(schema.private_data);
46+
const auto private_data = static_cast<non_owning_arrow_schema_private_data*>(schema.private_data);
4747
schema.format = private_data->format_ptr();
4848
schema.name = private_data->name_ptr();
4949
schema.metadata = private_data->metadata_ptr();
5050
schema.children = children;
5151
schema.dictionary = dictionary;
52-
schema.release = release_arrow_schema;
52+
schema.release = release_non_owning_arrow_schema;
5353
}
5454

5555
template <sparrow::input_metadata_container M = std::vector<sparrow::metadata_pair>>
56-
[[nodiscard]] ArrowSchema make_arrow_schema(
56+
[[nodiscard]] ArrowSchema make_non_owning_arrow_schema(
5757
std::string_view format,
5858
const char* name,
5959
std::optional<M> metadata,
@@ -64,7 +64,7 @@ namespace sparrow_ipc
6464
)
6565
{
6666
ArrowSchema schema{};
67-
fill_arrow_schema(schema, format, name, metadata, flags, children_count, children, dictionary);
67+
fill_non_owning_arrow_schema(schema, format, name, metadata, flags, children_count, children, dictionary);
6868
return schema;
6969
}
7070
}

include/sparrow_ipc/arrow_interface/arrow_schema/private_data.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,15 @@
66

77
namespace sparrow_ipc
88
{
9-
class arrow_schema_private_data
9+
class non_owning_arrow_schema_private_data
1010
{
1111
public:
1212

13-
arrow_schema_private_data(std::string_view format, const char* name, std::optional<std::string> metadata);
13+
non_owning_arrow_schema_private_data(
14+
std::string_view format,
15+
const char* name,
16+
std::optional<std::string> metadata
17+
);
1418

1519
[[nodiscard]] const char* format_ptr() const noexcept;
1620
[[nodiscard]] const char* name_ptr() const noexcept;

include/sparrow_ipc/deserialize_fixedsizebinary_array.hpp

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@
66
#include "Message_generated.h"
77
#include "sparrow_ipc/arrow_interface/arrow_array.hpp"
88
#include "sparrow_ipc/arrow_interface/arrow_schema.hpp"
9-
9+
#include "sparrow_ipc/deserialize_utils.hpp"
1010

1111
namespace sparrow_ipc
1212
{
13-
1413
[[nodiscard]] sparrow::fixed_width_binary_array deserialize_fixedwidthbinary(
1514
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
1615
std::span<const uint8_t> body,
@@ -21,27 +20,32 @@ namespace sparrow_ipc
2120
)
2221
{
2322
const std::string format = "w:" + std::to_string(byte_width);
24-
ArrowSchema schema = make_arrow_schema(format, name.data(), metadata, std::nullopt, 0, nullptr, nullptr);
25-
26-
const auto bitmap_buffer_metadata = record_batch.buffers()->Get(buffer_index++);
27-
28-
uint8_t* bitmap_ptr = nullptr;
29-
int64_t null_count = 0;
30-
31-
// Check if validity buffer is present (length > 0 for nullable fields)
32-
if (bitmap_buffer_metadata->length() > 0) {
33-
bitmap_ptr = const_cast<uint8_t*>(body.data() + bitmap_buffer_metadata->offset());
34-
const sparrow::dynamic_bitset_view<const std::uint8_t> bitmap_view{bitmap_ptr, static_cast<size_t>(record_batch.length())};
35-
null_count = bitmap_view.null_count();
36-
}
37-
23+
ArrowSchema schema = make_non_owning_arrow_schema(
24+
format,
25+
name.data(),
26+
metadata,
27+
std::nullopt,
28+
0,
29+
nullptr,
30+
nullptr
31+
);
32+
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(
33+
record_batch,
34+
body,
35+
buffer_index++
36+
);
3837
const auto buffer_metadata = record_batch.buffers()->Get(buffer_index++);
3938
auto buffer_ptr = const_cast<uint8_t*>(body.data() + buffer_metadata->offset());
40-
4139
std::vector<std::uint8_t*> buffers = {bitmap_ptr, buffer_ptr};
42-
43-
ArrowArray array = make_arrow_array(record_batch.length(), null_count, 0, std::move(buffers), 0, nullptr, nullptr);
44-
40+
ArrowArray array = make_non_owning_arrow_array(
41+
record_batch.length(),
42+
null_count,
43+
0,
44+
std::move(buffers),
45+
0,
46+
nullptr,
47+
nullptr
48+
);
4549
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
4650
return sparrow::fixed_width_binary_array{std::move(ap)};
4751
}

include/sparrow_ipc/deserialize_primitive_array.hpp

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Message_generated.h"
1010
#include "sparrow_ipc/arrow_interface/arrow_array.hpp"
1111
#include "sparrow_ipc/arrow_interface/arrow_schema.hpp"
12+
#include "sparrow_ipc/deserialize_utils.hpp"
1213

1314
namespace sparrow_ipc
1415
{
@@ -24,25 +25,32 @@ namespace sparrow_ipc
2425
const std::string_view format = data_type_to_format(
2526
sparrow::detail::get_data_type_from_array<sparrow::primitive_array<T>>::get()
2627
);
27-
ArrowSchema schema = make_arrow_schema(format, name.data(), metadata, std::nullopt, 0, nullptr, nullptr);
28-
29-
const auto bitmap_buffer_metadata = record_batch.buffers()->Get(buffer_index++);
30-
uint8_t* bitmap_ptr = nullptr;
31-
int64_t null_count = 0;
32-
33-
// Check if validity buffer is present (length > 0 for nullable fields)
34-
if (bitmap_buffer_metadata->length() > 0) {
35-
bitmap_ptr = const_cast<uint8_t*>(body.data() + bitmap_buffer_metadata->offset());
36-
const sparrow::dynamic_bitset_view<const std::uint8_t> bitmap_view{bitmap_ptr, static_cast<size_t>(record_batch.length())};
37-
null_count = bitmap_view.null_count();
38-
}
39-
28+
ArrowSchema schema = make_non_owning_arrow_schema(
29+
format,
30+
name.data(),
31+
metadata,
32+
std::nullopt,
33+
0,
34+
nullptr,
35+
nullptr
36+
);
37+
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(
38+
record_batch,
39+
body,
40+
buffer_index++
41+
);
4042
const auto primitive_buffer_metadata = record_batch.buffers()->Get(buffer_index++);
4143
auto primitives_ptr = const_cast<uint8_t*>(body.data() + primitive_buffer_metadata->offset());
42-
4344
std::vector<std::uint8_t*> buffers = {bitmap_ptr, primitives_ptr};
44-
ArrowArray array = make_arrow_array(record_batch.length(), null_count, 0, std::move(buffers), 0, nullptr, nullptr);
45-
45+
ArrowArray array = make_non_owning_arrow_array(
46+
record_batch.length(),
47+
null_count,
48+
0,
49+
std::move(buffers),
50+
0,
51+
nullptr,
52+
nullptr
53+
);
4654
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
4755
return sparrow::primitive_array<T>{std::move(ap)};
4856
}

include/sparrow_ipc/deserialize_utils.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44

55
#include <sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp>
66
#include <sparrow/u8_buffer.hpp>
7+
#include <utility>
78

89
#include "Message_generated.h"
910
#include "Schema_generated.h"
1011

11-
namespace sparrow_ipc
12+
namespace sparrow_ipc::utils
1213
{
1314
template <typename T>
1415
[[nodiscard]] sparrow::u8_buffer<T> message_buffer_to_u8buffer(
@@ -29,4 +30,10 @@ namespace sparrow_ipc
2930
std::span<const uint8_t> body,
3031
size_t index
3132
);
33+
34+
[[nodiscard]] std::pair<std::uint8_t*, int64_t> get_bitmap_pointer_and_null_count(
35+
const org::apache::arrow::flatbuf::RecordBatch& record_batch,
36+
std::span<const uint8_t> body,
37+
size_t index
38+
);
3239
}

include/sparrow_ipc/deserialize_variable_size_binary_array.hpp

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "Message_generated.h"
99
#include "sparrow_ipc/arrow_interface/arrow_array.hpp"
1010
#include "sparrow_ipc/arrow_interface/arrow_schema.hpp"
11+
#include "sparrow_ipc/deserialize_utils.hpp"
1112

1213
namespace sparrow_ipc
1314
{
@@ -20,31 +21,35 @@ namespace sparrow_ipc
2021
size_t& buffer_index
2122
)
2223
{
23-
const std::string_view format = data_type_to_format(
24-
sparrow::detail::get_data_type_from_array<T>::get()
24+
const std::string_view format = data_type_to_format(sparrow::detail::get_data_type_from_array<T>::get());
25+
ArrowSchema schema = make_non_owning_arrow_schema(
26+
format,
27+
name.data(),
28+
metadata,
29+
std::nullopt,
30+
0,
31+
nullptr,
32+
nullptr
33+
);
34+
const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(
35+
record_batch,
36+
body,
37+
buffer_index++
2538
);
26-
ArrowSchema schema = make_arrow_schema(format, name.data(), metadata, std::nullopt, 0, nullptr, nullptr);
27-
28-
const auto bitmap_buffer_metadata = record_batch.buffers()->Get(buffer_index++);
29-
uint8_t* bitmap_ptr = nullptr;
30-
int64_t null_count = 0;
31-
32-
// Check if validity buffer is present (length > 0 for nullable fields)
33-
if (bitmap_buffer_metadata->length() > 0) {
34-
bitmap_ptr = const_cast<uint8_t*>(body.data() + bitmap_buffer_metadata->offset());
35-
const sparrow::dynamic_bitset_view<const std::uint8_t> bitmap_view{bitmap_ptr, static_cast<size_t>(record_batch.length())};
36-
null_count = bitmap_view.null_count();
37-
}
38-
3939
const auto offset_metadata = record_batch.buffers()->Get(buffer_index++);
4040
auto offset_ptr = const_cast<uint8_t*>(body.data() + offset_metadata->offset());
41-
4241
const auto buffer_metadata = record_batch.buffers()->Get(buffer_index++);
4342
auto buffer_ptr = const_cast<uint8_t*>(body.data() + buffer_metadata->offset());
44-
4543
std::vector<std::uint8_t*> buffers = {bitmap_ptr, offset_ptr, buffer_ptr};
46-
ArrowArray array = make_arrow_array(record_batch.length(), null_count, 0, std::move(buffers), 0, nullptr, nullptr);
47-
44+
ArrowArray array = make_non_owning_arrow_array(
45+
record_batch.length(),
46+
null_count,
47+
0,
48+
std::move(buffers),
49+
0,
50+
nullptr,
51+
nullptr
52+
);
4853
sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
4954
return T{std::move(ap)};
5055
}

0 commit comments

Comments
 (0)