Skip to content

Commit 94aa30a

Browse files
committed
wip
1 parent 391fb38 commit 94aa30a

File tree

2 files changed

+47
-7
lines changed

2 files changed

+47
-7
lines changed

integration_tests/arrow_stream_to_file.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ int main(int argc, char* argv[])
6363
}
6464

6565
// Convert stream to file format using the library
66-
std::vector<uint8_t> output_stream_data = integration_tools::stream_to_file(
66+
const std::vector<uint8_t> output_stream_data = integration_tools::stream_to_file(
6767
std::span<const uint8_t>(input_stream_data)
6868
);
6969

integration_tests/test_integration_tools.cpp

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,31 @@
33
#include <string>
44
#include <vector>
55

6+
#include <File_generated.h>
67
#include <nlohmann/json.hpp>
78

8-
#include <sparrow/record_batch.hpp>
99
#include <sparrow/json_reader/json_parser.hpp>
10+
#include <sparrow/record_batch.hpp>
1011

1112
#include "doctest/doctest.h"
1213
#include "integration_tools.hpp"
1314
#include "sparrow_ipc/deserialize.hpp"
1415
#include "sparrow_ipc/stream_file_serializer.hpp"
1516

17+
// Helper function to extract and parse the footer from Arrow IPC file data
18+
const org::apache::arrow::flatbuf::Footer* get_footer_from_file_data(const std::vector<uint8_t>& file_data)
19+
{
20+
// Footer size is stored 4 bytes before the trailing magic
21+
const size_t footer_size_offset = file_data.size() - sparrow_ipc::arrow_file_magic_size - sizeof(int32_t);
22+
int32_t footer_size = 0;
23+
std::memcpy(&footer_size, file_data.data() + footer_size_offset, sizeof(int32_t));
24+
25+
// Footer data starts at footer_size_offset - footer_size
26+
const size_t footer_offset = footer_size_offset - footer_size;
27+
28+
return org::apache::arrow::flatbuf::GetFooter(file_data.data() + footer_offset);
29+
}
30+
1631
TEST_SUITE("Integration Tools Tests")
1732
{
1833
// Get paths to test data
@@ -29,15 +44,21 @@ TEST_SUITE("Integration Tools Tests")
2944
TEST_CASE("stream_to_file - Empty input")
3045
{
3146
std::vector<uint8_t> empty_data;
32-
CHECK_THROWS_AS(integration_tools::stream_to_file(std::span<const uint8_t>(empty_data)), std::runtime_error);
47+
CHECK_THROWS_AS(
48+
integration_tools::stream_to_file(std::span<const uint8_t>(empty_data)),
49+
std::runtime_error
50+
);
3351
}
3452

3553
TEST_CASE("validate_json_against_arrow_file - Non-existent JSON file")
3654
{
3755
const std::filesystem::path non_existent = "non_existent_file_12345.json";
3856
std::vector<uint8_t> dummy_stream = {1, 2, 3};
3957
CHECK_THROWS_AS(
40-
integration_tools::validate_json_against_arrow_file(non_existent, std::span<const uint8_t>(dummy_stream)),
58+
integration_tools::validate_json_against_arrow_file(
59+
non_existent,
60+
std::span<const uint8_t>(dummy_stream)
61+
),
4162
std::runtime_error
4263
);
4364
}
@@ -91,6 +112,21 @@ TEST_SUITE("Integration Tools Tests")
91112
// Verify the output is valid
92113
const auto batches = sparrow_ipc::deserialize_file(std::span<const uint8_t>(output_data));
93114
CHECK_GT(batches.size(), 0);
115+
116+
// Check footer
117+
const auto* footer = get_footer_from_file_data(output_data);
118+
REQUIRE(footer != nullptr);
119+
REQUIRE(footer->recordBatches() != nullptr);
120+
CHECK_EQ(footer->recordBatches()->size(), batches.size());
121+
122+
// Check alignment of record batch blocks
123+
for (size_t i = 0; i < footer->recordBatches()->size(); ++i)
124+
{
125+
const auto& block = *footer->recordBatches()->Get(static_cast<uint32_t>(i));
126+
CHECK_EQ(block.offset() % 8, 0);
127+
CHECK_EQ(block.bodyLength() % 8, 0);
128+
CHECK_EQ(block.metaDataLength() % 8, 0);
129+
}
94130
}
95131

96132
TEST_CASE("Round-trip: JSON -> stream -> file")
@@ -107,8 +143,10 @@ TEST_SUITE("Integration Tools Tests")
107143
const std::vector<uint8_t> stream_data = integration_tools::json_file_to_stream(json_file);
108144
REQUIRE_GT(stream_data.size(), 0);
109145

110-
// Step 2: stream -> file
111-
const std::vector<uint8_t> file_data = integration_tools::stream_to_file(std::span<const uint8_t>(stream_data));
146+
// Step 2: stream -> file
147+
const std::vector<uint8_t> file_data = integration_tools::stream_to_file(
148+
std::span<const uint8_t>(stream_data)
149+
);
112150
REQUIRE_GT(file_data.size(), 0);
113151

114152
// Step 3: Compare the results - both should deserialize to same data
@@ -250,7 +288,9 @@ TEST_SUITE("Integration Tools Tests")
250288
SUBCASE(filename.c_str())
251289
{
252290
// Convert JSON to Arrow file
253-
const std::vector<uint8_t> arrow_file_data = integration_tools::json_file_to_arrow_file(json_file);
291+
const std::vector<uint8_t> arrow_file_data = integration_tools::json_file_to_arrow_file(
292+
json_file
293+
);
254294
REQUIRE_GT(arrow_file_data.size(), 0);
255295

256296
// Validate

0 commit comments

Comments
 (0)