33#include < string>
44#include < vector>
55
6+ #include < File_generated.h>
67#include < nlohmann/json.hpp>
78
8- #include < sparrow/record_batch.hpp>
99#include < sparrow/json_reader/json_parser.hpp>
10+ #include < sparrow/record_batch.hpp>
1011
1112#include " doctest/doctest.h"
1213#include " integration_tools.hpp"
1314#include " sparrow_ipc/deserialize.hpp"
1415#include " sparrow_ipc/stream_file_serializer.hpp"
1516
17+ // Helper function to extract and parse the footer from Arrow IPC file data
18+ const org::apache::arrow::flatbuf::Footer* get_footer_from_file_data (const std::vector<uint8_t >& file_data)
19+ {
20+ // Footer size is stored 4 bytes before the trailing magic
21+ const size_t footer_size_offset = file_data.size () - sparrow_ipc::arrow_file_magic_size - sizeof (int32_t );
22+ int32_t footer_size = 0 ;
23+ std::memcpy (&footer_size, file_data.data () + footer_size_offset, sizeof (int32_t ));
24+
25+ // Footer data starts at footer_size_offset - footer_size
26+ const size_t footer_offset = footer_size_offset - footer_size;
27+
28+ return org::apache::arrow::flatbuf::GetFooter (file_data.data () + footer_offset);
29+ }
30+
1631TEST_SUITE (" Integration Tools Tests" )
1732{
1833 // Get paths to test data
@@ -29,15 +44,21 @@ TEST_SUITE("Integration Tools Tests")
// Checks that integration_tools::stream_to_file rejects an empty byte span by
// throwing std::runtime_error.
2944 TEST_CASE (" stream_to_file - Empty input" )
3045 {
// Default-constructed vector: the span built from it below has zero length.
3146 std::vector<uint8_t > empty_data;
// The removed (-) and added (+) lines below make the same assertion with the
// same arguments; only the line wrapping differs.
32- CHECK_THROWS_AS (integration_tools::stream_to_file (std::span<const uint8_t >(empty_data)), std::runtime_error);
47+ CHECK_THROWS_AS (
48+ integration_tools::stream_to_file (std::span<const uint8_t >(empty_data)),
49+ std::runtime_error
50+ );
3351 }
3452
// Checks that integration_tools::validate_json_against_arrow_file throws
// std::runtime_error when given a JSON path that this test never creates
// (presumably absent from the working directory — confirm for the test environment).
3553 TEST_CASE (" validate_json_against_arrow_file - Non-existent JSON file" )
3654 {
3755 const std::filesystem::path non_existent = " non_existent_file_12345.json" ;
// Three arbitrary placeholder bytes passed as the stream argument.
3856 std::vector<uint8_t > dummy_stream = {1 , 2 , 3 };
3957 CHECK_THROWS_AS (
// The removed (-) and added (+) lines below pass the same two arguments;
// only the line wrapping differs.
40- integration_tools::validate_json_against_arrow_file (non_existent, std::span<const uint8_t >(dummy_stream)),
58+ integration_tools::validate_json_against_arrow_file (
59+ non_existent,
60+ std::span<const uint8_t >(dummy_stream)
61+ ),
4162 std::runtime_error
4263 );
4364 }
@@ -91,6 +112,21 @@ TEST_SUITE("Integration Tools Tests")
91112 // Verify the output is valid
92113 const auto batches = sparrow_ipc::deserialize_file (std::span<const uint8_t >(output_data));
93114 CHECK_GT (batches.size (), 0 );
115+
116+ // Check footer
117+ const auto * footer = get_footer_from_file_data (output_data);
118+ REQUIRE (footer != nullptr );
119+ REQUIRE (footer->recordBatches () != nullptr );
120+ CHECK_EQ (footer->recordBatches ()->size (), batches.size ());
121+
122+ // Check alignment of record batch blocks
123+ for (size_t i = 0 ; i < footer->recordBatches ()->size (); ++i)
124+ {
125+ const auto & block = *footer->recordBatches ()->Get (static_cast <uint32_t >(i));
126+ CHECK_EQ (block.offset () % 8 , 0 );
127+ CHECK_EQ (block.bodyLength () % 8 , 0 );
128+ CHECK_EQ (block.metaDataLength () % 8 , 0 );
129+ }
94130 }
95131
96132 TEST_CASE (" Round-trip: JSON -> stream -> file" )
@@ -107,8 +143,10 @@ TEST_SUITE("Integration Tools Tests")
107143 const std::vector<uint8_t > stream_data = integration_tools::json_file_to_stream (json_file);
108144 REQUIRE_GT (stream_data.size (), 0 );
109145
110- // Step 2: stream -> file
111- const std::vector<uint8_t > file_data = integration_tools::stream_to_file (std::span<const uint8_t >(stream_data));
146+ // Step 2: stream -> file
147+ const std::vector<uint8_t > file_data = integration_tools::stream_to_file (
148+ std::span<const uint8_t >(stream_data)
149+ );
112150 REQUIRE_GT (file_data.size (), 0 );
113151
114152 // Step 3: Compare the results - both should deserialize to same data
@@ -250,7 +288,9 @@ TEST_SUITE("Integration Tools Tests")
250288 SUBCASE (filename.c_str ())
251289 {
252290 // Convert to stream
253- const std::vector<uint8_t > arrow_file_data = integration_tools::json_file_to_arrow_file (json_file);
291+ const std::vector<uint8_t > arrow_file_data = integration_tools::json_file_to_arrow_file (
292+ json_file
293+ );
254294 REQUIRE_GT (arrow_file_data.size (), 0 );
255295
256296 // Validate
0 commit comments