Skip to content

Commit 5849d6a

Browse files
committed
Add tests
1 parent 9a3a39f commit 5849d6a

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

integration_tests/test_integration_tools.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "doctest/doctest.h"
1717
#include "integration_tools.hpp"
1818
#include "sparrow_ipc/deserialize.hpp"
19+
#include "sparrow_ipc/memory_output_stream.hpp"
20+
#include "sparrow_ipc/serializer.hpp"
1921
#include "sparrow_ipc/stream_file_serializer.hpp"
2022

2123
// Helper function to extract and parse the footer from Arrow IPC file data
@@ -164,6 +166,66 @@ TEST_SUITE("Integration Tools Tests")
164166
}
165167
}
166168

169+
// Round-trip check: a JSON integration file is converted to an Arrow IPC file,
// the file is deserialized, the resulting batches are re-serialized as an Arrow
// IPC stream, and the stream is deserialized again. At every stage the number of
// record batches must equal the number of entries in the JSON "batches" array,
// and the batches read from the file must compare equal to those read back
// from the stream.
TEST_CASE("Round-trip: JSON -> Arrow file -> Arrow stream with record batch count verification")
{
    const std::filesystem::path json_file = tests_resources_files_path / "generated_primitive.json";

    // The fixture is optional: without it there is nothing to verify, so the
    // test reports the fact and returns early.
    if (!std::filesystem::exists(json_file))
    {
        MESSAGE("Skipping test: test file not found at " << json_file);
        return;
    }

    // Load and parse the JSON file to get the expected batch count.
    std::ifstream json_input(json_file);
    REQUIRE(json_input.is_open());
    const nlohmann::json json_data = nlohmann::json::parse(json_input);
    json_input.close();

    REQUIRE(json_data.contains("batches"));
    // size() on a non-array JSON value (object, scalar) would still return a
    // number, so make sure we are really counting array elements.
    REQUIRE(json_data["batches"].is_array());
    const size_t expected_batch_count = json_data["batches"].size();
    REQUIRE_GT(expected_batch_count, size_t{0});

    // Step 1: JSON -> Arrow file
    const std::vector<uint8_t> arrow_file_data = integration_tools::json_file_to_arrow_file(json_file);
    REQUIRE_GT(arrow_file_data.size(), size_t{0});

    // Verify the record batch count advertised by the Arrow file footer.
    // NOTE(review): the footer pointer presumably refers into arrow_file_data,
    // which stays alive until the end of the test - confirm against the helper.
    const auto* footer = get_footer_from_file_data(arrow_file_data);
    REQUIRE(footer != nullptr);
    const auto* footer_batches = footer->recordBatches();
    REQUIRE(footer_batches != nullptr);
    CHECK_EQ(footer_batches->size(), expected_batch_count);

    // Step 2: Deserialize Arrow file
    const auto file_batches = sparrow_ipc::deserialize_file(std::span<const uint8_t>(arrow_file_data));
    CHECK_EQ(file_batches.size(), expected_batch_count);

    // Step 3: Arrow file -> Arrow stream (re-serialize the deserialized batches)
    std::vector<uint8_t> stream_data;
    sparrow_ipc::memory_output_stream mem_stream(stream_data);
    sparrow_ipc::serializer serializer(mem_stream);
    serializer << file_batches << sparrow_ipc::end_stream;
    REQUIRE_GT(stream_data.size(), size_t{0});

    // Step 4: Deserialize Arrow stream and verify the record batch count
    const auto stream_batches = sparrow_ipc::deserialize_stream(std::span<const uint8_t>(stream_data));
    CHECK_EQ(stream_batches.size(), expected_batch_count);

    // Step 5: Compare the results - all batches should match.
    // The size equality is a hard requirement because the loop below indexes
    // both containers with the same index.
    REQUIRE_EQ(file_batches.size(), stream_batches.size());
    for (size_t i = 0; i < file_batches.size(); ++i)
    {
        CHECK(integration_tools::compare_record_batch(file_batches[i], stream_batches[i], i, false));
    }

    // Output summary. The heading is deliberately neutral: the CHECK assertions
    // above are non-fatal, so execution reaches this point even when some of
    // them failed.
    MESSAGE("JSON -> Arrow file -> Arrow stream round-trip summary:");
    MESSAGE("  Expected batch count: " << expected_batch_count);
    MESSAGE("  Arrow file footer batch count: " << footer_batches->size());
    MESSAGE("  Deserialized file batches: " << file_batches.size());
    MESSAGE("  Deserialized stream batches: " << stream_batches.size());
}
228+
167229
TEST_CASE("validate_json_against_arrow_file - Successful validation")
168230
{
169231
const std::filesystem::path json_file = tests_resources_files_path / "generated_primitive.json";

0 commit comments

Comments
 (0)