|
16 | 16 | #include "doctest/doctest.h" |
17 | 17 | #include "integration_tools.hpp" |
18 | 18 | #include "sparrow_ipc/deserialize.hpp" |
| 19 | +#include "sparrow_ipc/memory_output_stream.hpp" |
| 20 | +#include "sparrow_ipc/serializer.hpp" |
19 | 21 | #include "sparrow_ipc/stream_file_serializer.hpp" |
20 | 22 |
|
21 | 23 | // Helper function to extract and parse the footer from Arrow IPC file data |
@@ -164,6 +166,66 @@ TEST_SUITE("Integration Tools Tests") |
164 | 166 | } |
165 | 167 | } |
166 | 168 |
|
| 169 | + TEST_CASE("Round-trip: JSON -> Arrow file -> Arrow stream with record batch count verification") |
| 170 | + { |
| 171 | + const std::filesystem::path json_file = tests_resources_files_path / "generated_primitive.json"; |
| 172 | + |
| 173 | + if (!std::filesystem::exists(json_file)) |
| 174 | + { |
| 175 | + MESSAGE("Skipping test: test file not found at " << json_file); |
| 176 | + return; |
| 177 | + } |
| 178 | + |
| 179 | + // Purpose: round-trip one JSON input through the Arrow file format and then the Arrow stream format, checking the record batch count at every stage. The length of the JSON batches array parsed here is the reference count for all later checks |
| 180 | + std::ifstream json_input(json_file); |
| 181 | + REQUIRE(json_input.is_open()); |
| 182 | + const nlohmann::json json_data = nlohmann::json::parse(json_input); |
| 183 | + json_input.close(); |
| 184 | + |
| 185 | + REQUIRE(json_data.contains("batches")); |
| 186 | + const size_t expected_batch_count = json_data["batches"].size(); |
| 187 | + REQUIRE_GT(expected_batch_count, 0); |
| 188 | + |
| 189 | + // Step 1: JSON -> Arrow file, obtained as a byte vector from json_file_to_arrow_file; an empty result aborts the test |
| 190 | + const std::vector<uint8_t> arrow_file_data = integration_tools::json_file_to_arrow_file(json_file); |
| 191 | + REQUIRE_GT(arrow_file_data.size(), 0); |
| 192 | + |
| 193 | + // The footer must list exactly one record batch entry per JSON batch. NOTE(review): footer presumably points into arrow_file_data, which stays in scope until the end of the test - confirm against get_footer_from_file_data |
| 194 | + const auto* footer = get_footer_from_file_data(arrow_file_data); |
| 195 | + REQUIRE(footer != nullptr); |
| 196 | + REQUIRE(footer->recordBatches() != nullptr); |
| 197 | + CHECK_EQ(footer->recordBatches()->size(), expected_batch_count); |
| 198 | + |
| 199 | + // Step 2: Arrow file bytes -> record batches via deserialize_file; the count is CHECKed rather than REQUIREd, so a mismatch is reported and the test keeps running |
| 200 | + const auto file_batches = sparrow_ipc::deserialize_file(std::span<const uint8_t>(arrow_file_data)); |
| 201 | + CHECK_EQ(file_batches.size(), expected_batch_count); |
| 202 | + |
| 203 | + // Step 3: record batches -> Arrow stream: the deserialized batches followed by end_stream are pushed through a serializer built on a memory_output_stream over stream_data, which must be non-empty afterwards |
| 204 | + std::vector<uint8_t> stream_data; |
| 205 | + sparrow_ipc::memory_output_stream mem_stream(stream_data); |
| 206 | + sparrow_ipc::serializer serializer(mem_stream); |
| 207 | + serializer << file_batches << sparrow_ipc::end_stream; |
| 208 | + REQUIRE_GT(stream_data.size(), 0); |
| 209 | + |
| 210 | + // Step 4: Arrow stream bytes -> record batches via deserialize_stream; the count must again equal the JSON batch count |
| 211 | + const auto stream_batches = sparrow_ipc::deserialize_stream(std::span<const uint8_t>(stream_data)); |
| 212 | + CHECK_EQ(stream_batches.size(), expected_batch_count); |
| 213 | + |
| 214 | + // Step 5: Pairwise comparison of file batches and stream batches; the sizes are REQUIREd equal first so that indexing both containers with i stays in bounds. NOTE(review): the meaning of the trailing false argument is not visible here - confirm against compare_record_batch |
| 215 | + REQUIRE_EQ(file_batches.size(), stream_batches.size()); |
| 216 | + for (size_t i = 0; i < file_batches.size(); ++i) |
| 217 | + { |
| 218 | + CHECK(integration_tools::compare_record_batch(file_batches[i], stream_batches[i], i, false)); |
| 219 | + } |
| 220 | + |
| 221 | + // Log the batch count observed at each stage. NOTE(review): these messages are emitted unconditionally, so the word successful is also logged when one of the non-fatal CHECKs above has failed |
| 222 | + MESSAGE("JSON -> Arrow file -> Arrow stream round-trip successful:"); |
| 223 | + MESSAGE(" Expected batch count: " << expected_batch_count); |
| 224 | + MESSAGE(" Arrow file footer batch count: " << footer->recordBatches()->size()); |
| 225 | + MESSAGE(" Deserialized file batches: " << file_batches.size()); |
| 226 | + MESSAGE(" Deserialized stream batches: " << stream_batches.size()); |
| 227 | + } |
| 228 | + |
167 | 229 | TEST_CASE("validate_json_against_arrow_file - Successful validation") |
168 | 230 | { |
169 | 231 | const std::filesystem::path json_file = tests_resources_files_path / "generated_primitive.json"; |
|
0 commit comments