1111#include " doctest/doctest.h"
1212#include " integration_tools.hpp"
1313#include " sparrow_ipc/deserialize.hpp"
14+ #include " sparrow_ipc/stream_file_serializer.hpp"
1415
1516TEST_SUITE (" Integration Tools Tests" )
1617{
@@ -41,6 +42,49 @@ TEST_SUITE("Integration Tools Tests")
4142 );
4243 }
4344
45+ TEST_CASE (" json_file_to_arrow_file - Convert JSON to Arrow file format" )
46+ {
47+ // Test with generated_primitive.json
48+ const std::filesystem::path json_file = tests_resources_files_path / " generated_primitive.json" ;
49+
50+ if (!std::filesystem::exists (json_file))
51+ {
52+ MESSAGE (" Skipping test: test file not found at " << json_file);
53+ return ;
54+ }
55+
56+ // Convert JSON to Arrow file format
57+ std::vector<uint8_t > arrow_file_data;
58+ CHECK_NOTHROW (arrow_file_data = integration_tools::json_file_to_arrow_file (json_file));
59+ CHECK_GT (arrow_file_data.size (), 0 );
60+
61+ // Verify the output is a valid Arrow file by deserializing it
62+ auto batches = sparrow_ipc::deserialize_file (std::span<const uint8_t >(arrow_file_data));
63+ REQUIRE_EQ (batches.size (), 2 ); // generated_primitive.json has 2 batches
64+
65+ // Verify first batch has 17 rows (from JSON)
66+ CHECK_EQ (batches[0 ].nb_rows (), 17 );
67+ // Verify second batch has 20 rows (from JSON)
68+ CHECK_EQ (batches[1 ].nb_rows (), 20 );
69+
70+ // Verify all 22 columns are present (from the JSON schema)
71+ CHECK_EQ (batches[0 ].nb_columns (), 22 );
72+ CHECK_EQ (batches[1 ].nb_columns (), 22 );
73+
74+ // Validate the Arrow file content matches the original JSON
75+ std::ifstream json_input (json_file);
76+ REQUIRE (json_input.is_open ());
77+ nlohmann::json json_data = nlohmann::json::parse (json_input);
78+ json_input.close ();
79+
80+ // Build record batches from JSON and compare with deserialized ones
81+ for (size_t i = 0 ; i < batches.size (); ++i)
82+ {
83+ auto expected_batch = sparrow::json_reader::build_record_batch_from_json (json_data, i);
84+ CHECK (integration_tools::compare_record_batch (expected_batch, batches[i], i, false ));
85+ }
86+ }
87+
4488 TEST_CASE (" json_file_to_stream - Convert JSON to stream" )
4589 {
4690 // Test with a known good JSON file
@@ -87,8 +131,8 @@ TEST_SUITE("Integration Tools Tests")
87131 CHECK_NOTHROW (output_data = integration_tools::stream_to_file (std::span<const uint8_t >(input_data)));
88132 CHECK_GT (output_data.size (), 0 );
89133
90- // Verify the output is valid
91- auto batches = sparrow_ipc::deserialize_stream (std::span<const uint8_t >(output_data));
134+ // Verify the output is valid Arrow file format
135+ auto batches = sparrow_ipc::deserialize_file (std::span<const uint8_t >(output_data));
92136 CHECK_GT (batches.size (), 0 );
93137 }
94138
@@ -112,7 +156,7 @@ TEST_SUITE("Integration Tools Tests")
112156
113157 // Step 3: Compare the results - both should deserialize to same data
114158 auto stream_batches = sparrow_ipc::deserialize_stream (std::span<const uint8_t >(stream_data));
115- auto file_batches = sparrow_ipc::deserialize_stream (std::span<const uint8_t >(file_data));
159+ auto file_batches = sparrow_ipc::deserialize_file (std::span<const uint8_t >(file_data));
116160
117161 REQUIRE_EQ (stream_batches.size (), file_batches.size ());
118162 for (size_t i = 0 ; i < stream_batches.size (); ++i)
0 commit comments