77#include < nlohmann/json.hpp>
88
99#include < sparrow/record_batch.hpp>
10+ #include < sparrow/utils/format.hpp>
1011
1112#include " sparrow/json_reader/json_parser.hpp"
1213
@@ -22,9 +23,9 @@ const std::filesystem::path tests_resources_files_path = arrow_testing_data_dir
2223
2324const std::vector<std::filesystem::path> files_paths_to_test = {
2425 tests_resources_files_path / " generated_primitive" ,
25- // tests_resources_files_path / "generated_primitive_large_offsets",
26+ tests_resources_files_path / " generated_primitive_large_offsets" ,
2627 tests_resources_files_path / " generated_primitive_zerolength" ,
27- tests_resources_files_path / " generated_primitive_no_batches"
28+ // tests_resources_files_path / "generated_primitive_no_batches"
2829};
2930
3031size_t get_number_of_batches (const std::filesystem::path& json_path)
@@ -48,6 +49,33 @@ nlohmann::json load_json_file(const std::filesystem::path& json_path)
4849 return nlohmann::json::parse (json_file);
4950}
5051
52+ void compare_record_batches (
53+ const std::vector<sparrow::record_batch>& record_batches_1,
54+ const std::vector<sparrow::record_batch>& record_batches_2
55+ )
56+ {
57+ REQUIRE_EQ (record_batches_1.size (), record_batches_2.size ());
58+ for (size_t i = 0 ; i < record_batches_1.size (); ++i)
59+ {
60+ for (size_t y = 0 ; y < record_batches_1[i].nb_columns (); y++)
61+ {
62+ const auto & column_1 = record_batches_1[i].get_column (y);
63+ const auto & column_2 = record_batches_2[i].get_column (y);
64+ REQUIRE_EQ (column_1.size (), column_2.size ());
65+ for (size_t z = 0 ; z < column_1.size (); z++)
66+ {
67+ const auto col_name = column_1.name ().value_or (" NA" );
68+ INFO (" Comparing batch " << i << " , column " << y << " named :" << col_name << " , row " << z);
69+ REQUIRE_EQ (column_1.data_type (), column_2.data_type ());
70+ CHECK_EQ (column_1.name (), column_2.name ());
71+ const auto & column_1_value = column_1[z];
72+ const auto & column_2_value = column_2[z];
73+ CHECK_EQ (column_1_value, column_2_value);
74+ }
75+ }
76+ }
77+ }
78+
5179TEST_SUITE (" Integration tests" )
5280{
5381 TEST_CASE (" Compare stream deserialization with JSON deserialization" )
@@ -90,29 +118,7 @@ TEST_SUITE("Integration tests")
90118 const auto record_batches_from_stream = sparrow_ipc::deserialize_stream (
91119 std::span<const uint8_t >(stream_data)
92120 );
93-
94- // Compare record batches
95- REQUIRE_EQ (record_batches_from_stream.size (), record_batches_from_json.size ());
96- for (size_t i = 0 ; i < record_batches_from_stream.size (); ++i)
97- {
98- for (size_t y = 0 ; y < record_batches_from_stream[i].nb_columns (); y++)
99- {
100- const auto & column_stream = record_batches_from_stream[i].get_column (y);
101- const auto & column_json = record_batches_from_json[i].get_column (y);
102- REQUIRE_EQ (column_stream.size (), column_json.size ());
103- for (size_t z = 0 ; z < column_json.size (); z++)
104- {
105- const auto col_name = column_stream.name ().value_or (" NA" );
106- INFO (
107- " Comparing batch " << i << " , column " << y << " named :" << col_name
108- << " , row " << z
109- );
110- const auto & column_stream_value = column_stream[z];
111- const auto & column_json_value = column_json[z];
112- CHECK_EQ (column_stream_value, column_json_value);
113- }
114- }
115- }
121+ compare_record_batches (record_batches_from_json, record_batches_from_stream);
116122 }
117123 }
118124 }
@@ -131,9 +137,7 @@ TEST_SUITE("Integration tests")
131137 CHECK (json_data != nullptr );
132138
133139 const size_t num_batches = get_number_of_batches (json_path);
134-
135140 std::vector<sparrow::record_batch> record_batches_from_json;
136-
137141 for (size_t batch_idx = 0 ; batch_idx < num_batches; ++batch_idx)
138142 {
139143 INFO (" Processing batch " << batch_idx << " of " << num_batches);
@@ -153,17 +157,16 @@ TEST_SUITE("Integration tests")
153157 );
154158 stream_file.close ();
155159
156- // Serialize the record batches from JSON
157- const auto serialized_data = sparrow_ipc::serialize (record_batches_from_json);
160+ // Process the stream file
161+ const auto record_batches_from_stream = sparrow_ipc::deserialize_stream (
162+ std::span<const uint8_t >(stream_data)
163+ );
158164
159- // Compare the serialized data with the original stream data
160- // CHECK_EQ(serialized_data, stream_data);
161- // REQUIRE_EQ(serialized_data.size(), stream_data.size());
162- for (size_t i = 0 ; i < std::min (serialized_data.size (), stream_data.size ()); ++i)
163- {
164- INFO (" Comparing byte " << i << " of " << serialized_data.size ());
165- CHECK_EQ (serialized_data[i], stream_data[i]);
166- }
165+ const auto serialized_data = sparrow_ipc::serialize (record_batches_from_json);
166+ const auto deserialized_serialized_data = sparrow_ipc::deserialize_stream (
167+ std::span<const uint8_t >(serialized_data)
168+ );
169+ compare_record_batches (record_batches_from_stream, deserialized_serialized_data);
167170 }
168171 }
169172 }
0 commit comments