@@ -27,16 +27,19 @@ const std::filesystem::path tests_resources_files_path_with_compression = arrow_
2727
2828const std::vector<std::filesystem::path> files_paths_to_test = {
2929 tests_resources_files_path / " generated_primitive" ,
30- // tests_resources_files_path / "generated_primitive_large_offsets",
3130 tests_resources_files_path / " generated_primitive_zerolength" ,
32- // tests_resources_files_path / "generated_primitive_no_batches"
31+ tests_resources_files_path / " generated_primitive_no_batches" ,
32+ tests_resources_files_path / " generated_binary" ,
33+ tests_resources_files_path / " generated_large_binary" ,
34+ tests_resources_files_path / " generated_binary_zerolength" ,
35+ tests_resources_files_path / " generated_binary_no_batches" ,
3336};
3437
3538const std::vector<std::filesystem::path> files_paths_to_test_with_compression = {
3639 tests_resources_files_path_with_compression / " generated_lz4" ,
37- tests_resources_files_path_with_compression/ " generated_uncompressible_lz4"
38- // tests_resources_files_path_with_compression / "generated_zstd"
39- // tests_resources_files_path_with_compression/ "generated_uncompressible_zstd"
40+ tests_resources_files_path_with_compression/ " generated_uncompressible_lz4" ,
41+ // tests_resources_files_path_with_compression / "generated_zstd",
42+ // tests_resources_files_path_with_compression/ "generated_uncompressible_zstd",
4043};
4144
4245
@@ -236,4 +239,57 @@ TEST_SUITE("Integration tests")
236239 }
237240 }
238241 }
242+
243+ TEST_CASE (" Round trip of classic test files serialization/deserialization using LZ4 compression" )
244+ {
245+ for (const auto & file_path : files_paths_to_test)
246+ {
247+ std::filesystem::path json_path = file_path;
248+ json_path.replace_extension (" .json" );
249+
250+ // Load the JSON file
251+ auto json_data = load_json_file (json_path);
252+ CHECK (json_data != nullptr );
253+
254+ const size_t num_batches = get_number_of_batches (json_path);
255+ std::vector<sparrow::record_batch> record_batches_from_json;
256+ for (size_t batch_idx = 0 ; batch_idx < num_batches; ++batch_idx)
257+ {
258+ INFO (" Processing batch " << batch_idx << " of " << num_batches);
259+ record_batches_from_json.emplace_back (
260+ sparrow::json_reader::build_record_batch_from_json (json_data, batch_idx)
261+ );
262+ }
263+
264+ // Load stream file
265+ std::filesystem::path stream_file_path = file_path;
266+ stream_file_path.replace_extension (" .stream" );
267+ std::ifstream stream_file (stream_file_path, std::ios::in | std::ios::binary);
268+ REQUIRE (stream_file.is_open ());
269+ const std::vector<uint8_t > stream_data (
270+ (std::istreambuf_iterator<char >(stream_file)),
271+ (std::istreambuf_iterator<char >())
272+ );
273+ stream_file.close ();
274+
275+ // Process the stream file
276+ const auto record_batches_from_stream = sparrow_ipc::deserialize_stream (
277+ std::span<const uint8_t >(stream_data)
278+ );
279+
280+ // Serialize from json with LZ4 compression
281+ std::vector<uint8_t > serialized_data;
282+ sparrow_ipc::memory_output_stream stream (serialized_data);
283+ sparrow_ipc::serializer serializer (stream, sparrow_ipc::CompressionType::LZ4_FRAME);
284+ serializer << record_batches_from_json << sparrow_ipc::end_stream;
285+
286+ // Deserialize
287+ const auto deserialized_serialized_data = sparrow_ipc::deserialize_stream (
288+ std::span<const uint8_t >(serialized_data)
289+ );
290+
291+ // Compare
292+ compare_record_batches (record_batches_from_stream, deserialized_serialized_data);
293+ }
294+ }
239295}
0 commit comments