44#include < iostream>
55#include < vector>
66
7- #include " integration_tools.hpp"
7+ #include < sparrow_ipc/deserialize.hpp>
8+ #include < sparrow_ipc/memory_output_stream.hpp>
9+ #include < sparrow_ipc/serializer.hpp>
10+ #include " sparrow_ipc/stream_file_serializer.hpp"
811
912/**
10- * @brief Reads a JSON file containing record batches and outputs the serialized Arrow IPC stream to a file.
13+ * @brief Reads an Arrow IPC file and outputs the serialized Arrow IPC stream to a file.
1114 *
12- * This program takes a JSON file path and an output file path as command-line arguments,
13- * parses the record batches from the JSON data , serializes them into Arrow IPC stream format,
15+ * This program takes an Arrow IPC file path and an output file path as command-line arguments,
16+ * deserializes the record batches from the Arrow file, serializes them into Arrow IPC stream format,
1417 * and writes the binary stream to the specified output file.
1518 *
16- * Usage: arrow_file_to_stream <json_file_path > <output_arrow_file >
19+ * Usage: arrow_file_to_stream <arrow_file_path> <output_stream_file>
1720 *
1821 * @param argc Number of command-line arguments
1922 * @param argv Array of command-line arguments
@@ -23,18 +26,40 @@ int main(int argc, char* argv[])
2326{
2427 if (argc != 3 )
2528 {
26- std::cerr << " Usage: " << argv[0 ] << " <json_file_path > <output_arrow_file >\n " ;
27- std::cerr << " Reads a JSON file and outputs the serialized Arrow IPC stream to a file.\n " ;
29+ std::cerr << " Usage: " << argv[0 ] << " <arrow_file_path > <output_stream_file >\n " ;
30+ std::cerr << " Reads an Arrow IPC file and outputs the serialized Arrow IPC stream to a file.\n " ;
2831 return EXIT_FAILURE;
2932 }
3033
31- const std::filesystem::path json_path (argv[1 ]);
34+ const std::filesystem::path input_path (argv[1 ]);
3235 const std::filesystem::path output_path (argv[2 ]);
3336
3437 try
3538 {
36- const std::vector<uint8_t > stream_data = integration_tools::json_file_to_stream (json_path);
39+ // Read the Arrow file
40+ std::ifstream input_file (input_path, std::ios::binary);
41+ if (!input_file)
42+ {
43+ std::cerr << " Error: Could not open input file: " << input_path << " \n " ;
44+ return EXIT_FAILURE;
45+ }
46+
47+ const std::vector<uint8_t > file_data (
48+ (std::istreambuf_iterator<char >(input_file)),
49+ (std::istreambuf_iterator<char >())
50+ );
51+ input_file.close ();
52+
53+ // Deserialize record batches from Arrow file format
54+ auto batches = sparrow_ipc::deserialize_file (file_data);
55+
56+ // Serialize to Arrow stream format
57+ std::vector<uint8_t > stream_data;
58+ sparrow_ipc::memory_output_stream mem_stream (stream_data);
59+ sparrow_ipc::serializer serializer (mem_stream);
60+ serializer << batches << sparrow_ipc::end_stream;
3761
62+ // Write to output file
3863 std::ofstream output_file (output_path, std::ios::binary);
3964 if (!output_file)
4065 {
0 commit comments