Skip to content

Commit 713198e

Browse files
committed
wip
1 parent 8d69bb0 commit 713198e

File tree

1 file changed

+34
-9
lines changed

1 file changed

+34
-9
lines changed

integration_tests/arrow_file_to_stream.cpp

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,19 @@
44
#include <iostream>
55
#include <vector>
66

7-
#include "integration_tools.hpp"
7+
#include <sparrow_ipc/deserialize.hpp>
8+
#include <sparrow_ipc/memory_output_stream.hpp>
9+
#include <sparrow_ipc/serializer.hpp>
10+
#include "sparrow_ipc/stream_file_serializer.hpp"
811

912
/**
10-
* @brief Reads a JSON file containing record batches and outputs the serialized Arrow IPC stream to a file.
13+
* @brief Reads an Arrow IPC file and outputs the serialized Arrow IPC stream to a file.
1114
*
12-
* This program takes a JSON file path and an output file path as command-line arguments,
13-
* parses the record batches from the JSON data, serializes them into Arrow IPC stream format,
15+
* This program takes an Arrow IPC file path and an output file path as command-line arguments,
16+
* deserializes the record batches from the Arrow IPC file, re-serializes them into Arrow IPC stream format,
1417
* and writes the binary stream to the specified output file.
1518
*
16-
* Usage: arrow_file_to_stream <json_file_path> <output_arrow_file>
19+
* Usage: arrow_file_to_stream <arrow_file_path> <output_stream_file>
1720
*
1821
* @param argc Number of command-line arguments
1922
* @param argv Array of command-line arguments
@@ -23,18 +26,40 @@ int main(int argc, char* argv[])
2326
{
2427
if (argc != 3)
2528
{
26-
std::cerr << "Usage: " << argv[0] << " <json_file_path> <output_arrow_file>\n";
27-
std::cerr << "Reads a JSON file and outputs the serialized Arrow IPC stream to a file.\n";
29+
std::cerr << "Usage: " << argv[0] << " <arrow_file_path> <output_stream_file>\n";
30+
std::cerr << "Reads an Arrow IPC file and outputs the serialized Arrow IPC stream to a file.\n";
2831
return EXIT_FAILURE;
2932
}
3033

31-
const std::filesystem::path json_path(argv[1]);
34+
const std::filesystem::path input_path(argv[1]);
3235
const std::filesystem::path output_path(argv[2]);
3336

3437
try
3538
{
36-
const std::vector<uint8_t> stream_data = integration_tools::json_file_to_stream(json_path);
39+
// Read the Arrow file
40+
std::ifstream input_file(input_path, std::ios::binary);
41+
if (!input_file)
42+
{
43+
std::cerr << "Error: Could not open input file: " << input_path << "\n";
44+
return EXIT_FAILURE;
45+
}
46+
47+
const std::vector<uint8_t> file_data(
48+
(std::istreambuf_iterator<char>(input_file)),
49+
(std::istreambuf_iterator<char>())
50+
);
51+
input_file.close();
52+
53+
// Deserialize record batches from Arrow file format
54+
auto batches = sparrow_ipc::deserialize_file(file_data);
55+
56+
// Serialize to Arrow stream format
57+
std::vector<uint8_t> stream_data;
58+
sparrow_ipc::memory_output_stream mem_stream(stream_data);
59+
sparrow_ipc::serializer serializer(mem_stream);
60+
serializer << batches << sparrow_ipc::end_stream;
3761

62+
// Write to output file
3863
std::ofstream output_file(output_path, std::ios::binary);
3964
if (!output_file)
4065
{

0 commit comments

Comments
 (0)