File tree Expand file tree Collapse file tree 4 files changed +32
-8
lines changed
Expand file tree Collapse file tree 4 files changed +32
-8
lines changed Original file line number Diff line number Diff line change @@ -35,7 +35,7 @@ int main(int argc, char* argv[])
3535 try
3636 {
3737 // Convert JSON file to Arrow IPC file format using the library
38- std::vector<uint8_t > stream_data = integration_tools::json_file_to_stream (json_path);
38+ std::vector<uint8_t > stream_data = integration_tools::json_file_to_arrow_file (json_path);
3939
4040 // Write the Arrow file bytes to the output file
4141 std::ofstream output_file (output_path, std::ios::out | std::ios::binary);
Original file line number Diff line number Diff line change @@ -26,7 +26,7 @@ int main(int argc, char* argv[])
2626 if (argc != 3 )
2727 {
2828 std::cerr << " Usage: " << argv[0 ] << " <input_file_path> <output_file_path>\n " ;
29- std::cerr << " Reads an Arrow IPC stream from a file and writes it to another file.\n " ;
29+ std::cerr << " Reads an Arrow IPC stream from a file and writes it to an Arrow file.\n " ;
3030 return EXIT_FAILURE;
3131 }
3232
Original file line number Diff line number Diff line change 55#include < vector>
66
77#include < nlohmann/json.hpp>
8+
89#include < sparrow/record_batch.hpp>
910
1011namespace integration_tools
1112{
13+ /* *
14+ * @brief Converts a JSON file to Arrow IPC file format.
15+ *
16+ * Reads a JSON file from the specified path and converts its contents into
17+ * Apache Arrow IPC (Inter-Process Communication) file format, returning the
18+ * serialized Arrow data as a byte vector.
19+ *
20+ * @param json_path The filesystem path to the input JSON file to be converted.
21+ * @return std::vector<uint8_t> A byte vector containing the Arrow IPC file data.
22+ *
23+ * @throws std::filesystem::filesystem_error If the file cannot be accessed or read.
24+ * @throws std::runtime_error If the JSON parsing or Arrow conversion fails.
25+ */
26+ std::vector<uint8_t > json_file_to_arrow_file (const std::filesystem::path& json_path);
27+
1228 /* *
1329 * @brief Reads a JSON file and converts it to Arrow IPC stream format.
1430 *
@@ -44,10 +60,8 @@ namespace integration_tools
4460 * @return true if the data matches, false otherwise
4561 * @throws std::runtime_error on parsing or deserialization errors
4662 */
47- bool validate_json_against_stream (
48- const std::filesystem::path& json_path,
49- std::span<const uint8_t > stream_data
50- );
63+ bool
64+ validate_json_against_stream (const std::filesystem::path& json_path, std::span<const uint8_t > stream_data);
5165
5266 /* *
5367 * @brief Compares two record batches for equality.
Original file line number Diff line number Diff line change 44#include < iostream>
55#include < iterator>
66#include < sstream>
7+ #include " sparrow_ipc/stream_file_serializer.hpp"
78
89#if defined(__cpp_lib_format)
910# include < format>
1718
1819namespace integration_tools
1920{
21+ std::vector<uint8_t > json_file_to_arrow_file (const std::filesystem::path& json_path)
22+ {
23+ // Convert JSON file to stream first
24+ std::vector<uint8_t > stream_data = json_file_to_stream (json_path);
25+
26+ // Then convert stream to file format
27+ return stream_to_file (std::span<const uint8_t >(stream_data));
28+ }
29+
2030 std::vector<uint8_t > json_file_to_stream (const std::filesystem::path& json_path)
2131 {
2232 // Check if the JSON file exists
@@ -106,8 +116,8 @@ namespace integration_tools
106116 // Re-serialize the record batches into the Arrow IPC file format
107117 std::vector<uint8_t > output_stream_data;
108118 sparrow_ipc::memory_output_stream stream (output_stream_data);
109- sparrow_ipc::serializer serializer (stream);
110- serializer << record_batches << sparrow_ipc::end_stream ;
119+ sparrow_ipc::stream_file_serializer serializer (stream);
120+ serializer << record_batches << sparrow_ipc::end_file ;
111121
112122 return output_stream_data;
113123 }
You can’t perform that action at this time.
0 commit comments