Skip to content

Commit 3543a07

Browse files
committed
wip
1 parent c48a5c8 commit 3543a07

File tree

4 files changed

+32
-8
lines changed

4 files changed

+32
-8
lines changed

integration_tests/arrow_json_to_file.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ int main(int argc, char* argv[])
3535
try
3636
{
3737
// Convert JSON file to Arrow IPC file format using the library
38-
std::vector<uint8_t> stream_data = integration_tools::json_file_to_stream(json_path);
38+
std::vector<uint8_t> stream_data = integration_tools::json_file_to_arrow_file(json_path);
3939

4040
// Write the binary stream to the output file
4141
std::ofstream output_file(output_path, std::ios::out | std::ios::binary);

integration_tests/arrow_stream_to_file.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ int main(int argc, char* argv[])
2626
if (argc != 3)
2727
{
2828
std::cerr << "Usage: " << argv[0] << " <input_file_path> <output_file_path>\n";
29-
std::cerr << "Reads an Arrow IPC stream from a file and writes it to another file.\n";
29+
std::cerr << "Reads an Arrow IPC stream from a file and writes it to an Arrow file.\n";
3030
return EXIT_FAILURE;
3131
}
3232

integration_tests/include/integration_tools.hpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,26 @@
55
#include <vector>
66

77
#include <nlohmann/json.hpp>
8+
89
#include <sparrow/record_batch.hpp>
910

1011
namespace integration_tools
1112
{
13+
/**
14+
* @brief Converts a JSON file to Arrow IPC file format.
15+
*
16+
* Reads a JSON file from the specified path and converts its contents into
17+
* Apache Arrow IPC (Inter-Process Communication) file format, returning the
18+
* serialized Arrow data as a byte vector.
19+
*
20+
* @param json_path The filesystem path to the input JSON file to be converted.
21+
* @return std::vector<uint8_t> A byte vector containing the Arrow IPC file data.
22+
*
23+
* @throws std::filesystem::filesystem_error If the file cannot be accessed or read.
24+
* @throws std::runtime_error If the JSON parsing or Arrow conversion fails.
25+
*/
26+
std::vector<uint8_t> json_file_to_arrow_file(const std::filesystem::path& json_path);
27+
1228
/**
1329
* @brief Reads a JSON file and converts it to Arrow IPC stream format.
1430
*
@@ -44,10 +60,8 @@ namespace integration_tools
4460
* @return true if the data matches, false otherwise
4561
* @throws std::runtime_error on parsing or deserialization errors
4662
*/
47-
bool validate_json_against_stream(
48-
const std::filesystem::path& json_path,
49-
std::span<const uint8_t> stream_data
50-
);
63+
bool
64+
validate_json_against_stream(const std::filesystem::path& json_path, std::span<const uint8_t> stream_data);
5165

5266
/**
5367
* @brief Compares two record batches for equality.

integration_tests/src/integration_tools.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <iostream>
55
#include <iterator>
66
#include <sstream>
7+
#include "sparrow_ipc/stream_file_serializer.hpp"
78

89
#if defined(__cpp_lib_format)
910
# include <format>
@@ -17,6 +18,15 @@
1718

1819
namespace integration_tools
1920
{
21+
std::vector<uint8_t> json_file_to_arrow_file(const std::filesystem::path& json_path)
22+
{
23+
// Convert JSON file to stream first
24+
std::vector<uint8_t> stream_data = json_file_to_stream(json_path);
25+
26+
// Then convert stream to file format
27+
return stream_to_file(std::span<const uint8_t>(stream_data));
28+
}
29+
2030
std::vector<uint8_t> json_file_to_stream(const std::filesystem::path& json_path)
2131
{
2232
// Check if the JSON file exists
@@ -106,8 +116,8 @@ namespace integration_tools
106116
// Re-serialize the record batches to ensure a valid Arrow IPC file output
107117
std::vector<uint8_t> output_stream_data;
108118
sparrow_ipc::memory_output_stream stream(output_stream_data);
109-
sparrow_ipc::serializer serializer(stream);
110-
serializer << record_batches << sparrow_ipc::end_stream;
119+
sparrow_ipc::stream_file_serializer serializer(stream);
120+
serializer << record_batches << sparrow_ipc::end_file;
111121

112122
return output_stream_data;
113123
}

0 commit comments

Comments
 (0)