Skip to content

Commit 235d29b

Browse files
committed
wip
1 parent 98c7e1f commit 235d29b

File tree

2 files changed

+49
-10
lines changed

2 files changed

+49
-10
lines changed

integration_tests/src/integration_tools.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,17 @@ namespace integration_tools
2020
{
2121
std::vector<uint8_t> json_file_to_arrow_file(const std::filesystem::path& json_path)
2222
{
23-
// Convert JSON file to stream first
24-
std::vector<uint8_t> stream_data = json_file_to_stream(json_path);
25-
26-
// Then convert stream to file format
23+
const std::vector<uint8_t> stream_data = json_file_to_stream(json_path);
2724
return stream_to_file(std::span<const uint8_t>(stream_data));
2825
}
2926

3027
std::vector<uint8_t> json_file_to_stream(const std::filesystem::path& json_path)
3128
{
32-
// Check if the JSON file exists
3329
if (!std::filesystem::exists(json_path))
3430
{
3531
throw std::runtime_error("JSON file not found: " + json_path.string());
3632
}
3733

38-
// Open and parse the JSON file
3934
std::ifstream json_file(json_path);
4035
if (!json_file.is_open())
4136
{
@@ -311,7 +306,7 @@ namespace integration_tools
311306
std::vector<sparrow::record_batch> stream_batches;
312307
try
313308
{
314-
stream_batches = sparrow_ipc::deserialize_file(stream_data);
309+
stream_batches = sparrow_ipc::deserialize_stream(stream_data);
315310
}
316311
catch (const std::exception& e)
317312
{

integration_tests/test_integration_tools.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "doctest/doctest.h"
1212
#include "integration_tools.hpp"
1313
#include "sparrow_ipc/deserialize.hpp"
14+
#include "sparrow_ipc/stream_file_serializer.hpp"
1415

1516
TEST_SUITE("Integration Tools Tests")
1617
{
@@ -41,6 +42,49 @@ TEST_SUITE("Integration Tools Tests")
4142
);
4243
}
4344

45+
TEST_CASE("json_file_to_arrow_file - Convert JSON to Arrow file format")
46+
{
47+
// Test with generated_primitive.json
48+
const std::filesystem::path json_file = tests_resources_files_path / "generated_primitive.json";
49+
50+
if (!std::filesystem::exists(json_file))
51+
{
52+
MESSAGE("Skipping test: test file not found at " << json_file);
53+
return;
54+
}
55+
56+
// Convert JSON to Arrow file format
57+
std::vector<uint8_t> arrow_file_data;
58+
CHECK_NOTHROW(arrow_file_data = integration_tools::json_file_to_arrow_file(json_file));
59+
CHECK_GT(arrow_file_data.size(), 0);
60+
61+
// Verify the output is a valid Arrow file by deserializing it
62+
auto batches = sparrow_ipc::deserialize_file(std::span<const uint8_t>(arrow_file_data));
63+
REQUIRE_EQ(batches.size(), 2); // generated_primitive.json has 2 batches
64+
65+
// Verify first batch has 17 rows (from JSON)
66+
CHECK_EQ(batches[0].nb_rows(), 17);
67+
// Verify second batch has 20 rows (from JSON)
68+
CHECK_EQ(batches[1].nb_rows(), 20);
69+
70+
// Verify all 22 columns are present (from the JSON schema)
71+
CHECK_EQ(batches[0].nb_columns(), 22);
72+
CHECK_EQ(batches[1].nb_columns(), 22);
73+
74+
// Validate the Arrow file content matches the original JSON
75+
std::ifstream json_input(json_file);
76+
REQUIRE(json_input.is_open());
77+
nlohmann::json json_data = nlohmann::json::parse(json_input);
78+
json_input.close();
79+
80+
// Build record batches from JSON and compare with deserialized ones
81+
for (size_t i = 0; i < batches.size(); ++i)
82+
{
83+
auto expected_batch = sparrow::json_reader::build_record_batch_from_json(json_data, i);
84+
CHECK(integration_tools::compare_record_batch(expected_batch, batches[i], i, false));
85+
}
86+
}
87+
4488
TEST_CASE("json_file_to_stream - Convert JSON to stream")
4589
{
4690
// Test with a known good JSON file
@@ -87,8 +131,8 @@ TEST_SUITE("Integration Tools Tests")
87131
CHECK_NOTHROW(output_data = integration_tools::stream_to_file(std::span<const uint8_t>(input_data)));
88132
CHECK_GT(output_data.size(), 0);
89133

90-
// Verify the output is valid
91-
auto batches = sparrow_ipc::deserialize_stream(std::span<const uint8_t>(output_data));
134+
// Verify the output is valid Arrow file format
135+
auto batches = sparrow_ipc::deserialize_file(std::span<const uint8_t>(output_data));
92136
CHECK_GT(batches.size(), 0);
93137
}
94138

@@ -112,7 +156,7 @@ TEST_SUITE("Integration Tools Tests")
112156

113157
// Step 3: Compare the results - both should deserialize to same data
114158
auto stream_batches = sparrow_ipc::deserialize_stream(std::span<const uint8_t>(stream_data));
115-
auto file_batches = sparrow_ipc::deserialize_stream(std::span<const uint8_t>(file_data));
159+
auto file_batches = sparrow_ipc::deserialize_file(std::span<const uint8_t>(file_data));
116160

117161
REQUIRE_EQ(stream_batches.size(), file_batches.size());
118162
for (size_t i = 0; i < stream_batches.size(); ++i)

0 commit comments

Comments
 (0)