Skip to content

Commit 92b25f7

Browse files
committed
Add read of ipc file in example
1 parent bb8496e commit 92b25f7

File tree

5 files changed

+63
-43
lines changed

5 files changed

+63
-43
lines changed

cmake/external_dependencies.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ endif()
5252
find_package_or_fetch(
5353
PACKAGE_NAME sparrow
5454
GIT_REPOSITORY https://github.com/man-group/sparrow.git
55-
TAG 1.1.2
55+
TAG 1.2.0
5656
)
5757
unset(CREATE_JSON_READER_TARGET)
5858

@@ -111,7 +111,7 @@ if(SPARROW_IPC_BUILD_TESTS)
111111

112112
# Iterate over all the files in the arrow-testing-data source directory. When it's a gz, extract in place.
113113
file(GLOB_RECURSE arrow_testing_data_targz_files CONFIGURE_DEPENDS
114-
"${arrow-testing_SOURCE_DIR}/data/arrow-ipc-stream/integration/1.0.0-littleendian/*.json.gz"
114+
"${arrow-testing_SOURCE_DIR}/data/arrow-ipc-stream/integration/cpp-21.0.0/*.json.gz"
115115
)
116116
foreach(file_path IN LISTS arrow_testing_data_targz_files)
117117
cmake_path(GET file_path PARENT_PATH parent_dir)

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ target_link_libraries(write_and_read_streams
88
PRIVATE
99
sparrow-ipc
1010
sparrow::sparrow
11+
arrow-testing-data
1112
)
1213

1314
# Set C++ standard to match the main project

examples/write_and_read_streams.cpp

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include <algorithm>
22
#include <cstdlib>
3+
#include <filesystem>
4+
#include <fstream>
35
#include <iostream>
46
#include <random>
57
#include <vector>
@@ -10,6 +12,11 @@
1012

1113
#include <sparrow/record_batch.hpp>
1214

15+
const std::filesystem::path arrow_testing_data_dir = ARROW_TESTING_DATA_DIR;
16+
const std::filesystem::path tests_resources_files_path = arrow_testing_data_dir / "data" / "arrow-ipc-stream"
17+
/ "integration" / "cpp-21.0.0";
18+
19+
1320
namespace sp = sparrow;
1421

1522
// Random number generator
@@ -23,34 +30,26 @@ std::mt19937 gen(rd());
2330
sp::record_batch create_random_record_batch(size_t num_rows)
2431
{
2532
std::uniform_int_distribution<int32_t> int_dist(0, 1000);
26-
std::uniform_real_distribution<float> float_dist(-100.0f, 100.0f);
27-
std::uniform_int_distribution<int> bool_dist(0, 1);
33+
2834

2935
// Create integer column with random values
3036
std::vector<int32_t> int_values;
3137
int_values.reserve(num_rows);
32-
for (size_t i = 0; i < num_rows; ++i)
33-
{
34-
int_values.push_back(int_dist(gen));
35-
}
38+
std::generate_n(std::back_inserter(int_values), num_rows, [&]() { return int_dist(gen); });
3639
auto int_array = sp::primitive_array<int32_t>(std::move(int_values));
3740

3841
// Create float column with random values
42+
std::uniform_real_distribution<float> float_dist(-100.0f, 100.0f);
3943
std::vector<float> float_values;
4044
float_values.reserve(num_rows);
41-
for (size_t i = 0; i < num_rows; ++i)
42-
{
43-
float_values.push_back(float_dist(gen));
44-
}
45+
std::generate_n(std::back_inserter(float_values), num_rows, [&]() { return float_dist(gen); });
4546
auto float_array = sp::primitive_array<float>(std::move(float_values));
4647

4748
// Create boolean column with random values
49+
std::uniform_int_distribution<int> bool_dist(0, 1);
4850
std::vector<bool> bool_values;
4951
bool_values.reserve(num_rows);
50-
for (size_t i = 0; i < num_rows; ++i)
51-
{
52-
bool_values.push_back(static_cast<bool>(bool_dist(gen)));
53-
}
52+
std::generate_n(std::back_inserter(bool_values), num_rows, [&]() { return static_cast<bool>(bool_dist(gen)); });
5453
auto bool_array = sp::primitive_array<bool>(std::move(bool_values));
5554

5655
// Create string column with random values
@@ -59,11 +58,7 @@ sp::record_batch create_random_record_batch(size_t num_rows)
5958
const std::vector<std::string> sample_strings =
6059
{"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", "iota", "kappa"};
6160
std::uniform_int_distribution<size_t> str_dist(0, sample_strings.size() - 1);
62-
63-
for (size_t i = 0; i < num_rows; ++i)
64-
{
65-
string_values.push_back(sample_strings[str_dist(gen)] + "_" + std::to_string(i));
66-
}
61+
std::generate_n(std::back_inserter(string_values), num_rows, [&]() { return sample_strings[str_dist(gen)] + "_" + std::to_string(string_values.size()); });
6762
auto string_array = sp::string_array(std::move(string_values));
6863

6964
// Create record batch with named columns (same schema for all batches)
@@ -277,6 +272,50 @@ int main()
277272
std::cerr << " ✗ Schema inconsistency detected!\n";
278273
}
279274

275+
// Step 8: Read and display a primitive stream file from test resources
276+
std::cout << "\n8. Reading a primitive stream file from test resources...\n";
277+
278+
const std::filesystem::path primitive_stream_file = tests_resources_files_path / "generated_primitive.stream";
279+
280+
if (std::filesystem::exists(primitive_stream_file))
281+
{
282+
std::cout << " Reading file: " << primitive_stream_file << "\n";
283+
284+
// Read the stream file
285+
std::ifstream stream_file(primitive_stream_file, std::ios::in | std::ios::binary);
286+
if (!stream_file.is_open())
287+
{
288+
std::cerr << " ERROR: Could not open stream file!\n";
289+
}
290+
else
291+
{
292+
const std::vector<uint8_t> file_stream_data(
293+
(std::istreambuf_iterator<char>(stream_file)),
294+
(std::istreambuf_iterator<char>())
295+
);
296+
stream_file.close();
297+
298+
std::cout << " File size: " << file_stream_data.size() << " bytes\n";
299+
300+
// Deserialize the stream
301+
auto file_batches = sparrow_ipc::deserialize_stream(file_stream_data);
302+
303+
std::cout << " Deserialized " << file_batches.size() << " record batch(es) from file\n";
304+
305+
// Display the first batch
306+
if (!file_batches.empty())
307+
{
308+
std::cout << " First batch from file:\n";
309+
std::cout << std::format("{}\n", file_batches[0]);
310+
}
311+
}
312+
}
313+
else
314+
{
315+
std::cout << " Note: Test resource file not found at " << primitive_stream_file << "\n";
316+
std::cout << " This is expected if test data is not available.\n";
317+
}
318+
280319
std::cout << "\n=== Example completed successfully! ===\n";
281320
}
282321
catch (const std::exception& e)

include/sparrow_ipc/flatbuffer_utils.hpp

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -88,26 +88,6 @@ namespace sparrow_ipc
8888
::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
8989
create_children(flatbuffers::FlatBufferBuilder& builder, const sparrow::record_batch& record_batch);
9090

91-
/**
92-
* @brief Creates a FlatBuffers vector of Field objects from an ArrowSchema's children.
93-
*
94-
* This function iterates through all children of the given ArrowSchema and converts
95-
* each child to a FlatBuffers Field object. The resulting fields are collected into
96-
* a FlatBuffers vector.
97-
*
98-
* @param builder Reference to the FlatBufferBuilder used for creating FlatBuffers objects
99-
* @param arrow_schema The ArrowSchema containing the children to convert
100-
*
101-
* @return A FlatBuffers offset to a vector of Field objects, or 0 if no children exist
102-
*
103-
* @throws std::invalid_argument If any child pointer in the ArrowSchema is null
104-
*
105-
* @note The function reserves space for all children upfront for performance optimization
106-
* @note Returns 0 (null offset) when the schema has no children, otherwise returns a valid vector offset
107-
*/
108-
[[nodiscard]] ::flatbuffers::Offset<
109-
::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
110-
create_children(flatbuffers::FlatBufferBuilder& builder, const sparrow::record_batch& record_batch);
11191

11292
/**
11393
* @brief Creates a FlatBuffers vector of Field objects from an ArrowSchema's children.

tests/test_de_serialization_with_files.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
const std::filesystem::path arrow_testing_data_dir = ARROW_TESTING_DATA_DIR;
2121

2222
const std::filesystem::path tests_resources_files_path = arrow_testing_data_dir / "data" / "arrow-ipc-stream"
23-
/ "integration" / "1.0.0-littleendian";
23+
/ "integration" / "cpp-21.0.0";
2424

2525
const std::vector<std::filesystem::path> files_paths_to_test = {
2626
tests_resources_files_path / "generated_primitive",
27-
tests_resources_files_path / "generated_primitive_large_offsets",
27+
// tests_resources_files_path / "generated_primitive_large_offsets",
2828
tests_resources_files_path / "generated_primitive_zerolength",
2929
// tests_resources_files_path / "generated_primitive_no_batches"
3030
};

0 commit comments

Comments
 (0)