Skip to content

Commit bb8496e

Browse files
committed
wip
1 parent 1cde033 commit bb8496e

File tree

1 file changed

+71
-59
lines changed

1 file changed

+71
-59
lines changed

examples/write_and_read_streams.cpp

Lines changed: 71 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <algorithm>
2+
#include <cstdlib>
23
#include <iostream>
34
#include <random>
45
#include <vector>
@@ -74,6 +75,73 @@ sp::record_batch create_random_record_batch(size_t num_rows)
7475
);
7576
}
7677

78+
/**
79+
* Verify that two sets of record batches are identical: same batch count and, per batch, the same column/row counts, column data types and row values. Each mismatch found is reported on std::cerr.
80+
* Returns true if all batches match, false otherwise. A column-name mismatch is only logged as a WARNING and does not change the result.
81+
*/
82+
bool verify_batches_match(
83+
const std::vector<sp::record_batch>& original_batches,
84+
const std::vector<sp::record_batch>& deserialized_batches
85+
)
86+
{
87+
if (original_batches.size() != deserialized_batches.size())
88+
{
89+
std::cerr << "ERROR: Batch count mismatch! Original: " << original_batches.size()
90+
<< ", Deserialized: " << deserialized_batches.size() << "\n";
91+
return false;
92+
}
93+
94+
bool all_match = true;
95+
for (size_t batch_idx = 0; batch_idx < original_batches.size(); ++batch_idx)
96+
{
97+
const auto& original = original_batches[batch_idx];
98+
const auto& deserialized = deserialized_batches[batch_idx];
99+
100+
// Check basic structure (column and row counts); on mismatch, skip the remaining checks for this batch
101+
if (original.nb_columns() != deserialized.nb_columns() || original.nb_rows() != deserialized.nb_rows())
102+
{
103+
std::cerr << "ERROR: Batch " << batch_idx << " structure mismatch!\n";
104+
all_match = false;
105+
continue;
106+
}
107+
108+
// Check column names (a mismatch is reported as a warning only; all_match is left unchanged)
109+
if (!std::ranges::equal(original.names(), deserialized.names()))
110+
{
111+
std::cerr << "WARNING: Batch " << batch_idx << " column names mismatch!\n";
112+
}
113+
114+
// Check column data, one column at a time; a type mismatch skips the value comparison for that column
115+
for (size_t col_idx = 0; col_idx < original.nb_columns(); ++col_idx)
116+
{
117+
const auto& orig_col = original.get_column(col_idx);
118+
const auto& deser_col = deserialized.get_column(col_idx);
119+
120+
if (orig_col.data_type() != deser_col.data_type())
121+
{
122+
std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << " type mismatch!\n";
123+
all_match = false;
124+
continue;
125+
}
126+
127+
// Check values row by row; keep going after a mismatch so every differing row is reported
128+
for (size_t row_idx = 0; row_idx < orig_col.size(); ++row_idx)
129+
{
130+
if (orig_col[row_idx] != deser_col[row_idx])
131+
{
132+
std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << ", row " << row_idx
133+
<< " value mismatch!\n";
134+
std::cerr << " Original: " << orig_col[row_idx]
135+
<< ", Deserialized: " << deser_col[row_idx] << "\n";
136+
all_match = false;
137+
}
138+
}
139+
}
140+
}
141+
142+
return all_match;
143+
}
144+
77145
int main()
78146
{
79147
std::cout << "=== Sparrow IPC Stream Write and Read Example ===\n";
@@ -125,63 +193,7 @@ int main()
125193
// Step 4: Verify that original and deserialized data match
126194
std::cout << "\n4. Verifying data integrity...\n";
127195

128-
if (original_batches.size() != deserialized_batches.size())
129-
{
130-
std::cerr << "ERROR: Batch count mismatch! Original: " << original_batches.size()
131-
<< ", Deserialized: " << deserialized_batches.size() << "\n";
132-
return 1;
133-
}
134-
135-
bool all_match = true;
136-
for (size_t batch_idx = 0; batch_idx < original_batches.size(); ++batch_idx)
137-
{
138-
const auto& original = original_batches[batch_idx];
139-
const auto& deserialized = deserialized_batches[batch_idx];
140-
141-
// Check basic structure
142-
if (original.nb_columns() != deserialized.nb_columns()
143-
|| original.nb_rows() != deserialized.nb_rows())
144-
{
145-
std::cerr << "ERROR: Batch " << batch_idx << " structure mismatch!\n";
146-
all_match = false;
147-
continue;
148-
}
149-
150-
// Check column names
151-
if (!std::ranges::equal(original.names(), deserialized.names()))
152-
{
153-
std::cerr << "WARNING: Batch " << batch_idx << " column names mismatch!\n";
154-
}
155-
156-
// Check column data
157-
for (size_t col_idx = 0; col_idx < original.nb_columns(); ++col_idx)
158-
{
159-
const auto& orig_col = original.get_column(col_idx);
160-
const auto& deser_col = deserialized.get_column(col_idx);
161-
162-
if (orig_col.data_type() != deser_col.data_type())
163-
{
164-
std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << " type mismatch!\n";
165-
all_match = false;
166-
continue;
167-
}
168-
169-
// Check values
170-
for (size_t row_idx = 0; row_idx < orig_col.size(); ++row_idx)
171-
{
172-
if (orig_col[row_idx] != deser_col[row_idx])
173-
{
174-
std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << ", row "
175-
<< row_idx << " value mismatch!\n";
176-
std::cerr << " Original: " << orig_col[row_idx]
177-
<< ", Deserialized: " << deser_col[row_idx] << "\n";
178-
all_match = false;
179-
}
180-
}
181-
}
182-
}
183-
184-
if (all_match)
196+
if (verify_batches_match(original_batches, deserialized_batches))
185197
{
186198
std::cout << " ✓ All data matches perfectly!\n";
187199
}
@@ -270,8 +282,8 @@ int main()
270282
catch (const std::exception& e)
271283
{
272284
std::cerr << "Error: " << e.what() << "\n";
273-
return 1;
285+
return EXIT_FAILURE;
274286
}
275287

276-
return 0;
288+
return EXIT_SUCCESS;
277289
}

0 commit comments

Comments
 (0)