|
1 | 1 | #include <algorithm> |
| 2 | +#include <cstdlib> |
2 | 3 | #include <iostream> |
3 | 4 | #include <random> |
4 | 5 | #include <vector> |
@@ -74,6 +75,73 @@ sp::record_batch create_random_record_batch(size_t num_rows) |
74 | 75 | ); |
75 | 76 | } |
76 | 77 |
|
| 78 | +/** |
| 79 | + * Compare two lists of record batches pairwise (same index) and report every difference on std::cerr: batch count, per-batch column/row counts, per-column data type, and each individual value. |
| 80 | + * Returns true only when no ERROR was reported. A batch-count mismatch returns false immediately; a column-name difference is logged as a WARNING and does not affect the result. |
| 81 | + */ |
| 82 | +bool verify_batches_match( |
| 83 | + const std::vector<sp::record_batch>& original_batches, |
| 84 | + const std::vector<sp::record_batch>& deserialized_batches |
| 85 | +) |
| 86 | +{ |
| 87 | + if (original_batches.size() != deserialized_batches.size()) |
| 88 | + { |
| 89 | + std::cerr << "ERROR: Batch count mismatch! Original: " << original_batches.size() |
| 90 | + << ", Deserialized: " << deserialized_batches.size() << "\n"; |
| 91 | + return false; |
| 92 | + } |
| 93 | + |
| 94 | + bool all_match = true; |
| 95 | + for (size_t batch_idx = 0; batch_idx < original_batches.size(); ++batch_idx) |
| 96 | + { |
| 97 | + const auto& original = original_batches[batch_idx]; |
| 98 | + const auto& deserialized = deserialized_batches[batch_idx]; |
| 99 | + |
| 100 | + // Column and row counts must agree; on mismatch the batch is marked as failed and its columns are not compared |
| 101 | + if (original.nb_columns() != deserialized.nb_columns() || original.nb_rows() != deserialized.nb_rows()) |
| 102 | + { |
| 103 | + std::cerr << "ERROR: Batch " << batch_idx << " structure mismatch!\n"; |
| 104 | + all_match = false; |
| 105 | + continue; |
| 106 | + } |
| 107 | + |
| 108 | + // Column names: a difference is only logged as a warning; all_match is left unchanged |
| 109 | + if (!std::ranges::equal(original.names(), deserialized.names())) |
| 110 | + { |
| 111 | + std::cerr << "WARNING: Batch " << batch_idx << " column names mismatch!\n"; |
| 112 | + } |
| 113 | + |
| 114 | + // Compare each column by index: data type first, then values (a type mismatch skips the value comparison) |
| 115 | + for (size_t col_idx = 0; col_idx < original.nb_columns(); ++col_idx) |
| 116 | + { |
| 117 | + const auto& orig_col = original.get_column(col_idx); |
| 118 | + const auto& deser_col = deserialized.get_column(col_idx); |
| 119 | + |
| 120 | + if (orig_col.data_type() != deser_col.data_type()) |
| 121 | + { |
| 122 | + std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << " type mismatch!\n"; |
| 123 | + all_match = false; |
| 124 | + continue; |
| 125 | + } |
| 126 | + |
| 127 | + // Compare element by element over orig_col.size() rows; every mismatch is logged with both values (no early exit) |
| 128 | + for (size_t row_idx = 0; row_idx < orig_col.size(); ++row_idx) |
| 129 | + { |
| 130 | + if (orig_col[row_idx] != deser_col[row_idx]) |
| 131 | + { |
| 132 | + std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << ", row " << row_idx |
| 133 | + << " value mismatch!\n"; |
| 134 | + std::cerr << " Original: " << orig_col[row_idx] |
| 135 | + << ", Deserialized: " << deser_col[row_idx] << "\n"; |
| 136 | + all_match = false; |
| 137 | + } |
| 138 | + } |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + return all_match; |
| 143 | +} |
| 144 | + |
77 | 145 | int main() |
78 | 146 | { |
79 | 147 | std::cout << "=== Sparrow IPC Stream Write and Read Example ===\n"; |
@@ -125,63 +193,7 @@ int main() |
125 | 193 | // Step 4: Verify that original and deserialized data match |
126 | 194 | std::cout << "\n4. Verifying data integrity...\n"; |
127 | 195 |
|
128 | | - if (original_batches.size() != deserialized_batches.size()) |
129 | | - { |
130 | | - std::cerr << "ERROR: Batch count mismatch! Original: " << original_batches.size() |
131 | | - << ", Deserialized: " << deserialized_batches.size() << "\n"; |
132 | | - return 1; |
133 | | - } |
134 | | - |
135 | | - bool all_match = true; |
136 | | - for (size_t batch_idx = 0; batch_idx < original_batches.size(); ++batch_idx) |
137 | | - { |
138 | | - const auto& original = original_batches[batch_idx]; |
139 | | - const auto& deserialized = deserialized_batches[batch_idx]; |
140 | | - |
141 | | - // Check basic structure |
142 | | - if (original.nb_columns() != deserialized.nb_columns() |
143 | | - || original.nb_rows() != deserialized.nb_rows()) |
144 | | - { |
145 | | - std::cerr << "ERROR: Batch " << batch_idx << " structure mismatch!\n"; |
146 | | - all_match = false; |
147 | | - continue; |
148 | | - } |
149 | | - |
150 | | - // Check column names |
151 | | - if (!std::ranges::equal(original.names(), deserialized.names())) |
152 | | - { |
153 | | - std::cerr << "WARNING: Batch " << batch_idx << " column names mismatch!\n"; |
154 | | - } |
155 | | - |
156 | | - // Check column data |
157 | | - for (size_t col_idx = 0; col_idx < original.nb_columns(); ++col_idx) |
158 | | - { |
159 | | - const auto& orig_col = original.get_column(col_idx); |
160 | | - const auto& deser_col = deserialized.get_column(col_idx); |
161 | | - |
162 | | - if (orig_col.data_type() != deser_col.data_type()) |
163 | | - { |
164 | | - std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << " type mismatch!\n"; |
165 | | - all_match = false; |
166 | | - continue; |
167 | | - } |
168 | | - |
169 | | - // Check values |
170 | | - for (size_t row_idx = 0; row_idx < orig_col.size(); ++row_idx) |
171 | | - { |
172 | | - if (orig_col[row_idx] != deser_col[row_idx]) |
173 | | - { |
174 | | - std::cerr << "ERROR: Batch " << batch_idx << ", column " << col_idx << ", row " |
175 | | - << row_idx << " value mismatch!\n"; |
176 | | - std::cerr << " Original: " << orig_col[row_idx] |
177 | | - << ", Deserialized: " << deser_col[row_idx] << "\n"; |
178 | | - all_match = false; |
179 | | - } |
180 | | - } |
181 | | - } |
182 | | - } |
183 | | - |
184 | | - if (all_match) |
| 196 | + if (verify_batches_match(original_batches, deserialized_batches)) |
185 | 197 | { |
186 | 198 | std::cout << " ✓ All data matches perfectly!\n"; |
187 | 199 | } |
@@ -270,8 +282,8 @@ int main() |
270 | 282 | catch (const std::exception& e) |
271 | 283 | { |
272 | 284 | std::cerr << "Error: " << e.what() << "\n"; |
273 | | - return 1; |
| 285 | + return EXIT_FAILURE; |
274 | 286 | } |
275 | 287 |
|
276 | | - return 0; |
| 288 | + return EXIT_SUCCESS; |
277 | 289 | } |
0 commit comments