|
26 | 26 | #include <avro/CustomAttributes.hh> |
27 | 27 | #include <avro/LogicalType.hh> |
28 | 28 | #include <avro/NodeImpl.hh> |
| 29 | +#include <avro/Schema.hh> |
29 | 30 | #include <avro/Types.hh> |
| 31 | +#include <avro/ValidSchema.hh> |
30 | 32 |
|
31 | 33 | #include "iceberg/avro/avro_schema_util_internal.h" |
32 | 34 | #include "iceberg/util/macros.h" |
@@ -263,4 +265,116 @@ Status ToAvroNodeVisitor::Visit(const SchemaField& field, ::avro::NodePtr* node) |
263 | 265 | return {}; |
264 | 266 | } |
265 | 267 |
|
| 268 | +namespace { |
| 269 | + |
| 270 | +bool HasId(const ::avro::NodePtr& parent_node, size_t field_idx, |
| 271 | + const std::string& attr_name) { |
| 272 | + if (field_idx >= parent_node->customAttributes()) { |
| 273 | + return false; |
| 274 | + } |
| 275 | + return parent_node->customAttributesAt(field_idx).getAttribute(attr_name).has_value(); |
| 276 | +} |
| 277 | + |
| 278 | +} // namespace |
| 279 | + |
| 280 | +Status HasIdVisitor::Visit(const ::avro::NodePtr& node) { |
| 281 | + if (!node) [[unlikely]] { |
| 282 | + return InvalidSchema("Avro node is null"); |
| 283 | + } |
| 284 | + |
| 285 | + switch (node->type()) { |
| 286 | + case ::avro::AVRO_RECORD: |
| 287 | + return VisitRecord(node); |
| 288 | + case ::avro::AVRO_ARRAY: |
| 289 | + return VisitArray(node); |
| 290 | + case ::avro::AVRO_MAP: |
| 291 | + return VisitMap(node); |
| 292 | + case ::avro::AVRO_UNION: |
| 293 | + return VisitUnion(node); |
| 294 | + case ::avro::AVRO_BOOL: |
| 295 | + case ::avro::AVRO_INT: |
| 296 | + case ::avro::AVRO_LONG: |
| 297 | + case ::avro::AVRO_FLOAT: |
| 298 | + case ::avro::AVRO_DOUBLE: |
| 299 | + case ::avro::AVRO_STRING: |
| 300 | + case ::avro::AVRO_BYTES: |
| 301 | + case ::avro::AVRO_FIXED: |
| 302 | + return {}; |
| 303 | + case ::avro::AVRO_NULL: |
| 304 | + case ::avro::AVRO_ENUM: |
| 305 | + default: |
| 306 | + return InvalidSchema("Unsupported Avro type: {}", static_cast<int>(node->type())); |
| 307 | + } |
| 308 | +} |
| 309 | + |
| 310 | +Status HasIdVisitor::VisitRecord(const ::avro::NodePtr& node) { |
| 311 | + static const std::string kFieldIdKey{kFieldIdProp}; |
| 312 | + total_fields_ += node->leaves(); |
| 313 | + for (size_t i = 0; i < node->leaves(); ++i) { |
| 314 | + if (HasId(node, i, kFieldIdKey)) { |
| 315 | + fields_with_id_++; |
| 316 | + } |
| 317 | + ICEBERG_RETURN_UNEXPECTED(Visit(node->leafAt(i))); |
| 318 | + } |
| 319 | + return {}; |
| 320 | +} |
| 321 | + |
| 322 | +Status HasIdVisitor::VisitArray(const ::avro::NodePtr& node) { |
| 323 | + if (node->leaves() != 1) [[unlikely]] { |
| 324 | + return InvalidSchema("Array type must have exactly one leaf"); |
| 325 | + } |
| 326 | + |
| 327 | + if (node->logicalType().type() == ::avro::LogicalType::CUSTOM && |
| 328 | + node->logicalType().customLogicalType() != nullptr && |
| 329 | + node->logicalType().customLogicalType()->name() == "map") { |
| 330 | + return Visit(node->leafAt(0)); |
| 331 | + } |
| 332 | + |
| 333 | + total_fields_++; |
| 334 | + if (HasId(node, /*field_idx=*/0, std::string(kElementIdProp))) { |
| 335 | + fields_with_id_++; |
| 336 | + } |
| 337 | + |
| 338 | + return Visit(node->leafAt(0)); |
| 339 | +} |
| 340 | + |
| 341 | +Status HasIdVisitor::VisitMap(const ::avro::NodePtr& node) { |
| 342 | + if (node->leaves() != 2) [[unlikely]] { |
| 343 | + return InvalidSchema("Map type must have exactly two leaves"); |
| 344 | + } |
| 345 | + |
| 346 | + total_fields_ += 2; |
| 347 | + if (HasId(node, /*field_idx=*/0, std::string(kKeyIdProp))) { |
| 348 | + fields_with_id_++; |
| 349 | + } |
| 350 | + if (HasId(node, /*field_idx=*/0, std::string(kValueIdProp))) { |
| 351 | + fields_with_id_++; |
| 352 | + } |
| 353 | + |
| 354 | + return Visit(node->leafAt(1)); |
| 355 | +} |
| 356 | + |
| 357 | +Status HasIdVisitor::VisitUnion(const ::avro::NodePtr& node) { |
| 358 | + if (node->leaves() != 2) [[unlikely]] { |
| 359 | + return InvalidSchema("Union type must have exactly two branches"); |
| 360 | + } |
| 361 | + |
| 362 | + const auto& branch_0 = node->leafAt(0); |
| 363 | + const auto& branch_1 = node->leafAt(1); |
| 364 | + if (branch_0->type() == ::avro::AVRO_NULL) { |
| 365 | + return Visit(branch_1); |
| 366 | + } |
| 367 | + if (branch_1->type() == ::avro::AVRO_NULL) { |
| 368 | + return Visit(branch_0); |
| 369 | + } |
| 370 | + |
| 371 | + return InvalidSchema("Union type must have exactly one null branch"); |
| 372 | +} |
| 373 | + |
| 374 | +Status HasIdVisitor::Visit(const ::avro::ValidSchema& schema) { |
| 375 | + return Visit(schema.root()); |
| 376 | +} |
| 377 | + |
| 378 | +Status HasIdVisitor::Visit(const ::avro::Schema& schema) { return Visit(schema.root()); } |
| 379 | + |
266 | 380 | } // namespace iceberg::avro |
0 commit comments