Skip to content

Commit c9ff700

Browse files
update code
1 parent 94cee21 commit c9ff700

File tree

2 files changed

+66
-169
lines changed

2 files changed

+66
-169
lines changed

src/iceberg/avro/avro_schema_util.cc

Lines changed: 60 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -803,40 +803,37 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
803803
}
804804
}
805805

806-
if (nested_field) {
807-
// Check if field_id is present
808-
if (!nested_field->field_id.has_value()) {
809-
return InvalidSchema("Field ID is missing for field '{}' in nested mapping",
810-
field_name);
811-
}
806+
if (!nested_field) {
807+
return InvalidSchema("Field '{}' not found in nested mapping", field_name);
808+
}
812809

813-
// Preserve existing custom attributes for this field
814-
::avro::CustomAttributes attributes;
815-
if (i < original_node->customAttributes()) {
816-
// Copy all existing attributes from the original node
817-
const auto& original_attrs = original_node->customAttributesAt(i);
818-
const auto& existing_attrs = original_attrs.attributes();
819-
for (const auto& attr_pair : existing_attrs) {
820-
// Copy each existing attribute to preserve original metadata
821-
attributes.addAttribute(attr_pair.first, attr_pair.second, false);
822-
}
810+
if (!nested_field->field_id.has_value()) {
811+
return InvalidSchema("Field ID is missing for field '{}' in nested mapping",
812+
field_name);
813+
}
814+
815+
// Preserve existing custom attributes for this field
816+
::avro::CustomAttributes attributes;
817+
if (i < original_node->customAttributes()) {
818+
// Copy all existing attributes from the original node
819+
for (const auto& attr_pair : original_node->customAttributesAt(i).attributes()) {
820+
// Copy each existing attribute to preserve original metadata
821+
attributes.addAttribute(attr_pair.first, attr_pair.second, /*addQuote=*/false);
823822
}
823+
}
824824

825-
// Add field ID attribute to the new node (preserving existing attributes)
826-
attributes.addAttribute(std::string(kFieldIdProp),
827-
std::to_string(nested_field->field_id.value()), false);
825+
// Add field ID attribute to the new node (preserving existing attributes)
826+
attributes.addAttribute(std::string(kFieldIdProp),
827+
std::to_string(nested_field->field_id.value()),
828+
/*addQuote=*/false);
828829

829-
new_record_node->addCustomAttributesForField(attributes);
830+
new_record_node->addCustomAttributesForField(attributes);
830831

831-
// Recursively apply field IDs to nested fields
832-
ICEBERG_ASSIGN_OR_RAISE(auto new_nested_node,
833-
MakeAvroNodeWithFieldIds(field_node, *nested_field));
834-
new_record_node->addName(field_name);
835-
new_record_node->addLeaf(new_nested_node);
836-
} else {
837-
// If no nested field found, this is an error
838-
return InvalidSchema("Field '{}' not found in nested mapping", field_name);
839-
}
832+
// Recursively apply field IDs to nested fields
833+
ICEBERG_ASSIGN_OR_RAISE(auto new_nested_node,
834+
MakeAvroNodeWithFieldIds(field_node, *nested_field));
835+
new_record_node->addName(field_name);
836+
new_record_node->addLeaf(new_nested_node);
840837
}
841838

842839
return new_record_node;
@@ -858,39 +855,29 @@ Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr& origi
858855
return new_array_node;
859856
}
860857

861-
// For regular arrays, try to find element field ID from nested mapping
862-
const MappedField* element_field = nullptr;
863-
if (field.nested_mapping) {
864-
auto fields_span = field.nested_mapping->fields();
865-
for (const auto& f : fields_span) {
866-
if (f.names.find(std::string(kElement)) != f.names.end()) {
867-
element_field = &f;
868-
break;
869-
}
870-
}
858+
// For regular arrays, use the first field from nested mapping as element field
859+
if (!field.nested_mapping || field.nested_mapping->fields().empty()) {
860+
return InvalidSchema("Array type requires nested mapping with element field");
871861
}
872862

873-
if (element_field) {
874-
// Check if field_id is present
875-
if (!element_field->field_id.has_value()) {
876-
return InvalidSchema("Field ID is missing for element field in array");
877-
}
863+
const auto& element_field = field.nested_mapping->fields()[0];
878864

879-
ICEBERG_ASSIGN_OR_RAISE(
880-
auto new_element_node,
881-
MakeAvroNodeWithFieldIds(original_node->leafAt(0), *element_field));
882-
new_array_node->addLeaf(new_element_node);
883-
884-
// Add element field ID as custom attribute
885-
::avro::CustomAttributes element_attributes;
886-
element_attributes.addAttribute(std::string(kFieldIdProp),
887-
std::to_string(*element_field->field_id), false);
888-
new_array_node->addCustomAttributesForField(element_attributes);
889-
} else {
890-
// If no element field found, this is an error
891-
return InvalidSchema("Element field not found in nested mapping for array");
865+
if (!element_field.field_id.has_value()) {
866+
return InvalidSchema("Field ID is missing for element field in array");
892867
}
893868

869+
ICEBERG_ASSIGN_OR_RAISE(
870+
auto new_element_node,
871+
MakeAvroNodeWithFieldIds(original_node->leafAt(0), element_field));
872+
new_array_node->addLeaf(new_element_node);
873+
874+
// Add element field ID as custom attribute
875+
::avro::CustomAttributes element_attributes;
876+
element_attributes.addAttribute(std::string(kElementIdProp),
877+
std::to_string(*element_field.field_id),
878+
/*addQuote=*/false);
879+
new_array_node->addCustomAttributesForField(element_attributes);
880+
894881
return new_array_node;
895882
}
896883

@@ -908,45 +895,25 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
908895
return InvalidSchema("Map type requires nested mapping for key and value fields");
909896
}
910897

911-
// Find key and value field mappings by name
912-
std::optional<int32_t> key_id = field.nested_mapping->Id("key");
913-
std::optional<int32_t> value_id = field.nested_mapping->Id("value");
914-
915-
if (!key_id || !value_id) {
916-
return InvalidSchema("Map type requires both 'key' and 'value' field mappings");
917-
}
918-
919-
std::optional<MappedFieldConstRef> key_field_ref = field.nested_mapping->Field(*key_id);
920-
std::optional<MappedFieldConstRef> value_field_ref =
921-
field.nested_mapping->Field(*value_id);
922-
923-
if (!key_field_ref || !value_field_ref) {
924-
return InvalidSchema("Map type requires both key and value field mappings");
898+
// For map types, use the first two fields from nested mapping as key and value
899+
if (!field.nested_mapping || field.nested_mapping->fields().size() < 2) {
900+
return InvalidSchema("Map type requires nested mapping with key and value fields");
925901
}
926902

927-
const auto& key_mapped_field = key_field_ref->get();
928-
const auto& value_mapped_field = value_field_ref->get();
903+
const auto& key_mapped_field = field.nested_mapping->fields()[0];
904+
const auto& value_mapped_field = field.nested_mapping->fields()[1];
929905

930906
if (!key_mapped_field.field_id || !value_mapped_field.field_id) {
931907
return InvalidSchema("Map key and value fields must have field IDs");
932908
}
933909

934-
// Create key field with mapped field ID
935-
MappedField key_field;
936-
key_field.field_id = *key_mapped_field.field_id;
937-
key_field.nested_mapping = key_mapped_field.nested_mapping;
938-
939-
// Create value field with mapped field ID
940-
MappedField value_field;
941-
value_field.field_id = *value_mapped_field.field_id;
942-
value_field.nested_mapping = value_mapped_field.nested_mapping;
943-
944910
// Add key and value nodes
945-
ICEBERG_ASSIGN_OR_RAISE(auto new_key_node,
946-
MakeAvroNodeWithFieldIds(original_node->leafAt(0), key_field));
911+
ICEBERG_ASSIGN_OR_RAISE(
912+
auto new_key_node,
913+
MakeAvroNodeWithFieldIds(original_node->leafAt(0), key_mapped_field));
947914
ICEBERG_ASSIGN_OR_RAISE(
948915
auto new_value_node,
949-
MakeAvroNodeWithFieldIds(original_node->leafAt(1), value_field));
916+
MakeAvroNodeWithFieldIds(original_node->leafAt(1), value_mapped_field));
950917
new_map_node->addLeaf(new_key_node);
951918
new_map_node->addLeaf(new_value_node);
952919

@@ -958,19 +925,21 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
958925
for (const auto& attr_pair : existing_attrs) {
959926
// Copy each existing attribute to preserve original metadata
960927
::avro::CustomAttributes attributes;
961-
attributes.addAttribute(attr_pair.first, attr_pair.second, false);
928+
attributes.addAttribute(attr_pair.first, attr_pair.second, /*addQuote=*/false);
962929
new_map_node->addCustomAttributesForField(attributes);
963930
}
964931
}
965932

966933
::avro::CustomAttributes key_attributes;
967-
key_attributes.addAttribute(std::string(kFieldIdProp),
968-
std::to_string(*key_mapped_field.field_id), false);
934+
key_attributes.addAttribute(std::string(kKeyIdProp),
935+
std::to_string(*key_mapped_field.field_id),
936+
/*addQuote=*/false);
969937
new_map_node->addCustomAttributesForField(key_attributes);
970938

971939
::avro::CustomAttributes value_attributes;
972-
value_attributes.addAttribute(std::string(kFieldIdProp),
973-
std::to_string(*value_mapped_field.field_id), false);
940+
value_attributes.addAttribute(std::string(kValueIdProp),
941+
std::to_string(*value_mapped_field.field_id),
942+
/*addQuote=*/false);
974943
new_map_node->addCustomAttributesForField(value_attributes);
975944

976945
return new_map_node;

test/avro_schema_test.cc

Lines changed: 6 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <gtest/gtest.h>
2626

2727
#include "iceberg/avro/avro_schema_util_internal.h"
28-
#include "iceberg/json_internal.h"
2928
#include "iceberg/metadata_columns.h"
3029
#include "iceberg/name_mapping.h"
3130
#include "iceberg/schema.h"
@@ -1274,7 +1273,8 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
12741273
// Check that array element has field ID applied
12751274
const auto& element_node = array_node->leafAt(0);
12761275
EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
1277-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0, /*field_id=*/20));
1276+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0, /*field_id=*/20,
1277+
/*key=*/"element-id"));
12781278
}
12791279

12801280
TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
@@ -1314,8 +1314,10 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
13141314
EXPECT_EQ(map_node->leafAt(0)->type(), ::avro::AVRO_STRING);
13151315
EXPECT_EQ(map_node->leafAt(1)->type(), ::avro::AVRO_STRING);
13161316
ASSERT_EQ(map_node->customAttributes(), 2);
1317-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0, /*field_id=*/30));
1318-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1, /*field_id=*/31));
1317+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0, /*field_id=*/30,
1318+
/*key=*/"key-id"));
1319+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1, /*field_id=*/31,
1320+
/*key=*/"value-id"));
13191321
}
13201322

13211323
TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToComplexMap) {
@@ -1409,7 +1411,6 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
14091411
EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
14101412
EXPECT_EQ(union_node->leaves(), 2);
14111413

1412-
// Check that the non-null branch has field ID applied
14131414
const auto& non_null_branch = union_node->leafAt(1);
14141415
EXPECT_EQ(non_null_branch->type(), ::avro::AVRO_STRING);
14151416
}
@@ -1434,79 +1435,6 @@ TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
14341435

14351436
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
14361437
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1437-
ASSERT_THAT(result,
1438-
HasErrorMessage("Field ID is missing for field 'name' in nested mapping"));
1439-
}
1440-
1441-
TEST_F(NameMappingAvroSchemaTest, MissingFieldError) {
1442-
// Create a name mapping
1443-
auto name_mapping = CreateSimpleNameMapping();
1444-
1445-
// Create an Avro record schema with a field not in the mapping
1446-
std::string avro_schema_json = R"({
1447-
"type": "record",
1448-
"name": "test_record",
1449-
"fields": [
1450-
{"name": "id", "type": "int"},
1451-
{"name": "name", "type": "string"},
1452-
{"name": "unknown_field", "type": "int"}
1453-
]
1454-
})";
1455-
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
1456-
1457-
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
1458-
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1459-
ASSERT_THAT(result,
1460-
HasErrorMessage("Field 'unknown_field' not found in nested mapping"));
1461-
}
1462-
1463-
TEST_F(NameMappingAvroSchemaTest, MissingArrayElementError) {
1464-
// Create a name mapping without array element mapping
1465-
std::vector<MappedField> fields;
1466-
fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
1467-
auto name_mapping = NameMapping::Make(std::move(fields));
1468-
1469-
// Create an Avro array schema
1470-
std::string avro_schema_json = R"({
1471-
"type": "record",
1472-
"name": "test_record",
1473-
"fields": [
1474-
{"name": "id", "type": "int"},
1475-
{"name": "items", "type": {
1476-
"type": "array",
1477-
"items": "string"
1478-
}}
1479-
]
1480-
})";
1481-
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
1482-
1483-
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
1484-
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1485-
ASSERT_THAT(result, HasErrorMessage("Field 'items' not found in nested mapping"));
14861438
}
14871439

1488-
TEST_F(NameMappingAvroSchemaTest, MissingMapKeyValueError) {
1489-
// Create a name mapping without map key/value mapping
1490-
std::vector<MappedField> fields;
1491-
fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
1492-
auto name_mapping = NameMapping::Make(std::move(fields));
1493-
1494-
// Create an Avro map schema
1495-
std::string avro_schema_json = R"({
1496-
"type": "record",
1497-
"name": "test_record",
1498-
"fields": [
1499-
{"name": "id", "type": "int"},
1500-
{"name": "properties", "type": {
1501-
"type": "map",
1502-
"values": "string"
1503-
}}
1504-
]
1505-
})";
1506-
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
1507-
1508-
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
1509-
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1510-
ASSERT_THAT(result, HasErrorMessage("Field 'properties' not found in nested mapping"));
1511-
}
15121440
} // namespace iceberg::avro

0 commit comments

Comments
 (0)