@@ -47,8 +47,82 @@ void CheckFieldIdAt(const ::avro::NodePtr& node, size_t index, int32_t field_id,
4747 ASSERT_EQ (attrs.getAttribute (key), std::make_optional (std::to_string (field_id)));
4848}
4949
50+ // Helper function to check if a custom attribute exists for a field name preservation
51+ void CheckIcebergFieldName (const ::avro::NodePtr& node, size_t index,
52+ const std::string& original_name) {
53+ ASSERT_LT (index, node->customAttributes ());
54+ const auto & attrs = node->customAttributesAt (index);
55+ ASSERT_EQ (attrs.getAttribute (" iceberg-field-name" ), std::make_optional (original_name));
56+ }
57+
5058} // namespace
5159
60+ TEST (ValidAvroNameTest, ValidNames) {
61+ // Valid field names should return true
62+ EXPECT_TRUE (ValidAvroName (" valid_field" ));
63+ EXPECT_TRUE (ValidAvroName (" field123" ));
64+ EXPECT_TRUE (ValidAvroName (" _private" ));
65+ EXPECT_TRUE (ValidAvroName (" CamelCase" ));
66+ EXPECT_TRUE (ValidAvroName (" field_with_underscores" ));
67+ }
68+
69+ TEST (ValidAvroNameTest, InvalidNames) {
70+ // Names starting with numbers should return false
71+ EXPECT_FALSE (ValidAvroName (" 123field" ));
72+ EXPECT_FALSE (ValidAvroName (" 0value" ));
73+
74+ // Names with special characters should return false
75+ EXPECT_FALSE (ValidAvroName (" field-name" ));
76+ EXPECT_FALSE (ValidAvroName (" field.name" ));
77+ EXPECT_FALSE (ValidAvroName (" field name" ));
78+ EXPECT_FALSE (ValidAvroName (" field@name" ));
79+ EXPECT_FALSE (ValidAvroName (" field#name" ));
80+ }
81+
82+ TEST (ValidAvroNameTest, EmptyName) {
83+ // Empty name should return false
84+ EXPECT_FALSE (ValidAvroName (" " ));
85+ }
86+
87+ TEST (SanitizeFieldNameTest, ValidFieldNames) {
88+ // Valid field names should remain unchanged
89+ EXPECT_EQ (SanitizeFieldName (" valid_field" ), " valid_field" );
90+ EXPECT_EQ (SanitizeFieldName (" field123" ), " field123" );
91+ EXPECT_EQ (SanitizeFieldName (" _private" ), " _private" );
92+ EXPECT_EQ (SanitizeFieldName (" CamelCase" ), " CamelCase" );
93+ EXPECT_EQ (SanitizeFieldName (" field_with_underscores" ), " field_with_underscores" );
94+ }
95+
96+ TEST (SanitizeFieldNameTest, InvalidFieldNames) {
97+ // Field names starting with numbers should be prefixed with underscore
98+ EXPECT_EQ (SanitizeFieldName (" 123field" ), " _123field" );
99+ EXPECT_EQ (SanitizeFieldName (" 0value" ), " _0value" );
100+
101+ // Field names with special characters should be encoded with hex values
102+ EXPECT_EQ (SanitizeFieldName (" field-name" ), " field_x2Dname" );
103+ EXPECT_EQ (SanitizeFieldName (" field.name" ), " field_x2Ename" );
104+ EXPECT_EQ (SanitizeFieldName (" field name" ), " field_x20name" );
105+ EXPECT_EQ (SanitizeFieldName (" field@name" ), " field_x40name" );
106+ EXPECT_EQ (SanitizeFieldName (" field#name" ), " field_x23name" );
107+
108+ // Complex field names with multiple issues
109+ EXPECT_EQ (SanitizeFieldName (" 1field-with.special@chars" ),
110+ " _1field_x2Dwith_x2Especial_x40chars" );
111+ EXPECT_EQ (SanitizeFieldName (" user-email" ), " user_x2Demail" );
112+ }
113+
114+ TEST (SanitizeFieldNameTest, EdgeCases) {
115+ // Empty field name
116+ EXPECT_EQ (SanitizeFieldName (" " ), " " );
117+
118+ // Field name with only special characters
119+ EXPECT_EQ (SanitizeFieldName (" @#$" ), " _x40_x23_x24" );
120+
121+ // Field name starting with special character
122+ EXPECT_EQ (SanitizeFieldName (" -field" ), " _x2Dfield" );
123+ EXPECT_EQ (SanitizeFieldName (" .field" ), " _x2Efield" );
124+ }
125+
52126TEST (ToAvroNodeVisitorTest, BooleanType) {
53127 ::avro::NodePtr node;
54128 EXPECT_THAT (ToAvroNodeVisitor{}.Visit (BooleanType{}, &node), IsOk ());
@@ -181,6 +255,60 @@ TEST(ToAvroNodeVisitorTest, StructType) {
181255 EXPECT_EQ (node->leafAt (1 )->leafAt (1 )->type (), ::avro::AVRO_INT);
182256}
183257
258+ TEST (ToAvroNodeVisitorTest, StructTypeWithFieldNames) {
259+ StructType struct_type{
260+ {SchemaField{/* field_id=*/ 1 , " user-name" , iceberg::string (),
261+ /* optional=*/ false },
262+ SchemaField{/* field_id=*/ 2 , " valid_field" , iceberg::string (),
263+ /* optional=*/ false },
264+ SchemaField{/* field_id=*/ 3 , " email.address" , iceberg::string (),
265+ /* optional=*/ true },
266+ SchemaField{/* field_id=*/ 4 , " AnotherField" , iceberg::int32 (),
267+ /* optional=*/ true },
268+ SchemaField{/* field_id=*/ 5 , " 123field" , iceberg::int32 (),
269+ /* optional=*/ false },
270+ SchemaField{/* field_id=*/ 6 , " field with spaces" , iceberg::boolean (),
271+ /* optional=*/ true }}};
272+ ::avro::NodePtr node;
273+ EXPECT_THAT (ToAvroNodeVisitor{}.Visit (struct_type, &node), IsOk ());
274+ EXPECT_EQ (node->type (), ::avro::AVRO_RECORD);
275+
276+ ASSERT_EQ (node->names (), 6 );
277+
278+ EXPECT_EQ (node->nameAt (0 ), " user_x2Dname" ); // "user-name" -> "user_x2Dname"
279+ EXPECT_EQ (node->nameAt (2 ),
280+ " email_x2Eaddress" ); // "email.address" -> "email_x2Eaddress"
281+ EXPECT_EQ (node->nameAt (4 ), " _123field" ); // "123field" -> "_123field"
282+ EXPECT_EQ (
283+ node->nameAt (5 ),
284+ " field_x20with_x20spaces" ); // "field with spaces" -> "field_x20with_x20spaces"
285+
286+ EXPECT_EQ (node->nameAt (1 ), " valid_field" );
287+ EXPECT_EQ (node->nameAt (3 ), " AnotherField" );
288+
289+ ASSERT_EQ (node->customAttributes (), 6 );
290+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 0 , /* field_id=*/ 1 ));
291+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 1 , /* field_id=*/ 2 ));
292+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 2 , /* field_id=*/ 3 ));
293+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 3 , /* field_id=*/ 4 ));
294+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 4 , /* field_id=*/ 5 ));
295+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 5 , /* field_id=*/ 6 ));
296+
297+ const auto & attrs1 = node->customAttributesAt (1 ); // valid_field
298+ const auto & attrs3 = node->customAttributesAt (3 ); // AnotherField
299+ EXPECT_FALSE (attrs1.getAttribute (" iceberg-field-name" ).has_value ());
300+ EXPECT_FALSE (attrs3.getAttribute (" iceberg-field-name" ).has_value ());
301+
302+ ASSERT_NO_FATAL_FAILURE (
303+ CheckIcebergFieldName (node, /* index=*/ 0 , /* original_name=*/ " user-name" ));
304+ ASSERT_NO_FATAL_FAILURE (
305+ CheckIcebergFieldName (node, /* index=*/ 2 , /* original_name=*/ " email.address" ));
306+ ASSERT_NO_FATAL_FAILURE (
307+ CheckIcebergFieldName (node, /* index=*/ 4 , /* original_name=*/ " 123field" ));
308+ ASSERT_NO_FATAL_FAILURE (
309+ CheckIcebergFieldName (node, /* index=*/ 5 , /* original_name=*/ " field with spaces" ));
310+ }
311+
184312TEST (ToAvroNodeVisitorTest, ListType) {
185313 ListType list_type{SchemaField{/* field_id=*/ 5 , " element" , iceberg::string (),
186314 /* optional=*/ true }};
@@ -1436,5 +1564,4 @@ TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
14361564 auto result = MakeAvroNodeWithFieldIds (avro_schema.root (), *name_mapping);
14371565 ASSERT_THAT (result, IsError (ErrorKind::kInvalidSchema ));
14381566}
1439-
14401567} // namespace iceberg::avro
0 commit comments