Skip to content

Commit cda7ce2

Browse files
update code
1 parent a19781e commit cda7ce2

File tree

6 files changed

+66
-100
lines changed

6 files changed

+66
-100
lines changed

src/iceberg/avro/avro_reader.cc

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,9 @@ class AvroReader::Impl {
106106
if (has_id_visitor.HasNoIds()) {
107107
// Apply field IDs based on name mapping if available
108108
if (options.name_mapping) {
109-
MappedField mapped_field;
110-
// Convert NameMapping to MappedFields for nested mapping
111-
mapped_field.nested_mapping =
112-
std::make_shared<MappedFields>(options.name_mapping->AsMappedFields());
113109
ICEBERG_ASSIGN_OR_RAISE(
114110
auto new_root_node,
115-
CreateAvroNodeWithFieldIds(file_schema.root(), mapped_field));
111+
CreateAvroNodeWithFieldIds(file_schema.root(), *options.name_mapping));
116112

117113
// Create a new schema with the updated root node
118114
auto new_schema = ::avro::ValidSchema(new_root_node);

src/iceberg/avro/avro_schema_util.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,4 +1006,11 @@ Result<::avro::NodePtr> CreateAvroNodeWithFieldIds(const ::avro::NodePtr& origin
10061006
}
10071007
}
10081008

1009+
Result<::avro::NodePtr> CreateAvroNodeWithFieldIds(const ::avro::NodePtr& original_node,
1010+
const NameMapping& mapping) {
1011+
MappedField mapped_field;
1012+
mapped_field.nested_mapping = std::make_shared<MappedFields>(mapping.AsMappedFields());
1013+
return CreateAvroNodeWithFieldIds(original_node, mapped_field);
1014+
}
1015+
10091016
} // namespace iceberg::avro

src/iceberg/avro/avro_schema_util_internal.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,4 +152,11 @@ bool HasMapLogicalType(const ::avro::NodePtr& node);
152152
Result<::avro::NodePtr> CreateAvroNodeWithFieldIds(const ::avro::NodePtr& original_node,
153153
const MappedField& mapped_field);
154154

155+
/// \brief Create a new Avro node with field IDs from name mapping.
156+
/// \param original_node The original Avro node to copy.
157+
/// \param mapping The name mapping to apply field IDs from.
158+
/// \return A new Avro node with field IDs applied, or an error.
159+
Result<::avro::NodePtr> CreateAvroNodeWithFieldIds(const ::avro::NodePtr& original_node,
160+
const NameMapping& mapping);
161+
155162
} // namespace iceberg::avro

test/CMakeLists.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,11 @@ configure_file("${CMAKE_SOURCE_DIR}/test/test_config.h.in"
2929
"${CMAKE_BINARY_DIR}/iceberg/test/test_config.h")
3030

3131
add_executable(schema_test)
32+
target_include_directories(schema_test PRIVATE "${CMAKE_BINARY_DIR}")
3233
target_sources(schema_test
33-
PRIVATE schema_test.cc
34+
PRIVATE name_mapping_test.cc
35+
test_common.cc
36+
schema_test.cc
3437
schema_field_test.cc
3538
type_test.cc
3639
transform_test.cc
@@ -82,8 +85,7 @@ if(ICEBERG_BUILD_BUNDLE)
8285
avro_schema_test.cc
8386
avro_stream_test.cc
8487
manifest_list_reader_test.cc
85-
test_common.cc
86-
name_mapping_test.cc)
88+
test_common.cc)
8789
target_link_libraries(avro_test PRIVATE iceberg_bundle_static GTest::gtest_main
8890
GTest::gmock)
8991
target_include_directories(avro_test PRIVATE "${CMAKE_BINARY_DIR}")

test/avro_schema_test.cc

Lines changed: 46 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323
#include <avro/NodeImpl.hh>
2424
#include <avro/Types.hh>
2525
#include <gtest/gtest.h>
26-
#include <nlohmann/json.hpp>
2726

28-
#include "iceberg/avro/avro_reader.h"
2927
#include "iceberg/avro/avro_schema_util_internal.h"
3028
#include "iceberg/json_internal.h"
3129
#include "iceberg/metadata_columns.h"
@@ -50,24 +48,6 @@ void CheckFieldIdAt(const ::avro::NodePtr& node, size_t index, int32_t field_id,
5048
ASSERT_EQ(attrs.getAttribute(key), std::make_optional(std::to_string(field_id)));
5149
}
5250

53-
// Helper function to create a test name mapping
54-
std::unique_ptr<NameMapping> CreateTestNameMapping() {
55-
std::vector<MappedField> fields;
56-
fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
57-
fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2});
58-
59-
// Create nested mapping for the data field
60-
std::vector<MappedField> nested_fields;
61-
nested_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 3});
62-
nested_fields.emplace_back(MappedField{.names = {"description"}, .field_id = 4});
63-
auto nested_mapping = MappedFields::Make(std::move(nested_fields));
64-
65-
fields.emplace_back(MappedField{
66-
.names = {"data"}, .field_id = 5, .nested_mapping = std::move(nested_mapping)});
67-
68-
return NameMapping::Make(std::move(fields));
69-
}
70-
7151
} // namespace
7252

7353
TEST(ToAvroNodeVisitorTest, BooleanType) {
@@ -1167,23 +1147,20 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) {
11671147
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
11681148

11691149
auto name_mapping = CreateSimpleNameMapping();
1170-
MappedField mapped_field;
1171-
mapped_field.nested_mapping =
1172-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
11731150

1174-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1151+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
11751152
ASSERT_THAT(result, IsOk());
11761153

1177-
const auto& new_node = *result;
1178-
EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
1179-
EXPECT_EQ(new_node->names(), 3);
1180-
EXPECT_EQ(new_node->leaves(), 3);
1154+
const auto& node = *result;
1155+
EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
1156+
EXPECT_EQ(node->names(), 3);
1157+
EXPECT_EQ(node->leaves(), 3);
11811158

11821159
// Check that field IDs are properly applied
1183-
ASSERT_EQ(new_node->customAttributes(), 3);
1184-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 0, 1));
1185-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 1, 2));
1186-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 2, 3));
1160+
ASSERT_EQ(node->customAttributes(), 3);
1161+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
1162+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
1163+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3));
11871164
}
11881165

11891166
TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
@@ -1207,34 +1184,31 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
12071184
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
12081185

12091186
auto name_mapping = CreateNestedNameMapping();
1210-
MappedField mapped_field;
1211-
mapped_field.nested_mapping =
1212-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
12131187

1214-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1188+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
12151189
ASSERT_THAT(result, IsOk());
12161190

1217-
const auto& new_node = *result;
1218-
EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
1219-
EXPECT_EQ(new_node->names(), 2);
1220-
EXPECT_EQ(new_node->leaves(), 2);
1191+
const auto& node = *result;
1192+
EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
1193+
EXPECT_EQ(node->names(), 2);
1194+
EXPECT_EQ(node->leaves(), 2);
12211195

12221196
// Check that field IDs are properly applied to top-level fields
1223-
ASSERT_EQ(new_node->customAttributes(), 2);
1224-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 0, 1));
1225-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 1, 2));
1197+
ASSERT_EQ(node->customAttributes(), 2);
1198+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
1199+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
12261200

12271201
// Check nested record
1228-
const auto& address_node = new_node->leafAt(1);
1202+
const auto& address_node = node->leafAt(1);
12291203
EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD);
12301204
EXPECT_EQ(address_node->names(), 3);
12311205
EXPECT_EQ(address_node->leaves(), 3);
12321206

12331207
// Check that field IDs are properly applied to nested fields
12341208
ASSERT_EQ(address_node->customAttributes(), 3);
1235-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, 0, 10));
1236-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, 1, 11));
1237-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, 2, 12));
1209+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0, /*field_id=*/10));
1210+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1, /*field_id=*/11));
1211+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2, /*field_id=*/12));
12381212
}
12391213

12401214
TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
@@ -1253,22 +1227,28 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
12531227
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
12541228

12551229
auto name_mapping = CreateArrayNameMapping();
1256-
MappedField mapped_field;
1257-
mapped_field.nested_mapping =
1258-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
12591230

1260-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1231+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
12611232
ASSERT_THAT(result, IsOk());
12621233

12631234
const auto& new_node = *result;
12641235
EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
12651236
EXPECT_EQ(new_node->names(), 2);
12661237
EXPECT_EQ(new_node->leaves(), 2);
12671238

1268-
// Check array field structure only - don't access any attributes
1239+
// Check that field IDs are properly applied to top-level fields
1240+
ASSERT_EQ(new_node->customAttributes(), 2);
1241+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0, /*field_id=*/1));
1242+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1, /*field_id=*/2));
1243+
1244+
// Check array field structure and element field ID
12691245
const auto& array_node = new_node->leafAt(1);
12701246
EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
12711247
EXPECT_EQ(array_node->leaves(), 1);
1248+
1249+
// Check that array element has field ID applied
1250+
const auto& element_node = array_node->leafAt(0);
1251+
EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
12721252
}
12731253

12741254
TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
@@ -1287,19 +1267,21 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
12871267
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
12881268

12891269
auto name_mapping = CreateMapNameMapping();
1290-
MappedField mapped_field;
1291-
mapped_field.nested_mapping =
1292-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
12931270

1294-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1271+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
12951272
ASSERT_THAT(result, IsOk());
12961273

12971274
const auto& new_node = *result;
12981275
EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
12991276
EXPECT_EQ(new_node->names(), 2);
13001277
EXPECT_EQ(new_node->leaves(), 2);
13011278

1302-
// Check map field structure only - don't access any attributes
1279+
// Check that field IDs are properly applied to top-level fields
1280+
ASSERT_EQ(new_node->customAttributes(), 2);
1281+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0, /*field_id=*/1));
1282+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1, /*field_id=*/2));
1283+
1284+
// Check map field structure and key-value field IDs
13031285
const auto& map_node = new_node->leafAt(1);
13041286
EXPECT_EQ(map_node->type(), ::avro::AVRO_MAP);
13051287
ASSERT_GE(map_node->leaves(), 2);
@@ -1320,11 +1302,8 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
13201302
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
13211303

13221304
auto name_mapping = CreateUnionNameMapping();
1323-
MappedField mapped_field;
1324-
mapped_field.nested_mapping =
1325-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
13261305

1327-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1306+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
13281307
ASSERT_THAT(result, IsOk());
13291308

13301309
const auto& new_node = *result;
@@ -1334,8 +1313,8 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
13341313

13351314
// Check that field IDs are properly applied to top-level fields
13361315
ASSERT_EQ(new_node->customAttributes(), 2);
1337-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 0, 1));
1338-
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, 1, 2));
1316+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0, /*field_id=*/1));
1317+
ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1, /*field_id=*/2));
13391318

13401319
// Check union field
13411320
const auto& union_node = new_node->leafAt(1);
@@ -1365,11 +1344,7 @@ TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
13651344
})";
13661345
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
13671346

1368-
MappedField mapped_field;
1369-
mapped_field.nested_mapping =
1370-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
1371-
1372-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1347+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
13731348
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
13741349
ASSERT_THAT(result,
13751350
HasErrorMessage("Field ID is missing for field 'name' in nested mapping"));
@@ -1391,11 +1366,7 @@ TEST_F(NameMappingAvroSchemaTest, MissingFieldError) {
13911366
})";
13921367
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
13931368

1394-
MappedField mapped_field;
1395-
mapped_field.nested_mapping =
1396-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
1397-
1398-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1369+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
13991370
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
14001371
ASSERT_THAT(result,
14011372
HasErrorMessage("Field 'unknown_field' not found in nested mapping"));
@@ -1421,11 +1392,7 @@ TEST_F(NameMappingAvroSchemaTest, MissingArrayElementError) {
14211392
})";
14221393
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
14231394

1424-
MappedField mapped_field;
1425-
mapped_field.nested_mapping =
1426-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
1427-
1428-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1395+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
14291396
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
14301397
ASSERT_THAT(result, HasErrorMessage("Field 'items' not found in nested mapping"));
14311398
}
@@ -1450,11 +1417,7 @@ TEST_F(NameMappingAvroSchemaTest, MissingMapKeyValueError) {
14501417
})";
14511418
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
14521419

1453-
MappedField mapped_field;
1454-
mapped_field.nested_mapping =
1455-
std::make_shared<MappedFields>(name_mapping->AsMappedFields());
1456-
1457-
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), mapped_field);
1420+
auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
14581421
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
14591422
ASSERT_THAT(result, HasErrorMessage("Field 'properties' not found in nested mapping"));
14601423
}

test/name_mapping_test.cc

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,9 @@
2424
#include <unordered_set>
2525
#include <vector>
2626

27-
#include <avro/Compiler.hh>
28-
#include <avro/NodeImpl.hh>
29-
#include <avro/Types.hh>
3027
#include <gmock/gmock.h>
3128
#include <gtest/gtest.h>
3229

33-
#include "iceberg/avro/avro_reader.h"
34-
#include "iceberg/avro/avro_schema_util_internal.h"
35-
#include "iceberg/file_reader.h"
36-
#include "iceberg/json_internal.h"
37-
#include "matchers.h"
38-
3930
namespace iceberg {
4031

4132
class NameMappingTest : public ::testing::Test {

0 commit comments

Comments
 (0)