diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc index 1df20c60b..2ab2f7e70 100644 --- a/src/iceberg/schema.cc +++ b/src/iceberg/schema.cc @@ -22,6 +22,7 @@ #include #include +#include "iceberg/schema_internal.h" #include "iceberg/type.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep #include "iceberg/util/macros.h" @@ -260,4 +261,148 @@ void NameToIdVisitor::Finish() { } } +/// \brief Visitor for pruning columns based on selected field IDs. +/// +/// This visitor traverses a schema and creates a projected version containing only +/// the specified fields. When `select_full_types` is true, a field with all its +/// sub-fields are selected if its field-id has been selected; otherwise, only leaf +/// fields of selected field-ids are selected. +/// +/// \note It returns an error when projection is not successful. +class PruneColumnVisitor { + public: + PruneColumnVisitor(const std::unordered_set& selected_ids, + bool select_full_types) + : selected_ids_(selected_ids), select_full_types_(select_full_types) {} + + Result> Visit(const std::shared_ptr& type) const { + switch (type->type_id()) { + case TypeId::kStruct: + return Visit(internal::checked_pointer_cast(type)); + case TypeId::kList: + return Visit(internal::checked_pointer_cast(type)); + case TypeId::kMap: + return Visit(internal::checked_pointer_cast(type)); + default: + return nullptr; + } + } + + Result> Visit(const SchemaField& field) const { + if (selected_ids_.contains(field.field_id())) { + return (select_full_types_ || field.type()->is_primitive()) ? field.type() + : Visit(field.type()); + } + return Visit(field.type()); + } + + static SchemaField MakeField(const SchemaField& field, std::shared_ptr type) { + return {field.field_id(), std::string(field.name()), std::move(type), + field.optional(), std::string(field.doc())}; + } + + Result> Visit(const std::shared_ptr& type) const { + bool same_types = true; + std::vector selected_fields; + for (const auto& field : type->fields()) { + ICEBERG_ASSIGN_OR_RAISE(auto child_type, Visit(field)); + if (child_type) { + same_types = same_types && (child_type == field.type()); + selected_fields.emplace_back(MakeField(field, std::move(child_type))); + } + } + + if (selected_fields.empty()) { + return nullptr; + } else if (same_types && selected_fields.size() == type->fields().size()) { + return type; + } + return std::make_shared(std::move(selected_fields)); + } + + Result> Visit(const std::shared_ptr& type) const { + const auto& elem_field = type->fields()[0]; + ICEBERG_ASSIGN_OR_RAISE(auto elem_type, Visit(elem_field)); + if (elem_type == nullptr) { + return nullptr; + } else if (elem_type == elem_field.type()) { + return type; + } + return std::make_shared(MakeField(elem_field, std::move(elem_type))); + } + + Result> Visit(const std::shared_ptr& type) const { + const auto& key_field = type->fields()[0]; + const auto& value_field = type->fields()[1]; + ICEBERG_ASSIGN_OR_RAISE(auto key_type, Visit(key_field)); + ICEBERG_ASSIGN_OR_RAISE(auto value_type, Visit(value_field)); + + if (key_type == nullptr && value_type == nullptr) { + return nullptr; + } else if (value_type == value_field.type() && + (key_type == key_field.type() || key_type == nullptr)) { + return type; + } else if (value_type == nullptr) { + return InvalidArgument("Cannot project Map without value field"); + } + return std::make_shared( + (key_type == nullptr ? key_field : MakeField(key_field, std::move(key_type))), + MakeField(value_field, std::move(value_type))); + } + + private: + const std::unordered_set& selected_ids_; + const bool select_full_types_; +}; + +Result> Schema::Select(std::span names, + bool case_sensitive) const { + const std::string kAllColumns = "*"; + if (std::ranges::find(names, kAllColumns) != names.end()) { + auto struct_type = ToStructType(*this); + return FromStructType(std::move(*struct_type), std::nullopt); + } + + std::unordered_set selected_ids; + for (const auto& name : names) { + ICEBERG_ASSIGN_OR_RAISE(auto result, FindFieldByName(name, case_sensitive)); + if (result.has_value()) { + selected_ids.insert(result.value().get().field_id()); + } + } + + PruneColumnVisitor visitor(selected_ids, /*select_full_types=*/true); + ICEBERG_ASSIGN_OR_RAISE( + auto pruned_type, visitor.Visit(std::shared_ptr(ToStructType(*this)))); + + if (!pruned_type) { + return std::make_unique(std::vector{}, std::nullopt); + } + + if (pruned_type->type_id() != TypeId::kStruct) { + return InvalidSchema("Projected type must be a struct type"); + } + + return FromStructType(std::move(internal::checked_cast(*pruned_type)), + std::nullopt); +} + +Result> Schema::Project( + const std::unordered_set& field_ids) const { + PruneColumnVisitor visitor(field_ids, /*select_full_types=*/false); + ICEBERG_ASSIGN_OR_RAISE( + auto project_type, visitor.Visit(std::shared_ptr(ToStructType(*this)))); + + if (!project_type) { + return std::make_unique(std::vector{}, std::nullopt); + } + + if (project_type->type_id() != TypeId::kStruct) { + return InvalidSchema("Projected type must be a struct type"); + } + + return FromStructType(std::move(internal::checked_cast(*project_type)), + std::nullopt); +} + } // namespace iceberg diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h index 260d9d342..81f9aa394 100644 --- a/src/iceberg/schema.h +++ b/src/iceberg/schema.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "iceberg/iceberg_export.h" @@ -53,9 +54,9 @@ class ICEBERG_EXPORT Schema : public StructType { /// /// A schema is identified by a unique ID for the purposes of schema /// evolution. - [[nodiscard]] std::optional schema_id() const; + std::optional schema_id() const; - [[nodiscard]] std::string ToString() const override; + std::string ToString() const override; /// \brief Find the SchemaField by field name. /// @@ -66,18 +67,37 @@ class ICEBERG_EXPORT Schema : public StructType { /// canonical name 'm.value.x' /// FIXME: Currently only handles ASCII lowercase conversion; extend to support /// non-ASCII characters (e.g., using std::towlower or ICU) - [[nodiscard]] Result>> - FindFieldByName(std::string_view name, bool case_sensitive = true) const; + Result>> FindFieldByName( + std::string_view name, bool case_sensitive = true) const; /// \brief Find the SchemaField by field id. - [[nodiscard]] Result>> - FindFieldById(int32_t field_id) const; + Result>> FindFieldById( + int32_t field_id) const; + + /// \brief Creates a projected schema from selected field names. + /// + /// \param names Selected field names and nested names are dot-concatenated. + /// \param case_sensitive Whether name matching is case-sensitive (default: true). + /// \return Projected schema containing only selected fields. + /// \note If the field name of a nested type has been selected, all of its + /// sub-fields will be selected. + Result> Select(std::span names, + bool case_sensitive = true) const; + + /// \brief Creates a projected schema from selected field IDs. + /// + /// \param field_ids Set of field IDs to select + /// \return Projected schema containing only the specified fields. + /// \note Field ID of a nested field may not be projected unless at least + /// one of its sub-fields has been projected. + Result> Project( + const std::unordered_set& field_ids) const; friend bool operator==(const Schema& lhs, const Schema& rhs) { return lhs.Equals(rhs); } private: /// \brief Compare two schemas for equality. - [[nodiscard]] bool Equals(const Schema& other) const; + bool Equals(const Schema& other) const; Status InitIdToFieldMap() const; Status InitNameToIdMap() const; diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc index beb973b28..e020a9b7b 100644 --- a/src/iceberg/schema_internal.cc +++ b/src/iceberg/schema_internal.cc @@ -325,4 +325,9 @@ Result> FromArrowSchema(const ArrowSchema& schema, return FromStructType(std::move(struct_type), schema_id); } +std::unique_ptr ToStructType(const Schema& schema) { + std::vector fields(schema.fields().begin(), schema.fields().end()); + return std::make_unique(std::move(fields)); +} + } // namespace iceberg diff --git a/src/iceberg/schema_internal.h b/src/iceberg/schema_internal.h index 8b290852a..5c7209d64 100644 --- a/src/iceberg/schema_internal.h +++ b/src/iceberg/schema_internal.h @@ -53,4 +53,6 @@ Result> FromArrowSchema(const ArrowSchema& schema, std::unique_ptr FromStructType(StructType&& struct_type, std::optional schema_id); +std::unique_ptr ToStructType(const Schema& schema); + } // namespace iceberg diff --git a/test/schema_test.cc b/test/schema_test.cc index b01ffe9ba..3d10fb824 100644 --- a/test/schema_test.cc +++ b/test/schema_test.cc @@ -26,10 +26,24 @@ #include #include +#include "gtest/gtest.h" +#include "iceberg/result.h" #include "iceberg/schema_field.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep #include "matchers.h" +template +std::shared_ptr MakeStructType(Args&&... args) { + return std::make_shared( + std::vector{std::move(args)...}); +} + +template +std::unique_ptr MakeSchema(Args&&... args) { + return std::make_unique( + std::vector{std::move(args)...}, std::nullopt); +} + TEST(SchemaTest, Basics) { { iceberg::SchemaField field1(5, "foo", iceberg::int32(), true); @@ -492,7 +506,444 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) { ::testing::HasSubstr("Duplicate field id found: 1")); } -// Thread safety tests for Lazy Init +namespace { + +iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; } +iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; } +iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; } +iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; } +iceberg::SchemaField Street() { return {11, "street", iceberg::string(), true}; } +iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; } +iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; } +iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; } +iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; } +iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false}; } +iceberg::SchemaField Element() { return {41, "element", iceberg::string(), false}; } + +static std::unique_ptr BasicSchema() { + return MakeSchema(Id(), Name(), Age(), Email()); +} + +static std::unique_ptr AddressSchema() { + auto address_type = MakeStructType(Street(), City(), Zip()); + auto address_field = iceberg::SchemaField{14, "address", std::move(address_type), true}; + return MakeSchema(Id(), Name(), std::move(address_field)); +} + +static std::unique_ptr NestedUserSchema() { + auto address_type = MakeStructType(Street(), City()); + auto address_field = iceberg::SchemaField{16, "address", std::move(address_type), true}; + auto user_type = MakeStructType(Name(), address_field); + auto user_field = iceberg::SchemaField{17, "user", std::move(user_type), true}; + return MakeSchema(Id(), user_field); +} + +static std::unique_ptr MultiLevelSchema() { + auto profile_type = MakeStructType(Name(), Age()); + auto profile_field = iceberg::SchemaField{23, "profile", std::move(profile_type), true}; + + auto settings_type = MakeStructType(Theme()); + auto settings_field = + iceberg::SchemaField{25, "settings", std::move(settings_type), true}; + + auto user_type = MakeStructType(profile_field, settings_field); + auto user_field = iceberg::SchemaField{26, "user", std::move(user_type), true}; + + return MakeSchema(Id(), user_field); +} + +static std::unique_ptr ListSchema() { + auto list_type = std::make_shared(Element()); + auto tags_field = iceberg::SchemaField{42, "tags", std::move(list_type), true}; + + auto user_type = MakeStructType(Name(), Age()); + auto user_field = iceberg::SchemaField{45, "user", std::move(user_type), true}; + + return MakeSchema(Id(), tags_field, user_field); +} + +static std::unique_ptr MapSchema() { + auto map_type = std::make_shared(Key(), Value()); + auto map_field = iceberg::SchemaField{33, "map_field", std::move(map_type), true}; + return MakeSchema(map_field); +} + +static std::unique_ptr ListWithStructElementSchema() { + auto struct_type = MakeStructType(Name(), Age()); + auto element_field = iceberg::SchemaField{53, "element", std::move(struct_type), false}; + auto list_type = std::make_shared(element_field); + auto list_field = iceberg::SchemaField{54, "list_field", std::move(list_type), true}; + return MakeSchema(list_field); +} + +static std::unique_ptr ListOfMapSchema() { + auto map_value_struct = MakeStructType(Name(), Age()); + auto map_value_field = + iceberg::SchemaField{64, "value", std::move(map_value_struct), false}; + auto map_type = std::make_shared(Key(), map_value_field); + auto list_element = iceberg::SchemaField{65, "element", std::move(map_type), false}; + auto list_type = std::make_shared(list_element); + auto list_field = iceberg::SchemaField{66, "list_field", std::move(list_type), true}; + return MakeSchema(list_field); +} + +static std::unique_ptr ComplexMapSchema() { + auto key_id_field = iceberg::SchemaField{71, "id", iceberg::int32(), false}; + auto key_name_field = iceberg::SchemaField{72, "name", iceberg::string(), false}; + auto key_struct = MakeStructType(key_id_field, key_name_field); + auto key_field = iceberg::SchemaField{73, "key", std::move(key_struct), false}; + + auto value_id_field = iceberg::SchemaField{74, "id", iceberg::int32(), false}; + auto value_name_field = iceberg::SchemaField{75, "name", iceberg::string(), false}; + auto value_struct = MakeStructType(value_id_field, value_name_field); + auto value_field = iceberg::SchemaField{76, "value", std::move(value_struct), false}; + + auto map_type = std::make_shared(key_field, value_field); + auto map_field = iceberg::SchemaField{77, "map_field", std::move(map_type), true}; + return MakeSchema(map_field); +} +} // namespace + +struct SelectTestParam { + std::string test_name; + std::function()> create_schema; + std::vector select_fields; + std::function()> expected_schema; + bool should_succeed; + std::string expected_error_message; + bool case_sensitive = true; +}; + +class SelectParamTest : public ::testing::TestWithParam {}; + +TEST_P(SelectParamTest, SelectFields) { + const auto& param = GetParam(); + auto input_schema = param.create_schema(); + auto result = input_schema->Select(param.select_fields, param.case_sensitive); + + if (param.should_succeed) { + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result.value(), *param.expected_schema()); + } else { + ASSERT_FALSE(result.has_value()); + ASSERT_THAT(result, iceberg::IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, iceberg::HasErrorMessage(param.expected_error_message)); + } +} + +INSTANTIATE_TEST_SUITE_P( + SelectTestCases, SelectParamTest, + ::testing::Values( + SelectTestParam{.test_name = "SelectAllColumns", + .create_schema = []() { return BasicSchema(); }, + .select_fields = {"*"}, + .expected_schema = []() { return BasicSchema(); }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectSingleField", + .create_schema = []() { return BasicSchema(); }, + .select_fields = {"name"}, + .expected_schema = []() { return MakeSchema(Name()); }, + .should_succeed = true}, + + SelectTestParam{ + .test_name = "SelectMultipleFields", + .create_schema = []() { return BasicSchema(); }, + .select_fields = {"id", "name", "age"}, + .expected_schema = []() { return MakeSchema(Id(), Name(), Age()); }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectNonExistentField", + .create_schema = []() { return BasicSchema(); }, + .select_fields = {"nonexistent"}, + .expected_schema = []() { return MakeSchema(); }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectCaseSensitive", + .create_schema = []() { return BasicSchema(); }, + .select_fields = {"Name"}, // case-sensitive + .expected_schema = []() { return MakeSchema(); }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectCaseInsensitive", + .create_schema = []() { return BasicSchema(); }, + .select_fields = {"Name"}, // case-insensitive + .expected_schema = []() { return MakeSchema(Name()); }, + .should_succeed = true, + .case_sensitive = false})); + +INSTANTIATE_TEST_SUITE_P( + SelectNestedTestCases, SelectParamTest, + ::testing::Values(SelectTestParam{ + .test_name = "SelectTopLevelFields", + .create_schema = []() { return AddressSchema(); }, + .select_fields = {"id", "name"}, + .expected_schema = []() { return MakeSchema(Id(), Name()); }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectNestedField", + .create_schema = []() { return AddressSchema(); }, + .select_fields = {"address.street"}, + .expected_schema = + []() { + auto address_type = MakeStructType(Street()); + auto address_field = iceberg::SchemaField{ + 14, "address", std::move(address_type), + true}; + return MakeSchema(address_field); + }, + .should_succeed = true})); + +INSTANTIATE_TEST_SUITE_P( + SelectMultiLevelTestCases, SelectParamTest, + ::testing::Values( + SelectTestParam{.test_name = "SelectTopLevelAndNestedFields", + .create_schema = []() { return NestedUserSchema(); }, + .select_fields = {"id", "user.name", "user.address.street"}, + .expected_schema = + []() { + auto address_type = MakeStructType(Street()); + auto address_field = iceberg::SchemaField{ + 16, "address", std::move(address_type), true}; + auto user_type = MakeStructType(Name(), address_field); + auto user_field = iceberg::SchemaField{ + 17, "user", std::move(user_type), true}; + return MakeSchema(Id(), user_field); + }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectNestedFieldsAtDifferentLevels", + .create_schema = []() { return MultiLevelSchema(); }, + .select_fields = {"user.profile.name", "user.settings.theme"}, + .expected_schema = + []() { + auto profile_type = MakeStructType(Name()); + auto profile_field = iceberg::SchemaField{ + 23, "profile", std::move(profile_type), true}; + + auto settings_type = MakeStructType(Theme()); + auto settings_field = iceberg::SchemaField{ + 25, "settings", std::move(settings_type), true}; + + auto user_type = + MakeStructType(profile_field, settings_field); + auto user_field = iceberg::SchemaField{ + 26, "user", std::move(user_type), true}; + return MakeSchema(user_field); + }, + .should_succeed = true}, + + SelectTestParam{.test_name = "SelectListAndNestedFields", + .create_schema = []() { return ListSchema(); }, + .select_fields = {"id", "user.name"}, + .expected_schema = + []() { + auto user_type = MakeStructType(Name()); + auto user_field = iceberg::SchemaField{ + 45, "user", std::move(user_type), true}; + return MakeSchema(Id(), user_field); + }, + .should_succeed = true})); + +struct ProjectTestParam { + std::string test_name; + std::function()> create_schema; + std::unordered_set selected_ids; + std::function()> expected_schema; + bool should_succeed; + std::string expected_error_message; +}; + +class ProjectParamTest : public ::testing::TestWithParam {}; + +TEST_P(ProjectParamTest, ProjectFields) { + const auto& param = GetParam(); + auto input_schema = param.create_schema(); + auto result = input_schema->Project(param.selected_ids); + + if (param.should_succeed) { + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result.value(), *param.expected_schema()); + } else { + ASSERT_FALSE(result.has_value()); + ASSERT_THAT(result, iceberg::IsError(iceberg::ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, iceberg::HasErrorMessage(param.expected_error_message)); + } +} + +INSTANTIATE_TEST_SUITE_P( + ProjectTestCases, ProjectParamTest, + ::testing::Values(ProjectTestParam{.test_name = "ProjectAllFields", + .create_schema = []() { return BasicSchema(); }, + .selected_ids = {1, 2, 3, 4}, + .expected_schema = []() { return BasicSchema(); }, + .should_succeed = true}, + + ProjectTestParam{ + .test_name = "ProjectSingleField", + .create_schema = []() { return BasicSchema(); }, + .selected_ids = {2}, + .expected_schema = []() { return MakeSchema(Name()); }, + .should_succeed = true}, + + ProjectTestParam{.test_name = "ProjectNonExistentFieldId", + .create_schema = []() { return BasicSchema(); }, + .selected_ids = {999}, + .expected_schema = []() { return MakeSchema(); }, + .should_succeed = true}, + + ProjectTestParam{.test_name = "ProjectEmptySelection", + .create_schema = []() { return BasicSchema(); }, + .selected_ids = {}, + .expected_schema = []() { return MakeSchema(); }, + .should_succeed = true})); + +INSTANTIATE_TEST_SUITE_P(ProjectNestedTestCases, ProjectParamTest, + ::testing::Values(ProjectTestParam{ + .test_name = "ProjectNestedStructField", + .create_schema = []() { return AddressSchema(); }, + .selected_ids = {11}, + .expected_schema = + []() { + auto address_type = MakeStructType(Street()); + auto address_field = iceberg::SchemaField{ + 14, "address", std::move(address_type), true}; + return MakeSchema(address_field); + }, + .should_succeed = true})); + +INSTANTIATE_TEST_SUITE_P( + ProjectMultiLevelTestCases, ProjectParamTest, + ::testing::Values( + ProjectTestParam{.test_name = "ProjectTopLevelAndNestedFields", + .create_schema = []() { return NestedUserSchema(); }, + .selected_ids = {1, 2, 11}, + .expected_schema = + []() { + auto address_type = MakeStructType(Street()); + auto address_field = iceberg::SchemaField{ + 16, "address", std::move(address_type), true}; + auto user_type = MakeStructType(Name(), address_field); + auto user_field = iceberg::SchemaField{ + 17, "user", std::move(user_type), true}; + return MakeSchema(Id(), user_field); + }, + .should_succeed = true}, + + ProjectTestParam{.test_name = "ProjectNestedFieldsAtDifferentLevels", + .create_schema = []() { return MultiLevelSchema(); }, + .selected_ids = {2, 24}, + .expected_schema = + []() { + auto profile_type = MakeStructType(Name()); + auto profile_field = iceberg::SchemaField{ + 23, "profile", std::move(profile_type), true}; + + auto settings_type = MakeStructType(Theme()); + auto settings_field = iceberg::SchemaField{ + 25, "settings", std::move(settings_type), true}; + + auto user_type = + MakeStructType(profile_field, settings_field); + auto user_field = iceberg::SchemaField{ + 26, "user", std::move(user_type), true}; + return MakeSchema(user_field); + }, + .should_succeed = true}, + + ProjectTestParam{.test_name = "ProjectListAndNestedFields", + .create_schema = []() { return ListSchema(); }, + .selected_ids = {1, 2}, + .expected_schema = + []() { + auto user_type = MakeStructType(Name()); + auto user_field = iceberg::SchemaField{ + 45, "user", std::move(user_type), true}; + return MakeSchema(Id(), user_field); + }, + .should_succeed = true})); + +INSTANTIATE_TEST_SUITE_P( + ProjectMapErrorTestCases, ProjectParamTest, + ::testing::Values(ProjectTestParam{ + .test_name = "ProjectMapWithOnlyKey", + .create_schema = []() { return MapSchema(); }, + .selected_ids = {31}, // Only select key field, not value field + .expected_schema = []() { return nullptr; }, + .should_succeed = false, + .expected_error_message = "Cannot project Map without value field"})); + +INSTANTIATE_TEST_SUITE_P( + ProjectListAndMapTestCases, ProjectParamTest, + ::testing::Values( + ProjectTestParam{.test_name = "ProjectListElement", + .create_schema = []() { return ListWithStructElementSchema(); }, + .selected_ids = {2}, // Only select name field from list element + .expected_schema = + []() { + auto struct_type = MakeStructType(Name()); + auto element_field = iceberg::SchemaField{ + 53, "element", std::move(struct_type), false}; + auto list_type = + std::make_shared(element_field); + auto list_field = iceberg::SchemaField{ + 54, "list_field", std::move(list_type), true}; + return MakeSchema(list_field); + }, + .should_succeed = true}, + + ProjectTestParam{.test_name = "ProjectListOfMap", + .create_schema = []() { return ListOfMapSchema(); }, + .selected_ids = {2, 3}, + .expected_schema = + []() { + auto map_value_struct = MakeStructType(Name(), Age()); + auto map_value_field = iceberg::SchemaField{ + 64, "value", std::move(map_value_struct), false}; + auto map_type = std::make_shared( + Key(), map_value_field); + auto list_element = iceberg::SchemaField{ + 65, "element", std::move(map_type), false}; + auto list_type = + std::make_shared(list_element); + auto list_field = iceberg::SchemaField{ + 66, "list_field", std::move(list_type), true}; + return MakeSchema(list_field); + }, + .should_succeed = true}, + + ProjectTestParam{ + .test_name = "ProjectMapKeyAndValue", + .create_schema = []() { return ComplexMapSchema(); }, + .selected_ids = {71, 74}, + .expected_schema = + []() { + auto key_id_field = + iceberg::SchemaField{71, "id", iceberg::int32(), false}; + auto key_struct = MakeStructType(key_id_field); + auto key_field = + iceberg::SchemaField{73, "key", std::move(key_struct), false}; + + auto value_id_field = + iceberg::SchemaField{74, "id", iceberg::int32(), false}; + auto value_struct = MakeStructType(value_id_field); + auto value_field = + iceberg::SchemaField{76, "value", std::move(value_struct), false}; + + auto map_type = + std::make_shared(key_field, value_field); + auto map_field = + iceberg::SchemaField{77, "map_field", std::move(map_type), true}; + return MakeSchema(map_field); + }, + .should_succeed = true}, + + ProjectTestParam{.test_name = "ProjectEmptyResult", + .create_schema = []() { return BasicSchema(); }, + .selected_ids = {999}, // Select non-existent field + .expected_schema = []() { return MakeSchema(); }, + .should_succeed = true})); + class SchemaThreadSafetyTest : public ::testing::Test { protected: void SetUp() override {