2828#include < optional>
2929#include < ranges>
3030#include < string_view>
31- #include < iceberg/schema_field.h>
31+ #include < unordered_map>
32+
33+ #include < iceberg/type_fwd.h>
3234
3335#include " iceberg/exception.h"
3436#include " iceberg/util/formatter.h" // IWYU pragma: keep
3537
3638namespace iceberg {
39+ void NestedType::BuildNameToIndexMap (
40+ std::string_view current_path,
41+ std::unordered_map<std::string, size_t >& name_to_index_, int & index) const {
42+ for (const auto & field : fields ()) {
43+ std::string full_path = std::string (current_path) + " ." + std::string (field.name ());
44+ if (current_path == " " ) {
45+ full_path = std::string (field.name ());
46+ }
47+ if (field.type () && field.type ()->is_nested ()) {
48+ auto nested_type = std::dynamic_pointer_cast<NestedType>(field.type ());
49+ if (nested_type) {
50+ nested_type->BuildNameToIndexMap (full_path, name_to_index_, index);
51+ }
52+ }
53+ name_to_index_[full_path] = index++;
54+ }
55+ }
3756
38- StructType::StructType (std::vector<SchemaField> fields) : fields_(std::move(fields)) {
39- size_t index = 0 ;
40- for (const auto & field : fields_) {
41- auto [it, inserted] = field_id_to_index_.try_emplace (field.field_id (), index);
/// Recursively flattens the fields reachable from this type into
/// `full_schemafield` and maps each field ID to its position in that vector.
///
/// Nested children are appended before the field that contains them.
///
/// @param id_to_index Output map from field ID to position in `full_schemafield`.
/// @param full_schemafield Output vector receiving a copy of every visited field.
/// @throws IcebergError if a field ID is already present in `id_to_index`.
void NestedType::BuildIdToIndexMap(std::unordered_map<int, size_t>& id_to_index,
                                   std::vector<SchemaField>& full_schemafield) const {
  for (const auto& child : fields()) {
    // Descend before registering the field itself.
    if (child.type() && child.type()->is_nested()) {
      if (auto nested = std::dynamic_pointer_cast<NestedType>(child.type())) {
        nested->BuildIdToIndexMap(id_to_index, full_schemafield);
      }
    }

    // The field will live at the current end of the flattened vector.
    auto [slot, is_new] = id_to_index.try_emplace(child.field_id(), full_schemafield.size());
    if (!is_new) {
      throw IcebergError(
          std::format(" StructType: duplicate field ID {} (field indices {} and {})",
                      child.field_id(), slot->second, full_schemafield.size()));
    }
    full_schemafield.emplace_back(child);
  }
}
76+
77+ void NestedType::BuildLowerCaseNameToIndexMap (
78+ std::string_view current_path,
79+ std::unordered_map<std::string, size_t >& lowercase_name_to_index, int & index) const {
80+ for (const auto & field : fields ()) {
81+ std::string full_path = std::string (current_path) + " ." + std::string (field.name ());
82+ if (current_path == " " ) {
83+ full_path = std::string (field.name ());
84+ }
85+ if (field.type () && field.type ()->is_nested ()) {
86+ auto nested_type = std::dynamic_pointer_cast<NestedType>(field.type ());
87+ if (nested_type) {
88+ nested_type->BuildLowerCaseNameToIndexMap (full_path, lowercase_name_to_index,
89+ index);
90+ }
91+ }
92+ lowercase_name_to_index[full_path] = index++;
93+ }
94+ }
4795
48- ++index;
96+ void NestedType::InitNameToIndexMap () const {
97+ if (init_name_to_index_) {
98+ return ;
4999 }
100+ int index = 0 ;
101+ BuildNameToIndexMap (" " , name_to_index_, index);
102+ init_name_to_index_ = true ;
103+ }
104+
105+ void NestedType::InitIdToIndexMap () const {
106+ if (init_id_to_index_) {
107+ return ;
108+ }
109+ BuildIdToIndexMap (field_id_to_index_, full_schemafield_);
110+ init_id_to_index_ = true ;
111+ }
112+
113+ void NestedType::InitLowerCaseNameToIndexMap () const {
114+ if (init_lowercase_name_to_index_) {
115+ return ;
116+ }
117+ int index = 0 ;
118+ BuildLowerCaseNameToIndexMap (" " , lowercase_name_to_index_, index);
119+ init_lowercase_name_to_index_ = true ;
120+ }
121+
122+ StructType::StructType (std::vector<SchemaField> fields) : fields_(std::move(fields)) {
123+ InitIdToIndexMap ();
50124}
51125
52126TypeId StructType::type_id () const { return kTypeId ; }
@@ -64,7 +138,7 @@ std::optional<std::reference_wrapper<const SchemaField>> StructType::GetFieldByI
64138 int32_t field_id) const {
65139 auto it = field_id_to_index_.find (field_id);
66140 if (it == field_id_to_index_.end ()) return std::nullopt ;
67- return fields_ [it->second ];
141+ return full_schemafield_ [it->second ];
68142}
69143std::optional<std::reference_wrapper<const SchemaField>> StructType::GetFieldByIndex (
70144 int32_t index) const {
@@ -74,22 +148,21 @@ std::optional<std::reference_wrapper<const SchemaField>> StructType::GetFieldByI
74148 return fields_[index];
75149}
76150std::optional<std::reference_wrapper<const SchemaField>> StructType::GetFieldByName (
77- std::string_view name) const {
151+ std::string_view name, bool case_sensitive ) const {
78152 // N.B. duplicate names are not permitted (looking at the Java
79153 // implementation) so there is nothing in particular we need to do here
80- InitNameToIdMap ();
81- auto it = field_name_to_index_.find (std::string (name));
82- if (it == field_name_to_index_.end ()) return std::nullopt ;
83- return fields_[it->second ];
84- }
85- std::optional<std::reference_wrapper<const SchemaField>> StructType::GetFieldByNameCaseInsensitive (
86- std::string_view name) const {
87- InitNameToIdMapCaseInsensitive ();
154+ if (case_sensitive) {
155+ InitNameToIndexMap ();
156+ auto it = name_to_index_.find (std::string (name));
157+ if (it == name_to_index_.end ()) return std::nullopt ;
158+ return full_schemafield_[it->second ];
159+ }
160+ InitLowerCaseNameToIndexMap ();
88161 std::string lower_name (name);
89162 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
90- auto it = caseinsensitive_field_name_to_index_ .find (lower_name);
91- if (it == caseinsensitive_field_name_to_index_ .end ()) return std::nullopt ;
92- return fields_ [it->second ];
163+ auto it = lowercase_name_to_index_ .find (lower_name);
164+ if (it == lowercase_name_to_index_ .end ()) return std::nullopt ;
165+ return full_schemafield_ [it->second ];
93166}
94167bool StructType::Equals (const Type& other) const {
95168 if (other.type_id () != TypeId::kStruct ) {
@@ -98,36 +171,19 @@ bool StructType::Equals(const Type& other) const {
98171 const auto & struct_ = static_cast <const StructType&>(other);
99172 return fields_ == struct_.fields_ ;
100173}
101- void StructType::InitNameToIdMap () const {
102- if (!field_name_to_index_.empty ()) {
103- return ;
104- }
105-
106- for (int i = 0 ; i < fields_.size (); i++) {
107- field_name_to_index_[std::string (fields_[i].name ())] = i;
108- }
109- }
110- void StructType::InitNameToIdMapCaseInsensitive () const {
111- if (!caseinsensitive_field_name_to_index_.empty ()) {
112- return ;
113- }
114-
115- for (int i = 0 ; i < fields_.size (); i++) {
116- std::string lowercase_name (fields_[i].name ());
117- std::ranges::transform (lowercase_name, lowercase_name.begin (), ::tolower);
118- caseinsensitive_field_name_to_index_[lowercase_name] = i;
119- }
120- }
121174
/// Builds a list type from an already constructed element field.
///
/// @throws IcebergError if the element field's name differs from `kElementName`.
ListType::ListType(SchemaField element) : element_(std::move(element)) {
  const bool has_expected_name = !(element_.name() != kElementName);
  if (!has_expected_name) {
    throw IcebergError(std::format(" ListType: child field name should be '{}', was '{}'",
                                   kElementName, element_.name()));
  }
  // Build the field-ID lookup once the element has been validated.
  InitIdToIndexMap();
}
128182
129183ListType::ListType (int32_t field_id, std::shared_ptr<Type> type, bool optional)
130- : element_(field_id, std::string(kElementName ), std::move(type), optional) {}
184+ : element_(field_id, std::string(kElementName ), std::move(type), optional) {
185+ InitIdToIndexMap ();
186+ }
131187
132188TypeId ListType::type_id () const { return kTypeId ; }
133189std::string ListType::ToString () const {
@@ -141,10 +197,9 @@ std::string ListType::ToString() const {
141197std::span<const SchemaField> ListType::fields () const { return {&element_, 1 }; }
142198std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldById (
143199 int32_t field_id) const {
144- if (field_id == element_.field_id ()) {
145- return std::cref (element_);
146- }
147- return std::nullopt ;
200+ auto it = field_id_to_index_.find (field_id);
201+ if (it == field_id_to_index_.end ()) return std::nullopt ;
202+ return full_schemafield_[it->second ];
148203}
149204std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByIndex (
150205 int index) const {
@@ -154,19 +209,20 @@ std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByInd
154209 return std::nullopt ;
155210}
156211std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByName (
157- std::string_view name) const {
158- if (name == element_.name ()) {
159- return std::cref (element_);
212+ std::string_view name, bool case_sensitive) const {
213+ if (case_sensitive) {
214+ InitNameToIndexMap ();
215+ if (name == element_.name ()) {
216+ return std::cref (element_);
217+ }
218+ return std::nullopt ;
160219 }
161- return std::nullopt ;
162- }
163- std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByNameCaseInsensitive (
164- std::string_view name) const {
220+ InitLowerCaseNameToIndexMap ();
165221 auto lower_name_view = name | std::views::transform (::tolower);
166222 auto lower_field_name = element_.name () | std::views::transform (::tolower);
167223 if (std::ranges::equal (lower_field_name, lower_name_view)) {
168224 return std::cref (element_);
169- }
225+ }
170226 return std::nullopt ;
171227}
172228bool ListType::Equals (const Type& other) const {
@@ -187,6 +243,7 @@ MapType::MapType(SchemaField key, SchemaField value)
187243 throw IcebergError (std::format (" MapType: value field name should be '{}', was '{}'" ,
188244 kValueName , this ->value ().name ()));
189245 }
246+ InitIdToIndexMap ();
190247}
191248
192249const SchemaField& MapType::key () const { return fields_[0 ]; }
@@ -204,12 +261,9 @@ std::string MapType::ToString() const {
204261std::span<const SchemaField> MapType::fields () const { return fields_; }
205262std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldById (
206263 int32_t field_id) const {
207- if (field_id == key ().field_id ()) {
208- return key ();
209- } else if (field_id == value ().field_id ()) {
210- return value ();
211- }
212- return std::nullopt ;
264+ auto it = field_id_to_index_.find (field_id);
265+ if (it == field_id_to_index_.end ()) return std::nullopt ;
266+ return full_schemafield_[it->second ];
213267}
214268std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByIndex (
215269 int32_t index) const {
@@ -221,19 +275,20 @@ std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByInde
221275 return std::nullopt ;
222276}
223277std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByName (
224- std::string_view name) const {
225- if (name == kKeyName ) {
226- return key ();
227- } else if (name == kValueName ) {
228- return value ();
278+ std::string_view name, bool case_sensitive) const {
279+ if (case_sensitive) {
280+ InitNameToIndexMap ();
281+ if (name == kKeyName ) {
282+ return key ();
283+ } else if (name == kValueName ) {
284+ return value ();
285+ }
286+ return std::nullopt ;
229287 }
230- return std::nullopt ;
231- }
232- std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByNameCaseInsensitive (
233- std::string_view name) const {
234- auto lower_name_view = name | std::views::transform (::tolower);
235- auto lower_key_view = kKeyName | std::views::transform (tolower);
236- auto lower_value_view = kValueName | std::views::transform (tolower);
288+ InitLowerCaseNameToIndexMap ();
289+ auto lower_name_view = name | std::views::transform (::tolower);
290+ auto lower_key_view = kKeyName | std::views::transform (::tolower);
291+ auto lower_value_view = kValueName | std::views::transform (::tolower);
237292 if (std::ranges::equal (lower_key_view, lower_name_view)) {
238293 return key ();
239294 } else if (std::ranges::equal (lower_value_view, lower_name_view)) {
0 commit comments