1919
2020#include " iceberg/schema.h"
2121
22+ #include < algorithm>
2223#include < format>
2324
2425#include " iceberg/type.h"
2526#include " iceberg/util/formatter.h" // IWYU pragma: keep
27+ #include " iceberg/util/macros.h"
2628namespace iceberg {
2729class IdVisitor {
2830 public:
29- explicit IdVisitor (bool has_init_ = false );
31+ explicit IdVisitor (
32+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>&
33+ id_to_field);
3034 Status Visit (const Type& type);
35+ Status VisitNestedType (const Type& type);
3136
32- bool has_init;
33- int index = 0 ;
34- std::unordered_map<int , size_t > id_to_index;
35- std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
37+ private:
38+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
3639};
37-
38- std::string GetPath (const std::string& last_path, const std::string& field_name,
39- bool case_sensitive) {
40- if (case_sensitive) {
41- return last_path.empty () ? field_name : last_path + " ." + field_name;
42- }
43- std::string lower_name (field_name);
44- std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
45- return last_path.empty () ? lower_name : last_path + " ." + lower_name;
46- }
47- class NameVisitor {
40+ class NametoIdVisitor {
4841 public:
49- explicit NameVisitor (bool case_sensitive_ = true , bool has_init_ = false );
42+ explicit NametoIdVisitor (std::unordered_map<std::string, size_t >& name_to_id,
43+ bool case_sensitive_ = true );
5044 Status Visit (const ListType& type, const std::string& path,
5145 const std::string& short_path);
5246 Status Visit (const MapType& type, const std::string& path,
@@ -55,12 +49,12 @@ class NameVisitor {
5549 const std::string& short_path);
5650 Status Visit (const PrimitiveType& type, const std::string& path,
5751 const std::string& short_path);
52+ static std::string GetPath (const std::string& last_path, const std::string& field_name,
53+ bool case_sensitive = true );
5854
59- int index = 0 ;
60- bool case_sensitive;
61- bool has_init;
62- std::unordered_map<std::string, size_t > name_to_index;
63- std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
55+ private:
56+ bool case_sensitive_;
57+ std::unordered_map<std::string, size_t >& name_to_id_;
6458};
6559Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
6660 : StructType(std::move(fields)), schema_id_(schema_id) {}
@@ -84,16 +78,16 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
8478 std::string_view name, bool case_sensitive) const {
8579 if (case_sensitive) {
8680 ICEBERG_RETURN_UNEXPECTED (InitNameToIndexMap ());
87- auto it = name_to_index_ .find (std::string (name));
88- if (it == name_to_index_ .end ()) return std::nullopt ;
89- return full_schemafield_[ it->second ] ;
81+ auto it = name_to_id_ .find (std::string (name));
82+ if (it == name_to_id_ .end ()) return std::nullopt ;
83+ return FindFieldById ( it->second ) ;
9084 }
9185 ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIndexMap ());
9286 std::string lower_name (name);
9387 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
94- auto it = lowercase_name_to_index_ .find (lower_name);
95- if (it == lowercase_name_to_index_ .end ()) return std::nullopt ;
96- return full_schemafield_[ it->second ] ;
88+ auto it = lowercase_name_to_id_ .find (lower_name);
89+ if (it == lowercase_name_to_id_ .end ()) return std::nullopt ;
90+ return FindFieldById ( it->second ) ;
9791}
9892
9993Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
@@ -102,150 +96,134 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
10296}
10397
10498Result<Status> Schema::InitIdToIndexMap () const {
105- if (!id_to_index_ .empty ()) {
99+ if (!id_to_field_ .empty ()) {
106100 return {};
107101 }
108- bool has_init = !full_schemafield_.empty ();
109- IdVisitor visitor (has_init);
102+ IdVisitor visitor (id_to_field_);
110103 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor));
111- id_to_index_ = std::move (visitor.id_to_index );
112- if (!has_init) {
113- full_schemafield_ = std::move (visitor.full_schemafield );
114- }
115104 return {};
116105}
117106
118107Result<Status> Schema::InitNameToIndexMap () const {
119- if (!name_to_index_ .empty ()) {
108+ if (!name_to_id_ .empty ()) {
120109 return {};
121110 }
122- bool has_init = !full_schemafield_.empty ();
123111 std::string path, short_path;
124- NameVisitor visitor (true , has_init );
112+ NametoIdVisitor visitor (name_to_id_, true );
125113 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
126- name_to_index_ = std::move (visitor.name_to_index );
127- if (!has_init) {
128- full_schemafield_ = std::move (visitor.full_schemafield );
129- }
130114 return {};
131115}
132116
133117Result<Status> Schema::InitLowerCaseNameToIndexMap () const {
134- if (!lowercase_name_to_index_ .empty ()) {
118+ if (!lowercase_name_to_id_ .empty ()) {
135119 return {};
136120 }
137- bool has_init = !full_schemafield_.empty ();
138121 std::string path, short_path;
139- NameVisitor visitor (false , has_init );
122+ NametoIdVisitor visitor (lowercase_name_to_id_, false );
140123 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
141- lowercase_name_to_index_ = std::move (visitor.name_to_index );
142- if (!has_init) {
143- full_schemafield_ = std::move (visitor.full_schemafield );
144- }
145124 return {};
146125}
147126
148127Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
149128 int32_t field_id) const {
150129 ICEBERG_RETURN_UNEXPECTED (InitIdToIndexMap ());
151- auto it = id_to_index_ .find (field_id);
152- if (it == id_to_index_ .end ()) {
130+ auto it = id_to_field_ .find (field_id);
131+ if (it == id_to_field_ .end ()) {
153132 return std::nullopt ;
154133 }
155- return full_schemafield_[ it->second ] ;
134+ return it->second ;
156135}
157136
158- IdVisitor::IdVisitor (bool has_init_) : has_init(has_init_) {}
137+ IdVisitor::IdVisitor (
138+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field)
139+ : id_to_field_(id_to_field) {}
159140
160141Status IdVisitor::Visit (const Type& type) {
142+ if (type.is_nested ()) {
143+ ICEBERG_RETURN_UNEXPECTED (VisitNestedType (type));
144+ }
145+ return {};
146+ }
147+
148+ Status IdVisitor::VisitNestedType (const Type& type) {
161149 const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
162150 const auto & fields = nested.fields ();
163151 for (const auto & field : fields) {
164- id_to_index[field.field_id ()] = index++;
165- if (!has_init) {
166- full_schemafield.emplace_back (field);
167- }
168- if (field.type ()->is_nested ()) {
169- ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
170- }
152+ id_to_field_.emplace (field.field_id (), std::cref (field));
153+ ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
171154 }
172155 return {};
173156}
174157
175- NameVisitor::NameVisitor (bool case_sensitive_, bool has_init_)
176- : case_sensitive(case_sensitive_), has_init(has_init_) {}
158+ NametoIdVisitor::NametoIdVisitor (std::unordered_map<std::string, size_t >& name_to_id,
159+ bool case_sensitive)
160+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
177161
178- Status NameVisitor ::Visit (const ListType& type, const std::string& path,
179- const std::string& short_path) {
162+ Status NametoIdVisitor ::Visit (const ListType& type, const std::string& path,
163+ const std::string& short_path) {
180164 const auto & field = type.fields ()[0 ];
181- std::string full_path =
182- iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
165+ std::string full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
183166 std::string short_full_path;
184167 if (field.type ()->type_id () == TypeId::kStruct ) {
185168 short_full_path = short_path;
186169 } else {
187- short_full_path =
188- iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
189- }
190- name_to_index[full_path] = index++;
191- if (!has_init) {
192- full_schemafield.emplace_back (field);
193- }
194- name_to_index.emplace (short_full_path, index - 1 );
195- if (field.type ()->is_nested ()) {
196- ICEBERG_RETURN_UNEXPECTED (
197- VisitTypeInline (*field.type (), this , full_path, short_full_path));
170+ short_full_path = GetPath (short_path, std::string (field.name ()), case_sensitive_);
198171 }
172+ name_to_id_[full_path] = field.field_id ();
173+ name_to_id_.emplace (short_full_path, field.field_id ());
174+ ICEBERG_RETURN_UNEXPECTED (
175+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
199176 return {};
200177}
201178
202- Status NameVisitor ::Visit (const MapType& type, const std::string& path,
203- const std::string& short_path) {
179+ Status NametoIdVisitor ::Visit (const MapType& type, const std::string& path,
180+ const std::string& short_path) {
204181 std::string full_path, short_full_path;
205- for (const auto & field : type.fields ()) {
206- full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
182+ const auto & fields = type.fields ();
183+ for (const auto & field : fields) {
184+ full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
207185 if (field.name () == MapType::kValueName &&
208186 field.type ()->type_id () == TypeId::kStruct ) {
209187 short_full_path = short_path;
210188 } else {
211- short_full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
212- }
213- name_to_index[full_path] = index++;
214- if (!has_init) {
215- full_schemafield.emplace_back (field);
216- }
217- name_to_index.emplace (short_full_path, index - 1 );
218- if (field.type ()->is_nested ()) {
219- ICEBERG_RETURN_UNEXPECTED (
220- VisitTypeInline (*field.type (), this , full_path, short_full_path));
189+ short_full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
221190 }
191+ name_to_id_[full_path] = field.field_id ();
192+ name_to_id_.emplace (short_full_path, field.field_id ());
193+ ICEBERG_RETURN_UNEXPECTED (
194+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
222195 }
223196 return {};
224197}
225198
226- Status NameVisitor ::Visit (const StructType& type, const std::string& path,
227- const std::string& short_path) {
199+ Status NametoIdVisitor ::Visit (const StructType& type, const std::string& path,
200+ const std::string& short_path) {
228201 const auto & fields = type.fields ();
229202 std::string full_path, short_full_path;
230203 for (const auto & field : fields) {
231- full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
232- short_full_path =
233- iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
234- name_to_index[full_path] = index++;
235- if (!has_init) {
236- full_schemafield.emplace_back (field);
237- }
238- name_to_index.emplace (short_full_path, index - 1 );
239- if (field.type ()->is_nested ()) {
240- ICEBERG_RETURN_UNEXPECTED (
241- VisitTypeInline (*field.type (), this , full_path, short_full_path));
242- }
204+ full_path =
205+ NametoIdVisitor::GetPath (path, std::string (field.name ()), case_sensitive_);
206+ short_full_path = GetPath (short_path, std::string (field.name ()), case_sensitive_);
207+ name_to_id_[full_path] = field.field_id ();
208+ name_to_id_.emplace (short_full_path, field.field_id ());
209+ ICEBERG_RETURN_UNEXPECTED (
210+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
243211 }
244212 return {};
245213}
246214
247- Status NameVisitor ::Visit (const PrimitiveType& type, const std::string& path,
248- const std::string& short_path) {
215+ Status NametoIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
216+ const std::string& short_path) {
249217 return {};
250218}
219+
220+ std::string NametoIdVisitor::GetPath (const std::string& last_path,
221+ const std::string& field_name, bool case_sensitive) {
222+ if (case_sensitive) {
223+ return last_path.empty () ? field_name : last_path + " ." + field_name;
224+ }
225+ std::string lower_name (field_name);
226+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
227+ return last_path.empty () ? lower_name : last_path + " ." + lower_name;
228+ }
251229} // namespace iceberg
0 commit comments