2323
2424#include " iceberg/type.h"
2525#include " iceberg/util/formatter.h" // IWYU pragma: keep
26+ #include " iceberg/util/macros.h"
2627namespace iceberg {
2728class IdVisitor {
2829 public:
29- explicit IdVisitor (bool has_init_ = false );
30+ explicit IdVisitor (
31+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>&
32+ id_to_field);
3033 Status Visit (const Type& type);
34+ Status VisitNestedType (const Type& type);
3135
32- bool has_init;
33- int index = 0 ;
34- std::unordered_map<int , size_t > id_to_index;
35- std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
36+ private:
37+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
3638};
37-
38- std::string GetPath (const std::string& last_path, const std::string& field_name,
39- bool case_sensitive) {
40- if (case_sensitive) {
41- return last_path.empty () ? field_name : last_path + " ." + field_name;
42- }
43- std::string lower_name (field_name);
44- std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
45- return last_path.empty () ? lower_name : last_path + " ." + lower_name;
46- }
47- class NameVisitor {
39+ class NametoIdVisitor {
4840 public:
49- explicit NameVisitor (bool case_sensitive_ = true , bool has_init_ = false );
41+ explicit NametoIdVisitor (std::unordered_map<std::string, size_t >& name_to_id,
42+ bool case_sensitive_ = true );
5043 Status Visit (const ListType& type, const std::string& path,
5144 const std::string& short_path);
5245 Status Visit (const MapType& type, const std::string& path,
@@ -55,12 +48,12 @@ class NameVisitor {
5548 const std::string& short_path);
5649 Status Visit (const PrimitiveType& type, const std::string& path,
5750 const std::string& short_path);
51+ static std::string GetPath (const std::string& last_path, const std::string& field_name,
52+ bool case_sensitive = true );
5853
59- int index = 0 ;
60- bool case_sensitive;
61- bool has_init;
62- std::unordered_map<std::string, size_t > name_to_index;
63- std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
54+ private:
55+ bool case_sensitive_;
56+ std::unordered_map<std::string, size_t >& name_to_id_;
6457};
6558Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
6659 : StructType(std::move(fields)), schema_id_(schema_id) {}
@@ -84,16 +77,16 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
8477 std::string_view name, bool case_sensitive) const {
8578 if (case_sensitive) {
8679 ICEBERG_RETURN_UNEXPECTED (InitNameToIndexMap ());
87- auto it = name_to_index_ .find (std::string (name));
88- if (it == name_to_index_ .end ()) return std::nullopt ;
89- return full_schemafield_[ it->second ] ;
80+ auto it = name_to_id_ .find (std::string (name));
81+ if (it == name_to_id_ .end ()) return std::nullopt ;
82+ return FindFieldById ( it->second ) ;
9083 }
9184 ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIndexMap ());
9285 std::string lower_name (name);
9386 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
94- auto it = lowercase_name_to_index_ .find (lower_name);
95- if (it == lowercase_name_to_index_ .end ()) return std::nullopt ;
96- return full_schemafield_[ it->second ] ;
87+ auto it = lowercase_name_to_id_ .find (lower_name);
88+ if (it == lowercase_name_to_id_ .end ()) return std::nullopt ;
89+ return FindFieldById ( it->second ) ;
9790}
9891
9992Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
@@ -102,150 +95,134 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
10295}
10396
10497Result<Status> Schema::InitIdToIndexMap () const {
105- if (!id_to_index_ .empty ()) {
98+ if (!id_to_field_ .empty ()) {
10699 return {};
107100 }
108- bool has_init = !full_schemafield_.empty ();
109- IdVisitor visitor (has_init);
101+ IdVisitor visitor (id_to_field_);
110102 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor));
111- id_to_index_ = std::move (visitor.id_to_index );
112- if (!has_init) {
113- full_schemafield_ = std::move (visitor.full_schemafield );
114- }
115103 return {};
116104}
117105
118106Result<Status> Schema::InitNameToIndexMap () const {
119- if (!name_to_index_ .empty ()) {
107+ if (!name_to_id_ .empty ()) {
120108 return {};
121109 }
122- bool has_init = !full_schemafield_.empty ();
123110 std::string path, short_path;
124- NameVisitor visitor (true , has_init );
111+ NametoIdVisitor visitor (name_to_id_, true );
125112 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
126- name_to_index_ = std::move (visitor.name_to_index );
127- if (!has_init) {
128- full_schemafield_ = std::move (visitor.full_schemafield );
129- }
130113 return {};
131114}
132115
133116Result<Status> Schema::InitLowerCaseNameToIndexMap () const {
134- if (!lowercase_name_to_index_ .empty ()) {
117+ if (!lowercase_name_to_id_ .empty ()) {
135118 return {};
136119 }
137- bool has_init = !full_schemafield_.empty ();
138120 std::string path, short_path;
139- NameVisitor visitor (false , has_init );
121+ NametoIdVisitor visitor (lowercase_name_to_id_, false );
140122 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
141- lowercase_name_to_index_ = std::move (visitor.name_to_index );
142- if (!has_init) {
143- full_schemafield_ = std::move (visitor.full_schemafield );
144- }
145123 return {};
146124}
147125
148126Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
149127 int32_t field_id) const {
150128 ICEBERG_RETURN_UNEXPECTED (InitIdToIndexMap ());
151- auto it = id_to_index_ .find (field_id);
152- if (it == id_to_index_ .end ()) {
129+ auto it = id_to_field_ .find (field_id);
130+ if (it == id_to_field_ .end ()) {
153131 return std::nullopt ;
154132 }
155- return full_schemafield_[ it->second ] ;
133+ return it->second ;
156134}
157135
158- IdVisitor::IdVisitor (bool has_init_) : has_init(has_init_) {}
136+ IdVisitor::IdVisitor (
137+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field)
138+ : id_to_field_(id_to_field) {}
159139
160140Status IdVisitor::Visit (const Type& type) {
141+ if (type.is_nested ()) {
142+ ICEBERG_RETURN_UNEXPECTED (VisitNestedType (type));
143+ }
144+ return {};
145+ }
146+
147+ Status IdVisitor::VisitNestedType (const Type& type) {
161148 const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
162149 const auto & fields = nested.fields ();
163150 for (const auto & field : fields) {
164- id_to_index[field.field_id ()] = index++;
165- if (!has_init) {
166- full_schemafield.emplace_back (field);
167- }
168- if (field.type ()->is_nested ()) {
169- ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
170- }
151+ id_to_field_.emplace (field.field_id (), std::cref (field));
152+ ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
171153 }
172154 return {};
173155}
174156
175- NameVisitor::NameVisitor (bool case_sensitive_, bool has_init_)
176- : case_sensitive(case_sensitive_), has_init(has_init_) {}
157+ NametoIdVisitor::NametoIdVisitor (std::unordered_map<std::string, size_t >& name_to_id,
158+ bool case_sensitive)
159+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
177160
178- Status NameVisitor ::Visit (const ListType& type, const std::string& path,
179- const std::string& short_path) {
161+ Status NametoIdVisitor ::Visit (const ListType& type, const std::string& path,
162+ const std::string& short_path) {
180163 const auto & field = type.fields ()[0 ];
181- std::string full_path =
182- iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
164+ std::string full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
183165 std::string short_full_path;
184166 if (field.type ()->type_id () == TypeId::kStruct ) {
185167 short_full_path = short_path;
186168 } else {
187- short_full_path =
188- iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
189- }
190- name_to_index[full_path] = index++;
191- if (!has_init) {
192- full_schemafield.emplace_back (field);
193- }
194- name_to_index.emplace (short_full_path, index - 1 );
195- if (field.type ()->is_nested ()) {
196- ICEBERG_RETURN_UNEXPECTED (
197- VisitTypeInline (*field.type (), this , full_path, short_full_path));
169+ short_full_path = GetPath (short_path, std::string (field.name ()), case_sensitive_);
198170 }
171+ name_to_id_[full_path] = field.field_id ();
172+ name_to_id_.emplace (short_full_path, field.field_id ());
173+ ICEBERG_RETURN_UNEXPECTED (
174+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
199175 return {};
200176}
201177
202- Status NameVisitor ::Visit (const MapType& type, const std::string& path,
203- const std::string& short_path) {
178+ Status NametoIdVisitor ::Visit (const MapType& type, const std::string& path,
179+ const std::string& short_path) {
204180 std::string full_path, short_full_path;
205- for (const auto & field : type.fields ()) {
206- full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
181+ const auto & fields = type.fields ();
182+ for (const auto & field : fields) {
183+ full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
207184 if (field.name () == MapType::kValueName &&
208185 field.type ()->type_id () == TypeId::kStruct ) {
209186 short_full_path = short_path;
210187 } else {
211- short_full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
212- }
213- name_to_index[full_path] = index++;
214- if (!has_init) {
215- full_schemafield.emplace_back (field);
216- }
217- name_to_index.emplace (short_full_path, index - 1 );
218- if (field.type ()->is_nested ()) {
219- ICEBERG_RETURN_UNEXPECTED (
220- VisitTypeInline (*field.type (), this , full_path, short_full_path));
188+ short_full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
221189 }
190+ name_to_id_[full_path] = field.field_id ();
191+ name_to_id_.emplace (short_full_path, field.field_id ());
192+ ICEBERG_RETURN_UNEXPECTED (
193+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
222194 }
223195 return {};
224196}
225197
226- Status NameVisitor ::Visit (const StructType& type, const std::string& path,
227- const std::string& short_path) {
198+ Status NametoIdVisitor ::Visit (const StructType& type, const std::string& path,
199+ const std::string& short_path) {
228200 const auto & fields = type.fields ();
229201 std::string full_path, short_full_path;
230202 for (const auto & field : fields) {
231- full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
232- short_full_path =
233- iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
234- name_to_index[full_path] = index++;
235- if (!has_init) {
236- full_schemafield.emplace_back (field);
237- }
238- name_to_index.emplace (short_full_path, index - 1 );
239- if (field.type ()->is_nested ()) {
240- ICEBERG_RETURN_UNEXPECTED (
241- VisitTypeInline (*field.type (), this , full_path, short_full_path));
242- }
203+ full_path =
204+ NametoIdVisitor::GetPath (path, std::string (field.name ()), case_sensitive_);
205+ short_full_path = GetPath (short_path, std::string (field.name ()), case_sensitive_);
206+ name_to_id_[full_path] = field.field_id ();
207+ name_to_id_.emplace (short_full_path, field.field_id ());
208+ ICEBERG_RETURN_UNEXPECTED (
209+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
243210 }
244211 return {};
245212}
246213
247- Status NameVisitor ::Visit (const PrimitiveType& type, const std::string& path,
248- const std::string& short_path) {
214+ Status NametoIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
215+ const std::string& short_path) {
249216 return {};
250217}
218+
219+ std::string NametoIdVisitor::GetPath (const std::string& last_path,
220+ const std::string& field_name, bool case_sensitive) {
221+ if (case_sensitive) {
222+ return last_path.empty () ? field_name : last_path + " ." + field_name;
223+ }
224+ std::string lower_name (field_name);
225+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
226+ return last_path.empty () ? lower_name : last_path + " ." + lower_name;
227+ }
251228} // namespace iceberg
0 commit comments