1919
2020#include " iceberg/schema.h"
2121
22+ #include < algorithm>
2223#include < format>
2324
2425#include " iceberg/type.h"
2526#include " iceberg/util/formatter.h" // IWYU pragma: keep
27+ #include " iceberg/util/macros.h"
28+ #include " iceberg/util/visit_type.h"
2629namespace iceberg {
2730class IdVisitor {
2831 public:
29- explicit IdVisitor (bool has_init_ = false );
32+ explicit IdVisitor (
33+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>&
34+ id_to_field);
3035 Status Visit (const Type& type);
36+ Status VisitNestedType (const Type& type);
3137
32- bool has_init;
33- int index = 0 ;
34- std::unordered_map<int , size_t > id_to_index;
35- std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
38+ private:
39+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
3640};
37-
38- std::string GetPath (const std::string& last_path, const std::string& field_name,
39- bool case_sensitive) {
40- if (case_sensitive) {
41- return last_path.empty () ? field_name : last_path + " ." + field_name;
42- }
43- std::string lower_name (field_name);
44- std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
45- return last_path.empty () ? lower_name : last_path + " ." + lower_name;
46- }
47- class NameVisitor {
41+ class NametoIdVisitor {
4842 public:
49- explicit NameVisitor (bool case_sensitive_ = true , bool has_init_ = false );
43+ explicit NametoIdVisitor (std::unordered_map<std::string, size_t >& name_to_id,
44+ bool case_sensitive_ = true );
5045 Status Visit (const ListType& type, const std::string& path,
5146 const std::string& short_path);
5247 Status Visit (const MapType& type, const std::string& path,
@@ -55,12 +50,12 @@ class NameVisitor {
5550 const std::string& short_path);
5651 Status Visit (const PrimitiveType& type, const std::string& path,
5752 const std::string& short_path);
53+ static std::string GetPath (const std::string& last_path, const std::string& field_name,
54+ bool case_sensitive = true );
5855
59- int index = 0 ;
60- bool case_sensitive;
61- bool has_init;
62- std::unordered_map<std::string, size_t > name_to_index;
63- std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
56+ private:
57+ bool case_sensitive_;
58+ std::unordered_map<std::string, size_t >& name_to_id_;
6459};
6560Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
6661 : StructType(std::move(fields)), schema_id_(schema_id) {}
@@ -84,16 +79,16 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
8479 std::string_view name, bool case_sensitive) const {
8580 if (case_sensitive) {
8681 ICEBERG_RETURN_UNEXPECTED (InitNameToIndexMap ());
87- auto it = name_to_index_ .find (std::string (name));
88- if (it == name_to_index_ .end ()) return std::nullopt ;
89- return full_schemafield_[ it->second ] ;
82+ auto it = name_to_id_ .find (std::string (name));
83+ if (it == name_to_id_ .end ()) return std::nullopt ;
84+ return FindFieldById ( it->second ) ;
9085 }
9186 ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIndexMap ());
9287 std::string lower_name (name);
9388 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
94- auto it = lowercase_name_to_index_ .find (lower_name);
95- if (it == lowercase_name_to_index_ .end ()) return std::nullopt ;
96- return full_schemafield_[ it->second ] ;
89+ auto it = lowercase_name_to_id_ .find (lower_name);
90+ if (it == lowercase_name_to_id_ .end ()) return std::nullopt ;
91+ return FindFieldById ( it->second ) ;
9792}
9893
9994Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
@@ -102,150 +97,134 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
10297}
10398
10499Result<Status> Schema::InitIdToIndexMap () const {
105- if (!id_to_index_ .empty ()) {
100+ if (!id_to_field_ .empty ()) {
106101 return {};
107102 }
108- bool has_init = !full_schemafield_.empty ();
109- IdVisitor visitor (has_init);
103+ IdVisitor visitor (id_to_field_);
110104 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor));
111- id_to_index_ = std::move (visitor.id_to_index );
112- if (!has_init) {
113- full_schemafield_ = std::move (visitor.full_schemafield );
114- }
115105 return {};
116106}
117107
118108Result<Status> Schema::InitNameToIndexMap () const {
119- if (!name_to_index_ .empty ()) {
109+ if (!name_to_id_ .empty ()) {
120110 return {};
121111 }
122- bool has_init = !full_schemafield_.empty ();
123112 std::string path, short_path;
124- NameVisitor visitor (true , has_init );
113+ NametoIdVisitor visitor (name_to_id_, true );
125114 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
126- name_to_index_ = std::move (visitor.name_to_index );
127- if (!has_init) {
128- full_schemafield_ = std::move (visitor.full_schemafield );
129- }
130115 return {};
131116}
132117
133118Result<Status> Schema::InitLowerCaseNameToIndexMap () const {
134- if (!lowercase_name_to_index_ .empty ()) {
119+ if (!lowercase_name_to_id_ .empty ()) {
135120 return {};
136121 }
137- bool has_init = !full_schemafield_.empty ();
138122 std::string path, short_path;
139- NameVisitor visitor (false , has_init );
123+ NametoIdVisitor visitor (lowercase_name_to_id_, false );
140124 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
141- lowercase_name_to_index_ = std::move (visitor.name_to_index );
142- if (!has_init) {
143- full_schemafield_ = std::move (visitor.full_schemafield );
144- }
145125 return {};
146126}
147127
148128Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
149129 int32_t field_id) const {
150130 ICEBERG_RETURN_UNEXPECTED (InitIdToIndexMap ());
151- auto it = id_to_index_ .find (field_id);
152- if (it == id_to_index_ .end ()) {
131+ auto it = id_to_field_ .find (field_id);
132+ if (it == id_to_field_ .end ()) {
153133 return std::nullopt ;
154134 }
155- return full_schemafield_[ it->second ] ;
135+ return it->second ;
156136}
157137
158- IdVisitor::IdVisitor (bool has_init_) : has_init(has_init_) {}
138+ IdVisitor::IdVisitor (
139+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field)
140+ : id_to_field_(id_to_field) {}
159141
160142Status IdVisitor::Visit (const Type& type) {
143+ if (type.is_nested ()) {
144+ ICEBERG_RETURN_UNEXPECTED (VisitNestedType (type));
145+ }
146+ return {};
147+ }
148+
149+ Status IdVisitor::VisitNestedType (const Type& type) {
161150 const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
162151 const auto & fields = nested.fields ();
163152 for (const auto & field : fields) {
164- id_to_index[field.field_id ()] = index++;
165- if (!has_init) {
166- full_schemafield.emplace_back (field);
167- }
168- if (field.type ()->is_nested ()) {
169- ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
170- }
153+ id_to_field_.emplace (field.field_id (), std::cref (field));
154+ ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
171155 }
172156 return {};
173157}
174158
175- NameVisitor::NameVisitor (bool case_sensitive_, bool has_init_)
176- : case_sensitive(case_sensitive_), has_init(has_init_) {}
159+ NametoIdVisitor::NametoIdVisitor (std::unordered_map<std::string, size_t >& name_to_id,
160+ bool case_sensitive)
161+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
177162
178- Status NameVisitor ::Visit (const ListType& type, const std::string& path,
179- const std::string& short_path) {
163+ Status NametoIdVisitor ::Visit (const ListType& type, const std::string& path,
164+ const std::string& short_path) {
180165 const auto & field = type.fields ()[0 ];
181- std::string full_path =
182- iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
166+ std::string full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
183167 std::string short_full_path;
184168 if (field.type ()->type_id () == TypeId::kStruct ) {
185169 short_full_path = short_path;
186170 } else {
187- short_full_path =
188- iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
189- }
190- name_to_index[full_path] = index++;
191- if (!has_init) {
192- full_schemafield.emplace_back (field);
193- }
194- name_to_index.emplace (short_full_path, index - 1 );
195- if (field.type ()->is_nested ()) {
196- ICEBERG_RETURN_UNEXPECTED (
197- VisitTypeInline (*field.type (), this , full_path, short_full_path));
171+ short_full_path = GetPath (short_path, std::string (field.name ()), case_sensitive_);
198172 }
173+ name_to_id_[full_path] = field.field_id ();
174+ name_to_id_.emplace (short_full_path, field.field_id ());
175+ ICEBERG_RETURN_UNEXPECTED (
176+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
199177 return {};
200178}
201179
202- Status NameVisitor ::Visit (const MapType& type, const std::string& path,
203- const std::string& short_path) {
180+ Status NametoIdVisitor ::Visit (const MapType& type, const std::string& path,
181+ const std::string& short_path) {
204182 std::string full_path, short_full_path;
205- for (const auto & field : type.fields ()) {
206- full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
183+ const auto & fields = type.fields ();
184+ for (const auto & field : fields) {
185+ full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
207186 if (field.name () == MapType::kValueName &&
208187 field.type ()->type_id () == TypeId::kStruct ) {
209188 short_full_path = short_path;
210189 } else {
211- short_full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
212- }
213- name_to_index[full_path] = index++;
214- if (!has_init) {
215- full_schemafield.emplace_back (field);
216- }
217- name_to_index.emplace (short_full_path, index - 1 );
218- if (field.type ()->is_nested ()) {
219- ICEBERG_RETURN_UNEXPECTED (
220- VisitTypeInline (*field.type (), this , full_path, short_full_path));
190+ short_full_path = GetPath (path, std::string (field.name ()), case_sensitive_);
221191 }
192+ name_to_id_[full_path] = field.field_id ();
193+ name_to_id_.emplace (short_full_path, field.field_id ());
194+ ICEBERG_RETURN_UNEXPECTED (
195+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
222196 }
223197 return {};
224198}
225199
226- Status NameVisitor ::Visit (const StructType& type, const std::string& path,
227- const std::string& short_path) {
200+ Status NametoIdVisitor ::Visit (const StructType& type, const std::string& path,
201+ const std::string& short_path) {
228202 const auto & fields = type.fields ();
229203 std::string full_path, short_full_path;
230204 for (const auto & field : fields) {
231- full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
232- short_full_path =
233- iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
234- name_to_index[full_path] = index++;
235- if (!has_init) {
236- full_schemafield.emplace_back (field);
237- }
238- name_to_index.emplace (short_full_path, index - 1 );
239- if (field.type ()->is_nested ()) {
240- ICEBERG_RETURN_UNEXPECTED (
241- VisitTypeInline (*field.type (), this , full_path, short_full_path));
242- }
205+ full_path =
206+ NametoIdVisitor::GetPath (path, std::string (field.name ()), case_sensitive_);
207+ short_full_path = GetPath (short_path, std::string (field.name ()), case_sensitive_);
208+ name_to_id_[full_path] = field.field_id ();
209+ name_to_id_.emplace (short_full_path, field.field_id ());
210+ ICEBERG_RETURN_UNEXPECTED (
211+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
243212 }
244213 return {};
245214}
246215
247- Status NameVisitor ::Visit (const PrimitiveType& type, const std::string& path,
248- const std::string& short_path) {
216+ Status NametoIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
217+ const std::string& short_path) {
249218 return {};
250219}
220+
221+ std::string NametoIdVisitor::GetPath (const std::string& last_path,
222+ const std::string& field_name, bool case_sensitive) {
223+ if (case_sensitive) {
224+ return last_path.empty () ? field_name : last_path + " ." + field_name;
225+ }
226+ std::string lower_name (field_name);
227+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
228+ return last_path.empty () ? lower_name : last_path + " ." + lower_name;
229+ }
251230} // namespace iceberg
0 commit comments