2121
2222#include < format>
2323
24- #include " iceberg/exception.h"
2524#include " iceberg/type.h"
2625#include " iceberg/util/formatter.h" // IWYU pragma: keep
2726namespace iceberg {
27+ class IdVisitor {
28+ public:
29+ explicit IdVisitor (bool has_init_ = false );
30+ Status Visit (const Type& type);
2831
29- Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
30- : StructType(std::move(fields)), schema_id_(schema_id) {
31- InitIdToIndexMap ();
32+ bool has_init;
33+ int index = 0 ;
34+ std::unordered_map<int , size_t > id_to_index;
35+ std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
36+ };
37+
/// \brief Append `field_name` to `last_path`, separated by a dot.
///
/// \param last_path path of the enclosing field; when empty, no separator is
///        emitted and only the (possibly lower-cased) field name is returned.
///        It is never case-folded here.
/// \param field_name name of the field to append.
/// \param case_sensitive when false, `field_name` is lower-cased before joining.
/// \return the joined path.
std::string GetPath(const std::string& last_path, const std::string& field_name,
                    bool case_sensitive) {
  std::string name(field_name);
  if (!case_sensitive) {
    // tolower() is only defined for values representable as unsigned char (or
    // EOF); taking the character as unsigned char avoids passing a negative
    // value when plain char is signed and the name contains non-ASCII bytes.
    std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) {
      return static_cast<char>(::tolower(c));
    });
  }
  if (last_path.empty()) {
    return name;
  }
  return last_path + "." + name;
}
47+ class NameVisitor {
48+ public:
49+ explicit NameVisitor (bool case_sensitive_ = true , bool has_init_ = false );
50+ Status Visit (const ListType& type, const std::string& path,
51+ const std::string& short_path);
52+ Status Visit (const MapType& type, const std::string& path,
53+ const std::string& short_path);
54+ Status Visit (const StructType& type, const std::string& path,
55+ const std::string& short_path);
56+ Status Visit (const PrimitiveType& type, const std::string& path,
57+ const std::string& short_path);
58+
59+ int index = 0 ;
60+ bool case_sensitive;
61+ bool has_init;
62+ std::unordered_map<std::string, size_t > name_to_index;
63+ std::vector<std::reference_wrapper<const SchemaField>> full_schemafield;
64+ };
65+ Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
66+ : StructType(std::move(fields)), schema_id_(schema_id) {}
3367
3468std::optional<int32_t > Schema::schema_id () const { return schema_id_; }
3569
@@ -46,142 +80,172 @@ bool Schema::Equals(const Schema& other) const {
4680 return schema_id_ == other.schema_id_ && fields_ == other.fields_ ;
4781}
4882
49- std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldByName (
83+ Result< std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
5084 std::string_view name, bool case_sensitive) const {
5185 if (case_sensitive) {
52- InitNameToIndexMap ();
86+ ICEBERG_RETURN_UNEXPECTED ( InitNameToIndexMap () );
5387 auto it = name_to_index_.find (std::string (name));
5488 if (it == name_to_index_.end ()) return std::nullopt ;
5589 return full_schemafield_[it->second ];
5690 }
57- InitLowerCaseNameToIndexMap ();
91+ ICEBERG_RETURN_UNEXPECTED ( InitLowerCaseNameToIndexMap () );
5892 std::string lower_name (name);
5993 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
6094 auto it = lowercase_name_to_index_.find (lower_name);
6195 if (it == lowercase_name_to_index_.end ()) return std::nullopt ;
6296 return full_schemafield_[it->second ];
6397}
6498
65- std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldByName (
99+ Result< std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
66100 std::string_view name) const {
67- return GetFieldByName (name, true );
101+ return FindFieldByName (name, /* case_sensitive */ true );
68102}
69103
70- void Schema::InitIdToIndexMap () const {
104+ Result<Status> Schema::InitIdToIndexMap () const {
71105 if (!id_to_index_.empty ()) {
72- return ;
106+ return {} ;
73107 }
74- SchemaFieldVisitor visitor;
75- auto result = VisitTypeInline (*this , &visitor, id_to_index_, full_schemafield_);
108+ bool has_init = !full_schemafield_.empty ();
109+ IdVisitor visitor (has_init);
110+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor));
111+ id_to_index_ = std::move (visitor.id_to_index );
112+ if (!has_init) {
113+ full_schemafield_ = std::move (visitor.full_schemafield );
114+ }
115+ return {};
76116}
77117
78- void Schema::InitNameToIndexMap () const {
118+ Result<Status> Schema::InitNameToIndexMap () const {
79119 if (!name_to_index_.empty ()) {
80- return ;
120+ return {} ;
81121 }
82- int index = 0 ;
83- std::string_view path, short_path;
84- SchemaFieldVisitor visitor;
85- std::unordered_map<std::string, size_t > shortname_to_index;
86- auto tmp = VisitTypeInline (*this , &visitor, name_to_index_, path, shortname_to_index,
87- short_path, index, true );
88- if (!tmp.has_value ()) {
89- throw IcebergError (" Failed to perform InitNameToIndexMap" );
90- }
91- for (const auto & pair : shortname_to_index) {
92- if (!name_to_index_.count (pair.first )) {
93- name_to_index_.emplace (pair.first , pair.second );
94- }
122+ bool has_init = !full_schemafield_.empty ();
123+ std::string path, short_path;
124+ NameVisitor visitor (true , has_init);
125+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
126+ name_to_index_ = std::move (visitor.name_to_index );
127+ if (!has_init) {
128+ full_schemafield_ = std::move (visitor.full_schemafield );
95129 }
130+ return {};
96131}
97132
98- void Schema::InitLowerCaseNameToIndexMap () const {
133+ Result<Status> Schema::InitLowerCaseNameToIndexMap () const {
99134 if (!lowercase_name_to_index_.empty ()) {
100- return ;
135+ return {} ;
101136 }
102- int index = 0 ;
103- std::string_view path, short_path;
104- SchemaFieldVisitor visitor;
105- std::unordered_map<std::string, size_t > shortlowercasename_to_index;
106- auto tmp = VisitTypeInline (*this , &visitor, lowercase_name_to_index_, path,
107- shortlowercasename_to_index, short_path, index, false );
108- if (!tmp.has_value ()) {
109- throw IcebergError (" Failed to perform InitLowerCaseNameToIndexMap" );
110- }
111- for (const auto & pair : shortlowercasename_to_index) {
112- if (!lowercase_name_to_index_.count (pair.first )) {
113- lowercase_name_to_index_.emplace (pair.first , pair.second );
114- }
137+ bool has_init = !full_schemafield_.empty ();
138+ std::string path, short_path;
139+ NameVisitor visitor (false , has_init);
140+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, path, short_path));
141+ lowercase_name_to_index_ = std::move (visitor.name_to_index );
142+ if (!has_init) {
143+ full_schemafield_ = std::move (visitor.full_schemafield );
115144 }
145+ return {};
116146}
117147
118- std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldById (
148+ Result< std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
119149 int32_t field_id) const {
120- InitIdToIndexMap ();
150+ ICEBERG_RETURN_UNEXPECTED ( InitIdToIndexMap () );
121151 auto it = id_to_index_.find (field_id);
122152 if (it == id_to_index_.end ()) {
123153 return std::nullopt ;
124154 }
125155 return full_schemafield_[it->second ];
126156}
127157
128- Status SchemaFieldVisitor::Visit ( const Type& type,
129- std::unordered_map< int , size_t >& id_to_index,
130- std::vector<SchemaField>& full_schemafield ) {
158+ IdVisitor::IdVisitor ( bool has_init_) : has_init(has_init_) {}
159+
160+ Status IdVisitor::Visit ( const Type& type ) {
131161 const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
132- for (const auto & field : nested.fields ()) {
133- id_to_index[field.field_id ()] = full_schemafield.size ();
134- full_schemafield.emplace_back (field);
162+ const auto & fields = nested.fields ();
163+ for (const auto & field : fields) {
164+ id_to_index[field.field_id ()] = index++;
165+ if (!has_init) {
166+ full_schemafield.emplace_back (field);
167+ }
135168 if (field.type ()->is_nested ()) {
136- auto tmp = Visit (*field.type (), id_to_index, full_schemafield);
137- if (!tmp.has_value ()) {
138- throw IcebergError (" Failed to perform visit(id_to_index)" );
139- }
169+ ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
140170 }
141171 }
142172 return {};
143173}
144- std::string SchemaFieldVisitor::GetPath (const std::string& last_path,
145- const std::string& field_name,
146- bool case_sensitive) {
147- if (case_sensitive) {
148- return last_path.empty () ? field_name : last_path + " ." + field_name;
174+
175+ NameVisitor::NameVisitor (bool case_sensitive_, bool has_init_)
176+ : case_sensitive(case_sensitive_), has_init(has_init_) {}
177+
178+ Status NameVisitor::Visit (const ListType& type, const std::string& path,
179+ const std::string& short_path) {
180+ const auto & field = type.fields ()[0 ];
181+ std::string full_path =
182+ iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
183+ std::string short_full_path;
184+ if (field.type ()->type_id () == TypeId::kStruct ) {
185+ short_full_path = short_path;
186+ } else {
187+ short_full_path =
188+ iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
149189 }
150- std::string lower_name (field_name);
151- std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
152- return last_path.empty () ? lower_name : last_path + " ." + lower_name;
190+ name_to_index[full_path] = index++;
191+ if (!has_init) {
192+ full_schemafield.emplace_back (field);
193+ }
194+ name_to_index.emplace (short_full_path, index - 1 );
195+ if (field.type ()->is_nested ()) {
196+ ICEBERG_RETURN_UNEXPECTED (
197+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
198+ }
199+ return {};
153200}
154201
155- Status SchemaFieldVisitor::Visit (
156- const Type& type, std::unordered_map<std::string, size_t >& name_to_index,
157- std::string_view path, std::unordered_map<std::string, size_t >& shortname_to_index,
158- std::string_view short_path, int & index, bool case_sensitive) {
159- const char dot = ' .' ;
160- const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
161- for (const auto & field : nested.fields ()) {
162- std::string full_path, short_full_path;
163- full_path = GetPath (std::string (path), std::string (field.name ()), case_sensitive);
164- name_to_index[full_path] = index;
165-
166- if (type.type_id () == TypeId::kList and field.type ()->type_id () == TypeId::kStruct ) {
167- short_full_path = short_path;
168- } else if (type.type_id () == TypeId::kMap and field.name () == " value" and
169- field.type ()->type_id () == TypeId::kStruct ) {
202+ Status NameVisitor::Visit (const MapType& type, const std::string& path,
203+ const std::string& short_path) {
204+ std::string full_path, short_full_path;
205+ for (const auto & field : type.fields ()) {
206+ full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
207+ if (field.name () == MapType::kValueName &&
208+ field.type ()->type_id () == TypeId::kStruct ) {
170209 short_full_path = short_path;
171210 } else {
172- short_full_path =
173- GetPath (std::string (short_path), std::string (field.name ()), case_sensitive);
211+ short_full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
212+ }
213+ name_to_index[full_path] = index++;
214+ if (!has_init) {
215+ full_schemafield.emplace_back (field);
174216 }
175- shortname_to_index[ short_full_path] = index++ ;
217+ name_to_index. emplace ( short_full_path, index - 1 ) ;
176218 if (field.type ()->is_nested ()) {
177- auto tmp = Visit (*field.type (), name_to_index, full_path, shortname_to_index,
178- short_full_path, index, case_sensitive);
179- if (!tmp.has_value ()) {
180- throw IcebergError (" Failed to perform visit(name_to_index)" );
181- }
219+ ICEBERG_RETURN_UNEXPECTED (
220+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
182221 }
183222 }
184223 return {};
185224}
186225
226+ Status NameVisitor::Visit (const StructType& type, const std::string& path,
227+ const std::string& short_path) {
228+ const auto & fields = type.fields ();
229+ std::string full_path, short_full_path;
230+ for (const auto & field : fields) {
231+ full_path = iceberg::GetPath (path, std::string (field.name ()), case_sensitive);
232+ short_full_path =
233+ iceberg::GetPath (short_path, std::string (field.name ()), case_sensitive);
234+ name_to_index[full_path] = index++;
235+ if (!has_init) {
236+ full_schemafield.emplace_back (field);
237+ }
238+ name_to_index.emplace (short_full_path, index - 1 );
239+ if (field.type ()->is_nested ()) {
240+ ICEBERG_RETURN_UNEXPECTED (
241+ VisitTypeInline (*field.type (), this , full_path, short_full_path));
242+ }
243+ }
244+ return {};
245+ }
246+
247+ Status NameVisitor::Visit (const PrimitiveType& type, const std::string& path,
248+ const std::string& short_path) {
249+ return {};
250+ }
187251} // namespace iceberg
0 commit comments