2020#include " iceberg/schema.h"
2121
2222#include < format>
23+ #include < functional>
2324
2425#include " iceberg/type.h"
2526#include " iceberg/util/formatter.h" // IWYU pragma: keep
27+ #include " iceberg/util/macros.h"
28+ #include " iceberg/util/visit_type.h"
2629
2730namespace iceberg {
2831
32+ class IdToFieldVisitor {
33+ public:
34+ explicit IdToFieldVisitor (
35+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>&
36+ id_to_field);
37+ Status Visit (const PrimitiveType& type);
38+ Status Visit (const NestedType& type);
39+
40+ private:
41+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
42+ };
43+
44+ class NameToIdVisitor {
45+ public:
46+ explicit NameToIdVisitor (
47+ std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>& name_to_id,
48+ bool case_sensitive = true ,
49+ std::function<std::string(std::string_view)> quoting_func = {});
50+ Status Visit (const ListType& type, const std::string& path,
51+ const std::string& short_path);
52+ Status Visit (const MapType& type, const std::string& path,
53+ const std::string& short_path);
54+ Status Visit (const StructType& type, const std::string& path,
55+ const std::string& short_path);
56+ Status Visit (const PrimitiveType& type, const std::string& path,
57+ const std::string& short_path);
58+ void Finish ();
59+
60+ private:
61+ std::string BuildPath (std::string_view prefix, std::string_view field_name,
62+ bool case_sensitive);
63+
64+ private:
65+ bool case_sensitive_;
66+ std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>& name_to_id_;
67+ std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>> short_name_to_id_;
68+ std::function<std::string(std::string_view)> quoting_func_;
69+ };
70+
2971Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
3072 : StructType(std::move(fields)), schema_id_(schema_id) {}
3173
@@ -44,4 +86,175 @@ bool Schema::Equals(const Schema& other) const {
4486 return schema_id_ == other.schema_id_ && fields_ == other.fields_ ;
4587}
4688
89+ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
90+ std::string_view name, bool case_sensitive) const {
91+ if (case_sensitive) {
92+ ICEBERG_RETURN_UNEXPECTED (InitNameToIdMap ());
93+ auto it = name_to_id_.find (name);
94+ if (it == name_to_id_.end ()) return std::nullopt ;
95+ return FindFieldById (it->second );
96+ }
97+ ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIdMap ());
98+ auto it = lowercase_name_to_id_.find (StringUtils::ToLower (name));
99+ if (it == lowercase_name_to_id_.end ()) return std::nullopt ;
100+ return FindFieldById (it->second );
101+ }
102+
103+ Status Schema::InitIdToFieldMap () const {
104+ if (!id_to_field_.empty ()) {
105+ return {};
106+ }
107+ IdToFieldVisitor visitor (id_to_field_);
108+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor));
109+ return {};
110+ }
111+
112+ Status Schema::InitNameToIdMap () const {
113+ if (!name_to_id_.empty ()) {
114+ return {};
115+ }
116+ NameToIdVisitor visitor (name_to_id_, /* case_sensitive=*/ true );
117+ ICEBERG_RETURN_UNEXPECTED (
118+ VisitTypeInline (*this , &visitor, /* path=*/ " " , /* short_path=*/ " " ));
119+ visitor.Finish ();
120+ return {};
121+ }
122+
123+ Status Schema::InitLowerCaseNameToIdMap () const {
124+ if (!lowercase_name_to_id_.empty ()) {
125+ return {};
126+ }
127+ NameToIdVisitor visitor (lowercase_name_to_id_, /* case_sensitive=*/ false );
128+ ICEBERG_RETURN_UNEXPECTED (
129+ VisitTypeInline (*this , &visitor, /* path=*/ " " , /* short_path=*/ " " ));
130+ visitor.Finish ();
131+ return {};
132+ }
133+
134+ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
135+ int32_t field_id) const {
136+ ICEBERG_RETURN_UNEXPECTED (InitIdToFieldMap ());
137+ auto it = id_to_field_.find (field_id);
138+ if (it == id_to_field_.end ()) {
139+ return std::nullopt ;
140+ }
141+ return it->second ;
142+ }
143+
144+ IdToFieldVisitor::IdToFieldVisitor (
145+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field)
146+ : id_to_field_(id_to_field) {}
147+
148+ Status IdToFieldVisitor::Visit (const PrimitiveType& type) { return {}; }
149+
150+ Status IdToFieldVisitor::Visit (const NestedType& type) {
151+ const auto & nested = internal::checked_cast<const NestedType&>(type);
152+ const auto & fields = nested.fields ();
153+ for (const auto & field : fields) {
154+ auto it = id_to_field_.try_emplace (field.field_id (), std::cref (field));
155+ if (!it.second ) {
156+ return InvalidSchema (" Duplicate field id found: {}" , field.field_id ());
157+ }
158+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*field.type (), this ));
159+ }
160+ return {};
161+ }
162+
163+ NameToIdVisitor::NameToIdVisitor (
164+ std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>& name_to_id,
165+ bool case_sensitive, std::function<std::string(std::string_view)> quoting_func)
166+ : name_to_id_(name_to_id),
167+ case_sensitive_(case_sensitive),
168+ quoting_func_(std::move(quoting_func)) {}
169+
170+ Status NameToIdVisitor::Visit (const ListType& type, const std::string& path,
171+ const std::string& short_path) {
172+ const auto & field = type.fields ()[0 ];
173+ std::string new_path = BuildPath (path, field.name (), case_sensitive_);
174+ std::string new_short_path;
175+ if (field.type ()->type_id () == TypeId::kStruct ) {
176+ new_short_path = short_path;
177+ } else {
178+ new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
179+ }
180+ auto it = name_to_id_.try_emplace (new_path, field.field_id ());
181+ if (!it.second ) {
182+ return InvalidSchema (" Duplicate path found: {}, prev id: {}, curr id: {}" ,
183+ it.first ->first , it.first ->second , field.field_id ());
184+ }
185+ short_name_to_id_.try_emplace (new_short_path, field.field_id ());
186+ ICEBERG_RETURN_UNEXPECTED (
187+ VisitTypeInline (*field.type (), this , new_path, new_short_path));
188+ return {};
189+ }
190+
191+ Status NameToIdVisitor::Visit (const MapType& type, const std::string& path,
192+ const std::string& short_path) {
193+ std::string new_path, new_short_path;
194+ const auto & fields = type.fields ();
195+ for (const auto & field : fields) {
196+ new_path = BuildPath (path, field.name (), case_sensitive_);
197+ if (field.name () == MapType::kValueName &&
198+ field.type ()->type_id () == TypeId::kStruct ) {
199+ new_short_path = short_path;
200+ } else {
201+ new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
202+ }
203+ auto it = name_to_id_.try_emplace (new_path, field.field_id ());
204+ if (!it.second ) {
205+ return InvalidSchema (" Duplicate path found: {}, prev id: {}, curr id: {}" ,
206+ it.first ->first , it.first ->second , field.field_id ());
207+ }
208+ short_name_to_id_.try_emplace (new_short_path, field.field_id ());
209+ ICEBERG_RETURN_UNEXPECTED (
210+ VisitTypeInline (*field.type (), this , new_path, new_short_path));
211+ }
212+ return {};
213+ }
214+
215+ Status NameToIdVisitor::Visit (const StructType& type, const std::string& path,
216+ const std::string& short_path) {
217+ const auto & fields = type.fields ();
218+ std::string new_path, new_short_path;
219+ for (const auto & field : fields) {
220+ new_path = BuildPath (path, field.name (), case_sensitive_);
221+ new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
222+ auto it = name_to_id_.try_emplace (new_path, field.field_id ());
223+ if (!it.second ) {
224+ return InvalidSchema (" Duplicate path found: {}, prev id: {}, curr id: {}" ,
225+ it.first ->first , it.first ->second , field.field_id ());
226+ }
227+ short_name_to_id_.try_emplace (new_short_path, field.field_id ());
228+ ICEBERG_RETURN_UNEXPECTED (
229+ VisitTypeInline (*field.type (), this , new_path, new_short_path));
230+ }
231+ return {};
232+ }
233+
234+ Status NameToIdVisitor::Visit (const PrimitiveType& type, const std::string& path,
235+ const std::string& short_path) {
236+ return {};
237+ }
238+
239+ std::string NameToIdVisitor::BuildPath (std::string_view prefix,
240+ std::string_view field_name, bool case_sensitive) {
241+ std::string quoted_name;
242+ if (!quoting_func_) {
243+ quoted_name = std::string (field_name);
244+ } else {
245+ quoted_name = quoting_func_ (field_name);
246+ }
247+ if (case_sensitive) {
248+ return prefix.empty () ? quoted_name : std::string (prefix) + " ." + quoted_name;
249+ }
250+ return prefix.empty () ? StringUtils::ToLower (quoted_name)
251+ : std::string (prefix) + " ." + StringUtils::ToLower (quoted_name);
252+ }
253+
254+ void NameToIdVisitor::Finish () {
255+ for (auto && it : short_name_to_id_) {
256+ name_to_id_.try_emplace (it.first , it.second );
257+ }
258+ }
259+
47260} // namespace iceberg
0 commit comments