2121
2222#include < format>
2323
24+ #include " iceberg/exception.h"
2425#include " iceberg/type.h"
2526#include " iceberg/util/formatter.h" // IWYU pragma: keep
26-
2727namespace iceberg {
2828
2929Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
30- : StructType(std::move(fields)), schema_id_(schema_id) {}
30+ : StructType(std::move(fields)), schema_id_(schema_id) {
31+ InitIdToIndexMap ();
32+ }
3133
3234std::optional<int32_t > Schema::schema_id () const { return schema_id_; }
3335
@@ -44,4 +46,142 @@ bool Schema::Equals(const Schema& other) const {
4446 return schema_id_ == other.schema_id_ && fields_ == other.fields_ ;
4547}
4648
49+ std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldByName (
50+ std::string_view name, bool case_sensitive) const {
51+ if (case_sensitive) {
52+ InitNameToIndexMap ();
53+ auto it = name_to_index_.find (std::string (name));
54+ if (it == name_to_index_.end ()) return std::nullopt ;
55+ return full_schemafield_[it->second ];
56+ }
57+ InitLowerCaseNameToIndexMap ();
58+ std::string lower_name (name);
59+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
60+ auto it = lowercase_name_to_index_.find (lower_name);
61+ if (it == lowercase_name_to_index_.end ()) return std::nullopt ;
62+ return full_schemafield_[it->second ];
63+ }
64+
65+ std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldByName (
66+ std::string_view name) const {
67+ return GetFieldByName (name, true );
68+ }
69+
70+ void Schema::InitIdToIndexMap () const {
71+ if (!id_to_index_.empty ()) {
72+ return ;
73+ }
74+ SchemaFieldVisitor visitor;
75+ auto result = VisitTypeInline (*this , &visitor, id_to_index_, full_schemafield_);
76+ }
77+
78+ void Schema::InitNameToIndexMap () const {
79+ if (!name_to_index_.empty ()) {
80+ return ;
81+ }
82+ int index = 0 ;
83+ std::string_view path, short_path;
84+ SchemaFieldVisitor visitor;
85+ std::unordered_map<std::string, size_t > shortname_to_index;
86+ auto tmp = VisitTypeInline (*this , &visitor, name_to_index_, path, shortname_to_index,
87+ short_path, index, true );
88+ if (!tmp.has_value ()) {
89+ throw IcebergError (" Failed to perform InitNameToIndexMap" );
90+ }
91+ for (const auto & pair : shortname_to_index) {
92+ if (!name_to_index_.count (pair.first )) {
93+ name_to_index_.emplace (pair.first , pair.second );
94+ }
95+ }
96+ }
97+
98+ void Schema::InitLowerCaseNameToIndexMap () const {
99+ if (!lowercase_name_to_index_.empty ()) {
100+ return ;
101+ }
102+ int index = 0 ;
103+ std::string_view path, short_path;
104+ SchemaFieldVisitor visitor;
105+ std::unordered_map<std::string, size_t > shortlowercasename_to_index;
106+ auto tmp = VisitTypeInline (*this , &visitor, lowercase_name_to_index_, path,
107+ shortlowercasename_to_index, short_path, index, false );
108+ if (!tmp.has_value ()) {
109+ throw IcebergError (" Failed to perform InitLowerCaseNameToIndexMap" );
110+ }
111+ for (const auto & pair : shortlowercasename_to_index) {
112+ if (!lowercase_name_to_index_.count (pair.first )) {
113+ lowercase_name_to_index_.emplace (pair.first , pair.second );
114+ }
115+ }
116+ }
117+
118+ std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldById (
119+ int32_t field_id) const {
120+ InitIdToIndexMap ();
121+ auto it = id_to_index_.find (field_id);
122+ if (it == id_to_index_.end ()) {
123+ return std::nullopt ;
124+ }
125+ return full_schemafield_[it->second ];
126+ }
127+
128+ Status SchemaFieldVisitor::Visit (const Type& type,
129+ std::unordered_map<int , size_t >& id_to_index,
130+ std::vector<SchemaField>& full_schemafield) {
131+ const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
132+ for (const auto & field : nested.fields ()) {
133+ id_to_index[field.field_id ()] = full_schemafield.size ();
134+ full_schemafield.emplace_back (field);
135+ if (field.type ()->is_nested ()) {
136+ auto tmp = Visit (*field.type (), id_to_index, full_schemafield);
137+ if (!tmp.has_value ()) {
138+ throw IcebergError (" Failed to perform visit(id_to_index)" );
139+ }
140+ }
141+ }
142+ return {};
143+ }
144+ std::string SchemaFieldVisitor::GetPath (const std::string& last_path,
145+ const std::string& field_name,
146+ bool case_sensitive) {
147+ if (case_sensitive) {
148+ return last_path.empty () ? field_name : last_path + " ." + field_name;
149+ }
150+ std::string lower_name (field_name);
151+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
152+ return last_path.empty () ? lower_name : last_path + " ." + lower_name;
153+ }
154+
155+ Status SchemaFieldVisitor::Visit (
156+ const Type& type, std::unordered_map<std::string, size_t >& name_to_index,
157+ std::string_view path, std::unordered_map<std::string, size_t >& shortname_to_index,
158+ std::string_view short_path, int & index, bool case_sensitive) {
159+ const char dot = ' .' ;
160+ const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
161+ for (const auto & field : nested.fields ()) {
162+ std::string full_path, short_full_path;
163+ full_path = GetPath (std::string (path), std::string (field.name ()), case_sensitive);
164+ name_to_index[full_path] = index;
165+
166+ if (type.type_id () == TypeId::kList and field.type ()->type_id () == TypeId::kStruct ) {
167+ short_full_path = short_path;
168+ } else if (type.type_id () == TypeId::kMap and field.name () == " value" and
169+ field.type ()->type_id () == TypeId::kStruct ) {
170+ short_full_path = short_path;
171+ } else {
172+ short_full_path =
173+ GetPath (std::string (short_path), std::string (field.name ()), case_sensitive);
174+ }
175+ shortname_to_index[short_full_path] = index++;
176+ if (field.type ()->is_nested ()) {
177+ auto tmp = Visit (*field.type (), name_to_index, full_path, shortname_to_index,
178+ short_full_path, index, case_sensitive);
179+ if (!tmp.has_value ()) {
180+ throw IcebergError (" Failed to perform visit(name_to_index)" );
181+ }
182+ }
183+ }
184+ return {};
185+ }
186+
47187} // namespace iceberg
0 commit comments