2727namespace iceberg {
2828
2929Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
30- : StructType(std::move(fields)), schema_id_(schema_id) {}
30+ : StructType(std::move(fields)), schema_id_(schema_id) {
31+ InitIdToIndexMap ();
32+ }
3133
3234std::optional<int32_t > Schema::schema_id () const { return schema_id_; }
3335
@@ -44,4 +46,130 @@ bool Schema::Equals(const Schema& other) const {
4446 return schema_id_ == other.schema_id_ && fields_ == other.fields_ ;
4547}
4648
49+ std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldByName (
50+ std::string_view name, bool case_sensitive) const {
51+ if (case_sensitive) {
52+ InitNameToIndexMap ();
53+ auto it = name_to_index_.find (std::string (name));
54+ if (it == name_to_index_.end ()) return std::nullopt ;
55+ return full_schemafield_[it->second ];
56+ }
57+ InitLowerCaseNameToIndexMap ();
58+ std::string lower_name (name);
59+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
60+ auto it = lowercase_name_to_index_.find (lower_name);
61+ if (it == lowercase_name_to_index_.end ()) return std::nullopt ;
62+ return full_schemafield_[it->second ];
63+ }
64+
65+ std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldByName (
66+ std::string_view name) const {
67+ return GetFieldByName (name, true );
68+ }
69+
70+ void Schema::InitIdToIndexMap () const {
71+ if (!id_to_index_.empty ()) {
72+ return ;
73+ }
74+ SchemaFieldVisitor visitor;
75+ auto result = VisitTypeInline (*this , &visitor, id_to_index_, full_schemafield_);
76+ }
77+
78+ void Schema::InitNameToIndexMap () const {
79+ if (!name_to_index_.empty ()) {
80+ return ;
81+ }
82+ int index = 0 ;
83+ std::string_view path, short_path;
84+ SchemaFieldVisitor visitor;
85+ std::unordered_map<std::string, size_t > shortname_to_index;
86+ VisitTypeInline (*this , &visitor, name_to_index_, path, shortname_to_index, short_path,
87+ index, true );
88+ for (const auto & [key, value] : shortname_to_index) {
89+ if (!name_to_index_.count (key)) {
90+ name_to_index_[key] = value;
91+ }
92+ }
93+ }
94+
95+ void Schema::InitLowerCaseNameToIndexMap () const {
96+ if (!lowercase_name_to_index_.empty ()) {
97+ return ;
98+ }
99+ int index = 0 ;
100+ std::string_view path, short_path;
101+ SchemaFieldVisitor visitor;
102+ std::unordered_map<std::string, size_t > shortlowercasename_to_index;
103+ VisitTypeInline (*this , &visitor, lowercase_name_to_index_, path,
104+ shortlowercasename_to_index, short_path, index, false );
105+ for (const auto & [key, value] : shortlowercasename_to_index) {
106+ if (!lowercase_name_to_index_.count (key)) {
107+ lowercase_name_to_index_[key] = value;
108+ }
109+ }
110+ }
111+
112+ std::optional<std::reference_wrapper<const SchemaField>> Schema::GetFieldById (
113+ int32_t field_id) const {
114+ InitIdToIndexMap ();
115+ auto it = id_to_index_.find (field_id);
116+ if (it == id_to_index_.end ()) {
117+ return std::nullopt ;
118+ }
119+ return full_schemafield_[it->second ];
120+ }
121+
122+ Status SchemaFieldVisitor::Visit (const Type& type,
123+ std::unordered_map<int , size_t >& id_to_index,
124+ std::vector<SchemaField>& full_schemafield) {
125+ const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
126+ for (const auto & field : nested.fields ()) {
127+ id_to_index[field.field_id ()] = full_schemafield.size ();
128+ full_schemafield.emplace_back (field);
129+ if (field.type ()->is_nested ()) {
130+ Visit (*field.type (), id_to_index, full_schemafield);
131+ }
132+ }
133+ return {};
134+ }
135+ std::string SchemaFieldVisitor::GetPath (const std::string& last_path,
136+ const std::string& field_name,
137+ bool case_sensitive) {
138+ if (case_sensitive) {
139+ return last_path.empty () ? field_name : last_path + " ." + field_name;
140+ }
141+ std::string lower_name (field_name);
142+ std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
143+ return last_path.empty () ? lower_name : last_path + " ." + lower_name;
144+ }
145+
146+ Status SchemaFieldVisitor::Visit (
147+ const Type& type, std::unordered_map<std::string, size_t >& name_to_index,
148+ std::string_view path, std::unordered_map<std::string, size_t >& shortname_to_index,
149+ std::string_view short_path, int & index, bool case_sensitive) {
150+ const char dot = ' .' ;
151+ const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
152+ for (const auto & field : nested.fields ()) {
153+ std::string full_path, short_full_path;
154+ full_path = GetPath (std::string (path), std::string (field.name ()), case_sensitive);
155+ name_to_index[full_path] = index;
156+
157+ if (type.type_id () == TypeId::kList and field.type ()->type_id () == TypeId::kStruct ) {
158+ short_full_path = short_path;
159+ } else if (type.type_id () == TypeId::kMap and field.name () == " value" and
160+ field.type ()->type_id () == TypeId::kStruct ) {
161+ short_full_path = short_path;
162+ } else {
163+ short_full_path =
164+ GetPath (std::string (short_path), std::string (field.name ()), case_sensitive);
165+ }
166+ shortname_to_index[short_full_path] = index++;
167+ if (field.type ()->is_nested ()) {
168+ Visit (*field.type (), name_to_index, full_path, shortname_to_index, short_full_path,
169+ index, case_sensitive);
170+ }
171+ }
172+ return {};
173+ }
174+
47175} // namespace iceberg
0 commit comments