2121
2222#include < algorithm>
2323#include < format>
24+ #include < functional>
2425
2526#include " iceberg/type.h"
2627#include " iceberg/util/formatter.h" // IWYU pragma: keep
2728#include " iceberg/util/macros.h"
2829#include " iceberg/util/visit_type.h"
2930
3031namespace iceberg {
32+
3133class IdToFieldVisitor {
3234 public:
3335 explicit IdToFieldVisitor (
@@ -40,10 +42,14 @@ class IdToFieldVisitor {
4042 std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
4143};
4244
43- class NametoIdVisitor {
45+ class NameToIdVisitor {
4446 public:
45- explicit NametoIdVisitor (std::unordered_map<std::string, int32_t >& name_to_id,
46- bool case_sensitive_ = true );
47+ explicit NameToIdVisitor (
48+ std::unordered_map<std::string, int32_t >& name_to_id, bool case_sensitive_ = true ,
49+ std::function<std::string(std::string_view)> quoting_func_ =
50+ [](std::string_view s) { return std::string (s); });
51+ ~NameToIdVisitor ();
52+
4753 Status Visit (const ListType& type, const std::string& path,
4854 const std::string& short_path);
4955 Status Visit (const MapType& type, const std::string& path,
@@ -52,12 +58,16 @@ class NametoIdVisitor {
5258 const std::string& short_path);
5359 Status Visit (const PrimitiveType& type, const std::string& path,
5460 const std::string& short_path);
55- static std::string BuildPath (std::string_view prefix, std::string_view field_name,
56- bool case_sensitive);
61+ std::string BuildPath (std::string_view prefix, std::string_view field_name,
62+ bool case_sensitive);
63+
64+ void Merge ();
5765
5866 private:
5967 bool case_sensitive_;
6068 std::unordered_map<std::string, int32_t >& name_to_id_;
69+ std::unordered_map<std::string, int32_t > shortname_to_id_;
70+ std::function<std::string(std::string_view)> quoting_func_;
6171};
6272
6373Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
@@ -81,20 +91,20 @@ bool Schema::Equals(const Schema& other) const {
8191Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
8292 std::string_view name, bool case_sensitive) const {
8393 if (case_sensitive) {
84- ICEBERG_RETURN_UNEXPECTED (InitNameToIndexMap ());
94+ ICEBERG_RETURN_UNEXPECTED (InitNameToIdMap ());
8595 auto it = name_to_id_.find (std::string (name));
8696 if (it == name_to_id_.end ()) return std::nullopt ;
8797 return FindFieldById (it->second );
8898 }
89- ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIndexMap ());
99+ ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIdMap ());
90100 std::string lower_name (name);
91101 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
92102 auto it = lowercase_name_to_id_.find (lower_name);
93103 if (it == lowercase_name_to_id_.end ()) return std::nullopt ;
94104 return FindFieldById (it->second );
95105}
96106
97- Result< Status> Schema::InitIdToIndexMap () const {
107+ Status Schema::InitIdToFieldMap () const {
98108 if (!id_to_field_.empty ()) {
99109 return {};
100110 }
@@ -103,29 +113,29 @@ Result<Status> Schema::InitIdToIndexMap() const {
103113 return {};
104114}
105115
106- Result< Status> Schema::InitNameToIndexMap () const {
116+ Status Schema::InitNameToIdMap () const {
107117 if (!name_to_id_.empty ()) {
108118 return {};
109119 }
110- std::string path, short_path ;
111- NametoIdVisitor visitor (name_to_id_, true );
112- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*this , &visitor, path, short_path));
120+ NameToIdVisitor visitor (name_to_id_, /* case_sensitive= */ true ) ;
121+ ICEBERG_RETURN_UNEXPECTED (
122+ VisitTypeInline (*this , &visitor, /* path= */ " " , /* short_path= */ " " ));
113123 return {};
114124}
115125
116- Result< Status> Schema::InitLowerCaseNameToIndexMap () const {
126+ Status Schema::InitLowerCaseNameToIdMap () const {
117127 if (!lowercase_name_to_id_.empty ()) {
118128 return {};
119129 }
120- std::string path, short_path ;
121- NametoIdVisitor visitor (lowercase_name_to_id_, false );
122- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*this , &visitor, path, short_path));
130+ NameToIdVisitor visitor (lowercase_name_to_id_, /* case_sensitive= */ false ) ;
131+ ICEBERG_RETURN_UNEXPECTED (
132+ VisitTypeInline (*this , &visitor, /* path= */ " " , /* short_path= */ " " ));
123133 return {};
124134}
125135
126136Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
127137 int32_t field_id) const {
128- ICEBERG_RETURN_UNEXPECTED (InitIdToIndexMap ());
138+ ICEBERG_RETURN_UNEXPECTED (InitIdToFieldMap ());
129139 auto it = id_to_field_.find (field_id);
130140 if (it == id_to_field_.end ()) {
131141 return std::nullopt ;
@@ -154,11 +164,16 @@ Status IdToFieldVisitor::VisitNestedType(const Type& type) {
154164 return {};
155165}
156166
157- NametoIdVisitor::NametoIdVisitor (std::unordered_map<std::string, int32_t >& name_to_id,
158- bool case_sensitive)
159- : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
167+ NameToIdVisitor::NameToIdVisitor (
168+ std::unordered_map<std::string, int32_t >& name_to_id, bool case_sensitive,
169+ std::function<std::string(std::string_view)> quoting_func)
170+ : name_to_id_(name_to_id),
171+ case_sensitive_(case_sensitive),
172+ quoting_func_(std::move(quoting_func)) {}
173+
174+ NameToIdVisitor::~NameToIdVisitor () { Merge (); }
160175
161- Status NametoIdVisitor ::Visit (const ListType& type, const std::string& path,
176+ Status NameToIdVisitor ::Visit (const ListType& type, const std::string& path,
162177 const std::string& short_path) {
163178 const auto & field = type.fields ()[0 ];
164179 std::string new_path = BuildPath (path, field.name (), case_sensitive_);
@@ -168,14 +183,17 @@ Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
168183 } else {
169184 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
170185 }
171- name_to_id_[new_path] = field.field_id ();
172- name_to_id_.emplace (new_short_path, field.field_id ());
186+ auto it = name_to_id_.emplace (new_path, field.field_id ());
187+ if (!it.second ) {
188+ return NotSupported (" Duplicate path in name_to_id_: {}" , new_path);
189+ }
190+ shortname_to_id_.emplace (new_short_path, field.field_id ());
173191 ICEBERG_RETURN_UNEXPECTED (
174192 VisitTypeInline (*field.type (), this , new_path, new_short_path));
175193 return {};
176194}
177195
178- Status NametoIdVisitor ::Visit (const MapType& type, const std::string& path,
196+ Status NameToIdVisitor ::Visit (const MapType& type, const std::string& path,
179197 const std::string& short_path) {
180198 std::string new_path, new_short_path;
181199 const auto & fields = type.fields ();
@@ -187,42 +205,62 @@ Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
187205 } else {
188206 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
189207 }
190- name_to_id_[new_path] = field.field_id ();
191- name_to_id_.emplace (new_short_path, field.field_id ());
208+ auto it = name_to_id_.emplace (new_path, field.field_id ());
209+ if (!it.second ) {
210+ return NotSupported (" Duplicate path in name_to_id_: {}" , new_path);
211+ }
212+ shortname_to_id_.emplace (new_short_path, field.field_id ());
192213 ICEBERG_RETURN_UNEXPECTED (
193214 VisitTypeInline (*field.type (), this , new_path, new_short_path));
194215 }
195216 return {};
196217}
197218
198- Status NametoIdVisitor ::Visit (const StructType& type, const std::string& path,
219+ Status NameToIdVisitor ::Visit (const StructType& type, const std::string& path,
199220 const std::string& short_path) {
200221 const auto & fields = type.fields ();
201222 std::string new_path, new_short_path;
202223 for (const auto & field : fields) {
203224 new_path = BuildPath (path, field.name (), case_sensitive_);
204225 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
205- name_to_id_[new_path] = field.field_id ();
206- name_to_id_.emplace (new_short_path, field.field_id ());
226+ auto it = name_to_id_.emplace (new_path, field.field_id ());
227+ if (!it.second ) {
228+ return NotSupported (" Duplicate path in name_to_id_: {}" , new_path);
229+ }
230+ shortname_to_id_.emplace (new_short_path, field.field_id ());
207231 ICEBERG_RETURN_UNEXPECTED (
208232 VisitTypeInline (*field.type (), this , new_path, new_short_path));
209233 }
210234 return {};
211235}
212236
213- Status NametoIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
237+ Status NameToIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
214238 const std::string& short_path) {
215239 return {};
216240}
217241
218- std::string NametoIdVisitor ::BuildPath (std::string_view prefix,
242+ std::string NameToIdVisitor ::BuildPath (std::string_view prefix,
219243 std::string_view field_name, bool case_sensitive) {
244+ std::string quoted_name;
245+ if (!quoting_func_) {
246+ quoted_name = std::string (field_name);
247+ } else {
248+ quoted_name = quoting_func_ (field_name);
249+ }
220250 if (case_sensitive) {
221- return prefix.empty () ? std::string (field_name)
222- : std::string (prefix) + " ." + std::string (field_name);
251+ return prefix.empty () ? quoted_name : std::string (prefix) + " ." + quoted_name;
223252 }
224- std::string lower_name (field_name) ;
253+ std::string lower_name = quoted_name ;
225254 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
226255 return prefix.empty () ? lower_name : std::string (prefix) + " ." + lower_name;
227256}
257+
258+ void NameToIdVisitor::Merge () {
259+ for (const auto & it : shortname_to_id_) {
260+ if (name_to_id_.find (it.first ) == name_to_id_.end ()) {
261+ name_to_id_[it.first ] = it.second ;
262+ }
263+ }
264+ }
265+
228266} // namespace iceberg
0 commit comments