2121
2222#include < algorithm>
2323#include < format>
24+ #include < functional>
2425
2526#include " iceberg/type.h"
2627#include " iceberg/util/formatter.h" // IWYU pragma: keep
2728#include " iceberg/util/macros.h"
2829#include " iceberg/util/visit_type.h"
2930
3031namespace iceberg {
32+
3133class IdToFieldVisitor {
3234 public:
3335 explicit IdToFieldVisitor (
@@ -40,10 +42,14 @@ class IdToFieldVisitor {
4042 std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
4143};
4244
43- class NametoIdVisitor {
45+ class NameToIdVisitor {
4446 public:
45- explicit NametoIdVisitor (std::unordered_map<std::string, int32_t >& name_to_id,
46- bool case_sensitive_ = true );
47+ explicit NameToIdVisitor (
48+ std::unordered_map<std::string, int32_t >& name_to_id, bool case_sensitive_ = true ,
49+ std::function<std::string(std::string_view)> quoting_func_ =
50+ [](std::string_view s) { return std::string (s); });
51+ ~NameToIdVisitor ();
52+
4753 Status Visit (const ListType& type, const std::string& path,
4854 const std::string& short_path);
4955 Status Visit (const MapType& type, const std::string& path,
@@ -52,12 +58,17 @@ class NametoIdVisitor {
5258 const std::string& short_path);
5359 Status Visit (const PrimitiveType& type, const std::string& path,
5460 const std::string& short_path);
55- static std::string BuildPath (std::string_view prefix, std::string_view field_name,
56- bool case_sensitive);
61+
62+ private:
63+ std::string BuildPath (std::string_view prefix, std::string_view field_name,
64+ bool case_sensitive);
65+ void Merge ();
5766
5867 private:
5968 bool case_sensitive_;
6069 std::unordered_map<std::string, int32_t >& name_to_id_;
70+ std::unordered_map<std::string, int32_t > shortname_to_id_;
71+ std::function<std::string(std::string_view)> quoting_func_;
6172};
6273
6374Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
@@ -81,20 +92,20 @@ bool Schema::Equals(const Schema& other) const {
8192Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
8293 std::string_view name, bool case_sensitive) const {
8394 if (case_sensitive) {
84- ICEBERG_RETURN_UNEXPECTED (InitNameToIndexMap ());
95+ ICEBERG_RETURN_UNEXPECTED (InitNameToIdMap ());
8596 auto it = name_to_id_.find (std::string (name));
8697 if (it == name_to_id_.end ()) return std::nullopt ;
8798 return FindFieldById (it->second );
8899 }
89- ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIndexMap ());
100+ ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIdMap ());
90101 std::string lower_name (name);
91102 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
92103 auto it = lowercase_name_to_id_.find (lower_name);
93104 if (it == lowercase_name_to_id_.end ()) return std::nullopt ;
94105 return FindFieldById (it->second );
95106}
96107
97- Result< Status> Schema::InitIdToIndexMap () const {
108+ Status Schema::InitIdToFieldMap () const {
98109 if (!id_to_field_.empty ()) {
99110 return {};
100111 }
@@ -103,29 +114,29 @@ Result<Status> Schema::InitIdToIndexMap() const {
103114 return {};
104115}
105116
106- Result< Status> Schema::InitNameToIndexMap () const {
117+ Status Schema::InitNameToIdMap () const {
107118 if (!name_to_id_.empty ()) {
108119 return {};
109120 }
110- std::string path, short_path ;
111- NametoIdVisitor visitor (name_to_id_, true );
112- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*this , &visitor, path, short_path));
121+ NameToIdVisitor visitor (name_to_id_, /* case_sensitive= */ true ) ;
122+ ICEBERG_RETURN_UNEXPECTED (
123+ VisitTypeInline (*this , &visitor, /* path= */ " " , /* short_path= */ " " ));
113124 return {};
114125}
115126
116- Result< Status> Schema::InitLowerCaseNameToIndexMap () const {
127+ Status Schema::InitLowerCaseNameToIdMap () const {
117128 if (!lowercase_name_to_id_.empty ()) {
118129 return {};
119130 }
120- std::string path, short_path ;
121- NametoIdVisitor visitor (lowercase_name_to_id_, false );
122- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*this , &visitor, path, short_path));
131+ NameToIdVisitor visitor (lowercase_name_to_id_, /* case_sensitive= */ false ) ;
132+ ICEBERG_RETURN_UNEXPECTED (
133+ VisitTypeInline (*this , &visitor, /* path= */ " " , /* short_path= */ " " ));
123134 return {};
124135}
125136
126137Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
127138 int32_t field_id) const {
128- ICEBERG_RETURN_UNEXPECTED (InitIdToIndexMap ());
139+ ICEBERG_RETURN_UNEXPECTED (InitIdToFieldMap ());
129140 auto it = id_to_field_.find (field_id);
130141 if (it == id_to_field_.end ()) {
131142 return std::nullopt ;
@@ -154,11 +165,16 @@ Status IdToFieldVisitor::VisitNestedType(const Type& type) {
154165 return {};
155166}
156167
157- NametoIdVisitor::NametoIdVisitor (std::unordered_map<std::string, int32_t >& name_to_id,
158- bool case_sensitive)
159- : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
168+ NameToIdVisitor::NameToIdVisitor (
169+ std::unordered_map<std::string, int32_t >& name_to_id, bool case_sensitive,
170+ std::function<std::string(std::string_view)> quoting_func)
171+ : name_to_id_(name_to_id),
172+ case_sensitive_(case_sensitive),
173+ quoting_func_(std::move(quoting_func)) {}
174+
175+ NameToIdVisitor::~NameToIdVisitor () { Merge (); }
160176
161- Status NametoIdVisitor ::Visit (const ListType& type, const std::string& path,
177+ Status NameToIdVisitor ::Visit (const ListType& type, const std::string& path,
162178 const std::string& short_path) {
163179 const auto & field = type.fields ()[0 ];
164180 std::string new_path = BuildPath (path, field.name (), case_sensitive_);
@@ -168,14 +184,17 @@ Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
168184 } else {
169185 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
170186 }
171- name_to_id_[new_path] = field.field_id ();
172- name_to_id_.emplace (new_short_path, field.field_id ());
187+ auto it = name_to_id_.emplace (new_path, field.field_id ());
188+ if (!it.second ) {
189+ return NotSupported (" Duplicate path in name_to_id_: {}" , new_path);
190+ }
191+ shortname_to_id_.emplace (new_short_path, field.field_id ());
173192 ICEBERG_RETURN_UNEXPECTED (
174193 VisitTypeInline (*field.type (), this , new_path, new_short_path));
175194 return {};
176195}
177196
178- Status NametoIdVisitor ::Visit (const MapType& type, const std::string& path,
197+ Status NameToIdVisitor ::Visit (const MapType& type, const std::string& path,
179198 const std::string& short_path) {
180199 std::string new_path, new_short_path;
181200 const auto & fields = type.fields ();
@@ -187,42 +206,62 @@ Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
187206 } else {
188207 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
189208 }
190- name_to_id_[new_path] = field.field_id ();
191- name_to_id_.emplace (new_short_path, field.field_id ());
209+ auto it = name_to_id_.emplace (new_path, field.field_id ());
210+ if (!it.second ) {
211+ return NotSupported (" Duplicate path in name_to_id_: {}" , new_path);
212+ }
213+ shortname_to_id_.emplace (new_short_path, field.field_id ());
192214 ICEBERG_RETURN_UNEXPECTED (
193215 VisitTypeInline (*field.type (), this , new_path, new_short_path));
194216 }
195217 return {};
196218}
197219
198- Status NametoIdVisitor ::Visit (const StructType& type, const std::string& path,
220+ Status NameToIdVisitor ::Visit (const StructType& type, const std::string& path,
199221 const std::string& short_path) {
200222 const auto & fields = type.fields ();
201223 std::string new_path, new_short_path;
202224 for (const auto & field : fields) {
203225 new_path = BuildPath (path, field.name (), case_sensitive_);
204226 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
205- name_to_id_[new_path] = field.field_id ();
206- name_to_id_.emplace (new_short_path, field.field_id ());
227+ auto it = name_to_id_.emplace (new_path, field.field_id ());
228+ if (!it.second ) {
229+ return NotSupported (" Duplicate path in name_to_id_: {}" , new_path);
230+ }
231+ shortname_to_id_.emplace (new_short_path, field.field_id ());
207232 ICEBERG_RETURN_UNEXPECTED (
208233 VisitTypeInline (*field.type (), this , new_path, new_short_path));
209234 }
210235 return {};
211236}
212237
213- Status NametoIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
238+ Status NameToIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
214239 const std::string& short_path) {
215240 return {};
216241}
217242
218- std::string NametoIdVisitor ::BuildPath (std::string_view prefix,
243+ std::string NameToIdVisitor ::BuildPath (std::string_view prefix,
219244 std::string_view field_name, bool case_sensitive) {
245+ std::string quoted_name;
246+ if (!quoting_func_) {
247+ quoted_name = std::string (field_name);
248+ } else {
249+ quoted_name = quoting_func_ (field_name);
250+ }
220251 if (case_sensitive) {
221- return prefix.empty () ? std::string (field_name)
222- : std::string (prefix) + " ." + std::string (field_name);
252+ return prefix.empty () ? quoted_name : std::string (prefix) + " ." + quoted_name;
223253 }
224- std::string lower_name (field_name) ;
254+ std::string lower_name = quoted_name ;
225255 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
226256 return prefix.empty () ? lower_name : std::string (prefix) + " ." + lower_name;
227257}
258+
259+ void NameToIdVisitor::Merge () {
260+ for (const auto & it : shortname_to_id_) {
261+ if (name_to_id_.find (it.first ) == name_to_id_.end ()) {
262+ name_to_id_[it.first ] = it.second ;
263+ }
264+ }
265+ }
266+
228267} // namespace iceberg
0 commit comments