2121
2222#include < algorithm>
2323#include < format>
24+ #include < functional>
2425
2526#include " iceberg/type.h"
2627#include " iceberg/util/formatter.h" // IWYU pragma: keep
2728#include " iceberg/util/macros.h"
2829#include " iceberg/util/visit_type.h"
2930
3031namespace iceberg {
32+
3133class IdToFieldVisitor {
3234 public:
3335 explicit IdToFieldVisitor (
@@ -40,10 +42,14 @@ class IdToFieldVisitor {
4042 std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>& id_to_field_;
4143};
4244
43- class NametoIdVisitor {
45+ class NameToIdVisitor {
4446 public:
45- explicit NametoIdVisitor (std::unordered_map<std::string, int32_t >& name_to_id,
46- bool case_sensitive_ = true );
47+ explicit NameToIdVisitor (
48+ std::unordered_map<std::string, int32_t >& name_to_id, bool case_sensitive_ = true ,
49+ std::function<std::string(std::string_view)> quoting_func_ =
50+ [](std::string_view s) { return std::string (s); });
51+ ~NameToIdVisitor ();
52+
4753 Status Visit (const ListType& type, const std::string& path,
4854 const std::string& short_path);
4955 Status Visit (const MapType& type, const std::string& path,
@@ -52,12 +58,16 @@ class NametoIdVisitor {
5258 const std::string& short_path);
5359 Status Visit (const PrimitiveType& type, const std::string& path,
5460 const std::string& short_path);
55- static std::string BuildPath (std::string_view prefix, std::string_view field_name,
56- bool case_sensitive);
61+ std::string BuildPath (std::string_view prefix, std::string_view field_name,
62+ bool case_sensitive);
63+
64+ void Merge ();
5765
5866 private:
5967 bool case_sensitive_;
6068 std::unordered_map<std::string, int32_t >& name_to_id_;
69+ std::unordered_map<std::string, int32_t > shortname_to_id_;
70+ std::function<std::string(std::string_view)> quoting_func_;
6171};
6272
6373Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
@@ -81,20 +91,20 @@ bool Schema::Equals(const Schema& other) const {
8191Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
8292 std::string_view name, bool case_sensitive) const {
8393 if (case_sensitive) {
84- ICEBERG_RETURN_UNEXPECTED (InitNameToIndexMap ());
94+ ICEBERG_RETURN_UNEXPECTED (InitNameToIdMap ());
8595 auto it = name_to_id_.find (std::string (name));
8696 if (it == name_to_id_.end ()) return std::nullopt ;
8797 return FindFieldById (it->second );
8898 }
89- ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIndexMap ());
99+ ICEBERG_RETURN_UNEXPECTED (InitLowerCaseNameToIdMap ());
90100 std::string lower_name (name);
91101 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
92102 auto it = lowercase_name_to_id_.find (lower_name);
93103 if (it == lowercase_name_to_id_.end ()) return std::nullopt ;
94104 return FindFieldById (it->second );
95105}
96106
97- Result< Status> Schema::InitIdToIndexMap () const {
107+ Status Schema::InitIdToFieldMap () const {
98108 if (!id_to_field_.empty ()) {
99109 return {};
100110 }
@@ -103,29 +113,29 @@ Result<Status> Schema::InitIdToIndexMap() const {
103113 return {};
104114}
105115
106- Result< Status> Schema::InitNameToIndexMap () const {
116+ Status Schema::InitNameToIdMap () const {
107117 if (!name_to_id_.empty ()) {
108118 return {};
109119 }
110- std::string path, short_path ;
111- NametoIdVisitor visitor (name_to_id_, true );
112- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*this , &visitor, path, short_path));
120+ NameToIdVisitor visitor (name_to_id_, /* case_sensitive= */ true ) ;
121+ ICEBERG_RETURN_UNEXPECTED (
122+ VisitTypeInline (*this , &visitor, /* path= */ " " , /* short_path= */ " " ));
113123 return {};
114124}
115125
116- Result< Status> Schema::InitLowerCaseNameToIndexMap () const {
126+ Status Schema::InitLowerCaseNameToIdMap () const {
117127 if (!lowercase_name_to_id_.empty ()) {
118128 return {};
119129 }
120- std::string path, short_path ;
121- NametoIdVisitor visitor (lowercase_name_to_id_, false );
122- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*this , &visitor, path, short_path));
130+ NameToIdVisitor visitor (lowercase_name_to_id_, /* case_sensitive= */ false ) ;
131+ ICEBERG_RETURN_UNEXPECTED (
132+ VisitTypeInline (*this , &visitor, /* path= */ " " , /* short_path= */ " " ));
123133 return {};
124134}
125135
126136Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
127137 int32_t field_id) const {
128- ICEBERG_RETURN_UNEXPECTED (InitIdToIndexMap ());
138+ ICEBERG_RETURN_UNEXPECTED (InitIdToFieldMap ());
129139 auto it = id_to_field_.find (field_id);
130140 if (it == id_to_field_.end ()) {
131141 return std::nullopt ;
@@ -148,17 +158,22 @@ Status IdToFieldVisitor::VisitNestedType(const Type& type) {
148158 const auto & nested = iceberg::internal::checked_cast<const NestedType&>(type);
149159 const auto & fields = nested.fields ();
150160 for (const auto & field : fields) {
151- id_to_field_.emplace (field.field_id (), std::cref (field));
161+ auto [it, inserted] = id_to_field_.emplace (field.field_id (), std::cref (field));
152162 ICEBERG_RETURN_UNEXPECTED (Visit (*field.type ()));
153163 }
154164 return {};
155165}
156166
157- NametoIdVisitor::NametoIdVisitor (std::unordered_map<std::string, int32_t >& name_to_id,
158- bool case_sensitive)
159- : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
167+ NameToIdVisitor::NameToIdVisitor (
168+ std::unordered_map<std::string, int32_t >& name_to_id, bool case_sensitive,
169+ std::function<std::string(std::string_view)> quoting_func)
170+ : name_to_id_(name_to_id),
171+ case_sensitive_(case_sensitive),
172+ quoting_func_(std::move(quoting_func)) {}
173+
174+ NameToIdVisitor::~NameToIdVisitor () { Merge (); }
160175
161- Status NametoIdVisitor ::Visit (const ListType& type, const std::string& path,
176+ Status NameToIdVisitor ::Visit (const ListType& type, const std::string& path,
162177 const std::string& short_path) {
163178 const auto & field = type.fields ()[0 ];
164179 std::string new_path = BuildPath (path, field.name (), case_sensitive_);
@@ -168,14 +183,18 @@ Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
168183 } else {
169184 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
170185 }
171- name_to_id_[new_path] = field.field_id ();
172- name_to_id_.emplace (new_short_path, field.field_id ());
186+ auto [it, inserted] = name_to_id_.emplace (new_path, field.field_id ());
187+ if (!inserted) {
188+ std::string msg = " Duplicate path in name_to_id_: " + new_path;
189+ return NotSupported (" {}" , msg);
190+ }
191+ shortname_to_id_.emplace (new_short_path, field.field_id ());
173192 ICEBERG_RETURN_UNEXPECTED (
174193 VisitTypeInline (*field.type (), this , new_path, new_short_path));
175194 return {};
176195}
177196
178- Status NametoIdVisitor ::Visit (const MapType& type, const std::string& path,
197+ Status NameToIdVisitor ::Visit (const MapType& type, const std::string& path,
179198 const std::string& short_path) {
180199 std::string new_path, new_short_path;
181200 const auto & fields = type.fields ();
@@ -187,42 +206,64 @@ Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
187206 } else {
188207 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
189208 }
190- name_to_id_[new_path] = field.field_id ();
191- name_to_id_.emplace (new_short_path, field.field_id ());
209+ auto [it, inserted] = name_to_id_.emplace (new_path, field.field_id ());
210+ if (!inserted) {
211+ std::string msg = " Duplicate path in name_to_id_: " + new_path;
212+ return NotSupported (" {}" , msg);
213+ }
214+ shortname_to_id_.emplace (new_short_path, field.field_id ());
192215 ICEBERG_RETURN_UNEXPECTED (
193216 VisitTypeInline (*field.type (), this , new_path, new_short_path));
194217 }
195218 return {};
196219}
197220
198- Status NametoIdVisitor ::Visit (const StructType& type, const std::string& path,
221+ Status NameToIdVisitor ::Visit (const StructType& type, const std::string& path,
199222 const std::string& short_path) {
200223 const auto & fields = type.fields ();
201224 std::string new_path, new_short_path;
202225 for (const auto & field : fields) {
203226 new_path = BuildPath (path, field.name (), case_sensitive_);
204227 new_short_path = BuildPath (short_path, field.name (), case_sensitive_);
205- name_to_id_[new_path] = field.field_id ();
206- name_to_id_.emplace (new_short_path, field.field_id ());
228+ auto [it, inserted] = name_to_id_.emplace (new_path, field.field_id ());
229+ if (!inserted) {
230+ std::string msg = " Duplicate path in name_to_id_: " + it->first ;
231+ return NotSupported (" {}" , msg);
232+ }
233+ shortname_to_id_.emplace (new_short_path, field.field_id ());
207234 ICEBERG_RETURN_UNEXPECTED (
208235 VisitTypeInline (*field.type (), this , new_path, new_short_path));
209236 }
210237 return {};
211238}
212239
213- Status NametoIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
240+ Status NameToIdVisitor ::Visit (const PrimitiveType& type, const std::string& path,
214241 const std::string& short_path) {
215242 return {};
216243}
217244
218- std::string NametoIdVisitor ::BuildPath (std::string_view prefix,
245+ std::string NameToIdVisitor ::BuildPath (std::string_view prefix,
219246 std::string_view field_name, bool case_sensitive) {
247+ std::string quoted_name;
248+ if (!quoting_func_) {
249+ quoted_name = std::string (field_name);
250+ } else {
251+ quoted_name = quoting_func_ (field_name);
252+ }
220253 if (case_sensitive) {
221- return prefix.empty () ? std::string (field_name)
222- : std::string (prefix) + " ." + std::string (field_name);
254+ return prefix.empty () ? quoted_name : std::string (prefix) + " ." + quoted_name;
223255 }
224- std::string lower_name (field_name) ;
256+ std::string lower_name = quoted_name ;
225257 std::ranges::transform (lower_name, lower_name.begin (), ::tolower);
226258 return prefix.empty () ? lower_name : std::string (prefix) + " ." + lower_name;
227259}
260+
261+ void NameToIdVisitor::Merge () {
262+ for (const auto & [key, val] : shortname_to_id_) {
263+ if (name_to_id_.find (key) == name_to_id_.end ()) {
264+ name_to_id_[key] = val;
265+ }
266+ }
267+ }
268+
228269} // namespace iceberg
0 commit comments