2525
2626#include " iceberg/exception.h"
2727#include " iceberg/util/formatter.h" // IWYU pragma: keep
28+ #include " iceberg/util/macros.h"
29+ #include " iceberg/util/string_util.h"
2830
2931namespace iceberg {
3032
31- StructType::StructType (std::vector<SchemaField> fields) : fields_(std::move(fields)) {
32- size_t index = 0 ;
33- for (const auto & field : fields_) {
34- auto [it, inserted] = field_id_to_index_.try_emplace (field.field_id (), index);
35- if (!inserted) {
36- throw IcebergError (
37- std::format (" StructType: duplicate field ID {} (field indices {} and {})" ,
38- field.field_id (), it->second , index));
39- }
40-
41- ++index;
42- }
33+ Result<std::optional<NestedType::SchemaFieldConstRef>> NestedType::GetFieldByName (
34+ std::string_view name) const {
35+ return GetFieldByName (name, /* case_sensitive=*/ true );
4336}
4437
38+ StructType::StructType (std::vector<SchemaField> fields) : fields_(std::move(fields)) {}
39+
4540TypeId StructType::type_id () const { return kTypeId ; }
4641
4742std::string StructType::ToString () const {
@@ -53,27 +48,34 @@ std::string StructType::ToString() const {
5348 return repr;
5449}
5550std::span<const SchemaField> StructType::fields () const { return fields_; }
56- std::optional<std::reference_wrapper< const SchemaField >> StructType::GetFieldById (
51+ Result< std::optional<NestedType::SchemaFieldConstRef >> StructType::GetFieldById (
5752 int32_t field_id) const {
58- auto it = field_id_to_index_.find (field_id);
59- if (it == field_id_to_index_.end ()) return std::nullopt ;
60- return fields_[it->second ];
53+ ICEBERG_RETURN_UNEXPECTED (InitFieldById ());
54+ auto it = field_by_id_.find (field_id);
55+ if (it == field_by_id_.end ()) return std::nullopt ;
56+ return it->second ;
6157}
62- std::optional<std::reference_wrapper< const SchemaField >> StructType::GetFieldByIndex (
58+ Result< std::optional<NestedType::SchemaFieldConstRef >> StructType::GetFieldByIndex (
6359 int32_t index) const {
64- if (index < 0 || index >= static_cast < int32_t >( fields_.size () )) {
65- return std:: nullopt ;
60+ if (index < 0 || static_cast < size_t >( index) >= fields_.size ()) {
61+ return InvalidArgument ( " Invalid index {} to get field from struct " , index) ;
6662 }
6763 return fields_[index];
6864}
69- std::optional<std::reference_wrapper< const SchemaField >> StructType::GetFieldByName (
70- std::string_view name) const {
71- // N.B. duplicate names are not permitted (looking at the Java
72- // implementation) so there is nothing in particular we need to do here
73- for ( const auto & field : fields_) {
74- if (field. name () == name ) {
75- return field ;
65+ Result< std::optional<NestedType::SchemaFieldConstRef >> StructType::GetFieldByName (
66+ std::string_view name, bool case_sensitive ) const {
67+ if (case_sensitive) {
68+ ICEBERG_RETURN_UNEXPECTED ( InitFieldByName ());
69+ auto it = field_by_name_. find (name);
70+ if (it != field_by_name_. end () ) {
71+ return it-> second ;
7672 }
73+ return std::nullopt ;
74+ }
75+ ICEBERG_RETURN_UNEXPECTED (InitFieldByLowerCaseName ());
76+ auto it = field_by_lowercase_name_.find (StringUtils::ToLower (name));
77+ if (it != field_by_lowercase_name_.end ()) {
78+ return it->second ;
7779 }
7880 return std::nullopt ;
7981}
@@ -84,6 +86,48 @@ bool StructType::Equals(const Type& other) const {
8486 const auto & struct_ = static_cast <const StructType&>(other);
8587 return fields_ == struct_.fields_ ;
8688}
89+ Status StructType::InitFieldById () const {
90+ if (!field_by_id_.empty ()) {
91+ return {};
92+ }
93+ for (const auto & field : fields_) {
94+ auto it = field_by_id_.try_emplace (field.field_id (), field);
95+ if (!it.second ) {
96+ return InvalidSchema (" Duplicate field id found: {} (prev name: {}, curr name: {})" ,
97+ field.field_id (), it.first ->second .get ().name (), field.name ());
98+ }
99+ }
100+ return {};
101+ }
102+ Status StructType::InitFieldByName () const {
103+ if (!field_by_name_.empty ()) {
104+ return {};
105+ }
106+ for (const auto & field : fields_) {
107+ auto it = field_by_name_.try_emplace (field.name (), field);
108+ if (!it.second ) {
109+ return InvalidSchema (" Duplicate field name found: {} (prev id: {}, curr id: {})" ,
110+ it.first ->first , it.first ->second .get ().field_id (),
111+ field.field_id ());
112+ }
113+ }
114+ return {};
115+ }
116+ Status StructType::InitFieldByLowerCaseName () const {
117+ if (!field_by_lowercase_name_.empty ()) {
118+ return {};
119+ }
120+ for (const auto & field : fields_) {
121+ auto it =
122+ field_by_lowercase_name_.try_emplace (StringUtils::ToLower (field.name ()), field);
123+ if (!it.second ) {
124+ return InvalidSchema (
125+ " Duplicate lowercase field name found: {} (prev id: {}, curr id: {})" ,
126+ it.first ->first , it.first ->second .get ().field_id (), field.field_id ());
127+ }
128+ }
129+ return {};
130+ }
87131
88132ListType::ListType (SchemaField element) : element_(std::move(element)) {
89133 if (element_.name () != kElementName ) {
@@ -105,23 +149,29 @@ std::string ListType::ToString() const {
105149 return repr;
106150}
107151std::span<const SchemaField> ListType::fields () const { return {&element_, 1 }; }
108- std::optional<std::reference_wrapper< const SchemaField >> ListType::GetFieldById (
152+ Result< std::optional<NestedType::SchemaFieldConstRef >> ListType::GetFieldById (
109153 int32_t field_id) const {
110154 if (field_id == element_.field_id ()) {
111155 return std::cref (element_);
112156 }
113157 return std::nullopt ;
114158}
115- std::optional<std::reference_wrapper< const SchemaField >> ListType::GetFieldByIndex (
159+ Result< std::optional<NestedType::SchemaFieldConstRef >> ListType::GetFieldByIndex (
116160 int index) const {
117161 if (index == 0 ) {
118162 return std::cref (element_);
119163 }
120- return std:: nullopt ;
164+ return InvalidArgument ( " Invalid index {} to get field from list " , index) ;
121165}
122- std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByName (
123- std::string_view name) const {
124- if (name == element_.name ()) {
166+ Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldByName (
167+ std::string_view name, bool case_sensitive) const {
168+ if (case_sensitive) {
169+ if (name == kElementName ) {
170+ return std::cref (element_);
171+ }
172+ return std::nullopt ;
173+ }
174+ if (StringUtils::ToLower (name) == kElementName ) {
125175 return std::cref (element_);
126176 }
127177 return std::nullopt ;
@@ -159,7 +209,7 @@ std::string MapType::ToString() const {
159209 return repr;
160210}
161211std::span<const SchemaField> MapType::fields () const { return fields_; }
162- std::optional<std::reference_wrapper< const SchemaField >> MapType::GetFieldById (
212+ Result< std::optional<NestedType::SchemaFieldConstRef >> MapType::GetFieldById (
163213 int32_t field_id) const {
164214 if (field_id == key ().field_id ()) {
165215 return key ();
@@ -168,20 +218,29 @@ std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldById(
168218 }
169219 return std::nullopt ;
170220}
171- std::optional<std::reference_wrapper< const SchemaField >> MapType::GetFieldByIndex (
221+ Result< std::optional<NestedType::SchemaFieldConstRef >> MapType::GetFieldByIndex (
172222 int32_t index) const {
173223 if (index == 0 ) {
174224 return key ();
175225 } else if (index == 1 ) {
176226 return value ();
177227 }
178- return std:: nullopt ;
228+ return InvalidArgument ( " Invalid index {} to get field from map " , index) ;
179229}
180- std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByName (
181- std::string_view name) const {
182- if (name == kKeyName ) {
230+ Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldByName (
231+ std::string_view name, bool case_sensitive) const {
232+ if (case_sensitive) {
233+ if (name == kKeyName ) {
234+ return key ();
235+ } else if (name == kValueName ) {
236+ return value ();
237+ }
238+ return std::nullopt ;
239+ }
240+ const auto lower_case_name = StringUtils::ToLower (name);
241+ if (lower_case_name == kKeyName ) {
183242 return key ();
184- } else if (name == kValueName ) {
243+ } else if (lower_case_name == kValueName ) {
185244 return value ();
186245 }
187246 return std::nullopt ;
0 commit comments