Skip to content

Commit ef94f05

Browse files
author
nullccxsy
committed
Fix: fix comments, add duplicate name check, and resolve duplicate result call in macros
- Added logic to detect and handle duplicate names in NameToIdVisitor to prevent invalid schema errors. - Fixed duplicate result call issue in src/iceberg/util/macros.h by optimizing ICEBERG_RETURN_UNEXPECTED macro.
1 parent 8ba65bf commit ef94f05

File tree

4 files changed

+378
-171
lines changed

4 files changed

+378
-171
lines changed

src/iceberg/schema.cc

Lines changed: 72 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@
2121

2222
#include <algorithm>
2323
#include <format>
24+
#include <functional>
2425

2526
#include "iceberg/type.h"
2627
#include "iceberg/util/formatter.h" // IWYU pragma: keep
2728
#include "iceberg/util/macros.h"
2829
#include "iceberg/util/visit_type.h"
2930

3031
namespace iceberg {
32+
3133
class IdToFieldVisitor {
3234
public:
3335
explicit IdToFieldVisitor(
@@ -40,10 +42,14 @@ class IdToFieldVisitor {
4042
std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& id_to_field_;
4143
};
4244

43-
class NametoIdVisitor {
45+
class NameToIdVisitor {
4446
public:
45-
explicit NametoIdVisitor(std::unordered_map<std::string, int32_t>& name_to_id,
46-
bool case_sensitive_ = true);
47+
explicit NameToIdVisitor(
48+
std::unordered_map<std::string, int32_t>& name_to_id, bool case_sensitive_ = true,
49+
std::function<std::string(std::string_view)> quoting_func_ =
50+
[](std::string_view s) { return std::string(s); });
51+
~NameToIdVisitor();
52+
4753
Status Visit(const ListType& type, const std::string& path,
4854
const std::string& short_path);
4955
Status Visit(const MapType& type, const std::string& path,
@@ -52,12 +58,16 @@ class NametoIdVisitor {
5258
const std::string& short_path);
5359
Status Visit(const PrimitiveType& type, const std::string& path,
5460
const std::string& short_path);
55-
static std::string BuildPath(std::string_view prefix, std::string_view field_name,
56-
bool case_sensitive);
61+
std::string BuildPath(std::string_view prefix, std::string_view field_name,
62+
bool case_sensitive);
63+
64+
void Merge();
5765

5866
private:
5967
bool case_sensitive_;
6068
std::unordered_map<std::string, int32_t>& name_to_id_;
69+
std::unordered_map<std::string, int32_t> shortname_to_id_;
70+
std::function<std::string(std::string_view)> quoting_func_;
6171
};
6272

6373
Schema::Schema(std::vector<SchemaField> fields, std::optional<int32_t> schema_id)
@@ -81,20 +91,20 @@ bool Schema::Equals(const Schema& other) const {
8191
Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName(
8292
std::string_view name, bool case_sensitive) const {
8393
if (case_sensitive) {
84-
ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
94+
ICEBERG_RETURN_UNEXPECTED(InitNameToIdMap());
8595
auto it = name_to_id_.find(std::string(name));
8696
if (it == name_to_id_.end()) return std::nullopt;
8797
return FindFieldById(it->second);
8898
}
89-
ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
99+
ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIdMap());
90100
std::string lower_name(name);
91101
std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
92102
auto it = lowercase_name_to_id_.find(lower_name);
93103
if (it == lowercase_name_to_id_.end()) return std::nullopt;
94104
return FindFieldById(it->second);
95105
}
96106

97-
Result<Status> Schema::InitIdToIndexMap() const {
107+
Status Schema::InitIdToFieldMap() const {
98108
if (!id_to_field_.empty()) {
99109
return {};
100110
}
@@ -103,29 +113,29 @@ Result<Status> Schema::InitIdToIndexMap() const {
103113
return {};
104114
}
105115

106-
Result<Status> Schema::InitNameToIndexMap() const {
116+
Status Schema::InitNameToIdMap() const {
107117
if (!name_to_id_.empty()) {
108118
return {};
109119
}
110-
std::string path, short_path;
111-
NametoIdVisitor visitor(name_to_id_, true);
112-
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, short_path));
120+
NameToIdVisitor visitor(name_to_id_, /*case_sensitive=*/true);
121+
ICEBERG_RETURN_UNEXPECTED(
122+
VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
113123
return {};
114124
}
115125

116-
Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
126+
Status Schema::InitLowerCaseNameToIdMap() const {
117127
if (!lowercase_name_to_id_.empty()) {
118128
return {};
119129
}
120-
std::string path, short_path;
121-
NametoIdVisitor visitor(lowercase_name_to_id_, false);
122-
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, short_path));
130+
NameToIdVisitor visitor(lowercase_name_to_id_, /*case_sensitive=*/false);
131+
ICEBERG_RETURN_UNEXPECTED(
132+
VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
123133
return {};
124134
}
125135

126136
Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById(
127137
int32_t field_id) const {
128-
ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
138+
ICEBERG_RETURN_UNEXPECTED(InitIdToFieldMap());
129139
auto it = id_to_field_.find(field_id);
130140
if (it == id_to_field_.end()) {
131141
return std::nullopt;
@@ -154,11 +164,16 @@ Status IdToFieldVisitor::VisitNestedType(const Type& type) {
154164
return {};
155165
}
156166

157-
NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>& name_to_id,
158-
bool case_sensitive)
159-
: name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
167+
NameToIdVisitor::NameToIdVisitor(
168+
std::unordered_map<std::string, int32_t>& name_to_id, bool case_sensitive,
169+
std::function<std::string(std::string_view)> quoting_func)
170+
: name_to_id_(name_to_id),
171+
case_sensitive_(case_sensitive),
172+
quoting_func_(std::move(quoting_func)) {}
173+
174+
NameToIdVisitor::~NameToIdVisitor() { Merge(); }
160175

161-
Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
176+
Status NameToIdVisitor::Visit(const ListType& type, const std::string& path,
162177
const std::string& short_path) {
163178
const auto& field = type.fields()[0];
164179
std::string new_path = BuildPath(path, field.name(), case_sensitive_);
@@ -168,14 +183,17 @@ Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
168183
} else {
169184
new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
170185
}
171-
name_to_id_[new_path] = field.field_id();
172-
name_to_id_.emplace(new_short_path, field.field_id());
186+
auto it = name_to_id_.emplace(new_path, field.field_id());
187+
if (!it.second) {
188+
return NotSupported("Duplicate path in name_to_id_: {}", new_path);
189+
}
190+
shortname_to_id_.emplace(new_short_path, field.field_id());
173191
ICEBERG_RETURN_UNEXPECTED(
174192
VisitTypeInline(*field.type(), this, new_path, new_short_path));
175193
return {};
176194
}
177195

178-
Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
196+
Status NameToIdVisitor::Visit(const MapType& type, const std::string& path,
179197
const std::string& short_path) {
180198
std::string new_path, new_short_path;
181199
const auto& fields = type.fields();
@@ -187,42 +205,62 @@ Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
187205
} else {
188206
new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
189207
}
190-
name_to_id_[new_path] = field.field_id();
191-
name_to_id_.emplace(new_short_path, field.field_id());
208+
auto it = name_to_id_.emplace(new_path, field.field_id());
209+
if (!it.second) {
210+
return NotSupported("Duplicate path in name_to_id_: {}", new_path);
211+
}
212+
shortname_to_id_.emplace(new_short_path, field.field_id());
192213
ICEBERG_RETURN_UNEXPECTED(
193214
VisitTypeInline(*field.type(), this, new_path, new_short_path));
194215
}
195216
return {};
196217
}
197218

198-
Status NametoIdVisitor::Visit(const StructType& type, const std::string& path,
219+
Status NameToIdVisitor::Visit(const StructType& type, const std::string& path,
199220
const std::string& short_path) {
200221
const auto& fields = type.fields();
201222
std::string new_path, new_short_path;
202223
for (const auto& field : fields) {
203224
new_path = BuildPath(path, field.name(), case_sensitive_);
204225
new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
205-
name_to_id_[new_path] = field.field_id();
206-
name_to_id_.emplace(new_short_path, field.field_id());
226+
auto it = name_to_id_.emplace(new_path, field.field_id());
227+
if (!it.second) {
228+
return NotSupported("Duplicate path in name_to_id_: {}", new_path);
229+
}
230+
shortname_to_id_.emplace(new_short_path, field.field_id());
207231
ICEBERG_RETURN_UNEXPECTED(
208232
VisitTypeInline(*field.type(), this, new_path, new_short_path));
209233
}
210234
return {};
211235
}
212236

213-
Status NametoIdVisitor::Visit(const PrimitiveType& type, const std::string& path,
237+
Status NameToIdVisitor::Visit(const PrimitiveType& type, const std::string& path,
214238
const std::string& short_path) {
215239
return {};
216240
}
217241

218-
std::string NametoIdVisitor::BuildPath(std::string_view prefix,
242+
std::string NameToIdVisitor::BuildPath(std::string_view prefix,
219243
std::string_view field_name, bool case_sensitive) {
244+
std::string quoted_name;
245+
if (!quoting_func_) {
246+
quoted_name = std::string(field_name);
247+
} else {
248+
quoted_name = quoting_func_(field_name);
249+
}
220250
if (case_sensitive) {
221-
return prefix.empty() ? std::string(field_name)
222-
: std::string(prefix) + "." + std::string(field_name);
251+
return prefix.empty() ? quoted_name : std::string(prefix) + "." + quoted_name;
223252
}
224-
std::string lower_name(field_name);
253+
std::string lower_name = quoted_name;
225254
std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
226255
return prefix.empty() ? lower_name : std::string(prefix) + "." + lower_name;
227256
}
257+
258+
void NameToIdVisitor::Merge() {
259+
for (const auto& it : shortname_to_id_) {
260+
if (name_to_id_.find(it.first) == name_to_id_.end()) {
261+
name_to_id_[it.first] = it.second;
262+
}
263+
}
264+
}
265+
228266
} // namespace iceberg

src/iceberg/schema.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ class ICEBERG_EXPORT Schema : public StructType {
6262
/// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', if its
6363
/// value include a structs with field 'x' wil produce short name 'm.x' in addition to
6464
/// canonical name 'm.value.x'
65+
/// FIXME: Currently only handles ASCII lowercase conversion; extend to support
66+
/// non-ASCII characters (e.g., using std::towlower or ICU)
6567
[[nodiscard]] Result<std::optional<std::reference_wrapper<const SchemaField>>>
6668
FindFieldByName(std::string_view name, bool case_sensitive = true) const;
6769

@@ -85,8 +87,11 @@ class ICEBERG_EXPORT Schema : public StructType {
8587

8688
const std::optional<int32_t> schema_id_;
8789

88-
Result<Status> InitIdToIndexMap() const;
89-
Result<Status> InitNameToIndexMap() const;
90-
Result<Status> InitLowerCaseNameToIndexMap() const;
90+
// TODO: Address potential concurrency issues in lazy initialization (e.g., use
91+
// std::call_once)
92+
Status InitIdToFieldMap() const;
93+
Status InitNameToIdMap() const;
94+
Status InitLowerCaseNameToIdMap() const;
9195
};
96+
9297
} // namespace iceberg

src/iceberg/util/macros.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919

2020
#pragma once
2121

22-
#define ICEBERG_RETURN_UNEXPECTED(result) \
23-
if (!result) [[unlikely]] { \
24-
return std::unexpected<Error>(result.error()); \
25-
}
22+
#define ICEBERG_RETURN_UNEXPECTED(result) \
23+
do { \
24+
auto&& iceberg_temp_result = (result); \
25+
if (!iceberg_temp_result) [[unlikely]] { \
26+
return std::unexpected<Error>(iceberg_temp_result.error()); \
27+
} \
28+
} while (false);
2629

2730
#define ICEBERG_ASSIGN_OR_RAISE_IMPL(result_name, lhs, rexpr) \
2831
auto&& result_name = (rexpr); \

0 commit comments

Comments
 (0)