Skip to content

Commit 0f85748

Browse files
author
nullccxsy
committed
Fix: fix comments, add duplicate name check, and resolve duplicate result call in macros
- Added logic to detect and handle duplicate names in NameToIdVisitor to prevent invalid schema errors. - Fixed duplicate result call issue in src/iceberg/util/macros.h by optimizing ICEBERG_RETURN_UNEXPECTED macro.
1 parent 8ba65bf commit 0f85748

File tree

4 files changed

+379
-171
lines changed

4 files changed

+379
-171
lines changed

src/iceberg/schema.cc

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@
2121

2222
#include <algorithm>
2323
#include <format>
24+
#include <functional>
2425

2526
#include "iceberg/type.h"
2627
#include "iceberg/util/formatter.h" // IWYU pragma: keep
2728
#include "iceberg/util/macros.h"
2829
#include "iceberg/util/visit_type.h"
2930

3031
namespace iceberg {
32+
3133
class IdToFieldVisitor {
3234
public:
3335
explicit IdToFieldVisitor(
@@ -40,10 +42,14 @@ class IdToFieldVisitor {
4042
std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& id_to_field_;
4143
};
4244

43-
class NametoIdVisitor {
45+
class NameToIdVisitor {
4446
public:
45-
explicit NametoIdVisitor(std::unordered_map<std::string, int32_t>& name_to_id,
46-
bool case_sensitive_ = true);
47+
explicit NameToIdVisitor(
48+
std::unordered_map<std::string, int32_t>& name_to_id, bool case_sensitive_ = true,
49+
std::function<std::string(std::string_view)> quoting_func_ =
50+
[](std::string_view s) { return std::string(s); });
51+
~NameToIdVisitor();
52+
4753
Status Visit(const ListType& type, const std::string& path,
4854
const std::string& short_path);
4955
Status Visit(const MapType& type, const std::string& path,
@@ -52,12 +58,17 @@ class NametoIdVisitor {
5258
const std::string& short_path);
5359
Status Visit(const PrimitiveType& type, const std::string& path,
5460
const std::string& short_path);
55-
static std::string BuildPath(std::string_view prefix, std::string_view field_name,
56-
bool case_sensitive);
61+
62+
private:
63+
std::string BuildPath(std::string_view prefix, std::string_view field_name,
64+
bool case_sensitive);
65+
void Merge();
5766

5867
private:
5968
bool case_sensitive_;
6069
std::unordered_map<std::string, int32_t>& name_to_id_;
70+
std::unordered_map<std::string, int32_t> shortname_to_id_;
71+
std::function<std::string(std::string_view)> quoting_func_;
6172
};
6273

6374
Schema::Schema(std::vector<SchemaField> fields, std::optional<int32_t> schema_id)
@@ -81,20 +92,20 @@ bool Schema::Equals(const Schema& other) const {
8192
Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName(
8293
std::string_view name, bool case_sensitive) const {
8394
if (case_sensitive) {
84-
ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
95+
ICEBERG_RETURN_UNEXPECTED(InitNameToIdMap());
8596
auto it = name_to_id_.find(std::string(name));
8697
if (it == name_to_id_.end()) return std::nullopt;
8798
return FindFieldById(it->second);
8899
}
89-
ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
100+
ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIdMap());
90101
std::string lower_name(name);
91102
std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
92103
auto it = lowercase_name_to_id_.find(lower_name);
93104
if (it == lowercase_name_to_id_.end()) return std::nullopt;
94105
return FindFieldById(it->second);
95106
}
96107

97-
Result<Status> Schema::InitIdToIndexMap() const {
108+
Status Schema::InitIdToFieldMap() const {
98109
if (!id_to_field_.empty()) {
99110
return {};
100111
}
@@ -103,29 +114,29 @@ Result<Status> Schema::InitIdToIndexMap() const {
103114
return {};
104115
}
105116

106-
Result<Status> Schema::InitNameToIndexMap() const {
117+
Status Schema::InitNameToIdMap() const {
107118
if (!name_to_id_.empty()) {
108119
return {};
109120
}
110-
std::string path, short_path;
111-
NametoIdVisitor visitor(name_to_id_, true);
112-
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, short_path));
121+
NameToIdVisitor visitor(name_to_id_, /*case_sensitive=*/true);
122+
ICEBERG_RETURN_UNEXPECTED(
123+
VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
113124
return {};
114125
}
115126

116-
Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
127+
Status Schema::InitLowerCaseNameToIdMap() const {
117128
if (!lowercase_name_to_id_.empty()) {
118129
return {};
119130
}
120-
std::string path, short_path;
121-
NametoIdVisitor visitor(lowercase_name_to_id_, false);
122-
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, short_path));
131+
NameToIdVisitor visitor(lowercase_name_to_id_, /*case_sensitive=*/false);
132+
ICEBERG_RETURN_UNEXPECTED(
133+
VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
123134
return {};
124135
}
125136

126137
Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById(
127138
int32_t field_id) const {
128-
ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
139+
ICEBERG_RETURN_UNEXPECTED(InitIdToFieldMap());
129140
auto it = id_to_field_.find(field_id);
130141
if (it == id_to_field_.end()) {
131142
return std::nullopt;
@@ -154,11 +165,16 @@ Status IdToFieldVisitor::VisitNestedType(const Type& type) {
154165
return {};
155166
}
156167

157-
NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>& name_to_id,
158-
bool case_sensitive)
159-
: name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
168+
NameToIdVisitor::NameToIdVisitor(
169+
std::unordered_map<std::string, int32_t>& name_to_id, bool case_sensitive,
170+
std::function<std::string(std::string_view)> quoting_func)
171+
: name_to_id_(name_to_id),
172+
case_sensitive_(case_sensitive),
173+
quoting_func_(std::move(quoting_func)) {}
174+
175+
NameToIdVisitor::~NameToIdVisitor() { Merge(); }
160176

161-
Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
177+
Status NameToIdVisitor::Visit(const ListType& type, const std::string& path,
162178
const std::string& short_path) {
163179
const auto& field = type.fields()[0];
164180
std::string new_path = BuildPath(path, field.name(), case_sensitive_);
@@ -168,14 +184,17 @@ Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
168184
} else {
169185
new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
170186
}
171-
name_to_id_[new_path] = field.field_id();
172-
name_to_id_.emplace(new_short_path, field.field_id());
187+
auto it = name_to_id_.emplace(new_path, field.field_id());
188+
if (!it.second) {
189+
return NotSupported("Duplicate path in name_to_id_: {}", new_path);
190+
}
191+
shortname_to_id_.emplace(new_short_path, field.field_id());
173192
ICEBERG_RETURN_UNEXPECTED(
174193
VisitTypeInline(*field.type(), this, new_path, new_short_path));
175194
return {};
176195
}
177196

178-
Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
197+
Status NameToIdVisitor::Visit(const MapType& type, const std::string& path,
179198
const std::string& short_path) {
180199
std::string new_path, new_short_path;
181200
const auto& fields = type.fields();
@@ -187,42 +206,62 @@ Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
187206
} else {
188207
new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
189208
}
190-
name_to_id_[new_path] = field.field_id();
191-
name_to_id_.emplace(new_short_path, field.field_id());
209+
auto it = name_to_id_.emplace(new_path, field.field_id());
210+
if (!it.second) {
211+
return NotSupported("Duplicate path in name_to_id_: {}", new_path);
212+
}
213+
shortname_to_id_.emplace(new_short_path, field.field_id());
192214
ICEBERG_RETURN_UNEXPECTED(
193215
VisitTypeInline(*field.type(), this, new_path, new_short_path));
194216
}
195217
return {};
196218
}
197219

198-
Status NametoIdVisitor::Visit(const StructType& type, const std::string& path,
220+
Status NameToIdVisitor::Visit(const StructType& type, const std::string& path,
199221
const std::string& short_path) {
200222
const auto& fields = type.fields();
201223
std::string new_path, new_short_path;
202224
for (const auto& field : fields) {
203225
new_path = BuildPath(path, field.name(), case_sensitive_);
204226
new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
205-
name_to_id_[new_path] = field.field_id();
206-
name_to_id_.emplace(new_short_path, field.field_id());
227+
auto it = name_to_id_.emplace(new_path, field.field_id());
228+
if (!it.second) {
229+
return NotSupported("Duplicate path in name_to_id_: {}", new_path);
230+
}
231+
shortname_to_id_.emplace(new_short_path, field.field_id());
207232
ICEBERG_RETURN_UNEXPECTED(
208233
VisitTypeInline(*field.type(), this, new_path, new_short_path));
209234
}
210235
return {};
211236
}
212237

213-
Status NametoIdVisitor::Visit(const PrimitiveType& type, const std::string& path,
238+
Status NameToIdVisitor::Visit(const PrimitiveType& type, const std::string& path,
214239
const std::string& short_path) {
215240
return {};
216241
}
217242

218-
std::string NametoIdVisitor::BuildPath(std::string_view prefix,
243+
std::string NameToIdVisitor::BuildPath(std::string_view prefix,
219244
std::string_view field_name, bool case_sensitive) {
245+
std::string quoted_name;
246+
if (!quoting_func_) {
247+
quoted_name = std::string(field_name);
248+
} else {
249+
quoted_name = quoting_func_(field_name);
250+
}
220251
if (case_sensitive) {
221-
return prefix.empty() ? std::string(field_name)
222-
: std::string(prefix) + "." + std::string(field_name);
252+
return prefix.empty() ? quoted_name : std::string(prefix) + "." + quoted_name;
223253
}
224-
std::string lower_name(field_name);
254+
std::string lower_name = quoted_name;
225255
std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
226256
return prefix.empty() ? lower_name : std::string(prefix) + "." + lower_name;
227257
}
258+
259+
void NameToIdVisitor::Merge() {
260+
for (const auto& it : shortname_to_id_) {
261+
if (name_to_id_.find(it.first) == name_to_id_.end()) {
262+
name_to_id_[it.first] = it.second;
263+
}
264+
}
265+
}
266+
228267
} // namespace iceberg

src/iceberg/schema.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ class ICEBERG_EXPORT Schema : public StructType {
6262
/// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', if its
6363
/// value include a structs with field 'x' wil produce short name 'm.x' in addition to
6464
/// canonical name 'm.value.x'
65+
/// FIXME: Currently only handles ASCII lowercase conversion; extend to support
66+
/// non-ASCII characters (e.g., using std::towlower or ICU)
6567
[[nodiscard]] Result<std::optional<std::reference_wrapper<const SchemaField>>>
6668
FindFieldByName(std::string_view name, bool case_sensitive = true) const;
6769

@@ -85,8 +87,11 @@ class ICEBERG_EXPORT Schema : public StructType {
8587

8688
const std::optional<int32_t> schema_id_;
8789

88-
Result<Status> InitIdToIndexMap() const;
89-
Result<Status> InitNameToIndexMap() const;
90-
Result<Status> InitLowerCaseNameToIndexMap() const;
90+
// TODO: Address potential concurrency issues in lazy initialization (e.g., use
91+
// std::call_once)
92+
Status InitIdToFieldMap() const;
93+
Status InitNameToIdMap() const;
94+
Status InitLowerCaseNameToIdMap() const;
9195
};
96+
9297
} // namespace iceberg

src/iceberg/util/macros.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919

2020
#pragma once
2121

22-
#define ICEBERG_RETURN_UNEXPECTED(result) \
23-
if (!result) [[unlikely]] { \
24-
return std::unexpected<Error>(result.error()); \
25-
}
22+
#define ICEBERG_RETURN_UNEXPECTED(result) \
23+
do { \
24+
auto&& iceberg_temp_result = (result); \
25+
if (!iceberg_temp_result) [[unlikely]] { \
26+
return std::unexpected<Error>(iceberg_temp_result.error()); \
27+
} \
28+
} while (false);
2629

2730
#define ICEBERG_ASSIGN_OR_RAISE_IMPL(result_name, lhs, rexpr) \
2831
auto&& result_name = (rexpr); \

0 commit comments

Comments
 (0)