Skip to content

Commit dc2d6f3

Browse files
author
nullccxsy
committed
fix comments
1 parent f0e7dfe commit dc2d6f3

File tree

5 files changed

+420
-687
lines changed

5 files changed

+420
-687
lines changed

src/iceberg/schema.cc

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -274,18 +274,14 @@ class PruneColumnVisitor {
274274

275275
/// \brief Dispatch on the runtime type id to the matching typed Visit overload.
///
/// Struct, list and map types are down-cast and forwarded to their dedicated
/// overloads; every other type id yields a null pointer.
///
/// \param type Type to visit. It is dereferenced without a null check, so it
/// must not be null.
/// \return The result of the typed overload, or nullptr for any other type id.
Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<Type>& type) const {
276276
switch (type->type_id()) {
277-
case TypeId::kStruct: {
277+
case TypeId::kStruct:
278278
return Visit(internal::checked_pointer_cast<StructType>(type));
279-
}
280-
case TypeId::kList: {
279+
case TypeId::kList:
281280
return Visit(internal::checked_pointer_cast<ListType>(type));
282-
}
283-
case TypeId::kMap: {
281+
case TypeId::kMap:
284282
return Visit(internal::checked_pointer_cast<MapType>(type));
285-
}
286-
default: {
283+
default:
287284
return nullptr;
288-
}
289285
}
290286
}
291287

@@ -353,21 +349,11 @@ class PruneColumnVisitor {
353349

354350
private:
355351
const std::unordered_set<int32_t>& selected_ids_;
356-
bool select_full_types_;
352+
const bool select_full_types_;
357353
};
358354

359-
Result<std::unique_ptr<const Schema>> Schema::Select(std::span<const std::string> names,
360-
bool case_sensitive) const {
361-
return SelectInternal(names, case_sensitive);
362-
}
363-
364-
Result<std::unique_ptr<const Schema>> Schema::Select(
365-
const std::initializer_list<std::string>& names, bool case_sensitive) const {
366-
return SelectInternal(names, case_sensitive);
367-
}
368-
369-
Result<std::unique_ptr<const Schema>> Schema::SelectInternal(
370-
std::span<const std::string> names, bool case_sensitive) const {
355+
Result<std::unique_ptr<Schema>> Schema::Select(std::span<const std::string> names,
356+
bool case_sensitive) const {
371357
const std::string kAllColumns = "*";
372358
if (std::ranges::find(names, kAllColumns) != names.end()) {
373359
return std::make_unique<Schema>(*this);
@@ -382,9 +368,8 @@ Result<std::unique_ptr<const Schema>> Schema::SelectInternal(
382368
}
383369

384370
PruneColumnVisitor visitor(selected_ids, /*select_full_types=*/true);
385-
auto self = std::shared_ptr<const StructType>(this, [](const StructType*) {});
386-
ICEBERG_ASSIGN_OR_RAISE(auto result,
387-
visitor.Visit(std::const_pointer_cast<StructType>(self)));
371+
ICEBERG_ASSIGN_OR_RAISE(
372+
auto result, visitor.Visit(std::shared_ptr<StructType>(ToStructType(*this))));
388373

389374
if (!result) {
390375
return std::make_unique<Schema>(std::vector<SchemaField>{}, schema_id_);
@@ -394,17 +379,16 @@ Result<std::unique_ptr<const Schema>> Schema::SelectInternal(
394379
return InvalidSchema("Projected type must be a struct type");
395380
}
396381

397-
auto& projected_struct = internal::checked_cast<const StructType&>(*result);
398-
399-
return FromStructType(std::move(const_cast<StructType&>(projected_struct)), schema_id_);
382+
return FromStructType(std::move(const_cast<StructType&>(
383+
internal::checked_cast<const StructType&>(*result))),
384+
schema_id_);
400385
}
401386

402-
Result<std::unique_ptr<const Schema>> Schema::Project(
403-
std::unordered_set<int32_t>& field_ids) const {
387+
Result<std::unique_ptr<Schema>> Schema::Project(
388+
const std::unordered_set<int32_t>& field_ids) const {
404389
PruneColumnVisitor visitor(field_ids, /*select_full_types=*/false);
405-
auto self = std::shared_ptr<const StructType>(this, [](const StructType*) {});
406-
ICEBERG_ASSIGN_OR_RAISE(auto result,
407-
visitor.Visit(std::const_pointer_cast<StructType>(self)));
390+
ICEBERG_ASSIGN_OR_RAISE(
391+
auto result, visitor.Visit(std::shared_ptr<StructType>(ToStructType(*this))));
408392

409393
if (!result) {
410394
return std::make_unique<Schema>(std::vector<SchemaField>{}, schema_id_);
@@ -414,8 +398,9 @@ Result<std::unique_ptr<const Schema>> Schema::Project(
414398
return InvalidSchema("Projected type must be a struct type");
415399
}
416400

417-
auto& projected_struct = internal::checked_cast<const StructType&>(*result);
418-
return FromStructType(std::move(const_cast<StructType&>(projected_struct)), schema_id_);
401+
return FromStructType(std::move(const_cast<StructType&>(
402+
internal::checked_cast<const StructType&>(*result))),
403+
schema_id_);
419404
}
420405

421406
} // namespace iceberg

src/iceberg/schema.h

Lines changed: 12 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -75,55 +75,29 @@ class ICEBERG_EXPORT Schema : public StructType {
7575

7676
/// \brief Creates a projected schema from selected field names.
7777
///
78-
/// Selects fields by their names using dot notation for nested fields.
79-
/// Supports both canonical names (e.g., "user.address.street") and short names
80-
/// (e.g., "user.street" for map values, "list.element" for list elements).
81-
///
82-
/// \param names Field names to select (supports nested field paths)
83-
/// \param case_sensitive Whether name matching is case-sensitive (default: true)
84-
/// \return Projected schema containing only the specified fields
85-
Result<std::unique_ptr<const Schema>> Select(std::span<const std::string> names,
86-
bool case_sensitive = true) const;
87-
88-
/// \brief Creates a projected schema from selected field names.
89-
Result<std::unique_ptr<const Schema>> Select(
90-
const std::initializer_list<std::string>& names, bool case_sensitive = true) const;
91-
92-
/// \brief Creates a projected schema from selected field names.
93-
template <typename... Args>
94-
Result<std::unique_ptr<const Schema>> Select(Args&&... names,
95-
bool case_sensitive = true) const {
96-
static_assert(((std::is_convertible_v<Args, std::string> ||
97-
std::is_convertible_v<Args, std::string>) &&
98-
...),
99-
"All arguments must be convertible to std::string");
100-
return select({(names)...}, case_sensitive);
101-
}
78+
/// \param names Names of the fields to select; nested field names are dot-concatenated.
79+
/// \param case_sensitive Whether name matching is case-sensitive (default: true).
80+
/// \return Projected schema containing only selected fields.
81+
/// \note If the field name of a nested type has been selected, all of its
82+
/// sub-fields will be selected.
83+
Result<std::unique_ptr<Schema>> Select(std::span<const std::string> names,
84+
bool case_sensitive = true) const;
10285

10386
/// \brief Creates a projected schema from selected field IDs.
10487
///
105-
/// Selects fields by their numeric IDs. More efficient than Select() when you
106-
/// already know the field IDs. Handles recursive projection of nested structs.
107-
///
10888
/// \param field_ids Set of field IDs to select
109-
/// \return Projected schema containing only the specified fields
110-
///
111-
/// \note When a struct field ID is specified:
112-
/// - If nested field IDs are also in field_ids, they are recursively projected
113-
/// - If no nested field IDs are in field_ids, an empty struct is included
114-
/// - List/Map types cannot be explicitly projected (returns error)
115-
Result<std::unique_ptr<const Schema>> Project(
116-
std::unordered_set<int32_t>& field_ids) const;
89+
/// \return Projected schema containing only the specified fields.
90+
/// \note Field ID of a nested field may not be projected unless at least
91+
/// one of its sub-fields has been projected.
92+
Result<std::unique_ptr<Schema>> Project(
93+
const std::unordered_set<int32_t>& field_ids) const;
11794

11895
friend bool operator==(const Schema& lhs, const Schema& rhs) { return lhs.Equals(rhs); }
11996

12097
private:
12198
/// \brief Compare two schemas for equality.
12299
bool Equals(const Schema& other) const;
123100

124-
Result<std::unique_ptr<const Schema>> SelectInternal(std::span<const std::string> names,
125-
bool case_sensitive) const;
126-
127101
// TODO(nullccxsy): Address potential concurrency issues in lazy initialization (e.g.,
128102
// use std::call_once)
129103
Status InitIdToFieldMap() const;

src/iceberg/schema_internal.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,4 +324,9 @@ Result<std::unique_ptr<Schema>> FromArrowSchema(const ArrowSchema& schema,
324324
return FromStructType(std::move(struct_type), schema_id);
325325
}
326326

327+
/// \brief Build a standalone StructType from the fields of a schema.
///
/// Only the field list is carried over; nothing else from the schema is read.
///
/// \param schema Schema whose fields are copied.
/// \return A newly allocated StructType owning a copy of the schema's fields.
std::unique_ptr<StructType> ToStructType(const Schema& schema) {
328+
// Copy the fields into a fresh vector so the new StructType owns its own list.
std::vector<SchemaField> fields(schema.fields().begin(), schema.fields().end());
329+
return std::make_unique<StructType>(std::move(fields));
330+
}
331+
327332
} // namespace iceberg

src/iceberg/schema_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,6 @@ Result<std::unique_ptr<Schema>> FromArrowSchema(const ArrowSchema& schema,
5858
std::unique_ptr<Schema> FromStructType(StructType&& struct_type,
5959
std::optional<int32_t> schema_id);
6060

61+
/// \brief Create a StructType holding a copy of the given schema's fields.
std::unique_ptr<StructType> ToStructType(const Schema& schema);
62+
6163
} // namespace iceberg

0 commit comments

Comments
 (0)