Skip to content

Commit 6cbd651

Browse files
committed
fix comments
1 parent 3921055 commit 6cbd651

File tree

5 files changed

+45
-54
lines changed

5 files changed

+45
-54
lines changed

src/iceberg/manifest_entry.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ struct ICEBERG_EXPORT ManifestEntry {
291291
std::optional<int64_t> file_sequence_number;
292292
/// Field id: 2
293293
/// File path, partition tuple, metrics, ...
294-
DataFile data_file;
294+
std::shared_ptr<DataFile> data_file;
295295

296296
inline static const SchemaField kStatus =
297297
SchemaField::MakeRequired(0, "status", std::make_shared<IntType>());

src/iceberg/manifest_reader.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,37 @@ namespace iceberg {
3434
/// \brief Read manifest entries from a manifest file.
3535
class ICEBERG_EXPORT ManifestReader {
3636
public:
37+
virtual ~ManifestReader() = default;
3738
virtual Result<std::span<std::unique_ptr<ManifestEntry>>> Entries() const = 0;
3839

3940
private:
4041
std::unique_ptr<StructLikeReader> reader_;
4142
};
4243

44+
/// \brief Creates a reader for a manifest file.
45+
/// \param file_path Path to the manifest file.
46+
/// \return A Result containing the reader or an error.
47+
inline Result<std::unique_ptr<ManifestReader>> CreateManifestReader(
48+
const std::string& file_path) {
49+
return NotImplemented("manifest reader");
50+
}
51+
4352
/// \brief Read manifest files from a manifest list file.
4453
class ICEBERG_EXPORT ManifestListReader {
4554
public:
55+
virtual ~ManifestListReader() = default;
4656
virtual Result<std::span<std::unique_ptr<ManifestFile>>> Files() const = 0;
4757

4858
private:
4959
std::unique_ptr<StructLikeReader> reader_;
5060
};
5161

62+
/// \brief Creates a reader for the manifest list.
63+
/// \param file_path Path to the manifest list file.
64+
/// \return A Result containing the reader or an error.
65+
inline Result<std::unique_ptr<ManifestListReader>> CreateManifestListReader(
66+
const std::string& file_path) {
67+
return NotImplemented("manifest list reader");
68+
}
69+
5270
} // namespace iceberg

src/iceberg/table_scan.cc

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ namespace iceberg {
3333
TableScanBuilder::TableScanBuilder(const Table& table) : table_(table) {}
3434

3535
TableScanBuilder& TableScanBuilder::WithColumnNames(
36-
const std::vector<std::string>& column_names) {
37-
column_names_ = column_names;
36+
std::vector<std::string> column_names) {
37+
column_names_ = std::move(column_names);
3838
return *this;
3939
}
4040

@@ -43,9 +43,8 @@ TableScanBuilder& TableScanBuilder::WithSnapshotId(int64_t snapshot_id) {
4343
return *this;
4444
}
4545

46-
TableScanBuilder& TableScanBuilder::WithFilter(
47-
const std::shared_ptr<Expression>& filter) {
48-
filter_ = filter;
46+
TableScanBuilder& TableScanBuilder::WithFilter(std::shared_ptr<Expression> filter) {
47+
filter_ = std::move(filter);
4948
return *this;
5049
}
5150

@@ -56,33 +55,37 @@ Result<std::unique_ptr<TableScan>> TableScanBuilder::Build() {
5655
} else {
5756
snapshot = table_.current_snapshot();
5857
}
58+
if (snapshot == nullptr) {
59+
return InvalidArgument("No snapshot found for table {}", table_.name());
60+
}
5961

6062
std::shared_ptr<Schema> schema;
6163
if (snapshot->schema_id) {
6264
const auto& schemas = table_.schemas();
6365
if (auto it = schemas.find(*snapshot->schema_id); it != schemas.end()) {
6466
schema = it->second;
6567
} else {
66-
return InvalidData("Schema {} in snapshot {} is not found", *snapshot->schema_id,
67-
snapshot->snapshot_id);
68+
return InvalidArgument("Schema {} in snapshot {} is not found",
69+
*snapshot->schema_id, snapshot->snapshot_id);
6870
}
6971
} else {
7072
schema = table_.schema();
7173
}
7274

73-
std::vector<int32_t> field_ids;
74-
field_ids.reserve(column_names_.size());
75+
std::vector<SchemaField> project_fields;
76+
project_fields.reserve(column_names_.size());
7577
for (const auto& column_name : column_names_) {
7678
auto field_opt = schema->GetFieldByName(column_name);
7779
if (!field_opt) {
7880
return InvalidArgument("Column {} not found in schema", column_name);
7981
}
80-
field_ids.emplace_back(field_opt.value().get().field_id());
82+
project_fields.emplace_back(field_opt.value().get());
8183
}
8284

85+
auto projected_schema =
86+
std::make_shared<Schema>(std::move(project_fields), schema->schema_id());
8387
TableScan::ScanContext context{.snapshot = std::move(snapshot),
84-
.schema = std::move(schema),
85-
.field_ids = std::move(field_ids),
88+
.projected_schema = std::move(projected_schema),
8689
.filter = std::move(filter_)};
8790
return std::make_unique<TableScan>(std::move(context), table_.io());
8891
}
@@ -103,23 +106,11 @@ Result<std::vector<std::shared_ptr<FileScanTask>>> TableScan::PlanFiles() const
103106

104107
for (const auto& manifest : manifests) {
105108
const auto& data_file = manifest->data_file;
106-
tasks.emplace_back(std::make_shared<FileScanTask>(
107-
data_file.file_path, 0, data_file.file_size_in_bytes, data_file.record_count,
108-
data_file.content, data_file.file_format, context_.schema, context_.field_ids,
109-
context_.filter));
109+
tasks.emplace_back(
110+
std::make_shared<FileScanTask>(data_file, 0, data_file->file_size_in_bytes));
110111
}
111112
}
112113
return tasks;
113114
}
114115

115-
Result<std::unique_ptr<ManifestListReader>> TableScan::CreateManifestListReader(
116-
const std::string& file_path) const {
117-
return NotImplemented("manifest list reader");
118-
}
119-
120-
Result<std::unique_ptr<ManifestReader>> TableScan::CreateManifestReader(
121-
const std::string& file_path) const {
122-
return NotImplemented("manifest reader");
123-
}
124-
125116
} // namespace iceberg

src/iceberg/table_scan.h

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ICEBERG_EXPORT TableScanBuilder {
4242
/// \brief Selects columns to include in the scan.
4343
/// \param column_names A list of column names. If empty, all columns will be selected.
4444
/// \return Reference to the builder.
45-
TableScanBuilder& WithColumnNames(const std::vector<std::string>& column_names);
45+
TableScanBuilder& WithColumnNames(std::vector<std::string> column_names);
4646

4747
/// \brief Applies a filter expression to the scan.
4848
/// \param filter Filter expression to use.
@@ -65,10 +65,9 @@ class ICEBERG_EXPORT TableScan {
6565
public:
6666
/// \brief Scan context holding snapshot and scan-specific metadata.
6767
struct ScanContext {
68-
std::shared_ptr<Snapshot> snapshot; ///< Snapshot to scan.
69-
std::shared_ptr<Schema> schema; ///< Table schema.
70-
std::vector<int32_t> field_ids; ///< Field IDs of selected columns.
71-
std::shared_ptr<Expression> filter; ///< Filter expression to apply.
68+
std::shared_ptr<Snapshot> snapshot; ///< Snapshot to scan.
69+
std::shared_ptr<Schema> projected_schema; ///< Projected schema.
70+
std::shared_ptr<Expression> filter; ///< Filter expression to apply.
7271
};
7372

7473
/// \brief Constructs a TableScan with the given context and file I/O.
@@ -83,33 +82,15 @@ class ICEBERG_EXPORT TableScan {
8382
Result<std::vector<std::shared_ptr<FileScanTask>>> PlanFiles() const;
8483

8584
private:
86-
/// \brief Creates a reader for the manifest list.
87-
/// \param file_path Path to the manifest list file.
88-
/// \return A Result containing the reader or an error.
89-
Result<std::unique_ptr<ManifestListReader>> CreateManifestListReader(
90-
const std::string& file_path) const;
91-
92-
/// \brief Creates a reader for a manifest file.
93-
/// \param file_path Path to the manifest file.
94-
/// \return A Result containing the reader or an error.
95-
Result<std::unique_ptr<ManifestReader>> CreateManifestReader(
96-
const std::string& file_path) const;
97-
9885
ScanContext context_;
9986
std::shared_ptr<FileIO> file_io_;
10087
};
10188

10289
/// \brief Represents a task to scan a portion of a data file.
10390
struct ICEBERG_EXPORT FileScanTask {
104-
std::string file_path; ///< Path to the data file.
105-
uint64_t start; ///< Start byte offset.
106-
uint64_t length; ///< Length in bytes to scan.
107-
std::optional<uint64_t> record_count; ///< Optional number of records.
108-
DataFile::Content file_content; ///< Type of file content.
109-
FileFormatType file_format; ///< Format of the data file.
110-
std::shared_ptr<Schema> schema; ///< Table schema.
111-
std::vector<int32_t> field_ids; ///< Field IDs to project.
112-
std::shared_ptr<Expression> filter; ///< Filter expression to apply.
91+
std::shared_ptr<DataFile> data_file; ///< the data file.
92+
uint64_t start; ///< Start byte offset.
93+
uint64_t length; ///< Length in bytes to scan.
11394
};
11495

11596
} // namespace iceberg

src/iceberg/type_fwd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ class AppendFiles;
131131
struct DataFile;
132132
struct ManifestEntry;
133133
struct ManifestFile;
134-
class ManifestReader;
135134
struct ManifestList;
135+
136+
class ManifestReader;
136137
class ManifestListReader;
137138

138139
} // namespace iceberg

0 commit comments

Comments
 (0)