Skip to content

Commit 43979f2

Browse files
author
shuxu.li
committed
feat: metadata access support for table
1 parent 515bd86 commit 43979f2

File tree

7 files changed

+104
-143
lines changed

7 files changed

+104
-143
lines changed

src/iceberg/snapshot.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -273,20 +273,4 @@ struct ICEBERG_EXPORT Snapshot {
273273
bool Equals(const Snapshot& other) const;
274274
};
275275

276-
/// \brief Represents a snapshot log entry
277-
struct ICEBERG_EXPORT SnapshotLogEntry {
278-
/// The timestamp in milliseconds of the change
279-
TimePointMs timestamp_ms;
280-
/// ID of the snapshot
281-
int64_t snapshot_id;
282-
283-
friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
284-
return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id;
285-
}
286-
287-
friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
288-
return !(lhs == rhs);
289-
}
290-
};
291-
292276
} // namespace iceberg

src/iceberg/table.cc

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -28,67 +28,51 @@ namespace iceberg {
2828

2929
const std::string& Table::uuid() const { return metadata_->table_uuid; }
3030

31-
const std::shared_ptr<Schema>& Table::schema() const {
32-
if (!schema_) {
33-
const static std::shared_ptr<Schema> kEmptySchema =
34-
std::make_shared<Schema>(std::vector<SchemaField>{});
35-
auto schema = metadata_->Schema();
36-
if (schema.has_value()) {
37-
schema_ = schema.value();
38-
} else {
39-
schema_ = kEmptySchema;
40-
}
41-
}
42-
return schema_;
43-
}
31+
Result<std::shared_ptr<Schema>> Table::schema() const { return metadata_->Schema(); }
4432

45-
const std::unordered_map<int32_t, std::shared_ptr<Schema>>& Table::schemas() const {
46-
std::call_once(init_schemas_once_, [this]() {
33+
const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>&
34+
Table::schemas() const {
35+
if (!schemas_map_) {
36+
schemas_map_ =
37+
std::make_shared<std::unordered_map<int32_t, std::shared_ptr<Schema>>>();
4738
for (const auto& schema : metadata_->schemas) {
4839
if (schema->schema_id()) {
49-
schemas_map_.emplace(schema->schema_id().value(), schema);
40+
schemas_map_->emplace(schema->schema_id().value(), schema);
5041
}
5142
}
52-
});
43+
}
5344
return schemas_map_;
5445
}
5546

56-
const std::shared_ptr<PartitionSpec>& Table::spec() const {
57-
std::call_once(init_partition_spec_once_, [this]() {
58-
auto partition_spec = metadata_->PartitionSpec();
59-
if (partition_spec.has_value()) {
60-
partition_spec_ = partition_spec.value();
61-
}
62-
});
63-
return partition_spec_;
47+
Result<std::shared_ptr<PartitionSpec>> Table::spec() const {
48+
return metadata_->PartitionSpec();
6449
}
6550

66-
const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& Table::specs() const {
67-
std::call_once(init_partition_specs_once_, [this]() {
51+
const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>>&
52+
Table::specs() const {
53+
if (!partition_spec_map_) {
54+
partition_spec_map_ =
55+
std::make_shared<std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>>();
6856
for (const auto& spec : metadata_->partition_specs) {
69-
partition_spec_map_[spec->spec_id()] = spec;
57+
partition_spec_map_->emplace(spec->spec_id(), spec);
7058
}
71-
});
59+
}
7260
return partition_spec_map_;
7361
}
7462

75-
const std::shared_ptr<SortOrder>& Table::sort_order() const {
76-
std::call_once(init_sort_order_once_, [this]() {
77-
auto sort_order = metadata_->SortOrder();
78-
if (sort_order.has_value()) {
79-
sort_order_ = sort_order.value();
80-
}
81-
});
82-
return sort_order_;
63+
Result<std::shared_ptr<SortOrder>> Table::sort_order() const {
64+
return metadata_->SortOrder();
8365
}
8466

85-
const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& Table::sort_orders()
86-
const {
87-
std::call_once(init_sort_orders_once_, [this]() {
67+
const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<SortOrder>>>&
68+
Table::sort_orders() const {
69+
if (!sort_orders_map_) {
70+
sort_orders_map_ =
71+
std::make_shared<std::unordered_map<int32_t, std::shared_ptr<SortOrder>>>();
8872
for (const auto& order : metadata_->sort_orders) {
89-
sort_orders_map_[order->order_id()] = order;
73+
sort_orders_map_->emplace(order->order_id(), order);
9074
}
91-
});
75+
}
9276
return sort_orders_map_;
9377
}
9478

@@ -98,23 +82,17 @@ const std::unordered_map<std::string, std::string>& Table::properties() const {
9882

9983
const std::string& Table::location() const { return metadata_->location; }
10084

101-
std::shared_ptr<Snapshot> Table::current_snapshot() const {
102-
std::call_once(init_snapshot_once_, [this]() {
103-
auto snapshot = metadata_->Snapshot();
104-
if (snapshot.has_value()) {
105-
current_snapshot_ = snapshot.value();
106-
}
107-
});
108-
return current_snapshot_;
85+
Result<std::shared_ptr<Snapshot>> Table::current_snapshot() const {
86+
return metadata_->Snapshot();
10987
}
11088

111-
std::shared_ptr<Snapshot> Table::SnapshotById(int64_t snapshot_id) const {
89+
Result<std::shared_ptr<Snapshot>> Table::SnapshotById(int64_t snapshot_id) const {
11290
auto iter = std::ranges::find_if(metadata_->snapshots,
11391
[this, &snapshot_id](const auto& snapshot) {
11492
return snapshot->snapshot_id == snapshot_id;
11593
});
11694
if (iter == metadata_->snapshots.end()) {
117-
return nullptr;
95+
return NotFound("Snapshot with ID {} is not found", snapshot_id);
11896
}
11997
return *iter;
12098
}
@@ -127,4 +105,6 @@ const std::vector<SnapshotLogEntry>& Table::history() const {
127105
return metadata_->snapshot_log;
128106
}
129107

108+
const std::shared_ptr<FileIO>& Table::io() const { return io_; }
109+
130110
} // namespace iceberg

src/iceberg/table.h

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class ICEBERG_EXPORT Table {
4141
/// \param[in] metadata The metadata for the table.
4242
/// \param[in] metadata_location The location of the table metadata file.
4343
/// \param[in] io The FileIO to read and write table data and metadata files.
44-
/// \param[in] catalog The catalog that this table belongs to.
44+
/// \param[in] catalog The catalog that this table belongs to. If null, the table will
45+
/// be read-only.
4546
Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
4647
std::string metadata_location, std::shared_ptr<FileIO> io,
4748
std::shared_ptr<Catalog> catalog)
@@ -57,41 +58,41 @@ class ICEBERG_EXPORT Table {
5758
/// \brief Returns the UUID of the table
5859
const std::string& uuid() const;
5960

60-
/// \brief Return the schema for this table, return empty schema if not found
61-
const std::shared_ptr<Schema>& schema() const;
61+
/// \brief Return the current schema for this table, or NotFoundError if it is not found
62+
Result<std::shared_ptr<Schema>> schema() const;
6263

6364
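/// \brief Return a map of schema IDs to schemas for this table
/// \note Schemas without an ID are omitted. Built lazily on the first call; this
/// accessor does no locking itself.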
64-
const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const;
65+
const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>& schemas()
66+
const;
6567

66-
/// \brief Return the partition spec for this table, return null if default spec is not
67-
/// found
68-
const std::shared_ptr<PartitionSpec>& spec() const;
68+
/// \brief Return the partition spec for this table, or NotFoundError if it is not found
69+
Result<std::shared_ptr<PartitionSpec>> spec() const;
6970

7071
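/// \brief Return a map of partition spec IDs to partition specs for this table
/// \note Built lazily on the first call; this accessor does no locking itself.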
71-
const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() const;
72+
const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>>&
73+
specs() const;
7274

73-
/// \brief Return the sort order for this table, return null if default sort order is
74-
/// not found
75-
const std::shared_ptr<SortOrder>& sort_order() const;
75+
/// \brief Return the sort order for this table, or NotFoundError if it is not found
76+
Result<std::shared_ptr<SortOrder>> sort_order() const;
7677

7778
/// \brief Return a map of sort order IDs to sort orders for this table
/// \note Built lazily on the first call; this accessor does no locking itself.
78-
const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() const;
79+
const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<SortOrder>>>&
80+
sort_orders() const;
7981

8082
/// \brief Return a map of string properties for this table
8183
const std::unordered_map<std::string, std::string>& properties() const;
8284

8385
/// \brief Return the table's base location
8486
const std::string& location() const;
8587

86-
/// \brief Return the table's current snapshot, return null if not found
87-
std::shared_ptr<Snapshot> current_snapshot() const;
88+
/// \brief Return the table's current snapshot, or NotFoundError if it is not found
89+
Result<std::shared_ptr<Snapshot>> current_snapshot() const;
8890

89-
/// \brief Get the snapshot of this table with the given id, or null if there is no
90-
/// matching snapshot
91+
/// \brief Get the snapshot of this table with the given id
9192
///
9293
/// \param snapshot_id the ID of the snapshot to get
93-
/// \return the Snapshot with the given id
94-
std::shared_ptr<Snapshot> SnapshotById(int64_t snapshot_id) const;
94+
/// \return the Snapshot with the given id, or NotFoundError if no snapshot matches
95+
Result<std::shared_ptr<Snapshot>> SnapshotById(int64_t snapshot_id) const;
9596

9697
/// \brief Get the snapshots of this table
9798
const std::vector<std::shared_ptr<Snapshot>>& snapshots() const;
@@ -101,31 +102,22 @@ class ICEBERG_EXPORT Table {
101102
/// \return a vector of history entries
102103
const std::vector<SnapshotLogEntry>& history() const;
103104

105+
/// \brief Returns a FileIO to read and write table data and metadata files
106+
const std::shared_ptr<FileIO>& io() const;
107+
104108
private:
105109
const TableIdentifier identifier_;
106110
const std::shared_ptr<TableMetadata> metadata_;
107111
const std::string metadata_location_;
108112
std::shared_ptr<FileIO> io_;
109113
std::shared_ptr<Catalog> catalog_;
110114

111-
mutable std::shared_ptr<Schema> schema_;
112-
mutable std::unordered_map<int32_t, std::shared_ptr<Schema>> schemas_map_;
113-
114-
mutable std::shared_ptr<PartitionSpec> partition_spec_;
115-
mutable std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> partition_spec_map_;
116-
117-
mutable std::shared_ptr<SortOrder> sort_order_;
118-
mutable std::unordered_map<int32_t, std::shared_ptr<SortOrder>> sort_orders_map_;
119-
120-
mutable std::shared_ptr<Snapshot> current_snapshot_;
121-
122-
// once_flags
123-
mutable std::once_flag init_schemas_once_;
124-
mutable std::once_flag init_partition_spec_once_;
125-
mutable std::once_flag init_partition_specs_once_;
126-
mutable std::once_flag init_sort_order_once_;
127-
mutable std::once_flag init_sort_orders_once_;
128-
mutable std::once_flag init_snapshot_once_;
115+
mutable std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>
116+
schemas_map_;
117+
mutable std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>>
118+
partition_spec_map_;
119+
mutable std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<SortOrder>>>
120+
sort_orders_map_;
129121
};
130122

131123
} // namespace iceberg

src/iceberg/table_metadata.cc

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,13 @@ std::string ToString(const MetadataLogEntry& entry) {
4747
}
4848

4949
Result<std::shared_ptr<Schema>> TableMetadata::Schema() const {
50-
std::call_once(init_schema_once, [this]() {
51-
auto iter = std::ranges::find_if(schemas, [this](const auto& schema) {
52-
return schema->schema_id() == current_schema_id;
53-
});
54-
if (iter != schemas.end()) {
55-
schema = *iter;
56-
}
57-
58-
// compatible with V1 table schema
59-
if (!schema && schemas.size() == 1UL) {
60-
schema = schemas.front();
61-
}
50+
auto iter = std::ranges::find_if(schemas, [this](const auto& schema) {
51+
return schema->schema_id() == current_schema_id;
6252
});
63-
64-
if (!schema) {
65-
return NotFound("Current schema is not defined for this table");
53+
if (iter == schemas.end()) {
54+
return NotFound("Current schema is not found");
6655
}
67-
return schema;
56+
return *iter;
6857
}
6958

7059
Result<std::shared_ptr<PartitionSpec>> TableMetadata::PartitionSpec() const {
@@ -88,9 +77,6 @@ Result<std::shared_ptr<SortOrder>> TableMetadata::SortOrder() const {
8877
}
8978

9079
Result<std::shared_ptr<Snapshot>> TableMetadata::Snapshot() const {
91-
if (current_snapshot_id == Snapshot::kInvalidSnapshotId) {
92-
return NotFound("Current snapshot is not defined for this table");
93-
}
9480
auto iter = std::ranges::find_if(snapshots, [this](const auto& snapshot) {
9581
return snapshot->snapshot_id == current_snapshot_id;
9682
});

src/iceberg/table_metadata.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,22 @@
3636

3737
namespace iceberg {
3838

39+
/// \brief Represents a snapshot log entry
40+
struct ICEBERG_EXPORT SnapshotLogEntry {
41+
/// The timestamp in milliseconds of the change
42+
TimePointMs timestamp_ms;
43+
/// ID of the snapshot
44+
int64_t snapshot_id;
45+
46+
friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
47+
return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id;
48+
}
49+
50+
friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
51+
return !(lhs == rhs);
52+
}
53+
};
54+
3955
/// \brief Represents a metadata log entry
4056
struct ICEBERG_EXPORT MetadataLogEntry {
4157
/// The timestamp in milliseconds of the change
@@ -80,8 +96,6 @@ struct ICEBERG_EXPORT TableMetadata {
8096
TimePointMs last_updated_ms;
8197
/// The highest assigned column ID for the table
8298
int32_t last_column_id;
83-
/// The current schema for the table, or null if not set
84-
mutable std::shared_ptr<Schema> schema;
8599
/// A list of schemas
86100
std::vector<std::shared_ptr<Schema>> schemas;
87101
/// ID of the table's current schema
@@ -117,9 +131,6 @@ struct ICEBERG_EXPORT TableMetadata {
117131
/// A `long` higher than all assigned row IDs
118132
int64_t next_row_id;
119133

120-
/// \brief Used for lazy initialization of schema
121-
mutable std::once_flag init_schema_once;
122-
123134
/// \brief Get the current schema, or NotFoundError if no schema matches current_schema_id
124135
Result<std::shared_ptr<Schema>> Schema() const;
125136
/// \brief Get the current partition spec, or NotFoundError if it is not found

src/iceberg/type_fwd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,9 @@ class TransformFunction;
9999
struct PartitionStatisticsFile;
100100
struct Snapshot;
101101
struct SnapshotRef;
102-
struct SnapshotLogEntry;
102+
103103
struct MetadataLogEntry;
104+
struct SnapshotLogEntry;
104105

105106
struct StatisticsFile;
106107
struct TableMetadata;

0 commit comments

Comments
 (0)