Skip to content

Commit 9257726

Browse files
author
shuxu.li
committed
feat: metadata access support for table
1 parent fc8bf0f commit 9257726

File tree

9 files changed

+233
-365
lines changed

9 files changed

+233
-365
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ set(ICEBERG_SOURCES
3838
sort_field.cc
3939
sort_order.cc
4040
statistics_file.cc
41-
table_impl.cc
41+
table.cc
4242
table_metadata.cc
4343
transform.cc
4444
transform_function.cc

src/iceberg/snapshot.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,4 +273,20 @@ struct ICEBERG_EXPORT Snapshot {
273273
bool Equals(const Snapshot& other) const;
274274
};
275275

276+
/// \brief Represents a snapshot log entry
277+
struct ICEBERG_EXPORT SnapshotLogEntry {
278+
/// The timestamp in milliseconds of the change
279+
TimePointMs timestamp_ms;
280+
/// ID of the snapshot
281+
int64_t snapshot_id;
282+
283+
friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
284+
return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id;
285+
}
286+
287+
friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
288+
return !(lhs == rhs);
289+
}
290+
};
291+
276292
} // namespace iceberg

src/iceberg/table.cc

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/table.h"
21+
22+
#include "iceberg/partition_spec.h"
23+
#include "iceberg/schema.h"
24+
#include "iceberg/sort_order.h"
25+
#include "iceberg/table_metadata.h"
26+
27+
namespace iceberg {
28+
29+
const std::string& Table::uuid() const { return metadata_->table_uuid; }
30+
31+
const std::shared_ptr<Schema>& Table::schema() const {
32+
if (!schema_) {
33+
const static std::shared_ptr<Schema> kEmptySchema =
34+
std::make_shared<Schema>(std::vector<SchemaField>{});
35+
auto schema = metadata_->Schema();
36+
if (schema.has_value()) {
37+
schema_ = schema.value();
38+
} else {
39+
schema_ = kEmptySchema;
40+
}
41+
}
42+
return schema_;
43+
}
44+
45+
const std::unordered_map<int32_t, std::shared_ptr<Schema>>& Table::schemas() const {
46+
std::call_once(init_schemas_once_, [this]() {
47+
for (const auto& schema : metadata_->schemas) {
48+
if (schema->schema_id()) {
49+
schemas_map_.emplace(schema->schema_id().value(), schema);
50+
}
51+
}
52+
});
53+
return schemas_map_;
54+
}
55+
56+
const std::shared_ptr<PartitionSpec>& Table::spec() const {
57+
std::call_once(init_partition_spec_once_, [this]() {
58+
auto partition_spec = metadata_->PartitionSpec();
59+
if (partition_spec.has_value()) {
60+
partition_spec_ = partition_spec.value();
61+
}
62+
});
63+
return partition_spec_;
64+
}
65+
66+
const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& Table::specs() const {
67+
std::call_once(init_partition_specs_once_, [this]() {
68+
for (const auto& spec : metadata_->partition_specs) {
69+
partition_spec_map_[spec->spec_id()] = spec;
70+
}
71+
});
72+
return partition_spec_map_;
73+
}
74+
75+
const std::shared_ptr<SortOrder>& Table::sort_order() const {
76+
std::call_once(init_sort_order_once_, [this]() {
77+
auto sort_order = metadata_->SortOrder();
78+
if (sort_order.has_value()) {
79+
sort_order_ = sort_order.value();
80+
}
81+
});
82+
return sort_order_;
83+
}
84+
85+
const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& Table::sort_orders()
86+
const {
87+
std::call_once(init_sort_orders_once_, [this]() {
88+
for (const auto& order : metadata_->sort_orders) {
89+
sort_orders_map_[order->order_id()] = order;
90+
}
91+
});
92+
return sort_orders_map_;
93+
}
94+
95+
const std::unordered_map<std::string, std::string>& Table::properties() const {
96+
return metadata_->properties;
97+
}
98+
99+
const std::string& Table::location() const { return metadata_->location; }
100+
101+
std::shared_ptr<Snapshot> Table::CurrentSnapshot() const {
102+
std::call_once(init_snapshot_once_, [this]() {
103+
auto snapshot = metadata_->Snapshot();
104+
if (snapshot.has_value()) {
105+
current_snapshot_ = snapshot.value();
106+
}
107+
});
108+
return current_snapshot_;
109+
}
110+
111+
std::shared_ptr<Snapshot> Table::SnapshotById(int64_t snapshot_id) const {
112+
auto iter = std::ranges::find_if(metadata_->snapshots,
113+
[this, &snapshot_id](const auto& snapshot) {
114+
return snapshot->snapshot_id == snapshot_id;
115+
});
116+
if (iter == metadata_->snapshots.end()) {
117+
return nullptr;
118+
}
119+
return *iter;
120+
}
121+
122+
const std::vector<std::shared_ptr<Snapshot>>& Table::snapshots() const {
123+
return metadata_->snapshots;
124+
}
125+
126+
const std::vector<SnapshotLogEntry>& Table::history() const {
127+
return metadata_->snapshot_log;
128+
}
129+
130+
} // namespace iceberg

src/iceberg/table.h

Lines changed: 61 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,16 @@
1919

2020
#pragma once
2121

22-
#include <memory>
2322
#include <string>
2423
#include <unordered_map>
2524
#include <vector>
2625

2726
#include "iceberg/iceberg_export.h"
27+
#include "iceberg/location_provider.h"
2828
#include "iceberg/result.h"
29+
#include "iceberg/snapshot.h"
30+
#include "iceberg/table_identifier.h"
31+
#include "iceberg/transaction.h"
2932
#include "iceberg/type_fwd.h"
3033

3134
namespace iceberg {
@@ -35,78 +38,96 @@ class ICEBERG_EXPORT Table {
3538
public:
3639
virtual ~Table() = default;
3740

38-
/// \brief Return the full name for this table
39-
virtual const std::string& name() const = 0;
41+
/// \brief Construct a table.
42+
/// \param[in] identifier The identifier of the table.
43+
/// \param[in] metadata The metadata for the table.
44+
/// \param[in] metadata_location The location of the table metadata file.
45+
/// \param[in] io The FileIO to read and write table data and metadata files.
46+
/// \param[in] catalog The catalog that this table belongs to.
47+
Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
48+
std::string metadata_location, std::shared_ptr<FileIO> io,
49+
std::shared_ptr<Catalog> catalog)
50+
: identifier_(std::move(identifier)),
51+
metadata_(std::move(metadata)),
52+
metadata_location_(std::move(metadata_location)),
53+
io_(std::move(io)),
54+
catalog_(std::move(catalog)) {};
55+
56+
/// \brief Return the identifier of this table
57+
const TableIdentifier& name() const { return identifier_; }
4058

4159
/// \brief Returns the UUID of the table
42-
virtual const std::string& uuid() const = 0;
60+
const std::string& uuid() const;
4361

44-
/// \brief Refresh the current table metadata
45-
virtual Status Refresh() = 0;
46-
47-
/// \brief Return the schema for this table, return NotFoundError if not found
48-
virtual Result<std::shared_ptr<Schema>> schema() const = 0;
62+
/// \brief Return the schema for this table, return empty schema if not found
63+
const std::shared_ptr<Schema>& schema() const;
4964

5065
/// \brief Return a map of schema for this table
51-
virtual const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const = 0;
66+
const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const;
5267

53-
/// \brief Return the partition spec for this table, return NotFoundError if not found
54-
virtual Result<std::shared_ptr<PartitionSpec>> spec() const = 0;
68+
/// \brief Return the partition spec for this table, return null if default spec is not
69+
/// found
70+
const std::shared_ptr<PartitionSpec>& spec() const;
5571

5672
/// \brief Return a map of partition specs for this table
57-
virtual const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs()
58-
const = 0;
73+
const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() const;
5974

60-
/// \brief Return the sort order for this table, return NotFoundError if not found
61-
virtual Result<std::shared_ptr<SortOrder>> sort_order() const = 0;
75+
/// \brief Return the sort order for this table, return null if default sort order is
76+
/// not found
77+
const std::shared_ptr<SortOrder>& sort_order() const;
6278

6379
/// \brief Return a map of sort order IDs to sort orders for this table
64-
virtual const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders()
65-
const = 0;
80+
const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() const;
6681

6782
/// \brief Return a map of string properties for this table
68-
virtual const std::unordered_map<std::string, std::string>& properties() const = 0;
83+
const std::unordered_map<std::string, std::string>& properties() const;
6984

7085
/// \brief Return the table's base location
71-
virtual const std::string& location() const = 0;
86+
const std::string& location() const;
7287

73-
/// \brief Return the table's current snapshot, or NotFoundError if not found
74-
virtual Result<std::shared_ptr<Snapshot>> current_snapshot() const = 0;
88+
/// \brief Return the table's current snapshot, return null if not found
89+
std::shared_ptr<Snapshot> CurrentSnapshot() const;
7590

7691
/// \brief Get the snapshot of this table with the given id, or null if there is no
7792
/// matching snapshot
7893
///
7994
/// \param snapshot_id the ID of the snapshot to get
8095
/// \return the Snapshot with the given id
81-
virtual Result<std::shared_ptr<Snapshot>> snapshot(int64_t snapshot_id) const = 0;
96+
std::shared_ptr<Snapshot> SnapshotById(int64_t snapshot_id) const;
8297

8398
/// \brief Get the snapshots of this table
84-
virtual const std::vector<std::shared_ptr<Snapshot>>& snapshots() const = 0;
99+
const std::vector<std::shared_ptr<Snapshot>>& snapshots() const;
85100

86101
/// \brief Get the snapshot history of this table
87102
///
88103
/// \return a vector of history entries
89-
virtual const std::vector<std::shared_ptr<HistoryEntry>>& history() const = 0;
104+
const std::vector<SnapshotLogEntry>& history() const;
105+
106+
private:
107+
const TableIdentifier identifier_;
108+
const std::shared_ptr<TableMetadata> metadata_;
109+
const std::string metadata_location_;
110+
std::shared_ptr<FileIO> io_;
111+
std::shared_ptr<Catalog> catalog_;
90112

91-
// TODO(lishuxu): TableScan is not implemented yet, disable it for now.
92-
// /// \brief Create a new table scan for this table
93-
// ///
94-
// /// Once a table scan is created, it can be refined to project columns and filter
95-
// data. virtual Result<std::unique_ptr<TableScan>> NewScan() const = 0;
113+
mutable std::shared_ptr<Schema> schema_;
114+
mutable std::unordered_map<int32_t, std::shared_ptr<Schema>> schemas_map_;
96115

97-
/// \brief Create a new append API to add files to this table and commit
98-
virtual Result<std::shared_ptr<AppendFiles>> NewAppend() = 0;
116+
mutable std::shared_ptr<PartitionSpec> partition_spec_;
117+
mutable std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> partition_spec_map_;
99118

100-
/// \brief Create a new transaction API to commit multiple table operations at once
101-
virtual Result<std::unique_ptr<Transaction>> NewTransaction() = 0;
119+
mutable std::shared_ptr<SortOrder> sort_order_;
120+
mutable std::unordered_map<int32_t, std::shared_ptr<SortOrder>> sort_orders_map_;
102121

103-
/// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an
104-
/// IO-less design in the core library.
105-
// /// \brief Returns a FileIO to read and write table data and metadata files
106-
// virtual std::shared_ptr<FileIO> io() const = 0;
122+
mutable std::shared_ptr<Snapshot> current_snapshot_;
107123

108-
/// \brief Returns a LocationProvider to provide locations for new data files
109-
virtual Result<std::unique_ptr<LocationProvider>> location_provider() const = 0;
124+
// once_flags
125+
mutable std::once_flag init_schemas_once_;
126+
mutable std::once_flag init_partition_spec_once_;
127+
mutable std::once_flag init_partition_specs_once_;
128+
mutable std::once_flag init_sort_order_once_;
129+
mutable std::once_flag init_sort_orders_once_;
130+
mutable std::once_flag init_snapshot_once_;
110131
};
111132

112133
} // namespace iceberg

0 commit comments

Comments
 (0)