Skip to content

Commit 2774dc9

Browse files
committed
feat: implement initial MemoryCatalog functionality with namespace and table support
1 parent a5bcd45 commit 2774dc9

File tree

6 files changed

+630
-0
lines changed

6 files changed

+630
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
1919
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")
2020
set(ICEBERG_SOURCES
2121
arrow_c_data_internal.cc
22+
catalog/memory_catalog.cc
2223
demo.cc
2324
json_internal.cc
2425
schema.cc
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/catalog/memory_catalog.h"
21+
22+
#include <algorithm>
23+
#include <iterator>
24+
25+
#include "iceberg/exception.h"
26+
#include "iceberg/table.h"
27+
28+
namespace iceberg {
29+
30+
MemoryCatalog::MemoryCatalog(std::shared_ptr<FileIO> file_io,
31+
std::optional<std::string> warehouse_location)
32+
: file_io_(std::move(file_io)),
33+
warehouse_location_(std::move(warehouse_location)),
34+
root_container_(std::make_unique<NamespaceContainer>()) {}
35+
36+
void MemoryCatalog::Initialize(
37+
const std::string& name,
38+
const std::unordered_map<std::string, std::string>& properties) {
39+
catalog_name_ = name;
40+
properties_ = properties;
41+
}
42+
43+
/// \return A view of the stored catalog name. The view refers to the catalog's
///         own member, so it must not be used after the catalog is destroyed
///         or Initialize() is called again.
std::string_view MemoryCatalog::name() const {
  return catalog_name_;
}
44+
45+
/// Lists the identifiers of all tables registered directly under `ns`.
///
/// The catalog mutex is held while the namespace tree is read. The result is
/// empty when the container reports no tables for `ns`.
///
/// \param ns Namespace whose tables are listed.
/// \return One TableIdentifier per table name, in the order the container
///         returned the names.
Result<std::vector<TableIdentifier>> MemoryCatalog::ListTables(
    const Namespace& ns) const {
  std::unique_lock lock(mutex_);

  const std::vector<std::string> names = root_container_->ListTables(ns);

  std::vector<TableIdentifier> identifiers;
  identifiers.reserve(names.size());
  for (const auto& table_name : names) {
    identifiers.push_back(TableIdentifier(ns, table_name));
  }
  return identifiers;
}
56+
57+
/// Placeholder: table creation is not available yet.
///
/// None of the arguments are inspected.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::unique_ptr<Table>> MemoryCatalog::CreateTable(
    const TableIdentifier& identifier,
    const Schema& schema,
    const PartitionSpec& spec,
    const std::string& location,
    const std::unordered_map<std::string, std::string>& properties) {
  throw IcebergError("not implemented");
}
63+
64+
/// Placeholder: table updates are not available yet.
///
/// None of the arguments are inspected.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::unique_ptr<Table>> MemoryCatalog::UpdateTable(
    const TableIdentifier& identifier,
    const std::vector<std::unique_ptr<UpdateRequirement>>& requirements,
    const std::vector<std::unique_ptr<MetadataUpdate>>& updates) {
  throw IcebergError("not implemented");
}
70+
71+
/// Placeholder: staged table creation is not available yet.
///
/// None of the arguments are inspected.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::shared_ptr<Transaction>> MemoryCatalog::StageCreateTable(
    const TableIdentifier& identifier,
    const Schema& schema,
    const PartitionSpec& spec,
    const std::string& location,
    const std::unordered_map<std::string, std::string>& properties) {
  throw IcebergError("not implemented");
}
77+
78+
bool MemoryCatalog::TableExists(const TableIdentifier& identifier) const {
79+
std::unique_lock lock(mutex_);
80+
return root_container_->TableExists(identifier);
81+
}
82+
83+
bool MemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) {
84+
std::unique_lock lock(mutex_);
85+
// TODO(Guotao): Delete all metadata files if purge is true.
86+
return root_container_->UnregisterTable(identifier);
87+
}
88+
89+
/// Placeholder: loading a table is not available yet.
///
/// `identifier` is not inspected.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::shared_ptr<Table>> MemoryCatalog::LoadTable(
    const TableIdentifier& identifier) const {
  throw IcebergError("not implemented");
}
93+
94+
/// Records `metadata_file_location` for `identifier` and then loads the table.
///
/// The catalog mutex is held for the whole call, including the final
/// LoadTable() call.
/// NOTE(review): if LoadTable() ever acquires the same mutex this would
/// self-deadlock unless the mutex is recursive — confirm when LoadTable() is
/// implemented.
///
/// \param identifier             Table to register; its namespace must exist.
/// \param metadata_file_location Location string stored for the table.
/// \return kNoSuchNamespace if the namespace is missing, kUnknownError if the
///         container refuses the registration, otherwise the result of
///         LoadTable(identifier).
Result<std::shared_ptr<Table>> MemoryCatalog::RegisterTable(
    const TableIdentifier& identifier, const std::string& metadata_file_location) {
  std::unique_lock lock(mutex_);

  const bool namespace_known = root_container_->NamespaceExists(identifier.ns);
  if (!namespace_known) {
    return unexpected<Error>({.kind = ErrorKind::kNoSuchNamespace,
                              .message = "table namespace does not exist"});
  }

  const bool registered =
      root_container_->RegisterTable(identifier, metadata_file_location);
  if (!registered) {
    return unexpected<Error>(
        {.kind = ErrorKind::kUnknownError, .message = "The registry failed."});
  }

  return LoadTable(identifier);
}
107+
108+
std::unique_ptr<TableBuilder> MemoryCatalog::BuildTable(const TableIdentifier& identifier,
109+
const Schema& schema) const {
110+
throw IcebergError("not implemented");
111+
}
112+
113+
/// Implementation of NamespaceContainer
114+
/// Mutable lookup: forwards to GetNamespaceContainerImpl() and returns its
/// result unchanged. Callers in this file treat a null result as "namespace
/// not found".
NamespaceContainer* NamespaceContainer::GetNamespaceContainer(
    NamespaceContainer* root, const Namespace& namespace_ident) {
  NamespaceContainer* const found = GetNamespaceContainerImpl(root, namespace_ident);
  return found;
}
118+
119+
/// Read-only lookup: forwards to GetNamespaceContainerImpl() and returns its
/// result unchanged. Callers in this file treat a null result as "namespace
/// not found".
const NamespaceContainer* NamespaceContainer::GetNamespaceContainer(
    const NamespaceContainer* root, const Namespace& namespace_ident) {
  const NamespaceContainer* const found =
      GetNamespaceContainerImpl(root, namespace_ident);
  return found;
}
123+
124+
bool NamespaceContainer::NamespaceExists(const Namespace& namespace_ident) const {
125+
return GetNamespaceContainer(this, namespace_ident) != nullptr;
126+
}
127+
128+
std::vector<std::string> NamespaceContainer::ListChildrenNamespaces(
129+
const std::optional<Namespace>& parent_namespace_ident) const {
130+
auto container = this;
131+
if (parent_namespace_ident.has_value()) {
132+
container = GetNamespaceContainer(this, *parent_namespace_ident);
133+
if (!container) return {};
134+
}
135+
136+
std::vector<std::string> names;
137+
auto const& children = container->children_;
138+
names.reserve(children.size());
139+
std::ranges::transform(children, std::back_inserter(names),
140+
[](const auto& pair) { return pair.first; });
141+
return names;
142+
}
143+
144+
bool NamespaceContainer::CreateNamespace(
145+
const Namespace& namespace_ident,
146+
const std::unordered_map<std::string, std::string>& properties) {
147+
auto container = this;
148+
bool newly_created = false;
149+
150+
for (const auto& part_level : namespace_ident.levels) {
151+
if (auto it = container->children_.find(part_level);
152+
it == container->children_.end()) {
153+
container->children_[part_level] = std::make_unique<NamespaceContainer>();
154+
container = container->children_[part_level].get();
155+
newly_created = true;
156+
} else {
157+
container = it->second.get();
158+
}
159+
}
160+
161+
if (!newly_created) return false;
162+
163+
container->properties_ = properties;
164+
return true;
165+
}
166+
167+
bool NamespaceContainer::DeleteNamespace(const Namespace& namespace_ident) {
168+
if (namespace_ident.levels.empty()) return false;
169+
170+
auto parent_namespace_ident = namespace_ident;
171+
const auto to_delete = parent_namespace_ident.levels.back();
172+
parent_namespace_ident.levels.pop_back();
173+
174+
auto* parent = GetNamespaceContainer(this, parent_namespace_ident);
175+
if (!parent) return false;
176+
177+
auto it = parent->children_.find(to_delete);
178+
if (it == parent->children_.end()) return false;
179+
180+
const auto& target = *it->second;
181+
if (!target.children_.empty() || !target.table_metadata_locations_.empty()) {
182+
return false;
183+
}
184+
185+
return parent->children_.erase(to_delete) > 0;
186+
}
187+
188+
std::optional<std::unordered_map<std::string, std::string>>
189+
NamespaceContainer::GetProperties(const Namespace& namespace_ident) const {
190+
const auto container = GetNamespaceContainer(this, namespace_ident);
191+
if (!container) return std::nullopt;
192+
return container->properties_;
193+
}
194+
195+
bool NamespaceContainer::ReplaceProperties(
196+
const Namespace& namespace_ident,
197+
const std::unordered_map<std::string, std::string>& properties) {
198+
const auto container = GetNamespaceContainer(this, namespace_ident);
199+
if (!container) return false;
200+
container->properties_ = properties;
201+
return true;
202+
}
203+
204+
std::vector<std::string> NamespaceContainer::ListTables(
205+
const Namespace& namespace_ident) const {
206+
const auto container = GetNamespaceContainer(this, namespace_ident);
207+
if (!container) return {};
208+
209+
const auto& locations = container->table_metadata_locations_;
210+
std::vector<std::string> table_names;
211+
table_names.reserve(locations.size());
212+
213+
std::ranges::transform(locations, std::back_inserter(table_names),
214+
[](const auto& pair) { return pair.first; });
215+
std::ranges::sort(table_names);
216+
217+
return table_names;
218+
}
219+
220+
/// Stores `metadata_location` for the table named by `table_ident`.
///
/// An existing entry is never overwritten.
///
/// \return false when the table's namespace cannot be found or a table with
///         the same name is already registered there; true when a new entry
///         was inserted.
bool NamespaceContainer::RegisterTable(TableIdentifier const& table_ident,
                                       const std::string& metadata_location) {
  NamespaceContainer* const owner = GetNamespaceContainer(this, table_ident.ns);
  if (owner == nullptr) return false;

  // try_emplace inserts only when the key is absent and reports whether it did.
  const auto insertion =
      owner->table_metadata_locations_.try_emplace(table_ident.name, metadata_location);
  return insertion.second;
}
228+
229+
/// Removes the entry for `table_ident` from its namespace.
///
/// \return false when the namespace cannot be found or holds no table with
///         that name; true when an entry was erased.
bool NamespaceContainer::UnregisterTable(TableIdentifier const& table_ident) {
  NamespaceContainer* const owner = GetNamespaceContainer(this, table_ident.ns);
  if (owner == nullptr) return false;

  const auto removed_count = owner->table_metadata_locations_.erase(table_ident.name);
  return removed_count > 0;
}
234+
235+
/// \return true when the namespace of `table_ident` can be found and contains
///         an entry for the table's name; false otherwise.
bool NamespaceContainer::TableExists(TableIdentifier const& table_ident) const {
  const NamespaceContainer* const owner = GetNamespaceContainer(this, table_ident.ns);
  return owner != nullptr &&
         owner->table_metadata_locations_.contains(table_ident.name);
}
240+
241+
/// Looks up the metadata location stored for `table_ident`.
///
/// \return A copy of the stored location, or std::nullopt when either the
///         namespace or the table entry is missing.
std::optional<std::string> NamespaceContainer::GetTableMetadataLocation(
    TableIdentifier const& table_ident) const {
  const NamespaceContainer* const owner = GetNamespaceContainer(this, table_ident.ns);
  if (owner == nullptr) return std::nullopt;

  const auto& locations = owner->table_metadata_locations_;
  if (const auto entry = locations.find(table_ident.name); entry != locations.end()) {
    return entry->second;
  }
  return std::nullopt;
}
249+
} // namespace iceberg

0 commit comments

Comments
 (0)