Skip to content

Commit f9cdba1

Browse files
committed
feat: implement initial MemoryCatalog functionality with namespace and table support
1 parent a5bcd45 commit f9cdba1

File tree

6 files changed

+629
-0
lines changed

6 files changed

+629
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
1919
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")
2020
set(ICEBERG_SOURCES
2121
arrow_c_data_internal.cc
22+
catalog/memory_catalog.cc
2223
demo.cc
2324
json_internal.cc
2425
schema.cc
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/catalog/memory_catalog.h"
21+
22+
#include <algorithm>
23+
24+
#include "iceberg/exception.h"
25+
#include "iceberg/table.h"
26+
27+
namespace iceberg {
28+
29+
MemoryCatalog::MemoryCatalog(std::shared_ptr<FileIO> file_io,
30+
std::optional<std::string> warehouse_location)
31+
: file_io_(std::move(file_io)),
32+
warehouse_location_(std::move(warehouse_location)),
33+
root_container_(std::make_unique<NamespaceContainer>()) {}
34+
35+
void MemoryCatalog::Initialize(
36+
const std::string& name,
37+
const std::unordered_map<std::string, std::string>& properties) {
38+
catalog_name_ = name;
39+
properties_ = properties;
40+
}
41+
42+
/// Returns a view of the name stored by Initialize(). The view refers to the
/// catalog's own member rather than to a copy.
std::string_view MemoryCatalog::name() const {
  return catalog_name_;
}
43+
44+
/// Lists the identifiers of all tables registered directly under `ns`.
///
/// The catalog mutex is held for the whole call. Table names come from the root
/// NamespaceContainer, which returns them sorted and yields an empty list when
/// the namespace cannot be resolved, so an unknown namespace produces an empty
/// vector rather than an error.
Result<std::vector<TableIdentifier>> MemoryCatalog::ListTables(
    const Namespace& ns) const {
  std::unique_lock lock(mutex_);
  const std::vector<std::string> names = root_container_->ListTables(ns);

  std::vector<TableIdentifier> identifiers;
  identifiers.reserve(names.size());
  // Pair every bare table name with the namespace it was listed from.
  for (const auto& table_name : names) {
    identifiers.push_back(TableIdentifier(ns, table_name));
  }
  return identifiers;
}
55+
56+
/// Placeholder for table creation.
///
/// Not implemented yet: every call throws IcebergError("not implemented") and
/// none of the arguments are inspected.
Result<std::unique_ptr<Table>> MemoryCatalog::CreateTable(
    const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
    const std::string& location,
    const std::unordered_map<std::string, std::string>& properties) {
  throw IcebergError("not implemented");
}
62+
63+
/// Placeholder for applying metadata updates to a table.
///
/// Not implemented yet: every call throws IcebergError("not implemented") and
/// none of the arguments are inspected.
Result<std::unique_ptr<Table>> MemoryCatalog::UpdateTable(
    const TableIdentifier& identifier,
    const std::vector<std::unique_ptr<UpdateRequirement>>& requirements,
    const std::vector<std::unique_ptr<MetadataUpdate>>& updates) {
  throw IcebergError("not implemented");
}
69+
70+
/// Placeholder for staging a table creation inside a transaction.
///
/// Not implemented yet: every call throws IcebergError("not implemented") and
/// none of the arguments are inspected.
Result<std::shared_ptr<Transaction>> MemoryCatalog::StageCreateTable(
    const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
    const std::string& location,
    const std::unordered_map<std::string, std::string>& properties) {
  throw IcebergError("not implemented");
}
76+
77+
bool MemoryCatalog::TableExists(const TableIdentifier& identifier) const {
78+
std::unique_lock lock(mutex_);
79+
return root_container_->TableExists(identifier);
80+
}
81+
82+
bool MemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) {
83+
std::unique_lock lock(mutex_);
84+
// TODO(Guotao): Delete all metadata files if purge is true.
85+
return root_container_->UnregisterTable(identifier);
86+
}
87+
88+
/// Placeholder for loading a registered table.
///
/// Not implemented yet: every call throws IcebergError("not implemented") and
/// the identifier is not inspected.
Result<std::shared_ptr<Table>> MemoryCatalog::LoadTable(
    const TableIdentifier& identifier) const {
  throw IcebergError("not implemented");
}
92+
93+
/// Registers an existing table by recording where its metadata file lives.
///
/// Holds the catalog mutex for the whole call.
///
/// \param identifier             Namespace and name to register the table under.
/// \param metadata_file_location Location string stored for the table.
/// \return kNoSuchNamespace if the table's namespace does not exist;
///         kUnknownError if the root container refuses the registration;
///         otherwise the result of LoadTable(identifier).
///
/// NOTE(review): LoadTable is still a stub that throws, so a successful
/// registration currently ends in an exception after the location has already
/// been recorded. LoadTable is also invoked while the mutex is held; confirm
/// this stays safe once LoadTable gets a real implementation.
Result<std::shared_ptr<Table>> MemoryCatalog::RegisterTable(
    const TableIdentifier& identifier, const std::string& metadata_file_location) {
  std::unique_lock lock(mutex_);

  const bool namespace_known = root_container_->NamespaceExists(identifier.ns);
  if (!namespace_known) {
    return unexpected<Error>({.kind = ErrorKind::kNoSuchNamespace,
                              .message = "table namespace does not exist"});
  }

  const bool registered =
      root_container_->RegisterTable(identifier, metadata_file_location);
  if (!registered) {
    return unexpected<Error>(
        {.kind = ErrorKind::kUnknownError, .message = "The registry failed."});
  }

  return LoadTable(identifier);
}
106+
107+
std::unique_ptr<TableBuilder> MemoryCatalog::BuildTable(const TableIdentifier& identifier,
108+
const Schema& schema) const {
109+
throw IcebergError("not implemented");
110+
}
111+
112+
/// Implementation of NamespaceContainer
113+
/// Mutable overload: resolves `namespace_ident` starting from `root`.
///
/// Pure forwarder to GetNamespaceContainerImpl. Callers in this file treat a
/// null result as "namespace not found".
NamespaceContainer* NamespaceContainer::GetNamespaceContainer(
    NamespaceContainer* root, const Namespace& namespace_ident) {
  return GetNamespaceContainerImpl(root, namespace_ident);
}
117+
118+
/// Read-only overload: resolves `namespace_ident` starting from `root`.
///
/// Pure forwarder to GetNamespaceContainerImpl. Callers in this file treat a
/// null result as "namespace not found".
const NamespaceContainer* NamespaceContainer::GetNamespaceContainer(
    const NamespaceContainer* root, const Namespace& namespace_ident) {
  return GetNamespaceContainerImpl(root, namespace_ident);
}
122+
123+
bool NamespaceContainer::NamespaceExists(const Namespace& namespace_ident) const {
124+
return GetNamespaceContainer(this, namespace_ident) != nullptr;
125+
}
126+
127+
std::vector<std::string> NamespaceContainer::ListChildrenNamespaces(
128+
const std::optional<Namespace>& parent_namespace_ident) const {
129+
auto container = this;
130+
if (parent_namespace_ident.has_value()) {
131+
container = GetNamespaceContainer(this, *parent_namespace_ident);
132+
if (!container) return {};
133+
}
134+
135+
std::vector<std::string> names;
136+
auto const& children = container->children_;
137+
names.reserve(children.size());
138+
std::ranges::transform(children, std::back_inserter(names),
139+
[](const auto& pair) { return pair.first; });
140+
return names;
141+
}
142+
143+
bool NamespaceContainer::CreateNamespace(
144+
const Namespace& namespace_ident,
145+
const std::unordered_map<std::string, std::string>& properties) {
146+
auto container = this;
147+
bool newly_created = false;
148+
149+
for (const auto& part_level : namespace_ident.levels) {
150+
if (auto it = container->children_.find(part_level);
151+
it == container->children_.end()) {
152+
container->children_[part_level] = std::make_unique<NamespaceContainer>();
153+
container = container->children_[part_level].get();
154+
newly_created = true;
155+
} else {
156+
container = it->second.get();
157+
}
158+
}
159+
160+
if (!newly_created) return false;
161+
162+
container->properties_ = properties;
163+
return true;
164+
}
165+
166+
bool NamespaceContainer::DeleteNamespace(const Namespace& namespace_ident) {
167+
if (namespace_ident.levels.empty()) return false;
168+
169+
auto parent_namespace_ident = namespace_ident;
170+
const auto to_delete = parent_namespace_ident.levels.back();
171+
parent_namespace_ident.levels.pop_back();
172+
173+
auto* parent = GetNamespaceContainer(this, parent_namespace_ident);
174+
if (!parent) return false;
175+
176+
auto it = parent->children_.find(to_delete);
177+
if (it == parent->children_.end()) return false;
178+
179+
const auto& target = *it->second;
180+
if (!target.children_.empty() || !target.table_metadata_locations_.empty()) {
181+
return false;
182+
}
183+
184+
return parent->children_.erase(to_delete) > 0;
185+
}
186+
187+
std::optional<std::unordered_map<std::string, std::string>>
188+
NamespaceContainer::GetProperties(const Namespace& namespace_ident) const {
189+
const auto container = GetNamespaceContainer(this, namespace_ident);
190+
if (!container) return std::nullopt;
191+
return container->properties_;
192+
}
193+
194+
bool NamespaceContainer::ReplaceProperties(
195+
const Namespace& namespace_ident,
196+
const std::unordered_map<std::string, std::string>& properties) {
197+
const auto container = GetNamespaceContainer(this, namespace_ident);
198+
if (!container) return false;
199+
container->properties_ = properties;
200+
return true;
201+
}
202+
203+
std::vector<std::string> NamespaceContainer::ListTables(
204+
const Namespace& namespace_ident) const {
205+
const auto container = GetNamespaceContainer(this, namespace_ident);
206+
if (!container) return {};
207+
208+
const auto& locations = container->table_metadata_locations_;
209+
std::vector<std::string> table_names;
210+
table_names.reserve(locations.size());
211+
212+
std::ranges::transform(locations, std::back_inserter(table_names),
213+
[](const auto& pair) { return pair.first; });
214+
std::ranges::sort(table_names);
215+
216+
return table_names;
217+
}
218+
219+
bool NamespaceContainer::RegisterTable(TableIdentifier const& table_ident,
220+
const std::string& metadata_location) {
221+
const auto container = GetNamespaceContainer(this, table_ident.ns);
222+
if (!container) return false;
223+
if (container->table_metadata_locations_.contains(table_ident.name)) return false;
224+
container->table_metadata_locations_[table_ident.name] = metadata_location;
225+
return true;
226+
}
227+
228+
bool NamespaceContainer::UnregisterTable(TableIdentifier const& table_ident) {
229+
const auto container = GetNamespaceContainer(this, table_ident.ns);
230+
if (!container) return false;
231+
return container->table_metadata_locations_.erase(table_ident.name) > 0;
232+
}
233+
234+
bool NamespaceContainer::TableExists(TableIdentifier const& table_ident) const {
235+
const auto container = GetNamespaceContainer(this, table_ident.ns);
236+
if (!container) return false;
237+
return container->table_metadata_locations_.contains(table_ident.name);
238+
}
239+
240+
std::optional<std::string> NamespaceContainer::GetTableMetadataLocation(
241+
TableIdentifier const& table_ident) const {
242+
const auto container = GetNamespaceContainer(this, table_ident.ns);
243+
if (!container) return std::nullopt;
244+
const auto it = container->table_metadata_locations_.find(table_ident.name);
245+
if (it == container->table_metadata_locations_.end()) return std::nullopt;
246+
return it->second;
247+
}
248+
} // namespace iceberg

0 commit comments

Comments
 (0)