Skip to content

Commit e47caa0

Browse files
committed
feat: implement initial MemoryCatalog functionality with namespace and table support
1 parent a5bcd45 commit e47caa0

File tree

6 files changed

+629
-0
lines changed

6 files changed

+629
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
1919
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")
2020
set(ICEBERG_SOURCES
2121
arrow_c_data_internal.cc
22+
catalog/memory_catalog.cc
2223
demo.cc
2324
json_internal.cc
2425
schema.cc
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/catalog/memory_catalog.h"
21+
22+
#include <algorithm>
23+
#include <iterator> // IWYU pragma: keep
24+
25+
#include "iceberg/exception.h"
26+
#include "iceberg/table.h"
27+
28+
namespace iceberg {
29+
30+
MemoryCatalog::MemoryCatalog(std::shared_ptr<FileIO> file_io,
31+
std::optional<std::string> warehouse_location)
32+
: file_io_(std::move(file_io)),
33+
warehouse_location_(std::move(warehouse_location)),
34+
root_container_(std::make_unique<NamespaceContainer>()) {}
35+
36+
void MemoryCatalog::Initialize(
37+
const std::string& name,
38+
const std::unordered_map<std::string, std::string>& properties) {
39+
catalog_name_ = name;
40+
properties_ = properties;
41+
}
42+
43+
/// \brief Return a view of the catalog name set by Initialize().
///
/// The view refers to storage owned by this catalog.
std::string_view MemoryCatalog::name() const {
  std::string_view view = catalog_name_;
  return view;
}
44+
45+
/// \brief List the identifiers of all tables registered under a namespace.
///
/// \param ns Namespace to list.
/// \return One TableIdentifier per table name reported by the root container for
/// `ns`, in the order the container returns them.
Result<std::vector<TableIdentifier>> MemoryCatalog::ListTables(
    const Namespace& ns) const {
  std::unique_lock guard(mutex_);

  const std::vector<std::string> names = root_container_->ListTables(ns);

  std::vector<TableIdentifier> table_idents;
  table_idents.reserve(names.size());
  for (const auto& table_name : names) {
    table_idents.push_back(TableIdentifier(ns, table_name));
  }
  return table_idents;
}
56+
57+
/// \brief Create a table in the catalog.
///
/// Placeholder: none of the arguments are inspected and the call always throws.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::unique_ptr<Table>> MemoryCatalog::CreateTable(
    const TableIdentifier& identifier,
    const Schema& schema,
    const PartitionSpec& spec,
    const std::string& location,
    const std::unordered_map<std::string, std::string>& properties) {
  // Nothing is returned through Result yet; failure is signalled by exception.
  throw IcebergError("not implemented");
}
63+
64+
/// \brief Apply metadata updates to a table, subject to requirements.
///
/// Placeholder: none of the arguments are inspected and the call always throws.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::unique_ptr<Table>> MemoryCatalog::UpdateTable(
    const TableIdentifier& identifier,
    const std::vector<std::unique_ptr<UpdateRequirement>>& requirements,
    const std::vector<std::unique_ptr<MetadataUpdate>>& updates) {
  // Nothing is returned through Result yet; failure is signalled by exception.
  throw IcebergError("not implemented");
}
70+
71+
/// \brief Start a transaction that would create a table.
///
/// Placeholder: none of the arguments are inspected and the call always throws.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::shared_ptr<Transaction>> MemoryCatalog::StageCreateTable(
    const TableIdentifier& identifier,
    const Schema& schema,
    const PartitionSpec& spec,
    const std::string& location,
    const std::unordered_map<std::string, std::string>& properties) {
  // Nothing is returned through Result yet; failure is signalled by exception.
  throw IcebergError("not implemented");
}
77+
78+
bool MemoryCatalog::TableExists(const TableIdentifier& identifier) const {
79+
std::unique_lock lock(mutex_);
80+
return root_container_->TableExists(identifier);
81+
}
82+
83+
bool MemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) {
84+
std::unique_lock lock(mutex_);
85+
// TODO(Guotao): Delete all metadata files if purge is true.
86+
return root_container_->UnregisterTable(identifier);
87+
}
88+
89+
/// \brief Load a table from the catalog.
///
/// Placeholder: the identifier is not inspected and the call always throws.
///
/// \throws IcebergError always, with the message "not implemented".
Result<std::shared_ptr<Table>> MemoryCatalog::LoadTable(
    const TableIdentifier& identifier) const {
  // Nothing is returned through Result yet; failure is signalled by exception.
  throw IcebergError("not implemented");
}
93+
94+
/// \brief Register an existing metadata file as a table, then load the table.
///
/// \param identifier Namespace and name under which to register the table.
/// \param metadata_file_location Location recorded for the table's metadata file.
/// \return ErrorKind::kNoSuchNamespace when the namespace is unknown;
/// ErrorKind::kUnknownError when the container refuses the registration;
/// otherwise the result of LoadTable(identifier).
///
/// NOTE(review): LoadTable() is invoked while mutex_ is still held -- confirm the
/// mutex type tolerates that if LoadTable() ever takes the lock itself.
Result<std::shared_ptr<Table>> MemoryCatalog::RegisterTable(
    const TableIdentifier& identifier, const std::string& metadata_file_location) {
  std::unique_lock guard(mutex_);

  const bool namespace_known = root_container_->NamespaceExists(identifier.ns);
  if (!namespace_known) {
    return unexpected<Error>({.kind = ErrorKind::kNoSuchNamespace,
                              .message = "table namespace does not exist"});
  }

  // The namespace exists, so a refusal here means the container rejected the
  // table itself (in this file: a table with the same name is already present).
  const bool registered =
      root_container_->RegisterTable(identifier, metadata_file_location);
  if (!registered) {
    return unexpected<Error>(
        {.kind = ErrorKind::kUnknownError, .message = "The registry failed."});
  }

  return LoadTable(identifier);
}
107+
108+
std::unique_ptr<TableBuilder> MemoryCatalog::BuildTable(const TableIdentifier& identifier,
109+
const Schema& schema) const {
110+
throw IcebergError("not implemented");
111+
}
112+
113+
/// Implementation of NamespaceContainer
114+
/// \brief Mutable lookup of the container for `namespace_ident`, starting at `root`.
///
/// Thin forwarder to GetNamespaceContainerImpl. Callers in this file treat a
/// null result as "namespace not found".
NamespaceContainer* NamespaceContainer::GetNamespaceContainer(
    NamespaceContainer* root, const Namespace& namespace_ident) {
  NamespaceContainer* found = GetNamespaceContainerImpl(root, namespace_ident);
  return found;
}
118+
119+
/// \brief Read-only lookup of the container for `namespace_ident`, starting at
/// `root`.
///
/// Thin forwarder to GetNamespaceContainerImpl. Callers in this file treat a
/// null result as "namespace not found".
const NamespaceContainer* NamespaceContainer::GetNamespaceContainer(
    const NamespaceContainer* root, const Namespace& namespace_ident) {
  const NamespaceContainer* found = GetNamespaceContainerImpl(root, namespace_ident);
  return found;
}
123+
124+
bool NamespaceContainer::NamespaceExists(const Namespace& namespace_ident) const {
125+
return GetNamespaceContainer(this, namespace_ident) != nullptr;
126+
}
127+
128+
std::vector<std::string> NamespaceContainer::ListChildrenNamespaces(
129+
const std::optional<Namespace>& parent_namespace_ident) const {
130+
auto container = this;
131+
if (parent_namespace_ident.has_value()) {
132+
container = GetNamespaceContainer(this, *parent_namespace_ident);
133+
if (!container) return {};
134+
}
135+
136+
std::vector<std::string> names;
137+
auto const& children = container->children_;
138+
names.reserve(children.size());
139+
std::ranges::transform(children, std::back_inserter(names),
140+
[](const auto& pair) { return pair.first; });
141+
return names;
142+
}
143+
144+
bool NamespaceContainer::CreateNamespace(
145+
const Namespace& namespace_ident,
146+
const std::unordered_map<std::string, std::string>& properties) {
147+
auto container = this;
148+
bool newly_created = false;
149+
150+
for (const auto& part_level : namespace_ident.levels) {
151+
if (auto it = container->children_.find(part_level);
152+
it == container->children_.end()) {
153+
container = &container->children_[part_level];
154+
newly_created = true;
155+
} else {
156+
container = &it->second;
157+
}
158+
}
159+
160+
if (!newly_created) return false;
161+
162+
container->properties_ = properties;
163+
return true;
164+
}
165+
166+
bool NamespaceContainer::DeleteNamespace(const Namespace& namespace_ident) {
167+
if (namespace_ident.levels.empty()) return false;
168+
169+
auto parent_namespace_ident = namespace_ident;
170+
const auto to_delete = parent_namespace_ident.levels.back();
171+
parent_namespace_ident.levels.pop_back();
172+
173+
auto* parent = GetNamespaceContainer(this, parent_namespace_ident);
174+
if (!parent) return false;
175+
176+
auto it = parent->children_.find(to_delete);
177+
if (it == parent->children_.end()) return false;
178+
179+
const auto& target = it->second;
180+
if (!target.children_.empty() || !target.table_metadata_locations_.empty()) {
181+
return false;
182+
}
183+
184+
return parent->children_.erase(to_delete) > 0;
185+
}
186+
187+
std::optional<std::unordered_map<std::string, std::string>>
188+
NamespaceContainer::GetProperties(const Namespace& namespace_ident) const {
189+
const auto container = GetNamespaceContainer(this, namespace_ident);
190+
if (!container) return std::nullopt;
191+
return container->properties_;
192+
}
193+
194+
bool NamespaceContainer::ReplaceProperties(
195+
const Namespace& namespace_ident,
196+
const std::unordered_map<std::string, std::string>& properties) {
197+
const auto container = GetNamespaceContainer(this, namespace_ident);
198+
if (!container) return false;
199+
container->properties_ = properties;
200+
return true;
201+
}
202+
203+
std::vector<std::string> NamespaceContainer::ListTables(
204+
const Namespace& namespace_ident) const {
205+
const auto container = GetNamespaceContainer(this, namespace_ident);
206+
if (!container) return {};
207+
208+
const auto& locations = container->table_metadata_locations_;
209+
std::vector<std::string> table_names;
210+
table_names.reserve(locations.size());
211+
212+
std::ranges::transform(locations, std::back_inserter(table_names),
213+
[](const auto& pair) { return pair.first; });
214+
std::ranges::sort(table_names);
215+
216+
return table_names;
217+
}
218+
219+
/// \brief Record the metadata location of a table inside its namespace.
///
/// \param table_ident Namespace and name of the table to register.
/// \param metadata_location Metadata file location stored for the table.
/// \return false when the namespace cannot be resolved or a table with the same
/// name is already registered (the existing entry is left untouched); true when
/// the table was newly registered.
bool NamespaceContainer::RegisterTable(TableIdentifier const& table_ident,
                                       const std::string& metadata_location) {
  auto* container = GetNamespaceContainer(this, table_ident.ns);
  if (!container) return false;

  // try_emplace inserts only when the name is absent and reports whether it did,
  // replacing the previous contains() + operator[] double lookup.
  return container->table_metadata_locations_
      .try_emplace(table_ident.name, metadata_location)
      .second;
}
227+
228+
/// \brief Remove a table's entry from its namespace.
///
/// \return true when an entry was erased; false when the namespace cannot be
/// resolved or holds no table with that name.
bool NamespaceContainer::UnregisterTable(TableIdentifier const& table_ident) {
  auto* node = GetNamespaceContainer(this, table_ident.ns);
  if (node == nullptr) {
    return false;
  }
  const auto erased = node->table_metadata_locations_.erase(table_ident.name);
  return erased > 0;
}
233+
234+
/// \brief Report whether a table is registered in its namespace.
///
/// \return true only when the namespace resolves and contains an entry for the
/// table name.
bool NamespaceContainer::TableExists(TableIdentifier const& table_ident) const {
  const NamespaceContainer* node = GetNamespaceContainer(this, table_ident.ns);
  return node != nullptr && node->table_metadata_locations_.contains(table_ident.name);
}
239+
240+
/// \brief Look up the metadata location recorded for a table.
///
/// \return A copy of the stored location, or std::nullopt when either the
/// namespace or the table is unknown.
std::optional<std::string> NamespaceContainer::GetTableMetadataLocation(
    TableIdentifier const& table_ident) const {
  const NamespaceContainer* node = GetNamespaceContainer(this, table_ident.ns);
  if (node == nullptr) {
    return std::nullopt;
  }

  const auto& locations = node->table_metadata_locations_;
  if (const auto found = locations.find(table_ident.name); found != locations.end()) {
    return found->second;
  }
  return std::nullopt;
}
248+
} // namespace iceberg

0 commit comments

Comments
 (0)