diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 44b5003e7..146d94173 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -19,6 +19,7 @@ set(ICEBERG_INCLUDES "$" "$") set(ICEBERG_SOURCES arrow_c_data_internal.cc + catalog/in_memory_catalog.cc demo.cc expression/expression.cc file_reader.cc @@ -74,6 +75,7 @@ add_iceberg_lib(iceberg iceberg_install_all_headers(iceberg) +add_subdirectory(catalog) add_subdirectory(expression) add_subdirectory(util) diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h index 121b0da36..a882f4d61 100644 --- a/src/iceberg/catalog.h +++ b/src/iceberg/catalog.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "iceberg/result.h" @@ -42,6 +43,58 @@ class ICEBERG_EXPORT Catalog { /// \brief Return the name for this catalog virtual std::string_view name() const = 0; + /// \brief Create a namespace with associated properties. + /// + /// \param ns the namespace to create + /// \param properties a key-value map of metadata for the namespace + /// \return Status::OK if created successfully; + /// ErrorKind::kAlreadyExists if the namespace already exists; + /// ErrorKind::kNotSupported if the operation is not supported + virtual Status CreateNamespace( + const Namespace& ns, + const std::unordered_map& properties) = 0; + + /// \brief List child namespaces from the given namespace. + /// + /// \param ns the parent namespace + /// \return a list of child namespaces; + /// ErrorKind::kNoSuchNamespace if the given namespace does not exist + virtual Result> ListNamespaces(const Namespace& ns) const = 0; + + /// \brief Get metadata properties for a namespace. + /// + /// \param ns the namespace to look up + /// \return a key-value map of metadata properties; + /// ErrorKind::kNoSuchNamespace if the namespace does not exist + virtual Result> GetNamespaceProperties( + const Namespace& ns) const = 0; + + /// \brief Drop a namespace. + /// + /// \param ns the namespace to drop + /// \return Status::OK if dropped successfully; + /// ErrorKind::kNoSuchNamespace if the namespace does not exist; + /// ErrorKind::kNotAllowed if the namespace is not empty + virtual Status DropNamespace(const Namespace& ns) = 0; + + /// \brief Check whether the namespace exists. + /// + /// \param ns the namespace to check + /// \return true if the namespace exists, false otherwise + virtual Result NamespaceExists(const Namespace& ns) const = 0; + + /// \brief Update a namespace's properties by applying additions and removals. + /// + /// \param ns the namespace to update + /// \param updates a set of properties to add or overwrite + /// \param removals a set of property keys to remove + /// \return Status::OK if the update is successful; + /// ErrorKind::kNoSuchNamespace if the namespace does not exist; + /// ErrorKind::kUnsupported if the operation is not supported + virtual Status UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals) = 0; + /// \brief Return all the identifiers under this namespace /// /// \param ns a namespace @@ -80,8 +133,8 @@ class ICEBERG_EXPORT Catalog { /// \param spec a partition spec /// \param location a location for the table; leave empty if unspecified /// \param properties a string map of table properties - /// \return a Transaction to create the table or ErrorKind::kAlreadyExists if the table - /// already exists + /// \return a Transaction to create the table or ErrorKind::kAlreadyExists if the + /// table already exists virtual Result> StageCreateTable( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, @@ -90,8 +143,11 @@ class ICEBERG_EXPORT Catalog { /// \brief Check whether table exists /// /// \param identifier a table identifier - /// \return true if the table exists, false otherwise - virtual bool TableExists(const TableIdentifier& identifier) const = 0; + /// \return Result indicating table exists or not. + /// - On success, the table existence was successfully checked (actual + /// existence may be inferred elsewhere). + /// - On failure, contains error information. + virtual Result TableExists(const TableIdentifier& identifier) const = 0; /// \brief Drop a table; optionally delete data and metadata files /// @@ -100,8 +156,10 @@ class ICEBERG_EXPORT Catalog { /// /// \param identifier a table identifier /// \param purge if true, delete all data and metadata files in the table - /// \return true if the table was dropped, false if the table did not exist - virtual bool DropTable(const TableIdentifier& identifier, bool purge) = 0; + /// \return Status indicating the outcome of the operation. + /// - On success, the table was dropped (or did not exist). + /// - On failure, contains error information. + virtual Status DropTable(const TableIdentifier& identifier, bool purge) = 0; /// \brief Load a table /// @@ -119,18 +177,6 @@ class ICEBERG_EXPORT Catalog { virtual Result> RegisterTable( const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; - /// \brief Initialize a catalog given a custom name and a map of catalog properties - /// - /// A custom Catalog implementation must have a default constructor. A compute engine - /// will first initialize the catalog without any arguments, and then call this method - /// to complete catalog initialization with properties passed into the engine. - /// - /// \param name a custom name for the catalog - /// \param properties catalog properties - virtual void Initialize( - const std::string& name, - const std::unordered_map& properties) = 0; - /// \brief Instantiate a builder to either create a table or start a create/replace /// transaction /// diff --git a/src/iceberg/catalog/CMakeLists.txt b/src/iceberg/catalog/CMakeLists.txt new file mode 100644 index 000000000..ec53e84aa --- /dev/null +++ b/src/iceberg/catalog/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +iceberg_install_all_headers(iceberg/catalog) diff --git a/src/iceberg/catalog/in_memory_catalog.cc b/src/iceberg/catalog/in_memory_catalog.cc new file mode 100644 index 000000000..3e32ddc75 --- /dev/null +++ b/src/iceberg/catalog/in_memory_catalog.cc @@ -0,0 +1,587 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/in_memory_catalog.h" + +#include +#include // IWYU pragma: keep +#include +#include +#include + +#include "iceberg/exception.h" +#include "iceberg/table.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +namespace { + +/// \brief A hierarchical namespace that manages namespaces and table metadata in-memory. +/// +/// Each InMemoryNamespace represents a namespace level and can contain properties, +/// tables, and child namespaces. This structure enables a tree-like representation +/// of nested namespaces. +class ICEBERG_EXPORT InMemoryNamespace { + public: + /// \brief Checks whether the given namespace exists. + /// + /// \param namespace_ident The namespace to check. + /// \return Result indicating whether the namespace exists. + Result NamespaceExists(const Namespace& namespace_ident) const; + + /// \brief Lists immediate child namespaces under the given parent namespace. + /// + /// \param parent_namespace_ident The parent namespace to list children for. + /// \return A vector of child namespaces if found; error otherwise. + Result> ListNamespaces( + const Namespace& parent_namespace_ident) const; + + /// \brief Creates a new namespace with the specified properties. + /// + /// \param namespace_ident The namespace to create. + /// \param properties A map of key-value pairs to associate with the namespace. + /// \return Status::OK if the namespace is created; + /// ErrorKind::kAlreadyExists if the namespace already exists. + Status CreateNamespace(const Namespace& namespace_ident, + const std::unordered_map& properties); + + /// \brief Deletes an existing namespace. + /// + /// \param namespace_ident The namespace to delete. + /// \return Status::OK if the namespace is deleted; + /// ErrorKind::kNoSuchNamespace if the namespace does not exist; + /// ErrorKind::kNotAllowed if the namespace is not empty. + Status DropNamespace(const Namespace& namespace_ident); + + /// \brief Retrieves the properties of the specified namespace. + /// + /// \param namespace_ident The namespace whose properties are to be retrieved. + /// \return A map of property key-value pairs if the namespace exists; + /// error otherwise. + Result> GetProperties( + const Namespace& namespace_ident) const; + + /// \brief Updates a namespace's properties by applying additions and removals. + /// + /// \param namespace_ident The namespace to update. + /// \param updates Properties to add or overwrite. + /// \param removals Property keys to remove. + /// \return Status::OK if the update is successful; + /// ErrorKind::kNoSuchNamespace if the namespace does not exist; + /// ErrorKind::kUnsupported if the operation is not supported. + Status UpdateNamespaceProperties( + const Namespace& namespace_ident, + const std::unordered_map& updates, + const std::unordered_set& removals); + + /// \brief Lists all table names under the specified namespace. + /// + /// \param namespace_ident The namespace to list tables from. + /// \return A vector of table names if successful; error otherwise. + Result> ListTables(const Namespace& namespace_ident) const; + + /// \brief Registers a table in the given namespace with a metadata location. + /// + /// \param table_ident The fully qualified identifier of the table. + /// \param metadata_location The path to the table's metadata. + /// \return Status::OK if the table is registered; + /// Error otherwise. + Status RegisterTable(const TableIdentifier& table_ident, + const std::string& metadata_location); + + /// \brief Unregisters a table from the specified namespace. + /// + /// \param table_ident The identifier of the table to unregister. + /// \return Status::OK if the table is removed; + /// ErrorKind::kNoSuchTable if the table does not exist. + Status UnregisterTable(const TableIdentifier& table_ident); + + /// \brief Checks if a table exists in the specified namespace. + /// + /// \param table_ident The identifier of the table to check. + /// \return Result indicating whether the table exists. + Result TableExists(const TableIdentifier& table_ident) const; + + /// \brief Gets the metadata location for the specified table. + /// + /// \param table_ident The identifier of the table. + /// \return The metadata location if the table exists; error otherwise. + Result GetTableMetadataLocation(const TableIdentifier& table_ident) const; + + /// \brief Internal utility for retrieving a namespace node pointer from the tree. + /// + /// \tparam NamespacePtr The type of the namespace node pointer. + /// \param root The root namespace node. + /// \param namespace_ident The fully qualified namespace to resolve. + /// \return A pointer to the namespace node if it exists; error otherwise. + template + static Result GetNamespaceImpl(NamespacePtr root, + const Namespace& namespace_ident) { + auto node = root; + for (const auto& part_level : namespace_ident.levels) { + auto it = node->children_.find(part_level); + if (it == node->children_.end()) { + return NoSuchNamespace("{}", part_level); + } + node = &it->second; + } + return node; + } + + private: + /// Map of child namespace names to their corresponding namespace instances. + std::unordered_map children_; + + /// Key-value property map for this namespace. + std::unordered_map properties_; + + /// Mapping of table names to metadata file locations. + std::unordered_map table_metadata_locations_; +}; + +Result GetNamespace(InMemoryNamespace* root, + const Namespace& namespace_ident) { + return InMemoryNamespace::GetNamespaceImpl(root, namespace_ident); +} + +Result GetNamespace(const InMemoryNamespace* root, + const Namespace& namespace_ident) { + return InMemoryNamespace::GetNamespaceImpl(root, namespace_ident); +} + +Result InMemoryNamespace::NamespaceExists(const Namespace& namespace_ident) const { + const auto& ns = GetNamespace(this, namespace_ident); + if (ns.has_value()) { + return true; + } + if (ns.error().kind == ErrorKind::kNoSuchNamespace) { + return false; + } + return unexpected(ns.error()); +} + +Result> InMemoryNamespace::ListNamespaces( + const Namespace& parent_namespace_ident) const { + const auto nsRs = GetNamespace(this, parent_namespace_ident); + ICEBERG_RETURN_UNEXPECTED(nsRs); + auto ns = *nsRs; + + std::vector names; + auto const& children = ns->children_; + names.reserve(children.size()); + std::ranges::transform(children, std::back_inserter(names), [&](const auto& pair) { + auto childNs = parent_namespace_ident; + childNs.levels.emplace_back(pair.first); + return childNs; + }); + return names; +} + +Status InMemoryNamespace::CreateNamespace( + const Namespace& namespace_ident, + const std::unordered_map& properties) { + if (namespace_ident.levels.empty()) { + return InvalidArgument("namespace identifier is empty"); + } + + auto ns = this; + bool newly_created = false; + for (const auto& part_level : namespace_ident.levels) { + if (auto it = ns->children_.find(part_level); it == ns->children_.end()) { + ns = &ns->children_[part_level]; + newly_created = true; + } else { + ns = &it->second; + } + } + if (!newly_created) { + return AlreadyExists("{}", namespace_ident.levels.back()); + } + + ns->properties_ = properties; + return {}; +} + +Status InMemoryNamespace::DropNamespace(const Namespace& namespace_ident) { + if (namespace_ident.levels.empty()) { + return InvalidArgument("namespace identifier is empty"); + } + + auto parent_namespace_ident = namespace_ident; + const auto to_delete = parent_namespace_ident.levels.back(); + parent_namespace_ident.levels.pop_back(); + + const auto parentRs = GetNamespace(this, parent_namespace_ident); + ICEBERG_RETURN_UNEXPECTED(parentRs); + + const auto it = parentRs.value()->children_.find(to_delete); + if (it == parentRs.value()->children_.end()) { + return NotFound("namespace {} is not found", to_delete); + } + + const auto& target = it->second; + if (!target.children_.empty() || !target.table_metadata_locations_.empty()) { + return NotAllowed("{} has other sub-namespaces and cannot be deleted", to_delete); + } + + parentRs.value()->children_.erase(to_delete); + return {}; +} + +Result> InMemoryNamespace::GetProperties( + const Namespace& namespace_ident) const { + const auto ns = GetNamespace(this, namespace_ident); + ICEBERG_RETURN_UNEXPECTED(ns); + return ns.value()->properties_; +} + +Status InMemoryNamespace::UpdateNamespaceProperties( + const Namespace& namespace_ident, + const std::unordered_map& updates, + const std::unordered_set& removals) { + const auto ns = GetNamespace(this, namespace_ident); + ICEBERG_RETURN_UNEXPECTED(ns); + + std::ranges::for_each(updates, [&](const auto& prop) { + ns.value()->properties_[prop.first] = prop.second; + }); + std::ranges::for_each(removals, + [&](const auto& prop) { ns.value()->properties_.erase(prop); }); + return {}; +} + +Result> InMemoryNamespace::ListTables( + const Namespace& namespace_ident) const { + const auto ns = GetNamespace(this, namespace_ident); + ICEBERG_RETURN_UNEXPECTED(ns); + + const auto& locations = ns.value()->table_metadata_locations_; + std::vector table_names; + table_names.reserve(locations.size()); + + std::ranges::transform(locations, std::back_inserter(table_names), + [](const auto& pair) { return pair.first; }); + std::ranges::sort(table_names); + + return table_names; +} + +Status InMemoryNamespace::RegisterTable(TableIdentifier const& table_ident, + const std::string& metadata_location) { + const auto ns = GetNamespace(this, table_ident.ns); + ICEBERG_RETURN_UNEXPECTED(ns); + if (ns.value()->table_metadata_locations_.contains(table_ident.name)) { + return AlreadyExists("{} already exists", table_ident.name); + } + ns.value()->table_metadata_locations_[table_ident.name] = metadata_location; + return {}; +} + +Status InMemoryNamespace::UnregisterTable(TableIdentifier const& table_ident) { + const auto ns = GetNamespace(this, table_ident.ns); + ICEBERG_RETURN_UNEXPECTED(ns); + ns.value()->table_metadata_locations_.erase(table_ident.name); + return {}; +} + +Result InMemoryNamespace::TableExists(TableIdentifier const& table_ident) const { + const auto ns = GetNamespace(this, table_ident.ns); + ICEBERG_RETURN_UNEXPECTED(ns); + return ns.value()->table_metadata_locations_.contains(table_ident.name); +} + +Result InMemoryNamespace::GetTableMetadataLocation( + TableIdentifier const& table_ident) const { + const auto ns = GetNamespace(this, table_ident.ns); + ICEBERG_RETURN_UNEXPECTED(ns); + const auto it = ns.value()->table_metadata_locations_.find(table_ident.name); + if (it == ns.value()->table_metadata_locations_.end()) { + return NotFound("{} does not exist", table_ident.name); + } + return it->second; +} + +} // namespace + +class ICEBERG_EXPORT InMemoryCatalogImpl { + public: + InMemoryCatalogImpl(std::string name, std::shared_ptr file_io, + std::string warehouse_location, + std::unordered_map properties); + + std::string_view name() const; + + Status CreateNamespace(const Namespace& ns, + const std::unordered_map& properties); + + Result> ListNamespaces(const Namespace& ns) const; + + Status DropNamespace(const Namespace& ns); + + Result NamespaceExists(const Namespace& ns) const; + + Result> GetNamespaceProperties( + const Namespace& ns) const; + + Status UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals); + + Result> ListTables(const Namespace& ns) const; + + Result> CreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties); + + Result> UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates); + + Result> StageCreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties); + + Result TableExists(const TableIdentifier& identifier) const; + + Status DropTable(const TableIdentifier& identifier, bool purge); + + Result> LoadTable(const TableIdentifier& identifier) const; + + Result> RegisterTable(const TableIdentifier& identifier, + const std::string& metadata_file_location); + + std::unique_ptr BuildTable(const TableIdentifier& identifier, + const Schema& schema) const; + + private: + std::string catalog_name_; + std::unordered_map properties_; + std::shared_ptr file_io_; + std::string warehouse_location_; + std::unique_ptr root_namespace_; + mutable std::recursive_mutex mutex_; +}; + +InMemoryCatalogImpl::InMemoryCatalogImpl( + std::string name, std::shared_ptr file_io, std::string warehouse_location, + std::unordered_map properties) + : catalog_name_(std::move(name)), + properties_(std::move(properties)), + file_io_(std::move(file_io)), + warehouse_location_(std::move(warehouse_location)), + root_namespace_(std::make_unique()) {} + +std::string_view InMemoryCatalogImpl::name() const { return catalog_name_; } + +Status InMemoryCatalogImpl::CreateNamespace( + const Namespace& ns, const std::unordered_map& properties) { + std::unique_lock lock(mutex_); + return root_namespace_->CreateNamespace(ns, properties); +} + +Result> InMemoryCatalogImpl::ListNamespaces( + const Namespace& ns) const { + std::unique_lock lock(mutex_); + return root_namespace_->ListNamespaces(ns); +} + +Status InMemoryCatalogImpl::DropNamespace(const Namespace& ns) { + std::unique_lock lock(mutex_); + return root_namespace_->DropNamespace(ns); +} + +Result InMemoryCatalogImpl::NamespaceExists(const Namespace& ns) const { + std::unique_lock lock(mutex_); + return root_namespace_->NamespaceExists(ns); +} + +Result> +InMemoryCatalogImpl::GetNamespaceProperties(const Namespace& ns) const { + std::unique_lock lock(mutex_); + return root_namespace_->GetProperties(ns); +} + +Status InMemoryCatalogImpl::UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals) { + std::unique_lock lock(mutex_); + return root_namespace_->UpdateNamespaceProperties(ns, updates, removals); +} + +Result> InMemoryCatalogImpl::ListTables( + const Namespace& ns) const { + std::unique_lock lock(mutex_); + const auto& table_names = root_namespace_->ListTables(ns); + ICEBERG_RETURN_UNEXPECTED(table_names); + std::vector table_idents; + table_idents.reserve(table_names.value().size()); + std::ranges::transform( + table_names.value(), std::back_inserter(table_idents), + [&ns](auto const& table_name) { return TableIdentifier(ns, table_name); }); + return table_idents; +} + +Result> InMemoryCatalogImpl::CreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties) { + return NotImplemented("create table"); +} + +Result> InMemoryCatalogImpl::UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) { + return NotImplemented("update table"); +} + +Result> InMemoryCatalogImpl::StageCreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties) { + return NotImplemented("stage create table"); +} + +Result InMemoryCatalogImpl::TableExists(const TableIdentifier& identifier) const { + std::unique_lock lock(mutex_); + return root_namespace_->TableExists(identifier); +} + +Status InMemoryCatalogImpl::DropTable(const TableIdentifier& identifier, bool purge) { + std::unique_lock lock(mutex_); + // TODO(Guotao): Delete all metadata files if purge is true. + return root_namespace_->UnregisterTable(identifier); +} + +Result> InMemoryCatalogImpl::LoadTable( + const TableIdentifier& identifier) const { + return NotImplemented("load table"); +} + +Result> InMemoryCatalogImpl::RegisterTable( + const TableIdentifier& identifier, const std::string& metadata_file_location) { + std::unique_lock lock(mutex_); + if (!root_namespace_->NamespaceExists(identifier.ns)) { + return NoSuchNamespace("table namespace does not exist."); + } + if (!root_namespace_->RegisterTable(identifier, metadata_file_location)) { + return UnknownError("The registry failed."); + } + return LoadTable(identifier); +} + +std::unique_ptr InMemoryCatalogImpl::BuildTable( + const TableIdentifier& identifier, const Schema& schema) const { + throw IcebergError("not implemented"); +} + +InMemoryCatalog::InMemoryCatalog( + std::string const& name, std::shared_ptr const& file_io, + std::string const& warehouse_location, + std::unordered_map const& properties) + : impl_(std::make_unique(name, file_io, warehouse_location, + properties)) {} + +InMemoryCatalog::~InMemoryCatalog() = default; + +std::string_view InMemoryCatalog::name() const { return impl_->name(); } + +Status InMemoryCatalog::CreateNamespace( + const Namespace& ns, const std::unordered_map& properties) { + return impl_->CreateNamespace(ns, properties); +} + +Result> +InMemoryCatalog::GetNamespaceProperties(const Namespace& ns) const { + return impl_->GetNamespaceProperties(ns); +} + +Result> InMemoryCatalog::ListNamespaces( + const Namespace& ns) const { + return impl_->ListNamespaces(ns); +} + +Status InMemoryCatalog::DropNamespace(const Namespace& ns) { + return impl_->DropNamespace(ns); +} + +Result InMemoryCatalog::NamespaceExists(const Namespace& ns) const { + return impl_->NamespaceExists(ns); +} + +Status InMemoryCatalog::UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals) { + return impl_->UpdateNamespaceProperties(ns, updates, removals); +} + +Result> InMemoryCatalog::ListTables( + const Namespace& ns) const { + return impl_->ListTables(ns); +} + +Result> InMemoryCatalog::CreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties) { + return impl_->CreateTable(identifier, schema, spec, location, properties); +} + +Result> InMemoryCatalog::UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) { + return impl_->UpdateTable(identifier, requirements, updates); +} + +Result> InMemoryCatalog::StageCreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties) { + return impl_->StageCreateTable(identifier, schema, spec, location, properties); +} + +Result InMemoryCatalog::TableExists(const TableIdentifier& identifier) const { + return impl_->TableExists(identifier); +} + +Status InMemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) { + return impl_->DropTable(identifier, purge); +} + +Result> InMemoryCatalog::LoadTable( + const TableIdentifier& identifier) const { + return impl_->LoadTable(identifier); +} + +Result> InMemoryCatalog::RegisterTable( + const TableIdentifier& identifier, const std::string& metadata_file_location) { + return impl_->RegisterTable(identifier, metadata_file_location); +} + +std::unique_ptr InMemoryCatalog::BuildTable( + const TableIdentifier& identifier, const Schema& schema) const { + throw IcebergError("not implemented"); +} + +} // namespace iceberg diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h new file mode 100644 index 000000000..c8e24b5db --- /dev/null +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include "iceberg/catalog.h" + +namespace iceberg { +/** + * @brief An in-memory implementation of the Iceberg Catalog interface. + * + * This catalog stores all metadata purely in memory, with no persistence to disk + * or external systems. It is primarily intended for unit tests, prototyping, or + * demonstration purposes. + * + * @note This class is **not** suitable for production use. + * All data will be lost when the process exits. + */ +class ICEBERG_EXPORT InMemoryCatalog : public Catalog { + public: + InMemoryCatalog(std::string const& name, std::shared_ptr const& file_io, + std::string const& warehouse_location, + std::unordered_map const& properties); + ~InMemoryCatalog() override; + + std::string_view name() const override; + + Status CreateNamespace( + const Namespace& ns, + const std::unordered_map& properties) override; + + Result> ListNamespaces(const Namespace& ns) const override; + + Status DropNamespace(const Namespace& ns) override; + + Result NamespaceExists(const Namespace& ns) const override; + + Result> GetNamespaceProperties( + const Namespace& ns) const override; + + Status UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals) override; + + Result> ListTables(const Namespace& ns) const override; + + Result> CreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties) override; + + Result> UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) override; + + Result> StageCreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::unordered_map& properties) override; + + Result TableExists(const TableIdentifier& identifier) const override; + + Status DropTable(const TableIdentifier& identifier, bool purge) override; + + Result> LoadTable( + const TableIdentifier& identifier) const override; + + Result> RegisterTable( + const TableIdentifier& identifier, + const std::string& metadata_file_location) override; + + std::unique_ptr BuildTable(const TableIdentifier& identifier, + const Schema& schema) const override; + + private: + std::unique_ptr impl_; +}; + +} // namespace iceberg diff --git a/src/iceberg/result.h b/src/iceberg/result.h index 38d9e381f..a682e8316 100644 --- a/src/iceberg/result.h +++ b/src/iceberg/result.h @@ -38,6 +38,7 @@ enum class ErrorKind { kJsonParseError, kNoSuchNamespace, kNoSuchTable, + kNotAllowed, kNotFound, kNotImplemented, kNotSupported, @@ -80,6 +81,7 @@ DEFINE_ERROR_FUNCTION(IOError) DEFINE_ERROR_FUNCTION(JsonParseError) DEFINE_ERROR_FUNCTION(NoSuchNamespace) DEFINE_ERROR_FUNCTION(NoSuchTable) +DEFINE_ERROR_FUNCTION(NotAllowed) DEFINE_ERROR_FUNCTION(NotFound) DEFINE_ERROR_FUNCTION(NotImplemented) DEFINE_ERROR_FUNCTION(NotSupported) diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 9fc6bd6cb..a5996c426 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -87,6 +87,7 @@ struct Namespace; struct TableIdentifier; class Catalog; +class FileIO; class LocationProvider; class SortField; class SortOrder; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4a88229f5..8ca09bd16 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -43,6 +43,11 @@ target_sources(schema_test target_link_libraries(schema_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME schema_test COMMAND schema_test) +add_executable(catalog_test) +target_sources(catalog_test PRIVATE in_memory_catalog_test.cc) +target_link_libraries(catalog_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) +add_test(NAME catalog_test COMMAND catalog_test) + add_executable(expression_test) target_sources(expression_test PRIVATE expression_test.cc) target_link_libraries(expression_test PRIVATE iceberg_static GTest::gtest_main diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc new file mode 100644 index 000000000..c76d78878 --- /dev/null +++ b/test/in_memory_catalog_test.cc @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/in_memory_catalog.h" + +#include +#include + +#include "matchers.h" + +namespace iceberg { + +class InMemoryCatalogTest : public ::testing::Test { + protected: + void SetUp() override { + file_io_ = nullptr; // TODO(Guotao): A real FileIO instance needs to be constructed. + std::unordered_map properties = {{"prop1", "val1"}}; + catalog_ = std::make_unique("test_catalog", file_io_, + "/tmp/warehouse/", properties); + } + + std::shared_ptr file_io_; + std::unique_ptr catalog_; +}; + +TEST_F(InMemoryCatalogTest, CatalogName) { + EXPECT_EQ(catalog_->name(), "test_catalog"); + auto tablesRs = catalog_->ListTables(Namespace{{}}); + EXPECT_THAT(tablesRs, IsOk()); + ASSERT_TRUE(tablesRs->empty()); +} + +TEST_F(InMemoryCatalogTest, ListTables) { + auto tablesRs = catalog_->ListTables(Namespace{{}}); + EXPECT_THAT(tablesRs, IsOk()); + ASSERT_TRUE(tablesRs->empty()); +} + +TEST_F(InMemoryCatalogTest, TableExists) { + TableIdentifier tableIdent{.ns = {}, .name = "t1"}; + auto result = catalog_->TableExists(tableIdent); + EXPECT_THAT(result, HasValue(::testing::Eq(false))); +} + +TEST_F(InMemoryCatalogTest, DropTable) { + TableIdentifier tableIdent{.ns = {}, .name = "t1"}; + auto result = catalog_->DropTable(tableIdent, false); + EXPECT_THAT(result, IsOk()); +} + +TEST_F(InMemoryCatalogTest, Namespace) { + Namespace ns{.levels = {"n1", "n2"}}; + std::unordered_map properties = {{"prop1", "val1"}, + {"prop2", "val2"}}; + EXPECT_THAT(catalog_->CreateNamespace(ns, properties), IsOk()); + EXPECT_THAT(catalog_->CreateNamespace(ns, properties), + IsError(ErrorKind::kAlreadyExists)); + + EXPECT_THAT(catalog_->NamespaceExists(ns), HasValue(::testing::Eq(true))); + EXPECT_THAT(catalog_->NamespaceExists(Namespace{.levels = {"n1", "n3"}}), + HasValue(::testing::Eq(false))); + auto childNs = catalog_->ListNamespaces(Namespace{.levels = {"n1"}}); + EXPECT_THAT(childNs, IsOk()); + ASSERT_EQ(childNs->size(), 1U); + ASSERT_EQ(childNs->at(0).levels.size(), 2U); + ASSERT_EQ(childNs->at(0).levels.at(1), "n2"); + + auto propsRs = catalog_->GetNamespaceProperties(ns); + EXPECT_THAT(propsRs, IsOk()); + ASSERT_EQ(propsRs->size(), 2U); + ASSERT_EQ(propsRs.value().at("prop1"), "val1"); + ASSERT_EQ(propsRs.value().at("prop2"), "val2"); + + EXPECT_THAT(catalog_->UpdateNamespaceProperties( + ns, {{"prop2", "val2-updated"}, {"prop3", "val3"}}, {"prop1"}), + IsOk()); + propsRs = catalog_->GetNamespaceProperties(ns); + EXPECT_THAT(propsRs, IsOk()); + ASSERT_EQ(propsRs->size(), 2U); + ASSERT_EQ(propsRs.value().at("prop2"), "val2-updated"); + ASSERT_EQ(propsRs.value().at("prop3"), "val3"); +} + +} // namespace iceberg