diff --git a/src/iceberg/catalog/in_memory_catalog.cc b/src/iceberg/catalog/in_memory_catalog.cc index 3e32ddc75..f2ad26685 100644 --- a/src/iceberg/catalog/in_memory_catalog.cc +++ b/src/iceberg/catalog/in_memory_catalog.cc @@ -21,18 +21,14 @@ #include #include // IWYU pragma: keep -#include -#include -#include #include "iceberg/exception.h" #include "iceberg/table.h" +#include "iceberg/table_metadata.h" #include "iceberg/util/macros.h" namespace iceberg { -namespace { - /// \brief A hierarchical namespace that manages namespaces and table metadata in-memory. /// /// Each InMemoryNamespace represents a namespace level and can contain properties, @@ -318,117 +314,56 @@ Result InMemoryNamespace::GetTableMetadataLocation( return it->second; } -} // namespace - -class ICEBERG_EXPORT InMemoryCatalogImpl { - public: - InMemoryCatalogImpl(std::string name, std::shared_ptr file_io, - std::string warehouse_location, - std::unordered_map properties); - - std::string_view name() const; - - Status CreateNamespace(const Namespace& ns, - const std::unordered_map& properties); - - Result> ListNamespaces(const Namespace& ns) const; - - Status DropNamespace(const Namespace& ns); - - Result NamespaceExists(const Namespace& ns) const; - - Result> GetNamespaceProperties( - const Namespace& ns) const; - - Status UpdateNamespaceProperties( - const Namespace& ns, const std::unordered_map& updates, - const std::unordered_set& removals); - - Result> ListTables(const Namespace& ns) const; - - Result> CreateTable( - const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, - const std::string& location, - const std::unordered_map& properties); - - Result> UpdateTable( - const TableIdentifier& identifier, - const std::vector>& requirements, - const std::vector>& updates); - - Result> StageCreateTable( - const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, - const std::string& location, - const std::unordered_map& properties); - - Result TableExists(const TableIdentifier& identifier) const; - - Status DropTable(const TableIdentifier& identifier, bool purge); - - Result> LoadTable(const TableIdentifier& identifier) const; - - Result> RegisterTable(const TableIdentifier& identifier, - const std::string& metadata_file_location); - - std::unique_ptr BuildTable(const TableIdentifier& identifier, - const Schema& schema) const; - - private: - std::string catalog_name_; - std::unordered_map properties_; - std::shared_ptr file_io_; - std::string warehouse_location_; - std::unique_ptr root_namespace_; - mutable std::recursive_mutex mutex_; -}; - -InMemoryCatalogImpl::InMemoryCatalogImpl( - std::string name, std::shared_ptr file_io, std::string warehouse_location, - std::unordered_map properties) +InMemoryCatalog::InMemoryCatalog( + std::string const& name, std::shared_ptr const& file_io, + std::string const& warehouse_location, + std::unordered_map const& properties) : catalog_name_(std::move(name)), properties_(std::move(properties)), file_io_(std::move(file_io)), warehouse_location_(std::move(warehouse_location)), root_namespace_(std::make_unique()) {} -std::string_view InMemoryCatalogImpl::name() const { return catalog_name_; } +InMemoryCatalog::~InMemoryCatalog() = default; + +std::string_view InMemoryCatalog::name() const { return catalog_name_; } -Status InMemoryCatalogImpl::CreateNamespace( +Status InMemoryCatalog::CreateNamespace( const Namespace& ns, const std::unordered_map& properties) { std::unique_lock lock(mutex_); return root_namespace_->CreateNamespace(ns, properties); } -Result> InMemoryCatalogImpl::ListNamespaces( +Result> +InMemoryCatalog::GetNamespaceProperties(const Namespace& ns) const { + std::unique_lock lock(mutex_); + return root_namespace_->GetProperties(ns); +} + +Result> InMemoryCatalog::ListNamespaces( const Namespace& ns) const { std::unique_lock lock(mutex_); return root_namespace_->ListNamespaces(ns); } -Status InMemoryCatalogImpl::DropNamespace(const Namespace& ns) { +Status InMemoryCatalog::DropNamespace(const Namespace& ns) { std::unique_lock lock(mutex_); return root_namespace_->DropNamespace(ns); } -Result InMemoryCatalogImpl::NamespaceExists(const Namespace& ns) const { +Result InMemoryCatalog::NamespaceExists(const Namespace& ns) const { std::unique_lock lock(mutex_); return root_namespace_->NamespaceExists(ns); } -Result> -InMemoryCatalogImpl::GetNamespaceProperties(const Namespace& ns) const { - std::unique_lock lock(mutex_); - return root_namespace_->GetProperties(ns); -} - -Status InMemoryCatalogImpl::UpdateNamespaceProperties( +Status InMemoryCatalog::UpdateNamespaceProperties( const Namespace& ns, const std::unordered_map& updates, const std::unordered_set& removals) { std::unique_lock lock(mutex_); return root_namespace_->UpdateNamespaceProperties(ns, updates, removals); } -Result> InMemoryCatalogImpl::ListTables( +Result> InMemoryCatalog::ListTables( const Namespace& ns) const { std::unique_lock lock(mutex_); const auto& table_names = root_namespace_->ListTables(ns); @@ -441,44 +376,58 @@ Result> InMemoryCatalogImpl::ListTables( return table_idents; } -Result> InMemoryCatalogImpl::CreateTable( +Result> InMemoryCatalog::CreateTable( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, const std::unordered_map& properties) { return NotImplemented("create table"); } -Result> InMemoryCatalogImpl::UpdateTable( +Result> InMemoryCatalog::UpdateTable( const TableIdentifier& identifier, const std::vector>& requirements, const std::vector>& updates) { return NotImplemented("update table"); } -Result> InMemoryCatalogImpl::StageCreateTable( +Result> InMemoryCatalog::StageCreateTable( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, const std::unordered_map& properties) { return NotImplemented("stage create table"); } -Result InMemoryCatalogImpl::TableExists(const TableIdentifier& identifier) const { +Result InMemoryCatalog::TableExists(const TableIdentifier& identifier) const { std::unique_lock lock(mutex_); return root_namespace_->TableExists(identifier); } -Status InMemoryCatalogImpl::DropTable(const TableIdentifier& identifier, bool purge) { +Status InMemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) { std::unique_lock lock(mutex_); // TODO(Guotao): Delete all metadata files if purge is true. return root_namespace_->UnregisterTable(identifier); } -Result> InMemoryCatalogImpl::LoadTable( +Result> InMemoryCatalog::LoadTable( const TableIdentifier& identifier) const { - return NotImplemented("load table"); + if (!file_io_) [[unlikely]] { + return NotSupported("file_io is not set for catalog {}", catalog_name_); + } + + std::unique_lock lock(mutex_); + auto metadata_location = root_namespace_->GetTableMetadataLocation(identifier); + ICEBERG_RETURN_UNEXPECTED(metadata_location); + + auto metadata = TableMetadataUtil::Read(*file_io_, metadata_location.value()); + ICEBERG_RETURN_UNEXPECTED(metadata); + + return std::make_shared( + identifier, std::move(metadata.value()), metadata_location.value(), file_io_, + std::static_pointer_cast( + std::const_pointer_cast(shared_from_this()))); } -Result> InMemoryCatalogImpl::RegisterTable( +Result> InMemoryCatalog::RegisterTable( const TableIdentifier& identifier, const std::string& metadata_file_location) { std::unique_lock lock(mutex_); if (!root_namespace_->NamespaceExists(identifier.ns)) { @@ -490,95 +439,6 @@ Result> InMemoryCatalogImpl::RegisterTable( return LoadTable(identifier); } -std::unique_ptr InMemoryCatalogImpl::BuildTable( - const TableIdentifier& identifier, const Schema& schema) const { - throw IcebergError("not implemented"); -} - -InMemoryCatalog::InMemoryCatalog( - std::string const& name, std::shared_ptr const& file_io, - std::string const& warehouse_location, - std::unordered_map const& properties) - : impl_(std::make_unique(name, file_io, warehouse_location, - properties)) {} - -InMemoryCatalog::~InMemoryCatalog() = default; - -std::string_view InMemoryCatalog::name() const { return impl_->name(); } - -Status InMemoryCatalog::CreateNamespace( - const Namespace& ns, const std::unordered_map& properties) { - return impl_->CreateNamespace(ns, properties); -} - -Result> -InMemoryCatalog::GetNamespaceProperties(const Namespace& ns) const { - return impl_->GetNamespaceProperties(ns); -} - -Result> InMemoryCatalog::ListNamespaces( - const Namespace& ns) const { - return impl_->ListNamespaces(ns); -} - -Status InMemoryCatalog::DropNamespace(const Namespace& ns) { - return impl_->DropNamespace(ns); -} - -Result InMemoryCatalog::NamespaceExists(const Namespace& ns) const { - return impl_->NamespaceExists(ns); -} - -Status InMemoryCatalog::UpdateNamespaceProperties( - const Namespace& ns, const std::unordered_map& updates, - const std::unordered_set& removals) { - return impl_->UpdateNamespaceProperties(ns, updates, removals); -} - -Result> InMemoryCatalog::ListTables( - const Namespace& ns) const { - return impl_->ListTables(ns); -} - -Result> InMemoryCatalog::CreateTable( - const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, - const std::string& location, - const std::unordered_map& properties) { - return impl_->CreateTable(identifier, schema, spec, location, properties); -} - -Result> InMemoryCatalog::UpdateTable( - const TableIdentifier& identifier, - const std::vector>& requirements, - const std::vector>& updates) { - return impl_->UpdateTable(identifier, requirements, updates); -} - -Result> InMemoryCatalog::StageCreateTable( - const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, - const std::string& location, - const std::unordered_map& properties) { - return impl_->StageCreateTable(identifier, schema, spec, location, properties); -} - -Result InMemoryCatalog::TableExists(const TableIdentifier& identifier) const { - return impl_->TableExists(identifier); -} - -Status InMemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) { - return impl_->DropTable(identifier, purge); -} - -Result> InMemoryCatalog::LoadTable( - const TableIdentifier& identifier) const { - return impl_->LoadTable(identifier); -} - -Result> InMemoryCatalog::RegisterTable( - const TableIdentifier& identifier, const std::string& metadata_file_location) { - return impl_->RegisterTable(identifier, metadata_file_location); -} - std::unique_ptr InMemoryCatalog::BuildTable( const TableIdentifier& identifier, const Schema& schema) const { throw IcebergError("not implemented"); diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h index c8e24b5db..d32a61b5e 100644 --- a/src/iceberg/catalog/in_memory_catalog.h +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -19,9 +19,12 @@ #pragma once +#include + #include "iceberg/catalog.h" namespace iceberg { + /** * @brief An in-memory implementation of the Iceberg Catalog interface. * @@ -32,7 +35,9 @@ namespace iceberg { * @note This class is **not** suitable for production use. * All data will be lost when the process exits. */ -class ICEBERG_EXPORT InMemoryCatalog : public Catalog { +class ICEBERG_EXPORT InMemoryCatalog + : public Catalog, + public std::enable_shared_from_this { public: InMemoryCatalog(std::string const& name, std::shared_ptr const& file_io, std::string const& warehouse_location, @@ -90,7 +95,12 @@ class ICEBERG_EXPORT InMemoryCatalog : public Catalog { const Schema& schema) const override; private: - std::unique_ptr impl_; + std::string catalog_name_; + std::unordered_map properties_; + std::shared_ptr file_io_; + std::string warehouse_location_; + std::unique_ptr root_namespace_; + mutable std::recursive_mutex mutex_; }; } // namespace iceberg diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 62e6321a4..ca96187ab 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -44,11 +44,6 @@ target_sources(schema_test target_link_libraries(schema_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME schema_test COMMAND schema_test) -add_executable(catalog_test) -target_sources(catalog_test PRIVATE in_memory_catalog_test.cc) -target_link_libraries(catalog_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) -add_test(NAME catalog_test COMMAND catalog_test) - add_executable(table_test) target_include_directories(table_test PRIVATE "${CMAKE_BINARY_DIR}") target_sources(table_test PRIVATE test_common.cc json_internal_test.cc table_test.cc @@ -90,4 +85,11 @@ if(ICEBERG_BUILD_BUNDLE) target_link_libraries(arrow_test PRIVATE iceberg_bundle_static GTest::gtest_main GTest::gmock) add_test(NAME arrow_test COMMAND arrow_test) + + add_executable(catalog_test) + target_include_directories(catalog_test PRIVATE "${CMAKE_BINARY_DIR}") + target_sources(catalog_test PRIVATE test_common.cc in_memory_catalog_test.cc) + target_link_libraries(catalog_test PRIVATE iceberg_bundle_static GTest::gtest_main + GTest::gmock) + add_test(NAME catalog_test COMMAND catalog_test) endif() diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index c76d78878..753dedcea 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -19,24 +19,41 @@ #include "iceberg/catalog/in_memory_catalog.h" +#include #include #include +#include "iceberg/arrow/arrow_fs_file_io.h" +#include "iceberg/table.h" +#include "iceberg/table_metadata.h" #include "matchers.h" +#include "temp_file_test_base.h" +#include "test_common.h" namespace iceberg { class InMemoryCatalogTest : public ::testing::Test { protected: void SetUp() override { - file_io_ = nullptr; // TODO(Guotao): A real FileIO instance needs to be constructed. + // generate a unique temporary file path for the test + temp_filepath_ = GenerateUniqueTempFilePathWithSuffix(".metadata.json"); + + file_io_ = std::make_shared( + std::make_shared<::arrow::fs::LocalFileSystem>()); std::unordered_map properties = {{"prop1", "val1"}}; - catalog_ = std::make_unique("test_catalog", file_io_, + catalog_ = std::make_shared("test_catalog", file_io_, "/tmp/warehouse/", properties); } + void TearDown() override { + // Clean up the temporary files created for the table metadata + std::error_code ec; + std::filesystem::remove_all(temp_filepath_, ec); + } + + std::string temp_filepath_; std::shared_ptr file_io_; - std::unique_ptr catalog_; + std::shared_ptr catalog_; }; TEST_F(InMemoryCatalogTest, CatalogName) { @@ -58,6 +75,21 @@ TEST_F(InMemoryCatalogTest, TableExists) { EXPECT_THAT(result, HasValue(::testing::Eq(false))); } +TEST_F(InMemoryCatalogTest, RegisterTable) { + TableIdentifier tableIdent{.ns = {}, .name = "t1"}; + + std::unique_ptr metadata; + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + + auto status = TableMetadataUtil::Write(*file_io_, temp_filepath_, *metadata); + EXPECT_THAT(status, IsOk()); + + auto table = catalog_->RegisterTable(tableIdent, temp_filepath_); + EXPECT_THAT(table, IsOk()); + ASSERT_EQ(table.value()->name().name, "t1"); + ASSERT_EQ(table.value()->location(), "s3://bucket/test/location"); +} + TEST_F(InMemoryCatalogTest, DropTable) { TableIdentifier tableIdent{.ns = {}, .name = "t1"}; auto result = catalog_->DropTable(tableIdent, false); diff --git a/test/temp_file_test_base.h b/test/temp_file_test_base.h index 8e20e2ca5..39714118b 100644 --- a/test/temp_file_test_base.h +++ b/test/temp_file_test_base.h @@ -31,6 +31,46 @@ namespace iceberg { +/// \brief Get the test name for inclusion in the filename +inline std::string TestInfo() { + if (const auto info = ::testing::UnitTest::GetInstance()->current_test_info(); info) { + return std::format("{}_{}", info->test_suite_name(), info->name()); + } + return "unknown_test"; +} + +/// \brief Helper to generate a random alphanumeric string for unique filenames +inline std::string GenerateRandomString(size_t length) { + const std::string_view chars = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dist(0, static_cast(chars.size() - 1)); + + std::string result; + result.reserve(length); + for (size_t i = 0; i < length; ++i) { + result += chars[dist(gen)]; + } + return result; +} + +/// \brief Generates a unique temporary filepath that works across platforms +inline std::string GenerateUniqueTempFilePath() { + std::filesystem::path temp_dir = std::filesystem::temp_directory_path(); + std::string file_name = + std::format("iceberg_test_{}_{}.tmp", TestInfo(), GenerateRandomString(8)); + return (temp_dir / file_name).string(); +} + +/// \brief Create a temporary filepath with the specified suffix/extension +inline std::string GenerateUniqueTempFilePathWithSuffix(const std::string& suffix) { + std::filesystem::path temp_dir = std::filesystem::temp_directory_path(); + std::string file_name = + std::format("iceberg_test_{}_{}{}", TestInfo(), GenerateRandomString(8), suffix); + return (temp_dir / file_name).string(); +} + /// A base class for tests that need to create and manage temporary files. /// Provides utilities for creating platform-independent temporary files /// and ensures proper cleanup after tests run. @@ -83,46 +123,6 @@ class TempFileTestBase : public ::testing::Test { } } - /// \brief Generates a unique temporary filepath that works across platforms - std::string GenerateUniqueTempFilePath() const { - std::filesystem::path temp_dir = std::filesystem::temp_directory_path(); - std::string file_name = - std::format("iceberg_test_{}_{}.tmp", TestInfo(), GenerateRandomString(8)); - return (temp_dir / file_name).string(); - } - - /// \brief Create a temporary filepath with the specified suffix/extension - std::string GenerateUniqueTempFilePathWithSuffix(const std::string& suffix) { - std::filesystem::path temp_dir = std::filesystem::temp_directory_path(); - std::string file_name = - std::format("iceberg_test_{}_{}{}", TestInfo(), GenerateRandomString(8), suffix); - return (temp_dir / file_name).string(); - } - - /// \brief Helper to generate a random alphanumeric string for unique filenames - std::string GenerateRandomString(size_t length) const { - const std::string_view chars = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dist(0, static_cast(chars.size() - 1)); - - std::string result; - result.reserve(length); - for (size_t i = 0; i < length; ++i) { - result += chars[dist(gen)]; - } - return result; - } - - /// \brief Get the test name for inclusion in the filename - std::string TestInfo() const { - if (const auto info = ::testing::UnitTest::GetInstance()->current_test_info(); info) { - return std::format("{}_{}", info->test_suite_name(), info->name()); - } - return "unknown_test"; - } - /// \brief Creates a new temporary filepath and registers it for cleanup std::string CreateNewTempFilePath() { std::string filepath = GenerateUniqueTempFilePath();