From 5c43a2933a28466e9b57a099a3f9f1fbbacd4a81 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 22 Jul 2025 17:20:54 +0800 Subject: [PATCH 01/16] feat: Refresh method support for table --- src/iceberg/table.cc | 20 ++++++++++++++++++++ src/iceberg/table.h | 7 ++++++- test/in_memory_catalog_test.cc | 24 ++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 3bbb3b824..c42c32047 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -21,16 +21,36 @@ #include +#include "iceberg/catalog.h" #include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/sort_order.h" #include "iceberg/table_metadata.h" #include "iceberg/table_scan.h" +#include "iceberg/util/macros.h" namespace iceberg { const std::string& Table::uuid() const { return metadata_->table_uuid; } +Status Table::Refresh() { + if (!catalog_) { + return InvalidArgument("Cannot refresh table metadata without a catalog"); + } + + ICEBERG_ASSIGN_OR_RAISE(auto fresh, catalog_->LoadTable(identifier_)); + if (metadata_location_ != fresh->metadata_location_) { + metadata_ = std::move(fresh->metadata_); + metadata_location_ = std::move(fresh->metadata_location_); + io_ = std::move(fresh->io_); + + schemas_map_.reset(); + partition_spec_map_.reset(); + sort_orders_map_.reset(); + } + return {}; +} + Result> Table::schema() const { return metadata_->Schema(); } const std::shared_ptr>>& diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 9a89057bc..37305db0d 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -57,6 +57,9 @@ class ICEBERG_EXPORT Table { /// \brief Returns the UUID of the table const std::string& uuid() const; + /// \brief Refresh the current table metadata + Status Refresh(); + /// \brief Return the schema for this table, return NotFoundError if not found Result> schema() const; @@ -114,9 +117,11 @@ class ICEBERG_EXPORT Table { const std::shared_ptr& io() const; private: + friend class Table; + const TableIdentifier identifier_; std::shared_ptr metadata_; - const std::string metadata_location_; + std::string metadata_location_; std::shared_ptr io_; std::shared_ptr catalog_; diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index 77b0823e1..d93cdb1e2 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -115,6 +115,30 @@ TEST_F(InMemoryCatalogTest, RegisterTable) { ASSERT_EQ(table.value()->location(), "s3://bucket/test/location"); } +TEST_F(InMemoryCatalogTest, RefreshTable) { + TableIdentifier tableIdent{.ns = {}, .name = "t1"}; + + std::unique_ptr metadata; + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + + auto table_location = GenerateTestTableLocation(tableIdent.name); + auto metadata_location = std::format("{}v0.metadata.json", table_location); + auto status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); + EXPECT_THAT(status, IsOk()); + + auto table = catalog_->RegisterTable(tableIdent, metadata_location); + EXPECT_THAT(table, IsOk()); + ASSERT_TRUE(table.value()->current_snapshot().has_value()); + ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3055729675574597004); + + // Now we don't support commit method in catalog, so here only test refresh with the + // same version + status = table.value()->Refresh(); + EXPECT_THAT(status, IsOk()); + ASSERT_TRUE(table.value()->current_snapshot().has_value()); + ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3055729675574597004); +} + TEST_F(InMemoryCatalogTest, DropTable) { TableIdentifier tableIdent{.ns = {}, .name = "t1"}; auto result = catalog_->DropTable(tableIdent, false); From 9485aeaa3d9ae9a0dcb1676b08b036a9d6c794a6 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Sat, 26 Jul 2025 22:44:08 +0800 Subject: [PATCH 02/16] feat: Refresh method support for table --- src/iceberg/catalog/in_memory_catalog.cc | 26 +++++++++++++++ src/iceberg/catalog/in_memory_catalog.h | 4 +++ src/iceberg/table.cc | 12 +++---- test/in_memory_catalog_test.cc | 23 +++++++++----- test/mock_catalog.h | 40 ++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 13 deletions(-) create mode 100644 test/mock_catalog.h diff --git a/src/iceberg/catalog/in_memory_catalog.cc b/src/iceberg/catalog/in_memory_catalog.cc index 6c34d3adf..e7bed7dc3 100644 --- a/src/iceberg/catalog/in_memory_catalog.cc +++ b/src/iceberg/catalog/in_memory_catalog.cc @@ -109,6 +109,15 @@ class ICEBERG_EXPORT InMemoryNamespace { /// ErrorKind::kNoSuchTable if the table does not exist. Status UnregisterTable(const TableIdentifier& table_ident); + /// \brief Updates the metadata location of an existing table. + /// + /// \param table_ident The fully qualified identifier of the table. + /// \param metadata_location The path to the table's metadata. + /// \return Status::OK if the table metadata location is updated; + /// Error otherwise. + Status UpdateTableMetaLocation(const TableIdentifier& table_ident, + const std::string& metadata_location); + /// \brief Checks if a table exists in the specified namespace. /// /// \param table_ident The identifier of the table to check. @@ -297,6 +306,17 @@ Status InMemoryNamespace::UnregisterTable(TableIdentifier const& table_ident) { return {}; } +Status InMemoryNamespace::UpdateTableMetaLocation(const TableIdentifier& table_ident, + const std::string& metadata_location) { + const auto ns = GetNamespace(this, table_ident.ns); + ICEBERG_RETURN_UNEXPECTED(ns); + if (!ns.value()->table_metadata_locations_.contains(table_ident.name)) { + return NotFound("{} does not exist", table_ident.name); + } + ns.value()->table_metadata_locations_[table_ident.name] = metadata_location; + return {}; +} + Result InMemoryNamespace::TableExists(TableIdentifier const& table_ident) const { const auto ns = GetNamespace(this, table_ident.ns); ICEBERG_RETURN_UNEXPECTED(ns); @@ -446,4 +466,10 @@ std::unique_ptr InMemoryCatalog::BuildTable( throw IcebergError("not implemented"); } +Status InMemoryCatalog::UpdateTableMetaLocationInternal( + const TableIdentifier& identifier, const std::string& metadata_location) { + std::unique_lock lock(mutex_); + return root_namespace_->UpdateTableMetaLocation(identifier, metadata_location); +} + } // namespace iceberg diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h index 979c784de..112a59312 100644 --- a/src/iceberg/catalog/in_memory_catalog.h +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -93,6 +93,10 @@ class ICEBERG_EXPORT InMemoryCatalog std::unique_ptr BuildTable(const TableIdentifier& identifier, const Schema& schema) const override; + protected: + Status UpdateTableMetaLocationInternal(const TableIdentifier& identifier, + const std::string& metadata_location); + private: std::string catalog_name_; std::unordered_map properties_; diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index c42c32047..d4decf2d3 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -35,14 +35,14 @@ const std::string& Table::uuid() const { return metadata_->table_uuid; } Status Table::Refresh() { if (!catalog_) { - return InvalidArgument("Cannot refresh table metadata without a catalog"); + return InvalidArgument("Refresh is not supported for table without a catalog"); } - ICEBERG_ASSIGN_OR_RAISE(auto fresh, catalog_->LoadTable(identifier_)); - if (metadata_location_ != fresh->metadata_location_) { - metadata_ = std::move(fresh->metadata_); - metadata_location_ = std::move(fresh->metadata_location_); - io_ = std::move(fresh->io_); + ICEBERG_ASSIGN_OR_RAISE(auto refreshed_table, catalog_->LoadTable(identifier_)); + if (metadata_location_ != refreshed_table->metadata_location_) { + metadata_ = std::move(refreshed_table->metadata_); + metadata_location_ = std::move(refreshed_table->metadata_location_); + io_ = std::move(refreshed_table->io_); schemas_map_.reset(); partition_spec_map_.reset(); diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index d93cdb1e2..33fedc219 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -17,8 +17,6 @@ * under the License. */ -#include "iceberg/catalog/in_memory_catalog.h" - #include #include @@ -29,6 +27,7 @@ #include "iceberg/table.h" #include "iceberg/table_metadata.h" #include "matchers.h" +#include "mock_catalog.h" #include "test_common.h" namespace iceberg { @@ -39,8 +38,8 @@ class InMemoryCatalogTest : public ::testing::Test { file_io_ = std::make_shared( std::make_shared<::arrow::fs::LocalFileSystem>()); std::unordered_map properties = {{"prop1", "val1"}}; - catalog_ = std::make_shared("test_catalog", file_io_, - "/tmp/warehouse/", properties); + catalog_ = std::make_shared("test_catalog", file_io_, + "/tmp/warehouse/", properties); } void TearDown() override { @@ -74,7 +73,7 @@ class InMemoryCatalogTest : public ::testing::Test { } std::shared_ptr file_io_; - std::shared_ptr catalog_; + std::shared_ptr catalog_; // Used to store temporary paths created during the test std::vector created_temp_paths_; }; @@ -121,6 +120,7 @@ TEST_F(InMemoryCatalogTest, RefreshTable) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + metadata->current_snapshot_id = 3051729675574597004; auto table_location = GenerateTestTableLocation(tableIdent.name); auto metadata_location = std::format("{}v0.metadata.json", table_location); auto status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); @@ -129,9 +129,18 @@ TEST_F(InMemoryCatalogTest, RefreshTable) { auto table = catalog_->RegisterTable(tableIdent, metadata_location); EXPECT_THAT(table, IsOk()); ASSERT_TRUE(table.value()->current_snapshot().has_value()); - ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3055729675574597004); + ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3051729675574597004); + + // update table version to 3055729675574597004 + metadata_location = std::format("{}v1.metadata.json", table_location); + metadata->current_snapshot_id = 3055729675574597004; + status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); + EXPECT_THAT(status, IsOk()); + + // update table metadata location in catalog + status = catalog_->UpdateTableMetaLocation(tableIdent, metadata_location); + EXPECT_THAT(status, IsOk()); - // Now we don't support commit method in catalog, so here only test refresh with the // same version status = table.value()->Refresh(); EXPECT_THAT(status, IsOk()); diff --git a/test/mock_catalog.h b/test/mock_catalog.h new file mode 100644 index 000000000..890966536 --- /dev/null +++ b/test/mock_catalog.h @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include "iceberg/catalog/in_memory_catalog.h" + +namespace iceberg { + +class ICEBERG_EXPORT MockInMemoryCatalog : public InMemoryCatalog { + public: + MockInMemoryCatalog(std::string const& name, std::shared_ptr const& file_io, + std::string const& warehouse_location, + std::unordered_map const& properties) + : InMemoryCatalog(name, file_io, warehouse_location, properties) {} + + /// \brief Directly sets the metadata location of a table. + /// \note This should only be used in unit tests. + Status UpdateTableMetaLocation(const TableIdentifier& identifier, + const std::string& metadata_location) { + return UpdateTableMetaLocationInternal(identifier, metadata_location); + } +}; +} // namespace iceberg From aa2c11e8ca5ce2912f67e6cc949e247cb9fb143c Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Fri, 1 Aug 2025 16:30:51 +0800 Subject: [PATCH 03/16] feat: Refresh method support for table --- src/iceberg/catalog/in_memory_catalog.cc | 6 ---- src/iceberg/catalog/in_memory_catalog.h | 7 ++-- src/iceberg/table.cc | 2 +- test/in_memory_catalog_test.cc | 44 ++++++++++++++---------- test/mock_catalog.h | 8 ++--- 5 files changed, 30 insertions(+), 37 deletions(-) diff --git a/src/iceberg/catalog/in_memory_catalog.cc b/src/iceberg/catalog/in_memory_catalog.cc index e7bed7dc3..24eac6465 100644 --- a/src/iceberg/catalog/in_memory_catalog.cc +++ b/src/iceberg/catalog/in_memory_catalog.cc @@ -466,10 +466,4 @@ std::unique_ptr InMemoryCatalog::BuildTable( throw IcebergError("not implemented"); } -Status InMemoryCatalog::UpdateTableMetaLocationInternal( - const TableIdentifier& identifier, const std::string& metadata_location) { - std::unique_lock lock(mutex_); - return root_namespace_->UpdateTableMetaLocation(identifier, metadata_location); -} - } // namespace iceberg diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h index 112a59312..70b703e62 100644 --- a/src/iceberg/catalog/in_memory_catalog.h +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -84,7 +84,8 @@ class ICEBERG_EXPORT InMemoryCatalog Status DropTable(const TableIdentifier& identifier, bool purge) override; - Result> LoadTable(const TableIdentifier& identifier) override; + virtual Result> LoadTable( + const TableIdentifier& identifier) override; Result> RegisterTable( const TableIdentifier& identifier, @@ -93,10 +94,6 @@ class ICEBERG_EXPORT InMemoryCatalog std::unique_ptr BuildTable(const TableIdentifier& identifier, const Schema& schema) const override; - protected: - Status UpdateTableMetaLocationInternal(const TableIdentifier& identifier, - const std::string& metadata_location); - private: std::string catalog_name_; std::unordered_map properties_; diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index d4decf2d3..bff445648 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -35,7 +35,7 @@ const std::string& Table::uuid() const { return metadata_->table_uuid; } Status Table::Refresh() { if (!catalog_) { - return InvalidArgument("Refresh is not supported for table without a catalog"); + return NotSupported("Refresh is not supported for table without a catalog"); } ICEBERG_ASSIGN_OR_RAISE(auto refreshed_table, catalog_->LoadTable(identifier_)); diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index 33fedc219..22a472e2c 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -117,32 +117,38 @@ TEST_F(InMemoryCatalogTest, RegisterTable) { TEST_F(InMemoryCatalogTest, RefreshTable) { TableIdentifier tableIdent{.ns = {}, .name = "t1"}; - std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); - - metadata->current_snapshot_id = 3051729675574597004; auto table_location = GenerateTestTableLocation(tableIdent.name); - auto metadata_location = std::format("{}v0.metadata.json", table_location); - auto status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); - EXPECT_THAT(status, IsOk()); - + auto buildTable = [this, &tableIdent, &table_location]( + int64_t snapshot_id, int64_t version) -> std::unique_ptr { + std::unique_ptr metadata; + + ReadTableMetadata("TableMetadataV2Valid.json", &metadata); + metadata->current_snapshot_id = snapshot_id; + + auto metadata_location = std::format("{}v{}.metadata.json", table_location, version); + auto status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); + EXPECT_THAT(status, IsOk()); + + return std::make_unique
(tableIdent, std::move(metadata), metadata_location, + file_io_, std::static_pointer_cast(catalog_)); + }; + + auto table_v0 = buildTable(3051729675574597004, 0); + auto table_v1 = buildTable(3055729675574597004, 1); + EXPECT_CALL(*catalog_, LoadTable(::testing::_)) + .WillOnce(::testing::Return( + ::testing::ByMove(Result>(std::move(table_v0))))) + .WillOnce(::testing::Return( + ::testing::ByMove(Result>(std::move(table_v1))))); + + auto metadata_location = std::format("{}v{}.metadata.json", table_location, 0); auto table = catalog_->RegisterTable(tableIdent, metadata_location); EXPECT_THAT(table, IsOk()); ASSERT_TRUE(table.value()->current_snapshot().has_value()); ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3051729675574597004); - // update table version to 3055729675574597004 - metadata_location = std::format("{}v1.metadata.json", table_location); - metadata->current_snapshot_id = 3055729675574597004; - status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); - EXPECT_THAT(status, IsOk()); - - // update table metadata location in catalog - status = catalog_->UpdateTableMetaLocation(tableIdent, metadata_location); - EXPECT_THAT(status, IsOk()); - // same version - status = table.value()->Refresh(); + auto status = table.value()->Refresh(); EXPECT_THAT(status, IsOk()); ASSERT_TRUE(table.value()->current_snapshot().has_value()); ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3055729675574597004); diff --git a/test/mock_catalog.h b/test/mock_catalog.h index 890966536..c49f3ec71 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -30,11 +30,7 @@ class ICEBERG_EXPORT MockInMemoryCatalog : public InMemoryCatalog { std::unordered_map const& properties) : InMemoryCatalog(name, file_io, warehouse_location, properties) {} - /// \brief Directly sets the metadata location of a table. - /// \note This should only be used in unit tests. - Status UpdateTableMetaLocation(const TableIdentifier& identifier, - const std::string& metadata_location) { - return UpdateTableMetaLocationInternal(identifier, metadata_location); - } + MOCK_METHOD(Result>, LoadTable, (const TableIdentifier&), + (override)); }; } // namespace iceberg From ec44f5037bc3c82892373c66e0904efa0e677a53 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Fri, 1 Aug 2025 16:58:05 +0800 Subject: [PATCH 04/16] feat: Refresh method support for table --- test/in_memory_catalog_test.cc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index 22a472e2c..d80403bc2 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -38,8 +38,8 @@ class InMemoryCatalogTest : public ::testing::Test { file_io_ = std::make_shared( std::make_shared<::arrow::fs::LocalFileSystem>()); std::unordered_map properties = {{"prop1", "val1"}}; - catalog_ = std::make_shared("test_catalog", file_io_, - "/tmp/warehouse/", properties); + catalog_ = std::make_shared("test_catalog", file_io_, + "/tmp/warehouse/", properties); } void TearDown() override { @@ -73,7 +73,7 @@ class InMemoryCatalogTest : public ::testing::Test { } std::shared_ptr file_io_; - std::shared_ptr catalog_; + std::shared_ptr catalog_; // Used to store temporary paths created during the test std::vector created_temp_paths_; }; @@ -116,9 +116,12 @@ TEST_F(InMemoryCatalogTest, RegisterTable) { TEST_F(InMemoryCatalogTest, RefreshTable) { TableIdentifier tableIdent{.ns = {}, .name = "t1"}; - + std::shared_ptr mock_catalog = + std::make_shared( + "mock_catalog", file_io_, "/tmp/warehouse/", + std::unordered_map()); auto table_location = GenerateTestTableLocation(tableIdent.name); - auto buildTable = [this, &tableIdent, &table_location]( + auto buildTable = [this, &tableIdent, &mock_catalog, &table_location]( int64_t snapshot_id, int64_t version) -> std::unique_ptr
{ std::unique_ptr metadata; @@ -130,19 +133,20 @@ TEST_F(InMemoryCatalogTest, RefreshTable) { EXPECT_THAT(status, IsOk()); return std::make_unique
(tableIdent, std::move(metadata), metadata_location, - file_io_, std::static_pointer_cast(catalog_)); + file_io_, + std::static_pointer_cast(mock_catalog)); }; auto table_v0 = buildTable(3051729675574597004, 0); auto table_v1 = buildTable(3055729675574597004, 1); - EXPECT_CALL(*catalog_, LoadTable(::testing::_)) + EXPECT_CALL(*mock_catalog, LoadTable(::testing::_)) .WillOnce(::testing::Return( ::testing::ByMove(Result>(std::move(table_v0))))) .WillOnce(::testing::Return( ::testing::ByMove(Result>(std::move(table_v1))))); auto metadata_location = std::format("{}v{}.metadata.json", table_location, 0); - auto table = catalog_->RegisterTable(tableIdent, metadata_location); + auto table = mock_catalog->RegisterTable(tableIdent, metadata_location); EXPECT_THAT(table, IsOk()); ASSERT_TRUE(table.value()->current_snapshot().has_value()); ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3051729675574597004); From 314184227b6bf158b37a374ecd39338a89ebf39a Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Sat, 2 Aug 2025 10:08:19 +0800 Subject: [PATCH 05/16] feat: Refresh method support for table --- src/iceberg/table.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 37305db0d..9fce3c29f 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -117,8 +117,6 @@ class ICEBERG_EXPORT Table { const std::shared_ptr& io() const; private: - friend class Table; - const TableIdentifier identifier_; std::shared_ptr metadata_; std::string metadata_location_; From d7d4f55ed7e252aa995ea957fda3345432f99b08 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Sun, 3 Aug 2025 11:44:38 +0800 Subject: [PATCH 06/16] feat: Refresh method support for table --- src/iceberg/catalog/in_memory_catalog.cc | 20 -------------------- test/in_memory_catalog_test.cc | 2 +- test/mock_catalog.h | 2 +- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/src/iceberg/catalog/in_memory_catalog.cc b/src/iceberg/catalog/in_memory_catalog.cc index 24eac6465..6c34d3adf 100644 --- a/src/iceberg/catalog/in_memory_catalog.cc +++ b/src/iceberg/catalog/in_memory_catalog.cc @@ -109,15 +109,6 @@ class ICEBERG_EXPORT InMemoryNamespace { /// ErrorKind::kNoSuchTable if the table does not exist. Status UnregisterTable(const TableIdentifier& table_ident); - /// \brief Updates the metadata location of an existing table. - /// - /// \param table_ident The fully qualified identifier of the table. - /// \param metadata_location The path to the table's metadata. - /// \return Status::OK if the table metadata location is updated; - /// Error otherwise. - Status UpdateTableMetaLocation(const TableIdentifier& table_ident, - const std::string& metadata_location); - /// \brief Checks if a table exists in the specified namespace. /// /// \param table_ident The identifier of the table to check. @@ -306,17 +297,6 @@ Status InMemoryNamespace::UnregisterTable(TableIdentifier const& table_ident) { return {}; } -Status InMemoryNamespace::UpdateTableMetaLocation(const TableIdentifier& table_ident, - const std::string& metadata_location) { - const auto ns = GetNamespace(this, table_ident.ns); - ICEBERG_RETURN_UNEXPECTED(ns); - if (!ns.value()->table_metadata_locations_.contains(table_ident.name)) { - return NotFound("{} does not exist", table_ident.name); - } - ns.value()->table_metadata_locations_[table_ident.name] = metadata_location; - return {}; -} - Result InMemoryNamespace::TableExists(TableIdentifier const& table_ident) const { const auto ns = GetNamespace(this, table_ident.ns); ICEBERG_RETURN_UNEXPECTED(ns); diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index d80403bc2..7ea662f69 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -151,7 +151,7 @@ TEST_F(InMemoryCatalogTest, RefreshTable) { ASSERT_TRUE(table.value()->current_snapshot().has_value()); ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3051729675574597004); - // same version + // refresh table to new snapshot auto status = table.value()->Refresh(); EXPECT_THAT(status, IsOk()); ASSERT_TRUE(table.value()->current_snapshot().has_value()); diff --git a/test/mock_catalog.h b/test/mock_catalog.h index c49f3ec71..70c2f2e2d 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -23,7 +23,7 @@ namespace iceberg { -class ICEBERG_EXPORT MockInMemoryCatalog : public InMemoryCatalog { +class MockInMemoryCatalog : public InMemoryCatalog { public: MockInMemoryCatalog(std::string const& name, std::shared_ptr const& file_io, std::string const& warehouse_location, From 5c5058d7847cfc520e66908423f210c5c2662966 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Sun, 3 Aug 2025 11:54:56 +0800 Subject: [PATCH 07/16] feat: Refresh method support for table --- test/in_memory_catalog_test.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index 7ea662f69..61a6bc5c4 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -124,17 +124,12 @@ TEST_F(InMemoryCatalogTest, RefreshTable) { auto buildTable = [this, &tableIdent, &mock_catalog, &table_location]( int64_t snapshot_id, int64_t version) -> std::unique_ptr
{ std::unique_ptr metadata; - ReadTableMetadata("TableMetadataV2Valid.json", &metadata); metadata->current_snapshot_id = snapshot_id; - - auto metadata_location = std::format("{}v{}.metadata.json", table_location, version); - auto status = TableMetadataUtil::Write(*file_io_, metadata_location, *metadata); - EXPECT_THAT(status, IsOk()); - - return std::make_unique
(tableIdent, std::move(metadata), metadata_location, - file_io_, - std::static_pointer_cast(mock_catalog)); + return std::make_unique
( + tableIdent, std::move(metadata), + std::format("{}v{}.metadata.json", table_location, version), file_io_, + std::static_pointer_cast(mock_catalog)); }; auto table_v0 = buildTable(3051729675574597004, 0); From 50ab76ebda81865a68b9bbe2899b7d5da6a219a3 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Fri, 8 Aug 2025 19:16:12 +0800 Subject: [PATCH 08/16] feat: Refresh method support for table --- src/iceberg/catalog.h | 1 - src/iceberg/catalog/in_memory_catalog.h | 4 +- test/in_memory_catalog_test.cc | 82 +++++++++++++++---------- test/mock_catalog.h | 63 ++++++++++++++++--- 4 files changed, 106 insertions(+), 44 deletions(-) diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h index 03bd0f6c4..c179d56b1 100644 --- a/src/iceberg/catalog.h +++ b/src/iceberg/catalog.h @@ -175,7 +175,6 @@ class ICEBERG_EXPORT Catalog { /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists virtual Result> RegisterTable( const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; - /// \brief A builder used to create valid tables or start create/replace transactions class TableBuilder { public: diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h index 70b703e62..c8e358e11 100644 --- a/src/iceberg/catalog/in_memory_catalog.h +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -91,8 +91,8 @@ class ICEBERG_EXPORT InMemoryCatalog const TableIdentifier& identifier, const std::string& metadata_file_location) override; - std::unique_ptr BuildTable(const TableIdentifier& identifier, - const Schema& schema) const override; + std::unique_ptr BuildTable(const TableIdentifier& identifier, + const Schema& schema) const override; private: std::string catalog_name_; diff --git a/test/in_memory_catalog_test.cc b/test/in_memory_catalog_test.cc index 61a6bc5c4..36d67cbe7 100644 --- a/test/in_memory_catalog_test.cc +++ b/test/in_memory_catalog_test.cc @@ -17,6 +17,8 @@ * under the License. */ +#include "iceberg/catalog/in_memory_catalog.h" + #include #include @@ -24,6 +26,7 @@ #include #include "iceberg/arrow/arrow_fs_file_io_internal.h" +#include "iceberg/schema.h" #include "iceberg/table.h" #include "iceberg/table_metadata.h" #include "matchers.h" @@ -115,42 +118,53 @@ TEST_F(InMemoryCatalogTest, RegisterTable) { } TEST_F(InMemoryCatalogTest, RefreshTable) { - TableIdentifier tableIdent{.ns = {}, .name = "t1"}; - std::shared_ptr mock_catalog = - std::make_shared( - "mock_catalog", file_io_, "/tmp/warehouse/", - std::unordered_map()); - auto table_location = GenerateTestTableLocation(tableIdent.name); - auto buildTable = [this, &tableIdent, &mock_catalog, &table_location]( - int64_t snapshot_id, int64_t version) -> std::unique_ptr
{ - std::unique_ptr metadata; - ReadTableMetadata("TableMetadataV2Valid.json", &metadata); - metadata->current_snapshot_id = snapshot_id; - return std::make_unique
( - tableIdent, std::move(metadata), - std::format("{}v{}.metadata.json", table_location, version), file_io_, - std::static_pointer_cast(mock_catalog)); - }; - - auto table_v0 = buildTable(3051729675574597004, 0); - auto table_v1 = buildTable(3055729675574597004, 1); - EXPECT_CALL(*mock_catalog, LoadTable(::testing::_)) - .WillOnce(::testing::Return( - ::testing::ByMove(Result>(std::move(table_v0))))) - .WillOnce(::testing::Return( - ::testing::ByMove(Result>(std::move(table_v1))))); + TableIdentifier table_ident{.ns = {}, .name = "t1"}; + auto schema = std::make_shared( + std::vector{SchemaField::MakeRequired(1, "x", int64())}, + /*schema_id=*/1); - auto metadata_location = std::format("{}v{}.metadata.json", table_location, 0); - auto table = mock_catalog->RegisterTable(tableIdent, metadata_location); - EXPECT_THAT(table, IsOk()); - ASSERT_TRUE(table.value()->current_snapshot().has_value()); - ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3051729675574597004); + std::shared_ptr io; - // refresh table to new snapshot - auto status = table.value()->Refresh(); - EXPECT_THAT(status, IsOk()); - ASSERT_TRUE(table.value()->current_snapshot().has_value()); - ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3055729675574597004); + auto catalog = std::make_shared(); + // Mock 1st call to LoadTable + EXPECT_CALL(*catalog, LoadTable(::testing::_)) + .WillOnce(::testing::Return( + std::make_unique
(table_ident, + std::make_shared(TableMetadata{ + .schemas = {schema}, + .current_schema_id = 1, + .current_snapshot_id = 1, + .snapshots = {std::make_shared(Snapshot{ + .snapshot_id = 1, + .sequence_number = 1, + })}}), + "s3://location/1.json", io, catalog))); + auto load_table_result = catalog->LoadTable(table_ident); + ASSERT_THAT(load_table_result, IsOk()); + auto loaded_table = std::move(load_table_result.value()); + ASSERT_EQ(loaded_table->current_snapshot().value()->snapshot_id, 1); + + // Mock 2nd call to LoadTable + EXPECT_CALL(*catalog, LoadTable(::testing::_)) + .WillOnce(::testing::Return( + std::make_unique
(table_ident, + std::make_shared(TableMetadata{ + .schemas = {schema}, + .current_schema_id = 1, + .current_snapshot_id = 2, + .snapshots = {std::make_shared(Snapshot{ + .snapshot_id = 1, + .sequence_number = 1, + }), + std::make_shared(Snapshot{ + .snapshot_id = 2, + .sequence_number = 2, + })}}), + "s3://location/2.json", io, catalog))); + auto refreshed_result = loaded_table->Refresh(); + ASSERT_THAT(refreshed_result, IsOk()); + // check table is refreshed + ASSERT_EQ(loaded_table->current_snapshot().value()->snapshot_id, 2); } TEST_F(InMemoryCatalogTest, DropTable) { diff --git a/test/mock_catalog.h b/test/mock_catalog.h index 70c2f2e2d..a35e5af3e 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -19,18 +19,67 @@ #pragma once -#include "iceberg/catalog/in_memory_catalog.h" +#include "iceberg/catalog.h" namespace iceberg { -class MockInMemoryCatalog : public InMemoryCatalog { +class MockCatalog : public Catalog { public: - MockInMemoryCatalog(std::string const& name, std::shared_ptr const& file_io, - std::string const& warehouse_location, - std::unordered_map const& properties) - : InMemoryCatalog(name, file_io, warehouse_location, properties) {} + MockCatalog() = default; + ~MockCatalog() override = default; - MOCK_METHOD(Result>, LoadTable, (const TableIdentifier&), + MOCK_METHOD(std::string_view, name, (), (const, override)); + + MOCK_METHOD(Status, CreateNamespace, + (const Namespace&, (const std::unordered_map&)), + (override)); + + MOCK_METHOD((Result>), ListNamespaces, (const Namespace&), + (const, override)); + + MOCK_METHOD((Result>), + GetNamespaceProperties, (const Namespace&), (const, override)); + + MOCK_METHOD(Status, UpdateNamespaceProperties, + (const Namespace&, (const std::unordered_map&), + (const std::unordered_set&)), (override)); + + MOCK_METHOD(Status, DropNamespace, (const Namespace&), (override)); + + MOCK_METHOD(Result, NamespaceExists, (const Namespace&), (const, override)); + + MOCK_METHOD((Result>), ListTables, (const Namespace&), + (const, override)); + + MOCK_METHOD((Result>), CreateTable, + (const TableIdentifier&, const Schema&, const PartitionSpec&, + const std::string&, (const std::unordered_map&)), + (override)); + + MOCK_METHOD((Result>), UpdateTable, + (const TableIdentifier&, + (const std::vector>&), + (const std::vector>&)), + (override)); + + MOCK_METHOD((Result>), StageCreateTable, + (const TableIdentifier&, const Schema&, const PartitionSpec&, + const std::string&, (const std::unordered_map&)), + (override)); + + MOCK_METHOD(Result, TableExists, (const TableIdentifier&), (const, override)); + + MOCK_METHOD(Status, DropTable, (const TableIdentifier&, bool), (override)); + + MOCK_METHOD((Result>), LoadTable, (const TableIdentifier&), + (override)); + + MOCK_METHOD((Result>), RegisterTable, + (const TableIdentifier&, const std::string&), (override)); + + MOCK_METHOD((std::unique_ptr), BuildTable, + (const TableIdentifier&, const Schema&), (const, override)); }; + } // namespace iceberg From c17bd3e8f347a57374c0eb093fecec8f900ca92f Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Fri, 8 Aug 2025 21:17:31 +0800 Subject: [PATCH 09/16] feat: Refresh method support for table --- test/mock_catalog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mock_catalog.h b/test/mock_catalog.h index a35e5af3e..4b2a0ef72 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -78,7 +78,7 @@ class MockCatalog : public Catalog { MOCK_METHOD((Result>), RegisterTable, (const TableIdentifier&, const std::string&), (override)); - MOCK_METHOD((std::unique_ptr), BuildTable, + MOCK_METHOD((std::unique_ptr), BuildTable, (const TableIdentifier&, const Schema&), (const, override)); }; From d8ee5ec6cb5b2cd7894746384df485cba92daacd Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Fri, 8 Aug 2025 22:37:16 +0800 Subject: [PATCH 10/16] feat: Refresh method support for table --- test/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 091fa292d..520f2b39f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -23,6 +23,10 @@ fetchcontent_declare(googletest GTest) fetchcontent_makeavailable(googletest) +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + set(ICEBERG_TEST_RESOURCES "${CMAKE_SOURCE_DIR}/test/resources") configure_file("${CMAKE_SOURCE_DIR}/test/test_config.h.in" From af38f2b8f56c6ec6fcffcef085667e34abea919c Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Sat, 9 Aug 2025 10:17:51 +0800 Subject: [PATCH 11/16] feat: Refresh method support for table --- test/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 520f2b39f..091fa292d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -23,10 +23,6 @@ fetchcontent_declare(googletest GTest) fetchcontent_makeavailable(googletest) -set(CMAKE_CXX_STANDARD 23) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - set(ICEBERG_TEST_RESOURCES "${CMAKE_SOURCE_DIR}/test/resources") configure_file("${CMAKE_SOURCE_DIR}/test/test_config.h.in" From ff139112e2d3c1a9c89133bc2722ca26b1274360 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Sat, 9 Aug 2025 10:27:33 +0800 Subject: [PATCH 12/16] feat: Refresh method support for table --- src/iceberg/catalog/in_memory_catalog.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h index c8e358e11..cd5011797 100644 --- a/src/iceberg/catalog/in_memory_catalog.h +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -84,8 +84,7 @@ class ICEBERG_EXPORT InMemoryCatalog Status DropTable(const TableIdentifier& identifier, bool purge) override; - virtual Result> LoadTable( - const TableIdentifier& identifier) override; + Result> LoadTable(const TableIdentifier& identifier) override; Result> RegisterTable( const TableIdentifier& identifier, From 018d8bb333f0fc1e722c37f149d7a3e5c593b4f3 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 11 Aug 2025 10:49:42 +0800 Subject: [PATCH 13/16] feat: Refresh method support for table --- test/mock_catalog.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/mock_catalog.h b/test/mock_catalog.h index 4b2a0ef72..bc5ef1843 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -28,17 +28,20 @@ class MockCatalog : public Catalog { MockCatalog() = default; ~MockCatalog() override = default; + // NOLINTBGEGIN(clang-diagnostic-error) MOCK_METHOD(std::string_view, name, (), (const, override)); MOCK_METHOD(Status, CreateNamespace, (const Namespace&, (const std::unordered_map&)), (override)); + // NOLINTEND(clang-diagnostic-error) MOCK_METHOD((Result>), ListNamespaces, (const Namespace&), (const, override)); MOCK_METHOD((Result>), - GetNamespaceProperties, (const Namespace&), (const, override)); + GetNamespaceProperties, (const Namespace&), + (contest / mock_catalog.hst, override)); MOCK_METHOD(Status, UpdateNamespaceProperties, (const Namespace&, (const std::unordered_map&), From dd7c6798ede5345a2e9206dd58abc28e6b2a2a57 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 11 Aug 2025 11:10:49 +0800 Subject: [PATCH 14/16] feat: Refresh method support for table --- src/iceberg/catalog.h | 1 + test/mock_catalog.h | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h index c179d56b1..03bd0f6c4 100644 --- a/src/iceberg/catalog.h +++ b/src/iceberg/catalog.h @@ -175,6 +175,7 @@ class ICEBERG_EXPORT Catalog { /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists virtual Result> RegisterTable( const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; + /// \brief A builder used to create valid tables or start create/replace transactions class TableBuilder { public: diff --git a/test/mock_catalog.h b/test/mock_catalog.h index bc5ef1843..6984307d7 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -28,20 +28,19 @@ class MockCatalog : public Catalog { MockCatalog() = default; ~MockCatalog() override = default; - // NOLINTBGEGIN(clang-diagnostic-error) + // NOLINTBEGIN MOCK_METHOD(std::string_view, name, (), (const, override)); MOCK_METHOD(Status, CreateNamespace, (const Namespace&, (const std::unordered_map&)), (override)); - // NOLINTEND(clang-diagnostic-error) + // NOLINTEND MOCK_METHOD((Result>), ListNamespaces, (const Namespace&), (const, override)); MOCK_METHOD((Result>), - GetNamespaceProperties, (const Namespace&), - (contest / mock_catalog.hst, override)); + GetNamespaceProperties, (const Namespace&), (const, override)); MOCK_METHOD(Status, UpdateNamespaceProperties, (const Namespace&, (const std::unordered_map&), From 0e99d68bcd6bb090ad8b05ee5fa12dea4cbcf56f Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 11 Aug 2025 12:00:09 +0800 Subject: [PATCH 15/16] feat: Refresh method support for table --- test/mock_catalog.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/mock_catalog.h b/test/mock_catalog.h index 6984307d7..7e32359ce 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -28,13 +28,13 @@ class MockCatalog : public Catalog { MockCatalog() = default; ~MockCatalog() override = default; - // NOLINTBEGIN - MOCK_METHOD(std::string_view, name, (), (const, override)); + // NOLINTBEGIN(clang-diagnostic-error) + MOCK_METHOD((std::string_view), name, (), (const, override)); - MOCK_METHOD(Status, CreateNamespace, + MOCK_METHOD((Status), CreateNamespace, (const Namespace&, (const std::unordered_map&)), (override)); - // NOLINTEND + // NOLINTEND(clang-diagnostic-error) MOCK_METHOD((Result>), ListNamespaces, (const Namespace&), (const, override)); From 95b0044883cdc109eb33f13b8d9d33387274ba06 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 11 Aug 2025 13:01:48 +0800 Subject: [PATCH 16/16] feat: Refresh method support for table --- test/mock_catalog.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/mock_catalog.h b/test/mock_catalog.h index 7e32359ce..5363f1c7b 100644 --- a/test/mock_catalog.h +++ b/test/mock_catalog.h @@ -19,6 +19,9 @@ #pragma once +#include +#include + #include "iceberg/catalog.h" namespace iceberg { @@ -28,13 +31,11 @@ class MockCatalog : public Catalog { MockCatalog() = default; ~MockCatalog() override = default; - // NOLINTBEGIN(clang-diagnostic-error) - MOCK_METHOD((std::string_view), name, (), (const, override)); + MOCK_METHOD(std::string_view, name, (), (const, override)); - MOCK_METHOD((Status), CreateNamespace, + MOCK_METHOD(Status, CreateNamespace, (const Namespace&, (const std::unordered_map&)), (override)); - // NOLINTEND(clang-diagnostic-error) MOCK_METHOD((Result>), ListNamespaces, (const Namespace&), (const, override));