diff --git a/src/iceberg/partition_spec.h b/src/iceberg/partition_spec.h index 554692264..88a081bf7 100644 --- a/src/iceberg/partition_spec.h +++ b/src/iceberg/partition_spec.h @@ -47,6 +47,7 @@ class ICEBERG_EXPORT PartitionSpec : public util::Formattable { /// \brief The start ID for partition field. It is only used to generate /// partition field id for v1 metadata where it is tracked. static constexpr int32_t kLegacyPartitionDataIdStart = 1000; + static constexpr int32_t kInvalidPartitionFieldId = -1; /// \brief Create a new partition spec. /// diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h index 81f9aa394..3e71fa2e7 100644 --- a/src/iceberg/schema.h +++ b/src/iceberg/schema.h @@ -46,6 +46,7 @@ namespace iceberg { class ICEBERG_EXPORT Schema : public StructType { public: static constexpr int32_t kInitialSchemaId = 0; + static constexpr int32_t kInvalidColumnId = -1; explicit Schema(std::vector fields, std::optional schema_id = std::nullopt); diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index e32e75ee1..669e5a15b 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -20,6 +20,7 @@ #include "iceberg/table_metadata.h" #include +#include #include #include @@ -36,9 +37,14 @@ #include "iceberg/table_update.h" #include "iceberg/util/gzip_internal.h" #include "iceberg/util/macros.h" +#include "iceberg/util/uuid.h" namespace iceberg { +namespace { +const TimePointMs kInvalidLastUpdatedMs = TimePointMs::min(); +} + std::string ToString(const SnapshotLogEntry& entry) { return std::format("SnapshotLogEntry[timestampMillis={},snapshotId={}]", entry.timestamp_ms, entry.snapshot_id); @@ -201,13 +207,46 @@ Status TableMetadataUtil::Write(FileIO& io, const std::string& location, // TableMetadataBuilder implementation -struct TableMetadataBuilder::Impl {}; +struct TableMetadataBuilder::Impl { + // Base metadata (nullptr for new tables) + const TableMetadata* base; + + // Working metadata copy + TableMetadata metadata; + + // Change tracking + std::vector> changes; + + // Error collection (since methods return *this and cannot throw) + std::vector errors; + + // Metadata location tracking + std::optional metadata_location; + std::optional previous_metadata_location; + + // Constructor for new table + explicit Impl(int8_t format_version) : base(nullptr), metadata{} { + metadata.format_version = format_version; + metadata.last_sequence_number = TableMetadata::kInitialSequenceNumber; + metadata.last_updated_ms = kInvalidLastUpdatedMs; + metadata.last_column_id = Schema::kInvalidColumnId; + metadata.default_spec_id = PartitionSpec::kInitialSpecId; + metadata.last_partition_id = PartitionSpec::kInvalidPartitionFieldId; + metadata.current_snapshot_id = Snapshot::kInvalidSnapshotId; + metadata.default_sort_order_id = SortOrder::kInitialSortOrderId; + metadata.next_row_id = TableMetadata::kInitialRowId; + } + + // Constructor from existing metadata + explicit Impl(const TableMetadata* base_metadata) + : base(base_metadata), metadata(*base_metadata) {} +}; TableMetadataBuilder::TableMetadataBuilder(int8_t format_version) - : impl_(std::make_unique()) {} + : impl_(std::make_unique(format_version)) {} TableMetadataBuilder::TableMetadataBuilder(const TableMetadata* base) - : impl_(std::make_unique()) {} + : impl_(std::make_unique(base)) {} TableMetadataBuilder::~TableMetadataBuilder() = default; @@ -238,12 +277,35 @@ TableMetadataBuilder& TableMetadataBuilder::SetPreviousMetadataLocation( } TableMetadataBuilder& TableMetadataBuilder::AssignUUID() { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + if (impl_->metadata.table_uuid.empty()) { + // Generate a random UUID + return AssignUUID(Uuid::GenerateV4().ToString()); + } + + return *this; } TableMetadataBuilder& TableMetadataBuilder::AssignUUID(std::string_view uuid) { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); - ; + std::string uuid_str(uuid); + + // Validation: UUID cannot be empty + if (uuid_str.empty()) { + impl_->errors.emplace_back(ErrorKind::kInvalidArgument, "Cannot assign empty UUID"); + return *this; + } + + // Check if UUID is already set to the same value (no-op) + if (StringUtils::EqualsIgnoreCase(impl_->metadata.table_uuid, uuid_str)) { + return *this; + } + + // Update the metadata + impl_->metadata.table_uuid = uuid_str; + + // Record the change + impl_->changes.push_back(std::make_unique(std::move(uuid_str))); + + return *this; } TableMetadataBuilder& TableMetadataBuilder::UpgradeFormatVersion( @@ -377,12 +439,29 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveEncryptionKey(std::string_view throw IcebergError(std::format("{} not implemented", __FUNCTION__)); } -TableMetadataBuilder& TableMetadataBuilder::DiscardChanges() { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); -} - Result> TableMetadataBuilder::Build() { - return NotImplemented("TableMetadataBuilder::Build not implemented"); + // 1. Check for accumulated errors + if (!impl_->errors.empty()) { + std::string error_msg = "Failed to build TableMetadata due to validation errors:\n"; + for (const auto& [kind, message] : impl_->errors) { + error_msg += " - " + message + "\n"; + } + return CommitFailed("{}", error_msg); + } + + // 2. Validate metadata consistency through TableMetadata#Validate + + // 3. Update last_updated_ms if there are changes + if (impl_->metadata.last_updated_ms == kInvalidLastUpdatedMs) { + impl_->metadata.last_updated_ms = + TimePointMs{std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch())}; + } + + // 4. Create and return the TableMetadata + auto result = std::make_unique(std::move(impl_->metadata)); + + return result; } } // namespace iceberg diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 11b17eb84..2a998c7a1 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -377,13 +377,6 @@ class ICEBERG_EXPORT TableMetadataBuilder { /// \return Reference to this builder for method chaining TableMetadataBuilder& RemoveEncryptionKey(std::string_view key_id); - /// \brief Discard all accumulated changes - /// - /// This is useful when you want to reset the builder state without - /// creating a new builder instance. - /// \return Reference to this builder for method chaining - TableMetadataBuilder& DiscardChanges(); - /// \brief Build the TableMetadata object /// /// \return A Result containing the constructed TableMetadata or an error diff --git a/src/iceberg/table_requirement.cc b/src/iceberg/table_requirement.cc index 4ca4b915f..e951d7003 100644 --- a/src/iceberg/table_requirement.cc +++ b/src/iceberg/table_requirement.cc @@ -20,15 +20,28 @@ #include "iceberg/table_requirement.h" #include "iceberg/table_metadata.h" +#include "iceberg/util/string_util.h" namespace iceberg::table { Status AssertDoesNotExist::Validate(const TableMetadata* base) const { - return NotImplemented("AssertTableDoesNotExist::Validate not implemented"); + return NotImplemented("AssertDoesNotExist::Validate not implemented"); } Status AssertUUID::Validate(const TableMetadata* base) const { - return NotImplemented("AssertTableUUID::Validate not implemented"); + // Validate that the table UUID matches the expected value + + if (base == nullptr) { + return CommitFailed("Requirement failed: current table metadata is missing"); + } + + if (!StringUtils::EqualsIgnoreCase(base->table_uuid, uuid_)) { + return CommitFailed( + "Requirement failed: table UUID does not match (expected='{}', actual='{}')", + uuid_, base->table_uuid); + } + + return {}; } Status AssertRefSnapshotID::Validate(const TableMetadata* base) const { diff --git a/src/iceberg/table_requirements.cc b/src/iceberg/table_requirements.cc index 1eb870cb4..aae874ecd 100644 --- a/src/iceberg/table_requirements.cc +++ b/src/iceberg/table_requirements.cc @@ -26,11 +26,11 @@ namespace iceberg { void TableUpdateContext::AddRequirement(std::unique_ptr requirement) { - throw IcebergError("TableUpdateContext::AddRequirement not implemented"); + requirements_.emplace_back(std::move(requirement)); } Result>> TableUpdateContext::Build() { - return NotImplemented("TableUpdateContext::Build not implemented"); + return std::move(requirements_); } Result>> TableRequirements::ForCreateTable( diff --git a/src/iceberg/table_update.cc b/src/iceberg/table_update.cc index 7d81dd8f1..fcb7a58c2 100644 --- a/src/iceberg/table_update.cc +++ b/src/iceberg/table_update.cc @@ -21,6 +21,7 @@ #include "iceberg/exception.h" #include "iceberg/table_metadata.h" +#include "iceberg/table_requirement.h" #include "iceberg/table_requirements.h" namespace iceberg::table { @@ -28,11 +29,24 @@ namespace iceberg::table { // AssignUUID void AssignUUID::ApplyTo(TableMetadataBuilder& builder) const { - throw IcebergError(std::format("{} not implemented", __FUNCTION__)); + builder.AssignUUID(uuid_); } Status AssignUUID::GenerateRequirements(TableUpdateContext& context) const { - return NotImplemented("AssignTableUUID::GenerateRequirements not implemented"); + // AssignUUID operation generates a requirement to assert the table's UUID + // if a base metadata exists (i.e., this is an update operation) + + const TableMetadata* base = context.base(); + + if (base != nullptr && !base->table_uuid.empty()) { + // For table updates, assert that the current UUID matches what we expect + context.AddRequirement(std::make_unique(base->table_uuid)); + } + + // Note: For table creation (base == nullptr), no UUID requirement is needed + // as the table doesn't exist yet + + return {}; } // UpgradeFormatVersion @@ -42,8 +56,7 @@ void UpgradeFormatVersion::ApplyTo(TableMetadataBuilder& builder) const { } Status UpgradeFormatVersion::GenerateRequirements(TableUpdateContext& context) const { - return NotImplemented( - "UpgradeTableFormatVersion::GenerateRequirements not implemented"); + return NotImplemented("UpgradeFormatVersion::GenerateRequirements not implemented"); } // AddSchema diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 68af62bf1..100287ff9 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -82,7 +82,8 @@ add_iceberg_test(table_test test_common.cc json_internal_test.cc table_test.cc - schema_json_test.cc) + schema_json_test.cc + table_metadata_builder_test.cc) add_iceberg_test(expression_test SOURCES diff --git a/src/iceberg/test/matchers.h b/src/iceberg/test/matchers.h index f04d4a51b..55d29be60 100644 --- a/src/iceberg/test/matchers.h +++ b/src/iceberg/test/matchers.h @@ -23,6 +23,7 @@ #include #include "iceberg/result.h" +#include "iceberg/util/macros.h" /* * \brief Define custom matchers for expected values @@ -210,4 +211,17 @@ auto ErrorIs(MatcherT&& matcher) { ResultMatcher>(false, std::forward(matcher))); } +// Evaluate `rexpr` which should return a Result. +// On success: assign the contained value to `lhs`. +// On failure: fail the test with the error message. +#define ICEBERG_UNWRAP_OR_FAIL_IMPL(result_name, lhs, rexpr) \ + auto&& result_name = (rexpr); \ + ASSERT_TRUE(result_name.has_value()) \ + << "Operation failed: " << result_name.error().message; \ + lhs = std::move(result_name.value()); + +#define ICEBERG_UNWRAP_OR_FAIL(lhs, rexpr) \ + ICEBERG_UNWRAP_OR_FAIL_IMPL(ICEBERG_ASSIGN_OR_RAISE_NAME(result_, __COUNTER__), lhs, \ + rexpr) + } // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 88b16325c..89cb3579b 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -47,6 +47,7 @@ iceberg_tests = { 'sources': files( 'json_internal_test.cc', 'schema_json_test.cc', + 'table_metadata_builder_test.cc', 'table_test.cc', 'test_common.cc', ), diff --git a/src/iceberg/test/table_metadata_builder_test.cc b/src/iceberg/test/table_metadata_builder_test.cc new file mode 100644 index 000000000..aa22f8571 --- /dev/null +++ b/src/iceberg/test/table_metadata_builder_test.cc @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include +#include + +#include "iceberg/partition_spec.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_requirement.h" +#include "iceberg/table_requirements.h" +#include "iceberg/table_update.h" +#include "iceberg/test/matchers.h" + +namespace iceberg { + +// Helper functions to reduce test boilerplate +namespace { + +// Generate requirements and return them +std::vector> GenerateRequirements( + const TableUpdate& update, const TableMetadata* base) { + TableUpdateContext context(base, /*is_replace=*/false); + EXPECT_THAT(update.GenerateRequirements(context), IsOk()); + + auto requirements = context.Build(); + EXPECT_THAT(requirements, IsOk()); + return std::move(requirements.value()); +} + +} // namespace + +// Test fixture for TableMetadataBuilder tests +class TableMetadataBuilderTest : public ::testing::Test { + protected: + void SetUp() override { + // Create a base metadata for update tests + base_metadata_ = std::make_unique(); + base_metadata_->format_version = 2; + base_metadata_->table_uuid = "test-uuid-1234"; + base_metadata_->location = "s3://bucket/test"; + base_metadata_->last_sequence_number = 0; + base_metadata_->last_updated_ms = TimePointMs{std::chrono::milliseconds(1000)}; + base_metadata_->last_column_id = 0; + base_metadata_->default_spec_id = PartitionSpec::kInitialSpecId; + base_metadata_->last_partition_id = 0; + base_metadata_->current_snapshot_id = Snapshot::kInvalidSnapshotId; + base_metadata_->default_sort_order_id = SortOrder::kInitialSortOrderId; + base_metadata_->next_row_id = TableMetadata::kInitialRowId; + } + + std::unique_ptr base_metadata_; +}; + +// ============================================================================ +// TableMetadataBuilder - Basic Construction Tests +// ============================================================================ + +TEST_F(TableMetadataBuilderTest, BuildFromEmpty) { + auto builder = TableMetadataBuilder::BuildFromEmpty(2); + ASSERT_NE(builder, nullptr); + + builder->AssignUUID("new-uuid-5678"); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + ASSERT_NE(metadata, nullptr); + + EXPECT_EQ(metadata->format_version, 2); + EXPECT_EQ(metadata->last_sequence_number, TableMetadata::kInitialSequenceNumber); + EXPECT_EQ(metadata->default_spec_id, PartitionSpec::kInitialSpecId); + EXPECT_EQ(metadata->default_sort_order_id, SortOrder::kInitialSortOrderId); + EXPECT_EQ(metadata->current_snapshot_id, Snapshot::kInvalidSnapshotId); +} + +TEST_F(TableMetadataBuilderTest, BuildFromExisting) { + auto builder = TableMetadataBuilder::BuildFrom(base_metadata_.get()); + ASSERT_NE(builder, nullptr); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + ASSERT_NE(metadata, nullptr); + + EXPECT_EQ(metadata->format_version, 2); + EXPECT_EQ(metadata->table_uuid, "test-uuid-1234"); + EXPECT_EQ(metadata->location, "s3://bucket/test"); +} + +// ============================================================================ +// TableMetadataBuilder - AssignUUID Tests +// ============================================================================ + +TEST_F(TableMetadataBuilderTest, AssignUUIDForNewTable) { + auto builder = TableMetadataBuilder::BuildFromEmpty(2); + builder->AssignUUID("new-uuid-5678"); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(metadata->table_uuid, "new-uuid-5678"); +} + +TEST_F(TableMetadataBuilderTest, AssignUUIDAndUpdateExisting) { + auto builder = TableMetadataBuilder::BuildFrom(base_metadata_.get()); + builder->AssignUUID("updated-uuid-9999"); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(metadata->table_uuid, "updated-uuid-9999"); +} + +TEST_F(TableMetadataBuilderTest, AssignUUIDWithEmptyUUID) { + auto builder = TableMetadataBuilder::BuildFromEmpty(2); + builder->AssignUUID(""); + + ASSERT_THAT(builder->Build(), HasErrorMessage("Cannot assign empty UUID")); +} + +TEST_F(TableMetadataBuilderTest, AssignUUIDWithSameUUID) { + auto builder = TableMetadataBuilder::BuildFrom(base_metadata_.get()); + builder->AssignUUID("test-uuid-1234"); // Same UUID + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(metadata->table_uuid, "test-uuid-1234"); +} + +TEST_F(TableMetadataBuilderTest, AssignUUIDWithAutoGenerate) { + auto builder = TableMetadataBuilder::BuildFromEmpty(2); + builder->AssignUUID(); // Auto-generate + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_FALSE(metadata->table_uuid.empty()); +} + +TEST_F(TableMetadataBuilderTest, AssignUUIDAndCaseInsensitiveComparison) { + base_metadata_->table_uuid = "TEST-UUID-ABCD"; + auto builder = TableMetadataBuilder::BuildFrom(base_metadata_.get()); + builder->AssignUUID("test-uuid-abcd"); // Different case - should be no-op + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(metadata->table_uuid, "TEST-UUID-ABCD"); // Original case preserved +} + +// ============================================================================ +// TableUpdate - ApplyTo Tests +// ============================================================================ + +TEST_F(TableMetadataBuilderTest, TableUpdateWithAssignUUID) { + auto builder = TableMetadataBuilder::BuildFromEmpty(2); + + table::AssignUUID update("apply-uuid"); + update.ApplyTo(*builder); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(metadata->table_uuid, "apply-uuid"); +} + +// ============================================================================ +// TableUpdate - GenerateRequirements Tests +// ============================================================================ + +TEST_F(TableMetadataBuilderTest, + TableUpdateWithAssignUUIDAndGenerateRequirementsForNewTable) { + table::AssignUUID update("new-uuid"); + + auto requirements = GenerateRequirements(update, nullptr); + EXPECT_TRUE(requirements.empty()); // No requirements for new table +} + +TEST_F(TableMetadataBuilderTest, + TableUpdateWithAssignUUIDAndGenerateRequirementsForExistingTable) { + table::AssignUUID update("new-uuid"); + + auto requirements = GenerateRequirements(update, base_metadata_.get()); + EXPECT_EQ(requirements.size(), 1); // Should generate AssertUUID requirement +} + +TEST_F(TableMetadataBuilderTest, + TableUpdateWithAssignUUIDAndGenerateRequirementsWithEmptyUUID) { + base_metadata_->table_uuid = ""; + table::AssignUUID update("new-uuid"); + + auto requirements = GenerateRequirements(update, base_metadata_.get()); + EXPECT_TRUE(requirements.empty()); // No requirement when base has no UUID +} + +// ============================================================================ +// TableRequirement - Validate Tests +// ============================================================================ + +TEST_F(TableMetadataBuilderTest, TableRequirementAssertUUIDSuccess) { + table::AssertUUID requirement("test-uuid-1234"); + + ASSERT_THAT(requirement.Validate(base_metadata_.get()), IsOk()); +} + +TEST_F(TableMetadataBuilderTest, TableRequirementAssertUUIDMismatch) { + table::AssertUUID requirement("wrong-uuid"); + + auto status = requirement.Validate(base_metadata_.get()); + EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed)); + EXPECT_THAT(status, HasErrorMessage("UUID does not match")); +} + +TEST_F(TableMetadataBuilderTest, TableRequirementAssertUUIDNullBase) { + table::AssertUUID requirement("any-uuid"); + + auto status = requirement.Validate(nullptr); + EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed)); + EXPECT_THAT(status, HasErrorMessage("metadata is missing")); +} + +TEST_F(TableMetadataBuilderTest, TableRequirementAssertUUIDCaseInsensitive) { + base_metadata_->table_uuid = "TEST-UUID-1234"; + table::AssertUUID requirement("test-uuid-1234"); + + ASSERT_THAT(requirement.Validate(base_metadata_.get()), IsOk()); +} + +// ============================================================================ +// Integration Tests - End-to-End Workflow +// ============================================================================ + +TEST_F(TableMetadataBuilderTest, IntegrationCreateTableWithUUID) { + auto builder = TableMetadataBuilder::BuildFromEmpty(2); + builder->AssignUUID("integration-test-uuid"); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + EXPECT_EQ(metadata->table_uuid, "integration-test-uuid"); + EXPECT_EQ(metadata->format_version, 2); +} + +TEST_F(TableMetadataBuilderTest, IntegrationOptimisticConcurrencyControl) { + table::AssignUUID update("new-uuid"); + + // Generate and validate requirements + auto requirements = GenerateRequirements(update, base_metadata_.get()); + for (const auto& req : requirements) { + auto val_status = req->Validate(base_metadata_.get()); + ASSERT_THAT(val_status, IsOk()) << "Requirement validation failed"; + } + + // Apply update and build + auto builder = TableMetadataBuilder::BuildFrom(base_metadata_.get()); + update.ApplyTo(*builder); + + ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); + ASSERT_NE(metadata, nullptr); +} + +} // namespace iceberg diff --git a/src/iceberg/util/string_util.h b/src/iceberg/util/string_util.h index a22aa7a5d..aafb740cd 100644 --- a/src/iceberg/util/string_util.h +++ b/src/iceberg/util/string_util.h @@ -45,7 +45,7 @@ class ICEBERG_EXPORT StringUtils { return input; } - static bool EqualsIgnoreCase(const std::string& lhs, const std::string& rhs) { + static bool EqualsIgnoreCase(std::string_view lhs, std::string_view rhs) { return std::ranges::equal( lhs, rhs, [](char lc, char rc) { return std::tolower(lc) == std::tolower(rc); }); }