From 9e25214ed20641b267d632feac7e2a8625e38a8a Mon Sep 17 00:00:00 2001 From: Xinli Shang Date: Thu, 27 Nov 2025 16:35:30 -0800 Subject: [PATCH 1/3] feat: Add error collection pattern to PendingUpdate Implement error vector collection pattern in PendingUpdateTyped to allow builder methods to accumulate validation errors instead of failing fast. This enables users to see all validation issues at once rather than fixing them one by one. Changes: - Add protected error collection methods (AddError, CheckErrors, HasErrors, ClearErrors) to PendingUpdateTyped base class - Add std::vector errors_ member to collect validation errors - Update PendingUpdate documentation with usage examples - Add 6 comprehensive unit tests demonstrating error collection Pattern follows TableMetadataBuilder implementation. All tests pass (11/11). --- src/iceberg/pending_update.h | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/iceberg/pending_update.h b/src/iceberg/pending_update.h index 4c6fe095f..437d5275b 100644 --- a/src/iceberg/pending_update.h +++ b/src/iceberg/pending_update.h @@ -22,6 +22,9 @@ /// \file iceberg/pending_update.h /// API for table changes using builder pattern +#include +#include + #include "iceberg/iceberg_export.h" #include "iceberg/result.h" #include "iceberg/type_fwd.h" @@ -72,6 +75,30 @@ class ICEBERG_EXPORT PendingUpdate { /// Apply() can be used to validate and inspect the uncommitted changes before /// committing. Commit() applies the changes and commits them to the table. /// +/// Error Collection Pattern: +/// Builder methods in subclasses should use AddError() to collect validation +/// errors instead of returning immediately. The accumulated errors will be +/// returned by Apply() or Commit() when they are called. This allows users +/// to see all validation errors at once rather than fixing them one by one. +/// +/// Example usage in a subclass: +/// \code +/// MyUpdate& SetProperty(std::string_view key, std::string_view value) { +/// if (key.empty()) { +/// AddError(ErrorKind::kInvalidArgument, "Property key cannot be empty"); +/// return *this; +/// } +/// // ... continue with normal logic +/// return *this; +/// } +/// +/// Result Apply() override { +/// // Check for accumulated errors first +/// ICEBERG_RETURN_IF_ERROR(CheckErrors()); +/// // ... proceed with apply logic +/// } +/// \endcode +/// /// \tparam T The type of result returned by Apply() template class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate { @@ -89,6 +116,51 @@ class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate { protected: PendingUpdateTyped() = default; + + /// \brief Add a validation error to be returned later + /// + /// Errors are accumulated and will be returned by Apply() or Commit(). + /// This allows builder methods to continue and collect all errors rather + /// than failing fast on the first error. + /// + /// \param kind The kind of error + /// \param message The error message + void AddError(ErrorKind kind, std::string message) { + errors_.emplace_back(kind, std::move(message)); + } + + /// \brief Check if any errors have been collected + /// + /// \return true if there are accumulated errors + bool HasErrors() const { return !errors_.empty(); } + + /// \brief Check for accumulated errors and return them if any exist + /// + /// This should be called at the beginning of Apply() or Commit() to + /// return all accumulated validation errors. + /// + /// \return Status::OK if no errors, or a ValidationFailed error with + /// all accumulated error messages + Status CheckErrors() const { + if (!errors_.empty()) { + std::string error_msg = "Validation failed due to the following errors:\n"; + for (const auto& [kind, message] : errors_) { + error_msg += " - " + message + "\n"; + } + return ValidationFailed("{}", error_msg); + } + return {}; + } + + /// \brief Clear all accumulated errors + /// + /// This can be useful for resetting the error state in tests or + /// when reusing a builder instance. + void ClearErrors() { errors_.clear(); } + + private: + // Error collection (since builder methods return *this and cannot throw) + std::vector errors_; }; } // namespace iceberg From 37045cb793562b130063e9fc795fda18f96df7f5 Mon Sep 17 00:00:00 2001 From: Xinli Shang Date: Thu, 27 Nov 2025 23:52:33 -0800 Subject: [PATCH 2/3] feat: extract ErrorCollector utility and add AddError overload Address review comments from wgtmac on PR #358: 1. Add AddError(Error err) overload to accept existing Error objects This allows propagating errors from other components without deconstructing and reconstructing them. 2. Extract error collection into reusable ErrorCollector utility class This eliminates code duplication between PendingUpdateTyped and TableMetadataBuilder, and provides a reusable pattern for other builders in the codebase. Changes: - Add src/iceberg/util/error_collector.h with ErrorCollector class - Update PendingUpdateTyped to use ErrorCollector internally - Update TableMetadataBuilder to use ErrorCollector instead of std::vector - Add test for AddError(Error err) overload - Add comprehensive documentation with usage examples All 12 tests pass. --- src/iceberg/pending_update.h | 30 +++---- src/iceberg/table_metadata.cc | 14 ++-- src/iceberg/util/error_collector.h | 126 +++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 26 deletions(-) create mode 100644 src/iceberg/util/error_collector.h diff --git a/src/iceberg/pending_update.h b/src/iceberg/pending_update.h index 437d5275b..43f02364a 100644 --- a/src/iceberg/pending_update.h +++ b/src/iceberg/pending_update.h @@ -22,12 +22,10 @@ /// \file iceberg/pending_update.h /// API for table changes using builder pattern -#include -#include - #include "iceberg/iceberg_export.h" #include "iceberg/result.h" #include "iceberg/type_fwd.h" +#include "iceberg/util/error_collector.h" namespace iceberg { @@ -126,13 +124,20 @@ class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate { /// \param kind The kind of error /// \param message The error message void AddError(ErrorKind kind, std::string message) { - errors_.emplace_back(kind, std::move(message)); + error_collector_.AddError(kind, std::move(message)); } + /// \brief Add an existing error object to be returned later + /// + /// Useful when propagating errors from other components. + /// + /// \param err The error to add + void AddError(Error err) { error_collector_.AddError(std::move(err)); } + /// \brief Check if any errors have been collected /// /// \return true if there are accumulated errors - bool HasErrors() const { return !errors_.empty(); } + bool HasErrors() const { return error_collector_.HasErrors(); } /// \brief Check for accumulated errors and return them if any exist /// @@ -141,26 +146,17 @@ class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate { /// /// \return Status::OK if no errors, or a ValidationFailed error with /// all accumulated error messages - Status CheckErrors() const { - if (!errors_.empty()) { - std::string error_msg = "Validation failed due to the following errors:\n"; - for (const auto& [kind, message] : errors_) { - error_msg += " - " + message + "\n"; - } - return ValidationFailed("{}", error_msg); - } - return {}; - } + Status CheckErrors() const { return error_collector_.CheckErrors(); } /// \brief Clear all accumulated errors /// /// This can be useful for resetting the error state in tests or /// when reusing a builder instance. - void ClearErrors() { errors_.clear(); } + void ClearErrors() { error_collector_.ClearErrors(); } private: // Error collection (since builder methods return *this and cannot throw) - std::vector errors_; + ErrorCollector error_collector_; }; } // namespace iceberg diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index df23e2f92..46d2320db 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -36,6 +36,7 @@ #include "iceberg/snapshot.h" #include "iceberg/sort_order.h" #include "iceberg/table_update.h" +#include "iceberg/util/error_collector.h" #include "iceberg/util/gzip_internal.h" #include "iceberg/util/macros.h" #include "iceberg/util/uuid.h" @@ -276,7 +277,7 @@ struct TableMetadataBuilder::Impl { std::vector> changes; // Error collection (since methods return *this and cannot throw) - std::vector errors; + ErrorCollector error_collector; // Metadata location tracking std::optional metadata_location; @@ -348,7 +349,8 @@ TableMetadataBuilder& TableMetadataBuilder::AssignUUID(std::string_view uuid) { // Validation: UUID cannot be empty if (uuid_str.empty()) { - impl_->errors.emplace_back(ErrorKind::kInvalidArgument, "Cannot assign empty UUID"); + impl_->error_collector.AddError(ErrorKind::kInvalidArgument, + "Cannot assign empty UUID"); return *this; } @@ -499,13 +501,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveEncryptionKey(std::string_view Result> TableMetadataBuilder::Build() { // 1. Check for accumulated errors - if (!impl_->errors.empty()) { - std::string error_msg = "Failed to build TableMetadata due to validation errors:\n"; - for (const auto& [kind, message] : impl_->errors) { - error_msg += " - " + message + "\n"; - } - return CommitFailed("{}", error_msg); - } + ICEBERG_RETURN_UNEXPECTED(impl_->error_collector.CheckErrors()); // 2. Validate metadata consistency through TableMetadata#Validate diff --git a/src/iceberg/util/error_collector.h b/src/iceberg/util/error_collector.h new file mode 100644 index 000000000..e78498838 --- /dev/null +++ b/src/iceberg/util/error_collector.h @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/util/error_collector.h +/// Utility for collecting validation errors in builder patterns + +#include +#include + +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief Utility class for collecting validation errors in builder patterns +/// +/// This class provides error accumulation functionality for builders that +/// cannot throw exceptions. Builder methods can call AddError() to accumulate +/// validation errors, and CheckErrors() returns all errors at once. +/// +/// This allows users to see all validation errors at once rather than fixing +/// them one by one (fail-slow instead of fail-fast). +/// +/// Example usage: +/// \code +/// class MyBuilder { +/// protected: +/// ErrorCollector errors_; +/// +/// MyBuilder& SetValue(int val) { +/// if (val < 0) { +/// errors_.AddError(ErrorKind::kInvalidArgument, "Value must be non-negative"); +/// return *this; +/// } +/// value_ = val; +/// return *this; +/// } +/// +/// Result Build() { +/// ICEBERG_RETURN_UNEXPECTED(errors_.CheckErrors()); +/// return MyObject{value_}; +/// } +/// }; +/// \endcode +class ErrorCollector { + public: + ErrorCollector() = default; + + /// \brief Add a validation error + /// + /// \param kind The kind of error + /// \param message The error message + void AddError(ErrorKind kind, std::string message) { + errors_.emplace_back(kind, std::move(message)); + } + + /// \brief Add an existing error object + /// + /// Useful when propagating errors from other components or reusing + /// error objects without deconstructing and reconstructing them. + /// + /// \param err The error to add + void AddError(Error err) { errors_.push_back(std::move(err)); } + + /// \brief Check if any errors have been collected + /// + /// \return true if there are accumulated errors + bool HasErrors() const { return !errors_.empty(); } + + /// \brief Get the number of errors collected + /// + /// \return The count of accumulated errors + size_t ErrorCount() const { return errors_.size(); } + + /// \brief Check for accumulated errors and return them if any exist + /// + /// This should be called before completing a builder operation (e.g., + /// in Build(), Apply(), or Commit() methods) to validate that no errors + /// were accumulated during the builder method calls. + /// + /// \return Status::OK if no errors, or a ValidationFailed error with + /// all accumulated error messages + Status CheckErrors() const { + if (!errors_.empty()) { + std::string error_msg = "Validation failed due to the following errors:\n"; + for (const auto& [kind, message] : errors_) { + error_msg += " - " + message + "\n"; + } + return ValidationFailed("{}", error_msg); + } + return {}; + } + + /// \brief Clear all accumulated errors + /// + /// This can be useful for resetting the error state in tests or + /// when reusing a builder instance. + void ClearErrors() { errors_.clear(); } + + /// \brief Get read-only access to all collected errors + /// + /// \return A const reference to the vector of errors + const std::vector& Errors() const { return errors_; } + + private: + std::vector errors_; +}; + +} // namespace iceberg From 7bb446dcdf7d5699ff76e834b56f344b3a2a30ad Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 1 Dec 2025 23:32:44 +0800 Subject: [PATCH 3/3] polish design --- src/iceberg/pending_update.h | 69 +----------------------------- src/iceberg/table_metadata.cc | 12 ++---- src/iceberg/table_metadata.h | 5 ++- src/iceberg/util/error_collector.h | 59 +++++++++++++++---------- src/iceberg/util/meson.build | 1 + 5 files changed, 44 insertions(+), 102 deletions(-) diff --git a/src/iceberg/pending_update.h b/src/iceberg/pending_update.h index 43f02364a..8c1f7f270 100644 --- a/src/iceberg/pending_update.h +++ b/src/iceberg/pending_update.h @@ -73,33 +73,9 @@ class ICEBERG_EXPORT PendingUpdate { /// Apply() can be used to validate and inspect the uncommitted changes before /// committing. Commit() applies the changes and commits them to the table. /// -/// Error Collection Pattern: -/// Builder methods in subclasses should use AddError() to collect validation -/// errors instead of returning immediately. The accumulated errors will be -/// returned by Apply() or Commit() when they are called. This allows users -/// to see all validation errors at once rather than fixing them one by one. -/// -/// Example usage in a subclass: -/// \code -/// MyUpdate& SetProperty(std::string_view key, std::string_view value) { -/// if (key.empty()) { -/// AddError(ErrorKind::kInvalidArgument, "Property key cannot be empty"); -/// return *this; -/// } -/// // ... continue with normal logic -/// return *this; -/// } -/// -/// Result Apply() override { -/// // Check for accumulated errors first -/// ICEBERG_RETURN_IF_ERROR(CheckErrors()); -/// // ... proceed with apply logic -/// } -/// \endcode -/// /// \tparam T The type of result returned by Apply() template -class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate { +class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate, public ErrorCollector { public: ~PendingUpdateTyped() override = default; @@ -114,49 +90,6 @@ class ICEBERG_EXPORT PendingUpdateTyped : public PendingUpdate { protected: PendingUpdateTyped() = default; - - /// \brief Add a validation error to be returned later - /// - /// Errors are accumulated and will be returned by Apply() or Commit(). - /// This allows builder methods to continue and collect all errors rather - /// than failing fast on the first error. - /// - /// \param kind The kind of error - /// \param message The error message - void AddError(ErrorKind kind, std::string message) { - error_collector_.AddError(kind, std::move(message)); - } - - /// \brief Add an existing error object to be returned later - /// - /// Useful when propagating errors from other components. - /// - /// \param err The error to add - void AddError(Error err) { error_collector_.AddError(std::move(err)); } - - /// \brief Check if any errors have been collected - /// - /// \return true if there are accumulated errors - bool HasErrors() const { return error_collector_.HasErrors(); } - - /// \brief Check for accumulated errors and return them if any exist - /// - /// This should be called at the beginning of Apply() or Commit() to - /// return all accumulated validation errors. - /// - /// \return Status::OK if no errors, or a ValidationFailed error with - /// all accumulated error messages - Status CheckErrors() const { return error_collector_.CheckErrors(); } - - /// \brief Clear all accumulated errors - /// - /// This can be useful for resetting the error state in tests or - /// when reusing a builder instance. - void ClearErrors() { error_collector_.ClearErrors(); } - - private: - // Error collection (since builder methods return *this and cannot throw) - ErrorCollector error_collector_; }; } // namespace iceberg diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 46d2320db..393eebfc3 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -36,7 +36,6 @@ #include "iceberg/snapshot.h" #include "iceberg/sort_order.h" #include "iceberg/table_update.h" -#include "iceberg/util/error_collector.h" #include "iceberg/util/gzip_internal.h" #include "iceberg/util/macros.h" #include "iceberg/util/uuid.h" @@ -276,9 +275,6 @@ struct TableMetadataBuilder::Impl { // Change tracking std::vector> changes; - // Error collection (since methods return *this and cannot throw) - ErrorCollector error_collector; - // Metadata location tracking std::optional metadata_location; std::optional previous_metadata_location; @@ -349,9 +345,7 @@ TableMetadataBuilder& TableMetadataBuilder::AssignUUID(std::string_view uuid) { // Validation: UUID cannot be empty if (uuid_str.empty()) { - impl_->error_collector.AddError(ErrorKind::kInvalidArgument, - "Cannot assign empty UUID"); - return *this; + return AddError(ErrorKind::kInvalidArgument, "Cannot assign empty UUID"); } // Check if UUID is already set to the same value (no-op) @@ -453,7 +447,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveSnapshots( throw IcebergError(std::format("{} not implemented", __FUNCTION__)); } -TableMetadataBuilder& TableMetadataBuilder::suppressHistoricalSnapshots() { +TableMetadataBuilder& TableMetadataBuilder::SuppressHistoricalSnapshots() { throw IcebergError(std::format("{} not implemented", __FUNCTION__)); } @@ -501,7 +495,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveEncryptionKey(std::string_view Result> TableMetadataBuilder::Build() { // 1. Check for accumulated errors - ICEBERG_RETURN_UNEXPECTED(impl_->error_collector.CheckErrors()); + ICEBERG_RETURN_UNEXPECTED(CheckErrors()); // 2. Validate metadata consistency through TableMetadata#Validate diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 5ecbe4766..503b9b143 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -30,6 +30,7 @@ #include "iceberg/iceberg_export.h" #include "iceberg/type_fwd.h" +#include "iceberg/util/error_collector.h" #include "iceberg/util/lazy.h" #include "iceberg/util/timepoint.h" @@ -193,7 +194,7 @@ ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry); /// If a modification violates Iceberg table constraints (e.g., setting a current /// schema ID that does not exist), an error will be recorded and returned when /// Build() is called. -class ICEBERG_EXPORT TableMetadataBuilder { +class ICEBERG_EXPORT TableMetadataBuilder : public ErrorCollector { public: /// \brief Create a builder for a new table /// @@ -353,7 +354,7 @@ class ICEBERG_EXPORT TableMetadataBuilder { /// RemoveSnapshot changes are created. A snapshot is historical if no ref directly /// references its ID. /// \return Reference to this builder for method chaining - TableMetadataBuilder& suppressHistoricalSnapshots(); + TableMetadataBuilder& SuppressHistoricalSnapshots(); /// \brief Set table statistics /// diff --git a/src/iceberg/util/error_collector.h b/src/iceberg/util/error_collector.h index e78498838..f94967f97 100644 --- a/src/iceberg/util/error_collector.h +++ b/src/iceberg/util/error_collector.h @@ -20,74 +20,87 @@ #pragma once /// \file iceberg/util/error_collector.h -/// Utility for collecting validation errors in builder patterns +/// Base class for collecting validation errors in builder patterns #include #include +#include "iceberg/iceberg_export.h" #include "iceberg/result.h" namespace iceberg { -/// \brief Utility class for collecting validation errors in builder patterns +/// \brief Base class for collecting validation errors in builder patterns /// /// This class provides error accumulation functionality for builders that /// cannot throw exceptions. Builder methods can call AddError() to accumulate /// validation errors, and CheckErrors() returns all errors at once. /// -/// This allows users to see all validation errors at once rather than fixing -/// them one by one (fail-slow instead of fail-fast). -/// /// Example usage: /// \code -/// class MyBuilder { -/// protected: -/// ErrorCollector errors_; -/// +/// class MyBuilder : public ErrorCollectorBase { +/// public: /// MyBuilder& SetValue(int val) { /// if (val < 0) { -/// errors_.AddError(ErrorKind::kInvalidArgument, "Value must be non-negative"); -/// return *this; +/// return AddError(ErrorKind::kInvalidArgument, "Value must be non-negative"); /// } /// value_ = val; /// return *this; /// } /// /// Result Build() { -/// ICEBERG_RETURN_UNEXPECTED(errors_.CheckErrors()); +/// ICEBERG_RETURN_UNEXPECTED(CheckErrors()); /// return MyObject{value_}; /// } +/// +/// private: +/// int value_ = 0; /// }; /// \endcode -class ErrorCollector { +class ICEBERG_EXPORT ErrorCollector { public: ErrorCollector() = default; + virtual ~ErrorCollector() = default; - /// \brief Add a validation error + ErrorCollector(ErrorCollector&&) = default; + ErrorCollector& operator=(ErrorCollector&&) = default; + + ErrorCollector(const ErrorCollector&) = default; + ErrorCollector& operator=(const ErrorCollector&) = default; + + /// \brief Add a specific error and return reference to derived class /// + /// \param self Deduced reference to the derived class instance /// \param kind The kind of error /// \param message The error message - void AddError(ErrorKind kind, std::string message) { - errors_.emplace_back(kind, std::move(message)); + /// \return Reference to the derived class for method chaining + auto& AddError(this auto& self, ErrorKind kind, std::string message) { + self.errors_.emplace_back(kind, std::move(message)); + return self; } - /// \brief Add an existing error object + /// \brief Add an existing error object and return reference to derived class /// /// Useful when propagating errors from other components or reusing /// error objects without deconstructing and reconstructing them. /// + /// \param self Deduced reference to the derived class instance /// \param err The error to add - void AddError(Error err) { errors_.push_back(std::move(err)); } + /// \return Reference to the derived class for method chaining + auto& AddError(this auto& self, Error err) { + self.errors_.push_back(std::move(err)); + return self; + } /// \brief Check if any errors have been collected /// /// \return true if there are accumulated errors - bool HasErrors() const { return !errors_.empty(); } + [[nodiscard]] bool HasErrors() const { return !errors_.empty(); } /// \brief Get the number of errors collected /// /// \return The count of accumulated errors - size_t ErrorCount() const { return errors_.size(); } + [[nodiscard]] size_t ErrorCount() const { return errors_.size(); } /// \brief Check for accumulated errors and return them if any exist /// @@ -97,7 +110,7 @@ class ErrorCollector { /// /// \return Status::OK if no errors, or a ValidationFailed error with /// all accumulated error messages - Status CheckErrors() const { + [[nodiscard]] Status CheckErrors() const { if (!errors_.empty()) { std::string error_msg = "Validation failed due to the following errors:\n"; for (const auto& [kind, message] : errors_) { @@ -117,9 +130,9 @@ class ErrorCollector { /// \brief Get read-only access to all collected errors /// /// \return A const reference to the vector of errors - const std::vector& Errors() const { return errors_; } + [[nodiscard]] const std::vector& Errors() const { return errors_; } - private: + protected: std::vector errors_; }; diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build index 30c42d02d..0e0508476 100644 --- a/src/iceberg/util/meson.build +++ b/src/iceberg/util/meson.build @@ -23,6 +23,7 @@ install_headers( 'conversions.h', 'decimal.h', 'endian.h', + 'error_collector.h', 'formattable.h', 'formatter.h', 'int128.h',