From 5dfb60661390fe4f887bc287a01e75a6ec0a86ed Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Fri, 15 Aug 2025 17:03:42 +0800 Subject: [PATCH 01/16] fix: implement manifest entry metadata inheritance system - Add InheritableMetadata interface and implementations for applying metadata inheritance rules to manifest entries - Fix ManifestReader::Make methods to properly handle Result types from InheritableMetadataFactory methods - Move constructor/destructor definitions from header to implementation to avoid redefinition errors - Add missing ManifestListReaderImpl::Files() method implementation - Update method names from MakeReader to Make for consistency This ensures proper inheritance of snapshot_id, sequence_number, and file_sequence_number from manifest metadata to individual entries according to Iceberg specification. --- src/iceberg/inheritable_metadata.cc | 102 ++++++++++++++++++++ src/iceberg/inheritable_metadata.h | 118 ++++++++++++++++++++++++ src/iceberg/manifest_reader.cc | 35 ++++++- src/iceberg/manifest_reader.h | 15 ++- src/iceberg/manifest_reader_internal.cc | 7 ++ src/iceberg/manifest_reader_internal.h | 17 +++- 6 files changed, 284 insertions(+), 10 deletions(-) create mode 100644 src/iceberg/inheritable_metadata.cc create mode 100644 src/iceberg/inheritable_metadata.h diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc new file mode 100644 index 000000000..944fd2e72 --- /dev/null +++ b/src/iceberg/inheritable_metadata.cc @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inheritable_metadata.h" + +#include +#include + +#include + +#include "iceberg/manifest_entry.h" + +namespace iceberg { + +BaseInheritableMetadata::BaseInheritableMetadata(int32_t spec_id, int64_t snapshot_id, + int64_t sequence_number, + std::string manifest_location) + : spec_id_(spec_id), + snapshot_id_(snapshot_id), + sequence_number_(sequence_number), + manifest_location_(std::move(manifest_location)) {} + +Result BaseInheritableMetadata::Apply(ManifestEntry entry) { + if (!entry.snapshot_id.has_value()) { + entry.snapshot_id = snapshot_id_; + } + + // In v1 tables, the data sequence number is not persisted and can be safely defaulted + // to 0 In v2 tables, the data sequence number should be inherited iff the entry status + // is ADDED + if (!entry.sequence_number.has_value() && + (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { + entry.sequence_number = sequence_number_; + } + + // In v1 tables, the file sequence number is not persisted and can be safely defaulted + // to 0 In v2 tables, the file sequence number should be inherited iff the entry status + // is ADDED + if (!entry.file_sequence_number.has_value() && + (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { + entry.file_sequence_number = sequence_number_; + } + + if (entry.data_file) { + entry.data_file->partition_spec_id = spec_id_; + } + + return entry; +} + +Result EmptyInheritableMetadata::Apply(ManifestEntry entry) { + return entry; +} + +CopyInheritableMetadata::CopyInheritableMetadata(int64_t snapshot_id) + : snapshot_id_(snapshot_id) {} + +Result CopyInheritableMetadata::Apply(ManifestEntry entry) { + if (!entry.snapshot_id.has_value()) { + entry.snapshot_id = snapshot_id_; + } + return entry; +} + +Result> InheritableMetadataFactory::Empty() { + return std::make_unique(); +} + +Result> InheritableMetadataFactory::FromManifest( + const ManifestFile& manifest) { + // Validate that the manifest has a snapshot ID assigned + if (manifest.added_snapshot_id == Snapshot::kInvalidSnapshotId) { + return InvalidArgument("Manifest file {} has no snapshot ID", manifest.manifest_path); + } + + return std::make_unique( + manifest.partition_spec_id, manifest.added_snapshot_id, manifest.sequence_number, + manifest.manifest_path); +} + +Result> InheritableMetadataFactory::ForCopy( + int64_t snapshot_id) { + return std::make_unique(snapshot_id); +} + +} // namespace iceberg diff --git a/src/iceberg/inheritable_metadata.h b/src/iceberg/inheritable_metadata.h new file mode 100644 index 000000000..76e3c6241 --- /dev/null +++ b/src/iceberg/inheritable_metadata.h @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/inheritable_metadata.h +/// Metadata inheritance system for manifest entries. + +#include +#include +#include + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/manifest_entry.h" +#include "iceberg/manifest_list.h" + +namespace iceberg { + +/// \brief Interface for applying inheritable metadata to manifest entries. +/// +/// When manifest entries have null values for certain fields (snapshot_id, +/// data sequence number, file sequence number), these values should be inherited +/// from the manifest file. This interface provides a way to apply such inheritance rules. +class ICEBERG_EXPORT InheritableMetadata { + public: + virtual ~InheritableMetadata() = default; + + /// \brief Apply inheritable metadata to a manifest entry. + /// \param entry The manifest entry to modify. + /// \return The modified manifest entry with inherited metadata applied. + virtual Result Apply(ManifestEntry entry) = 0; +}; + +/// \brief Base implementation of InheritableMetadata that handles standard inheritance +/// rules. +class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { + public: + /// \brief Constructor for base inheritable metadata. + /// \param spec_id Partition spec ID from the manifest. + /// \param snapshot_id Snapshot ID from the manifest. + /// \param sequence_number Sequence number from the manifest. + /// \param manifest_location Path to the manifest file. + BaseInheritableMetadata(int32_t spec_id, int64_t snapshot_id, int64_t sequence_number, + std::string manifest_location); + + /// \brief Apply inheritance rules to a manifest entry. + /// \param entry The manifest entry to modify. + /// \return The modified manifest entry. + Result Apply(ManifestEntry entry) override; + + private: + int32_t spec_id_; + int64_t snapshot_id_; + int64_t sequence_number_; + std::string manifest_location_; +}; + +/// \brief Empty implementation that applies no inheritance. +class ICEBERG_EXPORT EmptyInheritableMetadata : public InheritableMetadata { + public: + /// \brief Apply no inheritance - returns the entry unchanged. + Result Apply(ManifestEntry entry) override; +}; + +/// \brief Metadata inheritance for copying manifests before commit. +class ICEBERG_EXPORT CopyInheritableMetadata : public InheritableMetadata { + public: + /// \brief Constructor for copy metadata. + /// \param snapshot_id The snapshot ID to use for copying. + explicit CopyInheritableMetadata(int64_t snapshot_id); + + /// \brief Apply copy inheritance rules. + Result Apply(ManifestEntry entry) override; + + private: + int64_t snapshot_id_; +}; + +/// \brief Factory for creating InheritableMetadata instances. +class ICEBERG_EXPORT InheritableMetadataFactory { + public: + /// \brief Create an empty metadata instance that applies no inheritance. + static Result> Empty(); + + /// \brief Create metadata instance from a manifest file. + /// \param manifest The manifest file to extract metadata from. + /// \return Inheritable metadata based on the manifest. + static Result> FromManifest( + const ManifestFile& manifest); + + /// \brief Create metadata instance for rewriting a manifest before commit. + /// \param snapshot_id The snapshot ID for the copy operation. + /// \return Inheritable metadata for copying. + static Result> ForCopy(int64_t snapshot_id); + + private: + InheritableMetadataFactory() = default; +}; + +} // namespace iceberg diff --git a/src/iceberg/manifest_reader.cc b/src/iceberg/manifest_reader.cc index 208263d57..b4ae2b1fd 100644 --- a/src/iceberg/manifest_reader.cc +++ b/src/iceberg/manifest_reader.cc @@ -19,6 +19,7 @@ #include "iceberg/manifest_reader.h" +#include "iceberg/file_reader.h" #include "iceberg/manifest_entry.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader_internal.h" @@ -27,7 +28,33 @@ namespace iceberg { -Result> ManifestReader::MakeReader( +Result> ManifestReader::Make( + const ManifestFile& manifest, std::shared_ptr file_io, + std::shared_ptr partition_schema) { + // Validate manifest content type - only DATA manifests are supported by ManifestReader + if (manifest.content != ManifestFile::Content::kData) { + return InvalidArgument("Cannot read a delete manifest with a ManifestReader: {}", + manifest.manifest_path); + } + + auto manifest_entry_schema = ManifestEntry::TypeFromPartitionType(partition_schema); + auto fields_span = manifest_entry_schema->fields(); + std::vector fields(fields_span.begin(), fields_span.end()); + auto schema = std::make_shared(fields); + ICEBERG_ASSIGN_OR_RAISE( + auto reader, + ReaderFactoryRegistry::Open(FileFormatType::kAvro, {.path = manifest.manifest_path, + .io = std::move(file_io), + .projection = schema})); + // Create inheritable metadata for this manifest + ICEBERG_ASSIGN_OR_RAISE(auto inheritable_metadata, + InheritableMetadataFactory::FromManifest(manifest)); + + return std::make_unique(std::move(reader), std::move(schema), + std::move(inheritable_metadata)); +} + +Result> ManifestReader::Make( std::string_view manifest_location, std::shared_ptr file_io, std::shared_ptr partition_schema) { auto manifest_entry_schema = ManifestEntry::TypeFromPartitionType(partition_schema); @@ -39,10 +66,12 @@ Result> ManifestReader::MakeReader( {.path = std::string(manifest_location), .io = std::move(file_io), .projection = schema})); - return std::make_unique(std::move(reader), std::move(schema)); + ICEBERG_ASSIGN_OR_RAISE(auto inheritable_metadata, InheritableMetadataFactory::Empty()); + return std::make_unique(std::move(reader), std::move(schema), + std::move(inheritable_metadata)); } -Result> ManifestListReader::MakeReader( +Result> ManifestListReader::Make( std::string_view manifest_list_location, std::shared_ptr file_io) { std::vector fields(ManifestFile::Type().fields().begin(), ManifestFile::Type().fields().end()); diff --git a/src/iceberg/manifest_reader.h b/src/iceberg/manifest_reader.h index 5d231de0f..fa13c56fa 100644 --- a/src/iceberg/manifest_reader.h +++ b/src/iceberg/manifest_reader.h @@ -27,6 +27,7 @@ #include "iceberg/file_reader.h" #include "iceberg/iceberg_export.h" +#include "iceberg/manifest_list.h" #include "iceberg/type_fwd.h" namespace iceberg { @@ -37,11 +38,21 @@ class ICEBERG_EXPORT ManifestReader { virtual ~ManifestReader() = default; virtual Result> Entries() const = 0; + /// \brief Creates a reader for a manifest file. + /// \param manifest A ManifestFile object containing metadata about the manifest. + /// \param file_io File IO implementation to use. + /// \param partition_schema Schema for the partition. + /// \return A Result containing the reader or an error. + static Result> Make( + const ManifestFile& manifest, std::shared_ptr file_io, + std::shared_ptr partition_schema); + /// \brief Creates a reader for a manifest file. /// \param manifest_location Path to the manifest file. /// \param file_io File IO implementation to use. + /// \param partition_schema Schema for the partition. /// \return A Result containing the reader or an error. - static Result> MakeReader( + static Result> Make( std::string_view manifest_location, std::shared_ptr file_io, std::shared_ptr partition_schema); }; @@ -56,7 +67,7 @@ class ICEBERG_EXPORT ManifestListReader { /// \param manifest_list_location Path to the manifest list file. /// \param file_io File IO implementation to use. /// \return A Result containing the reader or an error. - static Result> MakeReader( + static Result> Make( std::string_view manifest_list_location, std::shared_ptr file_io); }; diff --git a/src/iceberg/manifest_reader_internal.cc b/src/iceberg/manifest_reader_internal.cc index e3b9becc7..1267e9b32 100644 --- a/src/iceberg/manifest_reader_internal.cc +++ b/src/iceberg/manifest_reader_internal.cc @@ -30,6 +30,7 @@ #include "iceberg/schema.h" #include "iceberg/type.h" #include "iceberg/util/macros.h" +#include "iceberg/util/visit_type.h" namespace iceberg { @@ -543,6 +544,12 @@ Result> ManifestReaderImpl::Entries() const { break; } } + + // Apply inheritance to all entries + for (auto& entry : manifest_entries) { + ICEBERG_ASSIGN_OR_RAISE(entry, inheritable_metadata_->Apply(std::move(entry))); + } + return manifest_entries; } diff --git a/src/iceberg/manifest_reader_internal.h b/src/iceberg/manifest_reader_internal.h index a367ede2f..93f7c663c 100644 --- a/src/iceberg/manifest_reader_internal.h +++ b/src/iceberg/manifest_reader_internal.h @@ -22,6 +22,8 @@ /// \file iceberg/internal/manifest_reader_internal.h /// Reader implementation for manifest list files and manifest files. +#include "iceberg/file_reader.h" +#include "iceberg/inheritable_metadata.h" #include "iceberg/manifest_reader.h" namespace iceberg { @@ -29,23 +31,28 @@ namespace iceberg { /// \brief Read manifest entries from a manifest file. class ManifestReaderImpl : public ManifestReader { public: - explicit ManifestReaderImpl(std::unique_ptr reader, - std::shared_ptr schema) - : schema_(std::move(schema)), reader_(std::move(reader)) {} + ManifestReaderImpl(std::unique_ptr reader, std::shared_ptr schema, + std::unique_ptr inheritable_metadata) + : schema_(std::move(schema)), + reader_(std::move(reader)), + inheritable_metadata_(std::move(inheritable_metadata)) {} + + ManifestReaderImpl() = default; Result> Entries() const override; private: std::shared_ptr schema_; std::unique_ptr reader_; + std::unique_ptr inheritable_metadata_; }; /// \brief Read manifest files from a manifest list file. class ManifestListReaderImpl : public ManifestListReader { public: - explicit ManifestListReaderImpl(std::unique_ptr reader, - std::shared_ptr schema) + ManifestListReaderImpl(std::unique_ptr reader, std::shared_ptr schema) : schema_(std::move(schema)), reader_(std::move(reader)) {} + ManifestListReaderImpl() = default; Result> Files() const override; From a4cecb490a953590ba65b96681bee662929ee2bf Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Fri, 15 Aug 2025 17:09:22 +0800 Subject: [PATCH 02/16] fix: add explicit keyword and update method calls - Add explicit keyword to ManifestReaderImpl and ManifestListReaderImpl constructors to prevent unintended implicit conversions - Update table_scan.cc to use the new Make method names instead of deprecated MakeReader methods --- src/iceberg/manifest_reader_internal.h | 8 +++++--- src/iceberg/table_scan.cc | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/iceberg/manifest_reader_internal.h b/src/iceberg/manifest_reader_internal.h index 93f7c663c..78cd18496 100644 --- a/src/iceberg/manifest_reader_internal.h +++ b/src/iceberg/manifest_reader_internal.h @@ -31,8 +31,9 @@ namespace iceberg { /// \brief Read manifest entries from a manifest file. class ManifestReaderImpl : public ManifestReader { public: - ManifestReaderImpl(std::unique_ptr reader, std::shared_ptr schema, - std::unique_ptr inheritable_metadata) + explicit ManifestReaderImpl(std::unique_ptr reader, + std::shared_ptr schema, + std::unique_ptr inheritable_metadata) : schema_(std::move(schema)), reader_(std::move(reader)), inheritable_metadata_(std::move(inheritable_metadata)) {} @@ -50,7 +51,8 @@ class ManifestReaderImpl : public ManifestReader { /// \brief Read manifest files from a manifest list file. class ManifestListReaderImpl : public ManifestListReader { public: - ManifestListReaderImpl(std::unique_ptr reader, std::shared_ptr schema) + explicit ManifestListReaderImpl(std::unique_ptr reader, + std::shared_ptr schema) : schema_(std::move(schema)), reader_(std::move(reader)) {} ManifestListReaderImpl() = default; diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc index 43deb2aca..a7edd5d79 100644 --- a/src/iceberg/table_scan.cc +++ b/src/iceberg/table_scan.cc @@ -147,7 +147,7 @@ DataTableScan::DataTableScan(TableScanContext context, std::shared_ptr f Result>> DataTableScan::PlanFiles() const { ICEBERG_ASSIGN_OR_RAISE( auto manifest_list_reader, - ManifestListReader::MakeReader(context_.snapshot->manifest_list, file_io_)); + ManifestListReader::Make(context_.snapshot->manifest_list, file_io_)); ICEBERG_ASSIGN_OR_RAISE(auto manifest_files, manifest_list_reader->Files()); std::vector> tasks; @@ -155,9 +155,9 @@ Result>> DataTableScan::PlanFiles() co auto partition_schema = partition_spec->schema(); for (const auto& manifest_file : manifest_files) { - ICEBERG_ASSIGN_OR_RAISE(auto manifest_reader, - ManifestReader::MakeReader(manifest_file.manifest_path, - file_io_, partition_schema)); + ICEBERG_ASSIGN_OR_RAISE( + auto manifest_reader, + ManifestReader::Make(manifest_file, file_io_, partition_schema)); ICEBERG_ASSIGN_OR_RAISE(auto manifests, manifest_reader->Entries()); // TODO(gty404): filter manifests using partition spec and filter expression From 65473166c4a212d78118b0d3a6793a8fe8508a03 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Fri, 15 Aug 2025 17:10:16 +0800 Subject: [PATCH 03/16] fix --- src/iceberg/CMakeLists.txt | 1 + test/manifest_reader_test.cc | 3 +-- test_detail.xml | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 test_detail.xml diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 316b02cee..b509c2579 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -24,6 +24,7 @@ set(ICEBERG_SOURCES expression/literal.cc file_reader.cc file_writer.cc + inheritable_metadata.cc json_internal.cc manifest_entry.cc manifest_list.cc diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index 55fbdd8e5..3288a85f6 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -105,8 +105,7 @@ TEST_F(ManifestReaderV1Test, V1PartitionedBasicTest) { auto partition_schema = std::make_shared(std::vector({partition_field})); std::string path = GetResourcePath("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro"); - auto manifest_reader_result = - ManifestReader::MakeReader(path, file_io_, partition_schema); + auto manifest_reader_result = ManifestReader::Make(path, file_io_, partition_schema); ASSERT_EQ(manifest_reader_result.has_value(), true) << manifest_reader_result.error().message; auto manifest_reader = std::move(manifest_reader_result.value()); diff --git a/test_detail.xml b/test_detail.xml new file mode 100644 index 000000000..071ddd3f8 --- /dev/null +++ b/test_detail.xml @@ -0,0 +1,7 @@ + + + + + + + From cb617b5df3139c0f809009f349ee1e5ad4d9b01c Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Fri, 15 Aug 2025 17:24:07 +0800 Subject: [PATCH 04/16] fix: update method name in manifest_list_reader_test.cc --- test/manifest_list_reader_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 09984f389..a3c08c35c 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -43,7 +43,7 @@ class ManifestListReaderTestBase : public TempFileTestBase { void TestManifestListReading(const std::string& resource_name, const std::vector& expected_manifest_list) { std::string path = GetResourcePath(resource_name); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + auto manifest_reader_result = ManifestListReader::Make(path, file_io_); ASSERT_EQ(manifest_reader_result.has_value(), true); auto manifest_reader = std::move(manifest_reader_result.value()); From 7ef7aa4898480df2f32bc17bb1ad82c0087496eb Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Fri, 15 Aug 2025 18:01:49 +0800 Subject: [PATCH 05/16] remove file --- test_detail.xml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 test_detail.xml diff --git a/test_detail.xml b/test_detail.xml deleted file mode 100644 index 071ddd3f8..000000000 --- a/test_detail.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - From d7b8a8511656cafa553915e63b402a0a0af8f389 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Mon, 18 Aug 2025 10:49:30 +0800 Subject: [PATCH 06/16] fix review --- src/iceberg/inheritable_metadata.cc | 8 +++----- src/iceberg/inheritable_metadata.h | 16 ++++++++++------ src/iceberg/manifest_reader.cc | 15 +++++---------- src/iceberg/manifest_reader.h | 1 - src/iceberg/manifest_reader_internal.cc | 5 +---- src/iceberg/manifest_reader_internal.h | 3 --- 6 files changed, 19 insertions(+), 29 deletions(-) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index 944fd2e72..8f0410b40 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -22,8 +22,6 @@ #include #include -#include - #include "iceberg/manifest_entry.h" namespace iceberg { @@ -36,7 +34,7 @@ BaseInheritableMetadata::BaseInheritableMetadata(int32_t spec_id, int64_t snapsh sequence_number_(sequence_number), manifest_location_(std::move(manifest_location)) {} -Result BaseInheritableMetadata::Apply(ManifestEntry entry) { +Result BaseInheritableMetadata::Apply(ManifestEntry& entry) { if (!entry.snapshot_id.has_value()) { entry.snapshot_id = snapshot_id_; } @@ -64,14 +62,14 @@ Result BaseInheritableMetadata::Apply(ManifestEntry entry) { return entry; } -Result EmptyInheritableMetadata::Apply(ManifestEntry entry) { +Result EmptyInheritableMetadata::Apply(ManifestEntry& entry) { return entry; } CopyInheritableMetadata::CopyInheritableMetadata(int64_t snapshot_id) : snapshot_id_(snapshot_id) {} -Result CopyInheritableMetadata::Apply(ManifestEntry entry) { +Result CopyInheritableMetadata::Apply(ManifestEntry& entry) { if (!entry.snapshot_id.has_value()) { entry.snapshot_id = snapshot_id_; } diff --git a/src/iceberg/inheritable_metadata.h b/src/iceberg/inheritable_metadata.h index 76e3c6241..6f4f19493 100644 --- a/src/iceberg/inheritable_metadata.h +++ b/src/iceberg/inheritable_metadata.h @@ -44,9 +44,9 @@ class ICEBERG_EXPORT InheritableMetadata { virtual ~InheritableMetadata() = default; /// \brief Apply inheritable metadata to a manifest entry. - /// \param entry The manifest entry to modify. + /// \param entry The manifest entry to modify in-place. /// \return The modified manifest entry with inherited metadata applied. - virtual Result Apply(ManifestEntry entry) = 0; + virtual Result Apply(ManifestEntry& entry) = 0; }; /// \brief Base implementation of InheritableMetadata that handles standard inheritance @@ -62,9 +62,9 @@ class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { std::string manifest_location); /// \brief Apply inheritance rules to a manifest entry. - /// \param entry The manifest entry to modify. + /// \param entry The manifest entry to modify in-place. /// \return The modified manifest entry. - Result Apply(ManifestEntry entry) override; + Result Apply(ManifestEntry& entry) override; private: int32_t spec_id_; @@ -77,7 +77,9 @@ class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { class ICEBERG_EXPORT EmptyInheritableMetadata : public InheritableMetadata { public: /// \brief Apply no inheritance - returns the entry unchanged. - Result Apply(ManifestEntry entry) override; + /// \param entry The manifest entry (unchanged). + /// \return The manifest entry. + Result Apply(ManifestEntry& entry) override; }; /// \brief Metadata inheritance for copying manifests before commit. @@ -88,7 +90,9 @@ class ICEBERG_EXPORT CopyInheritableMetadata : public InheritableMetadata { explicit CopyInheritableMetadata(int64_t snapshot_id); /// \brief Apply copy inheritance rules. - Result Apply(ManifestEntry entry) override; + /// \param entry The manifest entry to modify in-place. + /// \return The modified manifest entry. + Result Apply(ManifestEntry& entry) override; private: int64_t snapshot_id_; diff --git a/src/iceberg/manifest_reader.cc b/src/iceberg/manifest_reader.cc index b4ae2b1fd..fc09b794e 100644 --- a/src/iceberg/manifest_reader.cc +++ b/src/iceberg/manifest_reader.cc @@ -19,7 +19,8 @@ #include "iceberg/manifest_reader.h" -#include "iceberg/file_reader.h" +#include + #include "iceberg/manifest_entry.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader_internal.h" @@ -31,16 +32,10 @@ namespace iceberg { Result> ManifestReader::Make( const ManifestFile& manifest, std::shared_ptr file_io, std::shared_ptr partition_schema) { - // Validate manifest content type - only DATA manifests are supported by ManifestReader - if (manifest.content != ManifestFile::Content::kData) { - return InvalidArgument("Cannot read a delete manifest with a ManifestReader: {}", - manifest.manifest_path); - } - auto manifest_entry_schema = ManifestEntry::TypeFromPartitionType(partition_schema); - auto fields_span = manifest_entry_schema->fields(); - std::vector fields(fields_span.begin(), fields_span.end()); - auto schema = std::make_shared(fields); + auto schema_unique = FromStructType(std::move(*manifest_entry_schema), std::nullopt); + auto schema = std::shared_ptr(std::move(schema_unique)); + ICEBERG_ASSIGN_OR_RAISE( auto reader, ReaderFactoryRegistry::Open(FileFormatType::kAvro, {.path = manifest.manifest_path, diff --git a/src/iceberg/manifest_reader.h b/src/iceberg/manifest_reader.h index fa13c56fa..989b2d69f 100644 --- a/src/iceberg/manifest_reader.h +++ b/src/iceberg/manifest_reader.h @@ -25,7 +25,6 @@ #include #include -#include "iceberg/file_reader.h" #include "iceberg/iceberg_export.h" #include "iceberg/manifest_list.h" #include "iceberg/type_fwd.h" diff --git a/src/iceberg/manifest_reader_internal.cc b/src/iceberg/manifest_reader_internal.cc index 1267e9b32..a62731cfe 100644 --- a/src/iceberg/manifest_reader_internal.cc +++ b/src/iceberg/manifest_reader_internal.cc @@ -19,8 +19,6 @@ #include "manifest_reader_internal.h" -#include - #include #include "iceberg/arrow_c_data_guard_internal.h" @@ -30,7 +28,6 @@ #include "iceberg/schema.h" #include "iceberg/type.h" #include "iceberg/util/macros.h" -#include "iceberg/util/visit_type.h" namespace iceberg { @@ -547,7 +544,7 @@ Result> ManifestReaderImpl::Entries() const { // Apply inheritance to all entries for (auto& entry : manifest_entries) { - ICEBERG_ASSIGN_OR_RAISE(entry, inheritable_metadata_->Apply(std::move(entry))); + ICEBERG_ASSIGN_OR_RAISE(entry, inheritable_metadata_->Apply(entry)); } return manifest_entries; diff --git a/src/iceberg/manifest_reader_internal.h b/src/iceberg/manifest_reader_internal.h index 78cd18496..3144d078c 100644 --- a/src/iceberg/manifest_reader_internal.h +++ b/src/iceberg/manifest_reader_internal.h @@ -38,8 +38,6 @@ class ManifestReaderImpl : public ManifestReader { reader_(std::move(reader)), inheritable_metadata_(std::move(inheritable_metadata)) {} - ManifestReaderImpl() = default; - Result> Entries() const override; private: @@ -54,7 +52,6 @@ class ManifestListReaderImpl : public ManifestListReader { explicit ManifestListReaderImpl(std::unique_ptr reader, std::shared_ptr schema) : schema_(std::move(schema)), reader_(std::move(reader)) {} - ManifestListReaderImpl() = default; Result> Files() const override; From 0d38fd037bf4591f6bdd5ce96fccbd1f2ecd177f Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Mon, 18 Aug 2025 10:57:32 +0800 Subject: [PATCH 07/16] fix review --- src/iceberg/inheritable_metadata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/iceberg/inheritable_metadata.h b/src/iceberg/inheritable_metadata.h index 6f4f19493..50a69ec6b 100644 --- a/src/iceberg/inheritable_metadata.h +++ b/src/iceberg/inheritable_metadata.h @@ -44,7 +44,7 @@ class ICEBERG_EXPORT InheritableMetadata { virtual ~InheritableMetadata() = default; /// \brief Apply inheritable metadata to a manifest entry. - /// \param entry The manifest entry to modify in-place. + /// \param entry The manifest entry to modify. /// \return The modified manifest entry with inherited metadata applied. virtual Result Apply(ManifestEntry& entry) = 0; }; @@ -62,7 +62,7 @@ class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { std::string manifest_location); /// \brief Apply inheritance rules to a manifest entry. - /// \param entry The manifest entry to modify in-place. + /// \param entry The manifest entry to modify. /// \return The modified manifest entry. Result Apply(ManifestEntry& entry) override; @@ -90,7 +90,7 @@ class ICEBERG_EXPORT CopyInheritableMetadata : public InheritableMetadata { explicit CopyInheritableMetadata(int64_t snapshot_id); /// \brief Apply copy inheritance rules. - /// \param entry The manifest entry to modify in-place. + /// \param entry The manifest entry to modify. /// \return The modified manifest entry. Result Apply(ManifestEntry& entry) override; From bc8d1064c3dffbaab05c339edffdc068433fabba Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Tue, 19 Aug 2025 13:55:49 +0800 Subject: [PATCH 08/16] add test --- test/manifest_reader_test.cc | 79 ++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index 3288a85f6..0f9852e3a 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -126,7 +126,7 @@ class ManifestReaderV2Test : public TempFileTestBase { file_io_ = std::make_shared(local_fs_); } - std::vector PrepareV2NonPartitionedManifestEntries() { + std::vector prepareV2NonPartitionedManifestEntries() { std::vector manifest_entries; std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/"; @@ -175,6 +175,56 @@ class ManifestReaderV2Test : public TempFileTestBase { return manifest_entries; } + std::vector prepareV2ManifestEntryMetadataInheritance() { + std::vector manifest_entries; + std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/"; + + std::vector paths = { + "00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet"}; + + std::vector file_sizes = {1344}; + std::vector record_counts = {4}; + + std::vector>> lower_bounds = { + {{1, {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 'f', 'o', 'u', 'r'}}, + {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '1'}}, + {4, {0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xdc, 0x5e, 0x40}}}}; + + std::vector>> upper_bounds = { + {{1, {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 't', 'w', 'o'}}, + {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}}, + {4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}}; + + manifest_entries.emplace_back( + ManifestEntry{.status = ManifestStatus::kAdded, + .snapshot_id = 679879563479918846LL, + .sequence_number = 15, + .file_sequence_number = 15, + .data_file = std::make_shared( + DataFile{.file_path = test_dir_prefix + paths[0], + .file_format = FileFormatType::kParquet, + .record_count = record_counts[0], + .file_size_in_bytes = file_sizes[0], + .column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}}, + .value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}}, + .null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}}, + .nan_value_counts = {{4, 0}}, + .lower_bounds = lower_bounds[0], + .upper_bounds = upper_bounds[0], + .key_metadata = {}, + .split_offsets = {4}, + .equality_ids = {}, + .sort_order_id = 0, + .partition_spec_id = 12, // inherit from manifest + .first_row_id = std::nullopt, + .referenced_data_file = std::nullopt, + .content_offset = std::nullopt, + .content_size_in_bytes = std::nullopt})}); + return manifest_entries; + } + std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; std::shared_ptr file_io_; }; @@ -182,7 +232,30 @@ class ManifestReaderV2Test : public TempFileTestBase { TEST_F(ManifestReaderV2Test, V2NonPartitionedBasicTest) { std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro"); - auto manifest_reader_result = ManifestReader::MakeReader(path, file_io_, nullptr); + auto manifest_reader_result = ManifestReader::Make(path, file_io_, nullptr); + ASSERT_EQ(manifest_reader_result.has_value(), true) + << manifest_reader_result.error().message; + + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Entries(); + ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; + ASSERT_EQ(read_result.value().size(), 1); + + auto expected_entries = prepareV2NonPartitionedManifestEntries(); + ASSERT_EQ(read_result.value(), expected_entries); +} + +TEST_F(ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest) { + std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro"); + ManifestFile manifest_file{ + .manifest_path = path, + .manifest_length = 100, + .partition_spec_id = 12, + .content = ManifestFile::Content::kData, + .sequence_number = 15, + .added_snapshot_id = 679879563479918846LL, + }; + auto manifest_reader_result = ManifestReader::Make(manifest_file, file_io_, nullptr); ASSERT_EQ(manifest_reader_result.has_value(), true) << manifest_reader_result.error().message; @@ -191,7 +264,7 @@ TEST_F(ManifestReaderV2Test, V2NonPartitionedBasicTest) { ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; ASSERT_EQ(read_result.value().size(), 1); - auto expected_entries = PrepareV2NonPartitionedManifestEntries(); + auto expected_entries = prepareV2ManifestEntryMetadataInheritance(); ASSERT_EQ(read_result.value(), expected_entries); } From 845cb61b7c161a4fd7c668e33e2a015ed5425e1a Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Tue, 19 Aug 2025 13:56:40 +0800 Subject: [PATCH 09/16] add test --- test/manifest_reader_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index 0f9852e3a..ddbaf558c 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -126,7 +126,7 @@ class ManifestReaderV2Test : public TempFileTestBase { file_io_ = std::make_shared(local_fs_); } - std::vector prepareV2NonPartitionedManifestEntries() { + std::vector PrepareV2NonPartitionedManifestEntries() { std::vector manifest_entries; std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/"; @@ -241,7 +241,7 @@ TEST_F(ManifestReaderV2Test, V2NonPartitionedBasicTest) { ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; ASSERT_EQ(read_result.value().size(), 1); - auto expected_entries = prepareV2NonPartitionedManifestEntries(); + auto expected_entries = PrepareV2NonPartitionedManifestEntries(); ASSERT_EQ(read_result.value(), expected_entries); } From 59360cc89f438e0bba400d1b59143c68aed4d2ff Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 20 Aug 2025 10:41:30 +0800 Subject: [PATCH 10/16] fix review1 --- src/iceberg/inheritable_metadata.cc | 26 ++++++++++++++----------- src/iceberg/inheritable_metadata.h | 8 ++++---- src/iceberg/manifest_reader.cc | 4 ++-- src/iceberg/manifest_reader.h | 2 +- src/iceberg/manifest_reader_internal.cc | 3 ++- test/manifest_reader_test.cc | 1 + 6 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index 8f0410b40..0e78c4e45 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -34,13 +34,14 @@ BaseInheritableMetadata::BaseInheritableMetadata(int32_t spec_id, int64_t snapsh sequence_number_(sequence_number), manifest_location_(std::move(manifest_location)) {} -Result BaseInheritableMetadata::Apply(ManifestEntry& entry) { +Status BaseInheritableMetadata::Apply(ManifestEntry& entry) { if (!entry.snapshot_id.has_value()) { entry.snapshot_id = snapshot_id_; } // In v1 tables, the data sequence number is not persisted and can be safely defaulted - // to 0 In v2 tables, the data sequence number should be inherited iff the entry status + // to 0. + // In v2 tables, the data sequence number should be inherited iff the entry status // is ADDED if (!entry.sequence_number.has_value() && (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { @@ -48,7 +49,8 @@ Result BaseInheritableMetadata::Apply(ManifestEntry& entry) { } // In v1 tables, the file sequence number is not persisted and can be safely defaulted - // to 0 In v2 tables, the file sequence number should be inherited iff the entry status + // to 0. + // In v2 tables, the file sequence number should be inherited iff the entry status // is ADDED if (!entry.file_sequence_number.has_value() && (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { @@ -59,21 +61,23 @@ Result BaseInheritableMetadata::Apply(ManifestEntry& entry) { entry.data_file->partition_spec_id = spec_id_; } - return entry; + return {}; } -Result EmptyInheritableMetadata::Apply(ManifestEntry& entry) { - return entry; +Status EmptyInheritableMetadata::Apply(ManifestEntry& entry) { + if (!entry.snapshot_id.has_value()) { + return InvalidArgument( + "Entries must have explicit snapshot ids if inherited metadata is empty"); + } + return {}; } CopyInheritableMetadata::CopyInheritableMetadata(int64_t snapshot_id) : snapshot_id_(snapshot_id) {} -Result CopyInheritableMetadata::Apply(ManifestEntry& entry) { - if (!entry.snapshot_id.has_value()) { - entry.snapshot_id = snapshot_id_; - } - return entry; +Status CopyInheritableMetadata::Apply(ManifestEntry& entry) { + entry.snapshot_id = snapshot_id_; + return {}; } Result> InheritableMetadataFactory::Empty() { diff --git a/src/iceberg/inheritable_metadata.h b/src/iceberg/inheritable_metadata.h index 50a69ec6b..f5b6b5169 100644 --- a/src/iceberg/inheritable_metadata.h +++ b/src/iceberg/inheritable_metadata.h @@ -46,7 +46,7 @@ class ICEBERG_EXPORT InheritableMetadata { /// \brief Apply inheritable metadata to a manifest entry. /// \param entry The manifest entry to modify. /// \return The modified manifest entry with inherited metadata applied. - virtual Result Apply(ManifestEntry& entry) = 0; + virtual Status Apply(ManifestEntry& entry) = 0; }; /// \brief Base implementation of InheritableMetadata that handles standard inheritance @@ -64,7 +64,7 @@ class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { /// \brief Apply inheritance rules to a manifest entry. /// \param entry The manifest entry to modify. /// \return The modified manifest entry. - Result Apply(ManifestEntry& entry) override; + Status Apply(ManifestEntry& entry) override; private: int32_t spec_id_; @@ -79,7 +79,7 @@ class ICEBERG_EXPORT EmptyInheritableMetadata : public InheritableMetadata { /// \brief Apply no inheritance - returns the entry unchanged. /// \param entry The manifest entry (unchanged). /// \return The manifest entry. - Result Apply(ManifestEntry& entry) override; + Status Apply(ManifestEntry& entry) override; }; /// \brief Metadata inheritance for copying manifests before commit. @@ -92,7 +92,7 @@ class ICEBERG_EXPORT CopyInheritableMetadata : public InheritableMetadata { /// \brief Apply copy inheritance rules. /// \param entry The manifest entry to modify. /// \return The modified manifest entry. - Result Apply(ManifestEntry& entry) override; + Status Apply(ManifestEntry& entry) override; private: int64_t snapshot_id_; diff --git a/src/iceberg/manifest_reader.cc b/src/iceberg/manifest_reader.cc index fc09b794e..eed8878e3 100644 --- a/src/iceberg/manifest_reader.cc +++ b/src/iceberg/manifest_reader.cc @@ -33,8 +33,8 @@ Result> ManifestReader::Make( const ManifestFile& manifest, std::shared_ptr file_io, std::shared_ptr partition_schema) { auto manifest_entry_schema = ManifestEntry::TypeFromPartitionType(partition_schema); - auto schema_unique = FromStructType(std::move(*manifest_entry_schema), std::nullopt); - auto schema = std::shared_ptr(std::move(schema_unique)); + std::shared_ptr schema = + FromStructType(std::move(*manifest_entry_schema), std::nullopt); ICEBERG_ASSIGN_OR_RAISE( auto reader, diff --git a/src/iceberg/manifest_reader.h b/src/iceberg/manifest_reader.h index 989b2d69f..7dc0a2077 100644 --- a/src/iceberg/manifest_reader.h +++ b/src/iceberg/manifest_reader.h @@ -26,7 +26,7 @@ #include #include "iceberg/iceberg_export.h" -#include "iceberg/manifest_list.h" +#include "iceberg/result.h" #include "iceberg/type_fwd.h" namespace iceberg { diff --git a/src/iceberg/manifest_reader_internal.cc b/src/iceberg/manifest_reader_internal.cc index a62731cfe..60a9e7926 100644 --- a/src/iceberg/manifest_reader_internal.cc +++ b/src/iceberg/manifest_reader_internal.cc @@ -544,7 +544,8 @@ Result> ManifestReaderImpl::Entries() const { // Apply inheritance to all entries for (auto& entry : manifest_entries) { - ICEBERG_ASSIGN_OR_RAISE(entry, inheritable_metadata_->Apply(entry)); + auto status = inheritable_metadata_->Apply(entry); + ICEBERG_RETURN_UNEXPECTED(status); } return manifest_entries; diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index ddbaf558c..13f7b325c 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -27,6 +27,7 @@ #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/avro/avro_register.h" #include "iceberg/manifest_entry.h" +#include "iceberg/manifest_list.h" #include "iceberg/schema.h" #include "temp_file_test_base.h" #include "test_common.h" From a8657047554e79634bef0aad3d415030c58882c8 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 20 Aug 2025 10:55:32 +0800 Subject: [PATCH 11/16] modify test --- test/manifest_reader_test.cc | 207 ++++++++++++++--------------------- 1 file changed, 84 insertions(+), 123 deletions(-) diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index 13f7b325c..2fd1b56ec 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -34,7 +34,7 @@ namespace iceberg { -class ManifestReaderV1Test : public TempFileTestBase { +class ManifestReaderTestBase : public TempFileTestBase { protected: static void SetUpTestSuite() { avro::RegisterAll(); } @@ -44,7 +44,44 @@ class ManifestReaderV1Test : public TempFileTestBase { file_io_ = std::make_shared(local_fs_); } - std::vector PrepareV1ManifestEntries() { + void TestManifestReading(const std::string& resource_name, + const std::vector& expected_entries, + std::shared_ptr partition_schema = nullptr) { + std::string path = GetResourcePath(resource_name); + auto manifest_reader_result = ManifestReader::Make(path, file_io_, partition_schema); + ASSERT_EQ(manifest_reader_result.has_value(), true) + << manifest_reader_result.error().message; + + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Entries(); + ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; + ASSERT_EQ(read_result.value().size(), expected_entries.size()); + ASSERT_EQ(read_result.value(), expected_entries); + } + + void TestManifestReadingWithManifestFile( + const ManifestFile& manifest_file, + const std::vector& expected_entries, + std::shared_ptr partition_schema = nullptr) { + auto manifest_reader_result = + ManifestReader::Make(manifest_file, file_io_, partition_schema); + ASSERT_EQ(manifest_reader_result.has_value(), true) + << manifest_reader_result.error().message; + + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Entries(); + ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; + ASSERT_EQ(read_result.value().size(), expected_entries.size()); + ASSERT_EQ(read_result.value(), expected_entries); + } + + std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; + std::shared_ptr file_io_; +}; + +class ManifestReaderV1Test : public ManifestReaderTestBase { + protected: + std::vector PreparePartitionedTestData() { std::vector manifest_entries; std::string test_dir_prefix = "/tmp/db/db/iceberg_test/data/"; std::vector paths = { @@ -96,38 +133,22 @@ class ManifestReaderV1Test : public TempFileTestBase { } return manifest_entries; } - - std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; - std::shared_ptr file_io_; }; -TEST_F(ManifestReaderV1Test, V1PartitionedBasicTest) { +TEST_F(ManifestReaderV1Test, PartitionedTest) { iceberg::SchemaField partition_field(1000, "order_ts_hour", iceberg::int32(), true); auto partition_schema = std::make_shared(std::vector({partition_field})); - std::string path = GetResourcePath("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro"); - auto manifest_reader_result = ManifestReader::Make(path, file_io_, partition_schema); - ASSERT_EQ(manifest_reader_result.has_value(), true) - << manifest_reader_result.error().message; - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Entries(); - ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; - - auto expected_entries = PrepareV1ManifestEntries(); - ASSERT_EQ(read_result.value(), expected_entries); + auto expected_entries = PreparePartitionedTestData(); + TestManifestReading("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro", expected_entries, + partition_schema); } -class ManifestReaderV2Test : public TempFileTestBase { +class ManifestReaderV2Test : public ManifestReaderTestBase { protected: - static void SetUpTestSuite() { avro::RegisterAll(); } - - void SetUp() override { - TempFileTestBase::SetUp(); - local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); - file_io_ = std::make_shared(local_fs_); - } - - std::vector PrepareV2NonPartitionedManifestEntries() { + std::vector CreateV2TestData( + std::optional sequence_number = std::nullopt, + std::optional partition_spec_id = std::nullopt) { std::vector manifest_entries; std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/"; @@ -149,104 +170,53 @@ class ManifestReaderV2Test : public TempFileTestBase { {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}}, {4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}}; + DataFile data_file{.file_path = test_dir_prefix + paths[0], + .file_format = FileFormatType::kParquet, + .record_count = record_counts[0], + .file_size_in_bytes = file_sizes[0], + .column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}}, + .value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}}, + .null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}}, + .nan_value_counts = {{4, 0}}, + .lower_bounds = lower_bounds[0], + .upper_bounds = upper_bounds[0], + .key_metadata = {}, + .split_offsets = {4}, + .equality_ids = {}, + .sort_order_id = 0, + .first_row_id = std::nullopt, + .referenced_data_file = std::nullopt, + .content_offset = std::nullopt, + .content_size_in_bytes = std::nullopt}; + + if (partition_spec_id.has_value()) { + data_file.partition_spec_id = partition_spec_id.value(); + } + manifest_entries.emplace_back( ManifestEntry{.status = ManifestStatus::kAdded, .snapshot_id = 679879563479918846LL, - .sequence_number = std::nullopt, - .file_sequence_number = std::nullopt, - .data_file = std::make_shared( - DataFile{.file_path = test_dir_prefix + paths[0], - .file_format = FileFormatType::kParquet, - .record_count = record_counts[0], - .file_size_in_bytes = file_sizes[0], - .column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}}, - .value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}}, - .null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}}, - .nan_value_counts = {{4, 0}}, - .lower_bounds = lower_bounds[0], - .upper_bounds = upper_bounds[0], - .key_metadata = {}, - .split_offsets = {4}, - .equality_ids = {}, - .sort_order_id = 0, - .first_row_id = std::nullopt, - .referenced_data_file = std::nullopt, - .content_offset = std::nullopt, - .content_size_in_bytes = std::nullopt})}); + .sequence_number = sequence_number, + .file_sequence_number = sequence_number, + .data_file = std::make_shared(data_file)}); return manifest_entries; } - std::vector prepareV2ManifestEntryMetadataInheritance() { - std::vector manifest_entries; - std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/"; - - std::vector paths = { - "00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet"}; - - std::vector file_sizes = {1344}; - std::vector record_counts = {4}; - - std::vector>> lower_bounds = { - {{1, {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 'f', 'o', 'u', 'r'}}, - {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '1'}}, - {4, {0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xdc, 0x5e, 0x40}}}}; - - std::vector>> upper_bounds = { - {{1, {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, - {2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 't', 'w', 'o'}}, - {3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}}, - {4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}}; - - manifest_entries.emplace_back( - ManifestEntry{.status = ManifestStatus::kAdded, - .snapshot_id = 679879563479918846LL, - .sequence_number = 15, - .file_sequence_number = 15, - .data_file = std::make_shared( - DataFile{.file_path = test_dir_prefix + paths[0], - .file_format = FileFormatType::kParquet, - .record_count = record_counts[0], - .file_size_in_bytes = file_sizes[0], - .column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}}, - .value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}}, - .null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}}, - .nan_value_counts = {{4, 0}}, - .lower_bounds = lower_bounds[0], - .upper_bounds = upper_bounds[0], - .key_metadata = {}, - .split_offsets = {4}, - .equality_ids = {}, - .sort_order_id = 0, - .partition_spec_id = 12, // inherit from manifest - .first_row_id = std::nullopt, - .referenced_data_file = std::nullopt, - .content_offset = std::nullopt, - .content_size_in_bytes = std::nullopt})}); - return manifest_entries; + std::vector PrepareNonPartitionedTestData() { + return CreateV2TestData(); } - std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; - std::shared_ptr file_io_; + std::vector PrepareMetadataInheritanceTestData() { + return CreateV2TestData(15, 12); + } }; -TEST_F(ManifestReaderV2Test, V2NonPartitionedBasicTest) { - std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro"); - - auto manifest_reader_result = ManifestReader::Make(path, file_io_, nullptr); - ASSERT_EQ(manifest_reader_result.has_value(), true) - << manifest_reader_result.error().message; - - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Entries(); - ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; - ASSERT_EQ(read_result.value().size(), 1); - - auto expected_entries = PrepareV2NonPartitionedManifestEntries(); - ASSERT_EQ(read_result.value(), expected_entries); +TEST_F(ManifestReaderV2Test, NonPartitionedTest) { + auto expected_entries = PrepareNonPartitionedTestData(); + TestManifestReading("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro", expected_entries); } -TEST_F(ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest) { +TEST_F(ManifestReaderV2Test, MetadataInheritanceTest) { std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro"); ManifestFile manifest_file{ .manifest_path = path, @@ -256,17 +226,8 @@ TEST_F(ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest) { .sequence_number = 15, .added_snapshot_id = 679879563479918846LL, }; - auto manifest_reader_result = ManifestReader::Make(manifest_file, file_io_, nullptr); - ASSERT_EQ(manifest_reader_result.has_value(), true) - << manifest_reader_result.error().message; - - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Entries(); - ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; - ASSERT_EQ(read_result.value().size(), 1); - - auto expected_entries = prepareV2ManifestEntryMetadataInheritance(); - ASSERT_EQ(read_result.value(), expected_entries); + auto expected_entries = PrepareMetadataInheritanceTestData(); + TestManifestReadingWithManifestFile(manifest_file, expected_entries); } } // namespace iceberg From 48f34b913676801d38b324648de302de15f62eeb Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 20 Aug 2025 11:37:53 +0800 Subject: [PATCH 12/16] fix review --- src/iceberg/inheritable_metadata.cc | 7 +++++-- src/iceberg/inheritable_metadata.h | 19 ++++--------------- src/iceberg/manifest_reader.cc | 11 ++++++----- src/iceberg/manifest_reader_internal.cc | 3 +-- src/iceberg/manifest_writer.h | 2 +- test/manifest_reader_test.cc | 10 +++++----- 6 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index 0e78c4e45..dac7e921d 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -23,6 +23,9 @@ #include #include "iceberg/manifest_entry.h" +#include "iceberg/manifest_list.h" +#include "iceberg/snapshot.h" +#include "iceberg/type_fwd.h" namespace iceberg { @@ -66,7 +69,7 @@ Status BaseInheritableMetadata::Apply(ManifestEntry& entry) { Status EmptyInheritableMetadata::Apply(ManifestEntry& entry) { if (!entry.snapshot_id.has_value()) { - return InvalidArgument( + return InvalidManifest( "Entries must have explicit snapshot ids if inherited metadata is empty"); } return {}; @@ -88,7 +91,7 @@ Result> InheritableMetadataFactory::FromMan const ManifestFile& manifest) { // Validate that the manifest has a snapshot ID assigned if (manifest.added_snapshot_id == Snapshot::kInvalidSnapshotId) { - return InvalidArgument("Manifest file {} has no snapshot ID", manifest.manifest_path); + return InvalidManifest("Manifest file {} has no snapshot ID", manifest.manifest_path); } return std::make_unique( diff --git a/src/iceberg/inheritable_metadata.h b/src/iceberg/inheritable_metadata.h index f5b6b5169..f06693a42 100644 --- a/src/iceberg/inheritable_metadata.h +++ b/src/iceberg/inheritable_metadata.h @@ -26,17 +26,15 @@ #include #include -#include - #include "iceberg/iceberg_export.h" -#include "iceberg/manifest_entry.h" -#include "iceberg/manifest_list.h" +#include "iceberg/result.h" +#include "iceberg/type_fwd.h" namespace iceberg { /// \brief Interface for applying inheritable metadata to manifest entries. /// -/// When manifest entries have null values for certain fields (snapshot_id, +/// When manifest entries have null values for certain fields (snapshot id, /// data sequence number, file sequence number), these values should be inherited /// from the manifest file. This interface provides a way to apply such inheritance rules. class ICEBERG_EXPORT InheritableMetadata { @@ -45,7 +43,7 @@ class ICEBERG_EXPORT InheritableMetadata { /// \brief Apply inheritable metadata to a manifest entry. /// \param entry The manifest entry to modify. - /// \return The modified manifest entry with inherited metadata applied. + /// \return Status indicating success or failure. virtual Status Apply(ManifestEntry& entry) = 0; }; @@ -61,9 +59,6 @@ class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { BaseInheritableMetadata(int32_t spec_id, int64_t snapshot_id, int64_t sequence_number, std::string manifest_location); - /// \brief Apply inheritance rules to a manifest entry. - /// \param entry The manifest entry to modify. - /// \return The modified manifest entry. Status Apply(ManifestEntry& entry) override; private: @@ -76,9 +71,6 @@ class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata { /// \brief Empty implementation that applies no inheritance. class ICEBERG_EXPORT EmptyInheritableMetadata : public InheritableMetadata { public: - /// \brief Apply no inheritance - returns the entry unchanged. - /// \param entry The manifest entry (unchanged). - /// \return The manifest entry. Status Apply(ManifestEntry& entry) override; }; @@ -89,9 +81,6 @@ class ICEBERG_EXPORT CopyInheritableMetadata : public InheritableMetadata { /// \param snapshot_id The snapshot ID to use for copying. explicit CopyInheritableMetadata(int64_t snapshot_id); - /// \brief Apply copy inheritance rules. - /// \param entry The manifest entry to modify. - /// \return The modified manifest entry. Status Apply(ManifestEntry& entry) override; private: diff --git a/src/iceberg/manifest_reader.cc b/src/iceberg/manifest_reader.cc index eed8878e3..3f62b19f8 100644 --- a/src/iceberg/manifest_reader.cc +++ b/src/iceberg/manifest_reader.cc @@ -36,11 +36,12 @@ Result> ManifestReader::Make( std::shared_ptr schema = FromStructType(std::move(*manifest_entry_schema), std::nullopt); - ICEBERG_ASSIGN_OR_RAISE( - auto reader, - ReaderFactoryRegistry::Open(FileFormatType::kAvro, {.path = manifest.manifest_path, - .io = std::move(file_io), - .projection = schema})); + ICEBERG_ASSIGN_OR_RAISE(auto reader, + ReaderFactoryRegistry::Open(FileFormatType::kAvro, + {.path = manifest.manifest_path, + .length = manifest.manifest_length, + .io = std::move(file_io), + .projection = schema})); // Create inheritable metadata for this manifest ICEBERG_ASSIGN_OR_RAISE(auto inheritable_metadata, InheritableMetadataFactory::FromManifest(manifest)); diff --git a/src/iceberg/manifest_reader_internal.cc b/src/iceberg/manifest_reader_internal.cc index 60a9e7926..0ff743edb 100644 --- a/src/iceberg/manifest_reader_internal.cc +++ b/src/iceberg/manifest_reader_internal.cc @@ -544,8 +544,7 @@ Result> ManifestReaderImpl::Entries() const { // Apply inheritance to all entries for (auto& entry : manifest_entries) { - auto status = inheritable_metadata_->Apply(entry); - ICEBERG_RETURN_UNEXPECTED(status); + ICEBERG_RETURN_UNEXPECTED(inheritable_metadata_->Apply(entry)); } return manifest_entries; diff --git a/src/iceberg/manifest_writer.h b/src/iceberg/manifest_writer.h index 3c5091b3d..a946136c5 100644 --- a/src/iceberg/manifest_writer.h +++ b/src/iceberg/manifest_writer.h @@ -25,8 +25,8 @@ #include #include -#include "iceberg/file_writer.h" #include "iceberg/iceberg_export.h" +#include "iceberg/manifest_entry.h" #include "iceberg/type_fwd.h" namespace iceberg { diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index 2fd1b56ec..db703c172 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -49,12 +49,12 @@ class ManifestReaderTestBase : public TempFileTestBase { std::shared_ptr partition_schema = nullptr) { std::string path = GetResourcePath(resource_name); auto manifest_reader_result = ManifestReader::Make(path, file_io_, partition_schema); - ASSERT_EQ(manifest_reader_result.has_value(), true) + ASSERT_TRUE(manifest_reader_result.has_value()) << manifest_reader_result.error().message; auto manifest_reader = std::move(manifest_reader_result.value()); auto read_result = manifest_reader->Entries(); - ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; + ASSERT_TRUE(read_result.has_value()) << read_result.error().message; ASSERT_EQ(read_result.value().size(), expected_entries.size()); ASSERT_EQ(read_result.value(), expected_entries); } @@ -65,12 +65,12 @@ class ManifestReaderTestBase : public TempFileTestBase { std::shared_ptr partition_schema = nullptr) { auto manifest_reader_result = ManifestReader::Make(manifest_file, file_io_, partition_schema); - ASSERT_EQ(manifest_reader_result.has_value(), true) + ASSERT_TRUE(manifest_reader_result.has_value()) << manifest_reader_result.error().message; auto manifest_reader = std::move(manifest_reader_result.value()); auto read_result = manifest_reader->Entries(); - ASSERT_EQ(read_result.has_value(), true) << read_result.error().message; + ASSERT_TRUE(read_result.has_value()) << read_result.error().message; ASSERT_EQ(read_result.value().size(), expected_entries.size()); ASSERT_EQ(read_result.value(), expected_entries); } @@ -207,7 +207,7 @@ class ManifestReaderV2Test : public ManifestReaderTestBase { } std::vector PrepareMetadataInheritanceTestData() { - return CreateV2TestData(15, 12); + return CreateV2TestData(/*sequence_number=*/15, /*partition_spec_id*/ 12); } }; From fd8d2250516d392761bc5b67d65427589f3cd41b Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 20 Aug 2025 14:24:42 +0800 Subject: [PATCH 13/16] v1 --- src/iceberg/inheritable_metadata.cc | 1 - src/iceberg/manifest_reader.cc | 3 +-- src/iceberg/manifest_writer.h | 5 ++--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index dac7e921d..606933237 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -25,7 +25,6 @@ #include "iceberg/manifest_entry.h" #include "iceberg/manifest_list.h" #include "iceberg/snapshot.h" -#include "iceberg/type_fwd.h" namespace iceberg { diff --git a/src/iceberg/manifest_reader.cc b/src/iceberg/manifest_reader.cc index 3f62b19f8..42606fbf7 100644 --- a/src/iceberg/manifest_reader.cc +++ b/src/iceberg/manifest_reader.cc @@ -19,12 +19,11 @@ #include "iceberg/manifest_reader.h" -#include - #include "iceberg/manifest_entry.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader_internal.h" #include "iceberg/schema.h" +#include "iceberg/schema_internal.h" #include "iceberg/util/macros.h" namespace iceberg { diff --git a/src/iceberg/manifest_writer.h b/src/iceberg/manifest_writer.h index a946136c5..d77309d6f 100644 --- a/src/iceberg/manifest_writer.h +++ b/src/iceberg/manifest_writer.h @@ -25,9 +25,8 @@ #include #include +#include "iceberg/file_writer.h" #include "iceberg/iceberg_export.h" -#include "iceberg/manifest_entry.h" -#include "iceberg/type_fwd.h" namespace iceberg { @@ -43,7 +42,7 @@ class ICEBERG_EXPORT ManifestWriter { /// \param file_io File IO implementation to use. /// \return A Result containing the writer or an error. static Result> MakeWriter( - std::string_view manifest_location, std::shared_ptr file_io, + std::string_view manifest_location, std::shared_ptr file_io, std::shared_ptr partition_schema); }; From 84d62bd4838ba0b41db3c07a1ea90c121c8ec567 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 20 Aug 2025 14:25:35 +0800 Subject: [PATCH 14/16] v1 --- src/iceberg/manifest_writer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/iceberg/manifest_writer.h b/src/iceberg/manifest_writer.h index d77309d6f..3c5091b3d 100644 --- a/src/iceberg/manifest_writer.h +++ b/src/iceberg/manifest_writer.h @@ -27,6 +27,7 @@ #include "iceberg/file_writer.h" #include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" namespace iceberg { @@ -42,7 +43,7 @@ class ICEBERG_EXPORT ManifestWriter { /// \param file_io File IO implementation to use. /// \return A Result containing the writer or an error. static Result> MakeWriter( - std::string_view manifest_location, std::shared_ptr file_io, + std::string_view manifest_location, std::shared_ptr file_io, std::shared_ptr partition_schema); }; From 82fe891e9c447fc7292efc3615e6b02681e59776 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 20 Aug 2025 14:30:16 +0800 Subject: [PATCH 15/16] fmt --- src/iceberg/inheritable_metadata.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index 606933237..366cf9fea 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -44,7 +44,7 @@ Status BaseInheritableMetadata::Apply(ManifestEntry& entry) { // In v1 tables, the data sequence number is not persisted and can be safely defaulted // to 0. // In v2 tables, the data sequence number should be inherited iff the entry status - // is ADDED + // is ADDED. if (!entry.sequence_number.has_value() && (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { entry.sequence_number = sequence_number_; @@ -52,8 +52,8 @@ Status BaseInheritableMetadata::Apply(ManifestEntry& entry) { // In v1 tables, the file sequence number is not persisted and can be safely defaulted // to 0. - // In v2 tables, the file sequence number should be inherited iff the entry status - // is ADDED + // In v2 tables, the file sequence number should be inherited iff the entry status + // is ADDED. if (!entry.file_sequence_number.has_value() && (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { entry.file_sequence_number = sequence_number_; From 2de3512fafbdff3b0cd0e001a0f45067b20e6b02 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 25 Aug 2025 17:50:11 +0800 Subject: [PATCH 16/16] fix review --- src/iceberg/inheritable_metadata.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index 366cf9fea..ae0920873 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -22,6 +22,8 @@ #include #include +#include + #include "iceberg/manifest_entry.h" #include "iceberg/manifest_list.h" #include "iceberg/snapshot.h" @@ -41,18 +43,18 @@ Status BaseInheritableMetadata::Apply(ManifestEntry& entry) { entry.snapshot_id = snapshot_id_; } - // In v1 tables, the data sequence number is not persisted and can be safely defaulted + // In v1 metadata, the data sequence number is not persisted and can be safely defaulted // to 0. - // In v2 tables, the data sequence number should be inherited iff the entry status + // In v2 metadata, the data sequence number should be inherited iff the entry status // is ADDED. if (!entry.sequence_number.has_value() && (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { entry.sequence_number = sequence_number_; } - // In v1 tables, the file sequence number is not persisted and can be safely defaulted + // In v1 metadata, the file sequence number is not persisted and can be safely defaulted // to 0. - // In v2 tables, the file sequence number should be inherited iff the entry status + // In v2 metadata, the file sequence number should be inherited iff the entry status // is ADDED. if (!entry.file_sequence_number.has_value() && (sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) { @@ -61,6 +63,8 @@ Status BaseInheritableMetadata::Apply(ManifestEntry& entry) { if (entry.data_file) { entry.data_file->partition_spec_id = spec_id_; + } else { + return InvalidManifest("Manifest entry has no data file"); } return {};