Skip to content

Commit aaf679a

Browse files
committed
fix: implement manifest entry metadata inheritance system
- Add InheritableMetadata interface and implementations for applying metadata inheritance rules to manifest entries - Fix ManifestReader::Make methods to properly handle Result types from InheritableMetadataFactory methods - Move constructor/destructor definitions from header to implementation to avoid redefinition errors - Add missing ManifestListReaderImpl::Files() method implementation - Update method names from MakeReader to Make for consistency This ensures proper inheritance of snapshot_id, sequence_number, and file_sequence_number from manifest metadata to individual entries according to Iceberg specification.
1 parent a54f116 commit aaf679a

File tree

6 files changed

+284
-10
lines changed

6 files changed

+284
-10
lines changed
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/inheritable_metadata.h"
21+
22+
#include <cassert>
23+
#include <utility>
24+
25+
#include <iceberg/result.h>
26+
27+
#include "iceberg/manifest_entry.h"
28+
29+
namespace iceberg {
30+
31+
BaseInheritableMetadata::BaseInheritableMetadata(int32_t spec_id, int64_t snapshot_id,
32+
int64_t sequence_number,
33+
std::string manifest_location)
34+
: spec_id_(spec_id),
35+
snapshot_id_(snapshot_id),
36+
sequence_number_(sequence_number),
37+
manifest_location_(std::move(manifest_location)) {}
38+
39+
Result<ManifestEntry> BaseInheritableMetadata::Apply(ManifestEntry entry) {
40+
if (!entry.snapshot_id.has_value()) {
41+
entry.snapshot_id = snapshot_id_;
42+
}
43+
44+
// In v1 tables, the data sequence number is not persisted and can be safely defaulted
45+
// to 0 In v2 tables, the data sequence number should be inherited iff the entry status
46+
// is ADDED
47+
if (!entry.sequence_number.has_value() &&
48+
(sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) {
49+
entry.sequence_number = sequence_number_;
50+
}
51+
52+
// In v1 tables, the file sequence number is not persisted and can be safely defaulted
53+
// to 0 In v2 tables, the file sequence number should be inherited iff the entry status
54+
// is ADDED
55+
if (!entry.file_sequence_number.has_value() &&
56+
(sequence_number_ == 0 || entry.status == ManifestStatus::kAdded)) {
57+
entry.file_sequence_number = sequence_number_;
58+
}
59+
60+
if (entry.data_file) {
61+
entry.data_file->partition_spec_id = spec_id_;
62+
}
63+
64+
return entry;
65+
}
66+
67+
Result<ManifestEntry> EmptyInheritableMetadata::Apply(ManifestEntry entry) {
68+
return entry;
69+
}
70+
71+
CopyInheritableMetadata::CopyInheritableMetadata(int64_t snapshot_id)
72+
: snapshot_id_(snapshot_id) {}
73+
74+
Result<ManifestEntry> CopyInheritableMetadata::Apply(ManifestEntry entry) {
75+
if (!entry.snapshot_id.has_value()) {
76+
entry.snapshot_id = snapshot_id_;
77+
}
78+
return entry;
79+
}
80+
81+
Result<std::unique_ptr<InheritableMetadata>> InheritableMetadataFactory::Empty() {
82+
return std::make_unique<EmptyInheritableMetadata>();
83+
}
84+
85+
Result<std::unique_ptr<InheritableMetadata>> InheritableMetadataFactory::FromManifest(
86+
const ManifestFile& manifest) {
87+
// Validate that the manifest has a snapshot ID assigned
88+
if (manifest.added_snapshot_id == Snapshot::kInvalidSnapshotId) {
89+
return InvalidArgument("Manifest file {} has no snapshot ID", manifest.manifest_path);
90+
}
91+
92+
return std::make_unique<BaseInheritableMetadata>(
93+
manifest.partition_spec_id, manifest.added_snapshot_id, manifest.sequence_number,
94+
manifest.manifest_path);
95+
}
96+
97+
Result<std::unique_ptr<InheritableMetadata>> InheritableMetadataFactory::ForCopy(
98+
int64_t snapshot_id) {
99+
return std::make_unique<CopyInheritableMetadata>(snapshot_id);
100+
}
101+
102+
} // namespace iceberg

src/iceberg/inheritable_metadata.h

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/inheritable_metadata.h
23+
/// Metadata inheritance system for manifest entries.
24+
25+
#include <cstdint>
26+
#include <memory>
27+
#include <string>
28+
29+
#include <iceberg/result.h>
30+
31+
#include "iceberg/iceberg_export.h"
32+
#include "iceberg/manifest_entry.h"
33+
#include "iceberg/manifest_list.h"
34+
35+
namespace iceberg {
36+
37+
/// \brief Interface for applying inheritable metadata to manifest entries.
38+
///
39+
/// When manifest entries have null values for certain fields (snapshot_id,
40+
/// data sequence number, file sequence number), these values should be inherited
41+
/// from the manifest file. This interface provides a way to apply such inheritance rules.
42+
class ICEBERG_EXPORT InheritableMetadata {
43+
public:
44+
virtual ~InheritableMetadata() = default;
45+
46+
/// \brief Apply inheritable metadata to a manifest entry.
47+
/// \param entry The manifest entry to modify.
48+
/// \return The modified manifest entry with inherited metadata applied.
49+
virtual Result<ManifestEntry> Apply(ManifestEntry entry) = 0;
50+
};
51+
52+
/// \brief Base implementation of InheritableMetadata that handles standard inheritance
53+
/// rules.
54+
class ICEBERG_EXPORT BaseInheritableMetadata : public InheritableMetadata {
55+
public:
56+
/// \brief Constructor for base inheritable metadata.
57+
/// \param spec_id Partition spec ID from the manifest.
58+
/// \param snapshot_id Snapshot ID from the manifest.
59+
/// \param sequence_number Sequence number from the manifest.
60+
/// \param manifest_location Path to the manifest file.
61+
BaseInheritableMetadata(int32_t spec_id, int64_t snapshot_id, int64_t sequence_number,
62+
std::string manifest_location);
63+
64+
/// \brief Apply inheritance rules to a manifest entry.
65+
/// \param entry The manifest entry to modify.
66+
/// \return The modified manifest entry.
67+
Result<ManifestEntry> Apply(ManifestEntry entry) override;
68+
69+
private:
70+
int32_t spec_id_;
71+
int64_t snapshot_id_;
72+
int64_t sequence_number_;
73+
std::string manifest_location_;
74+
};
75+
76+
/// \brief Empty implementation that applies no inheritance.
77+
class ICEBERG_EXPORT EmptyInheritableMetadata : public InheritableMetadata {
78+
public:
79+
/// \brief Apply no inheritance - returns the entry unchanged.
80+
Result<ManifestEntry> Apply(ManifestEntry entry) override;
81+
};
82+
83+
/// \brief Metadata inheritance for copying manifests before commit.
84+
class ICEBERG_EXPORT CopyInheritableMetadata : public InheritableMetadata {
85+
public:
86+
/// \brief Constructor for copy metadata.
87+
/// \param snapshot_id The snapshot ID to use for copying.
88+
explicit CopyInheritableMetadata(int64_t snapshot_id);
89+
90+
/// \brief Apply copy inheritance rules.
91+
Result<ManifestEntry> Apply(ManifestEntry entry) override;
92+
93+
private:
94+
int64_t snapshot_id_;
95+
};
96+
97+
/// \brief Factory for creating InheritableMetadata instances.
98+
class ICEBERG_EXPORT InheritableMetadataFactory {
99+
public:
100+
/// \brief Create an empty metadata instance that applies no inheritance.
101+
static Result<std::unique_ptr<InheritableMetadata>> Empty();
102+
103+
/// \brief Create metadata instance from a manifest file.
104+
/// \param manifest The manifest file to extract metadata from.
105+
/// \return Inheritable metadata based on the manifest.
106+
static Result<std::unique_ptr<InheritableMetadata>> FromManifest(
107+
const ManifestFile& manifest);
108+
109+
/// \brief Create metadata instance for rewriting a manifest before commit.
110+
/// \param snapshot_id The snapshot ID for the copy operation.
111+
/// \return Inheritable metadata for copying.
112+
static Result<std::unique_ptr<InheritableMetadata>> ForCopy(int64_t snapshot_id);
113+
114+
private:
115+
InheritableMetadataFactory() = default;
116+
};
117+
118+
} // namespace iceberg

src/iceberg/manifest_reader.cc

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "iceberg/manifest_reader.h"
2121

22+
#include "iceberg/file_reader.h"
2223
#include "iceberg/manifest_entry.h"
2324
#include "iceberg/manifest_list.h"
2425
#include "iceberg/manifest_reader_internal.h"
@@ -27,7 +28,33 @@
2728

2829
namespace iceberg {
2930

30-
Result<std::unique_ptr<ManifestReader>> ManifestReader::MakeReader(
31+
Result<std::unique_ptr<ManifestReader>> ManifestReader::Make(
32+
const ManifestFile& manifest, std::shared_ptr<FileIO> file_io,
33+
std::shared_ptr<Schema> partition_schema) {
34+
// Validate manifest content type - only DATA manifests are supported by ManifestReader
35+
if (manifest.content != ManifestFile::Content::kData) {
36+
return InvalidArgument("Cannot read a delete manifest with a ManifestReader: {}",
37+
manifest.manifest_path);
38+
}
39+
40+
auto manifest_entry_schema = ManifestEntry::TypeFromPartitionType(partition_schema);
41+
auto fields_span = manifest_entry_schema->fields();
42+
std::vector<SchemaField> fields(fields_span.begin(), fields_span.end());
43+
auto schema = std::make_shared<Schema>(fields);
44+
ICEBERG_ASSIGN_OR_RAISE(
45+
auto reader,
46+
ReaderFactoryRegistry::Open(FileFormatType::kAvro, {.path = manifest.manifest_path,
47+
.io = std::move(file_io),
48+
.projection = schema}));
49+
// Create inheritable metadata for this manifest
50+
ICEBERG_ASSIGN_OR_RAISE(auto inheritable_metadata,
51+
InheritableMetadataFactory::FromManifest(manifest));
52+
53+
return std::make_unique<ManifestReaderImpl>(std::move(reader), std::move(schema),
54+
std::move(inheritable_metadata));
55+
}
56+
57+
Result<std::unique_ptr<ManifestReader>> ManifestReader::Make(
3158
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
3259
std::shared_ptr<Schema> partition_schema) {
3360
auto manifest_entry_schema = ManifestEntry::TypeFromPartitionType(partition_schema);
@@ -39,10 +66,12 @@ Result<std::unique_ptr<ManifestReader>> ManifestReader::MakeReader(
3966
{.path = std::string(manifest_location),
4067
.io = std::move(file_io),
4168
.projection = schema}));
42-
return std::make_unique<ManifestReaderImpl>(std::move(reader), std::move(schema));
69+
ICEBERG_ASSIGN_OR_RAISE(auto inheritable_metadata, InheritableMetadataFactory::Empty());
70+
return std::make_unique<ManifestReaderImpl>(std::move(reader), std::move(schema),
71+
std::move(inheritable_metadata));
4372
}
4473

45-
Result<std::unique_ptr<ManifestListReader>> ManifestListReader::MakeReader(
74+
Result<std::unique_ptr<ManifestListReader>> ManifestListReader::Make(
4675
std::string_view manifest_list_location, std::shared_ptr<FileIO> file_io) {
4776
std::vector<SchemaField> fields(ManifestFile::Type().fields().begin(),
4877
ManifestFile::Type().fields().end());

src/iceberg/manifest_reader.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include "iceberg/file_reader.h"
2929
#include "iceberg/iceberg_export.h"
30+
#include "iceberg/manifest_list.h"
3031
#include "iceberg/type_fwd.h"
3132

3233
namespace iceberg {
@@ -37,11 +38,21 @@ class ICEBERG_EXPORT ManifestReader {
3738
virtual ~ManifestReader() = default;
3839
virtual Result<std::vector<ManifestEntry>> Entries() const = 0;
3940

41+
/// \brief Creates a reader for a manifest file.
42+
/// \param manifest A ManifestFile object containing metadata about the manifest.
43+
/// \param file_io File IO implementation to use.
44+
/// \param partition_schema Schema for the partition.
45+
/// \return A Result containing the reader or an error.
46+
static Result<std::unique_ptr<ManifestReader>> Make(
47+
const ManifestFile& manifest, std::shared_ptr<FileIO> file_io,
48+
std::shared_ptr<Schema> partition_schema);
49+
4050
/// \brief Creates a reader for a manifest file.
4151
/// \param manifest_location Path to the manifest file.
4252
/// \param file_io File IO implementation to use.
53+
/// \param partition_schema Schema for the partition.
4354
/// \return A Result containing the reader or an error.
44-
static Result<std::unique_ptr<ManifestReader>> MakeReader(
55+
static Result<std::unique_ptr<ManifestReader>> Make(
4556
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
4657
std::shared_ptr<Schema> partition_schema);
4758
};
@@ -56,7 +67,7 @@ class ICEBERG_EXPORT ManifestListReader {
5667
/// \param manifest_list_location Path to the manifest list file.
5768
/// \param file_io File IO implementation to use.
5869
/// \return A Result containing the reader or an error.
59-
static Result<std::unique_ptr<ManifestListReader>> MakeReader(
70+
static Result<std::unique_ptr<ManifestListReader>> Make(
6071
std::string_view manifest_list_location, std::shared_ptr<FileIO> file_io);
6172
};
6273

src/iceberg/manifest_reader_internal.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "iceberg/schema.h"
3131
#include "iceberg/type.h"
3232
#include "iceberg/util/macros.h"
33+
#include "iceberg/util/visit_type.h"
3334

3435
namespace iceberg {
3536

@@ -541,6 +542,12 @@ Result<std::vector<ManifestEntry>> ManifestReaderImpl::Entries() const {
541542
break;
542543
}
543544
}
545+
546+
// Apply inheritance to all entries
547+
for (auto& entry : manifest_entries) {
548+
ICEBERG_ASSIGN_OR_RAISE(entry, inheritable_metadata_->Apply(std::move(entry)));
549+
}
550+
544551
return manifest_entries;
545552
}
546553

src/iceberg/manifest_reader_internal.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,30 +22,37 @@
2222
/// \file iceberg/internal/manifest_reader_internal.h
2323
/// Reader implementation for manifest list files and manifest files.
2424

25+
#include "iceberg/file_reader.h"
26+
#include "iceberg/inheritable_metadata.h"
2527
#include "iceberg/manifest_reader.h"
2628

2729
namespace iceberg {
2830

2931
/// \brief Read manifest entries from a manifest file.
3032
class ManifestReaderImpl : public ManifestReader {
3133
public:
32-
explicit ManifestReaderImpl(std::unique_ptr<Reader> reader,
33-
std::shared_ptr<Schema> schema)
34-
: schema_(std::move(schema)), reader_(std::move(reader)) {}
34+
ManifestReaderImpl(std::unique_ptr<Reader> reader, std::shared_ptr<Schema> schema,
35+
std::unique_ptr<InheritableMetadata> inheritable_metadata)
36+
: schema_(std::move(schema)),
37+
reader_(std::move(reader)),
38+
inheritable_metadata_(std::move(inheritable_metadata)) {}
39+
40+
ManifestReaderImpl() = default;
3541

3642
Result<std::vector<ManifestEntry>> Entries() const override;
3743

3844
private:
3945
std::shared_ptr<Schema> schema_;
4046
std::unique_ptr<Reader> reader_;
47+
std::unique_ptr<InheritableMetadata> inheritable_metadata_;
4148
};
4249

4350
/// \brief Read manifest files from a manifest list file.
4451
class ManifestListReaderImpl : public ManifestListReader {
4552
public:
46-
explicit ManifestListReaderImpl(std::unique_ptr<Reader> reader,
47-
std::shared_ptr<Schema> schema)
53+
ManifestListReaderImpl(std::unique_ptr<Reader> reader, std::shared_ptr<Schema> schema)
4854
: schema_(std::move(schema)), reader_(std::move(reader)) {}
55+
ManifestListReaderImpl() = default;
4956

5057
Result<std::vector<ManifestFile>> Files() const override;
5158

0 commit comments

Comments
 (0)