1919
2020#include " iceberg/manifest_adapter.h"
2121
22+ #include < memory>
2223#include < utility>
2324
2425#include < nanoarrow/nanoarrow.h>
2526
2627#include " iceberg/arrow/nanoarrow_status_internal.h"
2728#include " iceberg/manifest_entry.h"
2829#include " iceberg/manifest_list.h"
30+ #include " iceberg/partition_summary_internal.h"
2931#include " iceberg/result.h"
3032#include " iceberg/schema.h"
3133#include " iceberg/schema_internal.h"
@@ -141,10 +143,12 @@ Result<ArrowArray*> ManifestAdapter::FinishAppending() {
141143 return &array_;
142144}
143145
144- ManifestEntryAdapter::ManifestEntryAdapter (std::shared_ptr<PartitionSpec> partition_spec,
146+ ManifestEntryAdapter::ManifestEntryAdapter (std::optional<int64_t > snapshot_id_,
147+ std::shared_ptr<PartitionSpec> partition_spec,
145148 std::shared_ptr<Schema> current_schema,
146149 ManifestContent content)
147- : partition_spec_(std::move(partition_spec)),
150+ : snapshot_id_(snapshot_id_),
151+ partition_spec_ (std::move(partition_spec)),
148152 current_schema_(std::move(current_schema)),
149153 content_(content) {
150154 if (!partition_spec_) {
@@ -161,6 +165,110 @@ ManifestEntryAdapter::~ManifestEntryAdapter() {
161165 }
162166}
163167
168+ Status ManifestEntryAdapter::AddEntry (ManifestEntry& entry) {
169+ ICEBERG_RETURN_UNEXPECTED (CheckDataFile (*entry.data_file ));
170+ entry.status = ManifestStatus::kAdded ;
171+ entry.snapshot_id = snapshot_id_;
172+ if (entry.sequence_number .has_value () &&
173+ entry.sequence_number .value () < TableMetadata::kInitialSequenceNumber ) {
174+ entry.sequence_number = std::nullopt ;
175+ }
176+ entry.file_sequence_number = std::nullopt ;
177+ return AddEntryInternal (entry);
178+ }
179+
180+ Status ManifestEntryAdapter::AddDeleteEntry (ManifestEntry& entry) {
181+ ICEBERG_RETURN_UNEXPECTED (CheckDataFile (*entry.data_file ));
182+ entry.status = ManifestStatus::kDeleted ;
183+ entry.snapshot_id = snapshot_id_;
184+ return AddEntryInternal (entry);
185+ }
186+
187+ Status ManifestEntryAdapter::AddExistingEntry (ManifestEntry& entry) {
188+ ICEBERG_RETURN_UNEXPECTED (CheckDataFile (*entry.data_file ));
189+ entry.status = ManifestStatus::kExisting ;
190+ return AddEntryInternal (entry);
191+ }
192+
193+ ManifestFile ManifestEntryAdapter::ToManifestFile () const {
194+ ManifestFile manifest_file;
195+ manifest_file.partition_spec_id = partition_spec_->spec_id ();
196+ manifest_file.content = content_;
197+ // sequence_number and min_sequence_number with kInvalidSequenceNumber will be
198+ // replace with real sequence number in `ManifestListWriter`.
199+ manifest_file.sequence_number = TableMetadata::kInvalidSequenceNumber ;
200+ manifest_file.min_sequence_number =
201+ min_sequence_number_.value_or (TableMetadata::kInvalidSequenceNumber );
202+ manifest_file.existing_files_count = existing_files_count_;
203+ manifest_file.added_snapshot_id = snapshot_id_.value_or (Snapshot::kInvalidSnapshotId );
204+ manifest_file.added_files_count = add_files_count_;
205+ manifest_file.existing_files_count = existing_files_count_;
206+ manifest_file.deleted_files_count = delete_files_count_;
207+ manifest_file.added_rows_count = add_rows_count_;
208+ manifest_file.existing_rows_count = existing_rows_count_;
209+ manifest_file.deleted_rows_count = delete_rows_count_;
210+ manifest_file.partitions = std::move (partition_summary_->Summaries ());
211+ return manifest_file;
212+ }
213+
214+ Status ManifestEntryAdapter::CheckDataFile (const DataFile& file) const {
215+ switch (content_) {
216+ case ManifestContent::kData :
217+ if (file.content != DataFile::Content::kData ) {
218+ return InvalidArgument (
219+ " Manifest content type: data, data file content should be: data, but got: {}" ,
220+ ToString (file.content ));
221+ }
222+ break ;
223+ case ManifestContent::kDeletes :
224+ if (file.content != DataFile::Content::kPositionDeletes &&
225+ file.content != DataFile::Content::kEqualityDeletes ) {
226+ return InvalidArgument (
227+ " Manifest content type: deletes, data file content should be: "
228+ " position_deletes or equality_deletes, but got: {}" ,
229+ ToString (file.content ));
230+ }
231+ break ;
232+ default :
233+ std::unreachable ();
234+ }
235+ return {};
236+ }
237+
238+ Status ManifestEntryAdapter::AddEntryInternal (const ManifestEntry& entry) {
239+ if (entry.data_file == nullptr ) [[unlikely]] {
240+ return InvalidManifest (" Missing required data_file field from manifest entry." );
241+ }
242+
243+ switch (entry.status ) {
244+ case ManifestStatus::kAdded :
245+ add_files_count_++;
246+ add_rows_count_ += entry.data_file ->record_count ;
247+ break ;
248+ case ManifestStatus::kExisting :
249+ existing_files_count_++;
250+ existing_rows_count_ += entry.data_file ->record_count ;
251+ break ;
252+ case ManifestStatus::kDeleted :
253+ delete_files_count_++;
254+ delete_rows_count_ += entry.data_file ->record_count ;
255+ break ;
256+ default :
257+ std::unreachable ();
258+ }
259+
260+ partition_summary_->Update (entry.data_file ->partition );
261+
262+ if (entry.IsAlive () && entry.sequence_number .has_value ()) {
263+ if (!min_sequence_number_.has_value () ||
264+ entry.sequence_number .value () < min_sequence_number_.value ()) {
265+ min_sequence_number_ = entry.sequence_number .value ();
266+ }
267+ }
268+
269+ return AppendInternal (entry);
270+ }
271+
164272Status ManifestEntryAdapter::AppendPartitionValues (
165273 ArrowArray* array, const std::shared_ptr<StructType>& partition_type,
166274 const std::vector<Literal>& partition_values) {
0 commit comments