Skip to content

Commit 9177557

Browse files
zhjwpku and Fokko authored
feat: add manifest related structures (#91)
Add DataFile, ManifestEntry, ManifestFile, and ManifestList to Iceberg core. Support for parsing these data structures from Avro file will be added in future PRs. --------- Signed-off-by: Junwang Zhao <[email protected]> Co-authored-by: Fokko Driesprong <[email protected]>
1 parent 1c4c047 commit 9177557

File tree

8 files changed

+695
-2
lines changed

8 files changed

+695
-2
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ set(ICEBERG_SOURCES
2323
expression/expression.cc
2424
file_reader.cc
2525
json_internal.cc
26+
manifest_entry.cc
27+
manifest_list.cc
2628
metadata_columns.cc
2729
name_mapping.cc
2830
partition_field.cc

src/iceberg/file_format.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <string_view>
2626

2727
#include "iceberg/iceberg_export.h"
28+
#include "iceberg/result.h"
2829

2930
namespace iceberg {
3031

@@ -50,4 +51,14 @@ ICEBERG_EXPORT inline std::string_view ToString(FileFormatType format_type) {
5051
}
5152
}
5253

54+
/// \brief Convert a string to a FileFormatType
55+
ICEBERG_EXPORT constexpr Result<FileFormatType> FileFormatTypeFromString(
56+
std::string_view str) noexcept {
57+
if (str == "parquet") return FileFormatType::kParquet;
58+
if (str == "avro") return FileFormatType::kAvro;
59+
if (str == "orc") return FileFormatType::kOrc;
60+
if (str == "puffin") return FileFormatType::kPuffin;
61+
return InvalidArgument("Invalid file format type: {}", str);
62+
}
63+
5364
} // namespace iceberg

src/iceberg/manifest_entry.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/manifest_entry.h"
21+
22+
#include <memory>
23+
#include <vector>
24+
25+
#include "iceberg/schema_field.h"
26+
#include "iceberg/type.h"
27+
28+
namespace iceberg {
29+
30+
std::shared_ptr<StructType> DataFile::Type(std::shared_ptr<StructType> partition_type) {
31+
return std::make_shared<StructType>(std::vector<SchemaField>{
32+
kContent,
33+
kFilePath,
34+
kFileFormat,
35+
SchemaField::MakeRequired(102, "partition", std::move(partition_type)),
36+
kRecordCount,
37+
kFileSize,
38+
kColumnSizes,
39+
kValueCounts,
40+
kNullValueCounts,
41+
kNanValueCounts,
42+
kLowerBounds,
43+
kUpperBounds,
44+
kKeyMetadata,
45+
kSplitOffsets,
46+
kEqualityIds,
47+
kSortOrderId,
48+
kFirstRowId,
49+
kReferencedDataFile,
50+
kContentOffset,
51+
kContentSize});
52+
}
53+
54+
std::shared_ptr<StructType> ManifestEntry::TypeFromPartitionType(
55+
std::shared_ptr<StructType> partition_type) {
56+
return TypeFromDataFileType(DataFile::Type(std::move(partition_type)));
57+
}
58+
59+
std::shared_ptr<StructType> ManifestEntry::TypeFromDataFileType(
60+
std::shared_ptr<StructType> datafile_type) {
61+
return std::make_shared<StructType>(std::vector<SchemaField>{
62+
kStatus, kSnapshotId, kSequenceNumber, kFileSequenceNumber,
63+
SchemaField::MakeRequired(2, "data_file", std::move(datafile_type))});
64+
}
65+
66+
} // namespace iceberg

0 commit comments

Comments
 (0)