Skip to content

Commit 5841811

Browse files
committed
fix more review comments
Signed-off-by: Junwang Zhao <[email protected]>
1 parent d69d437 commit 5841811

File tree

3 files changed

+88
-59
lines changed

3 files changed

+88
-59
lines changed

src/iceberg/manifest_entry.h

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ struct ICEBERG_EXPORT DataFile {
138138
std::map<int32_t, std::vector<uint8_t>> upper_bounds;
139139
/// Field id: 131
140140
/// Implementation-specific key metadata for encryption
141-
std::optional<std::vector<uint8_t>> key_metadata;
141+
std::vector<uint8_t> key_metadata;
142142
/// Field id: 132
143143
/// Element Field id: 133
144144
/// Split offsets for the data file. For example, all row group offsets in a Parquet
@@ -190,78 +190,95 @@ struct ICEBERG_EXPORT DataFile {
190190
/// present
191191
std::optional<int64_t> content_size_in_bytes;
192192

193-
inline static const SchemaField kContent =
194-
SchemaField::MakeRequired(134, "content", std::make_shared<IntType>());
195-
inline static const SchemaField kFilePath =
196-
SchemaField::MakeRequired(100, "file_path", std::make_shared<StringType>());
193+
inline static const SchemaField kContent = SchemaField::MakeRequired(
194+
134, "content", std::make_shared<IntType>(),
195+
"Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
196+
inline static const SchemaField kFilePath = SchemaField::MakeRequired(
197+
100, "file_path", std::make_shared<StringType>(), "Location URI with FS scheme");
197198
inline static const SchemaField kFileFormat =
198-
SchemaField::MakeRequired(101, "file_format", std::make_shared<IntType>());
199-
inline static const SchemaField kRecordCount =
200-
SchemaField::MakeRequired(103, "record_count", std::make_shared<LongType>());
199+
SchemaField::MakeRequired(101, "file_format", std::make_shared<IntType>(),
200+
"File format name: avro, orc, or parquet");
201+
inline static const SchemaField kRecordCount = SchemaField::MakeRequired(
202+
103, "record_count", std::make_shared<LongType>(), "Number of records in the file");
201203
inline static const SchemaField kFileSize =
202-
SchemaField::MakeRequired(104, "file_size_in_bytes", std::make_shared<LongType>());
204+
SchemaField::MakeRequired(104, "file_size_in_bytes", std::make_shared<LongType>(),
205+
"Total file size in bytes");
203206
inline static const SchemaField kColumnSizes = SchemaField::MakeOptional(
204207
108, "column_sizes",
205208
std::make_shared<MapType>(
206209
SchemaField::MakeRequired(117, std::string(MapType::kKeyName),
207210
std::make_shared<IntType>()),
208211
SchemaField::MakeRequired(118, std::string(MapType::kValueName),
209-
std::make_shared<LongType>())));
212+
std::make_shared<LongType>())),
213+
"Map of column id to total size on disk");
210214
inline static const SchemaField kValueCounts = SchemaField::MakeOptional(
211215
109, "value_counts",
212216
std::make_shared<MapType>(
213217
SchemaField::MakeRequired(119, std::string(MapType::kKeyName),
214218
std::make_shared<IntType>()),
215219
SchemaField::MakeRequired(120, std::string(MapType::kValueName),
216-
std::make_shared<LongType>())));
220+
std::make_shared<LongType>())),
221+
"Map of column id to total count, including null and NaN");
217222
inline static const SchemaField kNullValueCounts = SchemaField::MakeOptional(
218223
110, "null_value_counts",
219224
std::make_shared<MapType>(
220225
SchemaField::MakeRequired(121, std::string(MapType::kKeyName),
221226
std::make_shared<IntType>()),
222227
SchemaField::MakeRequired(122, std::string(MapType::kValueName),
223-
std::make_shared<LongType>())));
228+
std::make_shared<LongType>())),
229+
"Map of column id to null value count");
224230
inline static const SchemaField kNanValueCounts = SchemaField::MakeOptional(
225231
137, "nan_value_counts",
226232
std::make_shared<MapType>(
227233
SchemaField::MakeRequired(138, std::string(MapType::kKeyName),
228234
std::make_shared<IntType>()),
229235
SchemaField::MakeRequired(139, std::string(MapType::kValueName),
230-
std::make_shared<LongType>())));
236+
std::make_shared<LongType>())),
237+
"Map of column id to number of NaN values in the column");
231238
inline static const SchemaField kLowerBounds = SchemaField::MakeOptional(
232239
125, "lower_bounds",
233240
std::make_shared<MapType>(
234241
SchemaField::MakeRequired(126, std::string(MapType::kKeyName),
235242
std::make_shared<IntType>()),
236243
SchemaField::MakeRequired(127, std::string(MapType::kValueName),
237-
std::make_shared<BinaryType>())));
244+
std::make_shared<BinaryType>())),
245+
"Map of column id to lower bound");
238246
inline static const SchemaField kUpperBounds = SchemaField::MakeOptional(
239247
128, "upper_bounds",
240248
std::make_shared<MapType>(
241249
SchemaField::MakeRequired(129, std::string(MapType::kKeyName),
242250
std::make_shared<IntType>()),
243251
SchemaField::MakeRequired(130, std::string(MapType::kValueName),
244-
std::make_shared<BinaryType>())));
252+
std::make_shared<BinaryType>())),
253+
"Map of column id to upper bound");
245254
inline static const SchemaField kKeyMetadata =
246-
SchemaField::MakeOptional(131, "key_metadata", std::make_shared<BinaryType>());
255+
SchemaField::MakeOptional(131, "key_metadata", std::make_shared<BinaryType>(),
256+
"Encryption key metadata blob");
247257
inline static const SchemaField kSplitOffsets = SchemaField::MakeOptional(
248258
132, "split_offsets",
249259
std::make_shared<ListType>(SchemaField::MakeRequired(
250-
133, std::string(ListType::kElementName), std::make_shared<LongType>())));
260+
133, std::string(ListType::kElementName), std::make_shared<LongType>())),
261+
"Splittable offsets");
251262
inline static const SchemaField kEqualityIds = SchemaField::MakeOptional(
252263
135, "equality_ids",
253264
std::make_shared<ListType>(SchemaField::MakeRequired(
254-
136, std::string(ListType::kElementName), std::make_shared<IntType>())));
255-
inline static const SchemaField kSortOrderId =
256-
SchemaField::MakeOptional(140, "sort_order_id", std::make_shared<IntType>());
265+
136, std::string(ListType::kElementName), std::make_shared<IntType>())),
266+
"Equality comparison field IDs");
267+
inline static const SchemaField kSortOrderId = SchemaField::MakeOptional(
268+
140, "sort_order_id", std::make_shared<IntType>(), "Sort order ID");
257269
inline static const SchemaField kFirstRowId =
258-
SchemaField::MakeOptional(142, "first_row_id", std::make_shared<LongType>());
270+
SchemaField::MakeOptional(142, "first_row_id", std::make_shared<LongType>(),
271+
"Starting row ID to assign to new rows");
259272
inline static const SchemaField kReferencedDataFile = SchemaField::MakeOptional(
260-
143, "referenced_data_file", std::make_shared<StringType>());
273+
143, "referenced_data_file", std::make_shared<StringType>(),
274+
"Fully qualified location (URI with FS scheme) of a data file that all deletes "
275+
"reference");
261276
inline static const SchemaField kContentOffset =
262-
SchemaField::MakeOptional(144, "content_offset", std::make_shared<LongType>());
277+
SchemaField::MakeOptional(144, "content_offset", std::make_shared<LongType>(),
278+
"The offset in the file where the content starts");
263279
inline static const SchemaField kContentSize = SchemaField::MakeOptional(
264-
145, "content_size_in_bytes", std::make_shared<LongType>());
280+
145, "content_size_in_bytes", std::make_shared<LongType>(),
281+
"The length of referenced content stored in the file");
265282

266283
static std::shared_ptr<StructType> Type(std::shared_ptr<StructType> partition_type);
267284
};

src/iceberg/manifest_list.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,22 @@
2626
namespace iceberg {
2727

2828
const StructType& PartitionFieldSummary::Type() {
29-
static const std::shared_ptr<StructType> instance{new StructType({
29+
static const StructType kInstance{{
3030
PartitionFieldSummary::kConsTainsNull,
3131
PartitionFieldSummary::kContainsNaN,
3232
PartitionFieldSummary::kLowerBound,
3333
PartitionFieldSummary::kUpperBound,
34-
})};
35-
return *instance;
34+
}};
35+
return kInstance;
3636
}
3737

3838
const StructType& ManifestFile::Type() {
39-
static const std::shared_ptr<StructType> instance{new StructType(
39+
static const StructType kInstance(
4040
{kManifestPath, kManifestLength, kPartitionSpecId, kContent, kSequenceNumber,
4141
kMinSequenceNumber, kAddedSnapshotId, kAddedFilesCount, kExistingFilesCount,
4242
kDeletedFilesCount, kAddedRowsCount, kExistingRowsCount, kDeletedRowsCount,
43-
kPartitions, kKeyMetadata, kFirstRowId})};
44-
return *instance;
43+
kPartitions, kKeyMetadata, kFirstRowId});
44+
return kInstance;
4545
}
4646

4747
} // namespace iceberg

src/iceberg/manifest_list.h

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,17 @@ struct ICEBERG_EXPORT PartitionFieldSummary {
8181
std::optional<std::vector<uint8_t>> upper_bound;
8282

8383
inline static const SchemaField kConsTainsNull =
84-
SchemaField::MakeRequired(509, "contains_null", std::make_shared<BooleanType>());
84+
SchemaField::MakeRequired(509, "contains_null", std::make_shared<BooleanType>(),
85+
"True if any file has a null partition value");
8586
inline static const SchemaField kContainsNaN =
86-
SchemaField::MakeOptional(518, "contains_nan", std::make_shared<BooleanType>());
87+
SchemaField::MakeOptional(518, "contains_nan", std::make_shared<BooleanType>(),
88+
"True if any file has a nan partition value");
8789
inline static const SchemaField kLowerBound =
88-
SchemaField::MakeOptional(510, "lower_bound", std::make_shared<BinaryType>());
90+
SchemaField::MakeOptional(510, "lower_bound", std::make_shared<BinaryType>(),
91+
"Partition lower bound for all files");
8992
inline static const SchemaField kUpperBound =
90-
SchemaField::MakeOptional(511, "upper_bound", std::make_shared<BinaryType>());
93+
SchemaField::MakeOptional(511, "upper_bound", std::make_shared<BinaryType>(),
94+
"Partition upper bound for all files");
9195

9296
static const StructType& Type();
9397
};
@@ -165,40 +169,48 @@ struct ICEBERG_EXPORT ManifestFile {
165169
bool has_deleted_files() const { return deleted_files_count.value_or(-1) > 0; }
166170

167171
inline static const SchemaField kManifestPath =
168-
SchemaField::MakeRequired(500, "manifest_path", std::make_shared<StringType>());
169-
inline static const SchemaField kManifestLength =
170-
SchemaField::MakeRequired(501, "manifest_length", std::make_shared<LongType>());
171-
inline static const SchemaField kPartitionSpecId =
172-
SchemaField::MakeRequired(502, "partition_spec_id", std::make_shared<IntType>());
172+
SchemaField::MakeRequired(500, "manifest_path", std::make_shared<StringType>(),
173+
"Location URI with FS scheme");
174+
inline static const SchemaField kManifestLength = SchemaField::MakeRequired(
175+
501, "manifest_length", std::make_shared<LongType>(), "Total file size in bytes");
176+
inline static const SchemaField kPartitionSpecId = SchemaField::MakeRequired(
177+
502, "partition_spec_id", std::make_shared<IntType>(), "Spec ID used to write");
173178
inline static const SchemaField kContent =
174-
SchemaField::MakeOptional(517, "content", std::make_shared<IntType>());
179+
SchemaField::MakeOptional(517, "content", std::make_shared<IntType>(),
180+
"Contents of the manifest: 0=data, 1=deletes");
175181
inline static const SchemaField kSequenceNumber =
176-
SchemaField::MakeOptional(515, "sequence_number", std::make_shared<LongType>());
182+
SchemaField::MakeOptional(515, "sequence_number", std::make_shared<LongType>(),
183+
"Sequence number when the manifest was added");
177184
inline static const SchemaField kMinSequenceNumber =
178-
SchemaField::MakeOptional(516, "min_sequence_number", std::make_shared<LongType>());
185+
SchemaField::MakeOptional(516, "min_sequence_number", std::make_shared<LongType>(),
186+
"Lowest sequence number in the manifest");
179187
inline static const SchemaField kAddedSnapshotId =
180-
SchemaField::MakeRequired(503, "added_snapshot_id", std::make_shared<LongType>());
181-
inline static const SchemaField kAddedFilesCount =
182-
SchemaField::MakeOptional(504, "added_files_count", std::make_shared<IntType>());
183-
inline static const SchemaField kExistingFilesCount =
184-
SchemaField::MakeOptional(505, "existing_files_count", std::make_shared<IntType>());
185-
inline static const SchemaField kDeletedFilesCount =
186-
SchemaField::MakeOptional(506, "deleted_files_count", std::make_shared<IntType>());
187-
inline static const SchemaField kAddedRowsCount =
188-
SchemaField::MakeOptional(512, "added_rows_count", std::make_shared<LongType>());
189-
inline static const SchemaField kExistingRowsCount =
190-
SchemaField::MakeOptional(513, "existing_rows_count", std::make_shared<LongType>());
191-
inline static const SchemaField kDeletedRowsCount =
192-
SchemaField::MakeOptional(514, "deleted_rows_count", std::make_shared<LongType>());
188+
SchemaField::MakeRequired(503, "added_snapshot_id", std::make_shared<LongType>(),
189+
"Snapshot ID that added the manifest");
190+
inline static const SchemaField kAddedFilesCount = SchemaField::MakeOptional(
191+
504, "added_files_count", std::make_shared<IntType>(), "Added entry count");
192+
inline static const SchemaField kExistingFilesCount = SchemaField::MakeOptional(
193+
505, "existing_files_count", std::make_shared<IntType>(), "Existing entry count");
194+
inline static const SchemaField kDeletedFilesCount = SchemaField::MakeOptional(
195+
506, "deleted_files_count", std::make_shared<IntType>(), "Deleted entry count");
196+
inline static const SchemaField kAddedRowsCount = SchemaField::MakeOptional(
197+
512, "added_rows_count", std::make_shared<LongType>(), "Added rows count");
198+
inline static const SchemaField kExistingRowsCount = SchemaField::MakeOptional(
199+
513, "existing_rows_count", std::make_shared<LongType>(), "Existing rows count");
200+
inline static const SchemaField kDeletedRowsCount = SchemaField::MakeOptional(
201+
514, "deleted_rows_count", std::make_shared<LongType>(), "Deleted rows count");
193202
inline static const SchemaField kPartitions = SchemaField::MakeOptional(
194203
507, "partitions",
195204
std::make_shared<ListType>(SchemaField::MakeRequired(
196205
508, std::string(ListType::kElementName),
197-
std::make_shared<StructType>(PartitionFieldSummary::Type()))));
206+
std::make_shared<StructType>(PartitionFieldSummary::Type()))),
207+
"Summary for each partition");
198208
inline static const SchemaField kKeyMetadata =
199-
SchemaField::MakeOptional(519, "key_metadata", std::make_shared<BinaryType>());
200-
inline static const SchemaField kFirstRowId =
201-
SchemaField::MakeOptional(520, "first_row_id", std::make_shared<LongType>());
209+
SchemaField::MakeOptional(519, "key_metadata", std::make_shared<BinaryType>(),
210+
"Encryption key metadata blob");
211+
inline static const SchemaField kFirstRowId = SchemaField::MakeOptional(
212+
520, "first_row_id", std::make_shared<LongType>(),
213+
"Starting row ID to assign to new rows in ADDED data files");
202214

203215
static const StructType& Type();
204216
};

0 commit comments

Comments
 (0)