Skip to content

Commit 49bddfc

Browse files
committed
feat: add row-based immutable data structure
- Add StructLike, MapLike, and ArrayLike interfaces - Add wrapper for ManifestFile and ArrowArray
1 parent a54f116 commit 49bddfc

File tree

10 files changed

+901
-19
lines changed

10 files changed

+901
-19
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ set(ICEBERG_SOURCES
3131
name_mapping.cc
3232
partition_field.cc
3333
partition_spec.cc
34+
row/arrow_array_wrapper.cc
35+
row/manifest_wrapper.cc
3436
schema.cc
3537
schema_field.cc
3638
schema_internal.cc
@@ -98,6 +100,7 @@ iceberg_install_all_headers(iceberg)
98100

99101
add_subdirectory(catalog)
100102
add_subdirectory(expression)
103+
add_subdirectory(row)
101104
add_subdirectory(util)
102105

103106
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/iceberg_export.h

src/iceberg/manifest_reader_internal.cc

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -227,66 +227,67 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
227227
auto field_name = field.value().get().name();
228228
bool required = !field.value().get().optional();
229229
auto view_of_column = array_view.children[idx];
230-
switch (idx) {
231-
case 0:
230+
ICEBERG_ASSIGN_OR_RAISE(auto manifest_file_field, ManifestFileFieldFromIndex(idx));
231+
switch (manifest_file_field) {
232+
case ManifestFileField::kManifestPath:
232233
PARSE_STRING_FIELD(manifest_files[row_idx].manifest_path, view_of_column);
233234
break;
234-
case 1:
235+
case ManifestFileField::kManifestLength:
235236
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].manifest_length, view_of_column,
236237
int64_t);
237238
break;
238-
case 2:
239+
case ManifestFileField::kPartitionSpecId:
239240
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].partition_spec_id, view_of_column,
240241
int32_t);
241242
break;
242-
case 3:
243+
case ManifestFileField::kContent:
243244
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].content, view_of_column,
244245
ManifestFile::Content);
245246
break;
246-
case 4:
247+
case ManifestFileField::kSequenceNumber:
247248
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].sequence_number, view_of_column,
248249
int64_t);
249250
break;
250-
case 5:
251+
case ManifestFileField::kMinSequenceNumber:
251252
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].min_sequence_number, view_of_column,
252253
int64_t);
253254
break;
254-
case 6:
255+
case ManifestFileField::kAddedSnapshotId:
255256
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].added_snapshot_id, view_of_column,
256257
int64_t);
257258
break;
258-
case 7:
259+
case ManifestFileField::kAddedFilesCount:
259260
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].added_files_count, view_of_column,
260261
int32_t);
261262
break;
262-
case 8:
263+
case ManifestFileField::kExistingFilesCount:
263264
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].existing_files_count,
264265
view_of_column, int32_t);
265266
break;
266-
case 9:
267+
case ManifestFileField::kDeletedFilesCount:
267268
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].deleted_files_count, view_of_column,
268269
int32_t);
269270
break;
270-
case 10:
271+
case ManifestFileField::kAddedRowsCount:
271272
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].added_rows_count, view_of_column,
272273
int64_t);
273274
break;
274-
case 11:
275+
case ManifestFileField::kExistingRowsCount:
275276
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].existing_rows_count, view_of_column,
276277
int64_t);
277278
break;
278-
case 12:
279+
case ManifestFileField::kDeletedRowsCount:
279280
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].deleted_rows_count, view_of_column,
280281
int64_t);
281282
break;
282-
case 13:
283+
case ManifestFileField::kPartitionFieldSummary:
283284
ICEBERG_RETURN_UNEXPECTED(
284285
ParsePartitionFieldSummaryList(view_of_column, manifest_files));
285286
break;
286-
case 14:
287+
case ManifestFileField::kKeyMetadata:
287288
PARSE_BINARY_FIELD(manifest_files[row_idx].key_metadata, view_of_column);
288289
break;
289-
case 15:
290+
case ManifestFileField::kFirstRowId:
290291
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].first_row_id, view_of_column,
291292
int64_t);
292293
break;
@@ -565,4 +566,11 @@ Result<std::vector<ManifestFile>> ManifestListReaderImpl::Files() const {
565566
return manifest_files;
566567
}
567568

569+
/// \brief Convert a zero-based index into the matching ManifestFileField.
///
/// \param index Candidate field index.
/// \return The enumerator whose underlying value equals `index` when `index`
/// lies in [0, kNextId); otherwise an InvalidArgument error that reports the
/// rejected index.
Result<ManifestFileField> ManifestFileFieldFromIndex(int32_t index) {
  // kNextId is one past the last real field, so it serves as the exclusive bound.
  constexpr auto kFieldCount = static_cast<int32_t>(ManifestFileField::kNextId);

  const bool out_of_range = index < 0 || index >= kFieldCount;
  if (out_of_range) {
    return InvalidArgument("Invalid manifest file field index: {}", index);
  }
  return static_cast<ManifestFileField>(index);
}
575+
568576
} // namespace iceberg

src/iceberg/manifest_reader_internal.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,26 @@ class ManifestListReaderImpl : public ManifestListReader {
5454
std::unique_ptr<Reader> reader_;
5555
};
5656

57+
/// \brief Identifies one field of a ManifestFile by its zero-based position.
///
/// The underlying values are used as indices: ManifestFileFieldFromIndex()
/// casts a raw index straight to this enum, so every enumerator is pinned to an
/// explicit value and the values must stay contiguous starting at 0.
enum class ManifestFileField : int32_t {
  kManifestPath = 0,
  kManifestLength = 1,
  kPartitionSpecId = 2,
  kContent = 3,
  kSequenceNumber = 4,
  kMinSequenceNumber = 5,
  kAddedSnapshotId = 6,
  kAddedFilesCount = 7,
  kExistingFilesCount = 8,
  kDeletedFilesCount = 9,
  kAddedRowsCount = 10,
  kExistingRowsCount = 11,
  kDeletedRowsCount = 12,
  kPartitionFieldSummary = 13,
  kKeyMetadata = 14,
  kFirstRowId = 15,
  // Sentinel, not a real field: one past the last valid value. It is the
  // exclusive upper bound checked by ManifestFileFieldFromIndex().
  kNextId = 16,
};
76+
77+
/// \brief Map a zero-based index to the corresponding ManifestFileField.
///
/// \param index Index to convert.
/// \return The matching ManifestFileField when `index` is in [0, kNextId), or
/// an InvalidArgument error when it is negative or not less than kNextId.
Result<ManifestFileField> ManifestFileFieldFromIndex(int32_t index);
78+
5779
} // namespace iceberg

src/iceberg/row/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# NOTE(review): iceberg_install_all_headers is a project helper defined outside
# this file; presumably it installs the headers found under iceberg/row - confirm.
iceberg_install_all_headers(iceberg/row)

0 commit comments

Comments
 (0)