Skip to content

Commit ad10678

Browse files
committed
bump clang-tidy version for cpp-linter
1 parent 316f42a commit ad10678

File tree

4 files changed

+60
-59
lines changed

4 files changed

+60
-59
lines changed

.github/workflows/cpp-linter.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
with:
4848
style: file
4949
tidy-checks: ''
50-
version: 19
50+
version: 22
5151
files-changed-only: true
5252
lines-changed-only: true
5353
thread-comments: true

.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ jobs:
3030
steps:
3131
- uses: actions/checkout@v4
3232
- uses: actions/setup-python@v5
33-
- uses: pre-commit/[email protected]
33+
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1

src/iceberg/parquet/parquet_schema_util.cc

Lines changed: 46 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
* under the License.
1818
*/
1919

20+
#include <charconv>
21+
2022
#include <arrow/type.h>
2123
#include <arrow/type_fwd.h>
2224
#include <arrow/util/key_value_metadata.h>
@@ -48,20 +50,19 @@ std::optional<int32_t> FieldIdFromMetadata(
4850
}
4951
std::string field_id_str = metadata->value(key);
5052
int32_t field_id = -1;
51-
try {
52-
field_id = std::stoi(field_id_str);
53-
} catch (const std::invalid_argument& e) {
54-
return std::nullopt;
55-
} catch (const std::out_of_range& e) {
53+
auto [_, ec] = std::from_chars(field_id_str.data(),
54+
field_id_str.data() + field_id_str.size(), field_id);
55+
if (ec != std::errc() || field_id < 0) {
5656
return std::nullopt;
5757
}
58-
return field_id < 0 ? std::nullopt : std::make_optional(field_id);
58+
return field_id;
5959
}
6060

6161
std::optional<int32_t> GetFieldId(const ::parquet::arrow::SchemaField& parquet_field) {
6262
return FieldIdFromMetadata(parquet_field.field->metadata());
6363
}
6464

65+
// TODO(gangwu): support v3 unknown type
6566
Status ValidateParquetSchemaEvolution(
6667
const Type& expected_type, const ::parquet::arrow::SchemaField& parquet_field) {
6768
const auto& arrow_type = parquet_field.field->type();
@@ -189,7 +190,7 @@ Status ValidateParquetSchemaEvolution(
189190

190191
// Forward declaration
191192
Result<FieldProjection> ProjectNested(
192-
const Type& expected_type,
193+
const Type& nested_type,
193194
const std::vector<::parquet::arrow::SchemaField>& parquet_fields);
194195

195196
Result<FieldProjection> ProjectStruct(
@@ -212,25 +213,25 @@ Result<FieldProjection> ProjectStruct(
212213
std::piecewise_construct, std::forward_as_tuple(field_id.value()),
213214
std::forward_as_tuple(i, parquet_field));
214215
!inserted) [[unlikely]] {
215-
return InvalidSchema("Duplicate field id found in Parquet schema: {}",
216+
return InvalidSchema("Duplicate field id {} found in Parquet schema",
216217
field_id.value());
217218
}
218219
}
219220

220221
FieldProjection result;
221222
result.children.reserve(struct_type.fields().size());
222223

223-
for (const auto& expected_field : struct_type.fields()) {
224-
int32_t field_id = expected_field.field_id();
224+
for (const auto& field : struct_type.fields()) {
225+
int32_t field_id = field.field_id();
225226
FieldProjection child_projection;
226227

227228
if (auto iter = field_context_map.find(field_id); iter != field_context_map.cend()) {
228229
const auto& parquet_field = iter->second.parquet_field;
229230
ICEBERG_RETURN_UNEXPECTED(
230-
ValidateParquetSchemaEvolution(*expected_field.type(), parquet_field));
231-
if (expected_field.type()->is_nested()) {
232-
ICEBERG_ASSIGN_OR_RAISE(child_projection, ProjectNested(*expected_field.type(),
233-
parquet_field.children));
231+
ValidateParquetSchemaEvolution(*field.type(), parquet_field));
232+
if (field.type()->is_nested()) {
233+
ICEBERG_ASSIGN_OR_RAISE(child_projection,
234+
ProjectNested(*field.type(), parquet_field.children));
234235
} else {
235236
child_projection.attributes =
236237
std::make_shared<ParquetExtraAttributes>(parquet_field.column_index);
@@ -239,7 +240,7 @@ Result<FieldProjection> ProjectStruct(
239240
child_projection.kind = FieldProjection::Kind::kProjected;
240241
} else if (MetadataColumns::IsMetadataColumn(field_id)) {
241242
child_projection.kind = FieldProjection::Kind::kMetadata;
242-
} else if (expected_field.optional()) {
243+
} else if (field.optional()) {
243244
child_projection.kind = FieldProjection::Kind::kNull;
244245
} else {
245246
return InvalidSchema("Missing required field with id: {}", field_id);
@@ -266,20 +267,19 @@ Result<FieldProjection> ProjectList(
266267
return InvalidSchema("List element field missing field id");
267268
}
268269

269-
const auto& expected_element_field = list_type.fields().back();
270-
if (expected_element_field.field_id() != element_field_id.value()) {
270+
const auto& element_field = list_type.fields().back();
271+
if (element_field.field_id() != element_field_id.value()) {
271272
return InvalidSchema("List element field id mismatch, expected {}, got {}",
272-
expected_element_field.field_id(), element_field_id.value());
273+
element_field.field_id(), element_field_id.value());
273274
}
274275

275276
ICEBERG_RETURN_UNEXPECTED(
276-
ValidateParquetSchemaEvolution(*expected_element_field.type(), parquet_field));
277+
ValidateParquetSchemaEvolution(*element_field.type(), parquet_field));
277278

278279
FieldProjection element_projection;
279-
if (expected_element_field.type()->is_nested()) {
280-
ICEBERG_ASSIGN_OR_RAISE(
281-
element_projection,
282-
ProjectNested(*expected_element_field.type(), parquet_field.children));
280+
if (element_field.type()->is_nested()) {
281+
ICEBERG_ASSIGN_OR_RAISE(element_projection,
282+
ProjectNested(*element_field.type(), parquet_field.children));
283283
} else {
284284
element_projection.attributes =
285285
std::make_shared<ParquetExtraAttributes>(parquet_field.column_index);
@@ -310,28 +310,29 @@ Result<FieldProjection> ProjectMap(
310310
return InvalidSchema("Map value field missing field id");
311311
}
312312

313-
const auto& expected_key_field = map_type.key();
314-
const auto& expected_value_field = map_type.value();
315-
if (expected_key_field.field_id() != key_field_id.value()) {
313+
const auto& key_field = map_type.key();
314+
const auto& value_field = map_type.value();
315+
if (key_field.field_id() != key_field_id.value()) {
316316
return InvalidSchema("Map key field id mismatch, expected {}, got {}",
317-
expected_key_field.field_id(), key_field_id.value());
317+
key_field.field_id(), key_field_id.value());
318318
}
319-
if (expected_value_field.field_id() != value_field_id.value()) {
319+
if (value_field.field_id() != value_field_id.value()) {
320320
return InvalidSchema("Map value field id mismatch, expected {}, got {}",
321-
expected_value_field.field_id(), value_field_id.value());
321+
value_field.field_id(), value_field_id.value());
322322
}
323323

324324
FieldProjection result;
325+
result.children.reserve(2);
325326

326327
for (size_t i = 0; i < parquet_fields.size(); ++i) {
327328
FieldProjection sub_projection;
328329
const auto& sub_node = parquet_fields[i];
329-
const auto& expected_sub_field = map_type.fields()[i];
330+
const auto& sub_field = map_type.fields()[i];
330331
ICEBERG_RETURN_UNEXPECTED(
331-
ValidateParquetSchemaEvolution(*expected_sub_field.type(), sub_node));
332-
if (expected_sub_field.type()->is_nested()) {
333-
ICEBERG_ASSIGN_OR_RAISE(
334-
sub_projection, ProjectNested(*expected_sub_field.type(), sub_node.children));
332+
ValidateParquetSchemaEvolution(*sub_field.type(), sub_node));
333+
if (sub_field.type()->is_nested()) {
334+
ICEBERG_ASSIGN_OR_RAISE(sub_projection,
335+
ProjectNested(*sub_field.type(), sub_node.children));
335336
} else {
336337
sub_projection.attributes =
337338
std::make_shared<ParquetExtraAttributes>(sub_node.column_index);
@@ -345,18 +346,18 @@ Result<FieldProjection> ProjectMap(
345346
}
346347

347348
Result<FieldProjection> ProjectNested(
348-
const Type& expected_type,
349+
const Type& nested_type,
349350
const std::vector<::parquet::arrow::SchemaField>& parquet_fields) {
350-
if (!expected_type.is_nested()) {
351-
return InvalidSchema("Expected a nested type, but got {}", expected_type);
351+
if (!nested_type.is_nested()) {
352+
return InvalidSchema("Expected a nested type, but got {}", nested_type);
352353
}
353354

354-
switch (expected_type.type_id()) {
355+
switch (nested_type.type_id()) {
355356
case TypeId::kStruct:
356-
return ProjectStruct(internal::checked_cast<const StructType&>(expected_type),
357+
return ProjectStruct(internal::checked_cast<const StructType&>(nested_type),
357358
parquet_fields);
358359
case TypeId::kList:
359-
return ProjectList(internal::checked_cast<const ListType&>(expected_type),
360+
return ProjectList(internal::checked_cast<const ListType&>(nested_type),
360361
parquet_fields);
361362
case TypeId::kMap:
362363
if (parquet_fields.size() != 1 ||
@@ -365,20 +366,20 @@ Result<FieldProjection> ProjectNested(
365366
return InvalidSchema(
366367
"Map type must have exactly one struct field with two children");
367368
}
368-
return ProjectMap(internal::checked_cast<const MapType&>(expected_type),
369+
return ProjectMap(internal::checked_cast<const MapType&>(nested_type),
369370
parquet_fields[0].children);
370371
default:
371-
return InvalidSchema("Unsupported nested type: {}", expected_type);
372+
return InvalidSchema("Unsupported nested type: {}", nested_type);
372373
}
373374
}
374375

375376
void CollectColumnIds(const FieldProjection& field_projection,
376377
std::vector<int32_t>* column_ids) {
377378
if (field_projection.attributes) {
378-
auto parquet_attributes = internal::checked_cast<const ParquetExtraAttributes&>(
379+
const auto& attributes = internal::checked_cast<const ParquetExtraAttributes&>(
379380
*field_projection.attributes);
380-
if (parquet_attributes.column_id) {
381-
column_ids->push_back(parquet_attributes.column_id.value());
381+
if (attributes.column_id) {
382+
column_ids->push_back(attributes.column_id.value());
382383
}
383384
}
384385
for (const auto& child : field_projection.children) {

test/parquet_schema_test.cc

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
*/
1919

2020
#include <arrow/type.h>
21-
#include <gtest/gtest.h>
2221
#include <parquet/arrow/reader.h>
2322
#include <parquet/arrow/schema.h>
2423
#include <parquet/schema.h>
@@ -128,25 +127,26 @@ ::parquet::arrow::SchemaManifest MakeSchemaManifest(
128127

129128
} // namespace
130129

131-
TEST(HasFieldIds, PrimitiveNode) {
130+
TEST(HasFieldIdsTest, PrimitiveNode) {
132131
EXPECT_FALSE(HasFieldIds(MakeInt32Node("test_field")));
133132
EXPECT_TRUE(HasFieldIds(MakeInt32Node("test_field", /*field_id=*/1)));
134133
}
135134

136-
TEST(HasFieldIds, GroupNode) {
137-
auto group_node_without_field_id =
138-
MakeGroupNode("test_group", {MakeInt32Node("c1"), MakeInt32Node("c2")});
139-
EXPECT_FALSE(HasFieldIds(group_node_without_field_id));
135+
TEST(HasFieldIdsTest, GroupNode) {
136+
// Group node without field id
137+
EXPECT_FALSE(HasFieldIds(
138+
MakeGroupNode("test_group", {MakeInt32Node("c1"), MakeInt32Node("c2")})));
140139

141-
auto group_node_with_full_field_id = MakeGroupNode(
140+
// Group node with full field id
141+
EXPECT_TRUE(HasFieldIds(MakeGroupNode(
142142
"test_group",
143143
{MakeInt32Node("c1", /*field_id=*/2), MakeInt32Node("c2", /*field_id=*/3)},
144-
/*field_id=*/1);
145-
EXPECT_TRUE(HasFieldIds(group_node_with_full_field_id));
144+
/*field_id=*/1)));
146145

147-
auto group_node_with_partial_field_id = MakeGroupNode(
148-
"test_group", {MakeInt32Node("c1", /*field_id=*/1), MakeInt32Node("c2")});
149-
EXPECT_TRUE(HasFieldIds(group_node_with_partial_field_id));
146+
// Group node with partial field id
147+
EXPECT_TRUE(HasFieldIds(MakeGroupNode("test_group", {MakeInt32Node("c1",
148+
/*field_id=*/1),
149+
MakeInt32Node("c2")})));
150150
}
151151

152152
TEST(ParquetSchemaProjectionTest, ProjectIdenticalSchemas) {

0 commit comments

Comments
 (0)