1717 * under the License.
1818 */
1919
20+ #include < charconv>
21+
2022#include < arrow/type.h>
2123#include < arrow/type_fwd.h>
2224#include < arrow/util/key_value_metadata.h>
@@ -48,20 +50,19 @@ std::optional<int32_t> FieldIdFromMetadata(
4850 }
4951 std::string field_id_str = metadata->value (key);
5052 int32_t field_id = -1 ;
51- try {
52- field_id = std::stoi (field_id_str);
53- } catch (const std::invalid_argument& e) {
54- return std::nullopt ;
55- } catch (const std::out_of_range& e) {
53+ auto [_, ec] = std::from_chars (field_id_str.data (),
54+ field_id_str.data () + field_id_str.size (), field_id);
55+ if (ec != std::errc () || field_id < 0 ) {
5656 return std::nullopt ;
5757 }
58- return field_id < 0 ? std:: nullopt : std::make_optional (field_id) ;
58+ return field_id;
5959}
6060
6161std::optional<int32_t > GetFieldId (const ::parquet::arrow::SchemaField& parquet_field) {
6262 return FieldIdFromMetadata (parquet_field.field ->metadata ());
6363}
6464
65+ // TODO(gangwu): support v3 unknown type
6566Status ValidateParquetSchemaEvolution (
6667 const Type& expected_type, const ::parquet::arrow::SchemaField& parquet_field) {
6768 const auto & arrow_type = parquet_field.field ->type ();
@@ -189,7 +190,7 @@ Status ValidateParquetSchemaEvolution(
189190
190191// Forward declaration
191192Result<FieldProjection> ProjectNested (
192- const Type& expected_type ,
193+ const Type& nested_type ,
193194 const std::vector<::parquet::arrow::SchemaField>& parquet_fields);
194195
195196Result<FieldProjection> ProjectStruct (
@@ -212,25 +213,25 @@ Result<FieldProjection> ProjectStruct(
212213 std::piecewise_construct, std::forward_as_tuple (field_id.value ()),
213214 std::forward_as_tuple (i, parquet_field));
214215 !inserted) [[unlikely]] {
215- return InvalidSchema (" Duplicate field id found in Parquet schema: {} " ,
216+ return InvalidSchema (" Duplicate field id {} found in Parquet schema" ,
216217 field_id.value ());
217218 }
218219 }
219220
220221 FieldProjection result;
221222 result.children .reserve (struct_type.fields ().size ());
222223
223- for (const auto & expected_field : struct_type.fields ()) {
224- int32_t field_id = expected_field .field_id ();
224+ for (const auto & field : struct_type.fields ()) {
225+ int32_t field_id = field .field_id ();
225226 FieldProjection child_projection;
226227
227228 if (auto iter = field_context_map.find (field_id); iter != field_context_map.cend ()) {
228229 const auto & parquet_field = iter->second .parquet_field ;
229230 ICEBERG_RETURN_UNEXPECTED (
230- ValidateParquetSchemaEvolution (*expected_field .type (), parquet_field));
231- if (expected_field .type ()->is_nested ()) {
232- ICEBERG_ASSIGN_OR_RAISE (child_projection, ProjectNested (*expected_field. type (),
233- parquet_field.children ));
231+ ValidateParquetSchemaEvolution (*field .type (), parquet_field));
232+ if (field .type ()->is_nested ()) {
233+ ICEBERG_ASSIGN_OR_RAISE (child_projection,
234+ ProjectNested (*field. type (), parquet_field.children ));
234235 } else {
235236 child_projection.attributes =
236237 std::make_shared<ParquetExtraAttributes>(parquet_field.column_index );
@@ -239,7 +240,7 @@ Result<FieldProjection> ProjectStruct(
239240 child_projection.kind = FieldProjection::Kind::kProjected ;
240241 } else if (MetadataColumns::IsMetadataColumn (field_id)) {
241242 child_projection.kind = FieldProjection::Kind::kMetadata ;
242- } else if (expected_field .optional ()) {
243+ } else if (field .optional ()) {
243244 child_projection.kind = FieldProjection::Kind::kNull ;
244245 } else {
245246 return InvalidSchema (" Missing required field with id: {}" , field_id);
@@ -266,20 +267,19 @@ Result<FieldProjection> ProjectList(
266267 return InvalidSchema (" List element field missing field id" );
267268 }
268269
269- const auto & expected_element_field = list_type.fields ().back ();
270- if (expected_element_field .field_id () != element_field_id.value ()) {
270+ const auto & element_field = list_type.fields ().back ();
271+ if (element_field .field_id () != element_field_id.value ()) {
271272 return InvalidSchema (" List element field id mismatch, expected {}, got {}" ,
272- expected_element_field .field_id (), element_field_id.value ());
273+ element_field .field_id (), element_field_id.value ());
273274 }
274275
275276 ICEBERG_RETURN_UNEXPECTED (
276- ValidateParquetSchemaEvolution (*expected_element_field .type (), parquet_field));
277+ ValidateParquetSchemaEvolution (*element_field .type (), parquet_field));
277278
278279 FieldProjection element_projection;
279- if (expected_element_field.type ()->is_nested ()) {
280- ICEBERG_ASSIGN_OR_RAISE (
281- element_projection,
282- ProjectNested (*expected_element_field.type (), parquet_field.children ));
280+ if (element_field.type ()->is_nested ()) {
281+ ICEBERG_ASSIGN_OR_RAISE (element_projection,
282+ ProjectNested (*element_field.type (), parquet_field.children ));
283283 } else {
284284 element_projection.attributes =
285285 std::make_shared<ParquetExtraAttributes>(parquet_field.column_index );
@@ -310,28 +310,29 @@ Result<FieldProjection> ProjectMap(
310310 return InvalidSchema (" Map value field missing field id" );
311311 }
312312
313- const auto & expected_key_field = map_type.key ();
314- const auto & expected_value_field = map_type.value ();
315- if (expected_key_field .field_id () != key_field_id.value ()) {
313+ const auto & key_field = map_type.key ();
314+ const auto & value_field = map_type.value ();
315+ if (key_field .field_id () != key_field_id.value ()) {
316316 return InvalidSchema (" Map key field id mismatch, expected {}, got {}" ,
317- expected_key_field .field_id (), key_field_id.value ());
317+ key_field .field_id (), key_field_id.value ());
318318 }
319- if (expected_value_field .field_id () != value_field_id.value ()) {
319+ if (value_field .field_id () != value_field_id.value ()) {
320320 return InvalidSchema (" Map value field id mismatch, expected {}, got {}" ,
321- expected_value_field .field_id (), value_field_id.value ());
321+ value_field .field_id (), value_field_id.value ());
322322 }
323323
324324 FieldProjection result;
325+ result.children .reserve (2 );
325326
326327 for (size_t i = 0 ; i < parquet_fields.size (); ++i) {
327328 FieldProjection sub_projection;
328329 const auto & sub_node = parquet_fields[i];
329- const auto & expected_sub_field = map_type.fields ()[i];
330+ const auto & sub_field = map_type.fields ()[i];
330331 ICEBERG_RETURN_UNEXPECTED (
331- ValidateParquetSchemaEvolution (*expected_sub_field .type (), sub_node));
332- if (expected_sub_field .type ()->is_nested ()) {
333- ICEBERG_ASSIGN_OR_RAISE (
334- sub_projection, ProjectNested (*expected_sub_field .type (), sub_node.children ));
332+ ValidateParquetSchemaEvolution (*sub_field .type (), sub_node));
333+ if (sub_field .type ()->is_nested ()) {
334+ ICEBERG_ASSIGN_OR_RAISE (sub_projection,
335+ ProjectNested (*sub_field .type (), sub_node.children ));
335336 } else {
336337 sub_projection.attributes =
337338 std::make_shared<ParquetExtraAttributes>(sub_node.column_index );
@@ -345,18 +346,18 @@ Result<FieldProjection> ProjectMap(
345346}
346347
347348Result<FieldProjection> ProjectNested (
348- const Type& expected_type ,
349+ const Type& nested_type ,
349350 const std::vector<::parquet::arrow::SchemaField>& parquet_fields) {
350- if (!expected_type .is_nested ()) {
351- return InvalidSchema (" Expected a nested type, but got {}" , expected_type );
351+ if (!nested_type .is_nested ()) {
352+ return InvalidSchema (" Expected a nested type, but got {}" , nested_type );
352353 }
353354
354- switch (expected_type .type_id ()) {
355+ switch (nested_type .type_id ()) {
355356 case TypeId::kStruct :
356- return ProjectStruct (internal::checked_cast<const StructType&>(expected_type ),
357+ return ProjectStruct (internal::checked_cast<const StructType&>(nested_type ),
357358 parquet_fields);
358359 case TypeId::kList :
359- return ProjectList (internal::checked_cast<const ListType&>(expected_type ),
360+ return ProjectList (internal::checked_cast<const ListType&>(nested_type ),
360361 parquet_fields);
361362 case TypeId::kMap :
362363 if (parquet_fields.size () != 1 ||
@@ -365,20 +366,20 @@ Result<FieldProjection> ProjectNested(
365366 return InvalidSchema (
366367 " Map type must have exactly one struct field with two children" );
367368 }
368- return ProjectMap (internal::checked_cast<const MapType&>(expected_type ),
369+ return ProjectMap (internal::checked_cast<const MapType&>(nested_type ),
369370 parquet_fields[0 ].children );
370371 default :
371- return InvalidSchema (" Unsupported nested type: {}" , expected_type );
372+ return InvalidSchema (" Unsupported nested type: {}" , nested_type );
372373 }
373374}
374375
375376void CollectColumnIds (const FieldProjection& field_projection,
376377 std::vector<int32_t >* column_ids) {
377378 if (field_projection.attributes ) {
378- auto parquet_attributes = internal::checked_cast<const ParquetExtraAttributes&>(
379+ const auto & attributes = internal::checked_cast<const ParquetExtraAttributes&>(
379380 *field_projection.attributes );
380- if (parquet_attributes .column_id ) {
381- column_ids->push_back (parquet_attributes .column_id .value ());
381+ if (attributes .column_id ) {
382+ column_ids->push_back (attributes .column_id .value ());
382383 }
383384 }
384385 for (const auto & child : field_projection.children ) {
0 commit comments