1717 * under the License.
1818 */
1919
20+ #include < charconv>
21+
2022#include < arrow/type.h>
2123#include < arrow/type_fwd.h>
2224#include < arrow/util/key_value_metadata.h>
@@ -48,14 +50,12 @@ std::optional<int32_t> FieldIdFromMetadata(
4850 }
4951 std::string field_id_str = metadata->value (key);
5052 int32_t field_id = -1 ;
51- try {
52- field_id = std::stoi (field_id_str);
53- } catch (const std::invalid_argument& e) {
54- return std::nullopt ;
55- } catch (const std::out_of_range& e) {
53+ auto [_, ec] = std::from_chars (field_id_str.data (),
54+ field_id_str.data () + field_id_str.size (), field_id);
55+ if (ec != std::errc () || field_id < 0 ) {
5656 return std::nullopt ;
5757 }
58- return field_id < 0 ? std:: nullopt : std::make_optional (field_id) ;
58+ return field_id;
5959}
6060
6161std::optional<int32_t > GetFieldId (const ::parquet::arrow::SchemaField& parquet_field) {
@@ -189,7 +189,7 @@ Status ValidateParquetSchemaEvolution(
189189
190190// Forward declaration
191191Result<FieldProjection> ProjectNested (
192- const Type& expected_type ,
192+ const Type& nested_type ,
193193 const std::vector<::parquet::arrow::SchemaField>& parquet_fields);
194194
195195Result<FieldProjection> ProjectStruct (
@@ -212,25 +212,25 @@ Result<FieldProjection> ProjectStruct(
212212 std::piecewise_construct, std::forward_as_tuple (field_id.value ()),
213213 std::forward_as_tuple (i, parquet_field));
214214 !inserted) [[unlikely]] {
215- return InvalidSchema (" Duplicate field id found in Parquet schema: {} " ,
215+ return InvalidSchema (" Duplicate field id {} found in Parquet schema" ,
216216 field_id.value ());
217217 }
218218 }
219219
220220 FieldProjection result;
221221 result.children .reserve (struct_type.fields ().size ());
222222
223- for (const auto & expected_field : struct_type.fields ()) {
224- int32_t field_id = expected_field .field_id ();
223+ for (const auto & field : struct_type.fields ()) {
224+ int32_t field_id = field .field_id ();
225225 FieldProjection child_projection;
226226
227227 if (auto iter = field_context_map.find (field_id); iter != field_context_map.cend ()) {
228228 const auto & parquet_field = iter->second .parquet_field ;
229229 ICEBERG_RETURN_UNEXPECTED (
230- ValidateParquetSchemaEvolution (*expected_field .type (), parquet_field));
231- if (expected_field .type ()->is_nested ()) {
232- ICEBERG_ASSIGN_OR_RAISE (child_projection, ProjectNested (*expected_field. type (),
233- parquet_field.children ));
230+ ValidateParquetSchemaEvolution (*field .type (), parquet_field));
231+ if (field .type ()->is_nested ()) {
232+ ICEBERG_ASSIGN_OR_RAISE (child_projection,
233+ ProjectNested (*field. type (), parquet_field.children ));
234234 } else {
235235 child_projection.attributes =
236236 std::make_shared<ParquetExtraAttributes>(parquet_field.column_index );
@@ -239,7 +239,7 @@ Result<FieldProjection> ProjectStruct(
239239 child_projection.kind = FieldProjection::Kind::kProjected ;
240240 } else if (MetadataColumns::IsMetadataColumn (field_id)) {
241241 child_projection.kind = FieldProjection::Kind::kMetadata ;
242- } else if (expected_field .optional ()) {
242+ } else if (field .optional ()) {
243243 child_projection.kind = FieldProjection::Kind::kNull ;
244244 } else {
245245 return InvalidSchema (" Missing required field with id: {}" , field_id);
@@ -266,20 +266,19 @@ Result<FieldProjection> ProjectList(
266266 return InvalidSchema (" List element field missing field id" );
267267 }
268268
269- const auto & expected_element_field = list_type.fields ().back ();
270- if (expected_element_field .field_id () != element_field_id.value ()) {
269+ const auto & element_field = list_type.fields ().back ();
270+ if (element_field .field_id () != element_field_id.value ()) {
271271 return InvalidSchema (" List element field id mismatch, expected {}, got {}" ,
272- expected_element_field .field_id (), element_field_id.value ());
272+ element_field .field_id (), element_field_id.value ());
273273 }
274274
275275 ICEBERG_RETURN_UNEXPECTED (
276- ValidateParquetSchemaEvolution (*expected_element_field .type (), parquet_field));
276+ ValidateParquetSchemaEvolution (*element_field .type (), parquet_field));
277277
278278 FieldProjection element_projection;
279- if (expected_element_field.type ()->is_nested ()) {
280- ICEBERG_ASSIGN_OR_RAISE (
281- element_projection,
282- ProjectNested (*expected_element_field.type (), parquet_field.children ));
279+ if (element_field.type ()->is_nested ()) {
280+ ICEBERG_ASSIGN_OR_RAISE (element_projection,
281+ ProjectNested (*element_field.type (), parquet_field.children ));
283282 } else {
284283 element_projection.attributes =
285284 std::make_shared<ParquetExtraAttributes>(parquet_field.column_index );
@@ -310,28 +309,29 @@ Result<FieldProjection> ProjectMap(
310309 return InvalidSchema (" Map value field missing field id" );
311310 }
312311
313- const auto & expected_key_field = map_type.key ();
314- const auto & expected_value_field = map_type.value ();
315- if (expected_key_field .field_id () != key_field_id.value ()) {
312+ const auto & key_field = map_type.key ();
313+ const auto & value_field = map_type.value ();
314+ if (key_field .field_id () != key_field_id.value ()) {
316315 return InvalidSchema (" Map key field id mismatch, expected {}, got {}" ,
317- expected_key_field .field_id (), key_field_id.value ());
316+ key_field .field_id (), key_field_id.value ());
318317 }
319- if (expected_value_field .field_id () != value_field_id.value ()) {
318+ if (value_field .field_id () != value_field_id.value ()) {
320319 return InvalidSchema (" Map value field id mismatch, expected {}, got {}" ,
321- expected_value_field .field_id (), value_field_id.value ());
320+ value_field .field_id (), value_field_id.value ());
322321 }
323322
324323 FieldProjection result;
324+ result.children .reserve (2 );
325325
326326 for (size_t i = 0 ; i < parquet_fields.size (); ++i) {
327327 FieldProjection sub_projection;
328328 const auto & sub_node = parquet_fields[i];
329- const auto & expected_sub_field = map_type.fields ()[i];
329+ const auto & sub_field = map_type.fields ()[i];
330330 ICEBERG_RETURN_UNEXPECTED (
331- ValidateParquetSchemaEvolution (*expected_sub_field .type (), sub_node));
332- if (expected_sub_field .type ()->is_nested ()) {
333- ICEBERG_ASSIGN_OR_RAISE (
334- sub_projection, ProjectNested (*expected_sub_field .type (), sub_node.children ));
331+ ValidateParquetSchemaEvolution (*sub_field .type (), sub_node));
332+ if (sub_field .type ()->is_nested ()) {
333+ ICEBERG_ASSIGN_OR_RAISE (sub_projection,
334+ ProjectNested (*sub_field .type (), sub_node.children ));
335335 } else {
336336 sub_projection.attributes =
337337 std::make_shared<ParquetExtraAttributes>(sub_node.column_index );
@@ -345,18 +345,18 @@ Result<FieldProjection> ProjectMap(
345345}
346346
347347Result<FieldProjection> ProjectNested (
348- const Type& expected_type ,
348+ const Type& nested_type ,
349349 const std::vector<::parquet::arrow::SchemaField>& parquet_fields) {
350- if (!expected_type .is_nested ()) {
351- return InvalidSchema (" Expected a nested type, but got {}" , expected_type );
350+ if (!nested_type .is_nested ()) {
351+ return InvalidSchema (" Expected a nested type, but got {}" , nested_type );
352352 }
353353
354- switch (expected_type .type_id ()) {
354+ switch (nested_type .type_id ()) {
355355 case TypeId::kStruct :
356- return ProjectStruct (internal::checked_cast<const StructType&>(expected_type ),
356+ return ProjectStruct (internal::checked_cast<const StructType&>(nested_type ),
357357 parquet_fields);
358358 case TypeId::kList :
359- return ProjectList (internal::checked_cast<const ListType&>(expected_type ),
359+ return ProjectList (internal::checked_cast<const ListType&>(nested_type ),
360360 parquet_fields);
361361 case TypeId::kMap :
362362 if (parquet_fields.size () != 1 ||
@@ -365,20 +365,20 @@ Result<FieldProjection> ProjectNested(
365365 return InvalidSchema (
366366 " Map type must have exactly one struct field with two children" );
367367 }
368- return ProjectMap (internal::checked_cast<const MapType&>(expected_type ),
368+ return ProjectMap (internal::checked_cast<const MapType&>(nested_type ),
369369 parquet_fields[0 ].children );
370370 default :
371- return InvalidSchema (" Unsupported nested type: {}" , expected_type );
371+ return InvalidSchema (" Unsupported nested type: {}" , nested_type );
372372 }
373373}
374374
375375void CollectColumnIds (const FieldProjection& field_projection,
376376 std::vector<int32_t >* column_ids) {
377377 if (field_projection.attributes ) {
378- auto parquet_attributes = internal::checked_cast<const ParquetExtraAttributes&>(
378+ const auto & attributes = internal::checked_cast<const ParquetExtraAttributes&>(
379379 *field_projection.attributes );
380- if (parquet_attributes .column_id ) {
381- column_ids->push_back (parquet_attributes .column_id .value ());
380+ if (attributes .column_id ) {
381+ column_ids->push_back (attributes .column_id .value ());
382382 }
383383 }
384384 for (const auto & child : field_projection.children ) {
0 commit comments