@@ -266,192 +266,167 @@ void NameToIdVisitor::Finish() {
266266// / - select_full_types=false: Recursively project nested fields within selected structs
267267// /
268268// / \warning Error conditions that will cause projection to fail:
269- // / - Attempting to explicitly project List or Map types (returns InvalidArgument)
270- // / - Projecting a List when element result is null (returns InvalidArgument)
271- // / - Projecting a Map without a defined map value type (returns InvalidArgument)
272- // / - Projecting a struct when result is not StructType (returns InvalidArgument)
269+ // / - Project or Select a Map with just key or value (returns InvalidArgument)
273270class PruneColumnVisitor {
274271 public:
275272 PruneColumnVisitor (const std::unordered_set<int32_t >& selected_ids,
276273 bool select_full_types)
277274 : selected_ids_(selected_ids), select_full_types_(select_full_types) {}
278275
279- Status Visit (const StructType& type, std::unique_ptr<Type>* out) const {
280- std::vector<std::shared_ptr<const Type>> selected_types;
281- for (const auto & field : type.fields ()) {
282- std::unique_ptr<Type> child_result;
276+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<Type>& type) const {
277+ switch (type->type_id ()) {
278+ case TypeId::kStruct : {
279+ auto struct_type = std::static_pointer_cast<StructType>(type);
280+ return Visit (struct_type);
281+ }
282+ case TypeId::kList : {
283+ auto list_type = std::static_pointer_cast<ListType>(type);
284+ return Visit (list_type);
285+ }
286+ case TypeId::kMap : {
287+ auto map_type = std::static_pointer_cast<MapType>(type);
288+ return Visit (map_type);
289+ }
290+ default : {
291+ auto primitive_type = std::static_pointer_cast<PrimitiveType>(type);
292+ return Visit (primitive_type);
293+ }
294+ }
295+ }
296+
297+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<StructType>& type) const {
298+ std::vector<std::shared_ptr<Type>> selected_types;
299+ for (const auto & field : type->fields ()) {
283300 if (select_full_types_ and selected_ids_.contains (field.field_id ())) {
284301 selected_types.emplace_back (field.type ());
285302 continue ;
286303 }
287- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (* field.type (), this , &child_result ));
304+ ICEBERG_ASSIGN_OR_RAISE ( auto child_result, Visit ( field.type ()));
288305 if (selected_ids_.contains (field.field_id ())) {
289- // select
290- if (field.type ()->type_id () == TypeId::kStruct ) {
291- // project(kstruct)
292- ICEBERG_RETURN_UNEXPECTED (ProjectStruct (field, &child_result));
293- selected_types.emplace_back (std::move (child_result));
294- } else {
295- // project(list, map, primitive)
296- if (!field.type ()->is_primitive ()) {
297- return InvalidArgument (
298- " Cannot explicitly project List or Map types, {}:{} of type {} was "
299- " selected" ,
300- field.field_id (), field.name (), field.type ()->ToString ());
301- }
302- selected_types.emplace_back (field.type ());
303- }
304- } else if (child_result) {
305- // project, select
306- selected_types.emplace_back (std::move (child_result));
306+ selected_types.emplace_back (
307+ field.type ()->is_primitive () ? field.type () : std::move (child_result));
307308 } else {
308- // project, select
309- selected_types.emplace_back (nullptr );
309+ selected_types.emplace_back (std::move (child_result));
310310 }
311311 }
312312
313313 bool same_types = true ;
314314 std::vector<SchemaField> selected_fields;
315- const auto & fields = type. fields ();
315+ const auto & fields = type-> fields ();
316316 for (size_t i = 0 ; i < fields.size (); i++) {
317317 if (fields[i].type () == selected_types[i]) {
318- selected_fields.emplace_back (fields[i]);
318+ selected_fields.emplace_back (std::move ( fields[i]) );
319319 } else if (selected_types[i]) {
320320 same_types = false ;
321321 selected_fields.emplace_back (fields[i].field_id (), std::string (fields[i].name ()),
322- std::const_pointer_cast<Type> (selected_types[i]),
323- fields[i]. optional (), std::string (fields[i].doc ()));
322+ std::move (selected_types[i]), fields[i]. optional ( ),
323+ std::string (fields[i].doc ()));
324324 }
325325 }
326326
327327 if (!selected_fields.empty ()) {
328328 if (same_types && selected_fields.size () == fields.size ()) {
329- *out = std::make_unique<StructType>( type) ;
329+ return type;
330330 } else {
331- *out = std::make_unique <StructType>(std::move (selected_fields));
331+ return std::make_shared <StructType>(std::move (selected_fields));
332332 }
333333 }
334334
335- return {} ;
335+ return nullptr ;
336336 }
337337
338- Status Visit (const ListType& type, std::unique_ptr<Type>* out) const {
339- const auto & element_field = type.fields ()[0 ];
340-
338+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<ListType>& type) const {
339+ const auto & element_field = type->fields ()[0 ];
341340 if (select_full_types_ and selected_ids_.contains (element_field.field_id ())) {
342- *out = std::make_unique<ListType>(type);
343- return {};
341+ return type;
344342 }
345343
346- std::unique_ptr<Type> child_result;
347- ICEBERG_RETURN_UNEXPECTED (
348- VisitTypeInline (*element_field.type (), this , &child_result));
344+ ICEBERG_ASSIGN_OR_RAISE (auto child_result, Visit (element_field.type ()));
349345
346+ std::shared_ptr<Type> out;
350347 if (selected_ids_.contains (element_field.field_id ())) {
351- if (element_field.type ()->type_id () == TypeId::kStruct ) {
352- ICEBERG_RETURN_UNEXPECTED (ProjectStruct (element_field, &child_result));
353- ICEBERG_RETURN_UNEXPECTED (
354- ProjectList (element_field, std::move (child_result), out));
348+ if (element_field.type ()->is_primitive ()) {
349+ out = std::make_shared<ListType>(element_field);
355350 } else {
356- if (!element_field.type ()->is_primitive ()) {
357- return InvalidArgument (
358- " Cannot explicitly project List or Map types, List element {} of type {} "
359- " was "
360- " selected" ,
361- element_field.field_id (), element_field.name ());
362- }
363- *out = std::make_unique<ListType>(element_field);
351+ ICEBERG_ASSIGN_OR_RAISE (out, ProjectList (element_field, std::move (child_result)));
364352 }
365353 } else if (child_result) {
366- ICEBERG_RETURN_UNEXPECTED ( ProjectList (element_field, std::move (child_result), out ));
354+ ICEBERG_ASSIGN_OR_RAISE (out, ProjectList (element_field, std::move (child_result)));
367355 }
368-
369- return {};
356+ return out;
370357 }
371358
372- Status Visit (const MapType& type, std::unique_ptr<Type>* out ) const {
373- const auto & key_field = type. fields ()[0 ];
374- const auto & value_field = type. fields ()[1 ];
359+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<MapType>& type ) const {
360+ const auto & key_field = type-> fields ()[0 ];
361+ const auto & value_field = type-> fields ()[1 ];
375362
376- if (select_full_types_ and selected_ids_.contains (value_field .field_id ())) {
377- *out = std::make_unique<MapType>(type);
378- return {} ;
363+ if (select_full_types_ and selected_ids_.contains (key_field .field_id ()) and
364+ selected_ids_. contains (value_field. field_id ())) {
365+ return type ;
379366 }
380367
381- std::unique_ptr<Type> key_result;
382- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (*key_field .type (), this , &key_result ));
368+ ICEBERG_ASSIGN_OR_RAISE ( auto key_result, Visit (key_field. type ())) ;
369+ ICEBERG_ASSIGN_OR_RAISE ( auto value_result, Visit (value_field .type ()));
383370
384- std::unique_ptr<Type> value_result;
385- ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*value_field.type (), this , &value_result));
371+ if (selected_ids_.contains (value_field.field_id ()) and
372+ value_field.type ()->is_primitive ()) {
373+ value_result = value_field.type ();
374+ }
375+ if (selected_ids_.contains (key_field.field_id ()) and
376+ key_field.type ()->is_primitive ()) {
377+ key_result = key_field.type ();
378+ }
386379
387- if (selected_ids_.contains (value_field.field_id ())) {
388- if (value_field.type ()->type_id () == TypeId::kStruct ) {
389- ICEBERG_RETURN_UNEXPECTED (ProjectStruct (value_field, &value_result));
390- ICEBERG_RETURN_UNEXPECTED (
391- ProjectMap (key_field, value_field, std::move (value_result), out));
392- } else {
393- if (!value_field.type ()->is_primitive ()) {
394- return InvalidArgument (
395- " Cannot explicitly project List or Map types, Map value {} of type {} was "
396- " selected" ,
397- value_field.field_id (), type.ToString ());
398- }
399- *out = std::make_unique<MapType>(type);
400- }
401- } else if (value_result) {
402- ICEBERG_RETURN_UNEXPECTED (
403- ProjectMap (key_field, value_field, std::move (value_result), out));
404- } else if (selected_ids_.contains (key_field.field_id ())) {
405- *out = std::make_unique<MapType>(type);
380+ if (!key_result && !value_result) {
381+ return nullptr ;
406382 }
407383
408- return {};
384+ if (!key_result || !value_result) {
385+ return InvalidArgument (
386+ " Cannot project Map with only key or value: key={}, value={}" ,
387+ key_result ? " present" : " null" , value_result ? " present" : " null" );
388+ }
389+
390+ ICEBERG_ASSIGN_OR_RAISE (auto out,
391+ ProjectMap (key_field, value_field, key_result, value_result));
392+ return out;
409393 }
410394
411- Status Visit (const PrimitiveType& type, std::unique_ptr<Type>* result ) const {
412- return {} ;
395+ Result<std::shared_ptr<Type>> Visit (const std::shared_ptr<PrimitiveType>& type ) const {
396+ return nullptr ;
413397 }
414398
415- Status ProjectList (const SchemaField& element_field, std::shared_ptr<Type> child_result ,
416- std::unique_ptr <Type>* out ) const {
399+ Result<std::shared_ptr<Type>> ProjectList (const SchemaField& element_field,
400+ std::shared_ptr <Type> child_result ) const {
417401 if (!child_result) {
418- return InvalidArgument ( " Cannot project a list when the element result is null " ) ;
402+ return nullptr ;
419403 }
420404 if (element_field.type () == child_result) {
421- *out = std::make_unique<ListType>(element_field);
422- } else {
423- *out = std::make_unique<ListType>(element_field.field_id (), child_result,
424- element_field.optional ());
405+ return std::make_shared<ListType>(element_field);
425406 }
426- return {};
407+ return std::make_shared<ListType>(element_field.field_id (), child_result,
408+ element_field.optional ());
427409 }
428410
429- Status ProjectMap (const SchemaField& key_field, const SchemaField& value_field,
430- std::shared_ptr<Type> value_result,
431- std::unique_ptr<Type>* out) const {
432- if (!value_result) {
433- return InvalidArgument (
434- " Attempted to project a map without a defined map value type" );
435- }
436- if (value_field.type () == value_result) {
437- *out = std::make_unique<MapType>(key_field, value_field);
438- } else {
439- *out = std::make_unique<MapType>(
440- key_field, SchemaField (value_field.field_id (), std::string (value_field.name ()),
441- value_result, value_field.optional ()));
411+ Result<std::shared_ptr<Type>> ProjectMap (const SchemaField& key_field,
412+ const SchemaField& value_field,
413+ std::shared_ptr<Type> key_result,
414+ std::shared_ptr<Type> value_result) const {
415+ SchemaField projected_key_field = key_field;
416+ if (key_field.type () != key_result) {
417+ projected_key_field =
418+ SchemaField (key_field.field_id (), std::string (key_field.name ()), key_result,
419+ key_field.optional ());
442420 }
443- return {};
444- }
445421
446- Status ProjectStruct (const SchemaField& field, std::unique_ptr<Type>* out) const {
447- if (*out && (*out)->type_id () != TypeId::kStruct ) {
448- return InvalidArgument (" Project struct {}:{}, but result is not StructType" ,
449- field.name (), field.field_id ());
422+ SchemaField projected_value_field = value_field;
423+ if (value_field.type () != value_result) {
424+ projected_value_field =
425+ SchemaField (value_field.field_id (), std::string (value_field.name ()),
426+ value_result, value_field.optional ());
450427 }
451- if (*out == nullptr ) {
452- *out = std::make_unique<StructType>(std::vector<SchemaField>{});
453- }
454- return {};
428+
429+ return std::make_shared<MapType>(projected_key_field, projected_value_field);
455430 }
456431
457432 private:
@@ -484,9 +459,10 @@ Result<std::unique_ptr<const Schema>> Schema::SelectInternal(
484459 }
485460 }
486461
487- std::unique_ptr<Type> result;
488462 PruneColumnVisitor visitor (selected_ids, /* select_full_types=*/ true );
489- ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*this , &visitor, &result));
463+ auto self = std::shared_ptr<const StructType>(this , [](const StructType*) {});
464+ ICEBERG_ASSIGN_OR_RAISE (auto result,
465+ visitor.Visit (std::const_pointer_cast<StructType>(self)));
490466
491467 if (!result) {
492468 return std::make_unique<Schema>(std::vector<SchemaField>{}, schema_id_);
@@ -504,9 +480,9 @@ Result<std::unique_ptr<const Schema>> Schema::SelectInternal(
504480Result<std::unique_ptr<const Schema>> Schema::Project (
505481 std::unordered_set<int32_t >& field_ids) const {
506482 PruneColumnVisitor visitor (field_ids, /* select_full_types=*/ false );
507-
508- std::unique_ptr<Type> result;
509- ICEBERG_RETURN_UNEXPECTED ( VisitTypeInline (* this , &visitor, &result ));
483+ auto self = std::shared_ptr< const StructType>( this , []( const StructType*) {});
484+ ICEBERG_ASSIGN_OR_RAISE ( auto result,
485+ visitor. Visit (std::const_pointer_cast<StructType>(self) ));
510486
511487 if (!result) {
512488 return std::make_unique<Schema>(std::vector<SchemaField>{}, schema_id_);
@@ -516,10 +492,8 @@ Result<std::unique_ptr<const Schema>> Schema::Project(
516492 return InvalidSchema (" Projected type must be a struct type" );
517493 }
518494
519- const auto & projected_struct = internal::checked_cast<const StructType&>(*result);
520- std::vector<SchemaField> fields_vec (projected_struct.fields ().begin (),
521- projected_struct.fields ().end ());
522- return std::make_unique<Schema>(std::move (fields_vec), schema_id_);
495+ auto & projected_struct = internal::checked_cast<const StructType&>(*result);
496+ return FromStructType (std::move (const_cast <StructType&>(projected_struct)), schema_id_);
523497}
524498
525499} // namespace iceberg
0 commit comments