@@ -492,6 +492,198 @@ bool IcebergMetadata::update(const ContextPtr & local_context)
492492 return previous_snapshot_schema_id != relevant_snapshot_schema_id;
493493}
494494
495+ namespace
496+ {
497+
498+ using IdToName = std::unordered_map<Int32, String>;
499+
500+ IdToName buildIdToNameMap (const Poco::JSON::Object::Ptr & metadata_obj)
501+ {
502+ IdToName map;
503+ if (!metadata_obj || !metadata_obj->has (" current-schema-id" ) || !metadata_obj->has (" schemas" ))
504+ return map;
505+
506+ const auto current_schema_id = metadata_obj->getValue <Int32>(" current-schema-id" );
507+ auto schemas = metadata_obj->getArray (" schemas" );
508+ if (!schemas)
509+ return map;
510+
511+ for (size_t i = 0 ; i < schemas->size (); ++i)
512+ {
513+ auto schema = schemas->getObject (i);
514+ if (!schema || !schema->has (" schema-id" ))
515+ continue ;
516+ if (schema->getValue <Int32>(" schema-id" ) != current_schema_id)
517+ continue ;
518+
519+ if (auto fields = schema->getArray (" fields" ))
520+ {
521+ for (size_t j = 0 ; j < fields->size (); ++j)
522+ {
523+ auto f = fields->getObject (j);
524+ if (!f || !f->has (" id" ) || !f->has (" name" ))
525+ continue ;
526+ map.emplace (f->getValue <Int32>(" id" ), f->getValue <String>(" name" ));
527+ }
528+ }
529+ break ;
530+ }
531+ return map;
532+ }
533+
534+ String formatTransform (
535+ const String & transform,
536+ const Poco::JSON::Object::Ptr & field_obj,
537+ const IdToName & id_to_name)
538+ {
539+ Int32 source_id = (field_obj && field_obj->has (" source-id" ))
540+ ? field_obj->getValue <Int32>(" source-id" )
541+ : -1 ;
542+
543+ const auto it = id_to_name.find (source_id);
544+ const String col = (it != id_to_name.end ()) ? it->second : (" col_" + toString (source_id));
545+
546+ String base = transform;
547+ String param;
548+ if (const auto lpos = transform.find (' [' ); lpos != String::npos && transform.back () == ' ]' )
549+ {
550+ base = transform.substr (0 , lpos);
551+ param = transform.substr (lpos + 1 , transform.size () - lpos - 2 ); // strip [ and ]
552+ }
553+
554+ String result;
555+ if (base == " identity" )
556+ result = col;
557+ else if (base == " year" || base == " month" || base == " day" || base == " hour" )
558+ result = base + " (" + col + " )" ;
559+ else if (base != " void" )
560+ {
561+ if (!param.empty ())
562+ result = base + " (" + param + " , " + col + " )" ;
563+ else
564+ result = base + " (" + col + " )" ;
565+ }
566+ return result;
567+ }
568+
569+ Poco::JSON::Array::Ptr findActivePartitionFields (const Poco::JSON::Object::Ptr & metadata_obj)
570+ {
571+ if (!metadata_obj)
572+ return nullptr ;
573+
574+ if (metadata_obj->has (" partition-spec" ))
575+ return metadata_obj->getArray (" partition-spec" );
576+
577+ // If for some reason there is no partition-spec, try partition-specs + default-
578+ if (metadata_obj->has (" partition-specs" ) && metadata_obj->has (" default-spec-id" ))
579+ {
580+ const auto default_spec_id = metadata_obj->getValue <Int32>(" default-spec-id" );
581+ if (auto specs = metadata_obj->getArray (" partition-specs" ))
582+ {
583+ for (size_t i = 0 ; i < specs->size (); ++i)
584+ {
585+ auto spec = specs->getObject (i);
586+ if (!spec || !spec->has (" spec-id" ))
587+ continue ;
588+ if (spec->getValue <Int32>(" spec-id" ) == default_spec_id)
589+ return spec->has (" fields" ) ? spec->getArray (" fields" ) : nullptr ;
590+ }
591+ }
592+ }
593+
594+ return nullptr ;
595+ }
596+
597+ Poco::JSON::Array::Ptr findActiveSortFields (const Poco::JSON::Object::Ptr & metadata_obj)
598+ {
599+ if (!metadata_obj || !metadata_obj->has (" default-sort-order-id" ) || !metadata_obj->has (" sort-orders" ))
600+ return nullptr ;
601+
602+ const auto default_sort_order_id = metadata_obj->getValue <Int32>(" default-sort-order-id" );
603+ auto orders = metadata_obj->getArray (" sort-orders" );
604+ if (!orders)
605+ return nullptr ;
606+
607+ for (size_t i = 0 ; i < orders->size (); ++i)
608+ {
609+ auto order = orders->getObject (i);
610+ if (!order || !order->has (" order-id" ))
611+ continue ;
612+ if (order->getValue <Int32>(" order-id" ) == default_sort_order_id)
613+ return order->has (" fields" ) ? order->getArray (" fields" ) : nullptr ;
614+ }
615+ return nullptr ;
616+ }
617+
618+ String composeList (
619+ const Poco::JSON::Array::Ptr & fields,
620+ const IdToName & id_to_name,
621+ bool lookup_sort_modifiers)
622+ {
623+ if (!fields || fields->size () == 0 )
624+ return {};
625+
626+ Strings parts;
627+ parts.reserve (fields->size ());
628+
629+ for (size_t i = 0 ; i < fields->size (); ++i)
630+ {
631+ auto field = fields->getObject (i);
632+ if (!field)
633+ continue ;
634+
635+ const String transform = field->has (" transform" ) ? field->getValue <String>(" transform" ) : " identity" ;
636+ String expr = formatTransform (transform, field, id_to_name);
637+ if (expr.empty ())
638+ continue ;
639+
640+ if (lookup_sort_modifiers)
641+ {
642+ if (field->has (" direction" ))
643+ {
644+ auto d = field->getValue <String>(" direction" );
645+ expr += (Poco::icompare (d, " desc" ) == 0 ) ? " DESC" : " ASC" ;
646+ }
647+ if (field->has (" null-order" ))
648+ {
649+ auto n = field->getValue <String>(" null-order" );
650+ expr += (Poco::icompare (n, " nulls-last" ) == 0 ) ? " NULLS LAST" : " NULLS FIRST" ;
651+ }
652+ }
653+
654+ parts.push_back (std::move (expr));
655+ }
656+
657+ if (parts.empty ())
658+ return {};
659+
660+ String res;
661+ for (size_t i = 0 ; i < parts.size (); ++i)
662+ {
663+ if (i) res += " , " ;
664+ res += parts[i];
665+ }
666+ return res;
667+ }
668+
669+ std::pair<std::optional<String>, std::optional<String>> extractIcebergKeys (const Poco::JSON::Object::Ptr & metadata_obj)
670+ {
671+ std::optional<String> partition_key;
672+ std::optional<String> sort_key;
673+
674+ if (metadata_obj)
675+ {
676+ auto id_to_name = buildIdToNameMap (metadata_obj);
677+
678+ partition_key = composeList (findActivePartitionFields (metadata_obj), id_to_name, /* lookup_sort_modifiers=*/ false );
679+ sort_key = composeList (findActiveSortFields (metadata_obj), id_to_name, /* lookup_sort_modifiers=*/ true );
680+ }
681+
682+ return {partition_key, sort_key};
683+ }
684+
685+ }
686+
495687void IcebergMetadata::updateSnapshot (ContextPtr local_context, Poco::JSON::Object::Ptr metadata_object)
496688{
497689 auto configuration_ptr = configuration.lock ();
@@ -526,10 +718,11 @@ void IcebergMetadata::updateSnapshot(ContextPtr local_context, Poco::JSON::Objec
526718 total_bytes = summary_object->getValue <Int64>(f_total_files_size);
527719 }
528720
721+ auto [partition_key, sorting_key] = extractIcebergKeys (metadata_object);
529722 relevant_snapshot = IcebergSnapshot{
530723 getManifestList (local_context, getProperFilePathFromMetadataInfo (
531724 snapshot->getValue <String>(f_manifest_list), configuration_ptr->getPathForRead ().path , table_location)),
532- relevant_snapshot_id, total_rows, total_bytes};
725+ relevant_snapshot_id, total_rows, total_bytes, partition_key, sorting_key };
533726
534727 if (!snapshot->has (f_schema_id))
535728 throw Exception (
@@ -973,6 +1166,29 @@ std::optional<size_t> IcebergMetadata::totalBytes(ContextPtr local_context) cons
9731166 return result;
9741167}
9751168
1169+ std::optional<String> IcebergMetadata::partitionKey (ContextPtr) const
1170+ {
1171+ SharedLockGuard lock (mutex);
1172+ if (relevant_snapshot->partition_key .has_value ())
1173+ {
1174+ return relevant_snapshot->partition_key ;
1175+ }
1176+
1177+ return std::nullopt ;
1178+ }
1179+
1180+ std::optional<String> IcebergMetadata::sortingKey (ContextPtr) const
1181+ {
1182+ SharedLockGuard lock (mutex);
1183+ if (relevant_snapshot->sorting_key .has_value ())
1184+ {
1185+ return relevant_snapshot->sorting_key ;
1186+ }
1187+
1188+ return std::nullopt ;
1189+ }
1190+
1191+
9761192ObjectIterator IcebergMetadata::iterate (
9771193 const ActionsDAG * filter_dag,
9781194 FileProgressCallback callback,
0 commit comments