@@ -523,9 +523,17 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
523523 QueryPipelineBuilder builder;
524524 std::shared_ptr<ISource> source;
525525 std::unique_ptr<ReadBuffer> read_buf;
526+ std::optional<Int64> rows_count_from_metadata;
526527
527528 auto try_get_num_rows_from_cache = [&]() -> std::optional<size_t >
528529 {
530+ if (rows_count_from_metadata.has_value ())
531+ {
532+ // / Must be non negative here
533+ size_t value = rows_count_from_metadata.value ();
534+ return value;
535+ }
536+
529537 if (!schema_cache)
530538 return std::nullopt ;
531539
@@ -559,54 +567,122 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
559567
560568 if (context_->getSettingsRef ()[Setting::allow_experimental_iceberg_read_optimization])
561569 {
562- auto schema = configuration-> tryGetTableStructureFromMetadata ();
563- if (schema .has_value ())
570+ auto file_meta_data = object_info-> getFileMetaInfo ();
571+ if (file_meta_data .has_value ())
564572 {
565- auto file_meta_data = object_info-> getFileMetaInfo () ;
566- if ( file_meta_data.has_value () )
573+ bool is_all_rows_count_equals = true ;
574+ for ( const auto & column : file_meta_data.value ()-> columns_info )
567575 {
568- for ( const auto & column : file_meta_data. value ()-> columns_info )
576+ if (is_all_rows_count_equals && column. second . rows_count . has_value () )
569577 {
570- if (column. second . hyperrectangle .has_value ())
578+ if (rows_count_from_metadata .has_value ())
571579 {
572- if (column.second .hyperrectangle .value (). isPoint ())
580+ if (column.second .rows_count .value () != rows_count_from_metadata. value ())
573581 {
574- auto column_name = column.first ;
575-
576- auto i_column = requested_columns_list.find (column_name);
577- if (i_column == requested_columns_list.end ())
578- continue ;
579-
580- // / isPoint() method checks that left==right
581- constant_columns_with_values[i_column->second .first ] =
582- ConstColumnWithValue{
583- i_column->second .second ,
584- column.second .hyperrectangle .value ().left
585- };
586- constant_columns.insert (column_name);
587-
588- LOG_DEBUG (log, " In file {} constant column '{}' id {} type '{}' with value '{}'" ,
589- object_info->getPath (),
590- column_name,
591- i_column->second .first ,
592- i_column->second .second .type ,
593- column.second .hyperrectangle .value ().left .dump ());
582+ LOG_WARNING (log, " Inconsistent rows count for file {} in metadats, ignored" , object_info->getPath ());
583+ is_all_rows_count_equals = false ;
584+ rows_count_from_metadata = std::nullopt ;
594585 }
595586 }
587+ else if (column.second .rows_count .value () < 0 )
588+ {
589+ LOG_WARNING (log, " Negative rows count for file {} in metadats, ignored" , object_info->getPath ());
590+ is_all_rows_count_equals = false ;
591+ rows_count_from_metadata = std::nullopt ;
592+ }
593+ else
594+ rows_count_from_metadata = column.second .rows_count ;
595+ }
596+ if (column.second .hyperrectangle .has_value ())
597+ {
598+ if (column.second .hyperrectangle .value ().isPoint () &&
599+ (!column.second .nulls_count .has_value () || !column.second .nulls_count .value ()))
600+ {
601+ auto column_name = column.first ;
602+
603+ auto i_column = requested_columns_list.find (column_name);
604+ if (i_column == requested_columns_list.end ())
605+ continue ;
606+
607+ // / isPoint() method checks that left==right
608+ constant_columns_with_values[i_column->second .first ] =
609+ ConstColumnWithValue{
610+ i_column->second .second ,
611+ column.second .hyperrectangle .value ().left
612+ };
613+ constant_columns.insert (column_name);
614+
615+ LOG_DEBUG (log, " In file {} constant column '{}' id {} type '{}' with value '{}'" ,
616+ object_info->getPath (),
617+ column_name,
618+ i_column->second .first ,
619+ i_column->second .second .type ,
620+ column.second .hyperrectangle .value ().left .dump ());
621+ }
622+ else if (column.second .rows_count .has_value () && column.second .nulls_count .has_value ()
623+ && column.second .rows_count .value () == column.second .nulls_count .value ())
624+ {
625+ auto column_name = column.first ;
626+
627+ auto i_column = requested_columns_list.find (column_name);
628+ if (i_column == requested_columns_list.end ())
629+ continue ;
630+
631+ if (!i_column->second .second .type ->isNullable ())
632+ continue ;
633+
634+ constant_columns_with_values[i_column->second .first ] =
635+ ConstColumnWithValue{
636+ i_column->second .second ,
637+ Field ()
638+ };
639+ constant_columns.insert (column_name);
640+
641+ LOG_DEBUG (log, " In file {} constant column '{}' id {} type '{}' with value 'NULL'" ,
642+ object_info->getPath (),
643+ column_name,
644+ i_column->second .first ,
645+ i_column->second .second .type );
646+ }
596647 }
597648 }
598649
599- if (!constant_columns. empty () )
650+ for ( const auto & column : requested_columns_list )
600651 {
601- size_t original_columns = requested_columns_copy.size ();
602- requested_columns_copy = requested_columns_copy.eraseNames (constant_columns);
603- if (requested_columns_copy.size () + constant_columns.size () != original_columns)
604- throw Exception (ErrorCodes::LOGICAL_ERROR, " Can't remove constant columns for file {} correct, fallback to read. Founded constant columns: [{}]" ,
605- object_info->getPath (), constant_columns);
606- if (requested_columns_copy.empty ())
607- need_only_count = true ;
652+ const auto & column_name = column.first ;
653+
654+ if (file_meta_data.value ()->columns_info .contains (column_name))
655+ continue ;
656+
657+ if (!column.second .second .type ->isNullable ())
658+ continue ;
659+
660+ // / Column is nullable and absent in file
661+ constant_columns_with_values[column.second .first ] =
662+ ConstColumnWithValue{
663+ column.second .second ,
664+ Field ()
665+ };
666+ constant_columns.insert (column_name);
667+
668+ LOG_DEBUG (log, " In file {} constant column '{}' id {} type '{}' with value 'NULL'" ,
669+ object_info->getPath (),
670+ column_name,
671+ column.second .first ,
672+ column.second .second .type );
608673 }
609674 }
675+
676+ if (!constant_columns.empty ())
677+ {
678+ size_t original_columns = requested_columns_copy.size ();
679+ requested_columns_copy = requested_columns_copy.eraseNames (constant_columns);
680+ if (requested_columns_copy.size () + constant_columns.size () != original_columns)
681+ throw Exception (ErrorCodes::LOGICAL_ERROR, " Can't remove constant columns for file {} correct, fallback to read. Founded constant columns: [{}]" ,
682+ object_info->getPath (), constant_columns);
683+ if (requested_columns_copy.empty ())
684+ need_only_count = true ;
685+ }
610686 }
611687
612688 std::optional<size_t > num_rows_from_cache
0 commit comments