Skip to content

Commit bb2da6b

Browse files
committed
Optimization for NULLs, count from metadata, test
1 parent 8fb2aa2 commit bb2da6b

File tree

4 files changed

+318
-37
lines changed

4 files changed

+318
-37
lines changed

src/Core/Settings.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6906,7 +6906,7 @@ Allow retries in cluster request, when one node goes offline
69066906
DECLARE(Bool, object_storage_remote_initiator, false, R"(
69076907
Execute request to object storage as remote on one of object_storage_cluster nodes.
69086908
)", EXPERIMENTAL) \
6909-
DECLARE(Bool, allow_experimental_iceberg_read_optimization, false, R"(
6909+
DECLARE(Bool, allow_experimental_iceberg_read_optimization, true, R"(
69106910
Allow Iceberg read optimization based on Iceberg metadata.
69116911
)", EXPERIMENTAL) \
69126912
\

src/Core/SettingsChangesHistory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
7979
{"object_storage_cluster_join_mode", "allow", "allow", "New setting"},
8080
{"object_storage_remote_initiator", false, false, "New setting."},
8181
{"allow_experimental_export_merge_tree_part", false, false, "New setting."},
82-
{"allow_experimental_iceberg_read_optimization", false, false, "New setting."}
82+
{"allow_experimental_iceberg_read_optimization", true, true, "New setting."}
8383
});
8484
addSettingsChanges(settings_changes_history, "25.6",
8585
{

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 111 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -523,9 +523,17 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
523523
QueryPipelineBuilder builder;
524524
std::shared_ptr<ISource> source;
525525
std::unique_ptr<ReadBuffer> read_buf;
526+
std::optional<Int64> rows_count_from_metadata;
526527

527528
auto try_get_num_rows_from_cache = [&]() -> std::optional<size_t>
528529
{
530+
if (rows_count_from_metadata.has_value())
531+
{
532+
/// Must be non negative here
533+
size_t value = rows_count_from_metadata.value();
534+
return value;
535+
}
536+
529537
if (!schema_cache)
530538
return std::nullopt;
531539

@@ -559,54 +567,122 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
559567

560568
if (context_->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization])
561569
{
562-
auto schema = configuration->tryGetTableStructureFromMetadata();
563-
if (schema.has_value())
570+
auto file_meta_data = object_info->getFileMetaInfo();
571+
if (file_meta_data.has_value())
564572
{
565-
auto file_meta_data = object_info->getFileMetaInfo();
566-
if (file_meta_data.has_value())
573+
bool is_all_rows_count_equals = true;
574+
for (const auto & column : file_meta_data.value()->columns_info)
567575
{
568-
for (const auto & column : file_meta_data.value()->columns_info)
576+
if (is_all_rows_count_equals && column.second.rows_count.has_value())
569577
{
570-
if (column.second.hyperrectangle.has_value())
578+
if (rows_count_from_metadata.has_value())
571579
{
572-
if (column.second.hyperrectangle.value().isPoint())
580+
if (column.second.rows_count.value() != rows_count_from_metadata.value())
573581
{
574-
auto column_name = column.first;
575-
576-
auto i_column = requested_columns_list.find(column_name);
577-
if (i_column == requested_columns_list.end())
578-
continue;
579-
580-
/// isPoint() method checks that left==right
581-
constant_columns_with_values[i_column->second.first] =
582-
ConstColumnWithValue{
583-
i_column->second.second,
584-
column.second.hyperrectangle.value().left
585-
};
586-
constant_columns.insert(column_name);
587-
588-
LOG_DEBUG(log, "In file {} constant column '{}' id {} type '{}' with value '{}'",
589-
object_info->getPath(),
590-
column_name,
591-
i_column->second.first,
592-
i_column->second.second.type,
593-
column.second.hyperrectangle.value().left.dump());
582+
LOG_WARNING(log, "Inconsistent rows count for file {} in metadats, ignored", object_info->getPath());
583+
is_all_rows_count_equals = false;
584+
rows_count_from_metadata = std::nullopt;
594585
}
595586
}
587+
else if (column.second.rows_count.value() < 0)
588+
{
589+
LOG_WARNING(log, "Negative rows count for file {} in metadats, ignored", object_info->getPath());
590+
is_all_rows_count_equals = false;
591+
rows_count_from_metadata = std::nullopt;
592+
}
593+
else
594+
rows_count_from_metadata = column.second.rows_count;
595+
}
596+
if (column.second.hyperrectangle.has_value())
597+
{
598+
if (column.second.hyperrectangle.value().isPoint() &&
599+
(!column.second.nulls_count.has_value() || !column.second.nulls_count.value()))
600+
{
601+
auto column_name = column.first;
602+
603+
auto i_column = requested_columns_list.find(column_name);
604+
if (i_column == requested_columns_list.end())
605+
continue;
606+
607+
/// isPoint() method checks that left==right
608+
constant_columns_with_values[i_column->second.first] =
609+
ConstColumnWithValue{
610+
i_column->second.second,
611+
column.second.hyperrectangle.value().left
612+
};
613+
constant_columns.insert(column_name);
614+
615+
LOG_DEBUG(log, "In file {} constant column '{}' id {} type '{}' with value '{}'",
616+
object_info->getPath(),
617+
column_name,
618+
i_column->second.first,
619+
i_column->second.second.type,
620+
column.second.hyperrectangle.value().left.dump());
621+
}
622+
else if (column.second.rows_count.has_value() && column.second.nulls_count.has_value()
623+
&& column.second.rows_count.value() == column.second.nulls_count.value())
624+
{
625+
auto column_name = column.first;
626+
627+
auto i_column = requested_columns_list.find(column_name);
628+
if (i_column == requested_columns_list.end())
629+
continue;
630+
631+
if (!i_column->second.second.type->isNullable())
632+
continue;
633+
634+
constant_columns_with_values[i_column->second.first] =
635+
ConstColumnWithValue{
636+
i_column->second.second,
637+
Field()
638+
};
639+
constant_columns.insert(column_name);
640+
641+
LOG_DEBUG(log, "In file {} constant column '{}' id {} type '{}' with value 'NULL'",
642+
object_info->getPath(),
643+
column_name,
644+
i_column->second.first,
645+
i_column->second.second.type);
646+
}
596647
}
597648
}
598649

599-
if (!constant_columns.empty())
650+
for (const auto & column : requested_columns_list)
600651
{
601-
size_t original_columns = requested_columns_copy.size();
602-
requested_columns_copy = requested_columns_copy.eraseNames(constant_columns);
603-
if (requested_columns_copy.size() + constant_columns.size() != original_columns)
604-
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't remove constant columns for file {} correct, fallback to read. Founded constant columns: [{}]",
605-
object_info->getPath(), constant_columns);
606-
if (requested_columns_copy.empty())
607-
need_only_count = true;
652+
const auto & column_name = column.first;
653+
654+
if (file_meta_data.value()->columns_info.contains(column_name))
655+
continue;
656+
657+
if (!column.second.second.type->isNullable())
658+
continue;
659+
660+
/// Column is nullable and absent in file
661+
constant_columns_with_values[column.second.first] =
662+
ConstColumnWithValue{
663+
column.second.second,
664+
Field()
665+
};
666+
constant_columns.insert(column_name);
667+
668+
LOG_DEBUG(log, "In file {} constant column '{}' id {} type '{}' with value 'NULL'",
669+
object_info->getPath(),
670+
column_name,
671+
column.second.first,
672+
column.second.second.type);
608673
}
609674
}
675+
676+
if (!constant_columns.empty())
677+
{
678+
size_t original_columns = requested_columns_copy.size();
679+
requested_columns_copy = requested_columns_copy.eraseNames(constant_columns);
680+
if (requested_columns_copy.size() + constant_columns.size() != original_columns)
681+
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't remove constant columns for file {} correct, fallback to read. Founded constant columns: [{}]",
682+
object_info->getPath(), constant_columns);
683+
if (requested_columns_copy.empty())
684+
need_only_count = true;
685+
}
610686
}
611687

612688
std::optional<size_t> num_rows_from_cache

0 commit comments

Comments
 (0)