Skip to content

Commit dd98094

Browse files
committed
Add setting allow_experimental_iceberg_read_optimization, disabled (0) by default
1 parent de7545d commit dd98094

File tree

4 files changed

+61
-45
lines changed

4 files changed

+61
-45
lines changed

src/Core/Settings.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6905,6 +6905,9 @@ Allow retries in cluster request, when one node goes offline
69056905
)", EXPERIMENTAL) \
69066906
DECLARE(Bool, object_storage_remote_initiator, false, R"(
69076907
Execute request to object storage as remote on one of object_storage_cluster nodes.
6908+
)", EXPERIMENTAL) \
6909+
DECLARE(Bool, allow_experimental_iceberg_read_optimization, false, R"(
6910+
Allow Iceberg read optimization based on Iceberg metadata.
69086911
)", EXPERIMENTAL) \
69096912
\
69106913
/** Experimental timeSeries* aggregate functions. */ \

src/Core/SettingsChangesHistory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
7979
{"object_storage_cluster_join_mode", "allow", "allow", "New setting"},
8080
{"object_storage_remote_initiator", false, false, "New setting."},
8181
{"allow_experimental_export_merge_tree_part", false, false, "New setting."},
82+
{"allow_experimental_iceberg_read_optimization", false, false, "New setting."}
8283
});
8384
addSettingsChanges(settings_changes_history, "25.6",
8485
{

src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ extern const SettingsInt64 iceberg_timestamp_ms;
6060
extern const SettingsInt64 iceberg_snapshot_id;
6161
extern const SettingsBool use_iceberg_metadata_files_cache;
6262
extern const SettingsBool use_iceberg_partition_pruning;
63+
extern const SettingsBool allow_experimental_iceberg_read_optimization;
6364
}
6465

6566

@@ -1091,6 +1092,7 @@ ManifestFilePtr IcebergMetadata::getManifestFile(ContextPtr local_context, const
10911092
DataFileInfos IcebergMetadata::getDataFilesImpl(const ActionsDAG * filter_dag, ContextPtr local_context) const
10921093
{
10931094
bool use_partition_pruning = filter_dag && local_context->getSettingsRef()[Setting::use_iceberg_partition_pruning];
1095+
bool use_iceberg_read_optimization = local_context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization];
10941096

10951097
{
10961098
std::lock_guard cache_lock(cached_unprunned_files_for_last_processed_snapshot_mutex);
@@ -1122,7 +1124,8 @@ DataFileInfos IcebergMetadata::getDataFilesImpl(const ActionsDAG * filter_dag, C
11221124
if (std::holds_alternative<DataFileEntry>(manifest_file_entry.file))
11231125
{
11241126
data_files.push_back(DataFileInfo(std::get<DataFileEntry>(manifest_file_entry.file).file_name));
1125-
data_files.back().file_meta_info = std::make_shared<DataFileMetaInfo>(manifest_file_entry.columns_infos);
1127+
if (use_iceberg_read_optimization)
1128+
data_files.back().file_meta_info = std::make_shared<DataFileMetaInfo>(manifest_file_entry.columns_infos);
11261129
}
11271130
}
11281131
}

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 53 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ namespace Setting
6666
extern const SettingsBool cluster_function_process_archive_on_multiple_nodes;
6767
extern const SettingsBool table_engine_read_through_distributed_cache;
6868
extern const SettingsBool use_object_storage_list_objects_cache;
69+
extern const SettingsBool allow_experimental_iceberg_read_optimization;
6970
}
7071

7172
namespace ErrorCodes
@@ -279,6 +280,8 @@ Chunk StorageObjectStorageSource::generate()
279280
{
280281
lazyInitialize();
281282

283+
bool use_iceberg_read_optimization = read_context->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization];
284+
282285
while (true)
283286
{
284287
if (isCancelled() || !reader)
@@ -333,11 +336,14 @@ Chunk StorageObjectStorageSource::generate()
333336
.etag = &(object_info->metadata->etag)},
334337
read_context);
335338

336-
for (const auto & constant_column : reader.constant_columns_with_values)
339+
if (use_iceberg_read_optimization)
337340
{
338-
chunk.addColumn(constant_column.first,
339-
constant_column.second.name_and_type.type->createColumnConst(
340-
chunk.getNumRows(), constant_column.second.value)->convertToFullColumnIfConst());
341+
for (const auto & constant_column : reader.constant_columns_with_values)
342+
{
343+
chunk.addColumn(constant_column.first,
344+
constant_column.second.name_and_type.type->createColumnConst(
345+
chunk.getNumRows(), constant_column.second.value)->convertToFullColumnIfConst());
346+
}
341347
}
342348

343349
if (chunk_size && chunk.hasColumns())
@@ -551,61 +557,64 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
551557
physical_columns_names[++column_counter] = column.getNameInStorage();
552558
/// now column_counter contains maximum column index
553559

554-
auto file_meta_data = object_info->getFileMetaInfo();
555-
if (file_meta_data.has_value())
560+
NamesAndTypesList requested_columns_copy = read_from_format_info.requested_columns;
561+
562+
if (context_->getSettingsRef()[Setting::allow_experimental_iceberg_read_optimization])
556563
{
557-
for (const auto & column : file_meta_data.value()->columns_info)
564+
auto file_meta_data = object_info->getFileMetaInfo();
565+
if (file_meta_data.has_value())
558566
{
559-
if (column.second.hyperrectangle.has_value())
567+
for (const auto & column : file_meta_data.value()->columns_info)
560568
{
561-
if (column.second.hyperrectangle.value().isPoint())
569+
if (column.second.hyperrectangle.has_value())
562570
{
563-
auto column_id = column.first;
571+
if (column.second.hyperrectangle.value().isPoint())
572+
{
573+
auto column_id = column.first;
564574

565-
if (column_id <= 0 || column_id > column_counter)
566-
{ /// Something wrong, ignore file metadata
567-
LOG_WARNING(log, "Incorrect column ID: {}, ignoring file metadata", column_id);
568-
constant_columns.clear();
569-
break;
570-
}
575+
if (column_id <= 0 || column_id > column_counter)
576+
{ /// Something wrong, ignore file metadata
577+
LOG_WARNING(log, "Incorrect column ID: {}, ignoring file metadata", column_id);
578+
constant_columns.clear();
579+
break;
580+
}
571581

572-
const auto & column_name = physical_columns_names[column_id];
582+
const auto & column_name = physical_columns_names[column_id];
573583

574-
auto i_column = requested_columns_list.find(column_name);
575-
if (i_column == requested_columns_list.end())
576-
continue;
584+
auto i_column = requested_columns_list.find(column_name);
585+
if (i_column == requested_columns_list.end())
586+
continue;
577587

578-
/// isPoint() method checks that left==right
579-
constant_columns_with_values[i_column->second.first] =
580-
ConstColumnWithValue{
581-
i_column->second.second,
582-
column.second.hyperrectangle.value().left
583-
};
584-
constant_columns.insert(column_name);
588+
/// isPoint() method checks that left==right
589+
constant_columns_with_values[i_column->second.first] =
590+
ConstColumnWithValue{
591+
i_column->second.second,
592+
column.second.hyperrectangle.value().left
593+
};
594+
constant_columns.insert(column_name);
585595

586-
LOG_DEBUG(log, "In file {} constant column {} with value {}",
587-
object_info->getPath(), column_name, column.second.hyperrectangle.value().left.dump());
596+
LOG_DEBUG(log, "In file {} constant column {} with value {}",
597+
object_info->getPath(), column_name, column.second.hyperrectangle.value().left.dump());
598+
}
588599
}
589600
}
590601
}
591-
}
592-
593-
NamesAndTypesList requested_columns_copy = read_from_format_info.requested_columns;
594602

595-
if (!constant_columns.empty())
596-
{
597-
size_t original_columns = requested_columns_copy.size();
598-
requested_columns_copy = requested_columns_copy.eraseNames(constant_columns);
599-
if (requested_columns_copy.size() + constant_columns.size() != original_columns)
603+
if (!constant_columns.empty())
600604
{
601-
LOG_WARNING(log, "Can't remove constant columns for file {} correct, fallback to read. Founded constant columns: [{}]",
602-
object_info->getPath(), constant_columns);
603-
requested_columns_copy = read_from_format_info.requested_columns;
604-
constant_columns.clear();
605-
constant_columns_with_values.clear();
605+
size_t original_columns = requested_columns_copy.size();
606+
requested_columns_copy = requested_columns_copy.eraseNames(constant_columns);
607+
if (requested_columns_copy.size() + constant_columns.size() != original_columns)
608+
{
609+
LOG_WARNING(log, "Can't remove constant columns for file {} correct, fallback to read. Founded constant columns: [{}]",
610+
object_info->getPath(), constant_columns);
611+
requested_columns_copy = read_from_format_info.requested_columns;
612+
constant_columns.clear();
613+
constant_columns_with_values.clear();
614+
}
615+
else if (requested_columns_copy.empty())
616+
need_only_count = true;
606617
}
607-
else if (requested_columns_copy.empty())
608-
need_only_count = true;
609618
}
610619

611620
std::optional<size_t> num_rows_from_cache

0 commit comments

Comments
 (0)