@@ -266,7 +266,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu
266266 Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns ();
267267
268268 // / Collect columns used in the sorting key expressions.
269- std::set<String> key_columns;
269+ NameSet key_columns;
270270 auto storage_columns = global_ctx->storage_columns .getNameSet ();
271271 for (const auto & name : sort_key_columns_vec)
272272 {
@@ -292,6 +292,15 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu
292292 if (global_ctx->merging_params .mode == MergeTreeData::MergingParams::VersionedCollapsing)
293293 key_columns.emplace (global_ctx->merging_params .sign_column );
294294
295+ // / Force all column parameters of Graphite mode (path, time, value, version) to be key columns
296+ if (global_ctx->merging_params .mode == MergeTreeData::MergingParams::Graphite)
297+ {
298+ key_columns.emplace (global_ctx->merging_params .graphite_params .path_column_name );
299+ key_columns.emplace (global_ctx->merging_params .graphite_params .time_column_name );
300+ key_columns.emplace (global_ctx->merging_params .graphite_params .value_column_name );
301+ key_columns.emplace (global_ctx->merging_params .graphite_params .version_column_name );
302+ }
303+
295304 // / Force to merge at least one column in case of empty key
296305 if (key_columns.empty ())
297306 key_columns.emplace (global_ctx->storage_columns .front ().name );
@@ -303,6 +312,11 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu
303312 key_columns.insert (minmax_columns.begin (), minmax_columns.end ());
304313 }
305314
315+ key_columns.insert (global_ctx->deduplicate_by_columns .begin (), global_ctx->deduplicate_by_columns .end ());
316+
317+ // / Key columns are required for the merge and must not be expired early.
318+ global_ctx->merge_required_key_columns = key_columns;
319+
306320 const auto & skip_indexes = global_ctx->metadata_snapshot ->getSecondaryIndices ();
307321
308322 for (const auto & index : skip_indexes)
@@ -468,23 +482,37 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
468482
469483 const auto & patch_parts = global_ctx->future_part ->patch_parts ;
470484
471- // / Skip fully expired columns manually, since in case of
472- // / need_remove_expired_values is not set, TTLTransform will not be used,
473- // / and columns that had been removed by TTL (via TTLColumnAlgorithm) will
474- // / be added again with default values.
485+ // / Determine columns that are absent in all source parts—either fully expired or never written—and mark them as
486+ // / expired to avoid unnecessary reads or writes during merges.
487+ // /
488+ // / NOTE:
489+ // / Handling missing columns that have default expressions is non-trivial and currently unresolved
490+ // / (see https://github.com/ClickHouse/ClickHouse/issues/91127).
491+ // / For now, we conservatively avoid expiring such columns.
475492 // /
476- // / Also note, that it is better to do this here, since in other places it
477- // / will be too late (i.e. they will be written, and we will burn CPU/disk
478- // / resources for this).
479- if (!ctx->need_remove_expired_values )
493+ // / The main challenges include:
494+ // / 1. A default expression may depend on other columns, which themselves may be missing or expired,
495+ // / making it unclear whether the default should be materialized or recomputed.
496+ // / 2. Default expressions may introduce semantic changes if re-evaluated during merges, leading to
497+ // / non-deterministic results across parts.
480498 {
481- for (auto & [column_name, ttl] : global_ctx->new_data_part ->ttl_infos .columns_ttl )
499+ NameSet columns_present_in_parts;
500+ columns_present_in_parts.reserve (global_ctx->storage_columns .size ());
501+
502+ // / Collect all column names that actually exist in the source parts
503+ for (const auto & part : global_ctx->future_part ->parts )
482504 {
483- if (ttl.finished ())
484- {
485- global_ctx->new_data_part ->expired_columns .insert (column_name);
486- LOG_TRACE (ctx->log , " Adding expired column {} for part {}" , column_name, global_ctx->new_data_part ->name );
487- }
505+ for (const auto & col : part->getColumns ())
506+ columns_present_in_parts.emplace (col.name );
507+ }
508+
509+ const auto & columns_desc = global_ctx->metadata_snapshot ->getColumns ();
510+
511+ // / Any storage column not present in any part and without a default expression is considered expired
512+ for (const auto & storage_column : global_ctx->storage_columns )
513+ {
514+ if (!columns_present_in_parts.contains (storage_column.name ) && !columns_desc.getDefault (storage_column.name ))
515+ global_ctx->new_data_part ->expired_columns .emplace (storage_column.name );
488516 }
489517 }
490518
@@ -510,8 +538,27 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
510538 if (!expired_columns.empty ())
511539 {
512540 global_ctx->gathering_columns = global_ctx->gathering_columns .eraseNames (expired_columns);
513- global_ctx->merging_columns = global_ctx->merging_columns .eraseNames (expired_columns);
514- global_ctx->storage_columns = global_ctx->storage_columns .eraseNames (expired_columns);
541+
542+ auto filter_columns = [&](const NamesAndTypesList & input, NamesAndTypesList & expired_out)
543+ {
     /// Partitions `input` in a single pass:
     ///  - every column listed in `expired_columns` is appended to `expired_out`;
     ///  - the returned list keeps each column that is not expired, plus any expired column
     ///    that is listed in `global_ctx->merge_required_key_columns`.
     /// An expired column that is required for the merge therefore ends up in both lists.
     /// `expired_out` is only appended to here; it is not cleared first.
544+ NamesAndTypesList result;
545+ for (const auto & column : input)
546+ {
547+ bool is_expired = expired_columns.contains (column.name );
548+ bool is_required_for_merge = global_ctx->merge_required_key_columns .contains (column.name );
549+
     /// Record the column as expired regardless of whether it is kept below.
550+ if (is_expired)
551+ expired_out.push_back (column);
552+
     /// Drop the column only when it is expired and not required for the merge.
553+ if (!is_expired || is_required_for_merge)
554+ result.push_back (column);
555+ }
556+
557+ return result;
558+ };
559+
560+ global_ctx->merging_columns = filter_columns (global_ctx->merging_columns , global_ctx->merging_columns_expired_by_ttl );
561+ global_ctx->storage_columns = filter_columns (global_ctx->storage_columns , global_ctx->storage_columns_expired_by_ttl );
515562 }
516563
517564 global_ctx->new_data_part ->uuid = global_ctx->future_part ->uuid ;
@@ -612,6 +659,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
612659 case MergeAlgorithm::Horizontal:
613660 {
614661 global_ctx->merging_columns = global_ctx->storage_columns ;
662+ global_ctx->merging_columns_expired_by_ttl = global_ctx->storage_columns_expired_by_ttl ;
615663 global_ctx->merging_skip_indexes = global_ctx->metadata_snapshot ->getSecondaryIndices ();
616664 global_ctx->gathering_columns .clear ();
617665 global_ctx->skip_indexes_by_column .clear ();
@@ -1786,11 +1834,13 @@ class TTLStep : public ITransformingStep
17861834 const MergeTreeData & storage_,
17871835 const StorageMetadataPtr & metadata_snapshot_,
17881836 const MergeTreeData::MutableDataPartPtr & data_part_,
1837+ const NamesAndTypesList & expired_columns_,
17891838 time_t current_time,
17901839 bool force_)
1791- : ITransformingStep(input_header_, input_header_, getTraits())
1840+ : ITransformingStep(input_header_, TTLTransform::addExpiredColumnsToBlock( input_header_, expired_columns_) , getTraits())
17921841 {
1793- transform = std::make_shared<TTLTransform>(context_, input_header_, storage_, metadata_snapshot_, data_part_, current_time, force_);
1842+ transform = std::make_shared<TTLTransform>(
1843+ context_, input_header_, storage_, metadata_snapshot_, data_part_, expired_columns_, current_time, force_);
17941844 subqueries_for_sets = transform->getSubqueries ();
17951845 }
17961846
@@ -2014,10 +2064,17 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
20142064 PreparedSets::Subqueries subqueries;
20152065
20162066 // / TTL step
2017- if (ctx->need_remove_expired_values )
2067+ if (ctx->need_remove_expired_values || !global_ctx-> merging_columns_expired_by_ttl . empty () )
20182068 {
20192069 auto ttl_step = std::make_unique<TTLStep>(
2020- merge_parts_query_plan.getCurrentHeader (), global_ctx->context , *global_ctx->data , global_ctx->metadata_snapshot , global_ctx->new_data_part , global_ctx->time_of_merge , ctx->force_ttl );
2070+ merge_parts_query_plan.getCurrentHeader (),
2071+ global_ctx->context ,
2072+ *global_ctx->data ,
2073+ global_ctx->metadata_snapshot ,
2074+ global_ctx->new_data_part ,
2075+ global_ctx->merging_columns_expired_by_ttl ,
2076+ global_ctx->time_of_merge ,
2077+ ctx->force_ttl );
20212078 subqueries = ttl_step->getSubqueries ();
20222079 ttl_step->setStepDescription (" TTL step" );
20232080 merge_parts_query_plan.addStep (std::move (ttl_step));
0 commit comments