@@ -1261,6 +1261,18 @@ static std::pair<std::shared_ptr<ExpressionActions>, String> createExpressionFor
12611261 return {std::make_shared<ExpressionActions>(std::move (actions)), sign_filter->getColumnName ()};
12621262}
12631263
1264+ static std::pair<std::shared_ptr<ExpressionActions>, String> createExpressionForIsDeleted (const String & is_deleted_column_name, const Block & header, const ContextPtr & context)
1265+ {
1266+ ASTPtr is_deleted_identifier = std::make_shared<ASTIdentifier>(is_deleted_column_name);
1267+ ASTPtr is_deleted_filter = makeASTFunction (" equals" , is_deleted_identifier, std::make_shared<ASTLiteral>(Field (static_cast <Int8>(0 ))));
1268+
1269+ const auto & is_deleted_column = header.getByName (is_deleted_column_name);
1270+
1271+ auto syntax_result = TreeRewriter (context).analyze (is_deleted_filter, {{is_deleted_column.name , is_deleted_column.type }});
1272+ auto actions = ExpressionAnalyzer (is_deleted_filter, syntax_result, context).getActionsDAG (false );
1273+ return {std::make_shared<ExpressionActions>(std::move (actions)), is_deleted_filter->getColumnName ()};
1274+ }
1275+
12641276bool ReadFromMergeTree::doNotMergePartsAcrossPartitionsFinal () const
12651277{
12661278 const auto & settings = context->getSettingsRef ();
@@ -1354,7 +1366,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
13541366 bool no_merging_final = do_not_merge_across_partitions_select_final &&
13551367 std::distance (parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1 ]) == 1 &&
13561368 parts_to_merge_ranges[range_index]->data_part ->info .level > 0 &&
1357- data. merging_params . is_deleted_column . empty () && !reader_settings.read_in_order ;
1369+ !reader_settings.read_in_order ;
13581370
13591371 if (no_merging_final)
13601372 {
@@ -1386,11 +1398,12 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
13861398 info.use_uncompressed_cache );
13871399 };
13881400
1389- // / Parts of non-zero level still may contain duplicate PK values to merge on FINAL if there's is_deleted column,
1390- // / so we have to process all ranges. It would be more optimal to remove this flag and add an extra filtering step.
1401+ // / Parts of non-zero level still may contain duplicate PK values to merge on FINAL if there's is_deleted column.
1402+ // / Non-intersecting ranges will just go through extra filter added by createExpressionForIsDeleted() to filter
1403+ // / deleted rows.
13911404 bool split_parts_ranges_into_intersecting_and_non_intersecting_final
1392- = settings[Setting::split_parts_ranges_into_intersecting_and_non_intersecting_final]
1393- && data. merging_params . is_deleted_column . empty () && !reader_settings.read_in_order ;
1405+ = settings[Setting::split_parts_ranges_into_intersecting_and_non_intersecting_final] &&
1406+ !reader_settings.read_in_order ;
13941407
13951408 SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey (
13961409 storage_snapshot->metadata ->getPrimaryKey (),
@@ -1477,6 +1490,21 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
14771490 return std::make_shared<FilterTransform>(header, expression, filter_name, true );
14781491 });
14791492 }
1493+ else if (!data.merging_params .is_deleted_column .empty ())
1494+ {
1495+ auto columns_with_is_deleted = origin_column_names;
1496+ if (std::ranges::find (columns_with_is_deleted, data.merging_params .is_deleted_column ) == columns_with_is_deleted.end ())
1497+ columns_with_is_deleted.push_back (data.merging_params .is_deleted_column );
1498+
1499+ pipe = spreadMarkRangesAmongStreams (
1500+ std::move (non_intersecting_parts_by_primary_key), index_build_context, num_streams, columns_with_is_deleted);
1501+ auto [expression, filter_name] = createExpressionForIsDeleted (data.merging_params .is_deleted_column , pipe.getHeader (), context);
1502+
1503+ pipe.addSimpleTransform ([&](const SharedHeader & header)
1504+ {
1505+ return std::make_shared<FilterTransform>(header, expression, filter_name, true );
1506+ });
1507+ }
14801508 else
14811509 {
14821510 pipe = spreadMarkRangesAmongStreams (std::move (non_intersecting_parts_by_primary_key), num_streams, origin_column_names);
0 commit comments