@@ -256,7 +256,10 @@ class RangesInDataPartsBuilder
256256 return ;
257257 }
258258
259- ranges_in_data_parts[it->second ].ranges .push_back (mark_range);
259+ if (ranges_in_data_parts[it->second ].ranges .back ().end == mark_range.begin )
260+ ranges_in_data_parts[it->second ].ranges .back ().end = mark_range.end ;
261+ else
262+ ranges_in_data_parts[it->second ].ranges .push_back (mark_range);
260263 }
261264
262265 RangesInDataParts & getCurrentRangesInDataParts ()
@@ -289,6 +292,10 @@ struct PartsRangesIterator
289292
290293 if (event == other.event )
291294 {
295+ if (!selected && other.selected )
296+ return true ;
297+ if (selected && !other.selected )
298+ return false ;
292299 if (part_index == other.part_index )
293300 {
294301 // / Within the same part we should process events in order of mark numbers,
@@ -347,6 +354,7 @@ struct PartsRangesIterator
347354 MarkRange range;
348355 size_t part_index;
349356 EventType event;
357+ bool selected; // / Whether this range was selected or rejected in skip index filtering
350358};
351359
352360struct PartRangeIndex
@@ -475,7 +483,8 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts,
475483 in_reverse_order,
476484 range,
477485 part_index,
478- PartsRangesIterator::EventType::RangeStart});
486+ PartsRangesIterator::EventType::RangeStart,
487+ false });
479488
480489 const bool value_is_defined_at_end_mark = range.end < index_granularity->getMarksCount ();
481490 if (!value_is_defined_at_end_mark)
@@ -486,7 +495,8 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts,
486495 in_reverse_order,
487496 range,
488497 part_index,
489- PartsRangesIterator::EventType::RangeEnd});
498+ PartsRangesIterator::EventType::RangeEnd,
499+ false });
490500 }
491501 }
492502
@@ -713,7 +723,8 @@ SplitPartsByRanges splitIntersectingPartsRangesIntoLayers(
713723 in_reverse_order,
714724 range,
715725 part_index,
716- PartsRangesIterator::EventType::RangeStart};
726+ PartsRangesIterator::EventType::RangeStart,
727+ false };
717728 PartRangeIndex parts_range_start_index (parts_range_start);
718729 parts_ranges_queue.push ({std::move (parts_range_start), std::move (parts_range_start_index)});
719730
@@ -726,7 +737,8 @@ SplitPartsByRanges splitIntersectingPartsRangesIntoLayers(
726737 in_reverse_order,
727738 range,
728739 part_index,
729- PartsRangesIterator::EventType::RangeEnd};
740+ PartsRangesIterator::EventType::RangeEnd,
741+ false };
730742 PartRangeIndex parts_range_end_index (parts_range_end);
731743 parts_ranges_queue.push ({std::move (parts_range_end), std::move (parts_range_end_index)});
732744 }
@@ -911,6 +923,130 @@ static ASTs buildFilters(const KeyDescription & primary_key, const std::vector<V
911923 return filters;
912924}
913925
/// Widens the mark ranges held in `ranges_in_data_parts` with additional marks whose
/// primary-key values overlap the key interval of an already present range.
/// (Judging by the function name and log text the input is the result of skip-index
/// filtering for FINAL; the caller is not in view - TODO confirm.)
///
/// Input ranges are treated as "selected"; every mark of a part that no input range covers
/// becomes a one-mark "rejected" range. Both lists are sorted and walked together, and a
/// rejected range is added to the result when its start or end key value falls inside the
/// key interval of the current selected range, or when it encloses that interval.
///
/// Falls back to "all marks of all parts" (one range [0, getMarksCountWithoutFinal()) per part)
/// when `cannot_sort_primary_key` is true, or when a key value would be needed at a mark index
/// that is not below getMarksCount().
///
/// @param ranges_in_data_parts     Parts with their current ranges. NOTE: the fallback path moves
///                                 from this reference, leaving the caller's object moved-from.
/// @param cannot_sort_primary_key  When true, skip all analysis and return the fallback.
/// @param logger                   Used for trace messages only.
/// @return Parts ordered by `part_index_in_query`, each with its ranges sorted.
926+ RangesInDataParts findPKRangesForFinalAfterSkipIndexImpl (RangesInDataParts & ranges_in_data_parts, bool cannot_sort_primary_key, const LoggerPtr & logger)
927+ {
928+ IndexAccess index_access (ranges_in_data_parts);
929+ std::vector<PartsRangesIterator> selected_ranges;
930+ std::vector<PartsRangesIterator> rejected_ranges;
931+
/// Collects the output ranges through addRange().
932+ RangesInDataPartsBuilder result (ranges_in_data_parts);
933+
/// Fallback: take over the input container and replace every part's ranges with the single
/// range [0, getMarksCountWithoutFinal()).
/// NOTE(review): `ranges_in_data_parts` is a non-const reference to the caller's object, so the
/// std::move below leaves it moved-from - confirm callers do not use it afterwards.
934+ auto skip_and_return_all_part_ranges = [&]()
935+ {
936+ RangesInDataParts all_part_ranges (std::move (ranges_in_data_parts));
937+ for (auto & all_part_range : all_part_ranges)
938+ {
939+ const auto & index_granularity = all_part_range.data_part ->index_granularity ;
940+ all_part_range.ranges = MarkRanges{{MarkRange{0 , index_granularity->getMarksCountWithoutFinal ()}}};
941+ }
942+ return all_part_ranges;
943+ };
944+
945+ if (cannot_sort_primary_key) /// Caller reports the key as not sortable: just expand to all parts + ranges
946+ {
947+ return skip_and_return_all_part_ranges ();
948+ }
949+
/// Phase 1: per part, classify every mark as selected (covered by an input range) or rejected.
950+ for (size_t part_index = 0 ; part_index < ranges_in_data_parts.size (); ++part_index)
951+ {
952+ const auto & index_granularity = ranges_in_data_parts[part_index].data_part ->index_granularity ;
/// One flag per mark, final mark excluded; true once an input range covers the mark.
953+ std::vector<bool > is_selected_range (index_granularity->getMarksCountWithoutFinal (), false );
954+ for (const auto & range : ranges_in_data_parts[part_index].ranges )
955+ {
/// The merge phase reads the key value at `range.end`, so that mark index must exist.
956+ const bool value_is_defined_at_end_mark = range.end < index_granularity->getMarksCount ();
957+ if (!value_is_defined_at_end_mark)
958+ {
959+ return skip_and_return_all_part_ranges ();
960+ }
961+
/// The entry is keyed by the value at the range's first mark; the trailing `true` fills the `selected` field.
962+ selected_ranges.push_back (
963+ {index_access.getValue (part_index, range.begin ), false , range, part_index, PartsRangesIterator::EventType::RangeStart, true });
964+ for (auto i = range.begin ; i < range.end ;i++)
965+ is_selected_range[i] = true ;
966+ }
967+
/// Every mark not covered above becomes a one-mark rejected range [range_begin, range_begin + 1).
968+ for (size_t range_begin = 0 ; range_begin < is_selected_range.size (); range_begin++)
969+ {
/// NOTE(review): this check runs before the is_selected_range test, so it triggers the fallback
/// for selected marks too. Presumably it can only fail on the last mark of a part whose
/// getMarksCount() equals getMarksCountWithoutFinal() - confirm.
970+ const bool value_is_defined_at_end_mark = ((range_begin + 1 ) < index_granularity->getMarksCount ());
971+ if (!value_is_defined_at_end_mark)
972+ {
973+ return skip_and_return_all_part_ranges ();
974+ }
975+
976+ if (is_selected_range[range_begin])
977+ continue ;
978+ MarkRange rejected_range (range_begin, range_begin + 1 );
979+ rejected_ranges.push_back (
980+ {index_access.getValue (part_index, rejected_range.begin ), false , rejected_range, part_index, PartsRangesIterator::EventType::RangeStart, false });
981+ }
982+ }
983+
/// Phase 2: order both lists with PartsRangesIterator's operator<.
984+ ::sort (selected_ranges.begin(), selected_ranges.end());
985+
986+ ::sort (rejected_ranges.begin(), rejected_ranges.end());
987+
988+ LOG_TRACE (logger, " findPKRangesForFinalAfterSkipIndex : sorting phase complete" );
989+
/// Phase 3: walk both sorted lists together; the loop ends as soon as either list is exhausted.
990+ std::vector<PartsRangesIterator>::iterator selected_ranges_iter = selected_ranges.begin ();
991+ std::vector<PartsRangesIterator>::iterator rejected_ranges_iter = rejected_ranges.begin ();
/// Counts rejected ranges that end up in the result; used only in the final trace message.
992+ size_t more_ranges_added = 0 ;
993+
994+ while (selected_ranges_iter != selected_ranges.end () && rejected_ranges_iter != rejected_ranges.end ())
995+ {
/// Key values at the current selected range's begin/end marks and at the rejected range's begin mark.
996+ auto selected_range_start = selected_ranges_iter->value ;
997+ auto selected_range_end = index_access.getValue (selected_ranges_iter->part_index , selected_ranges_iter->range .end );
998+ auto rejected_range_start = rejected_ranges_iter->value ;
999+
/// NOTE(review): assumes compareValues returns <0 / 0 / >0 like a three-way comparison; its definition is not in view.
1000+ int result1 = compareValues (rejected_range_start, selected_range_start, false );
1001+ int result2 = compareValues (rejected_range_start, selected_range_end, false );
1002+
1003+ if (result1 == 0 || result2 == 0 || (result1 > 0 && result2 < 0 )) /// rejected_range_start inside [selected_range], bounds included
1004+ {
1005+ result.addRange (rejected_ranges_iter->part_index , rejected_ranges_iter->range );
1006+ rejected_ranges_iter++;
1007+ more_ranges_added++;
1008+ }
1009+ else if (result1 > 0 ) /// rejected_range_start beyond [selected_range]: emit the selected range and move to the next one
1010+ {
1011+ result.addRange (selected_ranges_iter->part_index , selected_ranges_iter->range );
1012+ selected_ranges_iter++;
1013+ }
1014+ else
1015+ {
/// The rejected range starts before the selected one; its end decides whether the two overlap.
1016+ auto rejected_range_end = index_access.getValue (rejected_ranges_iter->part_index , rejected_ranges_iter->range .end );
1017+ int result3 = compareValues (rejected_range_end, selected_range_start, false );
1018+ int result4 = compareValues (rejected_range_end, selected_range_end, false );
1019+ /// rejected_range_end inside [selected range] OR [rejected range] encompasses [selected range]
1020+ if (result3 == 0 || result4 == 0 || (result3 > 0 && result4 < 0 ) || (result1 < 0 && result4 > 0 ))
1021+ {
1022+ result.addRange (rejected_ranges_iter->part_index , rejected_ranges_iter->range );
1023+ more_ranges_added++;
1024+ }
/// Advance whether or not the range was added.
1025+ rejected_ranges_iter++;
1026+ }
1027+ }
1028+
/// Remaining selected ranges are always kept; rejected ranges still pending at this point are not added.
1029+ while (selected_ranges_iter != selected_ranges.end ())
1030+ {
1031+ result.addRange (selected_ranges_iter->part_index , selected_ranges_iter->range );
1032+ selected_ranges_iter++;
1033+ }
1034+
/// getCurrentRangesInDataParts() returns a reference; plain `auto` copies the container.
/// Ranges were added in key order, so restore part order and per-part mark order.
1035+ auto result_final_ranges = result.getCurrentRangesInDataParts ();
1036+ std::stable_sort (
1037+ result_final_ranges.begin (),
1038+ result_final_ranges.end (),
1039+ [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query ; });
1040+ for (auto & result_final_range : result_final_ranges)
1041+ {
1042+ std::sort (result_final_range.ranges .begin (), result_final_range.ranges .end ());
1043+ }
1044+
1045+ LOG_TRACE (logger, " findPKRangesForFinalAfterSkipIndex : processed {} parts, initially selected {} ranges & rejected {}, more {} ranges added" , ranges_in_data_parts.size (), selected_ranges.size (), rejected_ranges.size (), more_ranges_added);
1046+
1047+ return result_final_ranges;
1048+ }
1049+
9141050static void reorderColumns (ActionsDAG & dag, const Block & header, const std::string & filter_column)
9151051{
9161052 std::unordered_map<std::string_view, const ActionsDAG::Node *> inputs_map;
@@ -1056,4 +1192,18 @@ Pipes readByLayers(
10561192 return merging_pipes;
10571193}
10581194
/// Entry point: decides whether the primary key can be used for ordering and delegates to
/// findPKRangesForFinalAfterSkipIndexImpl.
///
/// The key is treated as not sortable when isSafePrimaryKey(primary_key) returns false or when
/// sorting_key.reverse_flags is non-empty; the Impl is then called with
/// cannot_sort_primary_key = true, which makes it return all marks of all parts.
///
/// @param primary_key           Passed to isSafePrimaryKey().
/// @param sorting_key           Only its `reverse_flags` member is inspected here.
/// @param ranges_in_data_parts  Forwarded by non-const reference to the Impl, which may move from it.
/// @param logger                Used for trace messages here and in the Impl.
/// @return Whatever the Impl returns.
1195+ RangesInDataParts findPKRangesForFinalAfterSkipIndex (
1196+ const KeyDescription & primary_key,
1197+ const KeyDescription & sorting_key,
1198+ RangesInDataParts & ranges_in_data_parts,
1199+ const LoggerPtr & logger)
1200+ {
1201+ bool cannot_sort_primary_key = false ;
1202+ if (!isSafePrimaryKey (primary_key) || !sorting_key.reverse_flags .empty ())
1203+ {
1204+ LOG_TRACE (logger, " Primary key is not sortable, expanding PK range to entire due to exact_mode." );
1205+ cannot_sort_primary_key = true ;
1206+ }
1207+ return findPKRangesForFinalAfterSkipIndexImpl (ranges_in_data_parts, cannot_sort_primary_key, logger);
1208+ }
10591209}
0 commit comments