Skip to content

Commit be80a0e

Browse files
korowaazat
andcommitted
Fix excessive memory usage for large amount of streams in readInOrder
Forcefully reduce the requested amount of num_streams while spreading mark ranges for inOrder reading to prevent excessive memory usage Co-authored-by: Azat Khuzhin <[email protected]>
1 parent e3c164f commit be80a0e

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

src/Processors/QueryPlan/ReadFromMergeTree.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,20 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
11261126
return new_ranges;
11271127
};
11281128

1129+
if (num_streams > 1)
1130+
{
1131+
/// Reduce num_streams if requested value is unnecessarily large.
1132+
///
1133+
/// Additional increase of streams number in case of skewed parts, like it's
1134+
/// done in `spreadMarkRangesAmongStreams` won't affect overall performance
1135+
/// due to the single downstream `MergingSortedTransform`.
1136+
if (info.sum_marks < num_streams * info.min_marks_for_concurrent_read && parts_with_ranges.size() < num_streams)
1137+
{
1138+
num_streams = std::max(
1139+
(info.sum_marks + info.min_marks_for_concurrent_read - 1) / info.min_marks_for_concurrent_read, parts_with_ranges.size());
1140+
}
1141+
}
1142+
11291143
const size_t min_marks_per_stream = (info.sum_marks - 1) / num_streams + 1;
11301144
bool need_preliminary_merge = (parts_with_ranges.size() > settings[Setting::read_in_order_two_level_merge_threshold]);
11311145

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
DROP TABLE IF EXISTS 03403_data;
2+
CREATE TABLE 03403_data(id UInt32, val String) ENGINE = MergeTree ORDER BY id AS SELECT 1, 'test';
3+
4+
SELECT *
5+
FROM 03403_data
6+
ORDER BY id
7+
FORMAT Null
8+
SETTINGS max_threads = 1024,
9+
max_streams_to_max_threads_ratio = 10000000;
10+
11+
SYSTEM FLUSH LOGS query_log;
12+
13+
SELECT memory_usage < 10_000_000
14+
FROM system.query_log
15+
WHERE Settings['max_streams_to_max_threads_ratio'] = '10000000'
16+
AND query like '%FROM 03403_data%'
17+
AND type = 'QueryFinish'
18+
AND current_database = currentDatabase();
19+
20+
DROP TABLE 03403_data;

0 commit comments

Comments
 (0)