Skip to content

Commit b19cb30

Browse files
kaka11chenmorningman
authored andcommitted
[opt](multi-catalog) Optimize remote scan concurrency. (apache#51415)
Problem Summary: [opt] (multi-catalog) Optimize remote scan concurrency. 1. Use `ScannerScheduler::get_remote_scan_thread_num()` to replace `config::doris_scanner_thread_pool_thread_num` when calculate max scanners in the external table case. 2. Remove `parallel_scan_max_scanners_count` calculation logic.
1 parent 4b12a75 commit b19cb30

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

be/src/pipeline/exec/file_scan_operator.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ Status FileScanLocalState::_init_scanners(std::list<vectorized::VScannerSPtr>* s
3939

4040
auto& p = _parent->cast<FileScanOperatorX>();
4141
// There's only one scan range for each backend in batch split mode. Each backend only starts up one ScanNode instance.
42-
size_t shard_num = std::min<size_t>(
43-
config::doris_scanner_thread_pool_thread_num / p.query_parallel_instance_num(),
44-
_max_scanners);
42+
size_t shard_num = std::min<size_t>(vectorized::ScannerScheduler::get_remote_scan_thread_num() /
43+
p.query_parallel_instance_num(),
44+
_max_scanners);
4545
shard_num = std::max(shard_num, (size_t)1);
4646
_kv_cache.reset(new vectorized::ShardedKVCache(shard_num));
4747
for (int i = 0; i < _max_scanners; ++i) {
@@ -65,9 +65,8 @@ void FileScanLocalState::set_scan_ranges(RuntimeState* state,
6565
auto& p = _parent->cast<FileScanOperatorX>();
6666

6767
auto calc_max_scanners = [&](int parallel_instance_num) -> int {
68-
int max_scanners = config::doris_scanner_thread_pool_thread_num / parallel_instance_num;
69-
max_scanners =
70-
std::max(std::max(max_scanners, state->parallel_scan_max_scanners_count()), 1);
68+
int max_scanners =
69+
vectorized::ScannerScheduler::get_remote_scan_thread_num() / parallel_instance_num;
7170
if (should_run_serial()) {
7271
max_scanners = 1;
7372
}

be/src/vec/exec/scan/scanner_scheduler.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -323,11 +323,12 @@ void ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
323323
}
324324

325325
int ScannerScheduler::get_remote_scan_thread_num() {
326-
int remote_max_thread_num = config::doris_max_remote_scanner_thread_pool_thread_num != -1
327-
? config::doris_max_remote_scanner_thread_pool_thread_num
328-
: std::max(512, CpuInfo::num_cores() * 10);
329-
remote_max_thread_num =
330-
std::max(remote_max_thread_num, config::doris_scanner_thread_pool_thread_num);
326+
static int remote_max_thread_num = []() {
327+
int num = config::doris_max_remote_scanner_thread_pool_thread_num != -1
328+
? config::doris_max_remote_scanner_thread_pool_thread_num
329+
: std::max(512, CpuInfo::num_cores() * 10);
330+
return std::max(num, config::doris_scanner_thread_pool_thread_num);
331+
}();
331332
return remote_max_thread_num;
332333
}
333334

0 commit comments

Comments
 (0)