Skip to content

Commit 9beb975

Browse files
committed
bugfix: disable the process_group test on NPU, because we currently use the ATB-internal communication interface there.
Signed-off-by: Tao Peng <[email protected]>
1 parent 850ced1 commit 9beb975

File tree

4 files changed

+17
-3
lines changed

4 files changed

+17
-3
lines changed

xllm/core/runtime/llm_engine.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ LLMEngine::LLMEngine(const runtime::Options& options,
8989
// create ThreadPool for link cluster
9090
link_threadpool_ = std::make_unique<ThreadPool>(worker_clients_num_);
9191

92+
process_group_test();
93+
94+
// init thread pool
95+
threadpool_ = std::make_unique<ThreadPool>(16);
96+
}
97+
98+
void LLMEngine::process_group_test() {
99+
#if !defined(USE_NPU)
92100
// In multi-node serving mode, only driver engine
93101
// create worker_clients_.
94102
if (worker_clients_num_ > 1) {
@@ -101,9 +109,7 @@ LLMEngine::LLMEngine(const runtime::Options& options,
101109
// wait up to 4 seconds for all futures to complete
102110
folly::collectAll(futures).within(std::chrono::seconds(4)).get();
103111
}
104-
105-
// init thread pool
106-
threadpool_ = std::make_unique<ThreadPool>(16);
112+
#endif
107113
}
108114

109115
bool LLMEngine::init() {

xllm/core/runtime/llm_engine.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ class LLMEngine : public Engine {
108108
const Engine::KVCacheCapacity& kv_cache_cap);
109109
std::vector<std::vector<RawForwardInput>> prepare_inputs(
110110
std::vector<Batch>& batch);
111+
void process_group_test();
111112

112113
protected:
113114
// options

xllm/core/runtime/vlm_engine.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
6161
parallel_args, devices[i], options_, worker_type));
6262
}
6363

64+
process_group_test();
65+
}
66+
67+
void VLMEngine::process_group_test() {
68+
#if !defined(USE_NPU)
6469
if (workers_.size() > 1) {
6570
// test process group
6671
std::vector<folly::SemiFuture<folly::Unit>> futures;
@@ -71,6 +76,7 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
7176
// wait up to 4 seconds for all futures to complete
7277
folly::collectAll(futures).within(std::chrono::seconds(4)).get();
7378
}
79+
#endif
7480
}
7581

7682
bool VLMEngine::init() {

xllm/core/runtime/vlm_engine.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class VLMEngine : public Engine {
5353
bool init_model();
5454
Engine::KVCacheCapacity estimate_kv_cache_capacity();
5555
bool allocate_kv_cache(const Engine::KVCacheCapacity& kv_cache_cap);
56+
void process_group_test();
5657

5758
private:
5859
// options

0 commit comments

Comments
 (0)