bugfix: disable the process_group test on NPU because we currently use the ATB-internal comm interface.

yq33victor · yq33victor · commit 9beb9759eb70 · 2025-11-03T17:56:40.000+08:00
Signed-off-by: Tao Peng <pengtao.156@jd.com>
diff --git a/xllm/core/runtime/llm_engine.cpp b/xllm/core/runtime/llm_engine.cpp
@@ -89,6 +89,14 @@ LLMEngine::LLMEngine(const runtime::Options& options,
   // create ThreadPool for link cluster
   link_threadpool_ = std::make_unique<ThreadPool>(worker_clients_num_);
 
+  process_group_test();
+
+  // init thread pool
+  threadpool_ = std::make_unique<ThreadPool>(16);
+}
+
+void LLMEngine::process_group_test() {
+#if !defined(USE_NPU)
   // In multi-node serving mode, only driver engine
   // create worker_clients_.
   if (worker_clients_num_ > 1) {
@@ -101,9 +109,7 @@ LLMEngine::LLMEngine(const runtime::Options& options,
     // wait up to 4 seconds for all futures to complete
     folly::collectAll(futures).within(std::chrono::seconds(4)).get();
   }
-
-  // init thread pool
-  threadpool_ = std::make_unique<ThreadPool>(16);
+#endif
 }
 
 bool LLMEngine::init() {
diff --git a/xllm/core/runtime/llm_engine.h b/xllm/core/runtime/llm_engine.h
@@ -108,6 +108,7 @@ class LLMEngine : public Engine {
       const Engine::KVCacheCapacity& kv_cache_cap);
   std::vector<std::vector<RawForwardInput>> prepare_inputs(
       std::vector<Batch>& batch);
+  void process_group_test();
 
  protected:
   // options
diff --git a/xllm/core/runtime/vlm_engine.cpp b/xllm/core/runtime/vlm_engine.cpp
@@ -61,6 +61,11 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
         parallel_args, devices[i], options_, worker_type));
   }
 
+  process_group_test();
+}
+
+void VLMEngine::process_group_test() {
+#if !defined(USE_NPU)
   if (workers_.size() > 1) {
     // test process group
     std::vector<folly::SemiFuture<folly::Unit>> futures;
@@ -71,6 +76,7 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
     // wait up to 4 seconds for all futures to complete
     folly::collectAll(futures).within(std::chrono::seconds(4)).get();
   }
+#endif
 }
 
 bool VLMEngine::init() {
diff --git a/xllm/core/runtime/vlm_engine.h b/xllm/core/runtime/vlm_engine.h
@@ -53,6 +53,7 @@ class VLMEngine : public Engine {
   bool init_model();
   Engine::KVCacheCapacity estimate_kv_cache_capacity();
   bool allocate_kv_cache(const Engine::KVCacheCapacity& kv_cache_cap);
+  void process_group_test();
 
  private:
   // options

Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,11 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {`
`61`	`61`	`parallel_args, devices[i], options_, worker_type));`
`62`	`62`	`}`
`63`	`63`
	`64`	`+ process_group_test();`
	`65`	`+}`
	`66`	`+`
	`67`	`+void VLMEngine::process_group_test() {`
	`68`	`+#if !defined(USE_NPU)`
`64`	`69`	`if (workers_.size() > 1) {`
`65`	`70`	`// test process group`
`66`	`71`	`std::vector<folly::SemiFuture<folly::Unit>> futures;`
`@@ -71,6 +76,7 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {`
`71`	`76`	`// wait up to 4 seconds for all futures to complete`
`72`	`77`	`folly::collectAll(futures).within(std::chrono::seconds(4)).get();`
`73`	`78`	`}`
	`79`	`+#endif`
`74`	`80`	`}`
`75`	`81`
`76`	`82`	`bool VLMEngine::init() {`