
Commit 12dde6b

refactor: fix header redefinition and use DEFINE_WEIGHT to define weights. (#291)
Signed-off-by: Tao Peng <[email protected]>
1 parent 0b6c9e3 commit 12dde6b

File tree

9 files changed: +44 -29 lines

xllm/core/common/interruption_bus.h

Lines changed: 17 additions & 0 deletions

@@ -1,3 +1,20 @@
+/* Copyright 2025 The xLLM Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://github.com/jd-opensource/xllm/blob/main/LICENSE
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#pragma once
+
 #include <functional>
 #include <vector>
 
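For context on the "header redefinition" half of the commit title: interruption_bus.h previously had neither the license banner nor an include guard, so a translation unit that pulled it in twice (directly or transitively) redefined everything it declares. #pragma once makes repeat inclusion a no-op, matching the guard style every other xLLM header in this diff already uses. A minimal illustration of the failure mode, with a hypothetical header name:

// bus_types.h -- hypothetical stand-in for an unguarded header.
#pragma once  // remove this line and the double include below breaks

struct Interruption {
  int reason;
};

// consumer.cpp
#include "bus_types.h"
#include "bus_types.h"  // second include, typically via another header
// without the guard: error: redefinition of 'struct Interruption'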

xllm/core/common/mspti_helper.h

Lines changed: 2 additions & 0 deletions

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#pragma once
+
 #include <cstdint>
 #include <iostream>
 

xllm/core/distributed_runtime/remote_worker.h

Lines changed: 1 addition & 1 deletion

@@ -142,7 +142,7 @@ class RemoteWorker : public WorkerClient {
   ThreadPool threadpool_;
   // general working thread
   // do some overlap work with model execute
-  ThreadPool general_threadpool_{5};
+  ThreadPool general_threadpool_{4};
   const torch::Device device_;
 };
 }  // namespace xllm

xllm/core/distributed_runtime/worker_service.h

Lines changed: 1 addition & 1 deletion

@@ -149,7 +149,7 @@ class WorkerService : public proto::DistributeWorker {
 
   std::unique_ptr<std::thread> polling_thread_;
 
-  ThreadPool threadpool_{5};
+  ThreadPool threadpool_{4};
 };
 
 }  // namespace xllm

xllm/core/framework/xtensor/xtensor_manager_service.h

Lines changed: 2 additions & 2 deletions

@@ -70,8 +70,8 @@ class XTensorManagerService : public proto::DistributeXTensorManager {
   int32_t global_rank_;
   int32_t world_size_;
   torch::Device device_;
-  ThreadPool threadpool_{5};
+  ThreadPool threadpool_{4};
   std::unique_ptr<XTensorManager> xtensor_manager_;
 };
 
-}  // namespace xllm
\ No newline at end of file
+}  // namespace xllm
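All three services above shrink a fixed-size pool from five worker threads to four. For readers unfamiliar with the member being tuned: ThreadPool threadpool_{4} brace-initializes a pool with a fixed thread count. xLLM's ThreadPool is defined elsewhere in the tree; the sketch below is a generic illustration of the idiom, not the project's implementation (the schedule method name is an assumption):

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// Minimal fixed-size thread pool illustrating the `ThreadPool pool_{4}` idiom.
class ThreadPool {
 public:
  explicit ThreadPool(size_t num_threads) {
    for (size_t i = 0; i < num_threads; ++i) {
      workers_.emplace_back([this] {
        for (;;) {
          std::function<void()> task;
          {
            std::unique_lock<std::mutex> lock(mu_);
            cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); });
            if (stop_ && tasks_.empty()) return;
            task = std::move(tasks_.front());
            tasks_.pop();
          }
          task();  // run outside the lock so other workers can dequeue
        }
      });
    }
  }

  ~ThreadPool() {
    {
      std::lock_guard<std::mutex> lock(mu_);
      stop_ = true;
    }
    cv_.notify_all();
    for (auto& w : workers_) w.join();
  }

  // Enqueue a task for any idle worker to pick up.
  void schedule(std::function<void()> task) {
    {
      std::lock_guard<std::mutex> lock(mu_);
      tasks_.push(std::move(task));
    }
    cv_.notify_one();
  }

 private:
  std::vector<std::thread> workers_;
  std::queue<std::function<void()>> tasks_;
  std::mutex mu_;
  std::condition_variable cv_;
  bool stop_ = false;
};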

xllm/core/layers/common/fuse_norm.h

Lines changed: 2 additions & 1 deletion

@@ -18,6 +18,7 @@ limitations under the License.
 #include <torch/torch.h>
 
 #include "framework/state_dict/state_dict.h"
+#include "framework/state_dict/utils.h"
 
 namespace xllm {
 namespace layer {
@@ -33,7 +34,7 @@ class FusedRMSNormImpl : public torch::nn::Module {
   void load_state_dict(const StateDict& state_dict);
 
  private:
-  torch::Tensor weight_;
+  DEFINE_WEIGHT(weight);
   int64_t norm_dim_;
   double eps_;
 };
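This commit does not show DEFINE_WEIGHT itself (it lives in the newly included framework/state_dict/utils.h), but the renames elsewhere in the diff pin down its shape: DEFINE_WEIGHT(weight) must yield a weight_ tensor and a weight_is_loaded_ flag. A plausible minimal expansion, offered as a reconstruction rather than the project's actual definition:

// Hypothetical reconstruction of DEFINE_WEIGHT, inferred from the members the
// rewritten call sites use (weight_, weight_is_loaded_, in_proj_weight_, ...).
#define DEFINE_WEIGHT(name)        \
  torch::Tensor name##_{nullptr};  \
  bool name##_is_loaded_ = false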

xllm/core/layers/common/word_embedding_impl.h

Lines changed: 5 additions & 6 deletions

@@ -72,13 +72,14 @@ class WordEmbeddingImpl : public torch::nn::Module {
       CHECK_EQ(weight_.sizes(), weight.sizes())
           << "weight size mismatch for " << name();
       weight_.copy_(weight);
-      is_loaded_ = true;
+      weight_is_loaded_ = true;
     }
   }
 
   // whether the weight is loaded
   void verify_loaded_weights(const std::string& prefix) const {
-    CHECK(is_loaded_) << "weight is not loaded for " << prefix + "weight";
+    CHECK(weight_is_loaded_)
+        << "weight is not loaded for " << prefix + "weight";
   }
 
   void pretty_print(std::ostream& stream) const override {
@@ -94,11 +95,9 @@ class WordEmbeddingImpl : public torch::nn::Module {
 
   // world size
   PROPERTY(int32_t, world_size) = 0;
-  // parameter members, must be registered
-  torch::Tensor weight_{nullptr};
 
-  // whether the weight is loaded
-  bool is_loaded_ = false;
+  // parameter members, must be registered
+  DEFINE_WEIGHT(weight);
 
   // parallel args
   ParallelArgs parallel_args_;
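Putting the pieces together, a module declared with the macro keeps exactly the load/verify pattern this hunk rewrites. A condensed sketch assuming the DEFINE_WEIGHT reconstruction above (EmbeddingLike and load are hypothetical names, not xLLM's API):

#include <glog/logging.h>
#include <torch/torch.h>

class EmbeddingLike : public torch::nn::Module {
 public:
  EmbeddingLike(int64_t num_embeddings, int64_t dim) {
    weight_ = register_parameter(
        "weight", torch::empty({num_embeddings, dim}), /*requires_grad=*/false);
  }

  void load(const torch::Tensor& weight) {
    weight_.copy_(weight);
    weight_is_loaded_ = true;  // flag generated by DEFINE_WEIGHT
  }

  void verify_loaded_weights(const std::string& prefix) const {
    CHECK(weight_is_loaded_)
        << "weight is not loaded for " << prefix + "weight";
  }

 private:
  DEFINE_WEIGHT(weight);  // expands to weight_ and weight_is_loaded_
};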

xllm/core/layers/multi_head_attention.cpp

Lines changed: 8 additions & 8 deletions

@@ -71,37 +71,37 @@ void MultiheadAttentionImpl::load_state_dict(const StateDict& state_dict) {
   const auto in_proj_weight = state_dict.get_tensor("in_proj_weight");
   if (in_proj_weight.defined()) {
     in_proj_weight_ = in_proj_weight.to(options_);
-    is_in_proj_weight_loaded_ = true;
+    in_proj_weight_is_loaded_ = true;
   }
 
   const auto in_proj_bias = state_dict.get_tensor("in_proj_bias");
   if (in_proj_bias.defined()) {
     in_proj_bias_ = in_proj_bias.to(options_);
-    is_in_proj_bias_loaded_ = true;
+    in_proj_bias_is_loaded_ = true;
   }
 
   const auto out_proj_weight = state_dict.get_tensor("out_proj.weight");
   if (out_proj_weight.defined()) {
     out_proj_weight_ = out_proj_weight.to(options_);
-    is_out_proj_weight_loaded_ = true;
+    out_proj_weight_is_loaded_ = true;
   }
 
   const auto out_proj_bias = state_dict.get_tensor("out_proj.bias");
   if (out_proj_bias.defined()) {
     out_proj_bias_ = out_proj_bias.to(options_);
-    is_out_proj_bias_loaded_ = true;
+    out_proj_bias_is_loaded_ = true;
   }
 }
 
 void MultiheadAttentionImpl::verify_loaded_weights(
     const std::string& prefix) const {
-  CHECK(is_in_proj_weight_loaded_)
+  CHECK(in_proj_weight_is_loaded_)
       << "in_proj_weight is not loaded for " << prefix + "in_proj_weight";
-  CHECK(is_in_proj_bias_loaded_)
+  CHECK(in_proj_bias_is_loaded_)
       << "in_proj_bias is not loaded for " << prefix + "in_proj_bias";
-  CHECK(is_out_proj_weight_loaded_)
+  CHECK(out_proj_weight_is_loaded_)
      << "out_proj.weight is not loaded for " << prefix + "out_proj.weight";
-  CHECK(is_out_proj_bias_loaded_)
+  CHECK(out_proj_bias_is_loaded_)
      << "out_proj.bias is not loaded for " << prefix + "out_proj.bias";
 }
 
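Note the direction of every rename in this file: is_X_loaded_ becomes X_is_loaded_. Assuming a token-pasting expansion like the sketch above, the macro can only append tokens to the weight's name, so DEFINE_WEIGHT(X) generates X_is_loaded_, and each call site in the .cpp must be renamed to match.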

xllm/core/layers/multi_head_attention.h

Lines changed: 6 additions & 10 deletions

@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "framework/model_context.h"
 #include "framework/state_dict/state_dict.h"
+#include "framework/state_dict/utils.h"
 
 namespace xllm {
 namespace layer {
@@ -42,18 +43,13 @@ class MultiheadAttentionImpl : public torch::nn::Module {
   int64_t hidden_size_;
   torch::TensorOptions options_;
 
-  torch::Tensor in_proj_weight_;
-  torch::Tensor in_proj_bias_;
-  torch::Tensor out_proj_weight_;
-  torch::Tensor out_proj_bias_;
-
-  bool is_in_proj_weight_loaded_;
-  bool is_in_proj_bias_loaded_;
-  bool is_out_proj_weight_loaded_;
-  bool is_out_proj_bias_loaded_;
+  DEFINE_WEIGHT(in_proj_weight);
+  DEFINE_WEIGHT(in_proj_bias);
+  DEFINE_WEIGHT(out_proj_weight);
+  DEFINE_WEIGHT(out_proj_bias);
 };
 
 TORCH_MODULE(MultiheadAttention);
 
 }  // namespace layer
-}  // namespace xllm
\ No newline at end of file
+}  // namespace xllm
