jd-opensource
diff --git a/xllm/core/framework/model/causal_lm.h b/xllm/core/framework/model/causal_lm.h
Lines changed: 10 additions & 12 deletions
diff --git a/xllm/core/framework/model/causal_vlm.h b/xllm/core/framework/model/causal_vlm.h
Lines changed: 4 additions & 4 deletions
diff --git a/xllm/core/layers/CMakeLists.txt b/xllm/core/layers/CMakeLists.txt
Lines changed: 3 additions & 0 deletions
diff --git a/xllm/core/layers/npu/llm_head.cpp b/xllm/core/layers/lm_head.h
rename from xllm/core/layers/npu/llm_head.cpp
rename to xllm/core/layers/lm_head.h
Lines changed: 14 additions & 9 deletions
diff --git a/xllm/core/layers/npu/CMakeLists.txt b/xllm/core/layers/npu/CMakeLists.txt
Lines changed: 6 additions & 12 deletions
diff --git a/xllm/core/layers/npu/atb_base.cpp b/xllm/core/layers/npu/atb_base.cpp
Lines changed: 0 additions & 250 deletions
@@ -26,10 +26,8 @@ limitations under the License.
 #include "core/framework/parallel_state.h"
 #include "core/framework/quant_args.h"
 #include "core/framework/state_dict/state_dict.h"
-#if defined(USE_NPU)
-#include "layers/npu/llm_head.h"
-#include "layers/npu/word_embedding.h"
-#endif
+#include "layers/lm_head.h"
+#include "layers/word_embedding.h"
 #include "model_args.h"
 #include "model_input_params.h"
 
@@ -65,10 +63,10 @@ class CausalLM : public torch::nn::Module {
   virtual const torch::TensorOptions& options() const = 0;
 
 #if defined(USE_NPU)
-  virtual hf::LlmHead get_lm_head() = 0;
-  virtual void set_lm_head(hf::LlmHead& head) = 0;
-  virtual hf::AtbWordEmbedding get_word_embedding() = 0;
-  virtual void set_word_embedding(hf::AtbWordEmbedding& embedding) = 0;
+  virtual LmHead get_lm_head() = 0;
+  virtual void set_lm_head(LmHead& head) = 0;
+  virtual WordEmbedding get_word_embedding() = 0;
+  virtual void set_word_embedding(WordEmbedding& embedding) = 0;
 #endif
 };
 
@@ -104,15 +102,15 @@ class CausalLMImpl : public CausalLM {
   }
 
 #if defined(USE_NPU)
-  hf::LlmHead get_lm_head() override { return model_->get_lm_head(); };
+  LmHead get_lm_head() override { return model_->get_lm_head(); };
 
-  void set_lm_head(hf::LlmHead& head) override { model_->set_lm_head(head); };
+  void set_lm_head(LmHead& head) override { model_->set_lm_head(head); };
 
-  hf::AtbWordEmbedding get_word_embedding() override {
+  WordEmbedding get_word_embedding() override {
     return model_->get_word_embedding();
   };
 
-  void set_word_embedding(hf::AtbWordEmbedding& embedding) override {
+  void set_word_embedding(WordEmbedding& embedding) override {
     model_->set_word_embedding(embedding);
   };
 #endif
 
@@ -65,15 +65,15 @@ class CausalVLMImpl : public CausalVLM {
   virtual void update_expert_weight(int32_t layer_id) { return; }
 
 #if defined(USE_NPU)
-  hf::LlmHead get_lm_head() override { return model_->get_lm_head(); };
+  LmHead get_lm_head() override { return model_->get_lm_head(); };
 
-  void set_lm_head(hf::LlmHead& head) override { model_->set_lm_head(head); };
+  void set_lm_head(LmHead& head) override { model_->set_lm_head(head); };
 
-  hf::AtbWordEmbedding get_word_embedding() override {
+  WordEmbedding get_word_embedding() override {
     return model_->get_word_embedding();
   };
 
-  void set_word_embedding(hf::AtbWordEmbedding& embedding) override {
+  void set_word_embedding(WordEmbedding& embedding) override {
     model_->set_word_embedding(embedding);
   };
 #endif
 
@@ -52,6 +52,9 @@ cc_library(
     qwen3_moe_decoder_layer.h
     rms_norm.h
     siglip_encoder_layer.h
+    pos_embedding.h
+    word_embedding.h
+    lm_head.h
   SRCS
     multi_head_attention.cpp
   DEPS
 
@@ -13,17 +13,29 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "llm_head.h"
+#pragma once
 
-#include "atb_head_impl.h"
+#if defined(USE_NPU)
+#include "npu/npu_lm_head_impl.h"
+#include "pytorch/adapter/utils/utils.h"
+#endif
 
-namespace xllm::hf {
+namespace xllm {
 
-std::shared_ptr<LlmHeadImpl> create_llm_head_layer(const Context& context) {
-  return std::make_shared<AtbLmHeadImpl>(context);
-}
+// The only include in this header that can supply NpuLmHeadImpl is guarded by
+// USE_NPU, so the holder is guarded the same way; left unguarded, a non-NPU
+// build that includes this header would reference an undeclared type.
+#if defined(USE_NPU)
+// Module holder for the NPU LM head implementation.
+class LmHead : public torch::nn::ModuleHolder<NpuLmHeadImpl> {
+ public:
+  using torch::nn::ModuleHolder<NpuLmHeadImpl>::ModuleHolder;
+  using Impl __attribute__((__unused__)) = NpuLmHeadImpl;
 
-LlmHead::LlmHead(const Context& context)
-    : ModuleHolder(create_llm_head_layer(context)) {}
+  // Constructs an NpuLmHeadImpl from `context` and wraps it in this holder.
+  LmHead(const Context& context)
+      : ModuleHolder(std::make_shared<NpuLmHeadImpl>(context)) {}
+};
+#endif  // defined(USE_NPU)
 
-}  // namespace xllm::hf
+}  // namespace xllm
@@ -6,12 +6,9 @@ cc_library(
   NAME
     npu_layers 
   HDRS
-    atb_base.h
-    word_embedding.h
-    atb_word_embedding_impl.h
-    pos_embedding.h
-    llm_head.h
-    atb_head_impl.h
+    npu_word_embedding_impl.h
+    npu_pos_embedding_impl.h
+    npu_lm_head_impl.h
     $<$<BOOL:${USE_A2}>:npu_qwen2dot5_vision_encoder_layer_impl.h>
     $<$<BOOL:${USE_A2}>:npu_qwen3_moe_decoder_layer_impl.h>
     # atb_parallel_linear.h
@@ -26,14 +23,11 @@ cc_library(
     npu_rms_norm_impl.h
     npu_siglip_encoder_layer_impl.h
   SRCS
+    npu_word_embedding_impl.cpp
+    npu_pos_embedding_impl.cpp
+    npu_lm_head_impl.cpp
     $<$<BOOL:${USE_A2}>:npu_qwen2dot5_vision_encoder_layer_impl.cpp>
     $<$<BOOL:${USE_A2}>:npu_qwen3_moe_decoder_layer_impl.cpp>
-    atb_base.cpp
-    word_embedding.cpp
-    atb_word_embedding_impl.cpp
-    pos_embedding.cpp
-    llm_head.cpp
-    atb_head_impl.cpp
     # atb_parallel_linear.cpp
     buffer/atb_buffer.cpp
     buffer/atb_workspace.cpp