Commit 8e8ad82

refactor: add USE_NPU macro to multiple layer implementations for platform support and add layer namespace

1 parent 38a90ea commit 8e8ad82

54 files changed: +233 −156 lines (the page shows diffs for a subset of the changed files; the rest are hidden by default for large commits)
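Every file below gets the same two-part treatment: layer types move from xllm into a nested xllm::layer namespace, and the NPU-backed ModuleHolder wrappers plus their implementation headers are fenced behind USE_NPU so non-NPU builds no longer see them. A minimal sketch of the resulting header shape, using the hypothetical names FooLayer and NpuFooLayerImpl rather than any type from this commit:

    // Sketch of the post-commit header layout (FooLayer / NpuFooLayerImpl are
    // placeholder names; Context stands in for xllm's context type).
    #if defined(USE_NPU)
    #include "npu/npu_foo_layer_impl.h"  // hypothetical NPU implementation header
    #endif

    namespace xllm {
    namespace layer {

    #if defined(USE_NPU)
    // The wrapper exists only in NPU builds; other platforms get a
    // compile-time error instead of a runtime failure if they reference it.
    class FooLayer : public torch::nn::ModuleHolder<NpuFooLayerImpl> {
     public:
      using torch::nn::ModuleHolder<NpuFooLayerImpl>::ModuleHolder;

      FooLayer(const Context& context)
          : ModuleHolder(std::make_shared<NpuFooLayerImpl>(context)) {}
    };
    #endif

    }  // namespace layer
    }  // namespace xllm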

xllm/core/layers/attention_mask.cpp (2 additions, 0 deletions)

@@ -16,6 +16,7 @@ limitations under the License.
 #include "attention_mask.h"
 
 namespace xllm {
+namespace layer {
 
 AttentionMask::AttentionMask(at::Device device,
                              torch::Dtype dtype,
@@ -83,4 +84,5 @@ void AttentionMask::update_attn_cache(torch::Dtype dtype,
 }
 }
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/attention_mask.h (2 additions, 0 deletions)

@@ -17,6 +17,7 @@ limitations under the License.
 #include <torch/torch.h>
 
 namespace xllm {
+namespace layer {
 
 class AttentionMask : public torch::nn::Module {
  public:
@@ -49,4 +50,5 @@ class AttentionMask : public torch::nn::Module {
   at::Tensor atten_mask_cache_;
 };
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/base_layer.cpp (2 additions, 0 deletions)

@@ -16,6 +16,7 @@ limitations under the License.
 #include "base_layer.h"
 
 namespace xllm {
+namespace layer {
 
 BaseLayer::BaseLayer(const Context& context)
     : device_(context.get_tensor_options().device()),
@@ -142,4 +143,5 @@ void BaseLayer::set_weight(const StateDict& state_dict,
 }
 }
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/base_layer.h (2 additions, 0 deletions)

@@ -33,6 +33,7 @@ limitations under the License.
 #include "framework/state_dict/state_dict.h"
 
 namespace xllm {
+namespace layer {
 
 enum class TransposeType : int {
   INVALID = -1,
@@ -139,4 +140,5 @@ class BaseLayer : public torch::nn::Module {
   int32_t dp_local_tp_rank_;
 };
 
+}  // namespace layer
 }  // namespace xllm
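The move also re-scopes non-class names declared in these headers, such as the TransposeType enum, so downstream code has to re-qualify them. If a softer migration were wanted, compatibility using-declarations in the old namespace would do it; this commit does not add any, it updates call sites instead:

    // Hypothetical migration shim only -- not part of this commit.
    namespace xllm {
    using layer::BaseLayer;      // keeps the old xllm::BaseLayer spelling working
    using layer::TransposeType;  // keeps the old xllm::TransposeType spelling working
    }  // namespace xllm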

xllm/core/layers/column_parallel_linear.h (4 additions, 1 deletion)

@@ -17,11 +17,12 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_column_parallel_linear_impl.h"
-#include "pytorch/adapter/utils/utils.h"
 #endif
 
 namespace xllm {
+namespace layer {
 
+#if defined(USE_NPU)
 class ColumnParallelLinear
     : public torch::nn::ModuleHolder<NpuColumnParallelLinearImpl> {
  public:
@@ -31,5 +32,7 @@ class ColumnParallelLinear
   ColumnParallelLinear(const Context& context)
       : ModuleHolder(std::make_shared<NpuColumnParallelLinearImpl>(context)) {}
 };
+#endif
 
+}  // namespace layer
 }  // namespace xllm
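At call sites the only visible differences are the extra namespace level and, where the caller may also be built without NPU support, the matching guard. A rough sketch, assuming a Context instance built elsewhere:

    // Hypothetical call site; construction is unchanged, only the
    // qualification and the platform guard are new.
    #if defined(USE_NPU)
    xllm::layer::ColumnParallelLinear linear(context);  // was xllm::ColumnParallelLinear
    #endif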

xllm/core/layers/deepseek_v2_decoder_layer.h (4 additions, 1 deletion)

@@ -17,11 +17,12 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_deepseek_v2_decoder_layer_impl.h"
-#include "pytorch/adapter/utils/utils.h"
 #endif
 
 namespace xllm {
+namespace layer {
 
+#if defined(USE_NPU)
 class DeepseekV2DecoderLayer
     : public torch::nn::ModuleHolder<NpuDeepseekV2DecoderLayerImpl> {
  public:
@@ -36,5 +37,7 @@ class DeepseekV2DecoderLayer
                                                              layer_id,
                                                              sm_scale)) {}
 };
+#endif
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/llama_decoder_layer.h (4 additions, 1 deletion)

@@ -17,11 +17,12 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_llama_decoder_layer_impl.h"
-#include "pytorch/adapter/utils/utils.h"
 #endif
 
 namespace xllm {
+namespace layer {
 
+#if defined(USE_NPU)
 class LlamaDecoderLayer
     : public torch::nn::ModuleHolder<NpuLlamaDecoderLayerImpl> {
  public:
@@ -31,5 +32,7 @@ class LlamaDecoderLayer
   LlamaDecoderLayer(const Context& context)
       : ModuleHolder(std::make_shared<NpuLlamaDecoderLayerImpl>(context)) {}
 };
+#endif
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/lm_head.h (4 additions, 1 deletion)

@@ -17,11 +17,12 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_lm_head_impl.h"
-#include "pytorch/adapter/utils/utils.h"
 #endif
 
 namespace xllm {
+namespace layer {
 
+#if defined(USE_NPU)
 class LmHead : public torch::nn::ModuleHolder<NpuLmHeadImpl> {
  public:
  using torch::nn::ModuleHolder<NpuLmHeadImpl>::ModuleHolder;
@@ -30,5 +31,7 @@ class LmHead : public torch::nn::ModuleHolder<NpuLmHeadImpl> {
   LmHead(const Context& context)
       : ModuleHolder(std::make_shared<NpuLmHeadImpl>(context)) {}
 };
+#endif
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/multi_head_attention.cpp (2 additions, 0 deletions)

@@ -16,6 +16,7 @@ limitations under the License.
 #include "multi_head_attention.h"
 
 namespace xllm {
+namespace layer {
 
 MultiheadAttentionImpl::MultiheadAttentionImpl(const Context& context)
     : n_head_(context.get_model_args().n_heads()),
@@ -104,4 +105,5 @@ void MultiheadAttentionImpl::verify_loaded_weights(
       << "out_proj.bias is not loaded for " << prefix + "out_proj.bias";
 }
 
+}  // namespace layer
 }  // namespace xllm

xllm/core/layers/multi_head_attention.h (2 additions, 0 deletions)

@@ -21,6 +21,7 @@ limitations under the License.
 #include "framework/state_dict/state_dict.h"
 
 namespace xllm {
+namespace layer {
 
 class MultiheadAttentionImpl : public torch::nn::Module {
  public:
@@ -54,4 +55,5 @@ class MultiheadAttentionImpl : public torch::nn::Module {
 
 TORCH_MODULE(MultiheadAttention);
 
+}  // namespace layer
 }  // namespace xllm
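Since TORCH_MODULE(MultiheadAttention) now expands inside the new namespace, the holder class it generates lands in xllm::layer as well. Note that this layer carries no USE_NPU guard, unlike the wrapper headers above. A rough usage sketch, with context assumed to be an existing Context:

    // The TORCH_MODULE-generated holder is now xllm::layer::MultiheadAttention.
    xllm::layer::MultiheadAttention mha(context);
    // mha->... dispatches to MultiheadAttentionImpl as before.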
