@@ -21,12 +21,12 @@ limitations under the License.
 
 #include <nlohmann/json.hpp>
 
-#include "npu_base_layer.h"
 #include "framework/model/model_args.h"
 #include "framework/model/npu_dp_ep_padding.h"
 #include "framework/parallel_state.h"
 #include "framework/quant_args.h"
 #include "framework/state_dict/state_dict.h"
+#include "npu_base_layer.h"
 #include "xllm_kernels/models/glm/layer/moe_decoder_layer.h"
 
 namespace xllm {
@@ -35,7 +35,7 @@ namespace layer {
 class Glm4MoeDecoderImpl : public NpuBaseLayer {
  public:
   explicit Glm4MoeDecoderImpl(const ModelContext& context,
-                               const int32_t layer_id);
+                              const int32_t layer_id);
 
   ~Glm4MoeDecoderImpl() {};
 
@@ -82,21 +82,18 @@ class Glm4MoeDecoderImpl : public NpuBaseLayer {
                                    const ParallelArgs& parallel_args,
                                    bool is_prefill);
 
-  void initialize_attention_parameters(
-      atb_speed::moe::MoeLayerParam& param,
-      const ModelArgs& args,
-      const ParallelArgs& parallel_args);
+  void initialize_attention_parameters(atb_speed::moe::MoeLayerParam& param,
+                                       const ModelArgs& args,
+                                       const ParallelArgs& parallel_args);
 
   void initialize_mlp_parameters(atb_speed::moe::MoeLayerParam& param,
                                  const ModelArgs& args,
                                  const ParallelArgs& parallel_args);
 
-  void initialize_parallel_parameters(
-      atb_speed::moe::MoeLayerParam& param,
-      const ParallelArgs& parallel_args);
+  void initialize_parallel_parameters(atb_speed::moe::MoeLayerParam& param,
+                                      const ParallelArgs& parallel_args);
 
-  void initialize_quantization_parameters(
-      atb_speed::moe::MoeLayerParam& param);
+  void initialize_quantization_parameters(atb_speed::moe::MoeLayerParam& param);
 
   torch::Tensor get_sharded_tensor(const StateDict& state_dict,
                                    const std::string& name,