Skip to content

Commit 59e1a5d

Browse files
author
niushengxiao
committed
fix: continue fix
1 parent 250b80c commit 59e1a5d

File tree

4 files changed

+16
-16
lines changed

4 files changed

+16
-16
lines changed

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def __init__(
2222
split_inter_size: int,
2323
data_type: torch.dtype,
2424
network_config: Dict[str, Any],
25-
weight_scale_suffix: Optional[str] = None,
26-
act_scale_suffix: Optional[str] = None,
25+
layer_num: int,
26+
quant_cfg = None,
2727
) -> None:
2828
super().__init__(
2929
gate_proj_name,
@@ -35,8 +35,8 @@ def __init__(
3535
split_inter_size,
3636
data_type,
3737
network_config,
38-
weight_scale_suffix,
39-
act_scale_suffix
38+
layer_num,
39+
quant_cfg,
4040
)
4141
self.expert_gate_up_proj_etp = None
4242
self.expert_down_proj_etp = None

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from lightllm.common.quantization.quantize_method import QuantizationMethod
88
from lightllm.utils.dist_utils import get_global_world_size, get_global_rank, get_current_device_id
99
from lightllm.common.vllm_kernel import _custom_ops as ops
10+
from .mm_weight.mm_weight import MMWeight
1011

1112

1213
class FusedMoeWeightTP(BaseWeight):
@@ -21,17 +22,16 @@ def __init__(
2122
split_inter_size: int,
2223
data_type: torch.dtype,
2324
network_config: Dict[str, Any],
24-
weight_scale_suffix: Optional[str] = None,
25-
act_scale_suffix: Optional[str] = None,
25+
layer_num: int,
26+
quant_cfg = None,
2627
) -> None:
2728
super().__init__()
29+
self.quant_method, self.quantized_weight = MMWeight._get_quant_method(quant_cfg, layer_num, weight_prefix)
30+
if quant_cfg is not None and quant_cfg.quantized_weight:
31+
self.weight_scale_suffix = "weight_scale_inv"
2832
self.w1_weight_name = gate_proj_name
2933
self.w2_weight_name = down_proj_name
3034
self.w3_weight_name = up_proj_name
31-
self.weight_scale_suffix = weight_scale_suffix
32-
self.act_scale_suffix = act_scale_suffix
33-
self.quantized_weight = weight_scale_suffix is not None
34-
self.static_activation = act_scale_suffix is not None
3535

3636
self.e_score_correction_bias_name = e_score_correction_bias_name
3737
self.weight_prefix = weight_prefix
@@ -46,7 +46,6 @@ def __init__(
4646
self.e_score_correction_bias = None
4747
self.w2_list = [None] * self.n_routed_experts
4848
self.w2_scale_list = [None] * self.n_routed_experts
49-
self.quant_method = None
5049
self.scoring_func = network_config["scoring_func"]
5150
self.w1 = [None, None] # weight, weight_scale
5251
self.w2 = [None, None] # weight, weight_scale

lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_weight.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,17 @@ def __new__(cls, **kwargs):
164164
name = kwargs.pop("name", None)
165165
quant_method, quantized_weight = cls._get_quant_method(quant_cfg, layer_num_, name)
166166
kwargs["quant_method"] = quant_method
167-
if quant_cfg.static_activation:
167+
if quant_cfg is not None and quant_cfg.static_activation:
168168
kwargs["act_scale_suffix"] = "input_scale"
169-
if quant_cfg.quantized_weight:
169+
if quant_cfg is not None and quant_cfg.quantized_weight:
170170
kwargs["weight_scale_suffix"] = "weight_scale_inv"
171171
mmcls = cls._get_mmcls(quant_method, quantized_weight)
172172
return mmcls(**kwargs)
173173

174174
@classmethod
175175
def _get_quant_method(cls, quant_cfg: Quantcfg, layer_num_: int, name: str) -> QuantizationMethod:
176+
if quant_cfg is None:
177+
return None, False
176178
quant_method = quant_cfg.get_quant_method(layer_num_, name)
177179
quant_type = quant_cfg.get_quant_type(layer_num_, name)
178180
quantized_weight = quant_cfg.quantized_weight

lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,6 @@ def _init_moe(self):
321321
self.moe_gate = ROWMMWeight(
322322
weight_name=f"model.layers.{self.layer_num_}.mlp.gate.weight",
323323
data_type=self.data_type_,
324-
quant_cfg=self.quant_cfg,
325324
layer_num=self.layer_num_,
326325
name="moe_gate",
327326
tp_rank=0,
@@ -342,8 +341,8 @@ def _init_moe(self):
342341
split_inter_size=moe_intermediate_size // self.tp_world_size_,
343342
data_type=self.data_type_,
344343
network_config=self.network_config_,
345-
weight_scale_suffix=self.weight_scale_suffix,
346-
act_scale_suffix=self.act_scale_suffix,
344+
layer_num=self.layer_num_,
345+
quant_cfg=self.quant_cfg,
347346
)
348347

349348
def _init_ffn(self):

0 commit comments

Comments
 (0)