11import torch
22from lightllm .common .basemodel .layer_weights .meta_weights .mm_weight .mm_weight import (
3- SingleMMWeightTpl ,
3+ MMWeightTpl ,
44 DeepGemmFP8W8A8B128MMWeight ,
55 AWQMMWeightTpl ,
66)
77from lightllm .common .quantization import Quantcfg
88from lightllm .utils .dist_utils import get_current_device_id
99from lightllm .common .quantization .quantize_method import QuantizationMethod
10- from typing import Dict , List , Optional
10+ from typing import Dict , List , Optional , Union
1111from .mm_slicer import ColSliceMixin , QuantizedRowSliceMixin , QuantizedColSliceMixin
1212
1313
class UnquantizedCOLMMWeight(MMWeightTpl):
    """Column-sliced, unquantized matmul weight.

    Post-patch, this accepts either a single weight name or a list of names
    (``Union[str, List[str]]``) and delegates all loading/storage to the
    ``MMWeightTpl`` base class.

    Args:
        weight_names: One weight tensor name, or a list of names to fuse.
        data_type: Target dtype for the loaded weight.
        bias_names: Optional bias name(s), mirroring ``weight_names``.
        quant_method: Quantization method; ``None`` for the unquantized path.
        tp_rank: Tensor-parallel rank of this process.
        tp_world_size: Tensor-parallel world size.
    """

    def __init__(
        self,
        weight_names: Union[str, List[str]],
        data_type: torch.dtype,
        bias_names: Optional[Union[str, List[str]]] = None,
        quant_method: QuantizationMethod = None,
        tp_rank: int = None,
        tp_world_size: int = None,
    ) -> None:
        # NOTE(review): this class forwards via the *singular* keywords
        # (weight_name=/bias_name=) while the sibling classes below use the
        # plural forms — confirm against MMWeightTpl's signature.
        super().__init__(
            weight_name=weight_names,
            data_type=data_type,
            bias_name=bias_names,
            quant_method=quant_method,
            tp_rank=tp_rank,
            tp_world_size=tp_world_size,
        )
        # NOTE(review): the original hunk continues past this view; any
        # trailing statements of __init__ (e.g. slicer setup using
        # ColSliceMixin) are not visible here — verify against the full file.
class DeepGemmFP8W8A8B128COLMMWeight(DeepGemmFP8W8A8B128MMWeight):
    """Column-sliced FP8 (w8a8, block-128) matmul weight for DeepGEMM.

    Thin constructor that forwards the (possibly multiple) weight/bias names
    to ``DeepGemmFP8W8A8B128MMWeight`` using the plural keyword names.

    Args:
        weight_names: One weight tensor name, or a list of names to fuse.
        data_type: Target dtype for the loaded weight.
        bias_names: Optional bias name(s), mirroring ``weight_names``.
        quant_method: Quantization method implementation.
        tp_rank: Tensor-parallel rank of this process.
        tp_world_size: Tensor-parallel world size.
    """

    def __init__(
        self,
        weight_names: Union[str, List[str]],
        data_type: torch.dtype,
        bias_names: Optional[Union[str, List[str]]] = None,
        quant_method: QuantizationMethod = None,
        tp_rank: int = None,
        tp_world_size: int = None,
    ) -> None:
        super().__init__(
            weight_names=weight_names,
            data_type=data_type,
            bias_names=bias_names,
            quant_method=quant_method,
            tp_rank=tp_rank,
            tp_world_size=tp_world_size,
        )
        # NOTE(review): the original hunk continues past this view; any
        # trailing __init__ statements (e.g. a quantized column slicer) are
        # not visible here — verify against the full file.
class AWQCOLMMWeight(AWQMMWeightTpl):
    """Column-sliced AWQ-quantized matmul weight.

    Thin constructor that forwards the (possibly multiple) weight/bias names
    to ``AWQMMWeightTpl`` using the plural keyword names.

    Args:
        weight_names: One weight tensor name, or a list of names to fuse.
        data_type: Target dtype for the loaded weight.
        bias_names: Optional bias name(s), mirroring ``weight_names``.
        quant_method: AWQ quantization method implementation.
        tp_rank: Tensor-parallel rank of this process.
        tp_world_size: Tensor-parallel world size.
    """

    def __init__(
        self,
        weight_names: Union[str, List[str]],
        data_type: torch.dtype,
        bias_names: Optional[Union[str, List[str]]] = None,
        quant_method: QuantizationMethod = None,
        tp_rank: int = None,
        tp_world_size: int = None,
    ) -> None:
        super().__init__(
            weight_names=weight_names,
            data_type=data_type,
            bias_names=bias_names,
            quant_method=quant_method,
            tp_rank=tp_rank,
            tp_world_size=tp_world_size,
        )
        # NOTE(review): the original hunk continues past this view; any
        # trailing __init__ statements (e.g. a quantized row/col slicer) are
        # not visible here — verify against the full file.
class AWQMARLINCOLMMWeight(AWQCOLMMWeight):
    """Column-sliced AWQ weight using the Marlin kernel path.

    This patch removed the Marlin-specific ``_process_weight``,
    ``_process_weight_scale`` and ``_process_weight_zero_point`` overrides
    (which moved tensors to the current CUDA device and ran the quant
    method's post-loading processing); the class is now a pure pass-through
    constructor over ``AWQCOLMMWeight``.

    Args:
        weight_names: One weight tensor name, or a list of names to fuse.
        data_type: Target dtype for the loaded weight.
        bias_names: Optional bias name(s), mirroring ``weight_names``.
        quant_method: AWQ/Marlin quantization method implementation.
        tp_rank: Tensor-parallel rank of this process.
        tp_world_size: Tensor-parallel world size.
    """

    def __init__(
        self,
        weight_names: Union[str, List[str]],
        data_type: torch.dtype,
        bias_names: Optional[Union[str, List[str]]] = None,
        quant_method: QuantizationMethod = None,
        tp_rank: int = None,
        tp_world_size: int = None,
    ) -> None:
        # NOTE(review): with the _process_weight* overrides deleted, the
        # get_current_device_id import at the top of this file may now be
        # unused — confirm before removing.
        super().__init__(
            weight_names=weight_names,
            data_type=data_type,
            bias_names=bias_names,
            quant_method=quant_method,
            tp_rank=tp_rank,
            tp_world_size=tp_world_size,
        )
11798COLMM_WEIGHT_CLS_MAP = {
11899 "deepgemm-fp8w8a8-b128" : DeepGemmFP8W8A8B128COLMMWeight ,