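fix: continue fix (drop COLMM _slice_bias override, scale ROWMM bias by 1/tp_world_size, move scale loading into _load_scales)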

niushengxiao · niushengxiao · commit 7b36448159ac · 2025-03-10T20:19:18.000+08:00
diff --git a/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/colmm_weight.py b/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/colmm_weight.py
@@ -39,10 +39,6 @@ def _slice_weight(self, tensor):
         self.weight_tp_size = tp_size
         return tensor[:, tp_size * self.tp_rank_ : tp_size * (self.tp_rank_ + 1)].to(self.data_type_)
 
-    def _slice_bias(self, bias):
-        tp_size = bias.shape[0] // self.tp_world_size_
-        return bias[tp_size * self.tp_rank_ : tp_size * (self.tp_rank_ + 1)].to(self.data_type_)
-
 
 class W8A8B128COLMMWeight(UnquantizedCOLMMWeight):
     def __init__(
@@ -85,8 +81,7 @@ def _post_process_weight_scale(self, weight_scale) -> None:
     def _post_process_weight(self, weight) -> None:
         self.weight = weight.cuda(get_current_device_id()).transpose(0, 1)
 
-    def _load_weights(self, weights: Dict[str, torch.Tensor]) -> None:
-        super()._load_weights(weights)
+    def _load_scales(self, weights: Dict[str, torch.Tensor]) -> None:
         if self.weight_scale_name is not None and self.weight_scale_name in weights:
             weight_scale = weights[self.weight_scale_name]
             # per channel or block-wise
diff --git a/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/rowmm_weight.py b/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/rowmm_weight.py
@@ -66,7 +66,7 @@ def _slice_weight(self, weight: torch.Tensor):
 
     def _slice_bias(self, bias):
         tp_size = bias.shape[0] // self.tp_world_size_
-        return bias[tp_size * self.tp_rank_ : tp_size * (self.tp_rank_ + 1)].to(self.data_type_)
+        return (bias[tp_size * self.tp_rank_ : tp_size * (self.tp_rank_ + 1)] / self.tp_world_size_).to(self.data_type_)
 
 
 class W8A8B128ROWMMWeight(UnquantizedROWMMWeight):
@@ -98,7 +98,7 @@ def _slice_weight(self, weight: torch.Tensor):
 
     def _slice_bias(self, bias):
         tp_size = bias.shape[0] // self.tp_world_size_
-        return bias[tp_size * self.tp_rank_ : tp_size * (self.tp_rank_ + 1)]
+        return bias[tp_size * self.tp_rank_ : tp_size * (self.tp_rank_ + 1)] / self.tp_world_size_
 
     def _slice_weight_scale(self, weight_scale: torch.Tensor):
         scale_start = (self.weight_tp_size * self.tp_rank_ + self.block_size - 1) // self.block_size
@@ -114,8 +114,7 @@ def _post_process_weight_scale(self, weight_scale) -> None:
     def _post_process_weight(self, weight) -> None:
         self.weight = weight.cuda(get_current_device_id()).transpose(0, 1)
 
-    def _load_weights(self, weights: Dict[str, torch.Tensor]) -> None:
-        super()._load_weights(weights)
+    def _load_scales(self, weights: Dict[str, torch.Tensor]) -> None:
         if self.weight_scale_name is not None and self.weight_scale_name in weights:
             weight_scale = weights[self.weight_scale_name]
             # per channel or block-wise
@@ -296,8 +295,7 @@ def _slice_weight_scale(self, weight_scale: torch.Tensor):
         scale_end = self.weight_tp_size * (self.tp_rank_ + 1)
         return weight_scale[scale_start : scale_end].to(torch.float)
 
-    def _load_weights(self, weights: Dict[str, torch.Tensor]) -> None:
-        super()._load_weights(weights)
+    def _load_scales(self, weights: Dict[str, torch.Tensor]) -> None:
         if self.weight_scale_name is not None and self.weight_scale_name in weights:
             weight_scale = weights[self.weight_scale_name]
             # per channel or block-wise