Commit 449c551

function private

1 parent daa6fc2 commit 449c551

4 files changed: +14 -41 lines changed

lightllm/common/basemodel/layer_weights/meta_weights/base_weight.py

Lines changed: 0 additions & 26 deletions
@@ -26,29 +26,3 @@ def load_hf_weights(self, weights):

     def verify_load(self):
         pass
-
-
-# class BaseWeightTpl(BaseWeight):
-#     def __init__(self, weight_name, data_type, bias_name):
-#         self.weight_name = weight_name
-#         self.bias_name = bias_name
-#         self.data_type_ = data_type
-#         self.world_size_ = get_world_size()
-#         self.tp_rank_ = get_rank()
-#         self.weight = None
-#         self.bias = None
-
-#     def load_hf_weights(self, weights):
-#         if self.weight_name in weights:
-#             self.weight = weights[self.weight_name].to(self.data_type_).cuda(self.tp_rank_)
-#         if self.bias_name in weights:
-#             self.bias = weights[self.bias_name].to(self.data_type_).cuda(self.tp_rank_)
-
-#     def verify_load(self):
-#         load_ok = True
-#         #Verify weight. The weight must be not None.
-#         load_ok = load_ok and self.weight is not None
-#         #Verify bias. If bias_name is set, it must be not None.
-#         if self.bias_name is not None:
-#             load_ok = load_ok and self.bias is not None
-#         return load_ok
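
The deletion above is pure cleanup: a commented-out early draft of the tensor-parallel weight template. What remains of BaseWeight in this file is the two-hook contract visible in the context lines, load_hf_weights(weights) and verify_load(). A minimal standalone sketch of a class following that contract; the class name, dtype default, and tensor handling are illustrative, not code from the repo:

import torch


class DummyWeight:
    # Illustrative only: mirrors the load_hf_weights / verify_load contract of BaseWeight.
    def __init__(self, weight_name, data_type=torch.float16):
        self.weight_name = weight_name
        self.data_type_ = data_type
        self.weight = None

    def load_hf_weights(self, weights):
        # `weights` is the checkpoint dict of name -> tensor; keep only the tensor this object owns.
        if self.weight_name in weights:
            self.weight = weights[self.weight_name].to(self.data_type_)

    def verify_load(self):
        # Loading counts as successful only if the expected tensor was actually present.
        return self.weight is not None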

lightllm/common/basemodel/layer_weights/meta_weights/mm_weight.py

Lines changed: 11 additions & 11 deletions
@@ -15,12 +15,6 @@ def __init__(self, data_type, split_n_embed):
     def set_quant_method(self, quant_method):
         self.quant_method = quant_method

-    def post_load_weights(self):
-        if self.quant_method is not None:
-            self.weight = self.quant_method.quantize(self.weight.cuda(self.tp_rank_))
-            return
-        self.weight = self.weight.transpose(0, 1).cuda(self.tp_rank_)
-
     def mm(self, input_tensor, out=None, use_custom_tensor_mananger=True):
         if self.quant_method is not None:
             return self.quant_method.apply(input_tensor, self.weight, self.bias, out)
@@ -36,6 +30,12 @@ def mm(self, input_tensor, out=None, use_custom_tensor_mananger=True):
             return torch.mm(input_tensor, self.weight, out=out)
         return torch.addmm(self.bias, input_tensor, self.weight, out=out)

+    def _post_load_weights(self):
+        if self.quant_method is not None:
+            self.weight = self.quant_method.quantize(self.weight.cuda(self.tp_rank_))
+            return
+        self.weight = self.weight.transpose(0, 1).cuda(self.tp_rank_)
+

 class MMWeight(MMWeightTpl):
     def __init__(self, weight_name, data_type, split_n_embed, bias_name=None):
@@ -69,7 +69,7 @@ def load_hf_weights(self, weights):
             self.bias = bias.cuda(self.tp_rank_)
         if weight is None:
             return
-        self.post_load_weights()
+        self._post_load_weights()
         return


@@ -89,7 +89,7 @@ def load_hf_weights(self, weights):
             self.bias = bias.cuda(self.tp_rank_) / self.world_size_
         if weight is None:
             return
-        self.post_load_weights()
+        self._post_load_weights()
        return


@@ -116,10 +116,10 @@ class MultiROWMMWeight(MultiMMWeight):
     def __init__(self, weight_names, data_type, split_n_embed, bias_names=None):
         super().__init__(weight_names, data_type, split_n_embed, bias_names)

-    def fuse(self):
+    def _fuse(self):
        if self.weight is None and all(w is not None for w in self.weights):
            self.weight = torch.cat(self.weights, dim=0)
-            self.post_load_weights()
+            self._post_load_weights()
        if self.has_bias:
            if self.bias is None and all(b is not None for b in self.biases):
                self.bias = torch.cat(self.bias, dim=0).cuda(self.tp_rank_)
@@ -136,7 +136,7 @@ def load_hf_weights(self, weights):
            if self.has_bias and self.bias_names[i] in weights:
                bias = weights[self.bias_names[i]].to(self.data_type_)
                self.biases[i] = bias[start:end]
-        self.fuse()
+        self._fuse()
         return
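
Beyond the renames (post_load_weights to _post_load_weights, fuse to _fuse, both now internal helpers driven from load_hf_weights), the hunks above show the only interface a quant_method has to satisfy: quantize(weight) once after load, and apply(input_tensor, weight, bias, out) at matmul time. A no-op sketch of that interface, purely illustrative and not a class shipped with lightllm:

import torch


class IdentityQuantMethod:
    # Illustrative stand-in for a quant_method object as used by MMWeightTpl.

    def quantize(self, weight):
        # A real method would pack/scale the weight here; this sketch returns it unchanged,
        # assuming the [out_features, in_features] checkpoint layout that apply() below expects
        # (on the quantized path _post_load_weights does not transpose the weight).
        return weight

    def apply(self, input_tensor, weight, bias=None, out=None):
        # Matches the positional call in MMWeightTpl.mm: (input, weight, bias, out).
        result = torch.nn.functional.linear(input_tensor, weight, bias)
        if out is not None:
            out.copy_(result)
            return out
        return result


# Hypothetical wiring, done before load_hf_weights runs:
#     mm_weight.set_quant_method(IdentityQuantMethod())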

lightllm/common/basemodel/layer_weights/transformer_layer_weight.py

Lines changed: 2 additions & 3 deletions
@@ -18,14 +18,13 @@ def __init__(self, layer_num, tp_rank, world_size, data_type, network_config, mo
         self.network_config_ = network_config
         self.mode = mode
         self.quant_cfg = quant_cfg
-        self.init_static_params()
-        self._init_config()
+        self._parse_config()
         self._init_weight_names()
         self._init_weight()
         self.set_quantization()
         return

-    def _init_config(self):
+    def _parse_config(self):
         pass

     def _init_weight_names(self):
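
After this change the base constructor drives four hooks in a fixed order, and init_static_params() is no longer called from it: _parse_config(), then _init_weight_names(), _init_weight(), and set_quantization(). A schematic of how a model-specific subclass slots into that order; only the hook names come from the diff, the class and attribute names are illustrative:

class ToyLayerWeight:
    # Illustrative skeleton mirroring the hook order TransformerLayerWeight.__init__ now uses.
    def __init__(self, network_config):
        self.network_config_ = network_config
        self._parse_config()       # 1. read sizes out of the HF config dict
        self._init_weight_names()  # 2. decide which checkpoint keys this layer owns
        self._init_weight()        # 3. build the weight objects for those keys
        self.set_quantization()    # 4. attach quant methods where configured

    def _parse_config(self):
        self.n_embed = self.network_config_["hidden_size"]

    def _init_weight_names(self):
        self.weight_names = []

    def _init_weight(self):
        pass

    def set_quantization(self):
        pass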

lightllm/models/llama/layer_weights/transformer_layer_weight.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def _init_weight(self):
         self._init_ffn()
         self._init_norm()

-    def _init_config(self):
+    def _parse_config(self):
         self.n_embed = self.network_config_["hidden_size"]
         self.n_head = self.network_config_["num_attention_heads"]
         self.n_inter = self.network_config_["intermediate_size"]
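
The in-tree Llama weights are updated to the new hook name here, but the rename is silently breaking for any out-of-tree subclass that still overrides _init_config: the base _parse_config (a no-op) runs instead and no error is raised. A toy demonstration of that failure mode, with made-up class names:

class Base:
    def __init__(self):
        self._parse_config()      # new hook name after this commit

    def _parse_config(self):
        pass                      # base default is a no-op, as in transformer_layer_weight.py


class StaleSubclass(Base):
    def _init_config(self):       # old hook name; never invoked any more
        self.n_embed = 4096


layer = StaleSubclass()
print(hasattr(layer, "n_embed"))  # False: the stale override was skipped without any error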
