1717from dataclasses import asdict , dataclass , field
1818from typing import List , Optional
1919
20- import paddle
21-
2220from paddlenlp .utils .env import MERGE_CONFIG_NAME
23- from paddlenlp .utils .log import logger
2421
2522
2623@dataclass
@@ -30,7 +27,6 @@ class MergeConfig:
3027 """
3128
3229 # Common parameters
33- device : str = field (default = "cpu" , metadata = {"help" : "Device to use for the merge.ex cpu、 gpu、low_gpu_mem" })
3430 tensor_type : str = field (
3531 default = "np" , metadata = {"help" : "Tensor type to use for the merge. Choose np(CPU Only) or pd (CPU/GPU)" }
3632 )
@@ -39,14 +35,20 @@ class MergeConfig:
3935 merge_method : str = field (default = "linear" , metadata = {"help" : "The merge strategy." })
4036 merge_type : str = field (default = "linear" , metadata = {"help" : "The type of merge process." })
4137 sparsify_type : str = field (default = None , metadata = {"help" : "The type of sparsify process." })
38+ split_pieces : int = field (default = 8 , metadata = {"help" : "Split large tensor to multi-piece" })
39+ max_tensor_mem : float = field (default = 0.5 , metadata = {"help" : "Split tensor if exceed setting max_tensor_mem." })
4240
4341 # Model parameters
4442 model_path_list : Optional [List [str ]] = field (default = None , metadata = {"help" : "Merge model name or path list" })
4543 model_path_str : Optional [str ] = field (
4644 default = None , metadata = {"help" : "Merge model name or path string.(split by ',')" }
4745 )
4846 base_model_path : str = field (default = None , metadata = {"help" : "Base model name or path." })
49- output_path : str = field (default = None , metadata = {"help" : "Base model name or path." })
47+ output_path : str = field (default = None , metadata = {"help" : "Output model name or path." })
48+ lora_model_path : str = field (default = None , metadata = {"help" : "LoRA model name or path." })
49+ copy_file_list : Optional [List [str ]] = field (
50+ default = None , metadata = {"help" : "Copy file list from base model path or first model path." }
51+ )
5052 # merge parameters
5153 weight_list : Optional [List [float ]] = field (
5254 default = None , metadata = {"help" : "Relative (or absolute if normalize=False) weighting of a given tensor" }
@@ -75,32 +77,43 @@ def config_check(self):
7577 os .makedirs (self .output_path , exist_ok = True )
7678 if self .tensor_type not in ["np" , "pd" ]:
7779 raise ValueError (f"Unsupported tensor type: { self .tensor_type } . Support 'np' and 'pd' only." )
78- if self .device == "gpu" and self .tensor_type == "np" :
79- logger .warning ("np only support cpu device, but got gpu. Setting `device` to `cpu`." )
80- self .device = "cpu"
81-
82- elif self .merge_method not in ["linear" , "ties" , "slerp" , "della_linear" , "della" , "dare_linear" , "dare_ties" ]:
83- raise ValueError (
84- f"Unsupported merge strategy: { self .merge_method } . Please choose one from ['linear', 'slerp']."
85- )
86- if self .model_path_str is not None :
87- self .model_path_list = self .model_path_str .split ("," )
88- if self .model_path_list is not None :
89- if not isinstance (self .model_path_list , list ) or len (self .model_path_list ) < 2 :
90- raise ValueError (f"Please specify the model_path_list at least two. But got { self .model_path_list } " )
91- if self .weight_list is None :
92- self .weight_list = [1.0 ] * len (self .model_path_list )
93- self .normalize = True
94- if len (self .model_path_list ) != len (self .weight_list ):
95- raise ValueError ("The length of model_path_list and weight_list must be the same." )
96- if self .reserve_p < 0 or self .reserve_p > 1 :
97- raise ValueError ("reserve_p must be between 0 and 1." )
98- if "della" in self .merge_method or self .sparsify_type == "magprune" :
99- if self .reserve_p <= self .epsilon / 2 or self .reserve_p >= (1 - self .epsilon ):
80+ if self .lora_model_path is not None :
81+ if self .base_model_path is None :
82+ raise ValueError ("Please specify the base_model_path when using LoRA merge." )
83+ self .tensor_type = "pd"
84+
85+ if self .lora_model_path is None :
86+ if self .merge_method not in [
87+ "linear" ,
88+ "ties" ,
89+ "slerp" ,
90+ "della_linear" ,
91+ "della" ,
92+ "dare_linear" ,
93+ "dare_ties" ,
94+ ]:
10095 raise ValueError (
101- f"Error: reserve_p +- epsilon/2 must be in the range (0, 1). reserve_p + epsilon/2 = { self .reserve_p + self . epsilon / 2 } , reserve_p - epsilon/2 = { self . reserve_p - self . epsilon / 2 } "
96+ f"Unsupported merge strategy: { self .merge_method } . Please choose one from ['linear', 'slerp', 'ties', 'della_linear', 'della', ']. "
10297 )
103- paddle .set_device (self .device )
98+ if self .model_path_str is not None :
99+ self .model_path_list = self .model_path_str .split ("," )
100+ if self .model_path_list is not None :
101+ if not isinstance (self .model_path_list , list ) or len (self .model_path_list ) < 2 :
102+ raise ValueError (
103+ f"Please specify the model_path_list at least two. But got { self .model_path_list } "
104+ )
105+ if self .weight_list is None :
106+ self .weight_list = [1.0 ] * len (self .model_path_list )
107+ self .normalize = True
108+ if len (self .model_path_list ) != len (self .weight_list ):
109+ raise ValueError ("The length of model_path_list and weight_list must be the same." )
110+ if self .reserve_p < 0 or self .reserve_p > 1 :
111+ raise ValueError ("reserve_p must be between 0 and 1." )
112+ if "della" in self .merge_method or self .sparsify_type == "magprune" :
113+ if self .reserve_p <= self .epsilon / 2 or self .reserve_p >= (1 - self .epsilon ):
114+ raise ValueError (
115+ f"Error: reserve_p +- epsilon/2 must be in the range (0, 1). reserve_p + epsilon/2 = { self .reserve_p + self .epsilon / 2 } , reserve_p - epsilon/2 = { self .reserve_p - self .epsilon / 2 } "
116+ )
104117
105118 @property
106119 def __dict__ (self ):
0 commit comments