Commit 7640804

fix lora from pretrained. (#7714)
* fix lora from pretrained.
* updates.
* fix comments.
* fix comments.
1 parent d97016e commit 7640804

3 files changed: +15 additions, -8 deletions
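
Before the per-file diffs, a minimal sketch of how the new do_qat flag is meant to be threaded into LoRA fine-tuning; the checkpoint name, target-module pattern, rank, and alpha below are hypothetical, and only the do_qat field itself comes from this commit.

# Hedged sketch, not part of the commit; argument values are illustrative.
from paddlenlp.peft import LoRAConfig, LoRAModel
from paddlenlp.transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("facebook/llama-7b")  # hypothetical checkpoint
lora_config = LoRAConfig(
    target_modules=[".*q_proj.*", ".*v_proj.*"],  # hypothetical target pattern
    r=8,
    lora_alpha=16,
    merge_weights=False,
    do_qat=False,  # new field from this commit; quantizer-scale split mappings are only added when True
)
model = LoRAModel(base, lora_config)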

llm/finetune_generation.py

Lines changed: 1 addition & 0 deletions

@@ -417,6 +417,7 @@ def neft_post_hook(module, input, output):
             merge_weights=False,
             tensor_parallel_degree=training_args.tensor_parallel_degree,
             dtype=dtype,
+            do_qat=quant_args.do_qat,
         )
         model = LoRAModel(model, lora_config)
     else:

paddlenlp/peft/lora/lora_config.py

Lines changed: 1 addition & 0 deletions

@@ -71,6 +71,7 @@ class LoRAConfig:
             "help": "The model multi head dimension.Only for LoRAMergedLinear and ColumnParallelLoRAMergedLinear."
         },
     )
+    do_qat: bool = field(default=False, metadata={"help": "Whether the lora model would do quant-aware training"})

     @property
     def __dict__(self):
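
As a standalone illustration of the field pattern used above (plain dataclasses, not PaddleNLP code), the flag defaults to False and carries its help text in the field metadata; TinyLoRAConfig is a hypothetical stand-in for LoRAConfig.

from dataclasses import dataclass, field, fields

@dataclass
class TinyLoRAConfig:  # hypothetical, minimal stand-in for LoRAConfig
    r: int = 8
    do_qat: bool = field(
        default=False,
        metadata={"help": "Whether the lora model would do quant-aware training"},
    )

cfg = TinyLoRAConfig()
print(cfg.do_qat)  # False unless explicitly enabled
print(next(f.metadata["help"] for f in fields(cfg) if f.name == "do_qat"))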

paddlenlp/peft/lora/lora_model.py

Lines changed: 13 additions & 8 deletions

@@ -218,8 +218,11 @@ def _convert_tensor_parallel(self, lora_state_dict):
             lora_name_action_mappings[v] = name_action_mappings[k]

         for name, action in lora_name_action_mappings.items():
-            tensor = lora_state_dict.pop(name)
-            lora_state_dict[name] = action(tensor)
+            if name in lora_state_dict:
+                tensor = lora_state_dict.pop(name)
+                lora_state_dict[name] = action(tensor)
+            else:
+                logger.warning(f"{name} not found in lora_state_dict!")
         return lora_state_dict

     def save_pretrained(self, save_directory: str, merge_tensor_parallel: bool = False, **kwargs):
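
The hunk above replaces an unconditional pop, which raised a KeyError whenever a checkpoint lacked a mapped key (for example, quantizer scales in a non-QAT checkpoint). A self-contained sketch of the same guard pattern, using the standard logging module instead of PaddleNLP's logger; function and variable names are illustrative.

import logging

logger = logging.getLogger(__name__)

def apply_tp_actions(lora_state_dict, lora_name_action_mappings):
    """Apply each tensor-parallel action only if its key exists in the checkpoint."""
    for name, action in lora_name_action_mappings.items():
        if name in lora_state_dict:
            tensor = lora_state_dict.pop(name)
            lora_state_dict[name] = action(tensor)
        else:
            logger.warning(f"{name} not found in lora_state_dict!")
    return lora_state_dict

# Example: one mapped key is missing, so it is skipped with a warning instead of a KeyError.
state = {"linear_0.lora_A": [[1.0]]}
mappings = {"linear_0.lora_A": lambda t: t, "linear_0.weight_quanter._scale": lambda t: t}
apply_tp_actions(state, mappings)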
@@ -326,9 +329,10 @@ def _find_and_replace_module(self, model, module_name, lora_config, enable_lora)
             self.add_lora_split_mapping(module_name + ".lora_B", is_column=True)

             # for lora qat
-            self.add_lora_split_mapping(module_name + ".weight_quanter._scale", is_column=True)
-            self.add_lora_split_mapping(module_name + ".activation_quanter._scale", is_column=False)
-            self.add_lora_split_mapping(module_name + ".activation_quanter.quanter._scale", is_column=False)
+            if self.lora_config.do_qat:
+                self.add_lora_split_mapping(module_name + ".weight_quanter._scale", is_column=True)
+                self.add_lora_split_mapping(module_name + ".activation_quanter._scale", is_column=False)
+                self.add_lora_split_mapping(module_name + ".activation_quanter.quanter._scale", is_column=False)
         elif isinstance(module, RowParallelLinear):
             # recover the original output_features
             lora_module = RowParallelLoRALinear(
@@ -345,9 +349,10 @@ def _find_and_replace_module(self, model, module_name, lora_config, enable_lora)
             self.add_lora_split_mapping(module_name + ".lora_A", is_column=False)

             # for lora qat
-            self.add_lora_split_mapping(module_name + ".weight_quanter._scale", is_column=False)
-            self.add_lora_split_mapping(module_name + ".activation_quanter._scale", is_column=False)
-            self.add_lora_split_mapping(module_name + ".activation_quanter.quanter._scale", is_column=False)
+            if self.lora_config.do_qat:
+                self.add_lora_split_mapping(module_name + ".weight_quanter._scale", is_column=False)
+                self.add_lora_split_mapping(module_name + ".activation_quanter._scale", is_column=False)
+                self.add_lora_split_mapping(module_name + ".activation_quanter.quanter._scale", is_column=False)
         elif QuantizationLinear is not None and isinstance(module, QuantizationLinear):
             lora_module = QuantizationLoRALinear(
                 in_features=module.in_features,
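
Tying the two lora_model.py changes together: the split mappings registered here are the keys that _convert_tensor_parallel later expects, so gating the quantizer-scale entries on do_qat keeps non-QAT checkpoints from looking for scale tensors at all. A hedged sketch under that assumption; the attribute names mirror the diff, while the function itself is illustrative.

def expected_split_mapping(module_name: str, do_qat: bool) -> dict:
    """Return which checkpoint keys get a tensor-parallel split action (True = column-wise)."""
    mapping = {module_name + ".lora_B": True}  # always registered for column-parallel LoRA layers
    if do_qat:
        # Quantizer scale tensors exist only in quant-aware-training checkpoints.
        mapping[module_name + ".weight_quanter._scale"] = True
        mapping[module_name + ".activation_quanter._scale"] = False
        mapping[module_name + ".activation_quanter.quanter._scale"] = False
    return mapping

print(expected_split_mapping("llama.layers.0.q_proj", do_qat=False))
# {'llama.layers.0.q_proj.lora_B': True}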
