
Commit 87b6355

Fix model saving in new format (#198)

(cherry picked from commit c6bf657)
1 parent acf3665

File tree

5 files changed: +293 -13 lines changed

swift/tuners/base.py

Lines changed: 46 additions & 9 deletions
@@ -3,6 +3,7 @@
 import inspect
 import os
 import re
+from copy import copy
 from inspect import Parameter, Signature, signature
 from types import MethodType
 from typing import Dict, List, Union
@@ -118,9 +119,35 @@ def forward(self, *args, **kwargs):
         else:
             for output in self.adapters.values():
                 output.mark_trainable_callback(model)
-
-    def load_state_dict(self, state_dict, strict=True):
-        return self.model.load_state_dict(state_dict, False)
+        if self.extra_state_keys:
+            for n, p in model.named_parameters():
+                if any(
+                        re.fullmatch(extra_key, n)
+                        for extra_key in self.extra_state_keys):
+                    p.requires_grad = True
+
+    def load_state_dict(self,
+                        state_dict,
+                        strict=True,
+                        adapter_name: str = None):
+        if adapter_name is not None:
+            output = self.adapters[adapter_name]
+            if getattr(output.config, 'modules_to_save', None):
+                for key, value in copy(state_dict).items():
+                    for module_name in output.config.modules_to_save:
+                        if module_name in key:
+                            state_dict.pop(key)
+                            key = key.replace(
+                                module_name,
+                                f'{module_name}.modules_to_save.{adapter_name}'
+                            )
+                            break
+                    state_dict[key] = value
+        incompatible_keys = self.model.load_state_dict(state_dict, False)
+        if len(incompatible_keys[1]) > 0:
+            logger.error(
+                f'Load state dict with unexpected keys: {incompatible_keys[1]}'
+            )

     def state_dict(self,
                    *args,
@@ -149,18 +176,28 @@ def state_dict(self,
         Returns:
             The state dict to be saved.
         """
-        destination = self.model.state_dict(
+        state_dict = self.model.state_dict(
             destination=destination, prefix=prefix, keep_vars=keep_vars)
         state_dicts = {}
         if kwargs.get('save_adapter', True):
             for name, output in self.adapters.items():
                 if adapter_name == name or adapter_name is None:
                     state_dicts.update(
-                        output.state_dict_callback(destination, name))
+                        output.state_dict_callback(state_dict, name))
+                    modules_to_save_names = [
+                        sub_name
+                        for sub_name, _ in self.model.named_parameters()
+                        if 'modules_to_save' in sub_name
+                    ]
+                    for module_name in modules_to_save_names:
+                        if f'modules_to_save.{name}' in module_name:
+                            state_dicts[module_name.replace(
+                                f'modules_to_save.{name}.',
+                                '')] = state_dict[module_name]
         if kwargs.get('save_extra_states', True):
             state_dicts.update({
                 k: v
-                for k, v in destination.items() if any(
+                for k, v in state_dict.items() if any(
                     re.fullmatch(extra_key, k)
                     for extra_key in self.extra_state_keys)
             })
@@ -289,10 +326,10 @@ def from_pretrained(cls,
                        f'lora_B.{_name}.weight'): value
                    for key, value in state_dict.items()
                }
-                self.model.load_state_dict(state_dict, strict=False)
+                self.load_state_dict(state_dict, adapter_name=_name)
            state_dict = cls.load_state_file(model_dir)
            if state_dict is not None:
-                self.model.load_state_dict(state_dict, strict=False)
+                self.load_state_dict(state_dict)
        return self

    @classmethod
@@ -597,7 +634,7 @@ def from_pretrained(model: Union[nn.Module, SwiftModel],
        if os.path.exists(os.path.join(model_id, _name, CONFIG_NAME)):
            with open(os.path.join(model_id, _name, CONFIG_NAME), 'r') as f:
                _json = json.load(f)
-                is_peft_model = SWIFT_TYPE_KEY not in _json
+                is_peft_model = SWIFT_TYPE_KEY not in _json and 'extra_state_keys' not in _json
        if is_peft_model:
            return PeftModel.from_pretrained(
                model,
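A quick sketch of what the new key remapping in load_state_dict does: checkpoints written by the updated state_dict hold modules_to_save weights under their plain module names, and on load each such key is expanded back to the per-adapter path (<module>.modules_to_save.<adapter>) that the wrapped model expects. Below is a minimal standalone sketch of that remapping; the remap_modules_to_save helper and the sample keys are illustrative, not part of the commit.

# Illustrative sketch only: mirrors the key remapping done by
# SwiftModel.load_state_dict when adapter_name is given.
from copy import copy


def remap_modules_to_save(state_dict, modules_to_save, adapter_name):
    remapped = {}
    for key, value in copy(state_dict).items():
        for module_name in modules_to_save:
            if module_name in key:
                # e.g. 'lm_head.weight' -> 'lm_head.modules_to_save.default.weight'
                key = key.replace(
                    module_name,
                    f'{module_name}.modules_to_save.{adapter_name}')
                break
        remapped[key] = value
    return remapped


saved = {
    'lm_head.weight': 'tensor_a',                      # a modules_to_save weight
    'model.q_proj.lora_A.default.weight': 'tensor_b',  # an ordinary LoRA weight
}
print(remap_modules_to_save(saved, ['lm_head'], 'default'))
# {'lm_head.modules_to_save.default.weight': 'tensor_a',
#  'model.q_proj.lora_A.default.weight': 'tensor_b'}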

swift/tuners/lora.py

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,7 @@

 from swift import LoraConfig
 from .lora_layers import * # noqa
-from .utils import SwiftAdapter, SwiftConfig, SwiftOutput
+from .utils import SwiftAdapter, SwiftConfig, SwiftOutput, set_adapter

 logger = get_logger()

@@ -64,6 +64,7 @@ def mark_trainable_callback(model):
     @staticmethod
     def activate_adapter(module: torch.nn.Module, adapter_name: str,
                          activate: bool):
+        set_adapter(module, adapter_name, activate)
         for sub_module in module.modules():
             if isinstance(sub_module, (LoraLayer, LoRALayer)):
                 sub_module.set_activation(adapter_name, activate)
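The practical effect of the added call is that activating or deactivating a LoRA adapter now also toggles any ModulesToSaveWrapper copies before the LoraLayer activations are switched. A toy sketch of that dispatch pattern, assuming a made-up ToyAdapterLayer in place of the real LoraLayer/LoRALayer classes:

# Toy illustration of the activation dispatch performed by LoRA.activate_adapter.
# ToyAdapterLayer is a stand-in; the real first pass goes through set_adapter
# from swift/tuners/utils.py to reach ModulesToSaveWrapper instances.
import torch.nn as nn


class ToyAdapterLayer(nn.Module):

    def __init__(self):
        super().__init__()
        self.activated = {}

    def set_activation(self, adapter_name, activate):
        self.activated[adapter_name] = activate


def activate_adapter(module, adapter_name, activate):
    # Walk the module tree and flip activation on every adapter-aware layer.
    for sub_module in module.modules():
        if isinstance(sub_module, ToyAdapterLayer):
            sub_module.set_activation(adapter_name, activate)


model = nn.Sequential(nn.Linear(4, 4), ToyAdapterLayer())
activate_adapter(model, 'default', True)
print(model[1].activated)  # {'default': True}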

swift/tuners/lora_layers.py

Lines changed: 84 additions & 2 deletions
@@ -17,11 +17,12 @@
 from peft.tuners.lora import Linear as _Linear
 from peft.tuners.lora import LoraLayer
 from peft.tuners.lora import LoraModel as _LoraModel
-from peft.utils import get_auto_gptq_quant_linear, get_quantization_config
+from peft.utils import (_get_submodules, get_auto_gptq_quant_linear,
+                        get_quantization_config)
 from transformers import Conv1D

 from swift import get_logger
-from .utils import ActivationMixin
+from .utils import ActivationMixin, ModulesToSaveWrapper

 logger = get_logger()

@@ -202,6 +203,87 @@ def __init__(self, model, config, adapter_name):
         nn.Module.__init__(self)
         self.model = model

+    def inject_adapter(self, model: nn.Module, adapter_name: str):
+        r"""
+        Creates adapter layers and replaces the target modules with the adapter layers. This method is called under the
+        hood by `peft.mapping.get_peft_model` if a non-prompt tuning adapter class is passed.
+
+        The corresponding PEFT config is directly retrieved from the `peft_config` attribute of the BaseTuner class.
+
+        Args:
+            model (`nn.Module`):
+                The model to be tuned.
+            adapter_name (`str`):
+                The adapter name.
+        """
+        peft_config = self.peft_config[adapter_name]
+        # Note: If possible, all checks should be performed *at the start of this method*.
+        # This way, we can raise early if something goes wrong, without leaving the model
+        # in a bad (half-initialized) state.
+        self._check_new_adapter_config(peft_config)
+
+        is_target_modules_in_base_model = False
+        key_list = [key for key, _ in model.named_modules()]
+
+        _check_for_modules_to_save = getattr(peft_config, 'modules_to_save',
+                                             None) is not None
+        _has_modules_to_save = False
+
+        model_config = getattr(model, 'config', {'model_type': 'custom'})
+        if hasattr(model_config, 'to_dict'):
+            model_config = model_config.to_dict()
+
+        peft_config = self._prepare_adapter_config(peft_config, model_config)
+
+        for key in key_list:
+            # Check for modules_to_save in case
+            if _check_for_modules_to_save and any(
+                    key.endswith(f'{module_to_save}')
+                    for module_to_save in peft_config.modules_to_save):
+                # Optionally set the modules to save
+                parent, target, target_name = _get_submodules(model, key)
+
+                if not isinstance(target, ModulesToSaveWrapper):
+                    new_module = ModulesToSaveWrapper(target, adapter_name)
+                    setattr(parent, target_name, new_module)
+                else:
+                    target.update(adapter_name)
+
+                _has_modules_to_save = True
+                continue
+
+            if not self._check_target_module_exists(peft_config, key):
+                continue
+
+            is_target_modules_in_base_model = True
+            parent, target, target_name = _get_submodules(model, key)
+
+            optional_kwargs = {
+                'loaded_in_8bit': getattr(model, 'is_loaded_in_8bit', False),
+                'loaded_in_4bit': getattr(model, 'is_loaded_in_4bit', False),
+                'current_key': key,
+            }
+            self._create_and_replace(peft_config, adapter_name, target,
+                                     target_name, parent, **optional_kwargs)
+
+        if not is_target_modules_in_base_model:
+            raise ValueError(
+                f'Target modules {peft_config.target_modules} not found in the base model. '
+                f'Please check the target modules and try again.')
+
+        self._mark_only_adapters_as_trainable()
+
+        if self.peft_config[adapter_name].inference_mode:
+            for n, p in self.model.named_parameters():
+                if adapter_name in n:
+                    p.requires_grad = False
+
+        if _has_modules_to_save:
+            if not hasattr(model, 'modules_to_save'):
+                model.modules_to_save = set(peft_config.modules_to_save)
+            else:
+                model.modules_to_save.update(set(peft_config.modules_to_save))
+
     def _create_and_replace(
         self,
         lora_config,
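The override largely mirrors peft's LoraModel.inject_adapter, with one Swift-specific twist: targets listed in modules_to_save are wrapped in Swift's activation-aware ModulesToSaveWrapper instead of receiving LoRA layers. A self-contained toy sketch of that match-and-replace step follows; TinyWrapper, ToyModel and wrap_modules_to_save are illustrative stand-ins, not the commit's code.

# Toy illustration of the modules_to_save branch: any submodule whose qualified
# name ends with an entry in modules_to_save is replaced by a wrapper that keeps
# the original module plus a trainable per-adapter copy.
import copy

import torch.nn as nn


class TinyWrapper(nn.Module):

    def __init__(self, module, adapter_name):
        super().__init__()
        self.original_module = module
        # One trainable copy per adapter, addressed by adapter name.
        self.modules_to_save = nn.ModuleDict(
            {adapter_name: copy.deepcopy(module)})


def wrap_modules_to_save(model, modules_to_save, adapter_name):
    for key, _ in list(model.named_modules()):
        if key and any(key.endswith(m) for m in modules_to_save):
            parent_name, _, child_name = key.rpartition('.')
            parent = model.get_submodule(parent_name) if parent_name else model
            setattr(parent, child_name,
                    TinyWrapper(getattr(parent, child_name), adapter_name))
    return model


class ToyModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(4, 4)
        self.lm_head = nn.Linear(4, 2)


model = wrap_modules_to_save(ToyModel(), ['lm_head'], 'default')
print(type(model.lm_head).__name__)  # TinyWrapper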

swift/tuners/utils.py

Lines changed: 67 additions & 1 deletion
@@ -5,14 +5,20 @@
 import threading
 from dataclasses import asdict, dataclass, field
 from types import FunctionType
-from typing import Dict
+from typing import Dict, List, Optional

 import json
+import peft.utils
 import torch
 from peft.utils import CONFIG_NAME
+from peft.utils import ModulesToSaveWrapper as _ModulesToSaveWrapper
+from peft.utils import _get_submodules

 from swift.hub.snapshot_download import snapshot_download
 from swift.utils.constants import BIN_EXTENSIONS
+from swift.utils.logger import get_logger
+
+logger = get_logger()


 @dataclass
@@ -138,6 +144,10 @@ def __init__(self):
         self._thread_inf: Dict[int, Dict[str, bool]] = {}
         self._unique_thread = bool(
             int(os.environ.get(ActivationMixin.USE_UNIQUE_THREAD, '1')))
+        if not self._unique_thread:
+            logger.info(
+                'Using multiple thread mode, gradient checkpointing is not supported.'
+            )

     @property
     def indent(self):
@@ -180,3 +190,59 @@ def activate_adapter(module: torch.nn.Module, adapter_name: str,
     @staticmethod
     def freeze_model():
         return True
+
+
+class ModulesToSaveWrapper(ActivationMixin, _ModulesToSaveWrapper):
+
+    def __init__(self, *args, **kwargs):
+        super(ModulesToSaveWrapper, self).__init__()
+        super(ActivationMixin, self).__init__(*args, **kwargs)
+
+    @property
+    def active_adapter(self):
+        active_adapters = self.get_activated_adapters()
+        if not active_adapters:
+            return None
+        elif len(active_adapters) > 1:
+            raise ValueError(
+                'ModulesToSaveWrapper does not support multiple active adapters'
+            )
+        return active_adapters[0]
+
+    def set_adapter(self, adapter_name: str):
+        if adapter_name not in self.modules_to_save:
+            raise ValueError(
+                f'Adapter {adapter_name} not found in {self.modules_to_save.keys()}'
+            )
+        self.modules_to_save[adapter_name].requires_grad_(True)
+        self.set_activation(adapter_name, True)
+
+    def deactivate_adapter(self, adapter_name: str):
+        if adapter_name in self.modules_to_save and self.unique_thread:
+            self.modules_to_save[adapter_name].requires_grad_(False)
+        self.set_activation(adapter_name, False)
+
+
+def set_adapter(model, adapter_name, activate):
+    for module in model.modules():
+        if isinstance(module, ModulesToSaveWrapper):
+            if activate:
+                module.set_adapter(adapter_name)
+            else:
+                module.deactivate_adapter(adapter_name)
+
+
+def set_trainable(model, adapter_name):
+    key_list = [key for key, _ in model.named_modules()]
+    for key in key_list:
+        target_module_found = any(
+            key.endswith(target_key) for target_key in model.modules_to_save)
+        if target_module_found:
+            parent, target, target_name = _get_submodules(model, key)
+            if isinstance(target, ModulesToSaveWrapper):
+                target.update(adapter_name)
+                target.set_adapter(target.active_adapter)
+            else:
+                new_module = ModulesToSaveWrapper(target, adapter_name)
+                new_module.set_adapter(adapter_name)
+                setattr(parent, target_name, new_module)
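The wrapper builds on the per-thread activation bookkeeping that ActivationMixin already provides; the __init__ change above only adds the warning that multi-thread mode and gradient checkpointing do not mix, since checkpoint recomputation may run on a different thread and would see a different activation table. A simplified, hedged sketch of that bookkeeping with made-up names, not the real class:

# Simplified sketch of per-thread activation tracking in the spirit of
# ActivationMixin; TinyActivationMixin and its members are illustrative only.
import os
import threading
from typing import Dict


class TinyActivationMixin:
    USE_UNIQUE_THREAD = 'USE_UNIQUE_THREAD'

    def __init__(self):
        self._thread_inf: Dict[int, Dict[str, bool]] = {}
        self.unique_thread = bool(
            int(os.environ.get(TinyActivationMixin.USE_UNIQUE_THREAD, '1')))

    @property
    def _key(self):
        # Unique-thread mode shares one table; otherwise each thread keeps its own.
        return 0 if self.unique_thread else threading.get_ident()

    def set_activation(self, adapter_name: str, activate: bool):
        self._thread_inf.setdefault(self._key, {})[adapter_name] = activate

    def get_activated_adapters(self):
        return [
            name for name, active in self._thread_inf.get(self._key, {}).items()
            if active
        ]


mixin = TinyActivationMixin()
mixin.set_activation('default', True)
print(mixin.get_activated_adapters())  # ['default']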
