Skip to content

Commit 3867c0d

Browse files
committed
merge parallel-adapter succeed
1 parent 26e4511 commit 3867c0d

File tree

3 files changed

+13
-13
lines changed

3 files changed

+13
-13
lines changed

docs/source/notes/faq.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,8 @@
77
2. **Available Models with default configurations are ..., please manually add the delta models by specifying 'modified_modules' based on the visualization of your model structure**
88

99
Although most pre-trained models (PTMs) use the transformers architecture, they are implemented differently. For example, the attention module in GPT2 and BERT is not only named differently, but also implemented in different ways. Common structure mapping maps the different name conventions of different PTMs into a unified name convention. But there are many PTMs that we do not currently cover. Don't worry! For these models, you can figure out which modules you should modify by simply [visualizing the PTMs](visualization), and then specify the `modified_modules` manually (see [name-based addressing](namebasedaddr)).
10+
11+
12+
3. **Requires a dummy_inputs to be passed through the model to understand the dimensionality of each tensor in the computation graph. The {module.__class__.__name__} Class has no dummy_inputs, and automatically created dummy_inputs failed.**
13+
14+
The `dummy_inputs` can be any data that makes `backbone_model.forward(**dummy_inputs)` succeed. Only the form and shape of the `dummy_inputs` matter. To set dummy_inputs for your model, please use: `setattr(backbone_model, 'dummy_inputs', some_dummy_inputs)` before initializing `{self.__class__.__name__}`.

opendelta/basemodel.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ def _pseudo_data_to_instantiate_module(self, module: Optional[nn.Module]=None):
372372
except:
373373
_auto_dummy_fail = True
374374
if _auto_dummy_fail:
375-
raise AttributeError(f"\nThe {self.__class__.__name__} requires a pseudo-data to be passed through the model to understand the dimensionality of each tensor in the computation graph. \nThe automatically created dummy inputs failed.\nThe `dummy_inputs` can be any data that make `backbone_model.forward(**dummy_inputs)` succeed. Only the form and shape of the `dummy_inputs` matter.\n\tTo set dummy_inputs for your model, please use: `setattr(backbone_model, 'dummy_inputs', some_dummy_inputs)` before initializing `{self.__class__.__name__}` ")
375+
raise AttributeError(f"\n\tThe {self.__class__.__name__} requires a dummy_inputs to be passed through the model to understand the dimensionality of each tensor in the computation graph. \n\t The {module.__class__.__name__} Class has no dummy_inputs, and automatically created dummy_inputs failed.\n\t Refer to `https://opendelta.readthedocs.io/en/latest/notes/faq.html` for detail.")
376376

377377

378378

@@ -804,13 +804,7 @@ def detach(self, module: Optional[nn.Module]=None, reset_state_dict=True):
804804

805805
if _delta_info['method'] == "replace":
806806
setattr(submodule, _delta_info["child_name"], _delta_info['org_module'])
807-
elif _delta_info['method'] == "insert_sequential":
808-
if hasattr(submodule.forward, "__wrapped__"):
809-
submodule.forward = submodule.forward.__wrapped__
810-
delattr(submodule, _delta_info["delta_name"])
811-
else:
812-
raise AttributeError("submodule {}'s forward has no attribute __wrapped__. It's not a wrapped function.".format(name))
813-
elif _delta_info['method'] == "insert_parallel":
807+
elif _delta_info['method'] in ["sequential", "before", "after", "parallel"]:
814808
if hasattr(submodule.forward, "__wrapped__"):
815809
submodule.forward = submodule.forward.__wrapped__
816810
delattr(submodule, _delta_info["delta_name"])

opendelta/delta_models/parallel_adapter.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,9 @@
55
from opendelta.utils.name_based_addressing import *
66
from opendelta.utils.cuda import get_device
77
from opendelta.basemodel import DeltaBase
8-
import loralib as lora
98
import torch.nn as nn
109
import torch
11-
import math
1210
from opendelta.delta_models.layers.activations import Activations
13-
import inspect
1411
from opendelta import BaseDeltaConfig
1512
import opendelta.utils.logging as logging
1613
logger = logging.get_logger(__name__)
@@ -147,20 +144,24 @@ class attributes:
147144
148145
"""
149146
config_class = ParallelAdapterConfig
150-
delta_type = "adapter"
151-
default_modified_modules = ["attn", "attn", "ff.w1", "ff.w2"]
147+
delta_type = "parallel_adapter"
148+
default_modified_modules = ["attn@", "attn@", "ff@.w1@", "ff@.w2@"]
149+
# default_modified_modules = ["attn", "attn", "ff.w1", "ff.w2"]
150+
_need_pseudo_data = True
152151
def __init__(self,
153152
backbone_model: nn.Module,
154153
bottleneck_dim: Optional[int]=24,
155154
non_linearity: Optional[str]='gelu_new',
156155
modified_modules: Optional[bool] = None,
156+
exclude_modules: Optional[List[str]] = None,
157157
unfrozen_modules: Optional[bool] = None,
158158
common_structure: Optional[bool] = None,
159159
interactive_modify: Optional[Union[bool, int]] = False,
160160
):
161161
DeltaBase.__init__(self,
162162
backbone_model,
163163
modified_modules=modified_modules,
164+
exclude_modules=exclude_modules,
164165
unfrozen_modules=unfrozen_modules,
165166
common_structure=common_structure,
166167
interactive_modify=interactive_modify,

0 commit comments

Comments
 (0)