Commit aad7a74

update prompt tuner (#38)
1 parent b5b897a commit aad7a74

1 file changed: +27 −2 lines changed


swift/tuners/prompt.py

Lines changed: 27 additions & 2 deletions
@@ -31,6 +31,7 @@ class PromptConfig(SwiftConfig):
         attention_mask_value: The value to pad to the attention mask
         prompt_length: The length of the prompt tokens
         attach_front: When set to True, prompt is attached in front of the embedding
+        extract_embedding: Whether the embedding is extracted at final stage to keep the same dims with inputs
     """

     dim: int = field(
@@ -60,6 +61,13 @@ class PromptConfig(SwiftConfig):
             'help':
             'When set to True, prompt is attached in front of the embedding'
         })
+
+    extract_embedding: bool = field(
+        default=False,
+        metadata={
+            'help':
+            'Whether the embedding is extracted at final stage to keep the same dims with inputs'
+        })

     def __post_init__(self):
         from .mapping import SwiftTuners
@@ -71,6 +79,7 @@ class Prompt:
     @staticmethod
     def prepare_model(model: nn.Module, config: PromptConfig):
         module_keys = [key for key, _ in model.named_modules()]
+        match_module_keys = []
         for module_key in module_keys:
             if re.fullmatch(config.target_modules, module_key):  # noqa
                 module = model.get_submodule(module_key)
@@ -109,16 +118,26 @@ def _forward(self, *args, **kwargs):
                     else:
                         kwargs[config.attention_mask_pos] = attention_mask

-                    return self.forward_origin(*args, **kwargs)
+                    forward_output = self.forward_origin(*args, **kwargs)
+                    if config.extract_embedding:
+                        forward_output = getattr(
+                            self, 'prompt').extract(forward_output)
+
+                    return forward_output

                 module.forward_origin = module.forward
                 module.forward = types.MethodType(_forward, module)
-                prompt_module = PromptModule(config.dim,
+                if isinstance(config.dim, list):
+                    input_dim = config.dim[len(match_module_keys)]
+                else:
+                    input_dim = config.dim
+                prompt_module = PromptModule(input_dim,
                                              int(module_key.rsplit('.')[-1]),
                                              config.prompt_length,
                                              config.attention_mask_value,
                                              config.attach_front)
                 setattr(module, 'prompt', prompt_module)
+                match_module_keys.append(module_key)

         def state_dict_callback(state_dict):
             return {
@@ -184,3 +203,9 @@ def patch_attention_mask(self, m):
         prefix_attention_mask = torch.full((*m.shape[:-1], self.prompt_length),
                                            self.mask_values).to(m.device)
         return torch.cat((prefix_attention_mask, m), dim=-1)
+
+    def extract(self, x):
+        if self.attach_front:
+            return x[:, self.prompt_length:, :]
+        else:
+            return x[:, :-self.prompt_length, :]

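For orientation only, a rough sketch of how the new options might be set. Everything below is an assumption rather than part of the commit: the import path simply mirrors the file changed here, the regex and dim values are invented, `model` stands for any existing torch.nn.Module whose matched submodule names end in a layer index, and PromptConfig fields not passed are assumed to keep their defaults.

    from swift.tuners.prompt import Prompt, PromptConfig  # assumed import path

    config = PromptConfig(
        # After this patch, dim may be a list: one entry per matched module,
        # consumed in the order the modules are matched.
        dim=[768, 768, 1024],
        # Illustrative regex; matched keys must end in a layer index, since the
        # tuner builds PromptModule with int(module_key.rsplit('.')[-1]).
        target_modules=r'model\.layers\.\d+',
        prompt_length=16,
        attach_front=True,
        # New flag added by this commit: slice the prompt positions back off the
        # wrapped module's output so it keeps the same dims as the input.
        extract_embedding=True,
    )

    # prepare_model patches the matched modules in place (their forward is wrapped
    # and a PromptModule is attached); its return value is not used in this sketch.
    Prompt.prepare_model(model, config)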