@@ -26,7 +26,8 @@
 from transformers.utils.versions import require_version
 
 from swift import get_logger
-from swift.utils import is_dist, is_local_master, use_torchacc
+from swift.utils import (get_dist_setting, is_dist, is_local_master,
+                         use_torchacc)
 from .template import TemplateType
 from .utils import get_max_model_len
 
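The new `get_dist_setting` import feeds the GPU-sharding check added further down. As a point of reference, here is a minimal sketch of how it is consumed, assuming (consistently with the `get_dist_setting()[3]` access later in this patch) that it returns a `(rank, local_rank, world_size, local_world_size)` tuple:

```python
# Sketch only: the tuple layout is an assumption inferred from the
# get_dist_setting()[3] access used later in this patch.
import torch

from swift.utils import get_dist_setting

rank, local_rank, world_size, local_world_size = get_dist_setting()
# GPUs visible to each local process; >= 4 triggers the visual-block patch below
gpus_per_process = torch.cuda.device_count() // local_world_size
```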
@@ -2206,7 +2207,27 @@ def get_model_tokenizer_qwen_chat(*args, **kwargs):
     return model, tokenizer
 
 
+def _qwen_vl_visual_block_forward(
+    self,
+    q_x: torch.Tensor,
+    k_x: Optional[torch.Tensor] = None,
+    v_x: Optional[torch.Tensor] = None,
+    attn_mask: Optional[torch.Tensor] = None,
+):
+    k_x = self.ln_1_kv(k_x) if hasattr(self,
+                                       'ln_1_kv') and k_x is not None else None
+    v_x = self.ln_1_kv(v_x) if hasattr(self,
+                                       'ln_1_kv') and v_x is not None else None
+
+    x = q_x + self.attention(
+        q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask)
+    z = self.mlp(self.ln_2(x))
+    x = x.to(z.device) + z  # FIX: align devices when the block is sharded
+    return x
+
+
 def fix_qwen_inplace_bug(model) -> None:
+    # applies to qwen-vl and qwen-audio
     first_drop = model.transformer.drop
     if first_drop.p == 0.:
         # fix in-place operation bug
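The one-line `x.to(z.device) + z` change is the heart of the fix: when `device_map` shards the visual tower over several GPUs, `ln_2`/`mlp` can be placed on a later device than the block input, and the plain residual add fails. A hypothetical two-GPU repro (not from the patch, just illustrating the failure mode):

```python
# Requires >= 2 CUDA devices; mimics a residual add across a device_map split.
import torch

if torch.cuda.device_count() >= 2:
    x = torch.randn(4, 8, device='cuda:0')  # residual stream on the first GPU
    z = torch.randn(4, 8, device='cuda:1')  # MLP output placed on the next GPU
    try:
        _ = x + z  # RuntimeError: tensors expected on the same device
    except RuntimeError as err:
        print(err)
    out = x.to(z.device) + z  # the patched form: move x before adding
```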
@@ -2271,12 +2292,27 @@ def get_model_tokenizer_qwen_vl(model_dir: str,
     if not hasattr(tokenizer_cls, '_old_decode'):  # avoid double patching
         tokenizer_cls._old_decode = tokenizer_cls._decode
         tokenizer_cls._decode = _qwen_vl_audio_decode
+    # fix the forward pass when device_map spreads a rank over >= 4 GPUs
+    n_gpu = torch.cuda.device_count()
+    local_world_size = get_dist_setting()[3]
+    if n_gpu // local_world_size >= 4:
+        visual_block_cls = get_class_from_dynamic_module(
+            'visual.VisualAttentionBlock', model_dir)
+        if not hasattr(visual_block_cls,
+                       '__old_forward'):  # avoid double patching
+            visual_block_cls.__old_forward = visual_block_cls.forward
+            visual_block_cls.forward = _qwen_vl_visual_block_forward
+
     kwargs['tokenizer'] = tokenizer_cls.from_pretrained(
         model_dir, trust_remote_code=True)
     model, tokenizer = get_qwen_function(model_dir, torch_dtype, model_kwargs,
                                          load_model, **kwargs)
     if model is not None:
         fix_qwen_inplace_bug(model)
+        # keep visual.proj on the same device as the ln_post output
+        if n_gpu // local_world_size >= 4:
+            model.transformer.visual.proj.data = model.transformer.visual.proj.to(
+                model.transformer.visual.ln_post.bias.device)
 
     return model, tokenizer
 
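The final hunk also rebinds `visual.proj` onto the device of `ln_post`. Qwen-VL's visual encoder ends, roughly, with `x = self.ln_post(x)` followed by `x = x @ self.proj`, so under a sharded `device_map` the projection weight must be co-located with the `ln_post` output. A hedged sketch of the same move as a standalone helper (names mirror the patch; the encoder-tail shape is an assumption about Qwen-VL's remote code):

```python
import torch


def align_visual_proj(model: torch.nn.Module) -> None:
    # Assumes the Qwen-VL layout used in the patch:
    # model.transformer.visual.{proj, ln_post}.
    visual = model.transformer.visual
    target = visual.ln_post.bias.device  # device producing the ln_post output
    # Reassign .data in place so existing references to the Parameter
    # (e.g. from accelerate hooks) keep pointing at the same object.
    visual.proj.data = visual.proj.to(target)
```

Rebinding `.data`, as the patch does, keeps the `Parameter` object identity stable rather than constructing a new parameter.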