
Commit eb7e551

Fix flash attention bug
1 parent e401bc7 commit eb7e551

2 files changed: +16 −8 lines


internvl_chat/internvl/train/internvl_chat_finetune.py (8 additions, 4 deletions)

@@ -489,8 +489,10 @@ def main():
     logger.info('Loading InternVLChatModel...')
     config = InternVLChatConfig.from_pretrained(model_args.model_name_or_path)
     config.vision_config.drop_path_rate = model_args.drop_path_rate
-    config.llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
-    config.llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
+    if 'internlm' in model_args.model_name_or_path.lower():
+        config.llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
+    else:
+        config.llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
     config.template = data_args.conv_style
     config.select_layer = model_args.vision_select_layer
     config.dynamic_image_size = data_args.dynamic_image_size
@@ -508,8 +510,10 @@ def main():
         model_args.vision_path, torch_dtype=torch.bfloat16, config=vision_config)
     logger.info('Loading LLaMA...')
     llm_config = AutoConfig.from_pretrained(model_args.llm_path, trust_remote_code=True)
-    llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
-    llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
+    if 'internlm' in model_args.llm_path.lower():
+        llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
+    else:
+        llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
     llm = AutoModelForCausalLM.from_pretrained(
         model_args.llm_path, torch_dtype=torch.bfloat16,
         config=llm_config, trust_remote_code=True)
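
Both hunks apply the same fix: rather than setting both config attributes unconditionally, the flash-attention flag is now written only to the attribute the loaded model actually reads (attn_implementation for InternLM's remote-code config, _attn_implementation for LLaMA-style configs), selected by checking the checkpoint path. Below is a minimal standalone sketch of that pattern; the enable_flash_attention helper and the example path are illustrative assumptions, not part of this commit.

# Sketch of the attribute-selection pattern used in this commit.
from transformers import AutoConfig

def enable_flash_attention(llm_path: str):
    """Request FlashAttention-2 via the attribute matching the model family."""
    llm_config = AutoConfig.from_pretrained(llm_path, trust_remote_code=True)
    if 'internlm' in llm_path.lower():
        # InternLM's remote-code config reads the public attribute.
        llm_config.attn_implementation = 'flash_attention_2'
    else:
        # LLaMA-style configs in transformers use the private attribute.
        llm_config._attn_implementation = 'flash_attention_2'
    return llm_config

# Example usage (assumed local checkpoint directory):
# config = enable_flash_attention('./pretrained/internlm2-chat-7b')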

internvl_chat/internvl/train/internvl_chat_pretrain.py (8 additions, 4 deletions)

@@ -509,8 +509,10 @@ def main():
     logger.info('Loading InternVLChatModel...')
     config = InternVLChatConfig.from_pretrained(model_args.model_name_or_path)
     config.vision_config.drop_path_rate = model_args.drop_path_rate
-    config.llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
-    config.llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
+    if 'internlm' in model_args.model_name_or_path.lower():
+        config.llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
+    else:
+        config.llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
     config.template = data_args.conv_style
     config.select_layer = model_args.vision_select_layer
     config.dynamic_image_size = data_args.dynamic_image_size
@@ -528,8 +530,10 @@ def main():
         model_args.vision_path, torch_dtype=torch.bfloat16, config=vision_config)
     logger.info('Loading LLaMA...')
     llm_config = AutoConfig.from_pretrained(model_args.llm_path, trust_remote_code=True)
-    llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
-    llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
+    if 'internlm' in model_args.llm_path.lower():
+        llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
+    else:
+        llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
     llm = AutoModelForCausalLM.from_pretrained(
         model_args.llm_path, torch_dtype=torch.bfloat16,
         config=llm_config, trust_remote_code=True)
