Fix bug 1206 (#202)

Jintao-Huang · Jintao-Huang · commit a30d235b208d · 2023-12-07T13:50:45.000+08:00
diff --git a/docs/source/LLM/LLM微调文档.md b/docs/source/LLM/LLM微调文档.md
@@ -116,7 +116,7 @@ cd examples/pytorch/llm
 - 如果你想要使用基于**auto_gptq**的量化, 你需要先安装对应cuda版本的[auto_gptq](https://github.com/PanQiWei/AutoGPTQ): `pip install auto_gptq -U`.
   > 使用auto_gptq的模型可以查看[LLM支持的模型](https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型). 建议使用auto_gptq, 而不是bnb.
 - 如果你想要使用deepspeed, 你需要`pip install deepspeed -U`. 使用deepspeed可以**节约显存**, 但可能会略微降低训练速度.
-- 如果你的训练涉及到知识编辑的内容, 例如: [自我认知微调](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自我认知微调最佳实践.md), 你需要在MLP上也加上LoRA, 否则可能会效果不佳. 你可以简单传入参数`--lora_target_modules ALL`来对所有的linear(qkvo, mlp)加上lora, 这通常是效果最好的.
+- 如果你的训练涉及到**知识编辑**的内容, 例如: [自我认知微调](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自我认知微调最佳实践.md), 你需要在MLP上也加上LoRA, 否则可能会效果不佳. 你可以简单传入参数`--lora_target_modules ALL`来对所有的linear(qkvo, mlp)加上lora, **这通常是效果最好的**.
 - 如果你使用的是**V100**等较老的GPU, 你需要设置`--dtype AUTO`或者`--dtype fp16`, 因为其不支持bf16.
 - 如果你的机器是A100等高性能显卡, 且使用的是qwen系列模型, 推荐你安装[**flash-attn**](https://github.com/Dao-AILab/flash-attention), 这将会加快训练和推理的速度并降低显存占用(A10, 3090, V100等显卡不支持flash-attn进行训练). 支持flash-attn的模型可以查看[LLM支持的模型](https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型)
 - 如果你要进行**二次预训练**, **多轮对话**, 你可以参考[自定义与拓展](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自定义与拓展.md#注册数据集的方式)
diff --git a/docs/source/LLM/命令行参数.md b/docs/source/LLM/命令行参数.md
@@ -35,7 +35,7 @@
 - `--bnb_4bit_comp_dtype`: 在进行4bit量化时, 我们需要在模型的forward和backward时, 将其进行反量化. 该参数用于指定反量化后的torch_dtype. 默认为`'AUTO'`, 即与`dtype`保持一致. 可选择的值包括: 'fp16', 'bf16', 'fp32'. 当quantization_bit为0时, 该参数无效.
 - `--bnb_4bit_quant_type`: 4bit量化时的量化方式, 默认是`'nf4'`. 可选择的值包括: 'nf4', 'fp4'. 当quantization_bit为0时, 该参数无效.
 - `--bnb_4bit_use_double_quant`: 是否在4bit量化时开启double量化, 默认为`True`. 当quantization_bit为0时, 该参数无效.
-- `--lora_target_modules`: 指定lora模块, 默认为`None`. 如果lora_target_modules为None, 或者传入`'DEFAULT'` or `'AUTO'`, 则根据`model_type`查找`MODEL_MAPPING`中的`lora_target_modules`(默认指定为qkv). 如果传入`ALL`, 则将所有的Linear层都指定为lora模块(不含head). 该参数只有当`sft_type`指定为'lora'时才生效.
+- `--lora_target_modules`: 指定lora模块, 默认为`None`. 如果lora_target_modules为None, 或者传入`'DEFAULT'`或`'AUTO'`, 则根据`model_type`查找`MODEL_MAPPING`中的`lora_target_modules`(默认指定为qkv). 如果传入`ALL`, 则将所有的Linear层都指定为lora模块(不含head). 如果显存允许, 建议设置成'ALL'. 该参数只有当`sft_type`指定为'lora'时才生效.
 - `--lora_rank`: 默认为`8`. 只有当`sft_type`指定为'lora'时才生效.
 - `--lora_alpha`: 默认为`32`. 只有当`sft_type`指定为'lora'时才生效.
 - `--lora_dropout_p`: 默认为`0.05`, 只有当`sft_type`指定为'lora'时才生效.
diff --git a/docs/source/LLM/支持的模型和数据集.md b/docs/source/LLM/支持的模型和数据集.md
@@ -15,23 +15,23 @@
 | ---------  | -------- | --------------------------- | ---------------- | ------------------ | -------- |
 |qwen-1_8b|[qwen/Qwen-1_8B](https://modelscope.cn/models/qwen/Qwen-1_8B/summary)|c_attn|default-generation|&#x2714;||
 |qwen-1_8b-chat|[qwen/Qwen-1_8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary)|c_attn|chatml|&#x2714;||
-|qwen-1_8b-chat-int4|[qwen/Qwen-1_8B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
-|qwen-1_8b-chat-int8|[qwen/Qwen-1_8B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
+|qwen-1_8b-chat-int4|[qwen/Qwen-1_8B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
+|qwen-1_8b-chat-int8|[qwen/Qwen-1_8B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
 |qwen-7b|[qwen/Qwen-7B](https://modelscope.cn/models/qwen/Qwen-7B/summary)|c_attn|default-generation|&#x2714;||
 |qwen-7b-chat|[qwen/Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary)|c_attn|chatml|&#x2714;||
-|qwen-7b-chat-int4|[qwen/Qwen-7B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
-|qwen-7b-chat-int8|[qwen/Qwen-7B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
+|qwen-7b-chat-int4|[qwen/Qwen-7B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
+|qwen-7b-chat-int8|[qwen/Qwen-7B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-7B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
 |qwen-14b|[qwen/Qwen-14B](https://modelscope.cn/models/qwen/Qwen-14B/summary)|c_attn|default-generation|&#x2714;||
 |qwen-14b-chat|[qwen/Qwen-14B-Chat](https://modelscope.cn/models/qwen/Qwen-14B-Chat/summary)|c_attn|chatml|&#x2714;||
-|qwen-14b-chat-int4|[qwen/Qwen-14B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
-|qwen-14b-chat-int8|[qwen/Qwen-14B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
+|qwen-14b-chat-int4|[qwen/Qwen-14B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
+|qwen-14b-chat-int8|[qwen/Qwen-14B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-14B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
 |qwen-72b|[qwen/Qwen-72B](https://modelscope.cn/models/qwen/Qwen-72B/summary)|c_attn|default-generation|&#x2714;||
 |qwen-72b-chat|[qwen/Qwen-72B-Chat](https://modelscope.cn/models/qwen/Qwen-72B-Chat/summary)|c_attn|chatml|&#x2714;||
-|qwen-72b-chat-int4|[qwen/Qwen-72B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-72B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
-|qwen-72b-chat-int8|[qwen/Qwen-72B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-72B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
+|qwen-72b-chat-int4|[qwen/Qwen-72B-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-72B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
+|qwen-72b-chat-int8|[qwen/Qwen-72B-Chat-Int8](https://modelscope.cn/models/qwen/Qwen-72B-Chat-Int8/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
 |qwen-vl|[qwen/Qwen-VL](https://modelscope.cn/models/qwen/Qwen-VL/summary)|c_attn|default-generation|&#x2714;||
 |qwen-vl-chat|[qwen/Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)|c_attn|chatml|&#x2714;||
-|qwen-vl-chat-int4|[qwen/Qwen-VL-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-VL-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
+|qwen-vl-chat-int4|[qwen/Qwen-VL-Chat-Int4](https://modelscope.cn/models/qwen/Qwen-VL-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
 |qwen-audio|[qwen/Qwen-Audio](https://modelscope.cn/models/qwen/Qwen-Audio/summary)|c_attn|default-generation|&#x2714;||
 |qwen-audio-chat|[qwen/Qwen-Audio-Chat](https://modelscope.cn/models/qwen/Qwen-Audio-Chat/summary)|c_attn|chatml|&#x2714;||
 |chatglm2-6b|[ZhipuAI/chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)|query_key_value|chatglm2|&#x2718;||
@@ -87,7 +87,7 @@
 |seqgpt-560m|[damo/nlp_seqgpt-560m](https://modelscope.cn/models/damo/nlp_seqgpt-560m/summary)|query_key_value|default-generation|&#x2718;||
 |tongyi-finance-14b|[TongyiFinance/Tongyi-Finance-14B](https://modelscope.cn/models/TongyiFinance/Tongyi-Finance-14B/summary)|c_attn|default-generation|&#x2714;||
 |tongyi-finance-14b-chat|[TongyiFinance/Tongyi-Finance-14B-Chat](https://modelscope.cn/models/TongyiFinance/Tongyi-Finance-14B-Chat/summary)|c_attn|chatml|&#x2714;||
-|tongyi-finance-14b-chat-int4|[TongyiFinance/Tongyi-Finance-14B-Chat-Int4](https://modelscope.cn/models/TongyiFinance/Tongyi-Finance-14B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.4.2|
+|tongyi-finance-14b-chat-int4|[TongyiFinance/Tongyi-Finance-14B-Chat-Int4](https://modelscope.cn/models/TongyiFinance/Tongyi-Finance-14B-Chat-Int4/summary)|c_attn|chatml|&#x2714;|auto_gptq>=0.5|
 |codefuse-codellama-34b-chat|[codefuse-ai/CodeFuse-CodeLlama-34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary)|q_proj, k_proj, v_proj|codefuse-codellama|&#x2714;||
 
 
diff --git a/docs/source/LLM/自我认知微调最佳实践.md b/docs/source/LLM/自我认知微调最佳实践.md
@@ -176,7 +176,6 @@ swift sft \
     --self_cognition_sample 500 \
     --model_name 小黄 'Xiao Huang' \
     --model_author 魔搭 ModelScope \
-    --gradient_accumulation_steps 8 \
 ```
 
 ## 微调后推理
diff --git a/swift/llm/infer.py b/swift/llm/infer.py
@@ -2,7 +2,7 @@
 import datetime as dt
 import os
 import shutil
-from typing import Tuple
+from typing import Literal, Tuple
 
 import json
 import torch
@@ -158,8 +158,8 @@ def llm_infer(args: InferArguments) -> None:
     if args.save_result and args.ckpt_dir is not None:
         time = dt.datetime.now().strftime('%Y%m%d-%H%M%S')
         jsonl_path = os.path.join(args.ckpt_dir, f'infer_result_{time}.jsonl')
-    input_mode: Literal['S', 'M'] = 'S'
     if args.eval_human:
+        input_mode: Literal['S', 'M'] = 'S'
         logger.info('Input `exit` to exit the conversation.')
         logger.info('Input `multi-line` to switch to multi-line input mode.')
         if template.support_multi_round:
diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py
@@ -296,6 +296,7 @@ class InferArguments:
     ckpt_dir: Optional[str] = field(
         default=None, metadata={'help': '/path/to/your/vx_xxx/checkpoint-xxx'})
     load_args_from_ckpt_dir: bool = True
+    load_dataset_config: bool = True
     eval_human: bool = False  # False: eval val_dataset
 
     seed: int = 42
@@ -609,7 +610,7 @@ def load_from_ckpt_dir(args: InferArguments) -> None:
         'bnb_4bit_comp_dtype', 'bnb_4bit_quant_type',
         'bnb_4bit_use_double_quant'
     ]
-    if not args.eval_human:
+    if not args.eval_human and args.load_dataset_config:
         imported_keys += [
             'dataset', 'dataset_seed', 'dataset_test_ratio',
             'check_dataset_strategy', 'custom_train_dataset_path',
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
@@ -796,7 +796,7 @@ def fix_qwen_inplace_bug(model) -> None:
                     *args, **kwargs).clone()
             else:
                 __old_forward = first_drop.forward
-                first_drop.forwad = lambda *args, **kwargs: __old_forward(
+                first_drop.forward = lambda *args, **kwargs: __old_forward(
                     *args, **kwargs).clone()
             first_drop.__old_forward = __old_forward
 
@@ -882,7 +882,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-1_8B-Chat-Int8',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
     support_flash_attn=True)
@@ -891,7 +891,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-1_8B-Chat-Int4',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
     support_flash_attn=True)
@@ -900,7 +900,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-72B-Chat-Int8',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
     support_flash_attn=True)
@@ -909,7 +909,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-72B-Chat-Int4',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
     support_flash_attn=True)
@@ -918,7 +918,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'TongyiFinance/Tongyi-Finance-14B-Chat-Int4',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
     support_flash_attn=True)
@@ -927,7 +927,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-VL-Chat-Int4',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     support_flash_attn=True,
     function_kwargs={
@@ -939,7 +939,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-14B-Chat-Int8',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
     support_flash_attn=True)
@@ -948,7 +948,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-7B-Chat-Int8',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
     support_flash_attn=True)
@@ -957,7 +957,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-14B-Chat-Int4',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
     support_flash_attn=True)
@@ -966,7 +966,7 @@ def get_model_tokenizer_qwen_audio(model_dir: str,
     'qwen/Qwen-7B-Chat-Int4',
     LoRATM.qwen,
     TemplateType.chatml,
-    requires=['auto_gptq>=0.4.2'],
+    requires=['auto_gptq>=0.5'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
     support_flash_attn=True)
diff --git a/swift/llm/utils/utils.py b/swift/llm/utils/utils.py
@@ -180,6 +180,9 @@ def dataset_map(
         if audio_info is not None:
             audio_info.pop('input_audios', None)
         data.append(d)
+    if len(data) == 0:
+        logger.info('len(dataset): 0')
+        return None
     return LLMDataset(data)
 
 
diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py
@@ -102,7 +102,7 @@ def test_vl_audio(self):
                 train_dataset_sample=200,
                 dataset=[dataset],
                 output_dir=output_dir,
-                gradient_checkpointing=False)
+                gradient_checkpointing=True)
             output = sft_main(sft_args)
             print(output)
             best_model_checkpoint = output['best_model_checkpoint']