Skip to content

Commit 2fcc9f0

Browse files
authored
Update yi sh (#506)
1 parent 56132d7 commit 2fcc9f0

File tree

10 files changed

+125
-19
lines changed

10 files changed

+125
-19
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用
6464

6565

6666
## 🎉 News
67-
- 2024.03.06: Support training and inference of qwen1.5 awq series, support training and inference of yi-9b.
67+
- 2024.03.06: Support training and inference of qwen1.5 awq series, support training and inference of [yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_mp_ddp).
6868
- 🔥2024.02.29: Support [LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf), use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh) to begin.
6969
- 🔥2024.02.29: Support [LoRA+](https://arxiv.org/pdf/2402.12354.pdf), use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh) to begin.
7070
- 2024.02.25: Support `swift export` to export models for **AWQ/GPTQ** quantization and push to ModelScope Hub. For more details, please refer to the document: [LLM Quantization Document](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E9%87%8F%E5%8C%96%E6%96%87%E6%A1%A3.md).

README_CN.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ SWIFT(Scalable lightWeight Infrastructure for Fine-Tuning)是一个可扩展
6262
用户可以查看 [SWIFT官方文档](docs/source/GetStarted/快速使用.md) 来了解详细信息。
6363

6464
## 🎉 新闻
65-
- 2024.03.06: 支持qwen1.5 awq系列训练与推理, 支持yi-9b训练与推理.
65+
- 2024.03.06: 支持qwen1.5 awq系列训练与推理, 支持[yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_mp_ddp)训练与推理.
6666
- 🔥2024.02.29: 支持[LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh)即可开始训练.
6767
- 🔥2024.02.29: 支持[LoRA+](https://arxiv.org/pdf/2402.12354.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh)即可开始训练.
6868
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看文档: [LLM量化文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E9%87%8F%E5%8C%96%E6%96%87%E6%A1%A3.md).

docs/source/LLM/命令行参数.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
- `--lora_alpha`: 默认为`32`. 只有当`sft_type`指定为'lora'时才生效.
4949
- `--lora_dropout_p`: 默认为`0.05`, 只有当`sft_type`指定为'lora'时才生效.
5050
- `--lora_bias_trainable`: 默认为`'none'`, 可以选择的值: 'none', 'all'. 如果你要将bias全都设置为可训练, 你可以设置为`'all'`.
51-
- `--lora_modules_to_save`: 默认为`[]`. 如果你想要训练embedding, lm_head, 或者layer_norm, 你可以设置此参数, 例如: `--lora_modules_to_save wte ln_1 ln_2 ln_f lm_head`, 这个参数用于任何adapter的训练中.
51+
- `--lora_modules_to_save`: 默认为`[]`. 如果你想要训练embedding, lm_head, 或者layer_norm, 你可以设置此参数, 例如: `--lora_modules_to_save EMBEDDING LN lm_head`. 如果传入`'EMBEDDING'`, 则将Embedding层添加到`lora_modules_to_save`. 如果传入`'LN'`, 则将`RMSNorm`和`LayerNorm`添加到`lora_modules_to_save`.
5252
- `--lora_dtype`: 默认为`'fp32'`, 指定lora模块的dtype类型. 如果是`AUTO`则跟随原始模块的dtype类型. 你可以选择的值: 'fp16', 'bf16', 'fp32', 'AUTO'.
5353
- `--use_dora`: 默认为`False`, 是否使用`DoRA`.
5454
- `--use_rslora`: 默认为`False`, 是否使用`RS-LoRA`.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Experimental environment: 2 * 3090
# Run inference on a LoRA checkpoint of yi-9b using the dataset
# configuration saved at training time.
# NOTE: the original script ended with a dangling `\` continuation after
# the last flag, leaving the command unterminated; fixed here.
CUDA_VISIBLE_DEVICES=0,1 \
swift infer \
    --ckpt_dir "output/yi-9b/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --max_length 2048 \
    --use_flash_attn true \
    --max_new_tokens 2048 \
    --temperature 0.3 \
    --top_p 0.7 \
    --repetition_penalty 1. \
    --do_sample true \
    --merge_lora false
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Experimental environment: 4 * 3090
# 4 * 22GB GPU memory
# Train a chat model with agent capabilities and self-cognition from the base.
# NPROC_PER_NODE=2 with 4 visible GPUs: presumably 2 DDP processes, each
# spanning 2 GPUs via model parallelism (matches the lora_mp_ddp dir name)
# — confirm against the launcher behavior.
# NOTE: the original script ended with a dangling `\` continuation after
# the last flag, leaving the command unterminated; fixed here.

CUDA_VISIBLE_DEVICES=0,1,2,3 \
NPROC_PER_NODE=2 \
swift sft \
    --model_type yi-9b \
    --sft_type lora \
    --tuner_backend swift \
    --template_type yi \
    --dtype AUTO \
    --output_dir output \
    --dataset ms-agent \
    --train_dataset_sample 20000 \
    --train_dataset_mix_ratio 2 \
    --num_train_epochs 3 \
    --max_length 4096 \
    --check_dataset_strategy warning \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --lora_modules_to_save EMBEDDING LN \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 5e-5 \
    --gradient_accumulation_steps 16 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --use_flash_attn false \
    --self_cognition_sample 2000 \
    --model_name 小黄 'Xiao Huang' \
    --model_author 魔搭 ModelScope

swift/llm/tuner.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
from swift.tuners.module_mapping import MODEL_KEYS_MAPPING
1414
from swift.utils import (activate_model_parameters, freeze_model_parameters,
1515
get_logger)
16-
from .utils import SftArguments, find_all_linears, find_embedding, is_adapter
16+
from .utils import (SftArguments, find_all_linears, find_embedding, find_ln,
17+
is_adapter)
1718

1819
logger = get_logger()
1920

2021

21-
def handle_target_modules_all(model, args: SftArguments) -> None:
22+
def handle_target_modules(model, args: SftArguments) -> None:
2223
if args.sft_type == 'ia3':
2324
target_modules = args.ia3_target_modules
2425
assert len(args.ia3_feedforward_modules) > 0, (
@@ -39,11 +40,30 @@ def handle_target_modules_all(model, args: SftArguments) -> None:
3940
logger.info(f'lora_target_modules: {args.lora_target_modules}')
4041

4142

43+
def handle_modules_to_save(model, args: SftArguments) -> None:
    """Resolve the 'EMBEDDING'/'LN' placeholders for modules_to_save.

    Picks the modules_to_save list matching ``args.sft_type``, extends it
    in place with the concrete embedding / normalization layer names
    discovered on ``model`` when the corresponding ``lora_m2s_use_*``
    flag was set during argument parsing, then writes the result back
    onto ``args`` and logs it.
    """
    is_ia3 = args.sft_type == 'ia3'
    modules_to_save = (args.ia3_modules_to_save
                       if is_ia3 else args.lora_modules_to_save)
    if args.lora_m2s_use_embedding:
        modules_to_save += find_embedding(model)
    if args.lora_m2s_use_ln:
        modules_to_save += find_ln(model)

    if is_ia3:
        args.ia3_modules_to_save = modules_to_save
        logger.info(f'ia3_modules_to_save: {args.ia3_modules_to_save}')
    else:
        args.lora_modules_to_save = modules_to_save
        logger.info(f'lora_modules_to_save: {args.lora_modules_to_save}')
59+
60+
4261
def prepare_model(model, args: SftArguments):
4362
# Preparing LoRA
4463
if is_adapter(args.sft_type):
4564
if args.resume_from_checkpoint is None:
46-
handle_target_modules_all(model, args)
65+
handle_target_modules(model, args)
66+
handle_modules_to_save(model, args)
4767
lora_kwargs = {
4868
'r': args.lora_rank,
4969
'target_modules': args.lora_target_modules,

swift/llm/utils/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@
3030
StopWords, Template, TemplateType, get_template,
3131
register_template)
3232
from .utils import (LazyLLMDataset, LLMDataset, dataset_map, download_dataset,
33-
find_all_linears, find_embedding, get_max_model_len,
34-
get_time_info, history_to_messages, inference,
35-
inference_stream, is_vllm_available, limit_history_length,
36-
messages_to_history, print_example, safe_tokenizer_decode,
37-
set_generation_config, sort_by_max_length, stat_dataset,
38-
to_device)
33+
find_all_linears, find_embedding, find_ln,
34+
get_max_model_len, get_time_info, history_to_messages,
35+
inference, inference_stream, is_vllm_available,
36+
limit_history_length, messages_to_history, print_example,
37+
safe_tokenizer_decode, set_generation_config,
38+
sort_by_max_length, stat_dataset, to_device)
3939

4040
try:
4141
if is_vllm_available():

swift/llm/utils/argument.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ class SftArguments:
216216
deepspeed_config_path: Optional[str] = None
217217
model_cache_dir: Optional[str] = None
218218

219-
def _prepare_target_modules(self, target_modules):
219+
def _prepare_target_modules(self, target_modules) -> List[str]:
220220
if isinstance(target_modules, str):
221221
target_modules = [target_modules]
222222
if len(target_modules) == 0:
@@ -238,6 +238,19 @@ def _prepare_target_modules(self, target_modules):
238238
self.lora_use_all = True
239239
return target_modules
240240

241+
def _prepare_modules_to_save(self, modules_to_save) -> List[str]:
242+
if isinstance(modules_to_save, str):
243+
modules_to_save = [modules_to_save]
244+
if len(modules_to_save) == 0:
245+
return modules_to_save
246+
if 'EMBEDDING' in modules_to_save:
247+
modules_to_save.remove('EMBEDDING')
248+
self.lora_m2s_use_embedding = True
249+
if 'LN' in modules_to_save:
250+
modules_to_save.remove('LN')
251+
self.lora_m2s_use_ln = True
252+
return modules_to_save
253+
241254
def __post_init__(self) -> None:
242255
handle_compatibility(self)
243256
if is_pai_training_job():
@@ -259,14 +272,20 @@ def __post_init__(self) -> None:
259272

260273
self.lora_use_embedding = False
261274
self.lora_use_all = False
275+
self.lora_m2s_use_embedding = False
276+
self.lora_m2s_use_ln = False
262277
if self.sft_type == 'ia3':
263278
self.ia3_feedforward_modules = self._prepare_target_modules(
264279
self.ia3_feedforward_modules)
265280
self.ia3_target_modules = self._prepare_target_modules(
266281
self.ia3_target_modules)
282+
self.ia3_modules_to_save = self._prepare_modules_to_save(
283+
self.ia3_modules_to_save)
267284
else:
268285
self.lora_target_modules = self._prepare_target_modules(
269286
self.lora_target_modules)
287+
self.lora_modules_to_save = self._prepare_modules_to_save(
288+
self.lora_modules_to_save)
270289
if self.sft_type in {'adalora', 'ia3'} and self.lora_use_embedding:
271290
raise ValueError(
272291
'`adalora` and `ia3` do not support setting embedding as target_modules.'

swift/llm/utils/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,7 @@ def cross_entropy_forward(self, inputs: Tensor,
10041004
support_vllm=True)
10051005
@register_model(
10061006
ModelType.yi_9b,
1007-
'AI-ModelScope/Yi-9B',
1007+
'01ai/Yi-9B',
10081008
LoRATM.llama2,
10091009
TemplateType.default_generation,
10101010
support_flash_attn=True,

swift/llm/utils/utils.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -339,13 +339,28 @@ def print_example(example: Dict[str, Any],
339339
logger.info(f'[LABLES] {labels_str}')
340340

341341

342-
def find_embedding(model: Module) -> List[str]:
343-
target_module_names = set()
342+
def _find_layers(model: Module, module_cls: type) -> List[str]:
343+
module_names = set()
344344
for name, module in model.named_modules():
345-
if isinstance(module, torch.nn.Embedding):
345+
if isinstance(module, module_cls):
346346
module_name = '.'.join(name.split('.')[-2:])
347-
target_module_names.add(module_name)
348-
return list(target_module_names)
347+
module_names.add(module_name)
348+
return list(module_names)
349+
350+
351+
def find_ln(model: Module) -> List[str]:
    """Return deduplicated suffix names of normalization submodules.

    Matches ``torch.nn.LayerNorm`` instances plus any module whose class
    name contains 'rmsnorm' (HF models define their own RMSNorm classes
    that do not subclass LayerNorm). Only the last dot-separated
    component of the qualified name is kept (e.g. 'ln_1').

    Fix: the original collected names into a ``set``, so the returned
    order depended on string-hash randomization and varied between runs;
    a dict is used here to dedup while preserving discovery order. Also
    simplified the pointless ``'.'.join(...[-1:])`` to a direct index.
    """
    module_names = {}
    for name, module in model.named_modules():
        module_cls_name = module.__class__.__name__.lower()
        if isinstance(module,
                      torch.nn.LayerNorm) or 'rmsnorm' in module_cls_name:
            module_names[name.split('.')[-1]] = None
    return list(module_names)
360+
361+
362+
def find_embedding(model: Module) -> List[str]:
    """Return the suffix names of every ``torch.nn.Embedding`` submodule
    of ``model`` (thin wrapper over ``_find_layers``)."""
    embedding_cls = torch.nn.Embedding
    return _find_layers(model, embedding_cls)
349364

350365

351366
def find_all_linears(model: Module, quantization_bit: int,

0 commit comments

Comments
 (0)