
Commit 4e4b794

support warmup_stable_decay (#1312)
1 parent 4a96f35 commit 4e4b794

File tree

3 files changed: +4 −0 lines


docs/source/LLM/命令行参数.md

Lines changed: 1 addition & 0 deletions

@@ -87,6 +87,7 @@

- `--predict_with_generate`: Whether to use generation for evaluation, default `False`. If set to False, evaluate using `loss`; if set to True, evaluate using metrics such as `ROUGE-L`. Generative evaluation takes a long time, choose carefully.
- `--lr_scheduler_type`: Default is `'cosine'`; options include 'linear', 'cosine', 'constant', etc.
- `--warmup_ratio`: Proportion of total training steps used for warmup, default `0.05`.
- `--warmup_steps`: Number of warmup steps, default `0`. If `warmup_steps > 0` is set, it overrides `warmup_ratio`. (added)
- `--eval_steps`: Evaluate every this many training steps, default `50`.
- `--save_steps`: Save a checkpoint every this many training steps, default `None`, i.e. set to `eval_steps`.
- `--save_only_model`: Whether to save only model parameters, without the intermediate states needed for checkpoint resuming, default `None`: if `sft_type` is 'lora' and deepspeed is not used (`deepspeed` is `None`), set to False, otherwise set to True (e.g. full-parameter fine-tuning or deepspeed is used).

docs/source_en/LLM/Command-line-parameters.md

Lines changed: 1 addition & 0 deletions

@@ -88,6 +88,7 @@

- `--predict_with_generate`: Whether to use generation for evaluation, default is `False`. If set to False, evaluate using `loss`; if set to True, evaluate using metrics such as `ROUGE-L`. Generative evaluation takes a long time, choose carefully.
- `--lr_scheduler_type`: Default is `'cosine'`; options include 'linear', 'cosine', 'constant', etc.
- `--warmup_ratio`: Proportion of total training steps used for warmup, default is `0.05`.
- `--warmup_steps`: Number of warmup steps, default is `0`. If `warmup_steps > 0` is set, it overrides `warmup_ratio`. (added)
- `--eval_steps`: Evaluate every this many training steps, default is `50`.
- `--save_steps`: Save a checkpoint every this many training steps, default is `None`, i.e. set to `eval_steps`.
- `--save_only_model`: Whether to save only model parameters, without the intermediate states needed for checkpoint resuming, default is `None`: if `sft_type` is 'lora' and deepspeed is not used (`deepspeed` is `None`), set to False, otherwise set to True (e.g. using full-parameter fine-tuning or deepspeed).
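The precedence between `--warmup_steps` and `--warmup_ratio` described above can be sketched as a small helper. This is a hypothetical illustration (`resolve_warmup_steps` is not a function in the repo); it mirrors the documented rule that a positive `warmup_steps` overrides `warmup_ratio`, and assumes the ratio-based count is rounded up.

```python
import math

def resolve_warmup_steps(total_steps: int,
                         warmup_ratio: float = 0.05,
                         warmup_steps: int = 0) -> int:
    """Hypothetical helper: a positive warmup_steps wins over warmup_ratio."""
    if warmup_steps > 0:
        # Explicit step count overrides any effect of warmup_ratio.
        return warmup_steps
    # Otherwise derive the warmup length from the ratio (rounded up).
    return math.ceil(total_steps * warmup_ratio)
```

For example, with 1000 total steps the default ratio of 0.05 yields 50 warmup steps, while passing `warmup_steps=100` ignores the ratio entirely.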

swift/llm/utils/argument.py

Lines changed: 2 additions & 0 deletions

@@ -581,6 +581,7 @@ class SftArguments(ArgumentsBase):
     lr_scheduler_type: str = 'cosine'
     lr_scheduler_kwargs: Optional[str] = None  # json
     warmup_ratio: float = 0.05
+    warmup_steps: int = 0  # Overrides any effect of `warmup_ratio` if warmup_steps > 0

     eval_steps: int = 50
     save_steps: Optional[int] = None

@@ -984,6 +985,7 @@ def _init_training_args(self) -> None:
     lr_scheduler_type=self.lr_scheduler_type,
     lr_scheduler_kwargs=self.lr_scheduler_kwargs,
     warmup_ratio=self.warmup_ratio,
+    warmup_steps=self.warmup_steps,
     logging_steps=self.logging_steps,
     save_strategy=self.save_strategy,
     save_steps=self.save_steps,
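The commit title refers to the warmup–stable–decay (WSD) schedule this parameter plumbing enables. As a rough sketch only (the exact curve used by the underlying scheduler may differ; `wsd_lambda` is a name invented here), a WSD learning-rate multiplier warms up linearly, holds a constant plateau, then decays linearly to zero:

```python
def wsd_lambda(step: int, warmup: int, stable: int, decay: int) -> float:
    """Sketch of a warmup-stable-decay LR multiplier (assumed linear ramps)."""
    if step < warmup:
        # Linear warmup from 0 toward the peak rate.
        return step / max(1, warmup)
    if step < warmup + stable:
        # Stable plateau at the peak rate.
        return 1.0
    # Linear decay from the peak rate down to zero.
    remaining = warmup + stable + decay - step
    return max(0.0, remaining / max(1, decay))
```

A multiplier of this shape could be plugged into `torch.optim.lr_scheduler.LambdaLR`, which scales the optimizer's base learning rate by the returned factor at each step.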
