
Commit 2f8c8a5

Merge branch 'main' into release/3.6

2 parents: 42b3b6f + e9f9e08

File tree

16 files changed, +83 -10 lines changed


docs/source/Instruction/Megatron-SWIFT训练.md

Lines changed: 3 additions & 1 deletion

@@ -40,6 +40,7 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2

First, we need to convert the HF-format weights to Megatron format:
- If OOM occurs, simply remove `CUDA_VISIBLE_DEVICES=0`.
+- For "ms-swift>=3.6", it is recommended to add the `--test_convert_precision true` parameter to test the conversion precision.

```shell
CUDA_VISIBLE_DEVICES=0 \
swift export \

@@ -87,7 +88,8 @@ megatron sft \

Finally, convert the Megatron-format weights back to HF format:
- Note: point `--mcore_model` to the parent directory of `iter_xxx`. By default, the checkpoint recorded in `latest_checkpointed_iteration.txt` will be used.
+- If OOM occurs, simply remove `CUDA_VISIBLE_DEVICES=0`.
+- For "ms-swift>=3.6", it is recommended to add the `--test_convert_precision true` parameter to test the conversion precision.

```shell
CUDA_VISIBLE_DEVICES=0 \
swift export \

docs/source/Instruction/命令行参数.md

Lines changed: 4 additions & 1 deletion

@@ -110,7 +110,8 @@

- top_p: the top_p parameter; defaults to None and is read from generation_config.json.
- repetition_penalty: the repetition penalty; defaults to None and is read from generation_config.json.
- num_beams: the number of beams kept in parallel for beam search; defaults to 1.
-- 🔥stream: streaming output, defaults to `False`.
+- 🔥stream: streaming output. Defaults to `None`, i.e. True when using the interactive interface and False during batch inference on datasets.
+  - For "ms-swift<3.6", the default value of stream is False.
- stop_words: additional stop words besides eos_token; defaults to `[]`.
  - Note: eos_token is removed from the output response, while additional stop words are retained in the output.
- logprobs: whether to output logprobs; defaults to False.

@@ -515,6 +516,8 @@ soft overlong reward parameters

- swanlab_project: the SwanLab project; it needs to be created on the page in advance: [https://swanlab.cn/space/~](https://swanlab.cn/space/~)
- swanlab_workspace: defaults to None; the username associated with the api-key will be used.
- swanlab_exp_name: the experiment name; may be empty, in which case the value of --output_dir is used by default.
+- swanlab_lark_webhook_url: defaults to None. SwanLab's Lark webhook URL, used to push experiment results to Lark (Feishu).
+- swanlab_lark_secret: defaults to None. SwanLab's Lark secret, used to push experiment results to Lark (Feishu).
- swanlab_mode: either cloud or local, i.e. cloud mode or local mode.

### Inference arguments

docs/source_en/Instruction/Command-line-parameters.md

Lines changed: 4 additions & 1 deletion

@@ -112,7 +112,8 @@ Refer to the [generation_config](https://huggingface.co/docs/transformers/main_c

- top_p: The top_p parameter, defaults to None. It is read from generation_config.json.
- repetition_penalty: The repetition penalty. Defaults to None and is read from generation_config.json.
- num_beams: The number of beams reserved for parallel beam search, default is 1.
-- 🔥stream: Stream output, default is `False`.
+- 🔥stream: Streaming output. Default is `None`, which means it is set to True when using the interactive interface and False during batch inference on datasets.
+  - For "ms-swift<3.6", the default value of stream is False.
- stop_words: Additional stop words beyond eos_token, default is `[]`.
  - Note: eos_token will be removed from the output response, whereas additional stop words will be retained in the output.
- logprobs: Whether to output logprobs, default is False.

@@ -535,6 +536,8 @@ Soft overlong reward parameters:

- **swanlab_project**: SwanLab's project, which needs to be created in advance on the page: [https://swanlab.cn/space/~](https://swanlab.cn/space/~)
- **swanlab_workspace**: Defaults to `None`; the username associated with the API key will be used.
- **swanlab_exp_name**: Experiment name, can be left empty. If empty, the value of `--output_dir` will be used by default.
+- swanlab_lark_webhook_url: Defaults to None. SwanLab's Lark webhook URL, used for pushing experiment results to Lark.
+- swanlab_lark_secret: Defaults to None. SwanLab's Lark secret, used for pushing experiment results to Lark.
- **swanlab_mode**: Optional values are `cloud` and `local`, representing cloud mode or local mode.

docs/source_en/Instruction/Megatron-SWIFT-Training.md

Lines changed: 4 additions & 1 deletion

@@ -40,7 +40,8 @@ The training module in the dependent library Megatron-LM will be cloned and inst

This section introduces a quick start example for fine-tuning the self-awareness of the Qwen2.5-7B-Instruct model using two 80GiB A100 GPUs. The following best practices can be completed within 10 minutes.

First, we need to convert the weights from HF (Hugging Face) format to Megatron format:
-- If OOM occurs, simply remove `CUDA_VISIBLE_DEVICES=0`.
+- If you encounter OOM, simply remove `CUDA_VISIBLE_DEVICES=0`.
+- For "ms-swift>=3.6", it is recommended to add the `--test_convert_precision true` parameter to test conversion precision.

```shell
CUDA_VISIBLE_DEVICES=0 \
swift export \

@@ -88,6 +89,8 @@ megatron sft \

Finally, convert the Megatron format weights back to HF format:
- Note: Please point `--mcore_model` to the parent directory of `iter_xxx`. By default, the corresponding checkpoint from `latest_checkpointed_iteration.txt` will be used.
+- If you encounter OOM, simply remove `CUDA_VISIBLE_DEVICES=0`.
+- For "ms-swift>=3.6", it is recommended to add the `--test_convert_precision true` parameter to test conversion precision.

```shell
CUDA_VISIBLE_DEVICES=0 \

swift/llm/argument/app_args.py

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ class AppArguments(WebUIArguments, DeployArguments):

    lang: Literal['en', 'zh'] = 'en'
    verbose: bool = False
+    stream: bool = True

    def _init_torch_dtype(self) -> None:
        if self.base_url:
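The one-line change above forces streaming on for the app UI by overriding the field it inherits. A minimal sketch of that dataclass-override pattern, with hypothetical `GenArgs`/`AppArgs` names standing in for `GenerationArguments`/`AppArguments`:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class GenArgs:
    # Base default: None means "not set; decide later from context".
    stream: Optional[bool] = None

@dataclass
class AppArgs(GenArgs):
    # The app serves an interactive UI, so streaming is always on.
    stream: bool = True

print(GenArgs().stream)  # None
print(AppArgs().stream)  # True
```

Redeclaring the field in the subclass replaces the inherited default without touching any of the base class's init logic.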

swift/llm/argument/base_args/base_args.py

Lines changed: 3 additions & 0 deletions

@@ -153,6 +153,7 @@ def __post_init__(self):

        self._init_custom_register()
        self._import_external_plugins()
        self._init_model_kwargs()
+        self._init_stream()
        # The Seq2SeqTrainingArguments has a property called world_size, which cannot be assigned a value.
        self.rank, self.local_rank, self.global_world_size, self.local_world_size = get_dist_setting()
        logger.info(f'rank: {self.rank}, local_rank: {self.local_rank}, '

@@ -223,6 +224,8 @@ def load_args_from_ckpt(self) -> None:

            'bnb_4bit_quant_type',
            'bnb_4bit_use_double_quant',
        ]
+        if 'megatron' in self.__class__.__name__.lower():
+            force_load_keys = []
        # If the current value is None or an empty list and it is among the following keys
        load_keys = [
            'custom_register_path',
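The second hunk empties `force_load_keys` for Megatron argument classes by inspecting the class name at runtime. A small sketch of that class-name-based branching, using hypothetical stand-in classes and keys taken from the diff:

```python
class BaseArgsSketch:
    def load_args_from_ckpt(self):
        # Keys that are normally always loaded from the checkpoint.
        force_load_keys = ['bnb_4bit_quant_type', 'bnb_4bit_use_double_quant']
        # Mirror the diff's guard: any subclass whose name contains
        # "megatron" skips force-loading entirely.
        if 'megatron' in self.__class__.__name__.lower():
            force_load_keys = []
        return force_load_keys

class MegatronArgsSketch(BaseArgsSketch):
    pass

print(BaseArgsSketch().load_args_from_ckpt())      # both bnb keys
print(MegatronArgsSketch().load_args_from_ckpt())  # []
```

Keying the branch off `self.__class__.__name__` lets the base class special-case subclasses it cannot import without creating a circular dependency, at the cost of coupling behavior to naming conventions.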

swift/llm/argument/base_args/generation_args.py

Lines changed: 5 additions & 1 deletion

@@ -32,11 +32,15 @@ class GenerationArguments:

    repetition_penalty: Optional[float] = None
    num_beams: int = 1

-    stream: bool = False
+    stream: Optional[bool] = None
    stop_words: List[str] = field(default_factory=list)
    logprobs: bool = False
    top_logprobs: Optional[int] = None

+    def _init_stream(self):
+        if self.stream is None:
+            self.stream = False
+
    def get_request_config(self):
        if getattr(self, 'task_type') != 'causal_lm':
            return
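This hunk turns `stream` into a tri-state flag: `None` means "not set by the user", and `_init_stream` falls back to `False` only when no subclass resolved it from context first. A minimal sketch of this late-initialization pattern (class name hypothetical):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class GenerationArgsSketch:
    # None = "not set by the user"; True/False = explicit user choice.
    stream: Optional[bool] = None

    def _init_stream(self):
        # Fallback: if nothing else resolved stream, default to False.
        if self.stream is None:
            self.stream = False

args = GenerationArgsSketch()
args._init_stream()
print(args.stream)  # False

explicit = GenerationArgsSketch(stream=True)
explicit._init_stream()
print(explicit.stream)  # True: an explicit value is never overridden
```

The `Optional[bool]` default is what lets callers distinguish "user said False" from "user said nothing", which the infer-arguments change below relies on.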

swift/llm/argument/deploy_args.py

Lines changed: 2 additions & 1 deletion

@@ -4,6 +4,7 @@

from swift.llm import safe_snapshot_download
from swift.utils import find_free_port, get_logger
+from .base_args import BaseArguments
from .infer_args import InferArguments

logger = get_logger()

@@ -66,7 +67,7 @@ def _init_ckpt_dir(self, adapters=None):

        return super()._init_ckpt_dir(self.adapters + list(self.adapter_mapping.values()))

    def _init_stream(self):
-        pass
+        return BaseArguments._init_stream(self)

    def _init_eval_human(self):
        pass
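The change replaces the `pass` stub so that `DeployArguments` skips the `_init_stream` override it would otherwise inherit from `InferArguments`, calling the base implementation directly as an unbound method. A sketch of that dispatch trick with stand-in classes (the method bodies here are hypothetical; only the call pattern mirrors the diff):

```python
class BaseArgumentsSketch:
    def _init_stream(self):
        # Hypothetical base behaviour: unset stream defaults to False.
        if getattr(self, 'stream', None) is None:
            self.stream = False

class InferArgumentsSketch(BaseArgumentsSketch):
    def _init_stream(self):
        # Infer-specific behaviour that Deploy must NOT inherit.
        self.stream = True

class DeployArgumentsSketch(InferArgumentsSketch):
    def _init_stream(self):
        # Bypass InferArgumentsSketch's override and run the base
        # version, as the diff does with BaseArguments._init_stream(self).
        return BaseArgumentsSketch._init_stream(self)

d = DeployArgumentsSketch()
d._init_stream()
print(d.stream)  # False: the base behaviour, not Infer's
```

A plain `super()._init_stream()` would dispatch to the Infer version; naming the base class explicitly jumps over one level of the MRO.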

swift/llm/argument/infer_args.py

Lines changed: 2 additions & 2 deletions

@@ -180,7 +180,8 @@ def _init_result_path(self, folder_name: str) -> None:

    def _init_stream(self):
        self.eval_human = not (self.dataset and self.split_dataset_ratio > 0 or self.val_dataset)
-
+        if self.stream is None:
+            self.stream = self.eval_human
        if self.stream and self.num_beams != 1:
            self.stream = False
            logger.info('Setting args.stream: False')

@@ -199,7 +200,6 @@ def __post_init__(self) -> None:

        VllmArguments.__post_init__(self)
        self._init_result_path('infer_result')
        self._init_eval_human()
-        self._init_stream()
        self._init_ddp()

    def _init_eval_human(self):
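The new lines make an unset `stream` follow `eval_human` (interactive mode), while the existing guard still disables streaming under beam search. The resolution order can be sketched as a pure function (hypothetical helper name; the logic mirrors the diff):

```python
from typing import Optional

def resolve_stream(stream: Optional[bool], eval_human: bool, num_beams: int) -> bool:
    # 1. An unset stream follows the inference mode: interactive (eval_human)
    #    streams by default, dataset batch inference does not.
    if stream is None:
        stream = eval_human
    # 2. Beam search cannot stream tokens, so it wins over everything.
    if stream and num_beams != 1:
        stream = False
    return stream

print(resolve_stream(None, True, 1))   # True  (interactive interface)
print(resolve_stream(None, False, 1))  # False (batch inference on datasets)
print(resolve_stream(True, True, 4))   # False (beam search disables streaming)
```

This is exactly the behavior the documentation hunks above describe: `stream=None` by default, True for the interactive interface, False for dataset inference.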

swift/llm/argument/train_args.py

Lines changed: 11 additions & 0 deletions

@@ -68,6 +68,8 @@ class SwanlabArguments:

    swanlab_project: Optional[str] = None
    swanlab_workspace: Optional[str] = None
    swanlab_exp_name: Optional[str] = None
+    swanlab_lark_webhook_url: Optional[str] = None
+    swanlab_lark_secret: Optional[str] = None
    swanlab_mode: Literal['cloud', 'local'] = 'cloud'

    def _init_swanlab(self):

@@ -80,6 +82,15 @@ def _init_swanlab(self):

        from swanlab.integration.transformers import SwanLabCallback
        if self.swanlab_token:
            swanlab.login(self.swanlab_token)
+
+        if self.swanlab_lark_webhook_url is not None:
+            from swanlab.plugin.notification import LarkCallback
+            lark_callback = LarkCallback(
+                webhook_url=self.swanlab_lark_webhook_url,
+                secret=self.swanlab_lark_secret,
+            )
+            swanlab.register_callbacks([lark_callback])
+
        INTEGRATION_TO_CALLBACK['swanlab'] = SwanLabCallback(
            project=self.swanlab_project,
            workspace=self.swanlab_workspace,
