
Commit 1e91895

Merge branch 'main' into release/3.3

2 parents: 78cc3ed + 634c15d

File tree

7 files changed (+7, -15 lines)


docs/source/Instruction/Megatron-SWIFT训练.md

Lines changed: 0 additions & 3 deletions

@@ -38,10 +38,8 @@ swift export \
     --model Qwen/Qwen2.5-7B-Instruct \
     --to_mcore true \
     --torch_dtype bfloat16 \
-    --test_convert_precision true \
     --output_dir Qwen2.5-7B-Instruct-mcore
 ```
-- Note: if OOM occurs, remove the `--test_convert_precision true` parameter

 Then, use the following script to train; the GPU memory required for training is 2*80GiB:
 ```shell
@@ -82,7 +80,6 @@ swift export \
     --mcore_model megatron_output/Qwen2.5-7B-Instruct/vx-xxx \
     --to_hf true \
     --torch_dtype bfloat16 \
-    --test_convert_precision true \
     --output_dir megatron_output/Qwen2.5-7B-Instruct/vx-xxx-hf
 ```

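Per the parameter docs touched in this commit, `--test_convert_precision` compares the converted weights against the originals, so both copies must be resident at once; that extra footprint is what the OOM note removed above warned about. A rough pure-Python sketch of such a check (the function and the flat lists standing in for tensors are illustrative, not swift's actual implementation):

```python
def check_convert_precision(src_weights, converted_weights, atol=1e-5):
    # Illustrative only: walk both weight dicts (flat lists stand in
    # for real tensors) and track the largest element-wise deviation.
    # Holding src and converted weights simultaneously is the extra
    # memory cost that can trigger OOM on large models.
    max_err = 0.0
    for name, src in src_weights.items():
        for a, b in zip(src, converted_weights[name]):
            max_err = max(max_err, abs(a - b))
    return max_err <= atol, max_err

src = {'wte': [0.1, 0.2, 0.3]}
converted = {'wte': [0.1, 0.2, 0.3]}
ok, err = check_convert_precision(src, converted)
print(ok, err)  # True 0.0
```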

docs/source/Instruction/命令行参数.md

Lines changed: 1 addition & 1 deletion

@@ -518,7 +518,7 @@ App parameters inherit from [部署参数](#部署参数), [Web-UI参数](#Web-UI参数)
 - to_hf: Convert weights from Megatron format to HF format. Default is False
 - mcore_model: Path to the mcore-format model. Default is None
 - thread_count: Number of model shards when `--to_mcore true` is set. Defaults to None, in which case it is set automatically based on model size so that the largest shard is smaller than 10GB
-- test_convert_precision: Test the precision error when converting weights between HF and Megatron formats. Default is False
+- 🔥test_convert_precision: Test the precision error when converting weights between HF and Megatron formats. Default is False
 - 🔥push_to_hub: Whether to push to the hub. Default is False. See the example [here](https://github.com/modelscope/ms-swift/blob/main/examples/export/push_to_hub.sh)
 - hub_model_id: The model_id to push. Default is None
 - hub_private_repo: Whether the repo is private. Default is False

docs/source_en/Instruction/Command-line-parameters.md

Lines changed: 1 addition & 1 deletion

@@ -535,7 +535,7 @@ Export Arguments include the [basic arguments](#base-arguments) and [merge arguments](#merge-arguments)
 - to_hf: Convert weights from Megatron format to HF format. Default is False.
 - mcore_model: Path to the mcore format model. Default is None.
 - thread_count: The number of model slices when `--to_mcore true` is set. Defaults to None, and is automatically configured based on the model size, ensuring that the largest slice is less than 10GB.
-- test_convert_precision: Test the precision error when converting weights between HF and Megatron formats. Default is False.
+- 🔥test_convert_precision: Test the precision error when converting weights between HF and Megatron formats. Default is False.
 - 🔥push_to_hub: Whether to push to the hub, with the default being False. Examples can be found [here](https://github.com/modelscope/ms-swift/blob/main/examples/export/push_to_hub.sh).
 - hub_model_id: Model ID for pushing, default is None.
 - hub_private_repo: Whether it is a private repo, default is False.

docs/source_en/Instruction/Megatron-SWIFT-Training.md

Lines changed: 0 additions & 3 deletions

@@ -40,10 +40,8 @@ swift export \
     --model Qwen/Qwen2.5-7B-Instruct \
     --to_mcore true \
     --torch_dtype bfloat16 \
-    --test_convert_precision true \
     --output_dir Qwen2.5-7B-Instruct-mcore
 ```
-- Note: If an OOM (Out Of Memory) error occurs, please remove the `--test_convert_precision true` parameter.

 Next, use the following script to start training. The required GPU memory resources are 2*80GiB:

@@ -86,7 +84,6 @@ swift export \
     --mcore_model megatron_output/Qwen2.5-7B-Instruct/vx-xxx \
     --to_hf true \
     --torch_dtype bfloat16 \
-    --test_convert_precision true \
     --output_dir megatron_output/Qwen2.5-7B-Instruct/vx-xxx-hf
 ```


swift/llm/model/model/mllm.py

Lines changed: 3 additions & 3 deletions

@@ -84,7 +84,7 @@ def to_dict(self, *args, **kwargs):
     if model is not None:
         model.config._to_dict = model.config.to_dict
         model.config.to_dict = MethodType(to_dict, model.config)
-
+        patch_output_clone(model.model.transformer.wte)
     return model, processor


@@ -114,8 +114,8 @@ def get_model_tokenizer_molmo(model_dir: str,
     model_cls = get_class_from_dynamic_module('modeling_molmo.MolmoForCausalLM', model_dir)
     model_cls._no_split_modules = ['MolmoSequentialBlock']
     model, processor = get_model_tokenizer_multimodal(model_dir, model_info, model_kwargs, load_model, **kwargs)
-
-    patch_output_clone(model.model.transformer.wte)
+    if model is not None:
+        patch_output_clone(model.model.transformer.wte)
     return model, processor

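The mllm.py hunks above move `patch_output_clone` behind an `if model is not None:` guard, since `get_model_tokenizer_multimodal` can return `None` for the model (presumably when called with `load_model=False`). A minimal self-contained sketch of the pattern; the loader and patch bodies here are hypothetical stand-ins, not swift's real implementations:

```python
from types import SimpleNamespace


def patch_output_clone(module):
    # Stand-in for swift's patch_output_clone: just record that the
    # module was patched.
    module.patched = True


def get_model_tokenizer_multimodal(load_model=True):
    # Stand-in loader: swift's version returns (None, processor) when
    # the model itself is not requested.
    if not load_model:
        return None, 'processor'
    wte = SimpleNamespace(patched=False)
    model = SimpleNamespace(model=SimpleNamespace(transformer=SimpleNamespace(wte=wte)))
    return model, 'processor'


def get_model_tokenizer_molmo(load_model=True):
    model, processor = get_model_tokenizer_multimodal(load_model)
    # The commit's fix: patch only when a model was actually returned,
    # so tokenizer-only loads no longer crash on model.model.
    if model is not None:
        patch_output_clone(model.model.transformer.wte)
    return model, processor
```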

swift/llm/model/patcher.py

Lines changed: 0 additions & 4 deletions

@@ -249,10 +249,6 @@ def patch_automodel_for_sequence_classification(model_meta):

     @classmethod
     def _new_from_pretrained(cls, *args, **kwargs):
-        cls_name = cls.__name__
-        cls_name = cls_name.split('For', 1)[0]
-        cls_name += 'ForSequenceClassification'
-        cls = type(cls_name, (cls, ), {})  # new_cls
         __init__ = cls.__init__

         def __new_init__(self, *args, **kwargs):
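The four deleted lines in `_new_from_pretrained` built a `...ForSequenceClassification` subclass on the fly with the three-argument form of `type()`. As a standalone illustration of that now-removed dynamic-subclassing pattern, using a hypothetical model class:

```python
class QwenForCausalLM:
    # Hypothetical stand-in for a transformers model class.
    pass


# The removed logic: strip everything from 'For' onward, append
# 'ForSequenceClassification', and synthesize the subclass with the
# three-argument type(name, bases, namespace).
cls = QwenForCausalLM
cls_name = cls.__name__.split('For', 1)[0] + 'ForSequenceClassification'
new_cls = type(cls_name, (cls, ), {})

print(new_cls.__name__)  # QwenForSequenceClassification
print(issubclass(new_cls, QwenForCausalLM))  # True
```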

swift/llm/template/base.py

Lines changed: 2 additions & 0 deletions

@@ -123,6 +123,8 @@ def __init__(
         logger.info(f'agent_template: {agent_template}')
         self.agent_template = agent_templates[agent_template]()
         self.norm_bbox = norm_bbox or self.norm_bbox
+        logger.info(f'max_length: {self.max_length}')
+        logger.info(f'norm_bbox: {self.norm_bbox}')
         if self.is_encoder_decoder:
             self.skip_prompt = False
         self.mode: Literal['pt', 'vllm', 'lmdeploy',  # infer
