Skip to content

Commit 58fd32f

Browse files
authored
Fix bugs (#3025)
1 parent 00a5af5 commit 58fd32f

File tree

8 files changed

+47
-20
lines changed

8 files changed

+47
-20
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ Running Environment:
110110
| python | >=3.8 | 3.10 | |
111111
| cuda | | cuda12 | No need to install if using CPU, NPU, MPS |
112112
| torch | >=2.0 | | |
113-
| transformers | >=4.33 | 4.48.1 | |
113+
| transformers | >=4.33 | 4.48.2 | |
114114
| modelscope | >=1.19 | | |
115115
| peft | >=0.11.0,<0.15.0 | | |
116116
| trl | >=0.13,<0.15 | 0.14.0 | RLHF |

README_CN.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ pip install -e .
104104
| python | >=3.8 | 3.10 ||
105105
| cuda | | cuda12 |使用cpu、npu、mps则无需安装|
106106
| torch | >=2.0 | ||
107-
| transformers | >=4.33 | 4.48.1 ||
107+
| transformers | >=4.33 | 4.48.2 ||
108108
| modelscope | >=1.19 | ||
109109
| peft | >=0.11.0,<0.15.0 | ||
110110
| trl | >=0.13,<0.15 | 0.14.0 |RLHF|

docs/source/GetStarted/SWIFT安装.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ pip install 'ms-swift[all]' -U
1717
## 源代码安装
1818

1919
```shell
20+
# pip install git+https://github.com/modelscope/ms-swift.git
21+
2022
git clone https://github.com/modelscope/ms-swift.git
2123
cd ms-swift
2224
pip install -e .
@@ -43,7 +45,7 @@ pip install ms-swift==2.*
4345
| python | >=3.8 | 3.10 ||
4446
| cuda | | cuda12 |使用cpu、npu、mps则无需安装|
4547
| torch | >=2.0 | ||
46-
| transformers | >=4.33 | 4.48.1 ||
48+
| transformers | >=4.33 | 4.48.2 ||
4749
| modelscope | >=1.19 | ||
4850
| peft | >=0.11.0,<0.15.0 | ||
4951
| trl | >=0.13,<0.15 | 0.14.0 |RLHF|

docs/source_en/GetStarted/SWIFT-installation.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ pip install 'ms-swift[all]' -U
1717
## Source Code Installation
1818

1919
```shell
20+
# pip install git+https://github.com/modelscope/ms-swift.git
21+
2022
git clone https://github.com/modelscope/ms-swift.git
2123
cd ms-swift
2224
pip install -e .
@@ -45,7 +47,7 @@ You can view the image [here](https://modelscope.cn/docs/intro/environment-setup
4547
| python | >=3.8 | 3.10 | |
4648
| cuda | | cuda12 | No need to install if using CPU, NPU, MPS |
4749
| torch | >=2.0 | | |
48-
| transformers | >=4.33 | 4.48.1 | |
50+
| transformers | >=4.33 | 4.48.2 | |
4951
| modelscope | >=1.19 | | |
5052
| peft | >=0.11.0,<0.15.0 | | |
5153
| trl | >=0.13,<0.15 | 0.14.0 | RLHF |
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# 2 * 76GiB
2+
CUDA_VISIBLE_DEVICES=0,1 \
3+
MAX_PIXELS=1003520 \
4+
swift sft \
5+
--model Qwen/Qwen2-VL-72B-Instruct \
6+
--dataset 'modelscope/coco_2014_caption:validation#20000' \
7+
--train_type lora \
8+
--torch_dtype bfloat16 \
9+
--num_train_epochs 1 \
10+
--per_device_train_batch_size 1 \
11+
--per_device_eval_batch_size 1 \
12+
--learning_rate 1e-4 \
13+
--lora_rank 8 \
14+
--lora_alpha 32 \
15+
--target_modules all-linear \
16+
--freeze_vit true \
17+
--gradient_accumulation_steps 16 \
18+
--eval_steps 100 \
19+
--save_steps 100 \
20+
--save_total_limit 5 \
21+
--logging_steps 5 \
22+
--max_length 2048 \
23+
--output_dir output \
24+
--warmup_ratio 0.05 \
25+
--dataloader_num_workers 4

examples/train/qlora/gptq.sh

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
# 9GB
2-
CUDA_VISIBLE_DEVICES=0 \
1+
# 2 * 30GiB
2+
CUDA_VISIBLE_DEVICES=0,1 \
3+
MAX_PIXELS=1003520 \
34
swift sft \
4-
--model Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4 \
5+
--model Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4 \
6+
--dataset 'modelscope/coco_2014_caption:validation#20000' \
57
--train_type lora \
6-
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
7-
'AI-ModelScope/alpaca-gpt4-data-en#500' \
8-
'swift/self-cognition#500' \
98
--torch_dtype bfloat16 \
109
--num_train_epochs 1 \
1110
--per_device_train_batch_size 1 \
@@ -14,15 +13,13 @@ swift sft \
1413
--lora_rank 8 \
1514
--lora_alpha 32 \
1615
--target_modules all-linear \
16+
--freeze_vit true \
1717
--gradient_accumulation_steps 16 \
18-
--eval_steps 50 \
19-
--save_steps 50 \
18+
--eval_steps 100 \
19+
--save_steps 100 \
2020
--save_total_limit 5 \
2121
--logging_steps 5 \
2222
--max_length 2048 \
2323
--output_dir output \
24-
--system 'You are a helpful assistant.' \
2524
--warmup_ratio 0.05 \
26-
--dataloader_num_workers 4 \
27-
--model_author swift \
28-
--model_name swift-robot
25+
--dataloader_num_workers 4

swift/hub/hub.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import huggingface_hub
1010
from huggingface_hub import RepoUrl
1111
from huggingface_hub.hf_api import api, future_compatible
12-
from modelscope.utils.repo_utils import CommitInfo
1312
from requests.exceptions import HTTPError
1413
from transformers import trainer
1514
from transformers.utils import logging, strtobool
@@ -155,6 +154,7 @@ def upload_folder(
155154
ignore_patterns: Optional[Union[List[str], str]] = None,
156155
**kwargs,
157156
):
157+
from modelscope.utils.repo_utils import CommitInfo
158158
MSHub.push_to_hub(repo_id, folder_path, path_in_repo, commit_message, commit_description, token, True, revision,
159159
ignore_patterns)
160160
return CommitInfo(

swift/llm/template/template/deepseek.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,10 @@ class DeepseekV2_5TemplateMeta(TemplateMeta):
128128
class DeepseekR1Template(Template):
129129

130130
def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
131-
for message in inputs.messages:
132-
if message['role'] == 'assistant' and isinstance(message['content'], str):
133-
message['content'] = message['content'].split('</think>')[-1]
131+
if not self.is_training:
132+
for message in inputs.messages:
133+
if message['role'] == 'assistant' and isinstance(message['content'], str):
134+
message['content'] = message['content'].split('</think>')[-1]
134135
return super()._encode(inputs)
135136

136137

0 commit comments

Comments (0)