@@ -56,6 +56,8 @@ class GRPOArguments(GRPOArgumentsMixin):
5656 # multi step
5757 num_iterations : int = 1
5858
59+ truncation_strategy : Optional [Literal ['delete' , 'left' , 'right' ]] = None
60+
5961
6062@dataclass
6163class RLHFArguments (GRPOArguments , PPOArguments , RewardModelArguments , TrainArguments ):
@@ -108,7 +110,6 @@ def __post_init__(self):
108110 self ._init_ppo ()
109111 self ._set_default ()
110112 super ().__post_init__ ()
111- self ._init_grpo_ds3 ()
112113 self ._check_rlhf ()
113114 self ._check_grpo ()
114115
@@ -139,7 +140,11 @@ def _init_grpo(self):
139140 self .gradient_accumulation_steps = 1
140141 self .remove_unused_columns = False
141142 logger .info (f'Setting args.remove_unused_columns: { self .remove_unused_columns } ' )
142- self .truncation_strategy = 'left' # Used for trimming the excessively long parts of a prompt.
143+ if self .truncation_strategy is None :
144+ self .truncation_strategy = 'left'
145+ assert self .truncation_strategy == 'left' , \
146+ "GRPO requires `truncation_strategy='left'`, " \
147+ f"Current value: `truncation_strategy='{ self .truncation_strategy } '`."
143148 if self .beta is None :
144149 self .beta = 0.04 # https://arxiv.org/abs/2402.03300
145150 if self .async_generate :
@@ -189,11 +194,6 @@ def _set_default(self):
189194 elif self .rlhf_type in ['kto' ]:
190195 self .loss_type = 'kto'
191196
192- def _init_grpo_ds3 (self ):
193- if self .rlhf_type == 'grpo' and self .deepspeed :
194- if 'zero_optimization' in self .deepspeed and self .deepspeed ['zero_optimization' ]['stage' ] == 3 :
195- self .deepspeed ['zero_optimization' ]['stage3_prefetch_bucket_size' ] = 0
196-
197197 def _check_rlhf (self ):
198198 if self .sequence_parallel_size > 1 :
199199 raise ValueError ('RLHF do not support sequence parallel' )
0 commit comments