Skip to content

Commit 9dfa63a

Browse files
authored
Fix create checkpoint symlink & grpo omni (#4468)
1 parent 1050701 commit 9dfa63a

File tree

5 files changed: +9 −6 lines

examples/train/grpo/qwen2_5_omni/grpo.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ pip install transformers math_verify trl -U
33

44
MAX_PIXELS=1003520 \
55
NPROC_PER_NODE=4 \
6+
ENABLE_AUDIO_OUTPUT=1 \
67
CUDA_VISIBLE_DEVICES=0,1,2,3 \
78
swift rlhf \
89
--rlhf_type grpo \

examples/train/multimodal/omni/sft.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pip install transformers -U
55
nproc_per_node=4
66

77
CUDA_VISIBLE_DEVICES=0,1,2,3 \
8+
ENABLE_AUDIO_OUTPUT=1 \
89
NPROC_PER_NODE=$nproc_per_node \
910
VIDEO_MAX_PIXELS=50176 \
1011
FPS_MAX_FRAMES=12 \

examples/train/packing/qwen2_5_omni.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
pip install transformers -U
66

77
NPROC_PER_NODE=4 \
8+
ENABLE_AUDIO_OUTPUT=1 \
89
CUDA_VISIBLE_DEVICES=0,1,2,3 \
910
VIDEO_MAX_PIXELS=50176 \
1011
FPS_MAX_FRAMES=12 \

swift/llm/train/sft.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,12 @@ def _save_trainer_state(self, trainer):
142142
training_args = trainer.args
143143
state = trainer.state
144144
if hasattr(state, 'last_model_checkpoint'):
145-
if is_master() and self.args.create_checkpoint_symlink:
145+
if self.args.create_checkpoint_symlink:
146146
last_checkpoint = os.path.join(self.args.output_dir, 'last')
147147
best_checkpoint = os.path.join(self.args.output_dir, 'best')
148-
os.symlink(state.last_model_checkpoint, last_checkpoint)
149-
os.symlink(state.best_model_checkpoint, best_checkpoint)
148+
if is_master():
149+
os.symlink(state.last_model_checkpoint, last_checkpoint)
150+
os.symlink(state.best_model_checkpoint, best_checkpoint)
150151
state.last_model_checkpoint = last_checkpoint
151152
state.best_model_checkpoint = best_checkpoint
152153
else:

swift/trainers/rlhf_trainer/grpo_trainer.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,10 +1197,9 @@ def _padding_free_output_hook(module, args, kwargs, result):
11971197
result.last_hidden_state = torch.stack(unpacked_logits, dim=0)
11981198
return result
11991199

1200-
llm_model = get_llm_model(model)
1201-
1202-
base_model = llm_model.model
12031200
if self.padding_free:
1201+
llm_model = get_llm_model(model)
1202+
base_model = llm_model.model
12041203
remove_handle1 = base_model.register_forward_pre_hook(
12051204
_padding_free_input_hook, with_kwargs=True, prepend=True)
12061205
remove_handle2 = base_model.register_forward_hook(_padding_free_output_hook, with_kwargs=True, prepend=True)

0 commit comments

Comments (0)