Fix GRPO loss computation when completion length equals zero (#3857)

hjh0119 · web-flow · commit ecea03e94385 · 2025-04-12T23:05:18.000+08:00
Co-authored-by: hjh <hujinghan.hjh@alibaba-inc.com>
diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -1062,7 +1062,7 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=N
         completions_length = completion_mask.sum()
         if completions_length == 0:
             # Prevent division by zero issues after all completions are filtered by the overlong filter
-            completions_length += 1e-4
+            completions_length = completions_length.float() + 1e-4
         loss = (per_token_loss * completion_mask).sum() / completions_length
 
         # Log the metrics