Fix pre-commit formatting issues and the REC policy loss (call aggregate_loss instead of masked_loss)

yanxi-chen · yanxi-chen · commit 542e4ac31096 · 2025-12-26T17:52:46.000+08:00
diff --git a/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py b/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py
@@ -9,11 +9,11 @@
 
 from trinity.algorithm.policy_loss_fn.policy_loss_fn import PolicyLossFn
 from trinity.algorithm.utils import aggregate_loss, masked_mean
-
 from trinity.utils.log import get_logger
 
 logger = get_logger(__name__)
 
+
 class GSPOLossFn(PolicyLossFn):
     def __init__(
         self,
@@ -35,7 +35,9 @@ def __init__(
         self.clip_range_high = _clip_range_high
 
         if loss_agg_mode != "seq-mean-token-mean":
-            logger.warning(f"The original GSPO paper requires loss_agg_mode to be 'seq-mean-token-mean', but the current setting is '{loss_agg_mode}'.")
+            logger.warning(
+                f"The original GSPO paper requires loss_agg_mode to be 'seq-mean-token-mean', but the current setting is '{loss_agg_mode}'."
+            )
             # loss_agg_mode = "seq-mean-token-mean"
         self.loss_agg_mode = loss_agg_mode
 
diff --git a/trinity/algorithm/policy_loss_fn/rec_policy_loss.py b/trinity/algorithm/policy_loss_fn/rec_policy_loss.py
@@ -6,7 +6,7 @@
 import torch
 
 from trinity.algorithm.policy_loss_fn.policy_loss_fn import PolicyLossFn
-from trinity.algorithm.utils import masked_mean
+from trinity.algorithm.utils import aggregate_loss, masked_mean
 
 
 class RECPolicyLossFn(PolicyLossFn):
@@ -123,7 +123,7 @@ def __call__(  # type: ignore
 
         if self.clip_mode == "gspo-one-side":
             # [EXPERIMENTAL] specialized for gspo-style rec variant for now
-            pg_loss = masked_loss(
+            pg_loss = aggregate_loss(
                 values=pg_losses,
                 mask=action_mask,
                 loss_agg_mode="seq-mean-token-mean",