modelscope · pan-x-c · Dec 9, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/benchmark/plugins/guru_math/reward.py b/benchmark/plugins/guru_math/reward.py
@@ -14,10 +14,10 @@ def __call__(  # type: ignore
         format_score_coef: Optional[float] = 0.1,
         **kwargs,
     ) -> dict[str, float]:
-        from .naive_dapo import compute_score
+        from trinity.utils.math_utils import compute_score
 
-        ret = compute_score(response, truth, None)  # type: ignore
-        return {"accuracy": ret["score"], "format_score": 0}
+        score = compute_score(response, truth)  # type: ignore
+        return {"accuracy": score, "format_score": 0}
 
 
 @REWARD_FUNCTIONS.register_module("math_boxed_reward_prime_math")
@@ -32,5 +32,5 @@ def __call__(  # type: ignore
     ) -> dict[str, float]:
         from verl.utils.reward_score.prime_math import compute_score
 
-        ret = compute_score(response, truth)
-        return {"accuracy": ret["score"], "format_score": 0}
+        res = compute_score(response, truth)
+        return {"accuracy": res["score"], "format_score": 0}
diff --git a/examples/bots/workflow/bots_math_boxed_reward.py b/examples/bots/workflow/bots_math_boxed_reward.py
@@ -22,9 +22,9 @@ def __call__(  # type: ignore
         format_score_coef: Optional[float] = 0.1,
         **kwargs,
     ) -> dict[str, float]:
-        from trinity.plugins.bots_reward import compute_score
+        from trinity.plugins.bots_reward import compute_score_bots
 
-        accuracy_score = compute_score(response, truth)
+        accuracy_score = compute_score_bots(response, truth)
 
         format_score = 0.0
         if with_think and not validate_think_pattern(response):