fix unittest and bots_reward

chenyushuo · chenyushuo · commit d68a19bf59e7 · 2025-11-20T12:07:01.000+08:00
diff --git a/examples/bots/workflow/bots_reward.py b/examples/bots/workflow/bots_reward.py
@@ -1,8 +1,10 @@
 # Adapted from Reasoning360: https://github.com/LLM360/Reasoning360/blob/main/verl/utils/reward_score/naive_dapo.py
 
+import concurrent
 import contextlib
 import math
 import re
+import resource
 from math import isclose
 from typing import Optional, Union
 
@@ -585,17 +587,65 @@ def should_allow_eval(expr: str):
 
 # @timeout(timeout_seconds=10)
 def are_equal_under_sympy(ground_truth_normalized: str, given_normalized: str):
+    """Return whether the two normalized expressions are equal according to sympy.
+
+    The comparison runs in a separate process (see ``_sympy_equal_worker``) so
+    that it can be killed after 10 seconds. Any failure -- timeout, a worker
+    crash, or an error while starting the worker -- is reported as ``False``.
+    """
+    import multiprocessing
+
+    timeout_seconds = 10
+    receiver, sender = multiprocessing.Pipe(duplex=False)
+    worker = multiprocessing.Process(
+        target=_sympy_equal_worker,
+        args=(sender, ground_truth_normalized, given_normalized),
+    )
+    try:
+        worker.start()
+    except Exception:
+        receiver.close()
+        sender.close()
+        return False
+    # Close the parent's copy of the write end so a dead worker shows up as EOF.
+    sender.close()
+    try:
+        if receiver.poll(timeout_seconds):
+            return receiver.recv()
+        return False
+    except Exception:
+        # Best effort: EOF from a crashed worker (or any pipe error) means "not equal".
+        return False
+    finally:
+        # ``kill`` actually stops a runaway simplification; joining reaps the child.
+        if worker.is_alive():
+            worker.kill()
+        worker.join()
+        receiver.close()
+
+
+def _sympy_equal_worker(conn, ground_truth_normalized: str, given_normalized: str) -> None:
+    """Child-process entry point: send the sympy equality verdict through ``conn``.
+
+    Kept at module level so it stays picklable: process pools and non-fork
+    start methods pickle the target, which fails for nested functions.
+    """
     are_equal = False
     try:
+        # Cap the child's address space at 1 GiB.
+        memory_size = 1024**3
+        resource.setrlimit(resource.RLIMIT_AS, (memory_size, memory_size))
+
         expr = f"({ground_truth_normalized})-({given_normalized})"
         if should_allow_eval(expr):
             sympy_diff = _sympy_parse(expr)
             simplified = sympy.simplify(sympy_diff)
             if simplified == 0:
                 are_equal = True
     except Exception:
         pass
-    return are_equal
+    conn.send(are_equal)
+    conn.close()
 
 
 def split_tuple(expr: str):
diff --git a/trinity/buffer/pipelines/task_pipeline.py b/trinity/buffer/pipelines/task_pipeline.py
@@ -5,6 +5,8 @@
 
 
 def check_and_run_task_pipeline(config: Config) -> Dict:
+    if config.mode not in {"explore", "train", "both"}:
+        return {}
     if config.data_processor.task_pipeline is None:
         return {}
 
diff --git a/trinity/cli/launcher.py b/trinity/cli/launcher.py
@@ -37,7 +37,6 @@ def bench(config: Config) -> None:
 
 def explore(config: Config) -> None:
     """Run explorer."""
-    check_and_run_task_pipeline(config)
     try:
         explorer = Explorer.get_actor(config)
         ray.get(explorer.prepare.remote())
@@ -82,7 +81,6 @@ def both(config: Config) -> None:
     the latest step. The specific number of experiences may vary for different
     algorithms and tasks.
     """
-    check_and_run_task_pipeline(config)
     try:
         explorer = Explorer.get_actor(config)
         trainer = Trainer.get_actor(config)
@@ -153,6 +151,7 @@ def run_stage(config: Config) -> None:
     )
     pprint(config)
     try:
+        check_and_run_task_pipeline(config)
         MODE_MAP[config.mode](config)
     finally:
         if config.monitor.enable_ray_timeline:
diff --git a/trinity/common/config.py b/trinity/common/config.py
@@ -853,8 +853,8 @@ def _check_interval(self) -> None:
                 )
 
     def _check_explorer_input(self) -> None:
-        if self.mode in {"train", "bench", "serve"}:
-            # no need to check explorer_input in train/bench/serve mode
+        if self.mode == "serve":
+            # no need to check explorer_input in serve mode
             return
 
         explorer_input = self.buffer.explorer_input
@@ -864,7 +864,7 @@ def _check_explorer_input(self) -> None:
                 raise ValueError("Do not support setting `taskset` and `tasksets` simultaneously!")
             explorer_input.tasksets = [explorer_input.taskset]
             explorer_input.taskset = None
-        elif len(explorer_input.tasksets) == 0:
+        elif self.mode not in {"bench", "train"} and len(explorer_input.tasksets) == 0:
             raise ValueError("At least one taskset should be provided in explorer_input!")
 
         for i, taskset in enumerate(explorer_input.tasksets):
@@ -913,8 +913,8 @@ def _check_explorer_input(self) -> None:
             set_if_none(dataset.rollout_args, "max_tokens", self.model.max_response_tokens)
 
     def _check_trainer_input(self) -> None:
-        if self.mode in {"explore", "bench", "serve"}:
-            # no need to check trainer_input in explore/bench/serve mode
+        if self.mode in {"bench", "serve"}:
+            # no need to check trainer_input in bench/serve mode
             return
 
         trainer_input = self.buffer.trainer_input
@@ -986,10 +986,15 @@ def _check_data_processor(self) -> None:
                 )
 
         task_pipeline = self.data_processor.task_pipeline
-        if task_pipeline is not None and self.mode in {"explore", "both"}:
+        if task_pipeline is not None and self.mode in {"explore", "train", "both"}:
             if task_pipeline.output is None:
                 if self.mode != "train":
-                    task_pipeline.output = self.buffer.explorer_input.tasksets[0]
+                    if len(self.buffer.explorer_input.tasksets) > 0:
+                        task_pipeline.output = self.buffer.explorer_input.tasksets[0]
+                    else:
+                        raise ValueError(
+                            "At least one taskset should be provided in explorer_input!"
+                        )
                 elif self.mode == "train" and self.algorithm.algorithm_type in {"dpo", "sft"}:
                     task_pipeline.output = self.buffer.trainer_input.experience_buffer
                 else: