agentscope-ai · pan-x-c · Apr 30, 2025 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
     "verl==0.3.0.post1",
-    "ray==2.43.0",
+    "ray[default]==2.43.0",
     "vllm>=0.8.3",
     "tensordict==0.6.2",
     "wandb",

diff --git a/tests/explorer/explorer_test.py b/tests/explorer/explorer_test.py
@@ -26,9 +26,9 @@ def setUp(self):
         self.config.monitor.monitor_type = MonitorType.TENSORBOARD
         self.config.monitor.project = "Trinity-unittest"
         self.config.model.checkpoint_path = get_checkpoint_path()
-        self.config.synchronizer.sync_iteration_interval = 5
-        self.config.explorer.eval_interval = 10
-        self.config.trainer.eval_interval = 10
+        self.config.synchronizer.sync_iteration_interval = 2
+        self.config.explorer.eval_interval = 4
+        self.config.trainer.eval_interval = 4
 
 
 class TestExplorerCountdownEval(BaseExplorerCase, unittest.TestCase):
@@ -42,8 +42,11 @@ def test_explorer(self):
         self.assertTrue(len(rollout_metrics) > 0)
         eval_metrics = parser.metric_list("eval")
         self.assertTrue(len(eval_metrics) > 0)
-        self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 20)
-        self.assertEqual(parser.metric_max_step(eval_metrics[0]), 20)
+        self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 8)
+        self.assertEqual(parser.metric_max_step(eval_metrics[0]), 8)
+
+    def tearDown(self):
+        pass
 
 
 class TestExplorerCountdownNoEval(BaseExplorerCase, unittest.TestCase):
@@ -58,4 +61,7 @@ def test_explorer(self):
         self.assertTrue(len(rollout_metrics) > 0)
         eval_metrics = parser.metric_list("eval")
         self.assertTrue(len(eval_metrics) == 0)
-        self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 20)
+        self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 8)
+
+    def tearDown(self):
+        pass
diff --git a/tests/explorer/runner_pool_test.py b/tests/explorer/runner_pool_test.py
@@ -31,7 +31,8 @@ def run(self) -> List[Experience]:
         if "timeout" in self.error_type:
             time.sleep(self.seconds)
         elif self.error_type == "exception":
-            raise RuntimeError("Exception occurred")
+            print("rasing exception")
+            raise ValueError("Exception occurred")
         elif self.error_type == "exit":
             exit(1)
         return [Experience(tokens=torch.zeros(5), prompt_length=2, prompt_text=self.error_type)]
@@ -107,19 +108,20 @@ def test_runner_pool(self):
             tasks=tasks,
         )
 
-        # The excepted return order is: `exception` -> `timeout_5` -> `success` -> (`timeout_100`and `timeout_101`) -> `exit`
+        # The expected return order is: `exception` -> `timeout_2` -> `success` -> (`timeout_100` and `timeout_101`) -> `exit`
         # 1. `exception`
         st = time.time()
         status = pool.get_next_unorder()
         et = time.time()
-        self.assertTrue(et - st < 5)
+        self.assertTrue(et - st < 2)
+        print(f"First task use time: {et - st}")
         self.assertEqual(len(status), 1)
         self.assertFalse(status[0].ok)
         # 2. `timeout_2
         st = time.time()
         status = pool.get_next_unorder()
         et = time.time()
-        self.assertTrue(et - st < 3)
+        self.assertTrue(et - st > 2)
         self.assertEqual(len(status), 1)
         self.assertTrue(status[0].ok)
         # 3. `success`

diff --git a/tests/template/data/countdown/test.jsonl b/tests/template/data/countdown/test.jsonl
@@ -2,4 +2,3 @@
 {"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [63, 95, 96], create an equation that equals 64. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [63, 95, 96], \"target\": 64}"}
 {"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [95, 11, 56], create an equation that equals 28. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [95, 11, 56], \"target\": 28}"}
 {"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [19, 74, 45], create an equation that equals 48. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [19, 74, 45], \"target\": 48}"}
-{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [49, 41, 73], create an equation that equals 17. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [49, 41, 73], \"target\": 17}"}
diff --git a/tests/template/data/countdown/train.jsonl b/tests/template/data/countdown/train.jsonl
@@ -14,7 +14,3 @@
 {"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [19, 25, 89], create an equation that equals 95. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [19, 25, 89], \"target\": 95}"}
 {"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [8, 62, 43], create an equation that equals 27. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [8, 62, 43], \"target\": 27}"}
 {"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [74, 5, 20, 88], create an equation that equals 50. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [74, 5, 20, 88], \"target\": 50}"}
-{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [14, 45, 9, 1], create an equation that equals 18. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [14, 45, 9, 1], \"target\": 18}"}
-{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [79, 8, 27, 47], create an equation that equals 91. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [79, 8, 27, 47], \"target\": 91}"}
-{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [21, 14, 56], create an equation that equals 84. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [21, 14, 56], \"target\": 84}"}
-{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [45, 20, 98], create an equation that equals 73. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [45, 20, 98], \"target\": 73}"}
diff --git a/tests/tools.py b/tests/tools.py
@@ -31,10 +31,10 @@ def get_checkpoint_path() -> str:
 
 
 def get_unittest_dataset_config(dataset_name: str = "countdown") -> DataConfig:
-    """Countdown sample dataset for 20 iterations"""
+    """Countdown sample dataset for 8 iterations"""
     if dataset_name == "countdown":
         return DataConfig(
-            total_epochs=4,
+            total_epochs=2,
             batch_size=4,
             default_workflow_type="math_workflow",
             default_reward_fn_type="countdown_reward",

diff --git a/tests/trainer/trainer_test.py b/tests/trainer/trainer_test.py
@@ -28,33 +28,33 @@ def setUp(self):
         self.config.model.checkpoint_path = os.path.join(
             get_checkpoint_path(), f"train-{datetime.now().strftime('%Y%m%d%H%M%S')}"
         )
-        self.config.synchronizer.sync_iteration_interval = 5
+        self.config.synchronizer.sync_iteration_interval = 2
         self.config.synchronizer.sync_method = "online"
-        self.config.explorer.eval_interval = 10
-        self.config.trainer.eval_interval = 10
+        self.config.explorer.eval_interval = 4
+        self.config.trainer.eval_interval = 4
 
 
 class TestTrainerCountdown(BaseTrainerCase, unittest.TestCase):
     def test_trainer(self):
         """Test the trainer."""
         self.config.data = get_unittest_dataset_config("countdown")
         self.config.check_and_update()
-        self.config.trainer.trainer_config.trainer.save_freq = 20
+        self.config.trainer.trainer_config.trainer.save_freq = 8
         both(self.config)
         # check tensorboard
         parser = TensorBoardParser(os.path.join(self.config.monitor.job_dir, "tensorboard"))
         rollout_metrics = parser.metric_list("rollout")
         self.assertTrue(len(rollout_metrics) > 0)
-        self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 20)
+        self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 8)
         eval_metrics = parser.metric_list("eval")
         self.assertTrue(len(eval_metrics) > 0)
-        self.assertEqual(parser.metric_max_step(eval_metrics[0]), 20)
+        self.assertEqual(parser.metric_max_step(eval_metrics[0]), 8)
         actor_metrics = parser.metric_list("actor")
         self.assertTrue(len(actor_metrics) > 0)
-        self.assertEqual(parser.metric_max_step(actor_metrics[0]), 20)
+        self.assertEqual(parser.metric_max_step(actor_metrics[0]), 8)
         response_metrics = parser.metric_list("response_length")
         self.assertTrue(len(response_metrics) > 0)
-        self.assertEqual(parser.metric_max_step(response_metrics[0]), 20)
+        self.assertEqual(parser.metric_max_step(response_metrics[0]), 8)
         # check checkpoint
         from trinity.common.models.utils import get_checkpoint_dir_with_iteration
 
@@ -64,7 +64,7 @@ def test_trainer(self):
             iteration_num=None,
         )
         self.assertTrue(os.path.exists(checkpoint_dir))
-        self.assertTrue(checkpoint_dir.endswith("20"))
+        self.assertTrue(checkpoint_dir.endswith("step_8"))
 
     def tearDown(self):
         # remove dir only when the test passed

diff --git a/trinity/explorer/runner_pool.py b/trinity/explorer/runner_pool.py
@@ -52,6 +52,7 @@ def _create_actors(self, num: int = 1):
         for _ in range(num):
             engine_index = self.engine_status.index(min(self.engine_status))
             new_actor = WorkflowRunner.remote(self.config, self.models[engine_index])
+            ray.get(new_actor.__ray_ready__.remote())
             self.engine_status[engine_index] += 1
             self.actor_to_engine_index[new_actor] = engine_index
             self._return_actor(new_actor)

diff --git a/trinity/explorer/workflow_runner.py b/trinity/explorer/workflow_runner.py
@@ -53,6 +53,7 @@ def _run_task(self, task: Task) -> List[Experience]:
 
     def run_task(self, task: Task) -> Status:
         """Run the task and return the states."""
+        print("Start to run test")
         try:
             st = time.time()
             exps = self._run_task(task)