Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ classifiers = [
requires-python = ">=3.10"
dependencies = [
"verl==0.3.0.post1",
"ray==2.43.0",
"ray[default]==2.43.0",
"vllm>=0.8.3",
"tensordict==0.6.2",
"wandb",
Expand Down
18 changes: 12 additions & 6 deletions tests/explorer/explorer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def setUp(self):
self.config.monitor.monitor_type = MonitorType.TENSORBOARD
self.config.monitor.project = "Trinity-unittest"
self.config.model.checkpoint_path = get_checkpoint_path()
self.config.synchronizer.sync_iteration_interval = 5
self.config.explorer.eval_interval = 10
self.config.trainer.eval_interval = 10
self.config.synchronizer.sync_iteration_interval = 2
self.config.explorer.eval_interval = 4
self.config.trainer.eval_interval = 4


class TestExplorerCountdownEval(BaseExplorerCase, unittest.TestCase):
Expand All @@ -42,8 +42,11 @@ def test_explorer(self):
self.assertTrue(len(rollout_metrics) > 0)
eval_metrics = parser.metric_list("eval")
self.assertTrue(len(eval_metrics) > 0)
self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(eval_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 8)
self.assertEqual(parser.metric_max_step(eval_metrics[0]), 8)

def tearDown(self):
pass


class TestExplorerCountdownNoEval(BaseExplorerCase, unittest.TestCase):
Expand All @@ -58,4 +61,7 @@ def test_explorer(self):
self.assertTrue(len(rollout_metrics) > 0)
eval_metrics = parser.metric_list("eval")
self.assertTrue(len(eval_metrics) == 0)
self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 8)

def tearDown(self):
pass
10 changes: 6 additions & 4 deletions tests/explorer/runner_pool_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def run(self) -> List[Experience]:
if "timeout" in self.error_type:
time.sleep(self.seconds)
elif self.error_type == "exception":
raise RuntimeError("Exception occurred")
print("rasing exception")
raise ValueError("Exception occurred")
elif self.error_type == "exit":
exit(1)
return [Experience(tokens=torch.zeros(5), prompt_length=2, prompt_text=self.error_type)]
Expand Down Expand Up @@ -107,19 +108,20 @@ def test_runner_pool(self):
tasks=tasks,
)

# The expected return order is: `exception` -> `timeout_5` -> `success` -> (`timeout_100` and `timeout_101`) -> `exit`
# The expected return order is: `exception` -> `timeout_2` -> `success` -> (`timeout_100` and `timeout_101`) -> `exit`
# 1. `exception`
st = time.time()
status = pool.get_next_unorder()
et = time.time()
self.assertTrue(et - st < 5)
self.assertTrue(et - st < 2)
print(f"First task use time: {et - st}")
self.assertEqual(len(status), 1)
self.assertFalse(status[0].ok)
# 2. `timeout_2`
st = time.time()
status = pool.get_next_unorder()
et = time.time()
self.assertTrue(et - st < 3)
self.assertTrue(et - st > 2)
self.assertEqual(len(status), 1)
self.assertTrue(status[0].ok)
# 3. `success`
Expand Down
1 change: 0 additions & 1 deletion tests/template/data/countdown/test.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [63, 95, 96], create an equation that equals 64. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [63, 95, 96], \"target\": 64}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [95, 11, 56], create an equation that equals 28. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [95, 11, 56], \"target\": 28}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [19, 74, 45], create an equation that equals 48. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [19, 74, 45], \"target\": 48}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [49, 41, 73], create an equation that equals 17. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [49, 41, 73], \"target\": 17}"}
4 changes: 0 additions & 4 deletions tests/template/data/countdown/train.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,3 @@
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [19, 25, 89], create an equation that equals 95. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [19, 25, 89], \"target\": 95}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [8, 62, 43], create an equation that equals 27. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [8, 62, 43], \"target\": 27}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [74, 5, 20, 88], create an equation that equals 50. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [74, 5, 20, 88], \"target\": 50}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [14, 45, 9, 1], create an equation that equals 18. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [14, 45, 9, 1], \"target\": 18}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [79, 8, 27, 47], create an equation that equals 91. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [79, 8, 27, 47], \"target\": 91}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [21, 14, 56], create an equation that equals 84. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [21, 14, 56], \"target\": 84}"}
{"question": "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\nUser: Using the numbers [45, 20, 98], create an equation that equals 73. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.\nAssistant: Let me solve this step by step.\n<think>", "answer": "{\"numbers\": [45, 20, 98], \"target\": 73}"}
4 changes: 2 additions & 2 deletions tests/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def get_checkpoint_path() -> str:


def get_unittest_dataset_config(dataset_name: str = "countdown") -> DataConfig:
"""Countdown sample dataset for 20 iterations"""
"""Countdown sample dataset for 8 iterations"""
if dataset_name == "countdown":
return DataConfig(
total_epochs=4,
total_epochs=2,
batch_size=4,
default_workflow_type="math_workflow",
default_reward_fn_type="countdown_reward",
Expand Down
18 changes: 9 additions & 9 deletions tests/trainer/trainer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,33 @@ def setUp(self):
self.config.model.checkpoint_path = os.path.join(
get_checkpoint_path(), f"train-{datetime.now().strftime('%Y%m%d%H%M%S')}"
)
self.config.synchronizer.sync_iteration_interval = 5
self.config.synchronizer.sync_iteration_interval = 2
self.config.synchronizer.sync_method = "online"
self.config.explorer.eval_interval = 10
self.config.trainer.eval_interval = 10
self.config.explorer.eval_interval = 4
self.config.trainer.eval_interval = 4


class TestTrainerCountdown(BaseTrainerCase, unittest.TestCase):
def test_trainer(self):
"""Test the trainer."""
self.config.data = get_unittest_dataset_config("countdown")
self.config.check_and_update()
self.config.trainer.trainer_config.trainer.save_freq = 20
self.config.trainer.trainer_config.trainer.save_freq = 8
both(self.config)
# check tensorboard
parser = TensorBoardParser(os.path.join(self.config.monitor.job_dir, "tensorboard"))
rollout_metrics = parser.metric_list("rollout")
self.assertTrue(len(rollout_metrics) > 0)
self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(rollout_metrics[0]), 8)
eval_metrics = parser.metric_list("eval")
self.assertTrue(len(eval_metrics) > 0)
self.assertEqual(parser.metric_max_step(eval_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(eval_metrics[0]), 8)
actor_metrics = parser.metric_list("actor")
self.assertTrue(len(actor_metrics) > 0)
self.assertEqual(parser.metric_max_step(actor_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(actor_metrics[0]), 8)
response_metrics = parser.metric_list("response_length")
self.assertTrue(len(response_metrics) > 0)
self.assertEqual(parser.metric_max_step(response_metrics[0]), 20)
self.assertEqual(parser.metric_max_step(response_metrics[0]), 8)
# check checkpoint
from trinity.common.models.utils import get_checkpoint_dir_with_iteration

Expand All @@ -64,7 +64,7 @@ def test_trainer(self):
iteration_num=None,
)
self.assertTrue(os.path.exists(checkpoint_dir))
self.assertTrue(checkpoint_dir.endswith("20"))
self.assertTrue(checkpoint_dir.endswith("step_8"))

def tearDown(self):
# remove dir only when the test passed
Expand Down
1 change: 1 addition & 0 deletions trinity/explorer/runner_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def _create_actors(self, num: int = 1):
for _ in range(num):
engine_index = self.engine_status.index(min(self.engine_status))
new_actor = WorkflowRunner.remote(self.config, self.models[engine_index])
ray.get(new_actor.__ray_ready__.remote())
self.engine_status[engine_index] += 1
self.actor_to_engine_index[new_actor] = engine_index
self._return_actor(new_actor)
Expand Down
1 change: 1 addition & 0 deletions trinity/explorer/workflow_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def _run_task(self, task: Task) -> List[Experience]:

def run_task(self, task: Task) -> Status:
"""Run the task and return the states."""
print("Start to run test")
try:
st = time.time()
exps = self._run_task(task)
Expand Down