Skip to content

Commit 80da723

Browse files
author
root
committed
1
1 parent 9c31385 commit 80da723

File tree

6 files changed

+9
-9
lines changed

6 files changed

+9
-9
lines changed

agentlightning/runner.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -261,8 +261,7 @@ async def run_async(self) -> bool:
261261
logger.exception(f"{self._log_prefix(rollout_id)} Exception during on_rollout_end hook.")
262262
await self.client.post_rollout_async(rollout_obj)
263263
else:
264-
if MAX_TRY == 0:
265-
raise Exception("rollout_obj.triplets is EMPTY")
264+
raise Exception("rollout_obj.triplets is EMPTY")
266265
return True
267266

268267
async def iter_async(self) -> int:

agentlightning/verl/trainer.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,7 @@ def _train_step(self, batch_dict: dict) -> dict:
140140

141141
# recompute old_log_probs
142142
with _timer("old_log_prob", timing_raw):
143+
print(batch)
143144
old_log_prob = self.actor_rollout_wg.compute_log_prob(batch)
144145
entropys = old_log_prob.batch["entropys"]
145146
response_masks = batch.batch["response_mask"]

examples/werewolf/prompt.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ class Prompts:
200200

201201
to_wolves_empty = "[仅狼人可见] 投票结果:{}。你们选择空刀,今晚不击杀任何人。"
202202

203-
to_guard_action = "[仅守卫可见] {},作为守卫,你今晚要守护哪位玩家?昨晚守护的玩家是 {},当前存活的玩家是{}。注意不能连续两晚守护同一人。可以选择'空守'放弃守护。也可以选择自守策略(要给出你自己的具体号码),给出你的理由和决定。"
203+
to_guard_action = "[仅守卫可见] {agent_name},作为守卫,你今晚要守护哪位玩家?昨晚守护的玩家是 {last_guarded_player},当前存活的玩家是{current_alive}。注意不能连续两晚守护同一人。可以选择'空守'放弃守护。也可以选择自守策略(要给出你自己的具体号码,例如{agent_name}),给出你的理由和决定。"
204204

205205
to_all_guard_turn = "守卫的回合,守卫请睁眼,决定今晚要守护的玩家。"
206206

examples/werewolf/train.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ python -m agentlightning.verl \
3939
actor_rollout_ref.actor.fsdp_config.param_offload=True \
4040
actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
4141
actor_rollout_ref.rollout.name=vllm \
42-
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
42+
actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
4343
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
4444
actor_rollout_ref.ref.fsdp_config.param_offload=True \
4545
algorithm.use_kl_in_reward=False \

examples/werewolf/werewolf_agent.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -515,9 +515,9 @@ async def training_rollout_async(self, task: Any, rollout_id: str, resources: Na
515515
msg_guard = await agent(
516516
await moderator(
517517
Prompts.to_guard_action.format(
518-
agent.name,
519-
last_guarded_player,
520-
names_to_str(current_alive),
518+
agent_name=agent.name,
519+
last_guarded_player=last_guarded_player,
520+
current_alive=names_to_str(current_alive),
521521
),
522522
),
523523
structured_model=get_guard_model(current_alive),

merge.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
cd /root/verl && python scripts/legacy_model_merger.py merge \
22
--backend fsdp \
33
--hf_model_path /root/dataDisk/Qwen3-8B \
4-
--local_dir /root/dataDisk/checkpoints/global_step_1/actor \
5-
--target_dir /root/dataDisk/merge_demo
4+
--local_dir /root/dataDisk/checkpoints/global_step_41/actor \
5+
--target_dir /root/dataDisk/DeepWereWolf-Qwen3-8B-Grpo-Agentic

0 commit comments

Comments
 (0)