File tree Expand file tree Collapse file tree 3 files changed +14
-8
lines changed
Expand file tree Collapse file tree 3 files changed +14
-8
lines changed Original file line number Diff line number Diff line change @@ -240,10 +240,16 @@ async def run_async(self) -> bool:
240240 # Pass the task input, not the whole task object
241241 result = await rollout_method (task .input , task .rollout_id , resources_update .resources )
242242 #降低最大rollout
243- if len (result ) > 40 :
244- import random
245- result = random .sample (result ,40 )
246- rollout_obj = self ._to_rollout_object (result , task .rollout_id )
243+ import random
244+ new_result = []
245+ if len (result ) > 10 :
246+ #手动控制global token num 不超过1万
247+ global_token_num = 0
248+ while global_token_num > 10000 :
249+ triplet = random .sample (result ,1 )
250+ global_token_num = len (triplet .prompt .get ("token_ids" )) + len (triplet .response .get ("token_ids" ))
251+ new_result .append (triplet )
252+ rollout_obj = self ._to_rollout_object (new_result , task .rollout_id )
247253 end_time = time .time ()
248254 logger .info (
249255 f"{ self ._log_prefix (rollout_id )} Completed in "
Original file line number Diff line number Diff line change @@ -17,8 +17,8 @@ python -m agentlightning.verl \
1717 data.val_files=${DATA_DIR} /test.parquet \
1818 actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TP_SIZE \
1919 trainer.n_gpus_per_node=${N_GPUS} \
20- data.train_batch_size=1 \
21- actor_rollout_ref.rollout.n=1 \
20+ data.train_batch_size=2 \
21+ actor_rollout_ref.rollout.n=2 \
2222 actor_rollout_ref.actor.ppo_mini_batch_size=8 \
2323 actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
2424 actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
Original file line number Diff line number Diff line change 11cd /root/verl && python scripts/legacy_model_merger.py merge \
22 --backend fsdp \
33 --hf_model_path /root/dataDisk/Qwen3-8B \
4- --local_dir /root/dataDisk/checkpoints/global_step_47 /actor \
5- --target_dir /root/dataDisk/DeepWereWolf-Qwen3-8B-Grpo-Agentic4
4+ --local_dir /root/dataDisk/checkpoints/global_step_48 /actor \
5+ --target_dir /root/dataDisk/DeepWereWolf-Qwen3-8B-Grpo-Agentic5
You can’t perform that action at this time.
0 commit comments