File tree Expand file tree Collapse file tree 2 files changed +4
-4
lines changed
examples/grpo_frozen_lake Expand file tree Collapse file tree 2 files changed +4
-4
lines changed Original file line number Diff line number Diff line change @@ -43,8 +43,8 @@ buffer:
4343 env_max_steps : 8
4444 agent_max_steps : 10
4545 is_slippery : false
46+ repeat_times : 4
4647 rollout_args :
47- n : 4
4848 top_p : 0.8
4949 top_k : 20
5050 default_workflow_type : 'frozen_lake_workflow'
Original file line number Diff line number Diff line change @@ -84,7 +84,7 @@ class GenerationConfig:
8484 logprobs : Optional[int] = None # 0 # vLLM return `logprobs + 1` elements
8585 max_tokens : Optional[int] = None # if None, use model.max_response_tokens
8686 # repeat each task for `n` times
87- # ! DO NOT SET, it will be set by `algorithm.repeat_times` or `max(buffer.explorer_input.eval_tasksets[i].repeat_times)`
87+ # ! DO NOT SET, it will be set by `algorithm.repeat_times` or `buffer.explorer_input.eval_tasksets[i].repeat_times`
8888 n : int = 1
8989
9090
@@ -249,10 +249,10 @@ class TasksetConfig:
249249
250250 enable_progress_bar : bool = False
251251
252+ # ! This setting is only valid for `eval_taskset`; for other taskset, it will be overridden by `algorithm.repeat_times`.
253+ repeat_times : int = 1
252254 # ! DO NOT SET, automatically load from checkpoint
253255 index : int = 0
254- # ! DO NOT SET in trainer_input, automatically set from `algorithm.repeat_times`
255- repeat_times : int = 1
256256 # ! DO NOT SET, automatically set based on train/eval
257257 is_eval : bool = False
258258 # ! DO NOT SET, automatically set from buffer.batch_size
You can’t perform that action at this time.
0 commit comments