File tree Expand file tree Collapse file tree 3 files changed +3
-3
lines changed
Expand file tree Collapse file tree 3 files changed +3
-3
lines changed Original file line number Diff line number Diff line change 11project : " Trinity-RFT-gsm8k"
22name : " async-qwen2.5-1.5B-gsm8k"
33mode : explore
4- checkpoint_root_dir : ' checkpoints/qwen2.5-1.5B-gsm8k '
4+ checkpoint_root_dir : ' /PATH/TO/CHECKPOINT/ '
55algorithm :
66 algorithm_type : grpo
77 repeat_times : 8
Original file line number Diff line number Diff line change @@ -266,7 +266,7 @@ class TrainerConfig:
266266 # trainer configs
267267 actor_use_kl_loss : bool = False
268268 actor_kl_loss_coef : float = 0.001
269- actor_entropy_coeff : float = 0.001
269+ actor_entropy_coef : float = 0.001
270270 actor_grad_clip : float = 1.0
271271 actor_clip_ratio : float = 0.2
272272 # TODO: extract more train-related params from underlying trainer engine
Original file line number Diff line number Diff line change @@ -935,7 +935,7 @@ def _set_actor_clip_ratio(self):
935935 max_value = 1.0 ,
936936 )
937937
938- def _set_actor_entropy_coeff (self ):
938+ def _set_actor_entropy_coef (self ):
939939 st .number_input (
940940 "Entropy Coeff" ,
941941 key = "actor_entropy_coef" ,
You can’t perform that action at this time.
0 commit comments