@@ -717,7 +717,22 @@ def test_noncolocated_inference_requires_explicit_gpus_per_node_single_node():
717717 },
718718 },
719719 },
720- "loss_fn" : {}, # Config extraction requires this key
720+ "loss_fn" : {
721+ "ratio_clip_min" : 0.2 ,
722+ "ratio_clip_max" : 0.2 ,
723+ "ratio_clip_c" : None ,
724+ "disable_ppo_ratio" : False ,
725+ "reference_policy_kl_penalty" : 0.0 ,
726+ "reference_policy_kl_type" : "k3" ,
727+ "kl_input_clamp_value" : 20.0 ,
728+ "kl_output_clamp_value" : 10.0 ,
729+ "use_on_policy_kl_approximation" : False ,
730+ "use_importance_sampling_correction" : False ,
731+ "truncated_importance_sampling_ratio" : None ,
732+ "sequence_level_importance_ratios" : False ,
733+ "token_level_loss" : True ,
734+ "force_on_policy_ratio" : False ,
735+ },
721736 "env" : {}, # Config extraction requires this key
722737 "grpo" : {
723738 "seed" : 42 ,
@@ -775,7 +790,22 @@ def test_noncolocated_inference_requires_explicit_gpus_per_node_multi_node():
775790 },
776791 },
777792 },
778- "loss_fn" : {}, # Config extraction requires this key
793+ "loss_fn" : {
794+ "ratio_clip_min" : 0.2 ,
795+ "ratio_clip_max" : 0.2 ,
796+ "ratio_clip_c" : None ,
797+ "disable_ppo_ratio" : False ,
798+ "reference_policy_kl_penalty" : 0.0 ,
799+ "reference_policy_kl_type" : "k3" ,
800+ "kl_input_clamp_value" : 20.0 ,
801+ "kl_output_clamp_value" : 10.0 ,
802+ "use_on_policy_kl_approximation" : False ,
803+ "use_importance_sampling_correction" : False ,
804+ "truncated_importance_sampling_ratio" : None ,
805+ "sequence_level_importance_ratios" : False ,
806+ "token_level_loss" : True ,
807+ "force_on_policy_ratio" : False ,
808+ },
779809 "env" : {}, # Config extraction requires this key
780810 "grpo" : {
781811 "seed" : 42 ,
0 commit comments