@@ -182,10 +182,8 @@ class KL_Ctrl:
182182
183183@dataclass
184184class Algorithm :
185- gamma : float = 1.0
186- lam : float = 1.0
187185 adv_estimator : str = "gae"
188- # TODO (yanxi): remove the above advantage-related parameters?
186+ # TODO (yanxi): consider removing adv_estimator completely and using AlgorithmConfig.advantage_fn_type instead
189187 norm_adv_by_std_in_grpo : bool = True
190188 use_kl_in_reward : bool = False
191189 kl_penalty : str = "kl"
@@ -316,20 +314,17 @@ def synchronize_config(self, config: Config) -> None: # noqa: C901
316314 self .actor_rollout_ref .actor .clip_ratio = config .trainer .actor_clip_ratio
317315
318316 # Algorithm related config
319- if config .algorithm .gamma is not None :
320- self .algorithm .gamma = config .algorithm .gamma
321- if config .algorithm .lam is not None :
322- self .algorithm .lam = config .algorithm .lam
323317 self .actor_rollout_ref .actor .algorithm_type = config .algorithm .algorithm_type
324318 if config .algorithm .algorithm_type == AlgorithmType .PPO :
325319 logger .info ("Setting `adv_estimator` to 'gae' for PPO" )
326320 self .algorithm .adv_estimator = AdvantageEstimator .GAE .value
327321 elif config .algorithm .algorithm_type in (AlgorithmType .GRPO , AlgorithmType .OPMD ):
328322 logger .info ("Setting `adv_estimator` to 'grpo' for GRPO/OPMD" )
329323 self .algorithm .adv_estimator = AdvantageEstimator .GRPO .value
330- # TODO (yanxi): it seems that adv_estimator only affects whether use_critic is set to
331- # True or False in RayPPOTrainer.__init__() (and hence in VerlPPOTrainerWrapper);
332- # need to double check whether this is indeed the case.
324+ # TODO (yanxi): it seems that adv_estimator now only affects whether use_critic is set to
325+ # True or False in RayPPOTrainer.__init__() (and hence in VerlPPOTrainerWrapper).
326+ # Need to double check whether this is indeed the case,
327+ # and see if adv_estimator can be removed completely.
333328
334329 if self .actor_rollout_ref .actor .algorithm_type .is_dpo (): # for DPO
335330 if not self .actor_rollout_ref .actor .use_kl_loss :
0 commit comments