Skip to content

Commit 19d524a

Browse files
authored
add rm center_rewards_coefficient argument (#3917)
1 parent 3a33b7d commit 19d524a

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

swift/llm/argument/rlhf_args.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,8 @@ class RLHFArguments(GRPOArguments, PPOArguments, RewardModelArguments, TrainArgu
 98  98      undesirable_weight: float = 1.0
 99  99      # PPO/GRPO
100 100      temperature: float = 0.9
    101 +    # RM
    102 +    center_rewards_coefficient: Optional[float] = None
101 103
102 104      def _prepare_training_args(self, training_args: Dict[str, Any]) -> None:
103 105          if self.rlhf_type == 'ppo':

0 commit comments

Comments
 (0)