Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion skyrl-train/skyrl_train/utils/ppo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,7 @@ def compute_grpo_outcome_advantage(
"""
# this assumes response-level rewards
scores = token_level_rewards.sum(dim=-1)
+ returns = scores.clone()

id2score = defaultdict(list)
id2mean = {}
Expand All @@ -1076,7 +1077,7 @@ def compute_grpo_outcome_advantage(
scores[i] = scores[i] - id2mean[index[i]]
scores = scores.unsqueeze(-1) * response_mask

- return scores, scores
+ return scores, returns


def repopulate_all_registries():
Expand Down
Loading