We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4d9d1f4 commit ee9a5b2 (Copy full SHA for ee9a5b2)
rlinf/algorithms/utils.py
@@ -197,9 +197,7 @@ def preprocess_reasoning_advantages_inputs(
197
kwargs.update({"rewards": expanded_rewards})
198
199
elif kwargs["adv_type"] == "grpo":
200
- grouped_rewards = (
201
- rewards.reshape(-1, kwargs["group_size"]).transpose(0, 1).contiguous()
202
- )
+ grouped_rewards = rewards.reshape(-1, kwargs["group_size"]).contiguous()
203
kwargs.update(
204
{
205
"rewards": grouped_rewards,
0 commit comments