diff --git a/README.md b/README.md index 88225172f0..763fe0340f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ - # torchtune [![Unit Test](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml) diff --git a/torchtune/dev/rl/rewards.py b/torchtune/dev/rl/rewards.py index 95c45ee9b0..62762cfe06 100644 --- a/torchtune/dev/rl/rewards.py +++ b/torchtune/dev/rl/rewards.py @@ -296,21 +296,13 @@ def batched_rewards( metadata = {"func_names": [f.__name__ for f in reward_funcs]} for b in range(batch_size): - for g in range(grpo_size): - answer = answers[b][g] - text_completion = tokenizer.decode(completions[b, g].tolist()) - cot, potential_answer = extract_tags(f"{text_completion}") - for rw_idx, reward_func in enumerate(reward_funcs): - reward, success = reward_func(cot, answer, potential_answer) - rewards_tensor[b, g, rw_idx] += reward - successes_tensor[b, g, rw_idx] += success return rewards_tensor, successes_tensor, metadata