We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent a179cce · commit 5aa7d02 · Copy full SHA for 5aa7d02
torchtitan/experiments/rl/tasks/sum_digits/simple_grpo.py
@@ -40,7 +40,10 @@
40
from torchtitan.experiments.rl.actors.generator import VLLMGenerator
41
from torchtitan.experiments.rl.actors.grader import Grader
42
from torchtitan.experiments.rl.actors.trainer import PolicyTrainer
43
-from torchtitan.experiments.rl.tasks.sum_digits.task import extract_answer, SumDigitsTask
+from torchtitan.experiments.rl.tasks.sum_digits.task import (
44
+ extract_answer,
45
+ SumDigitsTask,
46
+)
47
from torchtitan.experiments.rl.types import Episode
48
from torchtitan.protocols.model_spec import ModelSpec
49
0 commit comments