We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 38f7927, commit 2a1e021 (Copy full SHA for 2a1e021)
apps/grpo/main.py
@@ -274,10 +274,10 @@ async def __next__(self) -> dict[str, str] | None:
274
async def main():
275
"""Main GRPO training loop with rollout and training processes."""
276
group_size = 1
277
- model = "Qwen/Qwen3-0.6B"
278
- titan_model = TitanJobModelConfig(name="qwen3", flavor="0.6B")
279
- # model = "meta-llama/Llama-3.1-8B-Instruct"
280
- # titan_model = TitanJobModelConfig(name="llama3", flavor="8B")
+ # model = "Qwen/Qwen3-0.6B"
+ # titan_model = TitanJobModelConfig(name="qwen3", flavor="0.6B")
+ model = "meta-llama/Llama-3.1-8B-Instruct"
+ titan_model = TitanJobModelConfig(name="llama3", flavor="8B")
281
282
# ---- Setup WandB Logger ---- #
283
logger = get_metric_logger(
0 commit comments