Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/grpo/qwen3_32b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Global configuration
group_size: 16
local_batch_size: 32 # per-device batch size
local_batch_size: 2 # per-device batch size
max_req_tokens: 1024
max_res_tokens: 1024
model: "Qwen/Qwen3-32B"
Expand Down
1 change: 1 addition & 0 deletions apps/grpo/qwen3_8b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ off_by_n: 1 # Off by one by default
# Observability configuration
metric_logging:
wandb:
entity: agentic-models
project: grpo-training
group: grpo_exp_${oc.env:USER}
logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
Expand Down
9 changes: 7 additions & 2 deletions src/forge/controller/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,19 @@ async def get_allocator(self, name: str, num_hosts: int) -> tuple[Any, Any, str]
for role in appdef.roles:
# Note - this is hardcoded to SLURM
# We got this with sinfo
role.resource.memMB = 2062607
role.resource.cpu = 128
role.resource.memMB = 2047962
role.resource.cpu = 192
role.resource.gpu = 8

# Note - we cannot add in an empty workspace, so we create a fake temporary one
temp_workspace = tempfile.mkdtemp(prefix="forge_workspace_")
server_config = Config(
scheduler="slurm",
scheduler_args={
"account": "agentic-models",
"qos": "h100_lowest",
"time": "72:00:00"
},
appdef=appdef,
workspace=monarch.tools.config.workspace.Workspace(dirs=[temp_workspace]),
)
Expand Down
24 changes: 24 additions & 0 deletions submit_grpo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Slurm batch script: launch a GRPO training run of Qwen3-32B on one 8-GPU node.
#SBATCH --job-name=grpo-qwen3-32b
#SBATCH --qos=h200_agentic-models_high
#SBATCH --account=agentic-models
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=8
#SBATCH --cpus-per-task=128
#SBATCH --mem=500G
#SBATCH --time=72:00:00

echo "Starting GRPO training job"

# Initialize conda in this non-interactive batch shell so `conda activate` works.
eval "$(conda shell.bash hook)"

conda activate forge

# Disable torch.compile for this run.
export TORCH_COMPILE_DISABLE=1
# Clear Slurm's inherited per-task memory limits — presumably so nested
# launches from inside the job are not constrained by them; TODO confirm.
unset SLURM_MEM_PER_CPU SLURM_MEM_PER_GPU SLURM_MEM_PER_NODE
export TORCHSTORE_RDMA_ENABLED=0

# Abort if the checkout is missing (SC2164): without the guard, a failed cd
# would let the training command run from the wrong working directory.
cd /storage/home/daniellepintz/torchforge || exit 1

python -m apps.grpo.main --config apps/grpo/qwen3_32b.yaml
Loading