2 changes: 1 addition & 1 deletion apps/grpo/qwen3_8b.yaml

@@ -42,7 +42,7 @@ policy:

 # Trainer configuration
 trainer:
-  use_dcp: true
+  use_dcp: false
   use_vllm_builtin_load: true
   model:
     name: qwen3
3 changes: 2 additions & 1 deletion src/forge/actors/trainer.py

@@ -403,7 +403,8 @@ async def push_weights(self, policy_version: int) -> None:
         else:
             for name, param in hf_state_dict.items():
                 key = get_param_key(policy_version, name)
-                await ts.put(key, param)
+                # RDMA is still broken on GPU, so we need to copy to CPU
+                await ts.put(key, param.detach().cpu())
Contributor:
Is this a bad thing? I thought we were writing it to CPU memory anyway on the trainer put side.

Contributor (Author):
Theoretically we don't need this extra copy if RDMA on GPU is working. Yes, we are writing to CPU memory, but currently the path is local GPU -> local CPU -> remote CPU.
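The write path described above amounts to one extra staging copy. A minimal sketch (the helper name `stage_for_put` is hypothetical; the actual change simply inlines `param.detach().cpu()` inside `push_weights`):

```python
import torch

def stage_for_put(param: torch.Tensor) -> torch.Tensor:
    # local gpu -> local cpu: detach from autograd and copy to host memory,
    # so the subsequent store put (local cpu -> remote cpu) never has to
    # register GPU memory for RDMA.
    return param.detach().cpu()
```

With working GPU RDMA the transport could read device memory directly and this hop would disappear.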

Contributor @vidhyav, Oct 7, 2025:

1. Why is the GPU RDMA not working?

2. What is the perf penalty with the GPU-CPU copy?

3. Also, what's the corresponding access on the read side?

Contributor (Author):

> 1. Why is the GPU RDMA not working?

Memory registration error. This could be a build issue, but I doubt we have enough time to debug. CPU should work now.

> 2. What is the perf penalty with the GPU-CPU copy?

Not sure, need profiling. I'd guess anywhere between 30% and 100% increased latency.
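The staging copy itself is easy to profile in isolation. A minimal timing sketch (the helper name `d2h_copy_ms` is hypothetical; on a CUDA tensor this measures the actual device-to-host transfer, while a CPU tensor only exercises the code path):

```python
import time
import torch

def d2h_copy_ms(param: torch.Tensor, iters: int = 10) -> float:
    # Times the detach().cpu() staging copy added in push_weights,
    # averaged over `iters` repetitions, in milliseconds.
    if param.is_cuda:
        torch.cuda.synchronize()  # drain pending kernels before timing
    start = time.perf_counter()
    for _ in range(iters):
        param.detach().cpu()
    if param.is_cuda:
        torch.cuda.synchronize()  # ensure the copies have completed
    return (time.perf_counter() - start) * 1000 / iters
```

Running this per-parameter over `hf_state_dict` would replace the 30%-100% guess with real numbers.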

> 3. Also, what's the corresponding access on the read side?

On the read side, it's basically remote CPU -> local CPU -> GPU (the vLLM worker).
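The final hop of that read path can be sketched as follows (hypothetical helper; the store read that performs remote CPU -> local CPU is assumed to have already landed the tensor in host memory):

```python
import torch

def to_worker_device(param: torch.Tensor) -> torch.Tensor:
    # local cpu -> gpu: the last hop on the vLLM worker after the store
    # read has placed the tensor in local host memory.
    if not torch.cuda.is_available():
        return param  # CPU-only fallback so the sketch runs anywhere
    # Pinning host memory lets the H2D copy overlap with compute.
    return param.pin_memory().to("cuda", non_blocking=True)
```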

         t.step("ts_save")
         t.stop()
         end_time = time.perf_counter()