Explicitly set the dtype for the gradient tensor

nujoug · nujoug · commit ac29781b7e9a · 2026-02-06T13:21:03.000-08:00
Signed-off-by: mloh <mloh@nvidia.com>
diff --git a/nemo_rl/distributed/model_utils.py b/nemo_rl/distributed/model_utils.py
@@ -220,7 +220,9 @@ def backward(
         seq_size = int(vocab_parallel_logits.shape[1])
         num_chunks = (seq_size + chunk_size - 1) // chunk_size
 
-        grad_input: torch.Tensor = torch.empty_like(vocab_parallel_logits)
+        grad_input: torch.Tensor = torch.empty_like(
+            vocab_parallel_logits, dtype=torch.float32
+        )
 
         for chunk_idx in range(num_chunks):
             chunk_start = chunk_idx * chunk_size