cp: fix: grad norm calculation for dtensor v2 (1693) into r0.5.0 (#1696)

chtruong814 · hemildesai · web-flow · commit 9902db04c617 · 2025-12-24T10:46:01.000Z
Signed-off-by: Hemil Desai <hemild@nvidia.com>
Signed-off-by: NeMo Bot <nemo-bot@nvidia.com>
Co-authored-by: Hemil Desai <hemild@nvidia.com>
diff --git a/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py b/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py
@@ -858,6 +858,10 @@ def train(
                             ## NOTE: invalid samples should be multiplied
                             ## by zero in the loss function to prevent them
                             ## from affecting the gradient calculation
+
+                            # FSDP averages gradients when it reduces them over the DP dim, but we want
+                            # their sum, so scale the loss by dp_size * cp_size before backward to cancel the averaging.
+                            loss *= self.dp_size * self.cp_size
                             loss.backward()
 
                     if num_valid_samples > 0:
@@ -880,8 +884,6 @@ def train(
                         pp_axis_name=None,
                         foreach=True,
                         num_label_tokens=1,
-                        # when FSDP reduces the gradients over the DP dim, they're automatically averaged
-                        # but we want to sum them so we rescale the gradients by self.dp_size * self.cp_size
                         dp_group_size=self.dp_size * self.cp_size,
                     )
                     grad_norm = torch.tensor(
diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh
@@ -35,5 +35,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
 if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
     uv run tests/check_metrics.py $JSON_METRICS \
         'mean(data["train/token_mult_prob_error"]) < 1.1' \
-        'data["train/token_mult_prob_error"]["30"] < 1.1'
+        'data["train/token_mult_prob_error"]["30"] < 1.1' \
+        'data["train/grad_norm"]["30"] < 0.5' \
+        'data["train/grad_norm"]["30"] > 0.1'
 fi