Rename squared_kl to kl_loss

samsja · claude · samsja · commit 63403ac38be3 · 2026-01-31T23:39:59.000Z
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/src/prime_rl/trainer/rl/loss.py b/src/prime_rl/trainer/rl/loss.py
@@ -112,7 +112,7 @@ def compute_loss(
     total_geo_masked_high = []
     total_geo_seq_ratio = []
     total_teacher_kl = []
-    total_squared_kl = []
+    total_kl_loss = []
 
     if teacher_logprobs is None:
         teacher_logprobs = [None] * len(trainer_logprobs)
@@ -151,8 +151,8 @@ def compute_loss(
         if teacher_logprobs is not None:
             advantages = advantages + loss_config.teacher_tau * teacher_kl.detach()
 
-        # Squared KL: (log π_θ/π_old)² per token
-        squared_kl = _safe_mean(log_importance_ratio**2, loss_mask)
+        # KL loss: (log π_θ/π_old)² per token
+        kl_loss_metric = _safe_mean(log_importance_ratio**2, loss_mask)
 
         if loss_config.kl_loss_type == "k2":
             # Kimi K2 style: direct squared loss (advantages - τ·log_ratio)²
@@ -182,7 +182,7 @@ def compute_loss(
         total_geo_masked_low.append(geo_mask_low.float())
         total_geo_masked_high.append(geo_mask_high.float())
         total_geo_seq_ratio.append(geo_seq_ratio)
-        total_squared_kl.append(squared_kl)
+        total_kl_loss.append(kl_loss_metric)
         if teacher_logprobs is not None:
             total_teacher_kl.append(_safe_mean(teacher_kl, loss_mask))
 
@@ -201,7 +201,7 @@ def compute_loss(
         "geo_masked_low": torch.stack(total_geo_masked_low),
         "geo_masked_high": torch.stack(total_geo_masked_high),
         "geo_seq_ratio": torch.stack(total_geo_seq_ratio),
-        "squared_kl": torch.stack(total_squared_kl),
+        "kl_loss": torch.stack(total_kl_loss),
     }
     if total_teacher_kl:
         result["teacher_kl"] = torch.stack(total_teacher_kl)