Tiny fix k1 estimator typo and low_var_kl comments (#483)

fzyzcjy · web-flow · commit 7b2701ce8345 · 2025-10-20T11:58:21.000+08:00
diff --git a/slime/utils/arguments.py b/slime/utils/arguments.py
@@ -615,9 +615,9 @@ def add_algo_arguments(parser):
             parser.add_argument(
                 "--kl-loss-type",
                 type=str,
-                choices=["kl", "k2", "k3", "low_var_kl"],
-                default="kl",
-                help="Choose KL loss type: kl, k2, k3 low_var_kl",
+                choices=["k1", "k2", "k3", "low_var_kl"],
+                default="k1",
+                help="Choose KL loss type: k1, k2, k3, low_var_kl",
             )
             parser.add_argument(
                 "--advantage-estimator",
diff --git a/slime/utils/ppo_utils.py b/slime/utils/ppo_utils.py
@@ -24,20 +24,20 @@ def compute_approx_kl(
 
     log_ratio = log_probs.float() - log_probs_base.float()
 
-    if kl_loss_type == "kl":
+    if kl_loss_type == "k1":
         return log_ratio
     elif kl_loss_type == "k2":
         log_ratio = log_probs.float() - log_probs_base.float()
         log_ratio = log_ratio**2 / 2.0
         return log_ratio
     elif kl_loss_type == "k3":
+        # The non-negative KL approximation from
+        # http://joschu.net/blog/kl-approx.html
+        # Besides being non-negative, it is also unbiased and has lower variance.
         log_ratio = -log_ratio
         log_ratio = log_ratio.exp() - 1 - log_ratio
         return log_ratio
     elif kl_loss_type == "low_var_kl":
-        # The non negative kl approximation in
-        # http://joschu.net/blog/kl-approx.html
-        # Besides non negative, it is also unbiased and have lower variance.
         log_ratio = -log_ratio
         log_ratio = log_ratio.exp() - 1 - log_ratio
         return torch.clamp(log_ratio, min=-10, max=10)