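Address PR comments: replace pre-allocated out= buffers with out-of-place torch.add in spectral_clip and spectral_hardcap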

mkhona-nvidia · mkhona-nvidia · commit 4afbc39c3e54 · 2025-10-03T15:00:41.000-07:00
Signed-off-by: mikail <mkhona@nvidia.com>
diff --git a/emerging_optimizers/orthogonalized_optimizers/spectral_clipping_utils.py b/emerging_optimizers/orthogonalized_optimizers/spectral_clipping_utils.py
@@ -41,14 +41,11 @@ def spectral_clip(X: torch.Tensor, sigma_min: float = -1.0, sigma_max: float = 1
     OX = newton_schulz(X, steps=8, coefficient_type="polar_express")
     result = (sigma_min + sigma_max) * OX
     identity_matrix = torch.eye(X.shape[0], device=X.device, dtype=X.dtype)
-    # Pre-allocate tensors for memory efficiency
-    A = torch.empty_like(identity_matrix)
-    B = torch.empty_like(X)
     for s, sign in zip([sigma_min, sigma_max], [1, -1]):
-        torch.add(s * identity_matrix, OX @ X.T, alpha=-1, out=A)
-        torch.add(s * OX, X, alpha=-1, out=B)
-        torch.add(result, sign * newton_schulz(A, steps=8, coefficient_type="polar_express") @ B, out=result)
-    result = (1 / 2) * result
+        A = torch.add(s * identity_matrix, OX @ X.T, alpha=-1)
+        B = torch.add(s * OX, X, alpha=-1)
+        result = torch.add(result, sign * newton_schulz(A, steps=8, coefficient_type="polar_express") @ B)
+    result = result * 0.5
 
     if needs_transpose:
         result = result.T
@@ -73,10 +70,9 @@ def spectral_hardcap(X: torch.Tensor, beta: float = 1.0) -> torch.Tensor:
         X = X.T
     OX = newton_schulz(X, steps=8, coefficient_type="polar_express")
     aX = torch.add(beta * OX, X, alpha=-1)
-    result = torch.empty_like(X)
-    torch.add(beta * OX, X, out=result)
-    torch.add(result, aX @ newton_schulz(aX, steps=8, coefficient_type="polar_express").T @ OX, alpha=-1, out=result)
-    result = (1 / 2) * result
+    result = torch.add(beta * OX, X)
+    result = torch.add(result, aX @ newton_schulz(aX, steps=8, coefficient_type="polar_express").T @ OX, alpha=-1)
+    result = result * 0.5
     if needs_transpose:
         result = result.T
     return result