We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0eaad00, commit 047e735 (Copy full SHA for 047e735)
emerging_optimizers/orthogonalized_optimizers/mop.py
@@ -60,7 +60,7 @@ def scaled_orthogonalize_fn(grad: torch.Tensor) -> torch.Tensor:
60
scale_factor = muon.get_muon_scale_factor(grad.size(-2), grad.size(-1), mode=scale_mode)
61
else:
62
# nuclear norm scaling suggested by PolarGrad paper (https://arxiv.org/pdf/2505.21799)
63
- scale_factor = S.sum().sqrt()
+ scale_factor = S.sum()
64
return orth_grad * scale_factor * extra_scale_factor
65
66
super().__init__(
0 commit comments