Import modules instead of individual names in muon utils tests

skyw · skyw · commit ef9430973399 · 2025-10-08T19:48:15.000-07:00
Signed-off-by: Hao Wu <skyw@nvidia.com>
diff --git a/tests/test_muon_utils.py b/tests/test_muon_utils.py
@@ -19,9 +19,7 @@
 from absl.testing import absltest, parameterized
 
 from emerging_optimizers import utils
-from emerging_optimizers.orthogonalized_optimizers import muon_utils
-from emerging_optimizers.orthogonalized_optimizers.muon import Muon, get_muon_scale_factor
-from emerging_optimizers.orthogonalized_optimizers.muon_utils import _COEFFICIENT_SETS, newton_schulz
+from emerging_optimizers.orthogonalized_optimizers import muon, muon_utils
 
 
 def newton_schulz_ref(x: torch.Tensor, coefficient_sets: list[tuple[float, float, float]]) -> torch.Tensor:
@@ -70,7 +68,7 @@ def tearDown(self):
     def test_newtonschulz5_svd_close(self, dim1, dim2):
         shape = (dim1, dim2)
         x = torch.randn(*shape, device="cuda", dtype=torch.float32)
-        out_zeropowerns = newton_schulz(x, steps=5, coefficient_type="quintic")
+        out_zeropowerns = muon_utils.newton_schulz(x, steps=5, coefficient_type="quintic")
         U, _, V = torch.linalg.svd(x, full_matrices=False)
         out_zeropower_svd = (U @ V).float()
         # Check that the outputs are close.
@@ -91,10 +89,10 @@ def test_newtonschulz5_svd_close(self, dim1, dim2):
     )
     def test_newtonschulz5_close_to_reference(self, dim1, dim2):
         x = torch.randn(dim1, dim2, device="cuda", dtype=torch.float32)
-        out_zeropower_test = newton_schulz(x, steps=5, coefficient_type="quintic")
+        out_zeropower_test = muon_utils.newton_schulz(x, steps=5, coefficient_type="quintic")
         out_zeropowerns_ref = newton_schulz_ref(
             x,
-            coefficient_sets=_COEFFICIENT_SETS["quintic"],
+            coefficient_sets=muon_utils._COEFFICIENT_SETS["quintic"],
         )
 
         torch.testing.assert_close(
@@ -116,7 +114,7 @@ def test_newtonschulz_custom_coeff_close_to_reference(self, dim1, dim2):
             (3, 5, 7),
             (11, 13, 17),
         ]
-        out_zeropower_test = newton_schulz(
+        out_zeropower_test = muon_utils.newton_schulz(
             x,
             steps=2,
             coefficient_type="custom",
@@ -159,8 +157,8 @@ def test_polar_express_better_than_quintic(self, dim1, dim2):
 
         # Compare polar express vs quintic Newton-Schulz methods
         out_svd = (u @ v.T).float()
-        out_polar_express = newton_schulz(x, steps=8, coefficient_type="polar_express")
-        out_quintic = newton_schulz(x, steps=5, coefficient_type="quintic")
+        out_polar_express = muon_utils.newton_schulz(x, steps=8, coefficient_type="polar_express")
+        out_quintic = muon_utils.newton_schulz(x, steps=5, coefficient_type="quintic")
 
         l2_norm_diff_polar = torch.norm(out_polar_express.float() - out_svd.float(), p=2)
         l2_norm_diff_quintic = torch.norm(out_quintic.float() - out_svd.float(), p=2)
@@ -180,7 +178,7 @@ def test_polar_express_better_than_quintic(self, dim1, dim2):
     )
     def test_get_scale_factor(self, size_pairs, mode):
         size_out, size_in = size_pairs
-        scale = get_muon_scale_factor(size_out, size_in, mode)
+        scale = muon.get_muon_scale_factor(size_out, size_in, mode)
         if mode == "shape_scaling":
             self.assertEqual(scale, math.sqrt(max(1, size_out / size_in)))
         elif mode == "spectral":
@@ -196,17 +194,17 @@ def test_qkv_split_shapes_validation(self):
         dummy_args = dict(split_qkv=True, is_qkv_fn=lambda x: True)
         # Test non-integer values
         with self.assertRaises(ValueError) as cm:
-            Muon([dummy_param], **dummy_args, qkv_split_shapes=(512.5, 256, 256))
+            muon.Muon([dummy_param], **dummy_args, qkv_split_shapes=(512.5, 256, 256))
         self.assertIn("must be integers", str(cm.exception))
 
         # Test negative values
         with self.assertRaises(ValueError) as cm:
-            Muon([dummy_param], **dummy_args, qkv_split_shapes=(512, -256, 256))
+            muon.Muon([dummy_param], **dummy_args, qkv_split_shapes=(512, -256, 256))
         self.assertIn("must be positive", str(cm.exception))
 
         # Test wrong number of elements
         with self.assertRaises(ValueError) as cm:
-            Muon([dummy_param], **dummy_args, qkv_split_shapes=(512, 256))
+            muon.Muon([dummy_param], **dummy_args, qkv_split_shapes=(512, 256))
         self.assertIn("tuple of 3 integers", str(cm.exception))