From 3eb1b92e7c12fb4019eaf6ddb80258f8a6786fdf Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 09:23:14 -0700
Subject: [PATCH 1/7] resolved conflicts

Signed-off-by: mikail
---
 tests/ci/L0_Tests_CPU.sh        |  2 ++
 tests/ci/L0_Tests_GPU.sh        |  7 +++-
 tests/ci/L1_Tests_GPU.sh        |  4 +--
 tests/test_scalar_optimizers.py | 60 ++++++++++++++++++++++++++++++---
 4 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/tests/ci/L0_Tests_CPU.sh b/tests/ci/L0_Tests_CPU.sh
index afd9f1a..594773f 100644
--- a/tests/ci/L0_Tests_CPU.sh
+++ b/tests/ci/L0_Tests_CPU.sh
@@ -15,3 +15,5 @@ export TORCH_COMPILE_DISABLE=1
 set -o pipefail
 torchrun --nproc_per_node=8 --no-python coverage run -p tests/test_distributed_muon_utils_cpu.py
 torchrun --nproc_per_node=4 --no-python coverage run -p tests/test_distributed_muon_utils_cpu.py
+coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cpu
+
diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index bd1a818..3d53227 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -20,5 +20,10 @@ coverage run -p --source=emerging_optimizers tests/test_soap_functions.py
 coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
+<<<<<<< HEAD
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py
-coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
\ No newline at end of file
+coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
+=======
+coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=auto
+
+>>>>>>> 69f98db (added cpu and gpu flag)
diff --git a/tests/ci/L1_Tests_GPU.sh b/tests/ci/L1_Tests_GPU.sh
index 7af079e..add9c89 100644
--- a/tests/ci/L1_Tests_GPU.sh
+++ b/tests/ci/L1_Tests_GPU.sh
@@ -18,5 +18,5 @@ python tests/test_orthogonalized_optimizer.py
 python tests/test_soap_functions.py
 python tests/test_soap_utils.py
 python tests/soap_smoke_test.py
-python tests/test_scalar_optimizers.py
-python tests/test_spectral_clipping_utils.py
\ No newline at end of file
+python tests/test_scalar_optimizers.py --device=cuda
+python tests/test_spectral_clipping_utils.py
diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 07a6f88..85aad6d 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
+from absl import flags
 from absl.testing import absltest, parameterized
 
 from emerging_optimizers.scalar_optimizers import (
@@ -23,15 +24,43 @@
 )
 
 
+# Define command line flags
+flags.DEFINE_string("device", "cpu", "Device to run tests on: 'cpu', 'cuda', or 'auto'")
+flags.DEFINE_integer("seed", 42, "Random seed for reproducible tests")
+flags.DEFINE_boolean("skip_gpu_tests", False, "Skip GPU tests even if CUDA is available")
+
+FLAGS = flags.FLAGS
+
+
 # Base class for tests requiring seeding for determinism
 class BaseTestCase(parameterized.TestCase):
     def setUp(self):
-        """Set random seed before each test."""
-        # Set seed for PyTorch
-        torch.manual_seed(42)
+        """Set random seed and device before each test."""
+        # Set seed for PyTorch (using seed from flags)
+        torch.manual_seed(FLAGS.seed)
         # Set seed for CUDA if available
         if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(42)
+            torch.cuda.manual_seed_all(FLAGS.seed)
+
+        # Set up device based on flags
+        self.device = self._get_test_device()
+
+    def _get_test_device(self):
+        """Get the device to use for testing based on flags."""
+        if FLAGS.device == "auto":
+            return "cuda" if torch.cuda.is_available() and not FLAGS.skip_gpu_tests else "cpu"
+        elif FLAGS.device == "cuda":
+            if not torch.cuda.is_available():
+                self.skipTest("CUDA not available")
+            if FLAGS.skip_gpu_tests:
+                self.skipTest("GPU tests skipped by flag")
+            return "cuda"
+        else:
+            return "cpu"
+
+    def _move_to_device(self, *tensors):
+        """Helper method to move tensors to the test device."""
+        return tuple(tensor.to(self.device) for tensor in tensors)
 
 
 class ScalarOptimizerTest(BaseTestCase):
@@ -39,6 +68,10 @@ def test_calculate_adam_update_simple(self) -> None:
         exp_avg_initial = torch.tensor([[1.0]])
         exp_avg_sq_initial = torch.tensor([[2.0]])
         grad = torch.tensor([[0.5]])
+
+        # Move tensors to the test device
+        exp_avg_initial, exp_avg_sq_initial, grad = self._move_to_device(exp_avg_initial, exp_avg_sq_initial, grad)
+
         betas = (0.9, 0.99)
         eps = 1e-8
         step = 10
@@ -59,7 +92,7 @@ def test_calculate_adam_update_simple(self) -> None:
             eps=eps,
         )
 
-        initial_param_val_tensor = torch.tensor([[10.0]])
+        initial_param_val_tensor = torch.tensor([[10.0]]).to(self.device)
         param = torch.nn.Parameter(initial_param_val_tensor.clone())
         param.grad = grad.clone()
 
@@ -249,6 +282,23 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         expected_param_val_after_step = initial_param_val_tensor - lr * sim_ademamix_update
         torch.testing.assert_close(param.data, expected_param_val_after_step, atol=1e-6, rtol=1e-6)
 
+    def test_device_functionality(self) -> None:
+        """Test that tensors are correctly moved to the specified device."""
+        # Create test tensors
+        tensor1 = torch.tensor([1.0, 2.0, 3.0])
+        tensor2 = torch.tensor([[1.0], [2.0]])
+
+        # Move to test device
+        tensor1_device, tensor2_device = self._move_to_device(tensor1, tensor2)
+
+        # Verify they are on the correct device
+        self.assertEqual(str(tensor1_device.device), self.device)
+        self.assertEqual(str(tensor2_device.device), self.device)
+
+        # Verify values are preserved
+        torch.testing.assert_close(tensor1_device.cpu(), tensor1, atol=1e-6, rtol=1e-6)
+        torch.testing.assert_close(tensor2_device.cpu(), tensor2, atol=1e-6, rtol=1e-6)
+
 
 if __name__ == "__main__":
     absltest.main()

From 438c6c928cd7d2fc639e25674c81af7e4a13a112 Mon Sep 17 00:00:00 2001
From: mikail
Date: Fri, 3 Oct 2025 17:22:29 -0700
Subject: [PATCH 2/7] removed debugging test for device

Signed-off-by: mikail
---
 tests/test_scalar_optimizers.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 85aad6d..7c326f4 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -282,23 +282,6 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         expected_param_val_after_step = initial_param_val_tensor - lr * sim_ademamix_update
         torch.testing.assert_close(param.data, expected_param_val_after_step, atol=1e-6, rtol=1e-6)
 
-    def test_device_functionality(self) -> None:
-        """Test that tensors are correctly moved to the specified device."""
-        # Create test tensors
-        tensor1 = torch.tensor([1.0, 2.0, 3.0])
-        tensor2 = torch.tensor([[1.0], [2.0]])
-
-        # Move to test device
-        tensor1_device, tensor2_device = self._move_to_device(tensor1, tensor2)
-
-        # Verify they are on the correct device
-        self.assertEqual(str(tensor1_device.device), self.device)
-        self.assertEqual(str(tensor2_device.device), self.device)
-
-        # Verify values are preserved
-        torch.testing.assert_close(tensor1_device.cpu(), tensor1, atol=1e-6, rtol=1e-6)
-        torch.testing.assert_close(tensor2_device.cpu(), tensor2, atol=1e-6, rtol=1e-6)
-
 
 if __name__ == "__main__":
     absltest.main()

From c1b201b57b9906aea8a70ca8ce5cd23a0bc84c4c Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 08:44:21 -0700
Subject: [PATCH 3/7] addressed MR

Signed-off-by: mikail
---
 tests/test_scalar_optimizers.py | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 7c326f4..8fe3ae2 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -25,15 +25,13 @@
 
 
 # Define command line flags
-flags.DEFINE_string("device", "cpu", "Device to run tests on: 'cpu', 'cuda', or 'auto'")
+flags.DEFINE_string("device", "cpu", "Device to run tests on: 'cpu' or 'cuda'")
 flags.DEFINE_integer("seed", 42, "Random seed for reproducible tests")
-flags.DEFINE_boolean("skip_gpu_tests", False, "Skip GPU tests even if CUDA is available")
 
 FLAGS = flags.FLAGS
 
 
-# Base class for tests requiring seeding for determinism
-class BaseTestCase(parameterized.TestCase):
+class ScalarOptimizerTest(parameterized.TestCase):
     def setUp(self):
         """Set random seed and device before each test."""
         # Set seed for PyTorch (using seed from flags)
@@ -43,34 +41,17 @@ def setUp(self):
             torch.cuda.manual_seed_all(FLAGS.seed)
 
         # Set up device based on flags
-        self.device = self._get_test_device()
-
-    def _get_test_device(self):
-        """Get the device to use for testing based on flags."""
-        if FLAGS.device == "auto":
-            return "cuda" if torch.cuda.is_available() and not FLAGS.skip_gpu_tests else "cpu"
-        elif FLAGS.device == "cuda":
-            if not torch.cuda.is_available():
-                self.skipTest("CUDA not available")
-            if FLAGS.skip_gpu_tests:
-                self.skipTest("GPU tests skipped by flag")
-            return "cuda"
-        else:
-            return "cpu"
-
-    def _move_to_device(self, *tensors):
-        """Helper method to move tensors to the test device."""
-        return tuple(tensor.to(self.device) for tensor in tensors)
-
-
-class ScalarOptimizerTest(BaseTestCase):
+        self.device = FLAGS.device
+
     def test_calculate_adam_update_simple(self) -> None:
         exp_avg_initial = torch.tensor([[1.0]])
         exp_avg_sq_initial = torch.tensor([[2.0]])
         grad = torch.tensor([[0.5]])
 
         # Move tensors to the test device
-        exp_avg_initial, exp_avg_sq_initial, grad = self._move_to_device(exp_avg_initial, exp_avg_sq_initial, grad)
+        exp_avg_initial = exp_avg_initial.to(self.device)
+        exp_avg_sq_initial = exp_avg_sq_initial.to(self.device)
+        grad = grad.to(self.device)
 
         betas = (0.9, 0.99)
         eps = 1e-8

From 979f24bf1ad25f0415437b4cbe2602cef79c6801 Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 08:47:28 -0700
Subject: [PATCH 4/7] added device flag to all created tensors in test

Signed-off-by: mikail
---
 tests/test_scalar_optimizers.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 8fe3ae2..8e8244b 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -44,9 +44,9 @@ def setUp(self):
         self.device = FLAGS.device
 
     def test_calculate_adam_update_simple(self) -> None:
-        exp_avg_initial = torch.tensor([[1.0]])
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_initial = torch.tensor([[1.0]], device=self.device)
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
 
         # Move tensors to the test device
         exp_avg_initial = exp_avg_initial.to(self.device)
@@ -87,7 +87,7 @@ def test_calculate_adam_update_simple(self) -> None:
         )
 
         # Manually set Adam's internal state to match conditions *before* the current update
-        adam_optimizer.state[param]["step"] = torch.tensor(float(step - 1))
+        adam_optimizer.state[param]["step"] = torch.tensor(float(step - 1), device=self.device)
         adam_optimizer.state[param]["exp_avg"] = exp_avg_initial.clone()
         adam_optimizer.state[param]["exp_avg_sq"] = exp_avg_sq_initial.clone()
 
@@ -110,9 +110,9 @@ def test_calculate_adam_update_simple(self) -> None:
 
     def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None:
         # LaProp with momentum (beta1) = 0 should be equivalent to RMSProp.
-        exp_avg_initial = torch.tensor([[0.0]])  # Momentum is 0, so exp_avg starts at 0
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_initial = torch.tensor([[0.0]], device=self.device)  # Momentum is 0, so exp_avg starts at 0
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
         betas = (0.0, 0.99)  # beta1=0 for momentum
         eps = 1e-8
         step = 10
@@ -133,7 +133,7 @@ def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None
         )
 
         # Manually verify with RMSProp logic
-        initial_param_val_tensor = torch.tensor([[10.0]])
+        initial_param_val_tensor = torch.tensor([[10.0]], device=self.device)
         param = torch.nn.Parameter(initial_param_val_tensor.clone())
         param.grad = grad.clone()
 
@@ -148,7 +148,7 @@ def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None
         )
 
         # Manually set RMSProp's internal state
-        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step))
+        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step), device=self.device)
         rmsprop_optimizer.state[param]["square_avg"] = exp_avg_sq_initial.clone()
         rmsprop_optimizer.state[param]["momentum_buffer"] = exp_avg_initial.clone()
 
@@ -164,10 +164,10 @@ def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None
 
     def test_calculate_ademamix_update_with_alpha_zero_equals_adam(self) -> None:
         # AdEMAMix with alpha=0 and no beta scheduling should be equivalent to Adam.
-        exp_avg_fast_initial = torch.tensor([[1.0]])
-        exp_avg_slow_initial = torch.tensor([[1.0]])
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_fast_initial = torch.tensor([[1.0]], device=self.device)
+        exp_avg_slow_initial = torch.tensor([[1.0]], device=self.device)
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
         betas = (0.9, 0.99, 0.999)
         eps = 1e-8
         step = 10
@@ -209,9 +209,9 @@ def test_calculate_ademamix_update_with_alpha_zero_equals_adam(self) -> None:
 
     def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmsprop(self) -> None:
         # sim_ademamix with momentum (beta_fast) = 0 and alpha = 0 should be equivalent to RMSProp.
-        exp_avg_initial = torch.tensor([[0.0]])  # Momentum is 0, so exp_avg starts at 0
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_initial = torch.tensor([[0.0]], device=self.device)  # Momentum is 0, so exp_avg starts at 0
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
         betas = (0.0, 0.99)  # beta1=0 for momentum
         eps = 1e-8
         step = 10
@@ -235,7 +235,7 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         )
 
         # Manually verify with RMSProp logic
-        initial_param_val_tensor = torch.tensor([[10.0]])
+        initial_param_val_tensor = torch.tensor([[10.0]], device=self.device)
         param = torch.nn.Parameter(initial_param_val_tensor.clone())
         param.grad = grad.clone()
 
@@ -250,7 +250,7 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         )
 
         # Manually set RMSProp's internal state
-        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step))
+        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step), device=self.device)
         rmsprop_optimizer.state[param]["square_avg"] = exp_avg_sq_initial.clone()
 
         rmsprop_optimizer.step()

From 43d8ccdd936095012a185c92670f680e070479a5 Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 08:48:45 -0700
Subject: [PATCH 5/7] added explicit device flag for gpu, removed auto, in test

Signed-off-by: mikail
---
 tests/ci/L0_Tests_GPU.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index 3d53227..2d5c0f6 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -21,9 +21,13 @@ coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
 <<<<<<< HEAD
+<<<<<<< HEAD
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py
 coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
 =======
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=auto
+=======
+coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cuda
+>>>>>>> b5f68de (added explicit device flag for gpu, removed auto, in test)
 
 >>>>>>> 69f98db (added cpu and gpu flag)

From 52a643d873a3e8f9dd8dad026bc8ea2fbcf0f321 Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 09:33:33 -0700
Subject: [PATCH 6/7] fixed L0 test file conflict

Signed-off-by: mikail
---
 tests/ci/L0_Tests_GPU.sh | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index 2d5c0f6..d8dc719 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -20,14 +20,5 @@ coverage run -p --source=emerging_optimizers tests/test_soap_functions.py
 coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
-<<<<<<< HEAD
-<<<<<<< HEAD
-coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py
-coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
-=======
-coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=auto
-=======
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cuda
->>>>>>> b5f68de (added explicit device flag for gpu, removed auto, in test)
-
->>>>>>> 69f98db (added cpu and gpu flag)
+coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.pyg
\ No newline at end of file

From b7fd64379b48d352359e1b4bf10e128aa7d3886b Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 09:34:17 -0700
Subject: [PATCH 7/7] fixed typo

Signed-off-by: mikail
---
 tests/ci/L0_Tests_GPU.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index d8dc719..9d80bcf 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -21,4 +21,4 @@ coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cuda
-coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.pyg
\ No newline at end of file
+coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
\ No newline at end of file