From 3eb1b92e7c12fb4019eaf6ddb80258f8a6786fdf Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 09:23:14 -0700
Subject: [PATCH 1/7] resolved conflicts

Signed-off-by: mikail
---
 tests/ci/L0_Tests_CPU.sh        |  2 ++
 tests/ci/L0_Tests_GPU.sh        |  7 +++-
 tests/ci/L1_Tests_GPU.sh        |  4 +--
 tests/test_scalar_optimizers.py | 60 ++++++++++++++++++++++++++++++---
 4 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/tests/ci/L0_Tests_CPU.sh b/tests/ci/L0_Tests_CPU.sh
index afd9f1a..594773f 100644
--- a/tests/ci/L0_Tests_CPU.sh
+++ b/tests/ci/L0_Tests_CPU.sh
@@ -15,3 +15,5 @@ export TORCH_COMPILE_DISABLE=1
 set -o pipefail
 torchrun --nproc_per_node=8 --no-python coverage run -p tests/test_distributed_muon_utils_cpu.py
 torchrun --nproc_per_node=4 --no-python coverage run -p tests/test_distributed_muon_utils_cpu.py
+coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cpu
+
diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index bd1a818..3d53227 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -20,5 +20,10 @@ coverage run -p --source=emerging_optimizers tests/test_soap_functions.py
 coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
+<<<<<<< HEAD
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py
-coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
\ No newline at end of file
+coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
+=======
+coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=auto
+
+>>>>>>> 69f98db (added cpu and gpu flag)
diff --git a/tests/ci/L1_Tests_GPU.sh b/tests/ci/L1_Tests_GPU.sh
index 7af079e..add9c89 100644
--- a/tests/ci/L1_Tests_GPU.sh
+++ b/tests/ci/L1_Tests_GPU.sh
@@ -18,5 +18,5 @@ python tests/test_orthogonalized_optimizer.py
 python tests/test_soap_functions.py
 python tests/test_soap_utils.py
 python tests/soap_smoke_test.py
-python tests/test_scalar_optimizers.py
-python tests/test_spectral_clipping_utils.py
\ No newline at end of file
+python tests/test_scalar_optimizers.py --device=cuda
+python tests/test_spectral_clipping_utils.py
diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 07a6f88..85aad6d 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
+from absl import flags
 from absl.testing import absltest, parameterized
 
 from emerging_optimizers.scalar_optimizers import (
@@ -23,15 +24,43 @@
 )
 
 
+# Define command line flags
+flags.DEFINE_string("device", "cpu", "Device to run tests on: 'cpu', 'cuda', or 'auto'")
+flags.DEFINE_integer("seed", 42, "Random seed for reproducible tests")
+flags.DEFINE_boolean("skip_gpu_tests", False, "Skip GPU tests even if CUDA is available")
+
+FLAGS = flags.FLAGS
+
+
 # Base class for tests requiring seeding for determinism
 class BaseTestCase(parameterized.TestCase):
     def setUp(self):
-        """Set random seed before each test."""
-        # Set seed for PyTorch
-        torch.manual_seed(42)
+        """Set random seed and device before each test."""
+        # Set seed for PyTorch (using seed from flags)
+        torch.manual_seed(FLAGS.seed)
         # Set seed for CUDA if available
         if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(42)
+            torch.cuda.manual_seed_all(FLAGS.seed)
+
+        # Set up device based on flags
+        self.device = self._get_test_device()
+
+    def _get_test_device(self):
+        """Get the device to use for testing based on flags."""
+        if FLAGS.device == "auto":
+            return "cuda" if torch.cuda.is_available() and not FLAGS.skip_gpu_tests else "cpu"
+        elif FLAGS.device == "cuda":
+            if not torch.cuda.is_available():
+                self.skipTest("CUDA not available")
+            if FLAGS.skip_gpu_tests:
+                self.skipTest("GPU tests skipped by flag")
+            return "cuda"
+        else:
+            return "cpu"
+
+    def _move_to_device(self, *tensors):
+        """Helper method to move tensors to the test device."""
+        return tuple(tensor.to(self.device) for tensor in tensors)
 
 
 class ScalarOptimizerTest(BaseTestCase):
@@ -39,6 +68,10 @@ def test_calculate_adam_update_simple(self) -> None:
         exp_avg_initial = torch.tensor([[1.0]])
         exp_avg_sq_initial = torch.tensor([[2.0]])
         grad = torch.tensor([[0.5]])
+
+        # Move tensors to the test device
+        exp_avg_initial, exp_avg_sq_initial, grad = self._move_to_device(exp_avg_initial, exp_avg_sq_initial, grad)
+
         betas = (0.9, 0.99)
         eps = 1e-8
         step = 10
@@ -59,7 +92,7 @@ def test_calculate_adam_update_simple(self) -> None:
             eps=eps,
         )
 
-        initial_param_val_tensor = torch.tensor([[10.0]])
+        initial_param_val_tensor = torch.tensor([[10.0]]).to(self.device)
         param = torch.nn.Parameter(initial_param_val_tensor.clone())
         param.grad = grad.clone()
 
@@ -249,6 +282,23 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         expected_param_val_after_step = initial_param_val_tensor - lr * sim_ademamix_update
         torch.testing.assert_close(param.data, expected_param_val_after_step, atol=1e-6, rtol=1e-6)
 
+    def test_device_functionality(self) -> None:
+        """Test that tensors are correctly moved to the specified device."""
+        # Create test tensors
+        tensor1 = torch.tensor([1.0, 2.0, 3.0])
+        tensor2 = torch.tensor([[1.0], [2.0]])
+
+        # Move to test device
+        tensor1_device, tensor2_device = self._move_to_device(tensor1, tensor2)
+
+        # Verify they are on the correct device
+        self.assertEqual(str(tensor1_device.device), self.device)
+        self.assertEqual(str(tensor2_device.device), self.device)
+
+        # Verify values are preserved
+        torch.testing.assert_close(tensor1_device.cpu(), tensor1, atol=1e-6, rtol=1e-6)
+        torch.testing.assert_close(tensor2_device.cpu(), tensor2, atol=1e-6, rtol=1e-6)
+
 
 if __name__ == "__main__":
     absltest.main()

From 438c6c928cd7d2fc639e25674c81af7e4a13a112 Mon Sep 17 00:00:00 2001
From: mikail
Date: Fri, 3 Oct 2025 17:22:29 -0700
Subject: [PATCH 2/7] removed debugging test for device

Signed-off-by: mikail
---
 tests/test_scalar_optimizers.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 85aad6d..7c326f4 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -282,23 +282,6 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         expected_param_val_after_step = initial_param_val_tensor - lr * sim_ademamix_update
         torch.testing.assert_close(param.data, expected_param_val_after_step, atol=1e-6, rtol=1e-6)
 
-    def test_device_functionality(self) -> None:
-        """Test that tensors are correctly moved to the specified device."""
-        # Create test tensors
-        tensor1 = torch.tensor([1.0, 2.0, 3.0])
-        tensor2 = torch.tensor([[1.0], [2.0]])
-
-        # Move to test device
-        tensor1_device, tensor2_device = self._move_to_device(tensor1, tensor2)
-
-        # Verify they are on the correct device
-        self.assertEqual(str(tensor1_device.device), self.device)
-        self.assertEqual(str(tensor2_device.device), self.device)
-
-        # Verify values are preserved
-        torch.testing.assert_close(tensor1_device.cpu(), tensor1, atol=1e-6, rtol=1e-6)
-        torch.testing.assert_close(tensor2_device.cpu(), tensor2, atol=1e-6, rtol=1e-6)
-
 
 if __name__ == "__main__":
     absltest.main()

From c1b201b57b9906aea8a70ca8ce5cd23a0bc84c4c Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 08:44:21 -0700
Subject: [PATCH 3/7] addressed MR

Signed-off-by: mikail
---
 tests/test_scalar_optimizers.py | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 7c326f4..8fe3ae2 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -25,15 +25,13 @@
 
 
 # Define command line flags
-flags.DEFINE_string("device", "cpu", "Device to run tests on: 'cpu', 'cuda', or 'auto'")
+flags.DEFINE_string("device", "cpu", "Device to run tests on: 'cpu' or 'cuda'")
 flags.DEFINE_integer("seed", 42, "Random seed for reproducible tests")
-flags.DEFINE_boolean("skip_gpu_tests", False, "Skip GPU tests even if CUDA is available")
 
 FLAGS = flags.FLAGS
 
 
-# Base class for tests requiring seeding for determinism
-class BaseTestCase(parameterized.TestCase):
+class ScalarOptimizerTest(parameterized.TestCase):
     def setUp(self):
         """Set random seed and device before each test."""
         # Set seed for PyTorch (using seed from flags)
@@ -43,34 +41,17 @@ def setUp(self):
             torch.cuda.manual_seed_all(FLAGS.seed)
 
         # Set up device based on flags
-        self.device = self._get_test_device()
-
-    def _get_test_device(self):
-        """Get the device to use for testing based on flags."""
-        if FLAGS.device == "auto":
-            return "cuda" if torch.cuda.is_available() and not FLAGS.skip_gpu_tests else "cpu"
-        elif FLAGS.device == "cuda":
-            if not torch.cuda.is_available():
-                self.skipTest("CUDA not available")
-            if FLAGS.skip_gpu_tests:
-                self.skipTest("GPU tests skipped by flag")
-            return "cuda"
-        else:
-            return "cpu"
-
-    def _move_to_device(self, *tensors):
-        """Helper method to move tensors to the test device."""
-        return tuple(tensor.to(self.device) for tensor in tensors)
-
-
-class ScalarOptimizerTest(BaseTestCase):
+        self.device = FLAGS.device
+
     def test_calculate_adam_update_simple(self) -> None:
         exp_avg_initial = torch.tensor([[1.0]])
         exp_avg_sq_initial = torch.tensor([[2.0]])
         grad = torch.tensor([[0.5]])
 
         # Move tensors to the test device
-        exp_avg_initial, exp_avg_sq_initial, grad = self._move_to_device(exp_avg_initial, exp_avg_sq_initial, grad)
+        exp_avg_initial = exp_avg_initial.to(self.device)
+        exp_avg_sq_initial = exp_avg_sq_initial.to(self.device)
+        grad = grad.to(self.device)
 
         betas = (0.9, 0.99)
         eps = 1e-8

From 979f24bf1ad25f0415437b4cbe2602cef79c6801 Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 08:47:28 -0700
Subject: [PATCH 4/7] added device flag to all created tensors in test

Signed-off-by: mikail
---
 tests/test_scalar_optimizers.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/test_scalar_optimizers.py b/tests/test_scalar_optimizers.py
index 8fe3ae2..8e8244b 100644
--- a/tests/test_scalar_optimizers.py
+++ b/tests/test_scalar_optimizers.py
@@ -44,9 +44,9 @@ def setUp(self):
         self.device = FLAGS.device
 
     def test_calculate_adam_update_simple(self) -> None:
-        exp_avg_initial = torch.tensor([[1.0]])
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_initial = torch.tensor([[1.0]], device=self.device)
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
 
         # Move tensors to the test device
         exp_avg_initial = exp_avg_initial.to(self.device)
@@ -87,7 +87,7 @@ def test_calculate_adam_update_simple(self) -> None:
         )
 
         # Manually set Adam's internal state to match conditions *before* the current update
-        adam_optimizer.state[param]["step"] = torch.tensor(float(step - 1))
+        adam_optimizer.state[param]["step"] = torch.tensor(float(step - 1), device=self.device)
         adam_optimizer.state[param]["exp_avg"] = exp_avg_initial.clone()
         adam_optimizer.state[param]["exp_avg_sq"] = exp_avg_sq_initial.clone()
 
@@ -110,9 +110,9 @@ def test_calculate_adam_update_simple(self) -> None:
 
     def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None:
         # LaProp with momentum (beta1) = 0 should be equivalent to RMSProp.
-        exp_avg_initial = torch.tensor([[0.0]])  # Momentum is 0, so exp_avg starts at 0
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_initial = torch.tensor([[0.0]], device=self.device)  # Momentum is 0, so exp_avg starts at 0
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
         betas = (0.0, 0.99)  # beta1=0 for momentum
         eps = 1e-8
         step = 10
@@ -133,7 +133,7 @@ def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None
         )
 
         # Manually verify with RMSProp logic
-        initial_param_val_tensor = torch.tensor([[10.0]])
+        initial_param_val_tensor = torch.tensor([[10.0]], device=self.device)
         param = torch.nn.Parameter(initial_param_val_tensor.clone())
         param.grad = grad.clone()
 
@@ -148,7 +148,7 @@ def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None
         )
 
         # Manually set RMSProp's internal state
-        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step))
+        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step), device=self.device)
         rmsprop_optimizer.state[param]["square_avg"] = exp_avg_sq_initial.clone()
         rmsprop_optimizer.state[param]["momentum_buffer"] = exp_avg_initial.clone()
 
@@ -164,10 +164,10 @@ def test_calculate_laprop_update_with_zero_momentum_equals_rmsprop(self) -> None
 
     def test_calculate_ademamix_update_with_alpha_zero_equals_adam(self) -> None:
         # AdEMAMix with alpha=0 and no beta scheduling should be equivalent to Adam.
-        exp_avg_fast_initial = torch.tensor([[1.0]])
-        exp_avg_slow_initial = torch.tensor([[1.0]])
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_fast_initial = torch.tensor([[1.0]], device=self.device)
+        exp_avg_slow_initial = torch.tensor([[1.0]], device=self.device)
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
         betas = (0.9, 0.99, 0.999)
         eps = 1e-8
         step = 10
@@ -209,9 +209,9 @@ def test_calculate_ademamix_update_with_alpha_zero_equals_adam(self) -> None:
 
     def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmsprop(self) -> None:
         # sim_ademamix with momentum (beta_fast) = 0 and alpha = 0 should be equivalent to RMSProp.
-        exp_avg_initial = torch.tensor([[0.0]])  # Momentum is 0, so exp_avg starts at 0
-        exp_avg_sq_initial = torch.tensor([[2.0]])
-        grad = torch.tensor([[0.5]])
+        exp_avg_initial = torch.tensor([[0.0]], device=self.device)  # Momentum is 0, so exp_avg starts at 0
+        exp_avg_sq_initial = torch.tensor([[2.0]], device=self.device)
+        grad = torch.tensor([[0.5]], device=self.device)
         betas = (0.0, 0.99)  # beta1=0 for momentum
         eps = 1e-8
         step = 10
@@ -235,7 +235,7 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         )
 
         # Manually verify with RMSProp logic
-        initial_param_val_tensor = torch.tensor([[10.0]])
+        initial_param_val_tensor = torch.tensor([[10.0]], device=self.device)
         param = torch.nn.Parameter(initial_param_val_tensor.clone())
         param.grad = grad.clone()
 
@@ -250,7 +250,7 @@ def test_calculate_sim_ademamix_update_with_zero_momentum_and_alpha_equals_rmspr
         )
 
         # Manually set RMSProp's internal state
-        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step))
+        rmsprop_optimizer.state[param]["step"] = torch.tensor(float(step), device=self.device)
         rmsprop_optimizer.state[param]["square_avg"] = exp_avg_sq_initial.clone()
 
         rmsprop_optimizer.step()

From 43d8ccdd936095012a185c92670f680e070479a5 Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 08:48:45 -0700
Subject: [PATCH 5/7] added explicit device flag for gpu, removed auto, in test

Signed-off-by: mikail
---
 tests/ci/L0_Tests_GPU.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index 3d53227..2d5c0f6 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -21,9 +21,13 @@ coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
 <<<<<<< HEAD
+<<<<<<< HEAD
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py
 coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
 =======
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=auto
+=======
+coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cuda
+>>>>>>> b5f68de (added explicit device flag for gpu, removed auto, in test)
 
 >>>>>>> 69f98db (added cpu and gpu flag)

From 52a643d873a3e8f9dd8dad026bc8ea2fbcf0f321 Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 09:33:33 -0700
Subject: [PATCH 6/7] fixed L0 test file conflict

Signed-off-by: mikail
---
 tests/ci/L0_Tests_GPU.sh | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index 2d5c0f6..d8dc719 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -20,14 +20,5 @@ coverage run -p --source=emerging_optimizers tests/test_soap_functions.py
 coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
-<<<<<<< HEAD
-<<<<<<< HEAD
-coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py
-coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
-=======
-coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=auto
-=======
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cuda
->>>>>>> b5f68de (added explicit device flag for gpu, removed auto, in test)
-
->>>>>>> 69f98db (added cpu and gpu flag)
+coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.pyg
\ No newline at end of file

From b7fd64379b48d352359e1b4bf10e128aa7d3886b Mon Sep 17 00:00:00 2001
From: mikail
Date: Mon, 6 Oct 2025 09:34:17 -0700
Subject: [PATCH 7/7] fixed typo

Signed-off-by: mikail
---
 tests/ci/L0_Tests_GPU.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/L0_Tests_GPU.sh b/tests/ci/L0_Tests_GPU.sh
index d8dc719..9d80bcf 100644
--- a/tests/ci/L0_Tests_GPU.sh
+++ b/tests/ci/L0_Tests_GPU.sh
@@ -21,4 +21,4 @@ coverage run -p --source=emerging_optimizers tests/test_soap_utils.py
 coverage run -p --source=emerging_optimizers tests/soap_smoke_test.py
 coverage run -p --source=emerging_optimizers tests/soap_mnist_test.py
 coverage run -p --source=emerging_optimizers tests/test_scalar_optimizers.py --device=cuda
-coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.pyg
\ No newline at end of file
+coverage run -p --source=emerging_optimizers tests/test_spectral_clipping_utils.py
\ No newline at end of file