update: ScalableShampoo test cases (add 'preconditioning_compute_steps': 10 to each configuration)

kozistr · kozistr · commit dd5ade1f3790 · 2023-04-21T21:23:21.000+09:00
diff --git a/tests/constants.py b/tests/constants.py
@@ -129,45 +129,147 @@
     (Ranger, {'lr': 5e-1, 'weight_decay': 1e-3}, 150),
     (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'num_iterations': 500}, 200),
     (Shampoo, {'lr': 5e-1, 'weight_decay': 1e-3, 'momentum': 0.1}, 10),
-    (ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 0}, 10),
-    (ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 1}, 10),
-    (ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 2}, 10),
-    (ScalableShampoo, {'lr': 1e-2, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 3}, 10),
-    (ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 4}, 10),
     (
         ScalableShampoo,
-        {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'pre_conditioner_type': 0},
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'graft_type': 0,
+        },
         10,
     ),
     (
         ScalableShampoo,
-        {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'pre_conditioner_type': 1},
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'graft_type': 1,
+        },
         10,
     ),
     (
         ScalableShampoo,
-        {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'pre_conditioner_type': 2},
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'graft_type': 2,
+        },
         10,
     ),
     (
         ScalableShampoo,
-        {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'inverse_exponent_override': 1},
+        {
+            'lr': 1e-2,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'graft_type': 3,
+        },
         10,
     ),
-    (ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'nesterov': False}, 10),
     (
         ScalableShampoo,
-        {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'decoupled_weight_decay': True},
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'graft_type': 4,
+        },
         10,
     ),
     (
         ScalableShampoo,
-        {'lr': 1e-0, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'decoupled_learning_rate': False},
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'pre_conditioner_type': 0,
+        },
         10,
     ),
     (
         ScalableShampoo,
-        {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'moving_average_for_momentum': True},
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'pre_conditioner_type': 1,
+        },
+        10,
+    ),
+    (
+        ScalableShampoo,
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'pre_conditioner_type': 2,
+        },
+        10,
+    ),
+    (
+        ScalableShampoo,
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'inverse_exponent_override': 1,
+        },
+        10,
+    ),
+    (
+        ScalableShampoo,
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'nesterov': False,
+        },
+        10,
+    ),
+    (
+        ScalableShampoo,
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'decoupled_weight_decay': True,
+        },
+        10,
+    ),
+    (
+        ScalableShampoo,
+        {
+            'lr': 1e-0,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'decoupled_learning_rate': False,
+        },
+        10,
+    ),
+    (
+        ScalableShampoo,
+        {
+            'lr': 1e-1,
+            'weight_decay': 1e-3,
+            'start_preconditioning_step': 9,
+            'preconditioning_compute_steps': 10,
+            'moving_average_for_momentum': True,
+        },
         10,
     ),
     (PNM, {'lr': 3e-1}, 50),