Skip to content

Commit dd5ade1

Browse files
committed
update: test cases
1 parent 44893a5 commit dd5ade1

File tree

1 file changed

+115
-13
lines changed

1 file changed

+115
-13
lines changed

tests/constants.py

Lines changed: 115 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -129,45 +129,147 @@
129129
(Ranger, {'lr': 5e-1, 'weight_decay': 1e-3}, 150),
130130
(Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'num_iterations': 500}, 200),
131131
(Shampoo, {'lr': 5e-1, 'weight_decay': 1e-3, 'momentum': 0.1}, 10),
132-
(ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 0}, 10),
133-
(ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 1}, 10),
134-
(ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 2}, 10),
135-
(ScalableShampoo, {'lr': 1e-2, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 3}, 10),
136-
(ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'graft_type': 4}, 10),
137132
(
138133
ScalableShampoo,
139-
{'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'pre_conditioner_type': 0},
134+
{
135+
'lr': 1e-1,
136+
'weight_decay': 1e-3,
137+
'start_preconditioning_step': 9,
138+
'preconditioning_compute_steps': 10,
139+
'graft_type': 0,
140+
},
140141
10,
141142
),
142143
(
143144
ScalableShampoo,
144-
{'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'pre_conditioner_type': 1},
145+
{
146+
'lr': 1e-1,
147+
'weight_decay': 1e-3,
148+
'start_preconditioning_step': 9,
149+
'preconditioning_compute_steps': 10,
150+
'graft_type': 1,
151+
},
145152
10,
146153
),
147154
(
148155
ScalableShampoo,
149-
{'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'pre_conditioner_type': 2},
156+
{
157+
'lr': 1e-1,
158+
'weight_decay': 1e-3,
159+
'start_preconditioning_step': 9,
160+
'preconditioning_compute_steps': 10,
161+
'graft_type': 2,
162+
},
150163
10,
151164
),
152165
(
153166
ScalableShampoo,
154-
{'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'inverse_exponent_override': 1},
167+
{
168+
'lr': 1e-2,
169+
'weight_decay': 1e-3,
170+
'start_preconditioning_step': 9,
171+
'preconditioning_compute_steps': 10,
172+
'graft_type': 3,
173+
},
155174
10,
156175
),
157-
(ScalableShampoo, {'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'nesterov': False}, 10),
158176
(
159177
ScalableShampoo,
160-
{'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'decoupled_weight_decay': True},
178+
{
179+
'lr': 1e-1,
180+
'weight_decay': 1e-3,
181+
'start_preconditioning_step': 9,
182+
'preconditioning_compute_steps': 10,
183+
'graft_type': 4,
184+
},
161185
10,
162186
),
163187
(
164188
ScalableShampoo,
165-
{'lr': 1e-0, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'decoupled_learning_rate': False},
189+
{
190+
'lr': 1e-1,
191+
'weight_decay': 1e-3,
192+
'start_preconditioning_step': 9,
193+
'preconditioning_compute_steps': 10,
194+
'pre_conditioner_type': 0,
195+
},
166196
10,
167197
),
168198
(
169199
ScalableShampoo,
170-
{'lr': 1e-1, 'weight_decay': 1e-3, 'start_preconditioning_step': 9, 'moving_average_for_momentum': True},
200+
{
201+
'lr': 1e-1,
202+
'weight_decay': 1e-3,
203+
'start_preconditioning_step': 9,
204+
'preconditioning_compute_steps': 10,
205+
'pre_conditioner_type': 1,
206+
},
207+
10,
208+
),
209+
(
210+
ScalableShampoo,
211+
{
212+
'lr': 1e-1,
213+
'weight_decay': 1e-3,
214+
'start_preconditioning_step': 9,
215+
'preconditioning_compute_steps': 10,
216+
'pre_conditioner_type': 2,
217+
},
218+
10,
219+
),
220+
(
221+
ScalableShampoo,
222+
{
223+
'lr': 1e-1,
224+
'weight_decay': 1e-3,
225+
'start_preconditioning_step': 9,
226+
'preconditioning_compute_steps': 10,
227+
'inverse_exponent_override': 1,
228+
},
229+
10,
230+
),
231+
(
232+
ScalableShampoo,
233+
{
234+
'lr': 1e-1,
235+
'weight_decay': 1e-3,
236+
'start_preconditioning_step': 9,
237+
'preconditioning_compute_steps': 10,
238+
'nesterov': False,
239+
},
240+
10,
241+
),
242+
(
243+
ScalableShampoo,
244+
{
245+
'lr': 1e-1,
246+
'weight_decay': 1e-3,
247+
'start_preconditioning_step': 9,
248+
'preconditioning_compute_steps': 10,
249+
'decoupled_weight_decay': True,
250+
},
251+
10,
252+
),
253+
(
254+
ScalableShampoo,
255+
{
256+
'lr': 1e-0,
257+
'weight_decay': 1e-3,
258+
'start_preconditioning_step': 9,
259+
'preconditioning_compute_steps': 10,
260+
'decoupled_learning_rate': False,
261+
},
262+
10,
263+
),
264+
(
265+
ScalableShampoo,
266+
{
267+
'lr': 1e-1,
268+
'weight_decay': 1e-3,
269+
'start_preconditioning_step': 9,
270+
'preconditioning_compute_steps': 10,
271+
'moving_average_for_momentum': True,
272+
},
171273
10,
172274
),
173275
(PNM, {'lr': 3e-1}, 50),

0 commit comments

Comments (0)