@@ -93,7 +93,7 @@ def build_lookahead(*parameters, **kwargs):
     (DiffGrad, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
     (DiffRGrad, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
     (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'pre_norm': True}, 500),
+    (Lamb, {'lr': 2e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'eps': 1e-8}, 500),
     (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
     (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3}, 500),
     (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
@@ -117,6 +117,10 @@ def build_lookahead(*parameters, **kwargs):
 ]
 
 
+def tensor_to_numpy(x: torch.Tensor) -> np.ndarray:
+    return x.detach().cpu().numpy()
+
+
 def build_environment(use_gpu: bool = False) -> Tuple[Tuple[torch.Tensor, torch.Tensor], nn.Module, nn.Module]:
     torch.manual_seed(42)
 
@@ -139,8 +143,7 @@ def test_f32_optimizers(optimizer_fp32_config):
     optimizer_class, config, iterations = optimizer_fp32_config
     optimizer = optimizer_class(model.parameters(), **config)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
 
@@ -154,7 +157,7 @@ def test_f32_optimizers(optimizer_fp32_config):
 
         optimizer.step()
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
 
 
 @pytest.mark.parametrize('optimizer_fp16_config', OPTIMIZERS, ids=ids)
@@ -167,8 +170,7 @@ def test_f16_optimizers(optimizer_fp16_config):
 
     optimizer = SafeFP16Optimizer(optimizer_class(model.parameters(), **config))
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(1000):
         optimizer.zero_grad()
 
@@ -182,7 +184,7 @@ def test_f16_optimizers(optimizer_fp16_config):
 
         optimizer.step()
 
-    assert init_loss - 0.01 > loss
+    assert tensor_to_numpy(init_loss) - 0.01 > tensor_to_numpy(loss)
 
 
 @pytest.mark.parametrize('adaptive', (False, True))
@@ -193,8 +195,7 @@ def test_sam_optimizers(adaptive, optimizer_sam_config):
     optimizer_class, config, iterations = optimizer_sam_config
     optimizer = SAM(model.parameters(), optimizer_class, **config, adaptive=adaptive)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         loss = loss_fn(y_data, model(x_data))
         loss.backward()
@@ -206,7 +207,7 @@ def test_sam_optimizers(adaptive, optimizer_sam_config):
         if init_loss == np.inf:
             init_loss = loss
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
 
 
 @pytest.mark.parametrize('optimizer_adamd_config', ADAMD_SUPPORTED_OPTIMIZERS, ids=ids)
@@ -216,8 +217,7 @@ def test_adamd_optimizers(optimizer_adamd_config):
     optimizer_class, config, iterations = optimizer_adamd_config
     optimizer = optimizer_class(model.parameters(), **config)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
 
@@ -231,7 +231,7 @@ def test_adamd_optimizers(optimizer_adamd_config):
 
         optimizer.step()
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
 
 
 @pytest.mark.parametrize('optimizer_pc_grad_config', OPTIMIZERS, ids=ids)
@@ -247,8 +247,7 @@ def test_pc_grad_optimizers(optimizer_pc_grad_config):
     optimizer_class, config, iterations = optimizer_pc_grad_config
     optimizer = PCGrad(optimizer_class(model.parameters(), **config))
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
         y_pred_1, y_pred_2 = model(x_data)
@@ -261,7 +260,7 @@ def test_pc_grad_optimizers(optimizer_pc_grad_config):
         optimizer.pc_backward([loss1, loss2])
         optimizer.step()
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
 
 
 @pytest.mark.parametrize('optimizer_config', OPTIMIZERS, ids=ids)
@@ -274,8 +273,7 @@ def test_no_gradients(optimizer_config):
     optimizer_class, config, iterations = optimizer_config
     optimizer = optimizer_class(model.parameters(), **config)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
 
@@ -289,4 +287,4 @@ def test_no_gradients(optimizer_config):
 
         optimizer.step()
 
-    assert init_loss >= loss
+    assert tensor_to_numpy(init_loss) >= tensor_to_numpy(loss)
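
A minimal, hypothetical sketch of what the updated assertions do, outside the diff: by the time each assert runs, `init_loss` and `loss` hold `torch.Tensor` values returned by the loss function, so the added `tensor_to_numpy` helper detaches them from the autograd graph and moves them to the CPU before comparing. The tensor values below are illustrative stand-ins, not part of the test suite.

import numpy as np
import torch


def tensor_to_numpy(x: torch.Tensor) -> np.ndarray:
    return x.detach().cpu().numpy()


# Illustrative stand-ins for the first and final recorded training losses.
init_loss = torch.tensor(1.0)
loss = torch.tensor(0.25, requires_grad=True)

# Mirrors the updated assertion: compare plain NumPy scalars instead of tensors.
assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)

Comparing detached NumPy scalars avoids grad-tracking and device-placement surprises that can arise when asserting directly on live (possibly CUDA) tensors.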