
Commit 62f518b

Merge pull request #48 from kozistr/refactor/test
[Test] Refactor test modules
2 parents c567e61 + 4c9f1cc commit 62f518b

File tree: 7 files changed, +108 -88 lines changed

pytorch_optimizer/lamb.py

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ def step(self, closure: CLOSURE = None) -> float:
 
         grad_norm: float = 1.0
         if self.pre_norm:
-            grad_norm = self.get_gradient_norm()
+            grad_norm = self.get_gradient_norm() + self.eps
 
         for group in self.param_groups:
            for p in group['params']:
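Note: this hunk only shows the guard itself; how grad_norm is consumed is not part of the diff. A minimal sketch of the pre-norm pattern it protects, assuming (as the name suggests) that gradients are later divided by the global gradient norm — the pre_norm_grad helper below is illustrative only, not code from this repository:

import torch

def pre_norm_grad(grad: torch.Tensor, grad_norm: float, eps: float = 1e-8) -> torch.Tensor:
    # Guard pattern: keep the divisor strictly positive, so an all-zero
    # gradient does not turn the scaled gradient into NaN/inf.
    return grad / (grad_norm + eps)

In lamb.py the eps is folded into grad_norm itself at the point shown above, which is also what the new (Lamb, {'pre_norm': True, 'eps': 1e-8}, 500) test case exercises.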

pytorch_optimizer/lookahead.py

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ def update(self, group: Dict):
         if self.pullback_momentum == 'pullback':
             internal_momentum = self.optimizer.state[fast]['momentum_buffer']
             self.optimizer.state[fast]['momentum_buffer'] = internal_momentum.mul_(self.alpha).add_(
-                1.0 - self.alpha, param_state['slow_mom']
+                param_state['slow_mom'], alpha=1.0 - self.alpha
             )
             param_state['slow_mom'] = self.optimizer.state[fast]['momentum_buffer']
         elif self.pullback_momentum == 'reset':
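This is a mechanical update to the torch.Tensor.add_ call: the scalar-first positional overload add_(scalar, tensor) is deprecated in recent PyTorch releases in favour of add_(tensor, alpha=scalar). Both forms compute the same in-place pullback interpolation; a standalone check with arbitrary values:

import torch

alpha = 0.5
buf = torch.ones(3)
slow = torch.full((3,), 2.0)

# momentum_buffer <- momentum_buffer * alpha + slow_mom * (1 - alpha)
buf.mul_(alpha).add_(slow, alpha=1.0 - alpha)
print(buf)  # tensor([1.5000, 1.5000, 1.5000])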

pytorch_optimizer/madgrad.py

Lines changed: 0 additions & 8 deletions
@@ -62,14 +62,6 @@ def check_valid_parameters(self):
         if self.eps < 0.0:
             raise ValueError(f'Invalid eps : {self.eps}')
 
-    @property
-    def supports_memory_efficient_fp16(self) -> bool:
-        return False
-
-    @property
-    def supports_flat_params(self) -> bool:
-        return True
-
     def step(self, closure: CLOSURE = None) -> LOSS:
         loss: LOSS = None
         if closure is not None:

pytorch_optimizer/ranger21.py

Lines changed: 3 additions & 0 deletions
@@ -255,6 +255,9 @@ def step(self, closure: CLOSURE = None) -> LOSS:
 
         # Phase 2 - Apply weight decay and step
         for group in self.param_groups:
+            if group['params'][0].grad is None:
+                continue
+
             lr = group['lr']
             step = self.state[group['params'][0]]['step']
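The new guard skips parameter groups whose first parameter never received a gradient, presumably so the phase-2 state look-up (self.state[...]['step']) is not attempted for parameters that produced no gradient. The frozen-parameter situation exercised by the new test_no_gradients test looks like this (standalone illustration, not repository code):

import torch
from torch import nn

layer = nn.Linear(1, 1)
layer.weight.requires_grad = False       # frozen parameter

layer(torch.ones(1, 1)).sum().backward()
print(layer.weight.grad)                 # None -> the new guard skips it
print(layer.bias.grad)                   # tensor([1.])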

pytorch_optimizer/version.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-__VERSION__ = '0.3.4'
+__VERSION__ = '0.3.5'

tests/test_optimizer_parameters.py

Lines changed: 26 additions & 5 deletions
@@ -1,9 +1,21 @@
 from typing import List
 
 import pytest
+import torch
+from torch import nn
 
 from pytorch_optimizer import SAM, Lookahead, load_optimizers
 
+
+class Example(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.fc1 = nn.Linear(1, 1)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.fc1(x)
+
+
 OPTIMIZER_NAMES: List[str] = [
     'adamp',
     'sgdp',
@@ -58,12 +70,12 @@ def test_weight_decay(optimizer_names):
 
 @pytest.mark.parametrize('optimizer_names', BETA_OPTIMIZER_NAMES)
 def test_betas(optimizer_names):
+    optimizer = load_optimizers(optimizer_names)
+
     with pytest.raises(ValueError):
-        optimizer = load_optimizers(optimizer_names)
         optimizer(None, betas=(-0.1, 0.1))
 
     with pytest.raises(ValueError):
-        optimizer = load_optimizers(optimizer_names)
         optimizer(None, betas=(0.1, -0.1))
 
 
@@ -73,11 +85,20 @@ def test_sam_parameters():
 
 
 def test_lookahead_parameters():
+    model: nn.Module = Example()
+    parameters = model.parameters()
+    optimizer = load_optimizers('adamp')(parameters)
+
+    pullback_momentum_list: List[str] = ['none', 'reset', 'pullback']
+    for pullback_momentum in pullback_momentum_list:
+        opt = Lookahead(optimizer, pullback_momentum=pullback_momentum)
+        opt.load_state_dict(opt.state_dict())
+
     with pytest.raises(ValueError):
-        Lookahead(load_optimizers('adamp'), k=0)
+        Lookahead(optimizer, k=0)
 
     with pytest.raises(ValueError):
-        Lookahead(load_optimizers('adamp'), alpha=0)
+        Lookahead(optimizer, alpha=0)
 
     with pytest.raises(ValueError):
-        Lookahead(load_optimizers('adamp'), pullback_momentum='asdf')
+        Lookahead(optimizer, pullback_momentum='invalid')
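The refactored test builds a real (if tiny) model so that Lookahead wraps an instantiated optimizer instead of a bare optimizer class, and it additionally round-trips the wrapper's state dict for every pullback_momentum mode. The same round trip in isolation, relying only on the API used in the diff:

from torch import nn
from pytorch_optimizer import Lookahead, load_optimizers

model = nn.Linear(1, 1)
optimizer = load_optimizers('adamp')(model.parameters())

opt = Lookahead(optimizer, pullback_momentum='pullback')
opt.load_state_dict(opt.state_dict())   # should restore cleanly, no exception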

tests/test_optimizers.py

Lines changed: 76 additions & 72 deletions
@@ -25,8 +25,6 @@
     SafeFP16Optimizer,
 )
 
-__REFERENCE__ = 'https://github.com/jettify/pytorch-optimizer/blob/master/tests/test_optimizer_with_nn.py'
-
 
 class LogisticRegression(nn.Module):
     def __init__(self):
@@ -83,41 +81,23 @@ def build_lookahead(*parameters, **kwargs):
     return Lookahead(AdamP(*parameters, **kwargs))
 
 
-FP32_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [
-    (build_lookahead, {'lr': 1e-2, 'weight_decay': 1e-3}, 200),
-    (AdaBelief, {'lr': 1e-2, 'weight_decay': 1e-3}, 200),
-    (AdaBelief, {'lr': 1e-2, 'weight_decay': 1e-3, 'amsgrad': True}, 200),
-    (AdaBelief, {'lr': 1e-2, 'weight_decay': 1e-3, 'weight_decouple': False}, 200),
-    (AdaBelief, {'lr': 1e-2, 'weight_decay': 1e-3, 'rectify': False}, 200),
-    (AdaBound, {'lr': 1e-2, 'gamma': 0.1, 'weight_decay': 1e-3}, 200),
-    (AdaBound, {'lr': 1e-2, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 200),
-    (AdamP, {'lr': 1e-3, 'weight_decay': 1e-3}, 800),
-    (DiffGrad, {'lr': 1e-2, 'weight_decay': 1e-3}, 200),
-    (DiffRGrad, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 500),
-    (RaLamb, {'lr': 1e-3, 'weight_decay': 1e-3}, 500),
-    (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3}, 200),
-    (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (SGDP, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (Ranger, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'num_iterations': 500}, 500),
-]
-
-FP16_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [
-    (build_lookahead, {'lr': 5e-1, 'weight_decay': 1e-3}, 500),
+OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [
+    (build_lookahead, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
     (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
     (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'amsgrad': True}, 200),
     (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'weight_decouple': False}, 200),
     (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'rectify': False}, 200),
     (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3}, 200),
-    (AdaBound, {'lr': 1e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 200),
-    (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3}, 500),
-    (DiffGrad, {'lr': 5e-1, 'weight_decay': 1e-3}, 500),
-    (DiffRGrad, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 500),
+    (AdaBound, {'lr': 5e-1, 'gamma': 0.1, 'weight_decay': 1e-3, 'amsbound': True}, 200),
+    (AdamP, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
+    (DiffGrad, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
+    (DiffRGrad, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
+    (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 500),
+    (Lamb, {'lr': 1e-1, 'weight_decay': 1e-3, 'pre_norm': True, 'eps': 1e-8}, 500),
+    (RaLamb, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
+    (MADGRAD, {'lr': 1e-2, 'weight_decay': 1e-3}, 500),
     (RAdam, {'lr': 1e-1, 'weight_decay': 1e-3}, 200),
-    (SGDP, {'lr': 5e-1, 'weight_decay': 1e-3}, 500),
+    (SGDP, {'lr': 2e-1, 'weight_decay': 1e-3}, 500),
     (Ranger, {'lr': 5e-1, 'weight_decay': 1e-3}, 200),
     (Ranger21, {'lr': 5e-1, 'weight_decay': 1e-3, 'num_iterations': 500}, 500),
 ]
@@ -137,20 +117,33 @@ def build_lookahead(*parameters, **kwargs):
 ]
 
 
-@pytest.mark.parametrize('optimizer_fp32_config', FP32_OPTIMIZERS, ids=ids)
-def test_f32_optimizers(optimizer_fp32_config):
+def tensor_to_numpy(x: torch.Tensor) -> np.ndarray:
+    return x.detach().cpu().numpy()
+
+
+def build_environment(use_gpu: bool = False) -> Tuple[Tuple[torch.Tensor, torch.Tensor], nn.Module, nn.Module]:
     torch.manual_seed(42)
 
     x_data, y_data = make_dataset()
-
     model: nn.Module = LogisticRegression()
     loss_fn: nn.Module = nn.BCEWithLogitsLoss()
 
+    if use_gpu and torch.cuda.is_available():
+        x_data, y_data = x_data.cuda(), y_data.cuda()
+        model = model.cuda()
+        loss_fn = loss_fn.cuda()
+
+    return (x_data, y_data), model, loss_fn
+
+
+@pytest.mark.parametrize('optimizer_fp32_config', OPTIMIZERS, ids=ids)
+def test_f32_optimizers(optimizer_fp32_config):
+    (x_data, y_data), model, loss_fn = build_environment()
+
     optimizer_class, config, iterations = optimizer_fp32_config
     optimizer = optimizer_class(model.parameters(), **config)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
 
@@ -164,23 +157,20 @@ def test_f32_optimizers(optimizer_fp32_config):
 
         optimizer.step()
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
 
 
-@pytest.mark.parametrize('optimizer_fp16_config', FP16_OPTIMIZERS, ids=ids)
+@pytest.mark.parametrize('optimizer_fp16_config', OPTIMIZERS, ids=ids)
 def test_f16_optimizers(optimizer_fp16_config):
-    torch.manual_seed(42)
-
-    x_data, y_data = make_dataset()
-
-    model: nn.Module = LogisticRegression()
-    loss_fn: nn.Module = nn.BCEWithLogitsLoss()
+    (x_data, y_data), model, loss_fn = build_environment()
 
     optimizer_class, config, iterations = optimizer_fp16_config
+    if optimizer_class.__name__ == 'MADGRAD':
+        return True
+
     optimizer = SafeFP16Optimizer(optimizer_class(model.parameters(), **config))
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(1000):
         optimizer.zero_grad()
 
@@ -194,24 +184,18 @@ def test_f16_optimizers(optimizer_fp16_config):
 
         optimizer.step()
 
-    assert init_loss - 0.01 > loss
+    assert tensor_to_numpy(init_loss) - 0.01 > tensor_to_numpy(loss)
 
 
 @pytest.mark.parametrize('adaptive', (False, True))
-@pytest.mark.parametrize('optimizer_sam_config', FP32_OPTIMIZERS, ids=ids)
+@pytest.mark.parametrize('optimizer_sam_config', OPTIMIZERS, ids=ids)
 def test_sam_optimizers(adaptive, optimizer_sam_config):
-    torch.manual_seed(42)
-
-    x_data, y_data = make_dataset()
-
-    model: nn.Module = LogisticRegression()
-    loss_fn: nn.Module = nn.BCEWithLogitsLoss()
+    (x_data, y_data), model, loss_fn = build_environment()
 
     optimizer_class, config, iterations = optimizer_sam_config
     optimizer = SAM(model.parameters(), optimizer_class, **config, adaptive=adaptive)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         loss = loss_fn(y_data, model(x_data))
         loss.backward()
@@ -223,10 +207,34 @@ def test_sam_optimizers(adaptive, optimizer_sam_config):
         if init_loss == np.inf:
             init_loss = loss
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
 
 
-@pytest.mark.parametrize('optimizer_pc_grad_config', FP32_OPTIMIZERS, ids=ids)
+@pytest.mark.parametrize('optimizer_adamd_config', ADAMD_SUPPORTED_OPTIMIZERS, ids=ids)
+def test_adamd_optimizers(optimizer_adamd_config):
+    (x_data, y_data), model, loss_fn = build_environment()
+
+    optimizer_class, config, iterations = optimizer_adamd_config
+    optimizer = optimizer_class(model.parameters(), **config)
+
+    init_loss, loss = np.inf, np.inf
+    for _ in range(iterations):
+        optimizer.zero_grad()
+
+        y_pred = model(x_data)
+        loss = loss_fn(y_pred, y_data)
+
+        if init_loss == np.inf:
+            init_loss = loss
+
+        loss.backward()
+
+        optimizer.step()
+
+    assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)
+
+
+@pytest.mark.parametrize('optimizer_pc_grad_config', OPTIMIZERS, ids=ids)
 def test_pc_grad_optimizers(optimizer_pc_grad_config):
     torch.manual_seed(42)
 
@@ -239,8 +247,7 @@ def test_pc_grad_optimizers(optimizer_pc_grad_config):
     optimizer_class, config, iterations = optimizer_pc_grad_config
     optimizer = PCGrad(optimizer_class(model.parameters(), **config))
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
         y_pred_1, y_pred_2 = model(x_data)
@@ -253,23 +260,20 @@ def test_pc_grad_optimizers(optimizer_pc_grad_config):
         optimizer.pc_backward([loss1, loss2])
         optimizer.step()
 
-    assert init_loss > 2.0 * loss
-
+    assert tensor_to_numpy(init_loss) > 1.5 * tensor_to_numpy(loss)
 
-@pytest.mark.parametrize('optimizer_adamd_config', ADAMD_SUPPORTED_OPTIMIZERS, ids=ids)
-def test_adamd_optimizers(optimizer_adamd_config):
-    torch.manual_seed(42)
 
-    x_data, y_data = make_dataset()
+@pytest.mark.parametrize('optimizer_config', OPTIMIZERS, ids=ids)
+def test_no_gradients(optimizer_config):
+    (x_data, y_data), model, loss_fn = build_environment()
 
-    model: nn.Module = LogisticRegression()
-    loss_fn: nn.Module = nn.BCEWithLogitsLoss()
+    model.fc1.weight.requires_grad = False
+    model.fc1.bias.requires_grad = False
 
-    optimizer_class, config, iterations = optimizer_adamd_config
+    optimizer_class, config, iterations = optimizer_config
     optimizer = optimizer_class(model.parameters(), **config)
 
-    loss: float = np.inf
-    init_loss: float = np.inf
+    init_loss, loss = np.inf, np.inf
     for _ in range(iterations):
         optimizer.zero_grad()
 
@@ -283,4 +287,4 @@ def test_adamd_optimizers(optimizer_adamd_config):
 
         optimizer.step()
 
-    assert init_loss > 2.0 * loss
+    assert tensor_to_numpy(init_loss) >= tensor_to_numpy(loss)
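The main structural change in this file is that the duplicated per-test setup (seed, dataset, model, loss) is centralized in build_environment(), the separate FP32/FP16 parameter lists are merged into a single OPTIMIZERS list, and loss comparisons go through tensor_to_numpy() since the losses are tensors rather than floats. In isolation, the comparison helper behaves like this (values are arbitrary):

import numpy as np
import torch

def tensor_to_numpy(x: torch.Tensor) -> np.ndarray:
    # detach from the autograd graph and move to CPU before comparing as plain numbers
    return x.detach().cpu().numpy()

init_loss, loss = torch.tensor(1.0), torch.tensor(0.4)
assert tensor_to_numpy(init_loss) > 2.0 * tensor_to_numpy(loss)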
