refactor: move the negative-LR check from step() into get_warm_down

kozistr · kozistr · commit b3f7b42cd3ca · 2023-04-02T15:04:01.000+09:00
diff --git a/pytorch_optimizer/optimizer/ranger21.py b/pytorch_optimizer/optimizer/ranger21.py
@@ -182,6 +182,10 @@ def get_warm_down(self, lr: float, iteration: int) -> float:
 
         new_lr: float = self.starting_lr - self.warm_down_lr_delta * warm_down_pct
         new_lr = max(new_lr, self.min_lr)
+
+        if new_lr < 0.0:
+            raise NegativeLRError(new_lr)
+
         self.current_lr = new_lr
 
         return new_lr
@@ -249,9 +253,11 @@ def step(self, closure: CLOSURE = None) -> LOSS:
         for group in self.param_groups:
             if len(self.state) == 0:
                 continue
+
             p = next(iter(self.state.keys()))
-            lr = group["lr"]
-            step = self.state[p]["step"]
+
+            lr = group['lr']
+            step = self.state[p]['step']
 
             beta1, beta2 = group['betas']
             bias_correction1 = 1.0 - beta1 ** step  # fmt: skip
@@ -264,17 +270,14 @@ def step(self, closure: CLOSURE = None) -> LOSS:
 
             # warm down
             lr = self.get_warm_down(lr, step)
-            if lr < 0.0:
-                raise NegativeLRError(lr)
 
             # stable decay
             decay = group['weight_decay']
             if decay:
                 p.mul_(1.0 - decay * lr / variance_normalized)
 
             # norm loss
-            u_norm = unit_norm(p)
-            correction = 2.0 * self.norm_loss_factor * (1.0 - torch.div(1, u_norm + self.eps))
+            correction = 2.0 * self.norm_loss_factor * (1.0 - torch.div(1, unit_norm(p) + self.eps))
             p.mul_(1.0 - lr * correction)
 
             for p in group['params']: