@@ -27,18 +27,21 @@ def __init__(
2727         weight_decay: float = 0.0,
2828         momentum: float = 0.9,
2929         trust_coefficient: float = 0.001,
30+          eps: float = 1e-6,
3031     ):
3132 """LARS optimizer, no rate scaling or weight decay for parameters <= 1D
3233 :param params: PARAMETERS. iterable of parameters to optimize or dicts defining parameter groups
3334 :param lr: float. learning rate
3435 :param weight_decay: float. weight decay (L2 penalty)
3536 :param momentum: float. momentum
3637 :param trust_coefficient: float. trust_coefficient
38+ :param eps: float. epsilon
3739 """
3840         self.lr = lr
3941         self.weight_decay = weight_decay
4042         self.momentum = momentum
4143         self.trust_coefficient = trust_coefficient
44+          self.eps = eps
4245
4346         self.check_valid_parameters()
4447
@@ -59,6 +62,8 @@ def check_valid_parameters(self):
5962             raise ValueError(f'Invalid momentum : {self.momentum}')
6063         if self.trust_coefficient < 0.0:
6164             raise ValueError(f'Invalid trust_coefficient : {self.trust_coefficient}')
65+          if self.eps < 0.0:
66+              raise ValueError(f'Invalid eps : {self.eps}')
6267
6368     @torch.no_grad()
6469     def step(self, closure: CLOSURE = None) -> LOSS:
@@ -84,7 +89,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:
8489
8590                 q = torch.where(
8691                     param_norm > 0.0,
87-                      torch.where(update_norm > 0, (g['trust_coefficient'] * param_norm / update_norm), one),
92+                      torch.where(update_norm > 0.0, (g['trust_coefficient'] * param_norm / update_norm), one),
8893                     one,
8994                 )
9095 dp = dp .mul (q )
0 commit comments