update: RAdam

kozistr · kozistr · commit 6fb3a4f25ec6 · 2022-01-29T15:10:36.000+09:00
diff --git a/pytorch_optimizer/radam.py b/pytorch_optimizer/radam.py
@@ -35,13 +35,13 @@ def __init__(
         adamd_debias_term: bool = False,
         eps: float = 1e-8,
     ):
-        """
+        """RAdam
         :param params: PARAMETERS. iterable of parameters to optimize or dicts defining parameter groups
-        :param lr: float. learning rate.
+        :param lr: float. learning rate
         :param betas: BETAS. coefficients used for computing running averages of gradient and the squared hessian trace
         :param weight_decay: float. weight decay (L2 penalty)
         :param n_sma_threshold: int. (recommended is 5)
-        :param degenerated_to_sgd: float.
+        :param degenerated_to_sgd: bool. fall back to an SGD-style update when the variance is not tractable
         :param adamd_debias_term: bool. Only correct the denominator to avoid inflating step sizes early in training
         :param eps: float. term added to the denominator to improve numerical stability
         """