@@ -59,20 +59,20 @@ def __init__(
5959 norm_loss_factor : float = 1e-4 ,
6060 eps : float = 1e-8 ,
6161 ):
62- """
62+ """Ranger21
6363 :param params: PARAMETERS. iterable of parameters to optimize or dicts defining parameter groups
64- :param lr: float. learning rate.
65- :param beta0: float. Manages the amplitude of the noise introduced by positive negative momentum.
66- While 0.9 is a recommended default value, you can use -0.5 to minimize the noise.
64+ :param lr: float. learning rate
65+ :param beta0: float. Manages the amplitude of the noise introduced by positive negative momentum
66+ While 0.9 is a recommended default value, you can use -0.5 to minimize the noise
6767 :param betas: BETAS. coefficients used for computing running averages of gradient and the squared hessian trace
6868 :param use_softplus: bool. use softplus to smooth
6969 :param beta_softplus: float. beta for softplus
70- :param agc_clipping_value: float.
71- :param agc_eps: float.
70+ :param agc_clipping_value: float. clipping value for adaptive gradient clipping (AGC)
71+ :param agc_eps: float. eps for adaptive gradient clipping (AGC)
7272 :param centralize_gradients: bool. use GC both convolution & fc layers
7373 :param normalize_gradients: bool. use gradient normalization
74- :param lookahead_merge_time: int.
75- :param lookahead_blending_alpha: float.
74+ :param lookahead_merge_time: int. merge time
75+ :param lookahead_blending_alpha: float. blending alpha
7676 :param weight_decay: float. weight decay (L2 penalty)
7777 :param norm_loss_factor: float. norm loss factor
7878 :param eps: float. term added to the denominator to improve numerical stability
0 commit comments