3030
3131
3232class Ranger21 (Optimizer ):
33+ """
34+ Reference : https://github.com/lessw2020/Ranger21/blob/main/ranger21/ranger21.py
35+ """
36+
3337 def __init__ (
3438 self ,
3539 params : PARAMS ,
@@ -55,8 +59,8 @@ def __init__(
5559 pnm_momentum_factor : float = 1.0 ,
5660 momentum : float = 0.9 ,
5761 eps : float = 1e-8 ,
58- num_batches_per_epoch = None ,
59- num_epochs = None ,
62+ num_batches_per_epoch : Optional [ int ] = None ,
63+ num_epochs : Optional [ int ] = None ,
6064 use_chebyshev_schedule : bool = False ,
6165 use_warmup : bool = True ,
6266 num_warmup_iterations = None ,
@@ -69,6 +73,15 @@ def __init__(
6973 warmup_pct_default : float = 0.22 ,
7074 logging_active : bool = True ,
7175 ):
76+ """Ranger21 optimizer (RAdam + Lookahead + Gradient Centralization, combined into one optimizer)
77+ :param params: PARAMS. iterable of parameters to optimize or dicts defining parameter groups
78+ :param lr: float. learning rate.
79+ :param betas: BETAS. coefficients used for computing running averages of gradient and the squared hessian trace
80+ :param eps: float. term added to the denominator to improve numerical stability
81+ :param weight_decay: float. weight decay (L2 penalty)
82+ :param use_gc: bool. use Gradient Centralization (both convolution & fc layers)
83+ :param gc_conv_only: bool. use Gradient Centralization (only convolution layer)
84+ """
7285 defaults : DEFAULT_PARAMETERS = dict (
7386 lr = lr ,
7487 momentum = momentum ,
0 commit comments