@@ -86,6 +86,9 @@ def build_lookahead(*parameters, **kwargs):
8686FP32_OPTIMIZERS : List [Tuple [Any , Dict [str , Union [float , bool , int ]], int ]] = [
8787 (build_lookahead , {'lr' : 1e-2 , 'weight_decay' : 1e-3 }, 200 ),
8888 (AdaBelief , {'lr' : 1e-2 , 'weight_decay' : 1e-3 }, 200 ),
89+ (AdaBelief , {'lr' : 1e-2 , 'weight_decay' : 1e-3 , 'amsgrad' : True }, 200 ),
90+ (AdaBelief , {'lr' : 1e-2 , 'weight_decay' : 1e-3 , 'weight_decouple' : False }, 200 ),
91+ (AdaBelief , {'lr' : 1e-2 , 'weight_decay' : 1e-3 , 'rectify' : False }, 200 ),
8992 (AdaBound , {'lr' : 1e-2 , 'gamma' : 0.1 , 'weight_decay' : 1e-3 }, 200 ),
9093 (AdaBound , {'lr' : 1e-2 , 'gamma' : 0.1 , 'weight_decay' : 1e-3 , 'amsbound' : True }, 200 ),
9194 (AdamP , {'lr' : 1e-3 , 'weight_decay' : 1e-3 }, 800 ),
@@ -103,6 +106,9 @@ def build_lookahead(*parameters, **kwargs):
103106FP16_OPTIMIZERS : List [Tuple [Any , Dict [str , Union [float , bool , int ]], int ]] = [
104107 (build_lookahead , {'lr' : 5e-1 , 'weight_decay' : 1e-3 }, 500 ),
105108 (AdaBelief , {'lr' : 5e-1 , 'weight_decay' : 1e-3 }, 200 ),
109+ (AdaBelief , {'lr' : 5e-1 , 'weight_decay' : 1e-3 , 'amsgrad' : True }, 200 ),
110+ (AdaBelief , {'lr' : 5e-1 , 'weight_decay' : 1e-3 , 'weight_decouple' : False }, 200 ),
111+ (AdaBelief , {'lr' : 5e-1 , 'weight_decay' : 1e-3 , 'rectify' : False }, 200 ),
106112 (AdaBound , {'lr' : 5e-1 , 'gamma' : 0.1 , 'weight_decay' : 1e-3 }, 200 ),
107113 (AdaBound , {'lr' : 1e-1 , 'gamma' : 0.1 , 'weight_decay' : 1e-3 , 'amsbound' : True }, 200 ),
108114 (AdamP , {'lr' : 5e-1 , 'weight_decay' : 1e-3 }, 500 ),
0 commit comments