@@ -340,7 +340,8 @@ def sample_params(self, seed: int) -> Dict[str, Any]:
340340 assert self .hpo_space_name in ['default' , 'clr' , 'moresigma' , 'moresigmadim' , 'moresigmadimreg' ,
341341 'moresigmadimsize' , 'moresigmadimlr' , 'probclass' , 'probclass-mlp' , 'large' ,
342342 'alt1' , 'alt2' , 'alt3' , 'alt4' , 'alt5' , 'alt6' , 'alt7' , 'alt8' , 'alt9' , 'alt10' ,
343- 'tabarena' ]
343+ 'tabarena' , 'alt11' , 'alt12' , 'alt13' , 'alt14' , 'alt15' , 'alt16' , 'alt17' ,
344+ 'alt18' ]
344345 rng = np .random .default_rng (seed = seed )
345346
346347 if self .hpo_space_name == 'probclass-mlp' :
@@ -660,6 +661,186 @@ def sample_params(self, seed: int) -> Dict[str, Any]:
660661 params ['plr_hidden_2' ] = 4
661662 params ['n_epochs' ] = 256
662663 params ['use_early_stopping' ] = False
664+ elif self .hpo_space_name == 'alt11' :
665+ # tabarena without the large configs
666+ params = {
667+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
668+ 'hidden_sizes' : 'rectangular' ,
669+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
670+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
671+ 'act' : 'mish' ,
672+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
673+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
674+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
675+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
676+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
677+ 'ls_eps_sched' : 'coslog4' ,
678+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
679+ 'p_drop_sched' : 'flat_cos' ,
680+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
681+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
682+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
683+ }
684+ elif self .hpo_space_name == 'alt12' :
685+ # alt11 with n_hidden_layers=1 in the search space
686+ params = {
687+ 'n_hidden_layers' : rng .integers (1 , 4 , endpoint = True ),
688+ 'hidden_sizes' : 'rectangular' ,
689+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
690+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
691+ 'act' : 'mish' ,
692+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
693+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
694+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
695+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
696+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
697+ 'ls_eps_sched' : 'coslog4' ,
698+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
699+ 'p_drop_sched' : 'flat_cos' ,
700+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
701+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
702+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
703+ }
704+ elif self .hpo_space_name == 'alt13' :
705+ # alt11 with more categorical hyperparameters
706+ params = {
707+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
708+ 'hidden_sizes' : 'rectangular' ,
709+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
710+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
711+ 'act' : 'mish' ,
712+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
713+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
714+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
715+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
716+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
717+ 'ls_eps_sched' : 'coslog4' ,
718+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
719+ 'p_drop_sched' : 'flat_cos' ,
720+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
721+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
722+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
723+ 'max_one_hot_cat_size' : int (np .floor (np .exp (rng .uniform (np .log (4.0 ), np .log (33.0 )))).item ()),
724+ 'embedding_size' : int (rng .choice ([4 , 8 , 16 ])),
725+ }
726+ elif self .hpo_space_name == 'alt14' :
727+ # alt13 with weight_init_mode='normal'
728+ params = {
729+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
730+ 'hidden_sizes' : 'rectangular' ,
731+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
732+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
733+ 'act' : 'mish' ,
734+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
735+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
736+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
737+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
738+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
739+ 'ls_eps_sched' : 'coslog4' ,
740+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
741+ 'p_drop_sched' : 'flat_cos' ,
742+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
743+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
744+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
745+ 'max_one_hot_cat_size' : int (np .floor (np .exp (rng .uniform (np .log (4.0 ), np .log (33.0 )))).item ()),
746+ 'embedding_size' : int (rng .choice ([4 , 8 , 16 ])),
747+ 'weight_init_mode' : 'normal' ,
748+ }
749+ elif self .hpo_space_name == 'alt15' :
750+ # alt13 with tuning momentum (beta1)
751+ params = {
752+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
753+ 'hidden_sizes' : 'rectangular' ,
754+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
755+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
756+ 'act' : 'mish' ,
757+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
758+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
759+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
760+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
761+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
762+ 'ls_eps_sched' : 'coslog4' ,
763+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
764+ 'p_drop_sched' : 'flat_cos' ,
765+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
766+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
767+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
768+ 'max_one_hot_cat_size' : int (np .floor (np .exp (rng .uniform (np .log (4.0 ), np .log (33.0 )))).item ()),
769+ 'embedding_size' : int (rng .choice ([4 , 8 , 16 ])),
770+ 'mom' : 1.0 - np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))), # tune in [0.7, 0.98]
771+ }
772+ elif self .hpo_space_name == 'alt16' :
773+ # alt13 with n_ens=2
774+ params = {
775+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
776+ 'hidden_sizes' : 'rectangular' ,
777+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
778+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
779+ 'act' : 'mish' ,
780+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
781+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
782+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
783+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
784+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
785+ 'ls_eps_sched' : 'coslog4' ,
786+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
787+ 'p_drop_sched' : 'flat_cos' ,
788+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
789+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
790+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
791+ 'max_one_hot_cat_size' : int (np .floor (np .exp (rng .uniform (np .log (4.0 ), np .log (33.0 )))).item ()),
792+ 'embedding_size' : int (rng .choice ([4 , 8 , 16 ])),
793+ 'n_ens' : 2 ,
794+ 'ens_av_before_softmax' : True ,
795+ }
796+ elif self .hpo_space_name == 'alt17' :
797+ # alt13 with n_ens=4
798+ params = {
799+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
800+ 'hidden_sizes' : 'rectangular' ,
801+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
802+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
803+ 'act' : 'mish' ,
804+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
805+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
806+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
807+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
808+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
809+ 'ls_eps_sched' : 'coslog4' ,
810+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
811+ 'p_drop_sched' : 'flat_cos' ,
812+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
813+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
814+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
815+ 'max_one_hot_cat_size' : int (np .floor (np .exp (rng .uniform (np .log (4.0 ), np .log (33.0 )))).item ()),
816+ 'embedding_size' : int (rng .choice ([4 , 8 , 16 ])),
817+ 'n_ens' : 4 ,
818+ 'ens_av_before_softmax' : True ,
819+ }
820+ elif self .hpo_space_name == 'alt18' :
821+ # alt17 but with averaging after softmax
822+ params = {
823+ 'n_hidden_layers' : rng .integers (2 , 4 , endpoint = True ),
824+ 'hidden_sizes' : 'rectangular' ,
825+ 'hidden_width' : rng .choice ([256 , 384 , 512 ]),
826+ 'p_drop' : rng .uniform (0.0 , 0.5 ),
827+ 'act' : 'mish' ,
828+ 'plr_sigma' : np .exp (rng .uniform (np .log (1e-2 ), np .log (50 ))),
829+ 'sq_mom' : 1.0 - np .exp (rng .uniform (np .log (5e-3 ), np .log (5e-2 ))),
830+ 'plr_lr_factor' : np .exp (rng .uniform (np .log (5e-2 ), np .log (3e-1 ))),
831+ 'scale_lr_factor' : np .exp (rng .uniform (np .log (2.0 ), np .log (10.0 ))),
832+ 'first_layer_lr_factor' : np .exp (rng .uniform (np .log (0.3 ), np .log (1.5 ))),
833+ 'ls_eps_sched' : 'coslog4' ,
834+ 'ls_eps' : np .exp (rng .uniform (np .log (5e-3 ), np .log (1e-1 ))),
835+ 'p_drop_sched' : 'flat_cos' ,
836+ 'lr' : np .exp (rng .uniform (np .log (2e-2 ), np .log (3e-1 ))),
837+ 'wd' : np .exp (rng .uniform (np .log (1e-3 ), np .log (5e-2 ))),
838+ 'use_ls' : rng .choice ([False , True ]), # use label smoothing (will be ignored for regression)
839+ 'max_one_hot_cat_size' : int (np .floor (np .exp (rng .uniform (np .log (4.0 ), np .log (33.0 )))).item ()),
840+ 'embedding_size' : int (rng .choice ([4 , 8 , 16 ])),
841+ 'n_ens' : 4 ,
842+ 'ens_av_before_softmax' : False ,
843+ }
663844
664845 # print(f'{params=}')
665846
0 commit comments