|
56 | 56 | # discovered in a Bayesian tuning study done on Katib) |
57 | 57 |
|
58 | 58 | meta_trial_number = 0 # In distributed training, set this to a random number |
59 | | -activation = "gelu" |
60 | | -predecessor_level_connection_affinity_factor_first = 19.613 |
61 | | -predecessor_level_connection_affinity_factor_main = 0.5518 |
62 | | -max_consecutive_lateral_connections = 34 |
63 | | -p_lateral_connection = 0.36014 |
64 | | -num_lateral_connection_tries_per_unit = 11 |
65 | | -learning_rate = 0.095 |
66 | | -epochs = 145 |
67 | | -batch_size = 634 |
68 | | -maximum_levels = 5 |
69 | | -maximum_units_per_level = 5 |
70 | | -maximum_neurons_per_unit = 25 |
| 59 | +activation = 'swish' |
| 60 | +predecessor_level_connection_affinity_factor_first = 0.506486683067576 |
| 61 | +predecessor_level_connection_affinity_factor_main = 1.6466748663373876 |
| 62 | +max_consecutive_lateral_connections = 35 |
| 63 | +p_lateral_connection = 3.703218275217572 |
| 64 | +num_lateral_connection_tries_per_unit = 12 |
| 65 | +learning_rate = 0.02804912925494706 |
| 66 | +epochs = 130 |
| 67 | +batch_size = 78 |
| 68 | +maximum_levels = 4 |
| 69 | +maximum_units_per_level = 3 |
| 70 | +maximum_neurons_per_unit = 3 |
71 | 71 |
|
72 | 72 |
|
73 | 73 | cerebros =\ |
|
80 | 80 | validation_split=0.35, |
81 | 81 | direction='minimize', |
82 | 82 | metric_to_rank_by='val_root_mean_squared_error', |
83 | | - minimum_levels=1, |
| 83 | + minimum_levels=4, |
84 | 84 | maximum_levels=maximum_levels, |
85 | | - minimum_units_per_level=1, |
| 85 | + minimum_units_per_level=2, |
86 | 86 | maximum_units_per_level=maximum_units_per_level, |
87 | | - minimum_neurons_per_unit=1, |
| 87 | + minimum_neurons_per_unit=3, |
88 | 88 | maximum_neurons_per_unit=maximum_neurons_per_unit, |
89 | 89 | activation=activation, |
90 | 90 | final_activation=None, |
|
0 commit comments