@@ -83,7 +83,7 @@ def objective(trial: optuna.Trial) -> float:
8383 # Number of text samples (of approximately max_seq_len) to create
8484 # Raises RAM in a linear fashion
8585
86- SAMPLES_TO_CREATE = 150
86+ SAMPLES_TO_CREATE = 230
8787
8888 # How many tokens to provide before expecting the next token to be predicted.
8989 # Halving this doubles RAM use (the RAM requirement is inversely proportional to this value)
@@ -119,31 +119,31 @@ def objective(trial: optuna.Trial) -> float:
119119 # Begin MLflow trial run (nested inside parent run if any)
120120
121121
122- POSITIONAL_EMBEDDING_DROPOUT = trial .suggest_float ('POSITIONAL_EMBEDDING_DROPOUT' , 0.7 , 0.99 )
122+ POSITIONAL_EMBEDDING_DROPOUT = 0.734 # trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.7, 0.99)
123123
124- activation = trial .suggest_categorical ('activation' , ['relu' , 'gelu' , 'swish' , 'softsign' ])
124+ activation = trial .suggest_categorical ('activation' , ['swish' , 'softsign' ]) # [' relu', 'gelu', 'swish', 'softsign'])
125125
126- predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 0.01 , 20 .0 )
126+ predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 25.0 , 35 .0 )
127127
128- predecessor_level_connection_affinity_factor_main = trial .suggest_float ('predecessor_level_connection_affinity_factor_main' , 0.1 , 20 .0 )
128+ predecessor_level_connection_affinity_factor_main = trial .suggest_float ('predecessor_level_connection_affinity_factor_main' , 16.0 , 25 .0 )
129129
130- max_consecutive_lateral_connections = trial .suggest_int ('max_consecutive_lateral_connections' , 2 , 7 )
130+ max_consecutive_lateral_connections = trial .suggest_int ('max_consecutive_lateral_connections' , 5 , 7 )
131131
132132 p_lateral_connection = trial .suggest_float ('p_lateral_connection' , 0.01 , 0.5 )
133133
134134 num_lateral_connection_tries_per_unit = trial .suggest_int ('num_lateral_connection_tries_per_unit' , 1 , 17 )
135135
136- learning_rate = trial .suggest_float ('learning_rate' , 10 ** - 4 , 0.05 , log = True )
136+ learning_rate = trial .suggest_float ('learning_rate' , 0.0005 , 0.0012 , log = True )
137137
138138 epochs = trial .suggest_int ('epochs' , 10 , 50 )
139139
140- batch_size = trial .suggest_int ('batch_size' , 5 , 15 )
140+ batch_size = 7 # trial.suggest_int('batch_size', 5, 15)
141141
142- gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 2 )
142+ gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 2 , 15 )
143143
144144 # Level constraints - ensure max >= min by setting min of max to value of min
145145 minimum_levels = trial .suggest_int ('minimum_levels' , 1 , 3 )
146- maximum_levels = trial .suggest_int ('maximum_levels' , minimum_levels , 3 )
146+ maximum_levels = 3 # trial.suggest_int('maximum_levels', minimum_levels, 3)
147147
148148 # Units per level - ensure max >= min by setting min of max to value of min
149149 minimum_units_per_level = trial .suggest_int ('minimum_units_per_level' , 1 , 3 )
@@ -169,7 +169,7 @@ def objective(trial: optuna.Trial) -> float:
169169 # embedding output dim must be an even number
170170 # Maximize EMBEDDING_N based on available RAM and CPU / GPU
171171
172- EMBEDDING_N = 3 # 12
172+ EMBEDDING_N = trial . suggest_int ( "embedding_n" , 9 , 11 ) # 9 # 3 # 12
173173 EMBEDDING_DIM = int (EMBEDDING_N * 2 )
174174
175175 PROJECTION_N = 1 # Punitive increase of RAM, leaving this as 1 until we are running on HPC
0 commit comments