 DATA_SET_NAME = "WEB-Bible-Genesis-40-context-681-SPL"


-N_TRIALS = 10 # 50
+N_TRIALS = 50


 mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
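For orientation, a minimal sketch (not part of this commit) of how N_TRIALS and the tracking URI above typically come together: an Optuna study calls the objective defined below once per trial, and everything logged inside it lands on the MLflow server at MLFLOW_PORT. The study direction, the port value, and the placeholder return are assumptions.

import mlflow
import optuna

MLFLOW_PORT = 5000  # assumed value; the real port is defined earlier in the script
N_TRIALS = 50

mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")

def objective(trial: optuna.Trial) -> float:
    # Hyperparameter suggestions and model training go here (see the hunks below)
    return 0.0  # placeholder for the validation metric the study optimizes

study = optuna.create_study(direction="minimize")  # direction is an assumption
study.optimize(objective, n_trials=N_TRIALS)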
@@ -63,7 +63,7 @@ def objective(trial: optuna.Trial) -> float:
     # Number of text samples (of approximately max_seq_len) to create
     # Raises RAM in a linear fashion

-    SAMPLES_TO_CREATE = 10 # 681
+    SAMPLES_TO_CREATE = 681

     # How many tokens to provide before expecting the next token to be predicted.
     # Half this = double RAM (inversely proportional to RAM requirement)
@@ -99,9 +99,9 @@ def objective(trial: optuna.Trial) -> float:
     # Begin MLflow trial run (nested inside parent run if any)


-    POSITIONAL_EMBEDDING_DROPOUT = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.7, 0.9)
+    POSITIONAL_EMBEDDING_DROPOUT = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.72, 0.8)

-    activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'swish', 'softsign'])
+    activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'swish', 'softsign', 'softplus'])

     predecessor_level_connection_affinity_factor_first = trial.suggest_float('predecessor_level_connection_affinity_factor_first', 10.0, 30.0)

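A hedged sketch of the pattern this hunk narrows: each trial samples hyperparameters with trial.suggest_* and records them in a nested MLflow run, so trials group under the parent study run mentioned in the comment above. The metric name and the zero placeholder are illustrative, not the commit's actual training code.

import mlflow
import optuna

def objective(trial: optuna.Trial) -> float:
    # Ranges mirror the '+' side of this hunk
    positional_embedding_dropout = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.72, 0.8)
    activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'swish', 'softsign', 'softplus'])

    with mlflow.start_run(nested=True):          # nests under the active parent run, if any
        mlflow.log_params(trial.params)          # logs every suggested value for this trial
        val_loss = 0.0                           # placeholder: build, train, and evaluate here
        mlflow.log_metric('val_loss', val_loss)
    return val_loss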
@@ -113,25 +113,25 @@ def objective(trial: optuna.Trial) -> float:

     num_lateral_connection_tries_per_unit = trial.suggest_int('num_lateral_connection_tries_per_unit', 10, 35)

-    learning_rate = trial.suggest_float('learning_rate', 0.0006, 0.01, log=True)
+    learning_rate = trial.suggest_float('learning_rate', 0.003, 0.006) # log=True)

-    epochs = trial.suggest_int('epochs', 10, 85)
+    epochs = trial.suggest_int('epochs', 50, 75)

     batch_size = 10 # trial.suggest_int('batch_size', 5, 10)

-    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 6)
+    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 7)

     # Level constraints - ensure max >= min by setting min of max to value of min
-    minimum_levels = trial.suggest_int('minimum_levels', 1, 3)
-    maximum_levels = trial.suggest_int('maximum_levels', minimum_levels, 3)
+    minimum_levels = 2 # trial.suggest_int('minimum_levels', 1, 3)
+    maximum_levels = 2 # trial.suggest_int('maximum_levels', minimum_levels, 3)

     # Units per level - ensure max >= min by setting min of max to value of min
-    minimum_units_per_level = trial.suggest_int('minimum_units_per_level', 1, 3)
-    maximum_units_per_level = trial.suggest_int('maximum_units_per_level', minimum_units_per_level, 4)
+    minimum_units_per_level = trial.suggest_int('minimum_units_per_level', 2, 3)
+    maximum_units_per_level = trial.suggest_int('maximum_units_per_level', minimum_units_per_level, 3)

     # Neurons per unit - ensure max >= min by setting min of max to value of min
-    minimum_neurons_per_unit = trial.suggest_int('minimum_neurons_per_unit', 1, 3)
-    maximum_neurons_per_unit = trial.suggest_int('maximum_neurons_per_unit', minimum_neurons_per_unit, 4)
+    minimum_neurons_per_unit = trial.suggest_int('minimum_neurons_per_unit', 1, 2)
+    maximum_neurons_per_unit = trial.suggest_int('maximum_neurons_per_unit', minimum_neurons_per_unit, 2)


     tokenizer_checkpoint = "HuggingFaceTB/SmolLM3-3B" # "HuggingFaceTB/SmolLM2-1.7B-Instruct"
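The "ensure max >= min" comments in this hunk describe a standard Optuna idiom for dependent bounds: sample the minimum first, then reuse that value as the lower bound when sampling the maximum. A minimal sketch of just that idiom, with the helper name chosen for illustration:

import optuna

def suggest_units_per_level(trial: optuna.Trial) -> tuple[int, int]:
    # Sample the minimum first...
    minimum_units_per_level = trial.suggest_int('minimum_units_per_level', 2, 3)
    # ...then use it as the lower bound of the maximum, so max >= min always holds
    maximum_units_per_level = trial.suggest_int('maximum_units_per_level', minimum_units_per_level, 3)
    return minimum_units_per_level, maximum_units_per_level

Because the second suggestion's bounds depend on the first sampled value, the search space is dynamic; Optuna's define-by-run API supports this, which is why the pattern works inside the objective.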
@@ -149,7 +149,7 @@ def objective(trial: optuna.Trial) -> float:
     # embedding output dim must be an even number
     # Maximize EMBEDDING_N based on available RAM and CPU / GPU

-    EMBEDDING_N = 6 # trial.suggest_int('embedding_n',6, 9) # 12
+    EMBEDDING_N = 9 # trial.suggest_int('embedding_n',6, 9) # 12
     EMBEDDING_DIM = int(EMBEDDING_N * 2)

     PROJECTION_N = 1 # Punitive increase of RAM, leaving this as 1 until we are running on HPC