1313print (answer .stdout )
1414
1515
16- EXPERIMENT_ITERATION = "0003 "
17- EXPERIMENT_NAME = "single-worker-1st-pass "
16+ EXPERIMENT_ITERATION = "0001 "
17+ EXPERIMENT_NAME = "more-optimizations-br-254-single-machine "
1818DATA_SET_NAME = "WEB-Bible-Genesis-40-context-681-SPL"
1919
2020
21- N_TRIALS = 10 # 50
21+ N_TRIALS = 50
2222
2323
2424mlflow .set_tracking_uri (uri = f"http://127.0.0.1:{ MLFLOW_PORT } " )
@@ -61,8 +61,8 @@ def objective(trial: optuna.Trial) -> float:
6161 ### Non - HP tuning parameters (Optimize to RAM / CPU / GPU capacity)
6262
6363 # Number of text samples to create: # Number of text samples (of approximately max_seq_len) to create
64- # Raises RAM in a linear fashion
65-
64+ # Raises RAM in a linear fashion
65+
6666 SAMPLES_TO_CREATE = 20 # 681
6767
6868 # How many tokens to provide before expecting the next token to be predicted.
@@ -84,7 +84,7 @@ def objective(trial: optuna.Trial) -> float:
8484
8585 ## Generation time configurables: ##########
8686
87- GENERATION_PROMPT_LEN = 10
87+ GENERATION_PROMPT_LEN = 25
8888 MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN
8989 RESULT_CUTOFF = 20 # 100 # <---<< In production 100 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
9090
@@ -99,9 +99,9 @@ def objective(trial: optuna.Trial) -> float:
9999 # Begin MLflow trial run (nested inside parent run if any)
100100
101101
102- POSITIONAL_EMBEDDING_DROPOUT = trial .suggest_float ('POSITIONAL_EMBEDDING_DROPOUT' , 0.7 , 0.9 )
102+ POSITIONAL_EMBEDDING_DROPOUT = trial .suggest_float ('POSITIONAL_EMBEDDING_DROPOUT' , 0.72 , 0.8 )
103103
104- activation = trial .suggest_categorical ('activation' , ['relu' , 'gelu' , 'swish' , 'softsign' ])
104+ activation = trial .suggest_categorical ('activation' , ['relu' , 'gelu' , 'swish' , 'softsign' , 'softplus' ])
105105
106106 predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 10.0 , 30.0 )
107107
@@ -113,25 +113,26 @@ def objective(trial: optuna.Trial) -> float:
113113
114114 num_lateral_connection_tries_per_unit = trial .suggest_int ('num_lateral_connection_tries_per_unit' , 10 , 35 )
115115
116- learning_rate = trial .suggest_float ('learning_rate' , 0.0006 , 0.01 , log = True )
116+ learning_rate = trial .suggest_float ('learning_rate' , 0.003 , 0.006 ) # log=True)
117117
118- epochs = trial .suggest_int ('epochs' , 10 , 85 )
118+ epochs = trial .suggest_int ('epochs' , 50 , 75 )
119119
120120 batch_size = 5 # trial.suggest_int('batch_size', 5, 10)
121+
122+ gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 7 )
121123
122- gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 10 )
123124
124125 # Level constraints - ensure max >= min by setting min of max to value of min
125- minimum_levels = trial .suggest_int ('minimum_levels' , 1 , 3 )
126- maximum_levels = trial .suggest_int ('maximum_levels' , minimum_levels , 3 )
126+ minimum_levels = 2 # trial.suggest_int('minimum_levels', 1, 3)
127+ maximum_levels = 2 # trial.suggest_int('maximum_levels', minimum_levels, 3)
127128
128129 # Units per level - ensure max >= min by setting min of max to value of min
129- minimum_units_per_level = trial .suggest_int ('minimum_units_per_level' , 1 , 3 )
130- maximum_units_per_level = trial .suggest_int ('maximum_units_per_level' , minimum_units_per_level , 4 )
130+ minimum_units_per_level = trial .suggest_int ('minimum_units_per_level' , 2 , 3 )
131+ maximum_units_per_level = trial .suggest_int ('maximum_units_per_level' , minimum_units_per_level , 3 )
131132
132133 # Neurons per unit - ensure max >= min by setting min of max to value of min
133- minimum_neurons_per_unit = trial .suggest_int ('minimum_neurons_per_unit' , 1 , 3 )
134- maximum_neurons_per_unit = trial .suggest_int ('maximum_neurons_per_unit' , minimum_neurons_per_unit , 4 )
134+ minimum_neurons_per_unit = trial .suggest_int ('minimum_neurons_per_unit' , 1 , 2 )
135+ maximum_neurons_per_unit = trial .suggest_int ('maximum_neurons_per_unit' , minimum_neurons_per_unit , 2 )
135136
136137
137138 tokenizer_checkpoint = "HuggingFaceTB/SmolLM3-3B" # "HuggingFaceTB/SmolLM2-1.7B-Instruct"
@@ -149,7 +150,7 @@ def objective(trial: optuna.Trial) -> float:
149150 # embedding output dim must be an even number
150151 # Maximize EMBEDDING_N based on available RAM and CPU / GPU
151152
152- EMBEDDING_N = 6 # trial.suggest_int('embedding_n',6, 9) # 12
153+ EMBEDDING_N = 9 # trial.suggest_int('embedding_n',6, 9) # 12
153154 EMBEDDING_DIM = int (EMBEDDING_N * 2 )
154155
155156 PROJECTION_N = 1 # Punitive increase of RAM; leaving this as 1 until we are running on HPC
0 commit comments