@@ -63,7 +63,7 @@ def objective(trial: optuna.Trial) -> float:
6363 # Number of text samples (each of approximately max_seq_len) to create.
6464 # RAM usage grows linearly with this value.
6565
66- SAMPLES_TO_CREATE = 1000
66+ SAMPLES_TO_CREATE = 2000
6767
6868 # How many tokens to provide before expecting the next token to be predicted.
6969 # Halving this doubles RAM usage (the RAM requirement is inversely proportional to this value).
@@ -84,8 +84,8 @@ def objective(trial: optuna.Trial) -> float:
8484
8585 ## Generation time configurables: ##########
8686
87- GENERATION_PROMPT_LEN = 20
88- MAX_NEW_TOKENS = 20
87+ GENERATION_PROMPT_LEN = 10
88+ MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN
8989 RESULT_CUTOFF = 20 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
9090
9191 if GENERATION_PROMPT_LEN + MAX_NEW_TOKENS > MAX_SEQ_LENGTH :
@@ -99,11 +99,11 @@ def objective(trial: optuna.Trial) -> float:
9999 # Begin MLflow trial run (nested inside parent run if any)
100100
101101
102- POSITIONAL_EMBEDDING_DROPOUT = trial .suggest_float ('POSITIONAL_EMBEDDING_DROPOUT' , 0.7 , 0.99 )
102+ POSITIONAL_EMBEDDING_DROPOUT = trial .suggest_float ('POSITIONAL_EMBEDDING_DROPOUT' , 0.7 , 0.9 )
103103
104104 activation = trial .suggest_categorical ('activation' , ['relu' , 'gelu' , 'swish' , 'softsign' ])
105105
106- predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 10.0 , 35 .0 )
106+ predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 10.0 , 30 .0 )
107107
108108 predecessor_level_connection_affinity_factor_main = trial .suggest_float ('predecessor_level_connection_affinity_factor_main' , 16.0 , 25.0 )
109109
@@ -117,9 +117,9 @@ def objective(trial: optuna.Trial) -> float:
117117
118118 epochs = trial .suggest_int ('epochs' , 10 , 85 )
119119
120- batch_size = trial .suggest_int ('batch_size' , 5 , 10 )
120+ batch_size = 5 # trial.suggest_int('batch_size', 5, 10)
121121
122- gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 7 )
122+ gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 15 )
123123
124124 # Level constraints - ensure max >= min by using the sampled minimum as the lower bound of the maximum's range
125125 minimum_levels = trial .suggest_int ('minimum_levels' , 1 , 3 )
0 commit comments