55from subprocess import run
66from warnings import warn
77
8- MLFLOW_PORT = 5000
8+ MLFLOW_PORT = 7777
99
1010answer = run (f"mlflow server --host 127.0.0.1 --port { MLFLOW_PORT } &" ,
1111 shell = True ,
@@ -63,7 +63,7 @@ def objective(trial: optuna.Trial) -> float:
6363 # Number of text samples (each of approximately max_seq_len tokens) to create
6464 # Raises RAM in a linear fashion
6565
66- SAMPLES_TO_CREATE = 10
66+ SAMPLES_TO_CREATE = 1000
6767
6868 # How many tokens to provide before expecting the next token to be predicted.
6969 # Halving this doubles RAM usage (inversely proportional to the RAM requirement)
@@ -84,9 +84,9 @@ def objective(trial: optuna.Trial) -> float:
8484
8585 ## Generation time configurables: ##########
8686
87- GENERATION_PROMPT_LEN = 25
88- MAX_NEW_TOKENS = 14
89- RESULT_CUTOFF = 11 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
87+ GENERATION_PROMPT_LEN = 20
88+ MAX_NEW_TOKENS = 20
89+ RESULT_CUTOFF = 20 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
9090
9191 if GENERATION_PROMPT_LEN + MAX_NEW_TOKENS > MAX_SEQ_LENGTH :
9292 raise ValueError ("Sequence length overflow: Generated text length (GENERATION_PROMPT_LEN + MAX_NEW_TOKENS) "
@@ -103,23 +103,23 @@ def objective(trial: optuna.Trial) -> float:
103103
104104 activation = trial .suggest_categorical ('activation' , ['relu' , 'gelu' , 'swish' , 'softsign' ])
105105
106- predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 0.01 , 20 .0 )
106+ predecessor_level_connection_affinity_factor_first = trial .suggest_float ('predecessor_level_connection_affinity_factor_first' , 10.0 , 35 .0 )
107107
108- predecessor_level_connection_affinity_factor_main = trial .suggest_float ('predecessor_level_connection_affinity_factor_main' , 0.1 , 20 .0 )
108+ predecessor_level_connection_affinity_factor_main = trial .suggest_float ('predecessor_level_connection_affinity_factor_main' , 16.0 , 25 .0 )
109109
110110 max_consecutive_lateral_connections = trial .suggest_int ('max_consecutive_lateral_connections' , 2 , 7 )
111111
112- p_lateral_connection = trial .suggest_float ('p_lateral_connection' , 0.01 , 0.5 )
112+ p_lateral_connection = trial .suggest_float ('p_lateral_connection' , 0.12 , 0.35 )
113113
114- num_lateral_connection_tries_per_unit = trial .suggest_int ('num_lateral_connection_tries_per_unit' , 1 , 17 )
114+ num_lateral_connection_tries_per_unit = trial .suggest_int ('num_lateral_connection_tries_per_unit' , 10 , 35 )
115115
116- learning_rate = trial .suggest_float ('learning_rate' , 10 ** - 4 , 0.05 , log = True )
116+ learning_rate = trial .suggest_float ('learning_rate' , 0.0006 , 0.01 , log = True )
117117
118- epochs = trial .suggest_int ('epochs' , 10 , 50 )
118+ epochs = trial .suggest_int ('epochs' , 10 , 85 )
119119
120- batch_size = trial .suggest_int ('batch_size' , 5 , 15 )
120+ batch_size = trial .suggest_int ('batch_size' , 5 , 10 )
121121
122- gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 2 )
122+ gradient_accumulation_steps = trial .suggest_int ('gradient_accumulation_steps' , 1 , 7 )
123123
124124 # Level constraints - ensure max >= min by setting min of max to value of min
125125 minimum_levels = trial .suggest_int ('minimum_levels' , 1 , 3 )
@@ -149,7 +149,7 @@ def objective(trial: optuna.Trial) -> float:
149149 # embedding output dim must be an even number
150150 # Maximize EMBEDDING_N based on available RAM and CPU / GPU
151151
152- EMBEDDING_N = 3 # 12
152+ EMBEDDING_N = trial . suggest_int ( 'embedding_n' , 6 , 9 ) # 12
153153 EMBEDDING_DIM = int (EMBEDDING_N * 2 )
154154
155155 PROJECTION_N = 1 # Punitive increase of RAM; leaving this as 1 until we are running on HPC
0 commit comments