Commit 428c0ec

Merge branch '254-more-optimizations-to-notgpt-hpo-script' into 255-copy-of-branch-254-updated-hpo-script-for-cicd-scale-testing
2 parents: c251dec + 1fa8cf9

File tree

1 file changed (+19 −18)

generative-proof-of-concept-CPU-preprocessing-in-memory.py

Lines changed: 19 additions & 18 deletions
@@ -13,12 +13,12 @@
 print(answer.stdout)
 
 
-EXPERIMENT_ITERATION = "0003"
-EXPERIMENT_NAME = "single-worker-1st-pass"
+EXPERIMENT_ITERATION = "0001"
+EXPERIMENT_NAME = "more-optimizations-br-254-single-machine"
 DATA_SET_NAME = "WEB-Bible-Genesis-40-context-681-SPL"
 
 
-N_TRIALS = 10 # 50
+N_TRIALS = 50
 
 
 mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
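
For context, N_TRIALS and the tracking URI above feed the Optuna study that drives this script. A minimal sketch of that wiring, assuming a conventional Optuna + MLflow setup (the MLFLOW_PORT value, the study direction, and the placeholder objective are illustrative assumptions, not taken from this file):

import mlflow
import optuna

MLFLOW_PORT = 5000  # assumption; the real script defines this elsewhere
N_TRIALS = 50

mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")

def objective(trial: optuna.Trial) -> float:
    # Placeholder: the real objective builds, trains, and scores a model,
    # returning a metric such as validation perplexity (lower is better).
    x = trial.suggest_float("x", 0.0, 1.0)
    return x

study = optuna.create_study(direction="minimize")  # assumed direction
study.optimize(objective, n_trials=N_TRIALS)
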
@@ -61,8 +61,8 @@ def objective(trial: optuna.Trial) -> float:
     ### Non - HP tuning parameters (Optimize to RAM / CPU / GPU capacity)
 
     # Number of text samples to create: # Number of text samples (of approximately max_seq_len) to create
-    # Raises RAM in a linear fashion
-
+    # Raises RAM in a linear fashion
+
     SAMPLES_TO_CREATE = 20 # 681
 
     # How many tokens to provide before expecting the next token to be predicted.
@@ -84,7 +84,7 @@ def objective(trial: optuna.Trial) -> float:
 
     ## Generation time configurables: ##########
 
-    GENERATION_PROMPT_LEN = 10
+    GENERATION_PROMPT_LEN = 25
     MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN
     RESULT_CUTOFF = 20 # 100 # <---<< In production 100 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
 
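
Since MAX_NEW_TOKENS is derived from GENERATION_PROMPT_LEN, raising the prompt length from 10 to 25 shrinks the generation budget by 15 tokens. A quick sketch of the arithmetic, with an assumed MAX_SEQ_LENGTH (the real value is defined elsewhere in the script):

MAX_SEQ_LENGTH = 100  # illustrative assumption only
GENERATION_PROMPT_LEN = 25
MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN
print(MAX_NEW_TOKENS)  # 75; the old prompt length of 10 would give 90
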
@@ -99,9 +99,9 @@ def objective(trial: optuna.Trial) -> float:
     # Begin MLflow trial run (nested inside parent run if any)
 
 
-    POSITIONAL_EMBEDDING_DROPOUT = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.7, 0.9)
+    POSITIONAL_EMBEDDING_DROPOUT = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.72, 0.8)
 
-    activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'swish', 'softsign'])
+    activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'swish', 'softsign', 'softplus'])
 
     predecessor_level_connection_affinity_factor_first = trial.suggest_float('predecessor_level_connection_affinity_factor_first', 10.0, 30.0)
 
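
The narrowed dropout range (0.7-0.9 down to 0.72-0.8) and the added 'softplus' option read like a second pass refined around an earlier run's results. If one wanted to carry a known-good configuration forward into the new study explicitly (nothing in this diff shows the script doing so), Optuna's enqueue_trial is one option; the parameter values below are hypothetical:

import optuna

study = optuna.create_study(direction="minimize")

# Hypothetical best parameters from the earlier pass; illustrative only.
study.enqueue_trial({
    "POSITIONAL_EMBEDDING_DROPOUT": 0.76,
    "activation": "gelu",
})
# A subsequent study.optimize(...) evaluates this enqueued point first,
# before the sampler proposes new ones.
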
@@ -113,25 +113,26 @@ def objective(trial: optuna.Trial) -> float:
 
     num_lateral_connection_tries_per_unit = trial.suggest_int('num_lateral_connection_tries_per_unit', 10, 35)
 
-    learning_rate = trial.suggest_float('learning_rate', 0.0006, 0.01, log=True)
+    learning_rate = trial.suggest_float('learning_rate', 0.003, 0.006) # log=True)
 
-    epochs = trial.suggest_int('epochs', 10, 85)
+    epochs = trial.suggest_int('epochs', 50, 75)
 
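
Dropping log=True changes how Optuna samples the learning rate: with the flag, values are drawn uniformly in log space, spreading trials evenly across orders of magnitude; without it, sampling is uniform on the raw interval. Over the narrow 0.003-0.006 band the two behave almost identically, so little is lost here. A sketch contrasting the two calls:

import optuna

def objective(trial: optuna.Trial) -> float:
    # Wide range spanning orders of magnitude: log-uniform is the usual choice.
    # learning_rate = trial.suggest_float('learning_rate', 0.0006, 0.01, log=True)

    # Narrow range (a factor of 2): plain uniform sampling is nearly equivalent.
    learning_rate = trial.suggest_float('learning_rate', 0.003, 0.006)
    return learning_rate
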
     batch_size = 5 # trial.suggest_int('batch_size', 5, 10)
+
+    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 7)
 
-    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 10)
 
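
With batch_size pinned at 5 and gradient_accumulation_steps searched over 1-7, the effective batch size the optimizer updates on ranges from 5 to 35, since gradients are accumulated over that many micro-batches before each weight update:

batch_size = 5
for gradient_accumulation_steps in range(1, 8):  # the searched 1..7 range
    effective_batch_size = batch_size * gradient_accumulation_steps
    print(gradient_accumulation_steps, effective_batch_size)  # 5, 10, ..., 35
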
     # Level constraints - ensure max >= min by setting min of max to value of min
-    minimum_levels = trial.suggest_int('minimum_levels', 1, 3)
-    maximum_levels = trial.suggest_int('maximum_levels', minimum_levels, 3)
+    minimum_levels = 2 # trial.suggest_int('minimum_levels', 1, 3)
+    maximum_levels = 2 # trial.suggest_int('maximum_levels', minimum_levels, 3)
 
     # Units per level - ensure max >= min by setting min of max to value of min
-    minimum_units_per_level = trial.suggest_int('minimum_units_per_level', 1, 3)
-    maximum_units_per_level = trial.suggest_int('maximum_units_per_level', minimum_units_per_level, 4)
+    minimum_units_per_level = trial.suggest_int('minimum_units_per_level', 2, 3)
+    maximum_units_per_level = trial.suggest_int('maximum_units_per_level', minimum_units_per_level, 3)
 
     # Neurons per unit - ensure max >= min by setting min of max to value of min
-    minimum_neurons_per_unit = trial.suggest_int('minimum_neurons_per_unit', 1, 3)
-    maximum_neurons_per_unit = trial.suggest_int('maximum_neurons_per_unit', minimum_neurons_per_unit, 4)
+    minimum_neurons_per_unit = trial.suggest_int('minimum_neurons_per_unit', 1, 2)
+    maximum_neurons_per_unit = trial.suggest_int('maximum_neurons_per_unit', minimum_neurons_per_unit, 2)
 
 
     tokenizer_checkpoint = "HuggingFaceTB/SmolLM3-3B" # "HuggingFaceTB/SmolLM2-1.7B-Instruct"
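
The "ensure max >= min" comments describe a standard Optuna idiom: sample the minimum first, then use it as the lower bound of the maximum's range, so the constraint holds by construction in every trial. The cost is a dynamic search space, which define-by-run samplers such as TPE handle but an exhaustive grid sampler does not. A self-contained sketch of the pattern:

import optuna

def objective(trial: optuna.Trial) -> float:
    # The sampled minimum becomes the floor of the maximum's range,
    # so maximum >= minimum in every trial by construction.
    minimum_units_per_level = trial.suggest_int('minimum_units_per_level', 2, 3)
    maximum_units_per_level = trial.suggest_int(
        'maximum_units_per_level', minimum_units_per_level, 3)
    return float(maximum_units_per_level - minimum_units_per_level)

study = optuna.create_study()
study.optimize(objective, n_trials=5)
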
@@ -149,7 +150,7 @@ def objective(trial: optuna.Trial) -> float:
     # embedding output dim must be an even number
     # Maximize EMBEDDING_N based on available RAM and CPU / GPU
 
-    EMBEDDING_N = 6 # trial.suggest_int('embedding_n',6, 9) # 12
+    EMBEDDING_N = 9 # trial.suggest_int('embedding_n',6, 9) # 12
     EMBEDDING_DIM = int(EMBEDDING_N * 2)
 
     PROJECTION_N = 1 # Punatuve increase of ram, leaving this as 1 until we are running on HPC
