Skip to content

Commit 40708d0

Browse files
Update generative-proof-of-concept-CPU-preprocessing-in-memory.py
More parameterizations; make batch size 5; allow greater gradient-accumulation steps; see if we can get a stable HPO run at CPU scale.
1 parent 30f810e commit 40708d0

File tree

1 file changed

+7
-7
lines changed

generative-proof-of-concept-CPU-preprocessing-in-memory.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def objective(trial: optuna.Trial) -> float:
6363
# Number of text samples (each of approximately max_seq_len tokens) to create
6464
# Raises RAM in a linear fashion
6565

66-
SAMPLES_TO_CREATE = 1000
66+
SAMPLES_TO_CREATE = 2000
6767

6868
# How many tokens to provide before expecting the next token to be predicted.
6969
# Halving this doubles RAM (inversely proportional to the RAM requirement)
@@ -84,8 +84,8 @@ def objective(trial: optuna.Trial) -> float:
8484

8585
## Generation time configurables: ##########
8686

87-
GENERATION_PROMPT_LEN = 20
88-
MAX_NEW_TOKENS = 20
87+
GENERATION_PROMPT_LEN = 10
88+
MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN
8989
RESULT_CUTOFF = 20 # Only print out verbose text samples when perplexity is < RESULT_CUTOFF
9090

9191
if GENERATION_PROMPT_LEN + MAX_NEW_TOKENS > MAX_SEQ_LENGTH:
@@ -99,11 +99,11 @@ def objective(trial: optuna.Trial) -> float:
9999
# Begin MLflow trial run (nested inside parent run if any)
100100

101101

102-
POSITIONAL_EMBEDDING_DROPOUT = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.7, 0.99)
102+
POSITIONAL_EMBEDDING_DROPOUT = trial.suggest_float('POSITIONAL_EMBEDDING_DROPOUT', 0.7, 0.9)
103103

104104
activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'swish', 'softsign'])
105105

106-
predecessor_level_connection_affinity_factor_first = trial.suggest_float('predecessor_level_connection_affinity_factor_first', 10.0, 35.0)
106+
predecessor_level_connection_affinity_factor_first = trial.suggest_float('predecessor_level_connection_affinity_factor_first', 10.0, 30.0)
107107

108108
predecessor_level_connection_affinity_factor_main = trial.suggest_float('predecessor_level_connection_affinity_factor_main', 16.0, 25.0)
109109

@@ -117,9 +117,9 @@ def objective(trial: optuna.Trial) -> float:
117117

118118
epochs = trial.suggest_int('epochs', 10, 85)
119119

120-
batch_size = trial.suggest_int('batch_size', 5, 10)
120+
batch_size = 5 # trial.suggest_int('batch_size', 5, 10)
121121

122-
gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 7)
122+
gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 15)
123123

124124
# Level constraints - ensure max >= min by setting min of max to value of min
125125
minimum_levels = trial.suggest_int('minimum_levels', 1, 3)

0 commit comments

Comments
 (0)