
Commit 5f6840e

Update generative-proof-of-concept-CPU-preprocessing-in-memory.py
1 parent 22fe1f7 commit 5f6840e


generative-proof-of-concept-CPU-preprocessing-in-memory.py

Lines changed: 12 additions & 14 deletions
@@ -4,7 +4,7 @@
 from datetime import datetime
 from subprocess import run
 
-MLFLOW_PORT = 5000
+MLFLOW_PORT = 7777
 
 answer = run(f"mlflow server --host 127.0.0.1 --port {MLFLOW_PORT} &",
              shell=True,
@@ -19,7 +19,7 @@
 
 mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
 
-mlflow.set_experiment(f"single-worker-1st-pass-tuning-{EXPERIMENT_ITERATION}-a")
+mlflow.set_experiment(f"single-worker-femto-scale-tuning-200-samples-{EXPERIMENT_ITERATION}-a")
 
 
 
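For context, a minimal sketch of how runs land in the renamed experiment. EXPERIMENT_ITERATION is defined elsewhere in the script; the placeholder value and the logged names/values here are illustrative only:

import mlflow

MLFLOW_PORT = 7777
EXPERIMENT_ITERATION = 0  # placeholder; the real value comes from elsewhere in the script

mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
# set_experiment creates the experiment on first use and selects it afterwards,
# so changing the name string starts a fresh experiment without touching old runs.
mlflow.set_experiment(f"single-worker-femto-scale-tuning-200-samples-{EXPERIMENT_ITERATION}-a")

with mlflow.start_run():
    mlflow.log_param("SAMPLES_TO_CREATE", 200)  # illustrative
    mlflow.log_metric("val_loss", 0.0)          # illustrative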
@@ -28,11 +28,11 @@ def objective(trial: optuna.Trial) -> float:
     Objective function for Optuna hyperparameter optimization
     Returns the validation loss or metric to minimize
     """
-
+
     import tensorflow as tf
-    import tensorflow_text
-    from keras_nlp.models import GPT2Tokenizer, GPT2Preprocessor, GPT2Backbone
-    from keras_nlp.layers import PositionEmbedding
+    # import tensorflow_text
+    # from keras_nlp.models import GPT2Tokenizer, GPT2Preprocessor, GPT2Backbone
+    # from keras_nlp.layers import PositionEmbedding
     from transformers import AutoTokenizer
     from sklearn.model_selection import train_test_split
     from sklearn.utils import shuffle
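The KerasNLP imports are commented out rather than deleted. If they may be re-enabled later, a guarded import is a common alternative; a sketch (the HAS_KERAS_NLP flag is an assumption, not in the commit):

try:
    import tensorflow_text  # registers the TF text ops on import
    from keras_nlp.models import GPT2Tokenizer, GPT2Preprocessor, GPT2Backbone
    from keras_nlp.layers import PositionEmbedding
    HAS_KERAS_NLP = True
except ImportError:
    HAS_KERAS_NLP = False  # fall back to the transformers AutoTokenizer path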
@@ -59,7 +59,7 @@ def objective(trial: optuna.Trial) -> float:
     # Number of text samples (of approximately max_seq_len) to create.
     # RAM use grows linearly with this value.
 
-    SAMPLES_TO_CREATE = 10
+    SAMPLES_TO_CREATE = 200
 
     # How many tokens to provide before expecting the next token to be predicted.
     # Halving this roughly doubles RAM (inversely proportional to the RAM requirement).
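The two comments describe a simple scaling law: memory grows linearly with the sample count and inversely with the token window. A hypothetical helper (not part of the commit) expressing that relationship:

def approx_relative_ram(samples_to_create: int, tokens_per_window: int) -> float:
    # Illustrative only: relative memory pressure ~ samples / window,
    # per the linear and inverse relationships noted in the comments above.
    return samples_to_create / tokens_per_window

# Moving from 10 to 200 samples at a fixed window is a ~20x increase:
print(approx_relative_ram(200, 100) / approx_relative_ram(10, 100))  # 20.0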
@@ -93,19 +93,19 @@ def objective(trial: optuna.Trial) -> float:
 
     predecessor_level_connection_affinity_factor_main = trial.suggest_float('predecessor_level_connection_affinity_factor_main', 0.1, 40.0)
 
-    max_consecutive_lateral_connections = trial.suggest_int('max_consecutive_lateral_connections', 2, 10)
+    max_consecutive_lateral_connections = trial.suggest_int('max_consecutive_lateral_connections', 6, 8)
 
-    p_lateral_connection = trial.suggest_float('p_lateral_connection', 0.01, 1.00)
+    p_lateral_connection = trial.suggest_float('p_lateral_connection', 0.04, 0.45)
 
-    num_lateral_connection_tries_per_unit = trial.suggest_int('num_lateral_connection_tries_per_unit', 1, 40)
+    num_lateral_connection_tries_per_unit = trial.suggest_int('num_lateral_connection_tries_per_unit', 10, 35)
 
     learning_rate = trial.suggest_float('learning_rate', 10 ** -4, 0.05, log=True)
 
-    epochs = trial.suggest_int('epochs', 10, 50)
+    epochs = trial.suggest_int('epochs', 23, 65)
 
     batch_size = trial.suggest_int('batch_size', 5, 15)
 
-    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 2)
+    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 3)
 
     # Level constraints - ensure max >= min by setting the min of max to the value of min
     minimum_levels = trial.suggest_int('minimum_levels', 1, 3)
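The "Level constraints" comment describes a dependent search space: sample the minimum first, then use it as the lower bound for the maximum, which Optuna's define-by-run API supports directly. A minimal self-contained sketch of that pattern (the 'maximum_levels' name, its upper bound, and the toy objective are assumptions):

import optuna

def toy_objective(trial: optuna.Trial) -> float:
    # Sample the minimum first, then clamp the lower bound of the maximum
    # so the pair can never be inconsistent (max >= min by construction).
    minimum_levels = trial.suggest_int('minimum_levels', 1, 3)
    maximum_levels = trial.suggest_int('maximum_levels', minimum_levels, 7)  # name/upper bound assumed
    return float(maximum_levels - minimum_levels)  # placeholder objective

study = optuna.create_study(direction='minimize')
study.optimize(toy_objective, n_trials=5)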
@@ -971,5 +971,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
-
