Skip to content

Commit 165a64d

Browse files
Update generative-proof-of-concept-CPU-preprocessing-in-memory.py
1. Add saving of model and tokenizer. 2. Some scale down for CICD purposes...
1 parent 983fb5b commit 165a64d

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

generative-proof-of-concept-CPU-preprocessing-in-memory.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
DATA_SET_NAME = "WEB-Bible-Genesis-40-context-681-SPL"
1919
EXPERIMENT_NAME = f"{EXPERIMENT_NAME}-{DATA_SET_NAME}-{EXPERIMENT_ITERATION}-a"
2020

21-
N_TRIALS = 50
21+
N_TRIALS = 10
2222

2323

2424
mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
@@ -75,10 +75,10 @@ def objective(trial: optuna.Trial) -> float:
7575
# Raises RAM in a linear fashion
7676

7777
PHASE_I_A_SAMPLES_TO_CREATE = 10 # 681
78-
PHASE_I_B_SAMPLES_TO_CREATE = 50
78+
PHASE_I_B_SAMPLES_TO_CREATE = 20
7979
PHASE_I_B_VAL_SPLIT = 0.15 # Validation split for phase I-b (0.0 to 1.0)
8080

81-
PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE = 20
81+
PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE = 10
8282

8383
# How many tokens to provide before expecting the next token to be predicted.
8484
# Half this = double RAM (inversely proportional to RAM requirement)
@@ -132,8 +132,8 @@ def objective(trial: optuna.Trial) -> float:
132132
# phase_i_b_learning_rate = trial.suggest_float('learning_rate', 0.0001, 0.006)
133133

134134

135-
epochs = trial.suggest_int('epochs', 50, 75)
136-
phase_i_b_epochs = trial.suggest_int('phase_i_b_epochs', 50, 150)
135+
epochs = trial.suggest_int('epochs', 30, 75)
136+
phase_i_b_epochs = trial.suggest_int('phase_i_b_epochs', 40, 60)
137137

138138
batch_size = 5 # trial.suggest_int('batch_size', 5, 10)
139139

@@ -756,6 +756,15 @@ def create_dataset(raw_text_samples, tokenizer, sample_expansion_batch_size=50,
756756
result_0=result_phase_i_b)
757757
counter += 1
758758

759+
760+
TOKENIZER_SAVE_PATH = f"tokenizer-tr-{trial}"
761+
tokenizer.save_pretrained(TOKENIZER_SAVE_PATH)
762+
print(f"Tokenizer saved to {TOKENIZER_SAVE_PATH}")
763+
764+
MODEL_SAVE_PATH = f"final_phase_ib_model_tr_{trial}.keras"
765+
generator.save(MODEL_SAVE_PATH)
766+
print(f"Final model saved to {MODEL_SAVE_PATH}")
767+
759768
# Return the final result to Optuna
760769
return result_phase_i_b
761770

0 commit comments

Comments
 (0)