Update generative-proof-of-concept-CPU-preprocessing-in-memory.py

david-thrower · web-flow · commit 396316c86209 · 2025-10-08T13:34:10.000-04:00
Set sane values for CI/CD-scale linting and smoke-testing runs, realistic on a machine with 4 CPUs and 16 GB RAM.
diff --git a/generative-proof-of-concept-CPU-preprocessing-in-memory.py b/generative-proof-of-concept-CPU-preprocessing-in-memory.py
@@ -18,7 +18,7 @@
 DATA_SET_NAME = "WEB-Bible-Genesis-40-context-681-SPL"
 
 
-N_TRIALS = 50
+N_TRIALS = 10 # 50
 
 
 mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
@@ -63,7 +63,7 @@ def objective(trial: optuna.Trial) -> float:
     # Number of text samples to create: # Number of text samples (of approximately max_seq_len) to create 
     # Raises RAM in a linear fashion
     
-    SAMPLES_TO_CREATE = 681
+    SAMPLES_TO_CREATE = 10 # 681
 
     # How many tokens to provide before expecting the next token to be predicted. 
     # Half this = double RAM  (inversely proportional to RAM requirement)
@@ -149,7 +149,7 @@ def objective(trial: optuna.Trial) -> float:
     # embedding output dim must be an even number
     # Maximize EMBEDDING_N based on available RAM and CPU / GPU
     
-    EMBEDDING_N = trial.suggest_int('embedding_n',6, 9) # 12
+    EMBEDDING_N = 6 # trial.suggest_int('embedding_n',6, 9) # 12
     EMBEDDING_DIM = int(EMBEDDING_N * 2)
     
     PROJECTION_N = 1 # Punatuve increase of ram, leaving this as 1 until we are running on HPC