Skip to content

Commit 1ddb636

Browse files
author
David Thrower
committed
Improve the MLflow setup workflow.
1 parent 64c204b commit 1ddb636

File tree

1 file changed

+22
-29
lines changed

1 file changed

+22
-29
lines changed

train_a_generative_llm_docker.py

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,26 @@
6262
keras_models_folder = f"{ARTIFACTS_FOLDER}/{TIME}/keras_models"
6363
Path(keras_models_folder).mkdir(parents=True, exist_ok=True)
6464

65+
## Dataset Selection
66+
# Assumes:
67+
# 1. Is a huggingface dataset of the structure ...
68+
# 2. Has a key ['train']['text']
69+
# 3. The key duck types as a List[str]
70+
# 4. The samples tokenize consistently with the MAX_SEQUENCE_LENGTH
6571

66-
MLFLOW_PORT = int(os.getenv("MLFLOW_PORT", 7777))
72+
DATASET_TO_RUN = str(os.getenv("DATASET_TO_RUN", "david-thrower/tiny-stories-mini-96-seq-len-50000-samples"))
6773

74+
######################### here ######################
6875

76+
# Samples to use for the neural architecture search stage
77+
PHASE_I_A_SAMPLES_TO_CREATE = int(getenv("PHASE_I_A_SAMPLES_TO_CREATE", "300"))
6978

79+
# Samples to use for the main training stage
80+
PHASE_I_B_SAMPLES_TO_CREATE = int(getenv("PHASE_I_B_SAMPLES_TO_CREATE", "200"))
81+
PHASE_I_B_VAL_SPLIT = float(getenv("PHASE_I_B_VAL_SPLIT", "0.15"))
7082

7183

84+
MLFLOW_PORT = int(os.getenv("MLFLOW_PORT", 7777))
7285

7386
# If you don't want Mlflow, just add `-e MLFLOW_PORT=0` to `docker run`
7487
if MLFLOW_PORT != 0:
@@ -86,38 +99,18 @@
8699
])
87100

88101
answer = subprocess.run(cmd, shell=True)
89-
time.sleep(10)
102+
time.sleep(30)
90103
print(answer.stdout)
91104

92105

106+
# Set up MlFlow experiment
107+
time_hyphenated = TIME.replace('_','-')
108+
ds_root_name = DATASET_TO_RUN.split('/')[-1]
109+
MLFLOW_EXPERIMENT_NAME = f"{time_hyphenated}--llm-training--{ds_root_name}-" +\
110+
f"ia-{PHASE_I_A_SAMPLES_TO_CREATE}-ib-{PHASE_I_B_SAMPLES_TO_CREATE}-a"
93111

94-
## Dataset Selection
95-
# Assumes:
96-
# 1. Is a huggingface dataset of the structure ...
97-
# 2. Has a key ['train']['text']
98-
# 3. The key duck types as a List[str]
99-
# 4. The samples tokenize consistently with the MAX_SEQUENCE_LENGTH
100-
101-
DATASET_TO_RUN = str(os.getenv("DATASET_TO_RUN", "david-thrower/tiny-stories-mini-96-seq-len-50000-samples"))
102-
103-
######################### here ######################
104-
105-
# Samples to use for the neural architecture search stage
106-
PHASE_I_A_SAMPLES_TO_CREATE = int(getenv("PHASE_I_A_SAMPLES_TO_CREATE", "300"))
107-
108-
# Samples to use for the main training stage
109-
PHASE_I_B_SAMPLES_TO_CREATE = int(getenv("PHASE_I_B_SAMPLES_TO_CREATE", "200"))
110-
PHASE_I_B_VAL_SPLIT = float(getenv("PHASE_I_B_VAL_SPLIT", "0.15"))
111-
112-
113-
# Set up MlFlow experiment
114-
time_hyphenated = TIME.replace('_','-')
115-
ds_root_name = DATASET_TO_RUN.split('/')[-1]
116-
EXPERIMENT_NAME = f"{time_hyphenated}--llm-training--{ds_root_name}-" +\
117-
f"ia-{PHASE_I_A_SAMPLES_TO_CREATE}-ib-{PHASE_I_B_SAMPLES_TO_CREATE}-a"
118-
119-
mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
120-
mlflow.set_experiment(EXPERIMENT_NAME)
112+
mlflow.set_tracking_uri(uri=f"http://127.0.0.1:{MLFLOW_PORT}")
113+
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)
121114

122115

123116
# This is a single head model. It only returns the next token. For this reason,

0 commit comments

Comments
 (0)