@@ -121,10 +121,10 @@ cd mlbatch
 # Setup priority classes
 kubectl apply -f setup.k8s/mlbatch-priorities.yaml
 
-# Deploy scheduler plugins
+# Deploy scheduler-plugins
 helm install scheduler-plugins --namespace scheduler-plugins --create-namespace scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/gpu","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
 
-# Wait for scheduler-plugins pods to be running
+# Wait for scheduler-plugins pods to be ready
 while [[ $(kubectl get pods -n scheduler-plugins -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' | tr ' ' '\n' | sort -u) != "True" ]]
 do
     echo -n "." && sleep 1;
@@ -154,8 +154,6 @@
 done
 echo ""
 
-kubectl get pods -n mlbatch-system
-
 # Deploy AppWrapper
 kubectl apply --server-side -k setup.k8s/appwrapper/coscheduling
 
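The pluginConfig above enables the Coscheduling plugin, so workloads can be gang-scheduled all-or-nothing by the second scheduler. As a hedged illustration of what this buys a workload, here is a minimal sketch of a pod joining a PodGroup; the names `sample-group` and `sample-group-worker-0` are hypothetical, and `scheduler-plugins-scheduler` is assumed to be the chart's default scheduler name (verify against your release).

```sh
# Minimal sketch: a PodGroup plus one pod that opts into the second
# scheduler deployed above. Names are hypothetical; the scheduler name
# is the helm chart's default and may differ in your installation.
kubectl apply -f - <<'EOF'
apiVersion: scheduling.x-k8s.io/v1alpha1
kind: PodGroup
metadata:
  name: sample-group
  namespace: default
spec:
  minMember: 2        # schedule only when 2 member pods can start together
EOF
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: sample-group-worker-0
  namespace: default
  labels:
    scheduling.x-k8s.io/pod-group: sample-group   # join the gang
spec:
  schedulerName: scheduler-plugins-scheduler      # use the second scheduler
  containers:
  - name: worker
    image: registry.k8s.io/pause:3.9
EOF
```

With the `permitWaitingTimeSeconds: 300` setting above, pods of an incomplete gang wait up to 300 seconds for the rest of the group before being rejected.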
@@ -496,7 +494,8 @@ kubectl label servicemonitors.monitoring.coreos.com -n nvidia-gpu-operator nvidi
 
 ## Workload Management
 
-TODO
+We will now demonstrate the queueing, quota management, and fault recovery capabilities
+of MLBatch using synthetic workloads.
 
 <details>
 
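To make "synthetic workload" concrete, here is a hedged sketch of the kind of placeholder job these demonstrations could use: a sleep Job wrapped in an AppWrapper and submitted to a team's local queue. The namespace `blue`, the queue name `default-queue`, and the job name are illustrative assumptions, not fixed by this document.

```sh
# Sketch of a synthetic workload: a GPU-requesting sleep Job wrapped in
# an AppWrapper so it is queued and quota-managed by Kueue. Namespace,
# queue name, and job name are assumptions for illustration.
kubectl apply -n blue -f - <<'EOF'
apiVersion: workload.codeflare.dev/v1beta2
kind: AppWrapper
metadata:
  name: synthetic-sleep
  labels:
    kueue.x-k8s.io/queue-name: default-queue   # submit to the team's local queue
spec:
  components:
  - template:
      apiVersion: batch/v1
      kind: Job
      metadata:
        name: synthetic-sleep
      spec:
        template:
          spec:
            restartPolicy: Never
            containers:
            - name: sleep
              image: busybox
              command: ["sleep", "600"]   # hold the quota for 10 minutes
              resources:
                requests:
                  nvidia.com/gpu: 1
                limits:
                  nvidia.com/gpu: 1
EOF
```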
@@ -506,7 +505,8 @@
 
 ## Example Workloads
 
-We now run a few example workloads.
+We will now run some sample workloads that are representative of what is run on
+an AI GPU cluster.
 
 ### Batch Inference with vLLM
 
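The vLLM example pairs an inference server with a `load-generator` container in one pod. As a hedged sketch of that two-container pattern (the image tags, model, and waiting logic below are illustrative assumptions, not the document's exact workload):

```sh
# Sketch of the two-container pattern: a vLLM server and a load
# generator that polls the server's health endpoint before sending a
# request. Images, model, and request are assumptions for illustration.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: vllm-batch-inference
spec:
  restartPolicy: Never
  containers:
  - name: vllm
    image: vllm/vllm-openai:latest           # assumed image tag
    args: ["--model", "facebook/opt-125m"]   # small model for illustration
    resources:
      limits:
        nvidia.com/gpu: 1
  - name: load-generator
    image: curlimages/curl
    command:
    - sh
    - -c
    - |
      # wait until the vLLM server reports healthy, then issue one request
      until curl -sf localhost:8000/health; do sleep 5; done
      curl -s localhost:8000/v1/completions \
        -H 'Content-Type: application/json' \
        -d '{"model":"facebook/opt-125m","prompt":"Hello","max_tokens":16}'
EOF
```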
@@ -627,7 +627,8 @@ The two containers are synchronized as follows: `load-generator` waits for
 
 ### Pre-Training with PyTorch
 
-TODO
+In this example, `alice` uses the [Kubeflow Training Operator](https://github.com/kubeflow/training-operator)
+to run a job that uses [PyTorch](https://pytorch.org) to train a machine learning model.
 
 <details>
 
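For orientation, here is a hedged sketch of the kind of PyTorchJob `alice` might submit; the name, image, and training script are illustrative assumptions, and a real MLBatch workload would typically be wrapped in an AppWrapper as in the earlier example.

```sh
# Sketch of a distributed PyTorchJob with one master and three workers.
# Image and script path are assumptions; the container must be named
# `pytorch` for the Training Operator to wire up distributed training.
kubectl apply -n blue -f - <<'EOF'
apiVersion: kubeflow.org/v1
kind: PyTorchJob
metadata:
  name: sample-pretraining
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
          - name: pytorch
            image: docker.io/pytorch/pytorch:latest    # assumed image
            command: ["python", "/workspace/train.py"] # hypothetical script
            resources:
              limits:
                nvidia.com/gpu: 1
    Worker:
      replicas: 3
      restartPolicy: Never
      template:
        spec:
          containers:
          - name: pytorch
            image: docker.io/pytorch/pytorch:latest
            command: ["python", "/workspace/train.py"]
            resources:
              limits:
                nvidia.com/gpu: 1
EOF
```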
@@ -637,7 +638,8 @@
 
 ### Fine-Tuning with Ray
 
-TODO
+In this example, `alice` uses [KubeRay](https://github.com/ray-project/kuberay) to run a job that
+uses [Ray](https://github.com/ray-project/ray) to fine-tune a machine learning model.
 
 <details>
 
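Similarly, here is a hedged sketch of the kind of RayJob `alice` might submit with KubeRay; the Ray version, image, and entrypoint script below are illustrative assumptions.

```sh
# Sketch of a RayJob: KubeRay provisions a Ray cluster (one head, two
# GPU workers) and runs the entrypoint to completion. Version, image,
# and script path are assumptions for illustration.
kubectl apply -n blue -f - <<'EOF'
apiVersion: ray.io/v1
kind: RayJob
metadata:
  name: sample-finetuning
spec:
  entrypoint: python /home/ray/samples/finetune.py   # hypothetical script
  rayClusterSpec:
    rayVersion: "2.9.0"                              # assumed version
    headGroupSpec:
      rayStartParams: {}
      template:
        spec:
          containers:
          - name: ray-head
            image: rayproject/ray:2.9.0
    workerGroupSpecs:
    - groupName: gpu-workers
      replicas: 2
      rayStartParams: {}
      template:
        spec:
          containers:
          - name: ray-worker
            image: rayproject/ray:2.9.0
            resources:
              limits:
                nvidia.com/gpu: 1
EOF
```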