@@ -86,8 +86,10 @@ helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/
8686
8787helm install -n nfs-provisioner pokprod nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
8888 --create-namespace \
89- --set nfs.server=192.168.98.96 --set nfs.path=/gpfs/fs_ec/pokprod002 \
90- --set storageClass.name=nfs-client-pokprod --set storageClass.provisionerName=k8s-sigs.io/pokprod-nfs-subdir-external-provisioner
89+ --set nfs.server=192.168.98.96 \
90+ --set nfs.path=/gpfs/fs_ec/pokprod002 \
91+ --set storageClass.name=nfs-client-pokprod \
92+ --set storageClass.provisionerName=k8s-sigs.io/pokprod-nfs-subdir-external-provisioner
9193```
9294Make sure to set the `nfs.server` and `nfs.path` values to the right values for
9395your environment.
@@ -101,7 +103,8 @@ nfs-client-pokprod k8s-sigs.io/pokprod-nfs-subdir-external-provisioner D
101103OpenShift clusters require an additional configuration step to permit the
102104provisioner pod to mount the storage volume.
103105``` sh
104- oc adm policy add-scc-to-user hostmount-anyuid system:serviceaccount:nfs-provisioner:pokprod-nfs-subdir-external-provisioner
106+ oc adm policy add-scc-to-user hostmount-anyuid \
107+ system:serviceaccount:nfs-provisioner:pokprod-nfs-subdir-external-provisioner
105108```
106109
107110</details>
@@ -122,7 +125,9 @@ cd mlbatch
122125kubectl apply -f setup.k8s/mlbatch-priorities.yaml
123126
124127# Deploy scheduler-plugins
125- helm install scheduler-plugins --namespace scheduler-plugins --create-namespace scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ --set-json pluginConfig=' [{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/GPU","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
128+ helm install scheduler-plugins -n scheduler-plugins --create-namespace \
129+ scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
130+ --set-json pluginConfig='[{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/GPU","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
126131
127132# Wait for scheduler-plugins pods to be ready
128133while [[ $(kubectl get pods -n scheduler-plugins -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' | tr ' ' '\n' | sort -u) != "True" ]]
@@ -132,8 +137,10 @@ done
132137echo " "
133138
134139# Patch scheduler-plugins pod priorities
135- kubectl patch deployment -n scheduler-plugins --type=json --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-controller
136- kubectl patch deployment -n scheduler-plugins --type=json --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-scheduler
140+ kubectl patch deployment -n scheduler-plugins --type=json \
141+ --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-controller
142+ kubectl patch deployment -n scheduler-plugins --type=json \
143+ --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-scheduler
137144
138145# Create mlbatch-system namespace
139146kubectl create namespace mlbatch-system
@@ -160,7 +167,7 @@ kubectl apply --server-side -k setup.k8s/appwrapper/coscheduling
160167# Deploy Autopilot
161168helm repo add autopilot https://ibm.github.io/autopilot/ && helm repo update
162169
163- helm upgrade --install autopilot -n autopilot autopilot/autopilot --create-namespace
170+ helm upgrade -i autopilot -n autopilot autopilot/autopilot --create-namespace
164171
165172# Create Kueue's default flavor
166173kubectl apply -f setup.k8s/default-flavor.yaml
350357```
351358Then reapply the helm chart, this will start a rollout update.
352359``` sh
353- helm upgrade autopilot autopilot/autopilot --install --namespace= autopilot --create-namespace -f autopilot-extended.yaml
360+ helm upgrade -i autopilot autopilot/autopilot -n autopilot --create-namespace -f autopilot-extended.yaml
354361```
355362
356363</details>
@@ -431,7 +438,7 @@ grafana:
431438 - kubernetes.io/pvc-protection
432439EOF
433440
434- helm upgrade --install kube-prometheus-stack -n prometheus prometheus-community/kube-prometheus-stack --create-namespace -f config.yaml
441+ helm upgrade -i kube-prometheus-stack -n prometheus prometheus-community/kube-prometheus-stack --create-namespace -f config.yaml
435442```
436443
437444If deploying on OpenShift based systems, you need to assign the privileged
@@ -464,11 +471,11 @@ prometheus-kube-prometheus-stack-prometheus-0 2/2 Running 0
464471To access the Grafana dashboard on `localhost:3000`:
465472
466473``` sh
467- kubectl --namespace prometheus get secrets kube-prometheus-stack-grafana -o jsonpath=" {.data.admin-password}" | base64 -d ; echo
474+ kubectl -n prometheus get secrets kube-prometheus-stack-grafana -o jsonpath="{.data.admin-password}" | base64 -d ; echo
468475```
469476``` sh
470- export POD_NAME=$( kubectl --namespace prometheus get pod -l " app.kubernetes.io/name=grafana,app.kubernetes.io/instance=kube-prometheus-stack" -oname)
471- kubectl --namespace prometheus port-forward $POD_NAME 3000
477+ export POD_NAME=$(kubectl -n prometheus get pod -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=kube-prometheus-stack" -oname)
478+ kubectl -n prometheus port-forward $POD_NAME 3000
472479```
473480
474481To import NVidia and Autopilot metrics, from the Grafana dashboard:
0 commit comments