@@ -82,12 +82,15 @@ Provisioner](https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner)
8282<details>
8383
8484``` sh
85- helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner && helm repo update
85+ helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
86+ helm repo update
8687
8788helm install -n nfs-provisioner pokprod nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
8889 --create-namespace \
89- --set nfs.server=192.168.98.96 --set nfs.path=/gpfs/fs_ec/pokprod002 \
90- --set storageClass.name=nfs-client-pokprod --set storageClass.provisionerName=k8s-sigs.io/pokprod-nfs-subdir-external-provisioner
90+ --set nfs.server=192.168.98.96 \
91+ --set nfs.path=/gpfs/fs_ec/pokprod002 \
92+ --set storageClass.name=nfs-client-pokprod \
93+ --set storageClass.provisionerName=k8s-sigs.io/pokprod-nfs-subdir-external-provisioner
9194```
9295Make sure to set the `nfs.server` and `nfs.path` values to the correct values for
9396your environment.
@@ -101,7 +104,8 @@ nfs-client-pokprod k8s-sigs.io/pokprod-nfs-subdir-external-provisioner D
101104OpenShift clusters require an additional configuration step to permit the
102105provisioner pod to mount the storage volume.
103106``` sh
104- oc adm policy add-scc-to-user hostmount-anyuid system:serviceaccount:nfs-provisioner:pokprod-nfs-subdir-external-provisioner
107+ oc adm policy add-scc-to-user hostmount-anyuid \
108+ system:serviceaccount:nfs-provisioner:pokprod-nfs-subdir-external-provisioner
105109```
106110
107111</details>
@@ -122,7 +126,9 @@ cd mlbatch
122126kubectl apply -f setup.k8s/mlbatch-priorities.yaml
123127
124128# Deploy scheduler-plugins
125- helm install scheduler-plugins --namespace scheduler-plugins --create-namespace scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ --set-json pluginConfig=' [{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/GPU","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
129+ helm install scheduler-plugins -n scheduler-plugins --create-namespace \
130+ scheduler-plugins/manifests/install/charts/as-a-second-scheduler/ \
131+ --set-json pluginConfig=' [{"args":{"scoringStrategy":{"resources":[{"name":"nvidia.com/GPU","weight":1}],"requestedToCapacityRatio":{"shape":[{"utilization":0,"score":0},{"utilization":100,"score":10}]},"type":"RequestedToCapacityRatio"}},"name":"NodeResourcesFit"},{"args":{"permitWaitingTimeSeconds":300},"name":"Coscheduling"}]'
126132
127133# Wait for scheduler-plugins pods to be ready
128134while [[ $( kubectl get pods -n scheduler-plugins -o ' jsonpath={..status.conditions[?(@.type=="Ready")].status}' | tr ' ' ' \n' | sort -u) != " True" ]]
@@ -132,8 +138,10 @@ done
132138echo " "
133139
134140# Patch scheduler-plugins pod priorities
135- kubectl patch deployment -n scheduler-plugins --type=json --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-controller
136- kubectl patch deployment -n scheduler-plugins --type=json --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-scheduler
141+ kubectl patch deployment -n scheduler-plugins --type=json \
142+ --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-controller
143+ kubectl patch deployment -n scheduler-plugins --type=json \
144+ --patch-file setup.k8s/scheduler-priority-patch.yaml scheduler-plugins-scheduler
137145
138146# Create mlbatch-system namespace
139147kubectl create namespace mlbatch-system
@@ -158,9 +166,10 @@ echo ""
158166kubectl apply --server-side -k setup.k8s/appwrapper/coscheduling
159167
160168# Deploy Autopilot
161- helm repo add autopilot https://ibm.github.io/autopilot/ && helm repo update
169+ helm repo add autopilot https://ibm.github.io/autopilot/
170+ helm repo update
162171
163- helm upgrade --install autopilot -n autopilot autopilot/autopilot --create-namespace
172+ helm upgrade -i autopilot -n autopilot autopilot/autopilot --create-namespace
164173
165174# Create Kueue's default flavor
166175kubectl apply -f setup.k8s/default-flavor.yaml
350359```
351360Then reapply the Helm chart; this will start a rolling update.
352361``` sh
353- helm upgrade autopilot autopilot/autopilot --install --namespace= autopilot --create-namespace -f autopilot-extended.yaml
362+ helm upgrade -i autopilot autopilot/autopilot -n autopilot --create-namespace -f autopilot-extended.yaml
354363```
355364
356365</details>
@@ -431,7 +440,7 @@ grafana:
431440 - kubernetes.io/pvc-protection
432441EOF
433442
434- helm upgrade --install kube-prometheus-stack -n prometheus prometheus-community/kube-prometheus-stack --create-namespace -f config.yaml
443+ helm upgrade -i kube-prometheus-stack -n prometheus prometheus-community/kube-prometheus-stack --create-namespace -f config.yaml
435444```
436445
437446If deploying on OpenShift-based systems, you need to assign the privileged
@@ -464,11 +473,11 @@ prometheus-kube-prometheus-stack-prometheus-0 2/2 Running 0
464473To access the Grafana dashboard on ` localhost:3000 ` :
465474
466475``` sh
467- kubectl --namespace prometheus get secrets kube-prometheus-stack-grafana -o jsonpath=" {.data.admin-password}" | base64 -d ; echo
476+ kubectl -n prometheus get secrets kube-prometheus-stack-grafana -o jsonpath=" {.data.admin-password}" | base64 -d ; echo
468477```
469478``` sh
470- export POD_NAME=$( kubectl --namespace prometheus get pod -l " app.kubernetes.io/name=grafana,app.kubernetes.io/instance=kube-prometheus-stack" -oname)
471- kubectl --namespace prometheus port-forward $POD_NAME 3000
479+ export POD_NAME=$( kubectl -n prometheus get pod -l " app.kubernetes.io/name=grafana,app.kubernetes.io/instance=kube-prometheus-stack" -oname)
480+ kubectl -n prometheus port-forward $POD_NAME 3000
472481```
473482
474483To import NVIDIA and Autopilot metrics, from the Grafana dashboard:
0 commit comments