@@ -206,54 +206,12 @@ function wait_for_appwrapper_controller {
206206
# Patch every node's status so that it advertises 8 "nvidia.com/gpu" devices
# without any physical GPUs being present.
# This enables testing of our autopilot integration.
#
# Arguments: none
# Outputs:   progress (and kubectl's own output) on stdout; a diagnostic on
#            stderr when the nodes cannot be listed or a node cannot be patched.
# Returns:   0 when every node was patched, 1 on the first failure.
function add_virtual_GPUs {
    # "~1" is the JSON Pointer escape for "/", so this path addresses the
    # "nvidia.com/gpu" key under status.capacity.
    local -r gpu_patch='[{"op":"add","path":"/status/capacity/nvidia.com~1gpu","value":"8"}]'
    local node_names node_name

    echo "Adding virtual GPUs to all nodes"

    # List the nodes up front instead of inline in the for loop: an inline
    # command substitution that fails just yields zero iterations, which
    # would make a broken cluster connection look like success.
    if ! node_names=$(kubectl get nodes --no-headers -o custom-columns=":metadata.name"); then
        echo "Failed to list the cluster nodes" >&2
        return 1
    fi

    # Intentionally unquoted: one node name per line, and Kubernetes node
    # names cannot contain whitespace or glob characters.
    for node_name in ${node_names}
    do
        if ! kubectl patch node "${node_name}" --subresource=status --type=json -p="${gpu_patch}"; then
            echo "Failed to patch node '${node_name}' with GPU resources" >&2
            return 1
        fi
    done
}
258216
259217# clean up
0 commit comments