You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Installs the NVIDIA device plugin as a DaemonSet, following the AKS guide: https://learn.microsoft.com/en-us/azure/aks/gpu-cluster?tabs=add-ubuntu-gpu-node-pool#manually-install-the-nvidia-device-plugin
2
+
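# NOTE(review): the DaemonSet in this file sets `namespace: kube-system`; confirm whether the gpu-resources namespace is still needed.
# kubectl create namespace gpu-resources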
3
+
# kubectl apply -f nvidia-device-plugin-ds.yaml
4
+
5
+
apiVersion: apps/v1
6
+
kind: DaemonSet
7
+
metadata:
8
+
name: nvidia-device-plugin-daemonset
9
+
namespace: kube-system
10
+
spec:
11
+
selector:
12
+
matchLabels:
13
+
name: nvidia-device-plugin-ds
14
+
updateStrategy:
15
+
type: RollingUpdate
16
+
template:
17
+
metadata:
18
+
labels:
19
+
name: nvidia-device-plugin-ds
20
+
spec:
21
+
nodeSelector:
22
+
gpuEnabled: "true"
23
+
tolerations:
24
+
- effect: NoSchedule
25
+
key: kubernetes.azure.com/scalesetpriority
26
+
operator: Equal
27
+
value: spot
28
+
# Mark this pod as a critical add-on; when enabled, the critical add-on
29
+
# scheduler reserves resources for critical add-on pods so that they can
30
+
# be rescheduled after a failure.
31
+
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
# Install the nerdctl CLI (v1.7.6, linux/amd64) from the upstream GitHub release.
# The minimal "nerdctl-<version>" archive keeps its binaries at the archive root
# (only the "nerdctl-full-*" archive is laid out for /usr/local), so it is
# unpacked into /usr/local/bin to make `nerdctl` resolvable on the default PATH.
RUN wget -qO nerdctl.tar.gz "https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-1.7.6-linux-amd64.tar.gz" \
    && tar -C /usr/local/bin -xzvvf nerdctl.tar.gz \
    && rm nerdctl.tar.gz
7
+
8
+
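# Debug helper: for every container in containerd's k8s.io namespace (via the host socket), print its PID and Kubernetes pod name, keeping only gooey-gpu pods:
# for i in $(nerdctl -a /host/run/containerd/containerd.sock -n k8s.io container ls --format "{{.ID}}"); do nerdctl -a /host/run/containerd/containerd.sock -n k8s.io inspect -f '{{.State.Pid}} {{index .Config.Labels "io.kubernetes.pod.name"}}' $i; done | grep gooey-gpu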
0 commit comments