Skip to content

Commit 84fc0ef

Browse files
committed
Add example deployment for MIG config support
Signed-off-by: Michail Resvanis <mresvani@redhat.com>
1 parent fd143dc commit 84fc0ef

File tree

2 files changed

+120
-1
lines changed

2 files changed

+120
-1
lines changed

deployments/container/Dockerfile.ubi9

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,15 @@ ARG VERSION="N/A"
3838
ARG GIT_COMMIT="unknown"
3939
RUN make PREFIX=/artifacts cmds
4040

41+
FROM nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.12.1-ubi9 as mig-manager
4142

4243
FROM nvcr.io/nvidia/distroless/go:v3.1.8
4344

4445
ENV NVIDIA_VISIBLE_DEVICES=void
4546

4647
COPY --from=build /artifacts/nvidia-vgpu-dm /usr/bin/nvidia-vgpu-dm
4748
COPY --from=build /artifacts/nvidia-k8s-vgpu-dm /usr/bin/nvidia-k8s-vgpu-dm
48-
COPY --from=nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.12.1-ubi9 /usr/bin/nvidia-mig-parted /usr/bin/nvidia-mig-parted
49+
COPY --from=mig-manager /usr/bin/nvidia-mig-parted /usr/bin/nvidia-mig-parted
4950

5051
LABEL version="${VERSION}"
5152
LABEL release="N/A"
Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,118 @@
---
# Example DaemonSet for the vGPU device manager (image tag v0.3.0).
# The pod runs privileged and mounts the host's /sys, the host root filesystem
# and the NVIDIA driver directory, so it should only be deployed to clusters
# where that level of host access is acceptable.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: vgpu-device-manager
  namespace: default
  labels:
    app: vgpu-device-manager
spec:
  selector:
    matchLabels:
      app: vgpu-device-manager
  template:
    metadata:
      labels:
        app: vgpu-device-manager
    spec:
      # Only `serviceAccountName` is set: `serviceAccount` is a deprecated
      # alias for the same field and carried the identical value.
      serviceAccountName: vgpu-device-manager
      containers:
        - name: vgpu-device-manager
          image: nvcr.io/nvidia/cloud-native/vgpu-device-manager:v0.3.0
          imagePullPolicy: IfNotPresent
          env:
            # NOTE(review): hard-coded to match metadata.namespace above;
            # keep both in sync if this manifest is moved to another namespace.
            - name: NAMESPACE
              value: "default"
            # Name of the node this pod is scheduled on (downward API).
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Path inside the `vgpu-devices-config` ConfigMap mount below.
            - name: CONFIG_FILE
              value: "/vgpu-devices-config/config.yaml"
            - name: DEFAULT_VGPU_CONFIG
              value: "default"
            # Path inside the `gpu-clients` ConfigMap mount below.
            - name: GPU_CLIENTS_FILE
              value: "/gpu-clients/clients.yaml"
            # Preloads libnvidia-ml from the driver directory mounted at
            # /driver-root (host path /run/nvidia/driver).
            - name: LD_PRELOAD
              value: "/driver-root/usr/lib64/libnvidia-ml.so.1"
          securityContext:
            privileged: true
          volumeMounts:
            - mountPath: /vgpu-devices-config
              name: vgpu-devices-config
            - mountPath: /gpu-clients
              name: gpu-clients
            # HostToContainer propagation makes mounts created on the host
            # under this path after pod start visible in the container.
            - name: driver-install-dir
              mountPath: /driver-root
              mountPropagation: HostToContainer
            - mountPath: /sys
              name: host-sys
            - mountPath: /host
              name: host-root

      volumes:
        # Neither ConfigMap volume is marked `optional`, so both ConfigMaps
        # must exist in this namespace before the pod can start.
        - name: vgpu-devices-config
          configMap:
            name: vgpu-devices-config
        - name: gpu-clients
          configMap:
            name: gpu-clients
        - name: driver-install-dir
          hostPath:
            path: "/run/nvidia/driver"
            type: DirectoryOrCreate
        - name: host-sys
          hostPath:
            path: /sys
            type: Directory
        - name: host-root
          hostPath:
            path: /
            type: Directory

---
# Service account referenced by the vgpu-device-manager DaemonSet pod spec
# (`serviceAccountName: vgpu-device-manager`) in the `default` namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: vgpu-device-manager
  namespace: default

---
# Cluster-wide permissions for the vgpu-device-manager service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: vgpu-device-manager
rules:
  # Read-only access to the OpenShift ClusterVersion resource.
  - apiGroups:
      - config.openshift.io
    resources:
      - clusterversions
    verbs:
      - get
      - list
  # Read and update access to core-group nodes and pods.
  # NOTE(review): the Kubernetes Eviction API is invoked with the `create`
  # verb on `pods/eviction`; the verbs listed here do not include it, so this
  # rule does not allow evicting pods. Confirm whether eviction is actually
  # needed and either add a dedicated `create` rule or drop `pods/eviction`.
  - apiGroups:
      - ""
    resources:
      - nodes
      - pods
      - pods/eviction
    verbs:
      - get
      - list
      - watch
      - update

---
# Grants the vgpu-device-manager ClusterRole to the vgpu-device-manager
# ServiceAccount in the `default` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: vgpu-device-manager
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: vgpu-device-manager
subjects:
  - kind: ServiceAccount
    name: vgpu-device-manager
    # Must match the namespace of the ServiceAccount object.
    namespace: default

0 commit comments

Comments
 (0)