Skip to content

Commit 5feab0a

Browse files
authored
Merge pull request kubernetes#93207 from hasheddan/nvidia-gpu-installer
Use local daemonset manifest for installing Nvidia drivers
2 parents c237804 + e990698 commit 5feab0a

File tree

3 files changed, with 95 additions and 8 deletions.

test/e2e/scheduling/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ go_library(
5555
"//test/e2e/framework/resource:go_default_library",
5656
"//test/e2e/framework/service:go_default_library",
5757
"//test/e2e/framework/skipper:go_default_library",
58+
"//test/e2e/framework/testfiles:go_default_library",
5859
"//test/utils:go_default_library",
5960
"//test/utils/image:go_default_library",
6061
"//vendor/github.com/onsi/ginkgo:go_default_library",

test/e2e/scheduling/nvidia-gpus.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"regexp"
2323
"time"
2424

25+
appsv1 "k8s.io/api/apps/v1"
2526
v1 "k8s.io/api/core/v1"
2627
"k8s.io/apimachinery/pkg/api/resource"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -36,6 +37,7 @@ import (
3637
"k8s.io/kubernetes/test/e2e/framework/providers/gce"
3738
e2eresource "k8s.io/kubernetes/test/e2e/framework/resource"
3839
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
40+
e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
3941
imageutils "k8s.io/kubernetes/test/utils/image"
4042

4143
"github.com/onsi/ginkgo"
@@ -50,7 +52,6 @@ const (
5052

5153
var (
5254
gpuResourceName v1.ResourceName
53-
dsYamlURL string
5455
)
5556

5657
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
@@ -128,18 +129,23 @@ func getGPUsAvailable(f *framework.Framework) int64 {
128129
func SetupNVIDIAGPUNode(f *framework.Framework, setupResourceGatherer bool) *framework.ContainerResourceGatherer {
129130
logOSImages(f)
130131

132+
var err error
133+
var ds *appsv1.DaemonSet
131134
dsYamlURLFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
132135
if dsYamlURLFromEnv != "" {
133-
dsYamlURL = dsYamlURLFromEnv
136+
// Using DaemonSet from remote URL
137+
framework.Logf("Using remote nvidia-driver-installer daemonset manifest from %v", dsYamlURLFromEnv)
138+
ds, err = e2emanifest.DaemonSetFromURL(dsYamlURLFromEnv)
139+
framework.ExpectNoError(err, "failed get remote")
134140
} else {
135-
dsYamlURL = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
141+
// Using default local DaemonSet
142+
framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
143+
data, err := e2etestfiles.Read("test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml")
144+
framework.ExpectNoError(err, "failed to read local manifest for nvidia-driver-installer daemonset")
145+
ds, err = e2emanifest.DaemonSetFromData(data)
146+
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
136147
}
137148
gpuResourceName = e2egpu.NVIDIAGPUResourceName
138-
139-
framework.Logf("Using %v", dsYamlURL)
140-
// Creates the DaemonSet that installs Nvidia Drivers.
141-
ds, err := e2emanifest.DaemonSetFromURL(dsYamlURL)
142-
framework.ExpectNoError(err)
143149
ds.Namespace = f.Namespace.Name
144150
_, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), ds, metav1.CreateOptions{})
145151
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
test/e2e/testing-manifests/scheduling/nvidia-driver-installer.yaml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# This DaemonSet was originally referenced from
2+
# https://github.com/GoogleCloudPlatform/container-engine-accelerators/blob/master/daemonset.yaml
3+
4+
# The Dockerfile and other source for this daemonset are in
5+
# https://github.com/GoogleCloudPlatform/cos-gpu-installer
6+
7+
apiVersion: apps/v1
8+
kind: DaemonSet
9+
metadata:
10+
name: nvidia-driver-installer
11+
namespace: kube-system
12+
labels:
13+
k8s-app: nvidia-driver-installer
14+
spec:
15+
selector:
16+
matchLabels:
17+
k8s-app: nvidia-driver-installer
18+
updateStrategy:
19+
type: RollingUpdate
20+
template:
21+
metadata:
22+
labels:
23+
name: nvidia-driver-installer
24+
k8s-app: nvidia-driver-installer
25+
spec:
26+
affinity:
27+
nodeAffinity:
28+
requiredDuringSchedulingIgnoredDuringExecution:
29+
nodeSelectorTerms:
30+
- matchExpressions:
31+
- key: cloud.google.com/gke-accelerator
32+
operator: Exists
33+
tolerations:
34+
- operator: "Exists"
35+
hostNetwork: true
36+
hostPID: true
37+
volumes:
38+
- name: dev
39+
hostPath:
40+
path: /dev
41+
- name: vulkan-icd-mount
42+
hostPath:
43+
path: /home/kubernetes/bin/nvidia/vulkan/icd.d
44+
- name: nvidia-install-dir-host
45+
hostPath:
46+
path: /home/kubernetes/bin/nvidia
47+
- name: root-mount
48+
hostPath:
49+
path: /
50+
initContainers:
51+
- image: gcr.io/cos-cloud/cos-gpu-installer:v20200701
52+
name: nvidia-driver-installer
53+
resources:
54+
requests:
55+
cpu: 0.15
56+
securityContext:
57+
privileged: true
58+
env:
59+
- name: NVIDIA_INSTALL_DIR_HOST
60+
value: /home/kubernetes/bin/nvidia
61+
- name: NVIDIA_INSTALL_DIR_CONTAINER
62+
value: /usr/local/nvidia
63+
- name: VULKAN_ICD_DIR_HOST
64+
value: /home/kubernetes/bin/nvidia/vulkan/icd.d
65+
- name: VULKAN_ICD_DIR_CONTAINER
66+
value: /etc/vulkan/icd.d
67+
- name: ROOT_MOUNT_DIR
68+
value: /root
69+
volumeMounts:
70+
- name: nvidia-install-dir-host
71+
mountPath: /usr/local/nvidia
72+
- name: vulkan-icd-mount
73+
mountPath: /etc/vulkan/icd.d
74+
- name: dev
75+
mountPath: /dev
76+
- name: root-mount
77+
mountPath: /root
78+
containers:
79+
- image: "gcr.io/google-containers/pause:3.2"
80+
name: pause

0 commit comments

Comments
 (0)