Skip to content

Commit a097243

Browse files
authored
Merge pull request kubernetes#86062 from haosdent/clean-e2e-framework-gpu
e2e: move funcs of framework/gpu to e2e_node
2 parents 33bba19 + 973fddd commit a097243

File tree

3 files changed

+34
-46
lines changed

3 files changed

+34
-46
lines changed

test/e2e/framework/gpu/BUILD

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,6 @@ go_library(
55
srcs = ["gpu_util.go"],
66
importpath = "k8s.io/kubernetes/test/e2e/framework/gpu",
77
visibility = ["//visibility:public"],
8-
deps = [
9-
"//staging/src/k8s.io/api/core/v1:go_default_library",
10-
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
11-
"//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
12-
"//test/e2e/framework:go_default_library",
13-
],
148
)
159

1610
filegroup(

test/e2e/framework/gpu/gpu_util.go

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,6 @@ limitations under the License.
1616

1717
package gpu
1818

19-
import (
20-
v1 "k8s.io/api/core/v1"
21-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22-
"k8s.io/apimachinery/pkg/util/uuid"
23-
"k8s.io/kubernetes/test/e2e/framework"
24-
)
25-
2619
const (
2720
// NVIDIAGPUResourceName is the extended name of the GPU resource since v1.8
2821
// this uses the device plugin mechanism
@@ -33,31 +26,3 @@ const (
3326
// so we can override the daemonset in other setups (non COS).
3427
GPUDevicePluginDSYAML = "https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml"
3528
)
36-
37-
// NumberOfNVIDIAGPUs returns the number of GPUs advertised by a node
38-
// This is based on the Device Plugin system and expected to run on a COS based node
39-
// After the NVIDIA drivers were installed
40-
// TODO make this generic and not linked to COS only
41-
func NumberOfNVIDIAGPUs(node *v1.Node) int64 {
42-
val, ok := node.Status.Capacity[NVIDIAGPUResourceName]
43-
if !ok {
44-
return 0
45-
}
46-
return val.Value()
47-
}
48-
49-
// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
50-
func NVIDIADevicePlugin() *v1.Pod {
51-
ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
52-
framework.ExpectNoError(err)
53-
p := &v1.Pod{
54-
ObjectMeta: metav1.ObjectMeta{
55-
Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
56-
Namespace: metav1.NamespaceSystem,
57-
},
58-
Spec: ds.Spec.Template.Spec,
59-
}
60-
// Remove node affinity
61-
p.Spec.Affinity = nil
62-
return p
63-
}

test/e2e_node/gpu_device_plugin_test.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
v1 "k8s.io/api/core/v1"
2525
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/apimachinery/pkg/util/uuid"
2627
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
2728
"k8s.io/kubernetes/test/e2e/framework"
2829
"k8s.io/kubernetes/test/e2e/framework/gpu"
@@ -33,6 +34,34 @@ import (
3334
"github.com/prometheus/common/model"
3435
)
3536

37+
// numberOfNVIDIAGPUs returns the number of GPUs advertised by a node
38+
// This is based on the Device Plugin system and expected to run on a COS based node
39+
// After the NVIDIA drivers were installed
40+
// TODO make this generic and not linked to COS only
41+
func numberOfNVIDIAGPUs(node *v1.Node) int64 {
42+
val, ok := node.Status.Capacity[gpu.NVIDIAGPUResourceName]
43+
if !ok {
44+
return 0
45+
}
46+
return val.Value()
47+
}
48+
49+
// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
50+
func NVIDIADevicePlugin() *v1.Pod {
51+
ds, err := framework.DsFromManifest(gpu.GPUDevicePluginDSYAML)
52+
framework.ExpectNoError(err)
53+
p := &v1.Pod{
54+
ObjectMeta: metav1.ObjectMeta{
55+
Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
56+
Namespace: metav1.NamespaceSystem,
57+
},
58+
Spec: ds.Spec.Template.Spec,
59+
}
60+
// Remove node affinity
61+
p.Spec.Affinity = nil
62+
return p
63+
}
64+
3665
// Serial because the test restarts Kubelet
3766
var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin][NodeFeature:GPUDevicePlugin][Serial] [Disruptive]", func() {
3867
f := framework.NewDefaultFramework("device-plugin-gpus-errors")
@@ -47,15 +76,15 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
4776
}
4877

4978
ginkgo.By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
50-
devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(gpu.NVIDIADevicePlugin())
79+
devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(NVIDIADevicePlugin())
5180
framework.ExpectNoError(err)
5281

5382
ginkgo.By("Waiting for GPUs to become available on the local node")
5483
gomega.Eventually(func() bool {
55-
return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
84+
return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
5685
}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
5786

58-
if gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
87+
if numberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
5988
ginkgo.Skip("Not enough GPUs to execute this test (at least two needed)")
6089
}
6190
})
@@ -95,7 +124,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
95124
restartKubelet()
96125
framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
97126
gomega.Eventually(func() bool {
98-
return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
127+
return numberOfNVIDIAGPUs(getLocalNode(f)) > 0
99128
}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
100129
p2 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))
101130

@@ -110,7 +139,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
110139
gomega.Eventually(func() bool {
111140
node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{})
112141
framework.ExpectNoError(err)
113-
return gpu.NumberOfNVIDIAGPUs(node) <= 0
142+
return numberOfNVIDIAGPUs(node) <= 0
114143
}, 10*time.Minute, framework.Poll).Should(gomega.BeTrue())
115144
ginkgo.By("Checking that scheduled pods can continue to run even after we delete device plugin.")
116145
ensurePodContainerRestart(f, p1.Name, p1.Name)

0 commit comments

Comments (0)