Skip to content

Commit 349c713

Browse files
committed
Wait for GPUs even for AWS kubetest2 ec2 harness
Signed-off-by: Davanum Srinivas <[email protected]>
1 parent 5973acc commit 349c713

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

test/e2e/node/gpu.go

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,11 @@ print(f"Time taken for {n}x{n} matrix multiplication: {end_time - start_time:.2f
263263
func SetupEnvironmentAndSkipIfNeeded(ctx context.Context, f *framework.Framework, clientSet clientset.Interface) {
264264
if framework.ProviderIs("gce") {
265265
SetupNVIDIAGPUNode(ctx, f)
266+
} else if framework.ProviderIs("aws") {
267+
// see nvidia-device-plugin.yml in https://github.com/NVIDIA/k8s-device-plugin/tree/main/deployments/static
268+
waitForGPUs(ctx, f, "kube-system", "nvidia-device-plugin-daemonset")
266269
}
270+
267271
nodes, err := e2enode.GetReadySchedulableNodes(ctx, clientSet)
268272
framework.ExpectNoError(err)
269273
capacity := 0
@@ -281,10 +285,10 @@ func SetupEnvironmentAndSkipIfNeeded(ctx context.Context, f *framework.Framework
281285
allocatable += int(val.Value())
282286
}
283287
if capacity == 0 {
284-
e2eskipper.Skipf("%d ready nodes do not have any Nvidia GPU(s). Skipping...", len(nodes.Items))
288+
framework.Failf("%d ready nodes do not have any Nvidia GPU(s). Bailing out...", len(nodes.Items))
285289
}
286290
if allocatable == 0 {
287-
e2eskipper.Skipf("%d ready nodes do not have any allocatable Nvidia GPU(s). Skipping...", len(nodes.Items))
291+
framework.Failf("%d ready nodes do not have any allocatable Nvidia GPU(s). Bailing out...", len(nodes.Items))
288292
}
289293
}
290294

@@ -351,7 +355,11 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework) {
351355
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
352356
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
353357

354-
pods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, ds.Namespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
358+
waitForGPUs(ctx, f, ds.Namespace, ds.Name)
359+
}
360+
361+
func waitForGPUs(ctx context.Context, f *framework.Framework, namespace, name string) {
362+
pods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, namespace, name, extensionsinternal.Kind("DaemonSet"))
355363
framework.ExpectNoError(err, "failed to get pods controlled by the nvidia-driver-installer daemonset")
356364

357365
devicepluginPods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", extensionsinternal.Kind("DaemonSet"))

0 commit comments

Comments
 (0)