@@ -263,7 +263,11 @@ print(f"Time taken for {n}x{n} matrix multiplication: {end_time - start_time:.2f
263
263
func SetupEnvironmentAndSkipIfNeeded (ctx context.Context , f * framework.Framework , clientSet clientset.Interface ) {
264
264
if framework .ProviderIs ("gce" ) {
265
265
SetupNVIDIAGPUNode (ctx , f )
266
+ } else if framework .ProviderIs ("aws" ) {
267
+ // On AWS the daemonset awaited below is expected to be the one defined by nvidia-device-plugin.yml in https://github.com/NVIDIA/k8s-device-plugin/tree/main/deployments/static
268
+ waitForGPUs (ctx , f , "kube-system" , "nvidia-device-plugin-daemonset" )
266
269
}
270
+
267
271
nodes , err := e2enode .GetReadySchedulableNodes (ctx , clientSet )
268
272
framework .ExpectNoError (err )
269
273
capacity := 0
@@ -281,10 +285,10 @@ func SetupEnvironmentAndSkipIfNeeded(ctx context.Context, f *framework.Framework
281
285
allocatable += int (val .Value ())
282
286
}
283
287
if capacity == 0 {
284
- e2eskipper . Skipf ("%d ready nodes do not have any Nvidia GPU(s). Skipping ..." , len (nodes .Items ))
288
+ framework . Failf ("%d ready nodes do not have any Nvidia GPU(s). Bailing out ..." , len (nodes .Items ))
285
289
}
286
290
if allocatable == 0 {
287
- e2eskipper . Skipf ("%d ready nodes do not have any allocatable Nvidia GPU(s). Skipping ..." , len (nodes .Items ))
291
+ framework . Failf ("%d ready nodes do not have any allocatable Nvidia GPU(s). Bailing out ..." , len (nodes .Items ))
288
292
}
289
293
}
290
294
@@ -296,6 +300,9 @@ func areGPUsAvailableOnAllSchedulableNodes(ctx context.Context, clientSet client
296
300
if node .Spec .Unschedulable {
297
301
continue
298
302
}
303
+ if _ , ok := node .Labels [framework .ControlPlaneLabel ]; ok {
304
+ continue
305
+ }
299
306
framework .Logf ("gpuResourceName %s" , e2egpu .NVIDIAGPUResourceName )
300
307
if val , ok := node .Status .Capacity [e2egpu .NVIDIAGPUResourceName ]; ! ok || val .Value () == 0 {
301
308
framework .Logf ("Nvidia GPUs not available on Node: %q" , node .Name )
@@ -351,7 +358,11 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework) {
351
358
framework .ExpectNoError (err , "failed to create nvidia-driver-installer daemonset" )
352
359
framework .Logf ("Successfully created daemonset to install Nvidia drivers." )
353
360
354
- pods , err := e2eresource .WaitForControlledPods (ctx , f .ClientSet , ds .Namespace , ds .Name , extensionsinternal .Kind ("DaemonSet" ))
361
+ waitForGPUs (ctx , f , ds .Namespace , ds .Name )
362
+ }
363
+
364
+ func waitForGPUs (ctx context.Context , f * framework.Framework , namespace , name string ) {
365
+ pods , err := e2eresource .WaitForControlledPods (ctx , f .ClientSet , namespace , name , extensionsinternal .Kind ("DaemonSet" ))
355
366
framework .ExpectNoError (err , "failed to get pods controlled by the nvidia-driver-installer daemonset" )
356
367
357
368
devicepluginPods , err := e2eresource .WaitForControlledPods (ctx , f .ClientSet , "kube-system" , "nvidia-gpu-device-plugin" , extensionsinternal .Kind ("DaemonSet" ))
0 commit comments