@@ -18,6 +18,7 @@ package node
18
18
19
19
import (
20
20
"context"
21
+ "fmt"
21
22
"os"
22
23
"regexp"
23
24
"time"
@@ -292,10 +293,12 @@ func SetupEnvironmentAndSkipIfNeeded(ctx context.Context, f *framework.Framework
292
293
}
293
294
}
294
295
295
- func areGPUsAvailableOnAllSchedulableNodes (ctx context.Context , clientSet clientset.Interface ) bool {
296
+ func areGPUsAvailableOnAllSchedulableNodes (ctx context.Context , clientSet clientset.Interface ) error {
296
297
framework .Logf ("Getting list of Nodes from API server" )
297
298
nodeList , err := clientSet .CoreV1 ().Nodes ().List (ctx , metav1.ListOptions {})
298
- framework .ExpectNoError (err , "getting node list" )
299
+ if err != nil {
300
+ return fmt .Errorf ("unexpected error getting node list: %w" , err )
301
+ }
299
302
for _ , node := range nodeList .Items {
300
303
if node .Spec .Unschedulable {
301
304
continue
@@ -305,12 +308,11 @@ func areGPUsAvailableOnAllSchedulableNodes(ctx context.Context, clientSet client
305
308
}
306
309
framework .Logf ("gpuResourceName %s" , e2egpu .NVIDIAGPUResourceName )
307
310
if val , ok := node .Status .Capacity [e2egpu .NVIDIAGPUResourceName ]; ! ok || val .Value () == 0 {
308
- framework .Logf ("Nvidia GPUs not available on Node: %q" , node .Name )
309
- return false
311
+ return fmt .Errorf ("nvidia GPUs not available on Node: %q" , node .Name )
310
312
}
311
313
}
312
314
framework .Logf ("Nvidia GPUs exist on all schedulable nodes" )
313
- return true
315
+ return nil
314
316
}
315
317
316
318
func logOSImages (ctx context.Context , f * framework.Framework ) {
@@ -386,9 +388,9 @@ func waitForGPUs(ctx context.Context, f *framework.Framework, namespace, name st
386
388
387
389
// Wait for Nvidia GPUs to be available on nodes
388
390
framework .Logf ("Waiting for drivers to be installed and GPUs to be available in Node Capacity..." )
389
- gomega .Eventually (ctx , func (ctx context.Context ) bool {
391
+ gomega .Eventually (ctx , func (ctx context.Context ) error {
390
392
return areGPUsAvailableOnAllSchedulableNodes (ctx , f .ClientSet )
391
- }, driverInstallTimeout , time .Second ).Should (gomega .BeTrueBecause ( "expected GPU resources to be available within the timout" ))
393
+ }, driverInstallTimeout , time .Second ).Should (gomega .Succeed ( ))
392
394
}
393
395
394
396
// StartJob starts a simple CUDA job that requests gpu and the specified number of completions
0 commit comments