@@ -18,6 +18,7 @@ package node
18
18
19
19
import (
20
20
"context"
21
+ "fmt"
21
22
"os"
22
23
"regexp"
23
24
"time"
@@ -292,12 +293,11 @@ func SetupEnvironmentAndSkipIfNeeded(ctx context.Context, f *framework.Framework
292
293
}
293
294
}
294
295
295
- func areGPUsAvailableOnAllSchedulableNodes (ctx context.Context , clientSet clientset.Interface ) bool {
296
+ func areGPUsAvailableOnAllSchedulableNodes (ctx context.Context , clientSet clientset.Interface ) error {
296
297
framework .Logf ("Getting list of Nodes from API server" )
297
298
nodeList , err := clientSet .CoreV1 ().Nodes ().List (ctx , metav1.ListOptions {})
298
299
if err != nil {
299
- framework .Logf ("Unexpected error getting node list: %v" , err )
300
- return false
300
+ return fmt .Errorf ("unexpected error getting node list: %w" , err )
301
301
}
302
302
for _ , node := range nodeList .Items {
303
303
if node .Spec .Unschedulable {
@@ -308,12 +308,11 @@ func areGPUsAvailableOnAllSchedulableNodes(ctx context.Context, clientSet client
308
308
}
309
309
framework .Logf ("gpuResourceName %s" , e2egpu .NVIDIAGPUResourceName )
310
310
if val , ok := node .Status .Capacity [e2egpu .NVIDIAGPUResourceName ]; ! ok || val .Value () == 0 {
311
- framework .Logf ("Nvidia GPUs not available on Node: %q" , node .Name )
312
- return false
311
+ return fmt .Errorf ("nvidia GPUs not available on Node: %q" , node .Name )
313
312
}
314
313
}
315
314
framework .Logf ("Nvidia GPUs exist on all schedulable nodes" )
316
- return true
315
+ return nil
317
316
}
318
317
319
318
func logOSImages (ctx context.Context , f * framework.Framework ) {
@@ -389,9 +388,9 @@ func waitForGPUs(ctx context.Context, f *framework.Framework, namespace, name st
389
388
390
389
// Wait for Nvidia GPUs to be available on nodes
391
390
framework .Logf ("Waiting for drivers to be installed and GPUs to be available in Node Capacity..." )
392
- gomega .Eventually (ctx , func (ctx context.Context ) bool {
391
+ gomega .Eventually (ctx , func (ctx context.Context ) error {
393
392
return areGPUsAvailableOnAllSchedulableNodes (ctx , f .ClientSet )
394
- }, driverInstallTimeout , time .Second ).Should (gomega .BeTrueBecause ( "expected GPU resources to be available within the timout" ))
393
+ }, driverInstallTimeout , time .Second ).Should (gomega .Succeed ( ))
395
394
}
396
395
397
396
// StartJob starts a simple CUDA job that requests gpu and the specified number of completions
0 commit comments