Skip to content

Commit 51c55ab

Browse files
authored
Merge pull request #1061 from elezar/main-fix-fail-on-init-error
Honor fail-on-init-error when no resources are found
2 parents: d5c69f2 + bde27cd; commit 51c55ab

File tree

2 files changed

+22
-10
lines changed

2 files changed

+22
-10
lines changed

cmd/gpu-feature-discovery/main.go

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,11 @@ func start(c *cli.Context, cfg *Config) error {
186186
nvinfo.WithDeviceLib(devicelib),
187187
)
188188

189-
manager := resource.NewManager(infolib, nvmllib, devicelib, config)
189+
manager, err := resource.NewManager(infolib, nvmllib, devicelib, config)
190+
if err != nil {
191+
return fmt.Errorf("failed to create resource manager: %w", err)
192+
193+
}
190194
vgpul := vgpu.NewVGPULib(vgpu.NewNvidiaPCILib())
191195

192196
var clientSets flags.ClientSets

internal/resource/factory.go

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717
package resource
1818

1919
import (
20+
"fmt"
21+
2022
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
2123
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
2224
"github.com/NVIDIA/go-nvml/pkg/nvml"
@@ -26,9 +28,16 @@ import (
2628
)
2729

2830
// NewManager is a factory method that creates a resource Manager based on the specified config.
29-
func NewManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) Manager {
30-
manager := getManager(infolib, nvmllib, devicelib, *config.Flags.DeviceDiscoveryStrategy)
31-
return WithConfig(manager, config)
31+
func NewManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) (Manager, error) {
32+
manager, err := getManager(infolib, nvmllib, devicelib, *config.Flags.DeviceDiscoveryStrategy)
33+
if err != nil {
34+
if *config.Flags.FailOnInitError {
35+
return nil, err
36+
}
37+
klog.ErrorS(err, "using empty manager")
38+
return NewNullManager(), nil
39+
}
40+
return WithConfig(manager, config), nil
3241
}
3342

3443
// WithConfig modifies a manager depending on the specified config.
@@ -42,21 +51,20 @@ func WithConfig(manager Manager, config *spec.Config) Manager {
4251
}
4352

4453
// getManager returns the resource manager depending on the system configuration.
45-
func getManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, strategy string) Manager {
54+
func getManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, strategy string) (Manager, error) {
4655
resolved := resolveMode(infolib, strategy)
4756
switch resolved {
4857
case "nvml":
4958
klog.Info("Using NVML manager")
50-
return NewNVMLManager(nvmllib, devicelib)
59+
return NewNVMLManager(nvmllib, devicelib), nil
5160
case "tegra":
5261
klog.Info("Using CUDA manager")
53-
return NewCudaManager()
62+
return NewCudaManager(), nil
5463
case "vfio":
5564
klog.Info("Using Vfio manager")
56-
return NewVfioManager()
65+
return NewVfioManager(), nil
5766
default:
58-
klog.Warningf("Unsupported strategy detected: %v using empty manager.", resolved)
59-
return NewNullManager()
67+
return nil, fmt.Errorf("unsupported strategy %v", resolved)
6068
}
6169
}
6270

0 commit comments

Comments
 (0)