Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cmd/gpu-feature-discovery/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,11 @@ func start(c *cli.Context, cfg *Config) error {
nvinfo.WithDeviceLib(devicelib),
)

manager := resource.NewManager(infolib, nvmllib, devicelib, config)
manager, err := resource.NewManager(infolib, nvmllib, devicelib, config)
if err != nil {
return fmt.Errorf("failed to create resource manager: %w", err)

}
vgpul := vgpu.NewVGPULib(vgpu.NewNvidiaPCILib())

var clientSets flags.ClientSets
Expand Down
26 changes: 17 additions & 9 deletions internal/resource/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package resource

import (
"fmt"

"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvml/pkg/nvml"
Expand All @@ -26,9 +28,16 @@ import (
)

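// NewManager is a factory method that creates a resource Manager based on the specified config.
// If no manager can be created for the configured device discovery strategy, it returns the
// error when the FailOnInitError flag is set; otherwise it logs the error and returns a null manager.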
func NewManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) Manager {
manager := getManager(infolib, nvmllib, devicelib, *config.Flags.DeviceDiscoveryStrategy)
return WithConfig(manager, config)
func NewManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, config *spec.Config) (Manager, error) {
// Pick the manager implementation for the configured device discovery strategy.
manager, err := getManager(infolib, nvmllib, devicelib, *config.Flags.DeviceDiscoveryStrategy)
if err != nil {
// The FailOnInitError flag decides whether the selection failure is
// returned to the caller or tolerated.
if *config.Flags.FailOnInitError {
return nil, err
}
// Tolerated: log the error and fall back to the null manager. Note that
// this fallback is returned as-is and is not passed through WithConfig.
klog.ErrorS(err, "using empty manager")
return NewNullManager(), nil
}
// Apply the config-dependent modifications to the selected manager.
return WithConfig(manager, config), nil
}

// WithConfig modifies a manager depending on the specified config.
Expand All @@ -42,21 +51,20 @@ func WithConfig(manager Manager, config *spec.Config) Manager {
}

// getManager returns the resource manager depending on the system configuration.
// It returns an error if the resolved strategy is not one of the supported values.
func getManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, strategy string) Manager {
func getManager(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interface, strategy string) (Manager, error) {
resolved := resolveMode(infolib, strategy)
switch resolved {
case "nvml":
klog.Info("Using NVML manager")
return NewNVMLManager(nvmllib, devicelib)
return NewNVMLManager(nvmllib, devicelib), nil
case "tegra":
klog.Info("Using CUDA manager")
return NewCudaManager()
return NewCudaManager(), nil
case "vfio":
klog.Info("Using Vfio manager")
return NewVfioManager()
return NewVfioManager(), nil
default:
klog.Warningf("Unsupported strategy detected: %v using empty manager.", resolved)
return NewNullManager()
return nil, fmt.Errorf("unsupported strategy %v", resolved)
}
}

Expand Down