Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cmd/nvidia-vgpu-dm/apply/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ import (
// VGPUConfig applies the selected vGPU config to the node
func VGPUConfig(c *Context) error {
return assert.WalkSelectedVGPUConfigForEachGPU(c.VGPUConfig, func(vc *v1.VGPUConfigSpec, i int, d types.DeviceID) error {
configManager := vgpu.NewNvlibVGPUConfigManager()
configManager, err := vgpu.NewNvlibVGPUConfigManager()
if err != nil {
return fmt.Errorf("error creating vGPU config manager: %v", err)
}
current, err := configManager.GetVGPUConfig(i)
if err != nil {
return fmt.Errorf("error getting vGPU config: %v", err)
Expand Down
5 changes: 4 additions & 1 deletion cmd/nvidia-vgpu-dm/assert/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ func VGPUConfig(c *Context) error {

matched := make([]bool, len(gpus))
err = WalkSelectedVGPUConfigForEachGPU(c.VGPUConfig, func(vc *v1.VGPUConfigSpec, i int, d types.DeviceID) error {
configManager := vgpu.NewNvlibVGPUConfigManager()
configManager, err := vgpu.NewNvlibVGPUConfigManager()
if err != nil {
return fmt.Errorf("error creating vGPU config manager: %v", err)
}
current, err := configManager.GetVGPUConfig(i)
if err != nil {
return fmt.Errorf("error getting vGPU config: %v", err)
Expand Down
2 changes: 2 additions & 0 deletions deployments/container/Dockerfile.distroless
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ FROM nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.13.1 as mig-manager
FROM nvcr.io/nvidia/distroless/go:v4.0.3-dev

ENV NVIDIA_VISIBLE_DEVICES=void
# Preload NVIDIA NVML library from the driver mounted at /driver-root
ENV LD_PRELOAD=/driver-root/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1

COPY --from=build /artifacts/nvidia-vgpu-dm /usr/bin/nvidia-vgpu-dm
COPY --from=build /artifacts/nvidia-k8s-vgpu-dm /usr/bin/nvidia-k8s-vgpu-dm
Expand Down
43 changes: 43 additions & 0 deletions internal/vgpu/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package vgpu

import (
"fmt"

"github.com/NVIDIA/go-nvlib/pkg/nvpci"
)

// Interface is the exported vGPU manager API (implemented by *nvvfio and *nvmdev).
// Callers obtain a value of this type from New, which picks one of the two
// implementations.
type Interface interface {
// GetAllDevices returns a slice of Device values, or an error.
// NOTE(review): presumably these are the vGPU devices that currently exist
// on the node; confirm against the implementations.
GetAllDevices() ([]Device, error)
// GetAllParentDevices returns a slice of ParentDevice values, or an error.
// NOTE(review): presumably these are the devices on which vGPU devices can
// be created; confirm against the implementations.
GetAllParentDevices() ([]ParentDevice, error)
// CreateVGPUDevices takes an NVIDIA PCI device, a vGPU type and a count,
// and reports failure through the returned error.
// NOTE(review): presumably creates count vGPU devices of type vgpuType on
// device; confirm against the implementations.
CreateVGPUDevices(device *nvpci.NvidiaPCIDevice, vgpuType string, count int) error
}

// ParentDevice is implemented by *vfioParentDevice and *mdevParentDevice.
// Interface.GetAllParentDevices returns values of this type.
type ParentDevice interface {
// GetPhysicalFunction returns the NVIDIA PCI device associated with this
// parent device.
GetPhysicalFunction() *nvpci.NvidiaPCIDevice
// IsVGPUTypeAvailable reports a boolean result for the given string, or an
// error.
// NOTE(review): the argument is presumably a vGPU type name; confirm.
IsVGPUTypeAvailable(string) (bool, error)
// CreateVGPUDevice takes two strings and reports failure through the
// returned error.
// NOTE(review): the parameters are unnamed here; presumably a vGPU type and
// an identifier for the new device. Confirm against the implementations.
CreateVGPUDevice(string, string) error
// GetAvailableVGPUInstances returns an integer for the given string, or an
// error.
// NOTE(review): presumably the number of further instances of the named
// vGPU type that can be created; confirm.
GetAvailableVGPUInstances(string) (int, error)
}

// Device is implemented by *vfioDevice and *mdevDevice.
// Interface.GetAllDevices returns values of this type.
type Device interface {
// GetPhysicalFunction returns the NVIDIA PCI device associated with this
// device.
GetPhysicalFunction() *nvpci.NvidiaPCIDevice
// Delete reports failure through the returned error.
// NOTE(review): presumably removes this vGPU device from the system;
// confirm against the implementations.
Delete() error
}

// New constructs the vGPU manager for this node.
//
// It builds the VFIO-backed manager with newNvvfio and asks it whether VFIO is
// enabled via isVFIOEnabled(0). If that check fails, the error is wrapped and
// returned with a nil Interface. If the check reports true, the VFIO-backed
// manager is returned; otherwise a manager from newNvmdev is returned instead.
//
// NOTE(review): the meaning of the literal 0 passed to isVFIOEnabled is not
// visible here (possibly a device index); confirm.
func New() (Interface, error) {
	vfio := newNvvfio()
	enabled, err := vfio.isVFIOEnabled(0)

	switch {
	case err != nil:
		return nil, fmt.Errorf("error checking VFIO mode: %w", err)
	case enabled:
		return vfio, nil
	default:
		return newNvmdev(), nil
	}
}
Loading