diff --git a/Makefile b/Makefile index 07b2bf0..58e6715 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean +.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries build-preview-cli release-prep clean # Directory where local binaries will be installed BIN_DIR ?= $(CURDIR)/bin diff --git a/cmd/api/api/api.go b/cmd/api/api/api.go index 5fd5033..f511cbf 100644 --- a/cmd/api/api/api.go +++ b/cmd/api/api/api.go @@ -2,6 +2,7 @@ package api import ( "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -17,6 +18,7 @@ type ApiService struct { InstanceManager instances.Manager VolumeManager volumes.Manager NetworkManager network.Manager + DeviceManager devices.Manager IngressManager ingress.Manager } @@ -29,6 +31,7 @@ func New( instanceManager instances.Manager, volumeManager volumes.Manager, networkManager network.Manager, + deviceManager devices.Manager, ingressManager ingress.Manager, ) *ApiService { return &ApiService{ @@ -37,6 +40,7 @@ func New( InstanceManager: instanceManager, VolumeManager: volumeManager, NetworkManager: networkManager, + DeviceManager: deviceManager, IngressManager: ingressManager, } } diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index 8aaa406..c5984fd 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" 
"github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" mw "github.com/onkernel/hypeman/lib/middleware" @@ -34,11 +35,12 @@ func newTestService(t *testing.T) *ApiService { systemMgr := system.NewManager(p) networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := instances.ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB } - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) // Register cleanup for orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -50,6 +52,7 @@ func newTestService(t *testing.T) *ApiService { ImageManager: imageMgr, InstanceManager: instanceMgr, VolumeManager: volumeMgr, + DeviceManager: deviceMgr, } } diff --git a/cmd/api/api/devices.go b/cmd/api/api/devices.go new file mode 100644 index 0000000..d7d2dd2 --- /dev/null +++ b/cmd/api/api/devices.go @@ -0,0 +1,167 @@ +package api + +import ( + "context" + "errors" + + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/oapi" +) + +// ListDevices returns all registered devices +func (s *ApiService) ListDevices(ctx context.Context, request oapi.ListDevicesRequestObject) (oapi.ListDevicesResponseObject, error) { + deviceList, err := s.DeviceManager.ListDevices(ctx) + if err != nil { + return oapi.ListDevices500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + result := make([]oapi.Device, len(deviceList)) + for i, d := range deviceList { + result[i] = deviceToOAPI(d) + } + + return oapi.ListDevices200JSONResponse(result), nil +} + +// ListAvailableDevices discovers passthrough-capable devices on the host +func (s *ApiService) ListAvailableDevices(ctx context.Context, request oapi.ListAvailableDevicesRequestObject) 
(oapi.ListAvailableDevicesResponseObject, error) { + available, err := s.DeviceManager.ListAvailableDevices(ctx) + if err != nil { + return oapi.ListAvailableDevices500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + result := make([]oapi.AvailableDevice, len(available)) + for i, d := range available { + result[i] = availableDeviceToOAPI(d) + } + + return oapi.ListAvailableDevices200JSONResponse(result), nil +} + +// CreateDevice registers a new device for passthrough +func (s *ApiService) CreateDevice(ctx context.Context, request oapi.CreateDeviceRequestObject) (oapi.CreateDeviceResponseObject, error) { + var name string + if request.Body.Name != nil { + name = *request.Body.Name + } + req := devices.CreateDeviceRequest{ + Name: name, + PCIAddress: request.Body.PciAddress, + } + + device, err := s.DeviceManager.CreateDevice(ctx, req) + if err != nil { + switch { + case errors.Is(err, devices.ErrInvalidName): + return oapi.CreateDevice400JSONResponse{ + Code: "invalid_name", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrInvalidPCIAddress): + return oapi.CreateDevice400JSONResponse{ + Code: "invalid_pci_address", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrDeviceNotFound): + return oapi.CreateDevice404JSONResponse{ + Code: "device_not_found", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrAlreadyExists), errors.Is(err, devices.ErrNameExists): + return oapi.CreateDevice409JSONResponse{ + Code: "conflict", + Message: err.Error(), + }, nil + default: + return oapi.CreateDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + return oapi.CreateDevice201JSONResponse(deviceToOAPI(*device)), nil +} + +// GetDevice returns a device by ID or name +func (s *ApiService) GetDevice(ctx context.Context, request oapi.GetDeviceRequestObject) (oapi.GetDeviceResponseObject, error) { + device, err := s.DeviceManager.GetDevice(ctx, request.Id) + if err != 
nil { + if errors.Is(err, devices.ErrNotFound) { + return oapi.GetDevice404JSONResponse{ + Code: "not_found", + Message: "device not found", + }, nil + } + return oapi.GetDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + return oapi.GetDevice200JSONResponse(deviceToOAPI(*device)), nil +} + +// DeleteDevice unregisters a device +func (s *ApiService) DeleteDevice(ctx context.Context, request oapi.DeleteDeviceRequestObject) (oapi.DeleteDeviceResponseObject, error) { + err := s.DeviceManager.DeleteDevice(ctx, request.Id) + if err != nil { + switch { + case errors.Is(err, devices.ErrNotFound): + return oapi.DeleteDevice404JSONResponse{ + Code: "not_found", + Message: "device not found", + }, nil + case errors.Is(err, devices.ErrInUse): + return oapi.DeleteDevice409JSONResponse{ + Code: "in_use", + Message: "device is attached to an instance", + }, nil + default: + return oapi.DeleteDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + return oapi.DeleteDevice204Response{}, nil +} + +// Helper functions + +func deviceToOAPI(d devices.Device) oapi.Device { + deviceType := oapi.DeviceType(d.Type) + return oapi.Device{ + Id: d.Id, + Name: &d.Name, + Type: deviceType, + PciAddress: d.PCIAddress, + VendorId: d.VendorID, + DeviceId: d.DeviceID, + IommuGroup: d.IOMMUGroup, + BoundToVfio: d.BoundToVFIO, + AttachedTo: d.AttachedTo, + CreatedAt: d.CreatedAt, + } +} + +func availableDeviceToOAPI(d devices.AvailableDevice) oapi.AvailableDevice { + return oapi.AvailableDevice{ + PciAddress: d.PCIAddress, + VendorId: d.VendorID, + DeviceId: d.DeviceID, + VendorName: &d.VendorName, + DeviceName: &d.DeviceName, + IommuGroup: d.IOMMUGroup, + CurrentDriver: d.CurrentDriver, + } +} + + diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 8adb8c8..acbd37c 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -96,6 +96,12 @@ func (s *ApiService) CreateInstance(ctx 
context.Context, request oapi.CreateInst networkEnabled = *request.Body.Network.Enabled } + // Parse devices (GPU passthrough) + var deviceRefs []string + if request.Body.Devices != nil { + deviceRefs = *request.Body.Devices + } + // Parse volumes var volumes []instances.VolumeAttachment if request.Body.Volumes != nil { @@ -139,6 +145,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst Vcpus: vcpus, Env: env, NetworkEnabled: networkEnabled, + Devices: deviceRefs, Volumes: volumes, } diff --git a/cmd/api/main.go b/cmd/api/main.go index e2cb704..48c9e31 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -172,6 +172,18 @@ func run() error { } logger.Info("Network manager initialized") + // Reconcile device state (clears orphaned attachments from crashed VMs) + // Set up liveness checker so device reconciliation can accurately detect orphaned attachments + logger.Info("Reconciling device state...") + livenessChecker := instances.NewLivenessChecker(app.InstanceManager) + if livenessChecker != nil { + app.DeviceManager.SetLivenessChecker(livenessChecker) + } + if err := app.DeviceManager.ReconcileDevices(app.Ctx); err != nil { + logger.Error("failed to reconcile device state", "error", err) + return fmt.Errorf("reconcile device state: %w", err) + } + // Initialize ingress manager (starts Caddy daemon and DNS server for dynamic upstreams) logger.Info("Initializing ingress manager...") if err := app.IngressManager.Initialize(app.Ctx); err != nil { diff --git a/cmd/api/wire.go b/cmd/api/wire.go index 21f9ddf..dfa2fc1 100644 --- a/cmd/api/wire.go +++ b/cmd/api/wire.go @@ -9,6 +9,7 @@ import ( "github.com/google/wire" "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -27,6 +28,7 @@ type application struct { ImageManager 
images.Manager SystemManager system.Manager NetworkManager network.Manager + DeviceManager devices.Manager InstanceManager instances.Manager VolumeManager volumes.Manager IngressManager ingress.Manager @@ -44,6 +46,7 @@ func initializeApp() (*application, func(), error) { providers.ProvideImageManager, providers.ProvideSystemManager, providers.ProvideNetworkManager, + providers.ProvideDeviceManager, providers.ProvideInstanceManager, providers.ProvideVolumeManager, providers.ProvideIngressManager, diff --git a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go index 5a94276..6b3e81a 100644 --- a/cmd/api/wire_gen.go +++ b/cmd/api/wire_gen.go @@ -8,8 +8,11 @@ package main import ( "context" + "log/slog" + "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -18,10 +21,7 @@ import ( "github.com/onkernel/hypeman/lib/registry" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/volumes" - "log/slog" -) -import ( _ "embed" ) @@ -39,11 +39,12 @@ func initializeApp() (*application, func(), error) { } systemManager := providers.ProvideSystemManager(paths) networkManager := providers.ProvideNetworkManager(paths, config) + devicesManager := providers.ProvideDeviceManager(paths) volumesManager, err := providers.ProvideVolumeManager(paths, config) if err != nil { return nil, nil, err } - instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, networkManager, volumesManager) + instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, networkManager, devicesManager, volumesManager) if err != nil { return nil, nil, err } @@ -55,7 +56,7 @@ func initializeApp() (*application, func(), error) { if err != nil { return nil, nil, err } - apiService := api.New(config, manager, instancesManager, 
volumesManager, networkManager, ingressManager) + apiService := api.New(config, manager, instancesManager, volumesManager, networkManager, devicesManager, ingressManager) mainApplication := &application{ Ctx: context, Logger: logger, @@ -63,6 +64,7 @@ func initializeApp() (*application, func(), error) { ImageManager: manager, SystemManager: systemManager, NetworkManager: networkManager, + DeviceManager: devicesManager, InstanceManager: instancesManager, VolumeManager: volumesManager, IngressManager: ingressManager, @@ -83,6 +85,7 @@ type application struct { ImageManager images.Manager SystemManager system.Manager NetworkManager network.Manager + DeviceManager devices.Manager InstanceManager instances.Manager VolumeManager volumes.Manager IngressManager ingress.Manager diff --git a/go.mod b/go.mod index da8f315..0359d7b 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,6 @@ require ( go.opentelemetry.io/otel/trace v1.38.0 golang.org/x/sync v0.17.0 golang.org/x/sys v0.38.0 - golang.org/x/term v0.37.0 google.golang.org/grpc v1.77.0 google.golang.org/protobuf v1.36.10 gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54 @@ -49,15 +48,22 @@ require ( require ( github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apex/log v1.9.0 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/docker/cli v28.2.2+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/docker v28.2.2+incompatible // 
indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect + github.com/docker/go-connections v0.5.0 // indirect + github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect @@ -65,6 +71,7 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect github.com/go-test/deep v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/mux v1.8.1 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect @@ -74,6 +81,8 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mdlayher/socket v0.5.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/sys/sequential v0.6.0 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect @@ -92,6 +101,7 @@ require ( github.com/vishvananda/netns v0.0.5 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect go.opentelemetry.io/otel/log v0.14.0 // indirect go.opentelemetry.io/proto/otlp v1.7.1 // indirect diff --git a/go.sum b/go.sum index 0ee9efd..6772c9e 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,9 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c 
h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= @@ -15,8 +19,17 @@ github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2y github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= +github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= +github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= +github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod 
h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8= github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= @@ -33,8 +46,14 @@ github.com/docker/cli v28.2.2+incompatible h1:qzx5BNUDFqlvyq4AHzdNB7gSyVTmU4cgsy github.com/docker/cli v28.2.2+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= +github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= +github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= +github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -59,6 +78,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-test/deep v1.1.1 
h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U= github.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -90,6 +111,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= @@ -117,12 +140,22 @@ github.com/miekg/dns v1.1.68 h1:jsSRkNozw7G/mnmXULynzMNIsgY2dHC8LO6U6Ij2JEA= github.com/miekg/dns v1.1.68/go.mod h1:fujopn7TB3Pu3JM69XaawiU0wqjpL9/8xGop5UrTPps= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= 
+github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw= +github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/nrednav/cuid2 v1.1.0 h1:Y2P9Fo1Iz7lKuwcn+fS0mbxkNvEqoNLUtm0+moHCnYc= github.com/nrednav/cuid2 v1.1.0/go.mod h1:jBjkJAI+QLM4EUGvtwGDHC1cP1QQrRNfLo/A7qJFDhA= github.com/oapi-codegen/nethttp-middleware v1.1.2 h1:TQwEU3WM6ifc7ObBEtiJgbRPaCe513tvJpiMJjypVPA= @@ -198,10 +231,14 @@ github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zd github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= +github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0 h1:bwnLpizECbPr1RrQ27waeY2SPIPeccCx/xLuoYADZ9s= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0/go.mod h1:3nWlOiiqA9UtUnrcNk82mYasNxD8ehOspL0gOfEo6Y4= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 h1:PeBoRj6af6xMI7qCupwFvTbbnd49V7n5YpG6pg8iDYQ= go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0/go.mod h1:ingqBCtMCe8I4vpz/UVzCW6sxoqgZB37nao91mLQ3Bw= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= @@ -214,6 +251,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZF go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= go.opentelemetry.io/otel/metric v1.38.0 
h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= @@ -234,37 +273,55 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 h1:6/3JGEh1C88g7m+qzzTbl3A0FtsLguXieqofVLU/JAo= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= -golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.30.0 
h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4= diff --git a/lib/devices/GPU.md b/lib/devices/GPU.md new file mode 100644 index 0000000..55c7367 --- /dev/null +++ b/lib/devices/GPU.md @@ -0,0 +1,177 @@ +# GPU Passthrough Support + +This document covers NVIDIA GPU passthrough specifics. For general device passthrough, see [README.md](README.md). 
+ +## How GPU Passthrough Works + +hypeman supports NVIDIA GPU passthrough via VFIO, with automatic driver injection: + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ hypeman Initrd (built at startup) │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ /lib/modules//kernel/drivers/gpu/ │ │ +│ │ ├── nvidia.ko │ │ +│ │ ├── nvidia-uvm.ko │ │ +│ │ ├── nvidia-modeset.ko │ │ +│ │ └── nvidia-drm.ko │ │ +│ ├──────────────────────────────────────────────────────────────┤ │ +│ │ /usr/lib/nvidia/ │ │ +│ │ ├── libcuda.so.570.86.16 │ │ +│ │ ├── libnvidia-ml.so.570.86.16 │ │ +│ │ ├── libnvidia-ptxjitcompiler.so.570.86.16 │ │ +│ │ └── ... (other driver libraries) │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ (at VM boot, if HAS_GPU=1) +┌─────────────────────────────────────────────────────────────────────┐ +│ Guest VM │ +│ 1. Load kernel modules (modprobe nvidia, etc.) │ +│ 2. Create device nodes (/dev/nvidia0, /dev/nvidiactl, etc.) │ +│ 3. Copy driver libs to container rootfs │ +│ 4. Run ldconfig to update library cache │ +│ 5. Container can now use GPU! │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Container Image Requirements + +With driver injection, containers **do not need** to bundle NVIDIA driver libraries. + +**Minimal CUDA image example:** + +```dockerfile +FROM nvidia/cuda:12.4-runtime-ubuntu22.04 +# Your application - no driver installation needed! 
+RUN pip install torch +CMD ["python", "train.py"] +``` + +hypeman injects the following at boot: + +- `libcuda.so` - CUDA driver API +- `libnvidia-ml.so` - NVML (nvidia-smi, monitoring) +- `libnvidia-ptxjitcompiler.so` - PTX JIT compilation +- `libnvidia-nvvm.so` - NVVM compiler +- `libnvidia-gpucomp.so` - GPU compute library +- `nvidia-smi` binary +- `nvidia-modprobe` binary + +## Driver Version Compatibility + +The driver libraries injected by hypeman are pinned to a specific version that matches the kernel modules. This version is tracked in: + +- **Kernel release:** `onkernel/linux` GitHub releases (e.g., `ch-6.12.8-kernel-2-20251211`) +- **hypeman config:** `lib/system/versions.go` - `NvidiaDriverVersion` map + +### Current Driver Version + +| Kernel Version | Driver Version | Release Date | +|---------------|----------------|--------------| +| ch-6.12.8-kernel-2-20251211 | 570.86.16 | 2025-12-11 | + +### CUDA Compatibility + +Driver 570.86.16 supports CUDA 12.8 and earlier. Check [NVIDIA's compatibility matrix](https://docs.nvidia.com/deploy/cuda-compatibility/) for details. + +## Upgrading the Driver + +To upgrade the NVIDIA driver version: + +1. **Choose a new version** from [NVIDIA's Linux drivers](https://www.nvidia.com/Download/index.aspx) + +2. **Update onkernel/linux:** + - Edit `.github/workflows/release.yaml` + - Change `DRIVER_VERSION=` in all locations (search for the current version) + - The workflow file contains comments explaining what to update + - Create a new release tag (e.g., `ch-6.12.8-kernel-2-YYYYMMDD`) + +3. **Update hypeman:** + - Edit `lib/system/versions.go` + - Add new `KernelVersion` constant + - Update `DefaultKernelVersion` + - Update `NvidiaDriverVersion` map entry + - Update `NvidiaModuleURLs` with new release URL + - Update `NvidiaDriverLibURLs` with new release URL + +4.
**Test thoroughly** before deploying: + - Run GPU passthrough E2E tests + - Verify with real CUDA workloads (e.g., ollama inference) + +## Supported GPUs + +All NVIDIA datacenter GPUs supported by the open-gpu-kernel-modules are supported: + +- NVIDIA H100, H200 +- NVIDIA L4, L40, L40S +- NVIDIA A100, A10, A30 +- NVIDIA T4 +- And other Turing/Ampere/Hopper/Ada Lovelace architecture GPUs + +Consumer GPUs (GeForce) are **not** supported by the open kernel modules. + +## Troubleshooting + +### nvidia-smi shows wrong driver version + +The driver version shown by nvidia-smi should match hypeman's configured version. If it differs, the container may have its own driver libraries that are taking precedence. Either: + +- Use a minimal CUDA runtime image without driver libs +- Or ensure the container's driver version matches + +### CUDA initialization failed + +Check that: + +1. Kernel modules are loaded: `cat /proc/modules | grep nvidia` +2. Device nodes exist: `ls -la /dev/nvidia*` +3. Libraries are in LD_LIBRARY_PATH: `ldconfig -p | grep nvidia` + +### Driver/library version mismatch + +Error like `NVML_ERROR_LIB_RM_VERSION_MISMATCH` means the userspace library version doesn't match the kernel module version. This shouldn't happen with hypeman's automatic injection, but can occur if the container has its own driver libraries. + +**Solution:** Use a base image that doesn't include driver libraries, or ensure any bundled libraries match the hypeman driver version. + +### GPU not detected in container + +1. Verify the GPU was attached to the instance: + ```bash + hypeman instance get <instance-id> | jq .devices + ``` + +2. Check the VM console log for module loading errors: + ```bash + cat /var/lib/hypeman/instances/<instance-id>/console.log | grep -i nvidia + ``` + +3.
Verify VFIO binding on the host: + ```bash + ls -la /sys/bus/pci/devices/<pci-address>/driver + ``` + +## Performance Tuning + +### Huge Pages + +For best GPU performance, enable huge pages on the host: + +```bash +echo 1024 > /proc/sys/vm/nr_hugepages +``` + +### IOMMU Configuration + +Ensure IOMMU is properly configured: + +```bash +# Intel +intel_iommu=on iommu=pt + +# AMD +amd_iommu=on iommu=pt +``` + +The `iommu=pt` (passthrough) option improves performance for devices not using VFIO. + diff --git a/lib/devices/README.md new file mode 100644 index 0000000..0e34e66 --- /dev/null +++ b/lib/devices/README.md @@ -0,0 +1,451 @@ +# Device Passthrough + +This package provides GPU and PCI device passthrough for virtual machines using the Linux VFIO (Virtual Function I/O) framework. + +## Overview + +Device passthrough allows a VM to have direct, near-native access to physical hardware (GPUs, network cards, etc.) by bypassing the host's device drivers and giving the guest exclusive control. For a deep dive into the VFIO framework, see the [kernel documentation](https://docs.kernel.org/driver-api/vfio.html).
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ Host │ +│ ┌─────────────┐ ┌─────────────────────────────────┐ │ +│ │ hypeman │ │ VFIO Driver │ │ +│ │ (VMM) │────▶│ /dev/vfio/ │ │ +│ └─────────────┘ └─────────────────────────────────┘ │ +│ │ │ +│ ┌───────────────────────────┼──────────────────────────┐ │ +│ │ IOMMU (hardware) ▼ │ │ +│ │ - Translates guest physical → host physical │ │ +│ │ - Isolates DMA (device can only access VM memory) │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────┐ │ +│ │ GPU (PCIe) │ │ +│ └──────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Package Structure + +``` +lib/devices/ +├── types.go # Device, AvailableDevice, CreateDeviceRequest +├── errors.go # Error definitions +├── discovery.go # PCI device discovery from sysfs +├── vfio.go # VFIO bind/unbind operations +├── manager.go # Manager interface and implementation +├── manager_test.go # Unit tests +├── gpu_e2e_test.go # End-to-end GPU passthrough test (auto-skips if no GPU) +└── scripts/ + └── gpu-reset.sh # GPU recovery script (see Troubleshooting) +``` + +## Example: Full Workflow + +```bash +# 1. Discover available devices +curl localhost:8080/devices/available +# → [{"pci_address": "0000:a2:00.0", "vendor_name": "NVIDIA Corporation", ...}] + +# 2. Register the GPU +curl -X POST localhost:8080/devices \ + -d '{"name": "l4-gpu", "pci_address": "0000:a2:00.0"}' + +# 3. Create instance with GPU (auto-binds to VFIO) +curl -X POST localhost:8080/instances \ + -d '{"name": "ml-training", "image": "nvidia/cuda:12.0-base", "devices": ["l4-gpu"]}' + +# 4. Inside VM: verify GPU +lspci | grep -i nvidia +nvidia-smi + +# 5. Delete instance (auto-unbinds from VFIO) +curl -X DELETE localhost:8080/instances/{id} +# GPU returns to host control +``` + +## Device Lifecycle + +### 1. 
Discovery + +Discover passthrough-capable devices on the host: + +``` +GET /devices/available +``` + +Returns PCI devices that are candidates for passthrough (GPUs, 3D controllers). Each device includes its PCI address, vendor/device IDs, IOMMU group, and current driver. + +### 2. Registration + +Register a device with a unique name: + +``` +POST /devices +{ + "name": "l4-gpu", + "pci_address": "0000:a2:00.0" +} +``` + +Registration does not modify the device's driver binding. The device remains usable by the host until an instance requests it. + +### 3. Instance Creation (Auto-Bind) + +When an instance is created with devices: + +``` +POST /instances +{ + "name": "gpu-workload", + "image": "docker.io/nvidia/cuda:12.0-base", + "devices": ["l4-gpu"] +} +``` + +The system automatically: +1. **Validates** the device exists and isn't attached to another instance +2. **Binds to VFIO** if not already bound (unbinds native driver like `nvidia`) +3. **Passes to cloud-hypervisor** via the `--device` flag +4. **Marks as attached** to prevent concurrent use + +### 4. Instance Deletion (Auto-Unbind) + +When an instance is deleted, the system automatically: +1. **Marks device as detached** +2. **Unbinds from VFIO** (triggers kernel driver probe to restore native driver) + +This returns the device to host control so it can be used by other processes or a new instance. + +### 5. Unregistration + +``` +DELETE /devices/{id} +``` + +Removes the device from hypeman's registry. Fails if the device is currently attached to an instance. + +## Cloud Hypervisor Integration + +Cloud-hypervisor receives device passthrough configuration via the `VmConfig.Devices` field: + +```go +vmConfig.Devices = &[]vmm.DeviceConfig{ + { + Path: "/sys/bus/pci/devices/0000:a2:00.0/", + }, +} +``` + +Cloud-hypervisor then: +1. Opens the VFIO group file (`/dev/vfio/`) +2. Maps device BARs (memory regions) into guest physical address space +3. Configures interrupt routing (MSI/MSI-X) to the guest +4. 
The guest sees a real PCIe device and loads native drivers + +### NVIDIA-Specific Options + +For multi-GPU configurations, cloud-hypervisor supports GPUDirect P2P: + +```go +DeviceConfig{ + Path: "/sys/bus/pci/devices/0000:a2:00.0/", + XNvGpudirectClique: ptr(int8(0)), // Enable P2P within clique 0 +} +``` + +This is not currently exposed through the hypeman API but could be added for HPC workloads. + +## Constraints and Limitations + +### IOMMU Requirements + +- **IOMMU must be enabled** in BIOS and kernel (`intel_iommu=on` or `amd_iommu=on`) +- All devices in an IOMMU group must be passed through together +- Some motherboards place many devices in the same group (ACS override may help) + +### VFIO Module Requirements + +The following kernel modules must be loaded: +```bash +modprobe vfio_pci +modprobe vfio_iommu_type1 +``` + +### Driver Binding + +- Binding to VFIO **unloads the native driver** (e.g., `nvidia`, `amdgpu`) +- Host processes using the device will lose access +- Some drivers (like NVIDIA) may resist unbinding if in use + +### Single Attachment + +A device can only be attached to one instance at a time. Attempts to attach an already-attached device will fail. + +### No Hot-Plug + +Devices must be specified at instance creation time. Hot-adding devices to a running VM is not currently supported (though cloud-hypervisor has this capability). 
+ +### Guest Driver Requirements + +The guest must have appropriate drivers: +- **NVIDIA GPUs**: Install NVIDIA drivers in the guest image +- **AMD GPUs**: Install amdgpu/ROCm in the guest image + +### Performance Considerations + +- **ACS (Access Control Services)**: Required for proper isolation on some systems +- **Huge Pages**: Recommended for GPU workloads (`hugepages=on` in cloud-hypervisor) +- **CPU Pinning**: Can improve latency for GPU compute workloads + +## Troubleshooting + +### GPU Reset Script + +If GPU passthrough tests fail or hang, the GPU may be left in a bad state (still bound to vfio-pci, or stuck without a driver). Use the provided reset script: + +```bash +# Reset all NVIDIA GPUs to their native driver +sudo ./lib/devices/scripts/gpu-reset.sh + +# Reset a specific GPU +sudo ./lib/devices/scripts/gpu-reset.sh 0000:a2:00.0 +``` + +The script will: +1. Kill any stuck cloud-hypervisor processes holding the GPU +2. Unbind from vfio-pci if still bound +3. Clear `driver_override` +4. Trigger driver probe to rebind to the nvidia driver +5. Restart `nvidia-persistenced` + +### Common Issues + +#### VFIO Bind Hangs + +**Symptom**: `BindToVFIO` hangs indefinitely. + +**Cause**: The `nvidia-persistenced` service keeps `/dev/nvidia*` open, preventing driver unbind. + +**Solution**: The code now automatically stops `nvidia-persistenced` before unbinding. If you're testing manually: +```bash +sudo systemctl stop nvidia-persistenced +# ... do VFIO bind/unbind ... +sudo systemctl start nvidia-persistenced +``` + +#### VM Exec Fails After Boot + +**Symptom**: VM boots but exec commands time out. + +**Cause**: Usually the container's main process exited (e.g., `alpine` image runs `/bin/sh` which exits immediately), causing init to exit and the VM to kernel panic. + +**Solution**: Use an image with a long-running process (e.g., `nginx:alpine`) or ensure your container has a persistent entrypoint. 
+ +#### GPU Not Restored After Test + +**Symptom**: GPU has no driver bound, `nvidia-smi` fails. + +**Solution**: +```bash +# Trigger kernel driver probe +sudo sh -c 'echo 0000:a2:00.0 > /sys/bus/pci/drivers_probe' +# Restart nvidia-persistenced +sudo systemctl start nvidia-persistenced +# Verify +nvidia-smi +``` + +If that fails, a system **reboot** may be necessary. + +#### VFIO Modules Not Loaded + +**Symptom**: `ErrVFIONotAvailable` error. + +**Solution**: +```bash +sudo modprobe vfio_pci vfio_iommu_type1 +# Verify +ls /dev/vfio/ +``` + +Add to `/etc/modules-load.d/vfio.conf` for persistence across reboots. + +#### IOMMU Not Enabled + +**Symptom**: No IOMMU groups found, passthrough fails. + +**Solution**: Add kernel parameter to bootloader: +- Intel: `intel_iommu=on iommu=pt` +- AMD: `amd_iommu=on iommu=pt` + +Then reboot. + +### Running the E2E Test + +The GPU passthrough E2E test **automatically detects** GPU availability and skips if prerequisites aren't met. + +**Why GPU tests require root**: Unlike network tests which can use Linux capabilities (`CAP_NET_ADMIN`), GPU passthrough requires writing to sysfs files (`/sys/bus/pci/drivers/*/unbind`, etc.) which are protected by standard Unix file permissions (owned by root, mode 0200). Capabilities don't bypass DAC (discretionary access control) for file writes. + +Prerequisites for the test to run (not skip): +- **Root permissions** (sudo) - required for sysfs driver operations +- NVIDIA GPU on host +- IOMMU enabled (`intel_iommu=on` or `amd_iommu=on`) +- `vfio_pci` and `vfio_iommu_type1` modules loaded +- `/sbin` in PATH (for `mkfs.ext4`) + +```bash +# Prepare the environment +sudo modprobe vfio_pci vfio_iommu_type1 + +# Run via make - test auto-skips if not root or no GPU +make test + +# Or run directly with sudo +sudo env PATH=$PATH:/sbin:/usr/sbin \ + go test -v -run TestGPUPassthrough -timeout 5m ./lib/devices/... +``` + +The test will: +1. 
Check prerequisites and skip if not met (not root, no GPU, no IOMMU, etc.) +2. Discover available NVIDIA GPUs +3. Register the first GPU found +4. Create a VM with GPU passthrough +5. Verify the GPU is visible inside the VM +6. Clean up (delete VM, unbind from VFIO, restore nvidia driver) + +## Future Plans: GPU Sharing Across Multiple VMs + +### The Problem + +With current VFIO passthrough, a GPU is assigned **exclusively** to one VM. To share a single GPU across multiple VMs (e.g., give each VM a "slice"), you need NVIDIA's **vGPU (GRID)** technology. + +### Why MIG Alone Doesn't Help + +**MIG (Multi-Instance GPU)** partitions a GPU into isolated instances at the hardware level, but: + +- MIG partitions are **not separate PCI devices**—the GPU remains one PCI endpoint +- MIG partitions are accessed via CUDA APIs (`CUDA_VISIBLE_DEVICES=MIG-`) +- You can only VFIO-passthrough the **whole GPU** to one VM +- MIG is useful for workload isolation **within** a single host or VM, not for multi-VM sharing + +``` +Physical GPU (0000:a2:00.0) ─── still ONE PCI device + └── MIG partitions (logical, not separate devices) + ├── MIG Instance 0 ─┐ + ├── MIG Instance 1 ─┼── All accessed via CUDA on the same GPU + └── MIG Instance 2 ─┘ +``` + +**Supported MIG Hardware**: A100, A30, H100, H200 (NOT L4 or consumer GPUs) + +### vGPU/mdev: The Only Path to Multi-VM GPU Sharing + +To assign GPU shares to **separate VMs**, NVIDIA requires their **vGPU (GRID)** technology, which uses the Linux mediated device (mdev) framework. + +#### Cloud-Hypervisor mdev Support Status + +Cloud-hypervisor **does** support mdev passthrough: + +```bash +cloud-hypervisor --device path=/sys/bus/mdev/devices// +``` + +However, NVIDIA's proprietary vGPU manager has a QEMU-specific quirk: it reads the VMM process's `/proc//cmdline` looking for a `-uuid` argument to map mdev UUIDs to VMs. This doesn't work out-of-the-box with cloud-hypervisor. 
+ +**Workarounds** (from [cloud-hypervisor#5319](https://github.com/cloud-hypervisor/cloud-hypervisor/issues/5319)): +- Patch CH to accept a dummy `-uuid` flag +- Use wrapper scripts that inject the UUID into the process name +- Wait for NVIDIA to fix their driver's VMM assumptions + +#### vGPU Requirements + +- **Hardware**: Datacenter GPUs (A100, L40, etc.) +- **Licensing**: NVIDIA GRID subscription ($$/GPU/year) +- **Host Software**: NVIDIA vGPU Manager installed on host +- **Guest Drivers**: vGPU-aware guest drivers + +### Design Changes for mdev/vGPU Support + +#### 1. New Device Type: `MdevDevice` + +```go +type MdevDevice struct { + UUID string // mdev instance UUID + ParentGPU string // PCI address of parent GPU + Type string // vGPU type (e.g., "nvidia-256") + Available bool // Not assigned to a VM +} +``` + +#### 2. Discovery Extensions + +```go +// List mdev types supported by a GPU +func (m *manager) ListMdevTypes(ctx context.Context, pciAddress string) ([]MdevType, error) + +// List existing mdev instances +func (m *manager) ListMdevInstances(ctx context.Context) ([]MdevDevice, error) + +// Create an mdev instance +func (m *manager) CreateMdevInstance(ctx context.Context, pciAddress, mdevType string) (*MdevDevice, error) + +// Destroy an mdev instance +func (m *manager) DestroyMdevInstance(ctx context.Context, uuid string) error +``` + +#### 3. Passthrough Mechanism + +mdev devices use a different sysfs path: + +``` +# mdev device path +/sys/bus/mdev/devices// + +# vs VFIO-PCI (current) +/sys/bus/pci/devices/0000:a2:00.0/ +``` + +Cloud-hypervisor's `--device` flag already accepts mdev paths. + +#### 4. 
NVIDIA vGPU Workaround + +To work around NVIDIA's QEMU-specific UUID detection, we may need to: +- Add a `--platform uuid=` option to cloud-hypervisor invocation +- Or use a wrapper that sets the process name appropriately + +### Implementation Phases + +**Phase 1**: mdev Discovery & Passthrough +- Detect mdev-capable GPUs +- List available mdev types and instances +- Pass mdev devices to VMs (path already works) + +**Phase 2**: mdev Lifecycle Management +- Create/destroy mdev instances via sysfs +- API endpoints for mdev management + +**Phase 3**: NVIDIA vGPU Integration +- Implement UUID workaround for NVIDIA's driver +- Test with GRID licensing +- Document guest driver requirements + +### How vGPU + MIG Work Together + +vGPU creates mdev devices that can be backed by MIG partitions, giving you both hardware isolation (MIG) and multi-VM assignment (vGPU): + +``` +Physical GPU (one PCI device) + │ + ├── Without vGPU: VFIO passthrough gives whole GPU to ONE VM + │ + └── With vGPU (GRID license required): + └── MIG Mode enabled on host + ├── MIG Instance 0 ──→ vGPU mdev A ──→ VM 1 + ├── MIG Instance 1 ──→ vGPU mdev B ──→ VM 2 + └── MIG Instance 2 ──→ vGPU mdev C ──→ VM 3 +``` + +Without vGPU, MIG is only useful for workload isolation on the host or within a single VM that owns the whole GPU. 
diff --git a/lib/devices/discovery.go b/lib/devices/discovery.go new file mode 100644 index 0000000..b04213c --- /dev/null +++ b/lib/devices/discovery.go @@ -0,0 +1,279 @@ +package devices + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" +) + +const ( + sysfsDevicesPath = "/sys/bus/pci/devices" + sysfsIOMMUPath = "/sys/kernel/iommu_groups" +) + +// pciAddressPattern matches PCI addresses like "0000:a2:00.0" +var pciAddressPattern = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`) + +// ValidatePCIAddress validates that a string is a valid PCI address format +func ValidatePCIAddress(addr string) bool { + return pciAddressPattern.MatchString(addr) +} + +// DiscoverAvailableDevices scans sysfs for PCI devices that can be used for passthrough +// It filters for devices that are likely candidates (GPUs, network cards, etc.) +func DiscoverAvailableDevices() ([]AvailableDevice, error) { + entries, err := os.ReadDir(sysfsDevicesPath) + if err != nil { + return nil, fmt.Errorf("read sysfs devices: %w", err) + } + + var devices []AvailableDevice + for _, entry := range entries { + addr := entry.Name() + if !ValidatePCIAddress(addr) { + continue + } + + device, err := readDeviceInfo(addr) + if err != nil { + // Skip devices we can't read + continue + } + + // Filter for passthrough-capable devices (GPUs, 3D controllers, etc.) 
+ if isPassthroughCandidate(device) { + devices = append(devices, *device) + } + } + + return devices, nil +} + +// GetDeviceInfo reads information about a specific PCI device +func GetDeviceInfo(pciAddress string) (*AvailableDevice, error) { + if !ValidatePCIAddress(pciAddress) { + return nil, ErrInvalidPCIAddress + } + + devicePath := filepath.Join(sysfsDevicesPath, pciAddress) + if _, err := os.Stat(devicePath); os.IsNotExist(err) { + return nil, ErrDeviceNotFound + } + + return readDeviceInfo(pciAddress) +} + +// readDeviceInfo reads device information from sysfs +func readDeviceInfo(pciAddress string) (*AvailableDevice, error) { + devicePath := filepath.Join(sysfsDevicesPath, pciAddress) + + vendorID, err := readSysfsFile(filepath.Join(devicePath, "vendor")) + if err != nil { + return nil, fmt.Errorf("read vendor: %w", err) + } + vendorID = strings.TrimPrefix(vendorID, "0x") + + deviceID, err := readSysfsFile(filepath.Join(devicePath, "device")) + if err != nil { + return nil, fmt.Errorf("read device: %w", err) + } + deviceID = strings.TrimPrefix(deviceID, "0x") + + iommuGroup, err := readIOMMUGroup(pciAddress) + if err != nil { + return nil, fmt.Errorf("read iommu group: %w", err) + } + + driver := readCurrentDriver(pciAddress) + + // Get device class to determine type + classCode, _ := readSysfsFile(filepath.Join(devicePath, "class")) + + return &AvailableDevice{ + PCIAddress: pciAddress, + VendorID: vendorID, + DeviceID: deviceID, + VendorName: getVendorName(vendorID), + DeviceName: getDeviceName(vendorID, deviceID, classCode), + IOMMUGroup: iommuGroup, + CurrentDriver: driver, + }, nil +} + +// readSysfsFile reads and trims a sysfs file +func readSysfsFile(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + return strings.TrimSpace(string(data)), nil +} + +// readIOMMUGroup reads the IOMMU group number for a device +func readIOMMUGroup(pciAddress string) (int, error) { + iommuLink := 
filepath.Join(sysfsDevicesPath, pciAddress, "iommu_group") + target, err := os.Readlink(iommuLink) + if err != nil { + return -1, fmt.Errorf("read iommu_group link: %w", err) + } + + // Target is like "../../../../kernel/iommu_groups/82" + groupStr := filepath.Base(target) + group, err := strconv.Atoi(groupStr) + if err != nil { + return -1, fmt.Errorf("parse iommu group: %w", err) + } + + return group, nil +} + +// readCurrentDriver reads the current driver bound to the device +func readCurrentDriver(pciAddress string) *string { + driverLink := filepath.Join(sysfsDevicesPath, pciAddress, "driver") + target, err := os.Readlink(driverLink) + if err != nil { + // No driver bound + return nil + } + + driver := filepath.Base(target) + return &driver +} + +// GetIOMMUGroupDevices returns all PCI devices in the same IOMMU group +func GetIOMMUGroupDevices(iommuGroup int) ([]string, error) { + groupPath := filepath.Join(sysfsIOMMUPath, strconv.Itoa(iommuGroup), "devices") + entries, err := os.ReadDir(groupPath) + if err != nil { + return nil, fmt.Errorf("read iommu group devices: %w", err) + } + + var devices []string + for _, entry := range entries { + devices = append(devices, entry.Name()) + } + return devices, nil +} + +// isPassthroughCandidate determines if a device is a good candidate for passthrough +func isPassthroughCandidate(device *AvailableDevice) bool { + // Check class code for GPUs and 3D controllers + // Class 0x03 = Display controller + // Subclass 0x00 = VGA controller + // Subclass 0x02 = 3D controller (like NVIDIA compute GPUs) + devicePath := filepath.Join(sysfsDevicesPath, device.PCIAddress) + classCode, err := readSysfsFile(filepath.Join(devicePath, "class")) + if err != nil { + return false + } + + classCode = strings.TrimPrefix(classCode, "0x") + if len(classCode) >= 4 { + classPrefix := classCode[:4] + // 0300 = VGA controller, 0302 = 3D controller + if classPrefix == "0300" || classPrefix == "0302" { + return true + } + } + + // Also include 
NVIDIA devices by vendor ID + if device.VendorID == "10de" { + return true + } + + return false +} + +// getVendorName returns a human-readable vendor name +func getVendorName(vendorID string) string { + vendors := map[string]string{ + "10de": "NVIDIA Corporation", + "1002": "AMD/ATI", + "8086": "Intel Corporation", + } + if name, ok := vendors[vendorID]; ok { + return name + } + return "Unknown Vendor" +} + +// getDeviceName returns a human-readable device name based on class and IDs +func getDeviceName(vendorID, deviceID, classCode string) string { + // For NVIDIA, provide some common device names. + // Sources: + // - NVIDIA Driver README, Appendix A "Supported NVIDIA GPU Products": + // https://download.nvidia.com/XFree86/Linux-x86_64/570.133.07/README/supportedchips.html + // - PCI ID Database: https://pci-ids.ucw.cz/read/PC/10de + if vendorID == "10de" { + nvidiaDevices := map[string]string{ + // H100 series + "2321": "H100 NVL", + "2330": "H100 SXM5 80GB", + "2331": "H100 PCIe", + "2339": "H100", + // H200 series + "2335": "H200", + // L4 + "27b8": "L4", + // L40 series + "26b5": "L40", + "26b9": "L40S", + // A100 series + "20b0": "A100 SXM4 40GB", + "20b2": "A100 SXM4 80GB", + "20b5": "A100 PCIe 40GB", + "20f1": "A100 PCIe 80GB", + // A30/A40 + "20b7": "A30", + "2235": "A40", + // RTX 4000 series (datacenter) + "2684": "RTX 4090", + "27b0": "RTX 4090 D", + // V100 series + "1db4": "V100 PCIe 16GB", + "1db5": "V100 SXM2 16GB", + "1db6": "V100 PCIe 32GB", + } + if name, ok := nvidiaDevices[deviceID]; ok { + return name + } + } + + // Fall back to class-based description + classCode = strings.TrimPrefix(classCode, "0x") + if len(classCode) >= 4 { + switch classCode[:4] { + case "0300": + return "VGA Controller" + case "0302": + return "3D Controller" + case "0403": + return "Audio Device" + } + } + + return "PCI Device" +} + +// DetermineDeviceType determines the DeviceType based on device properties +func DetermineDeviceType(device *AvailableDevice) 
DeviceType { + devicePath := filepath.Join(sysfsDevicesPath, device.PCIAddress) + classCode, err := readSysfsFile(filepath.Join(devicePath, "class")) + if err != nil { + return DeviceTypeGeneric + } + + classCode = strings.TrimPrefix(classCode, "0x") + if len(classCode) >= 4 { + classPrefix := classCode[:4] + // 0300 = VGA controller, 0302 = 3D controller + if classPrefix == "0300" || classPrefix == "0302" { + return DeviceTypeGPU + } + } + + return DeviceTypeGeneric +} diff --git a/lib/devices/errors.go b/lib/devices/errors.go new file mode 100644 index 0000000..afacaf2 --- /dev/null +++ b/lib/devices/errors.go @@ -0,0 +1,40 @@ +package devices + +import "errors" + +var ( + // ErrNotFound is returned when a device is not found + ErrNotFound = errors.New("device not found") + + // ErrInUse is returned when a device is currently attached to an instance + ErrInUse = errors.New("device is in use") + + // ErrNotBound is returned when a VFIO operation requires the device to be bound + ErrNotBound = errors.New("device is not bound to VFIO") + + // ErrAlreadyBound is returned when trying to bind a device that's already bound to VFIO + ErrAlreadyBound = errors.New("device is already bound to VFIO") + + // ErrAlreadyExists is returned when trying to register a device that already exists + ErrAlreadyExists = errors.New("device already exists") + + // ErrInvalidName is returned when the device name doesn't match the required pattern + ErrInvalidName = errors.New("device name must match pattern ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$") + + // ErrNameExists is returned when a device with the same name already exists + ErrNameExists = errors.New("device name already exists") + + // ErrInvalidPCIAddress is returned when the PCI address format is invalid + ErrInvalidPCIAddress = errors.New("invalid PCI address format") + + // ErrDeviceNotFound is returned when the PCI device doesn't exist on the host + ErrDeviceNotFound = errors.New("PCI device not found on host") + + // ErrVFIONotAvailable 
is returned when VFIO modules are not loaded + ErrVFIONotAvailable = errors.New("VFIO is not available (modules not loaded)") + + // ErrIOMMUGroupConflict is returned when not all devices in IOMMU group can be passed through + ErrIOMMUGroupConflict = errors.New("IOMMU group contains other devices that must also be passed through") +) + + diff --git a/lib/devices/gpu_e2e_test.go b/lib/devices/gpu_e2e_test.go new file mode 100644 index 0000000..4348ebd --- /dev/null +++ b/lib/devices/gpu_e2e_test.go @@ -0,0 +1,353 @@ +package devices_test + +import ( + "bytes" + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestGPUPassthrough is an E2E test that verifies GPU passthrough works. +// +// This test automatically detects GPU availability and skips if: +// - No NVIDIA GPU is found +// - IOMMU is not enabled +// - VFIO modules are not loaded +// - Not running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestGPUPassthrough ./lib/devices/... +// +// WARNING: This test will unbind the GPU from the nvidia driver, which may +// disrupt other processes using the GPU. The test attempts to restore the +// nvidia driver binding on cleanup. 
+func TestGPUPassthrough(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + // Log that prerequisites passed + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("GPU test prerequisites met: %d IOMMU groups found", len(groups)) + + // Setup test infrastructure + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers (nil meter/tracer disables metrics/tracing) + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 100*1024*1024*1024, nil) // 100GB max volume storage + limits := instances.ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB + } + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Discover available GPUs + t.Log("Step 1: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + // Find an NVIDIA GPU + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found on this system") + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + // Check GPU is in a usable state (has a driver bound) + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound 
- may need reboot to recover. Run: sudo reboot") + } + + // Verify the driver path exists (GPU not in broken state) + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing at %s - GPU in broken state, reboot required", driverPath) + } + + // Step 2: Register the GPU + t.Log("Step 2: Registering GPU...") + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered device: %s (ID: %s)", device.Name, device.Id) + + // Store original driver for cleanup + originalDriver := driverStr + + // Cleanup: always unregister device and try to restore original driver + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + + // Try to restore original driver binding via driver_probe + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + t.Logf("Cleanup: Triggering driver probe to restore %s driver...", originalDriver) + // Use driver_probe to let the kernel find and bind the right driver + probePath := "/sys/bus/pci/drivers_probe" + if err := os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200); err != nil { + t.Logf("Warning: Could not trigger driver probe: %v (may need reboot)", err) + } else { + t.Logf("Cleanup: Driver probe triggered for %s", targetGPU.PCIAddress) + } + } + }) + + // Step 3: Ensure system files (kernel, initrd) + t.Log("Step 3: Ensuring system files...") + err = systemMgr.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Step 4: Pull nginx:alpine (nginx keeps running unlike plain alpine which exits immediately) + t.Log("Step 4: Pulling nginx:alpine image...") + createdImg, createErr := imageMgr.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + 
require.NoError(t, createErr, "CreateImage should succeed") + t.Logf("CreateImage returned: name=%s, status=%s", createdImg.Name, createdImg.Status) + + // Use the name returned from CreateImage (it may be normalized) + imageName := createdImg.Name + + // Wait for image to be ready + var img *images.Image + for i := 0; i < 90; i++ { + img, err = imageMgr.GetImage(ctx, imageName) + if err != nil { + if i < 5 || i%10 == 0 { + t.Logf("GetImage attempt %d: error=%v", i+1, err) + } + } else { + if i < 5 || i%10 == 0 { + t.Logf("GetImage attempt %d: status=%s", i+1, img.Status) + } + if img.Status == images.StatusReady { + break + } + if img.Status == images.StatusFailed { + errMsg := "unknown" + if img.Error != nil { + errMsg = *img.Error + } + t.Fatalf("Image build failed: %s", errMsg) + } + } + time.Sleep(1 * time.Second) + } + require.NotNil(t, img, "Image should exist after 90 seconds") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Log("Image ready") + + // Step 5: Create instance with GPU (with timeout to prevent hang on VFIO issues) + t.Log("Step 5: Creating instance with GPU...") + createCtx, createCancel := context.WithTimeout(ctx, 60*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "gpu-test", + Image: "docker.io/library/nginx:alpine", + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Devices: []string{"test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + // Cleanup: always delete instance + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Step 6: Wait for instance to be ready + t.Log("Step 6: Waiting for instance to be ready...") + err = waitForInstanceReady(ctx, t, 
instanceMgr, inst.Id, 30*time.Second) + require.NoError(t, err) + t.Log("Instance is ready") + + // Step 7: Verify GPU is visible inside VM + // Note: Alpine doesn't have lspci, so we check /sys/bus/pci directly for NVIDIA vendor ID (0x10de) + t.Log("Step 7: Verifying GPU visibility inside VM...") + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Create a context with timeout for exec operations + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Retry exec a few times (exec agent may need time to start) + var stdout, stderr outputBuffer + var execErr error + // Command to find NVIDIA devices by checking vendor IDs (0x10de = NVIDIA) + checkGPUCmd := "cat /sys/bus/pci/devices/*/vendor 2>/dev/null | grep -i 10de && echo 'NVIDIA_FOUND'" + + for i := 0; i < 15; i++ { + stdout = outputBuffer{} + stderr = outputBuffer{} + + _, execErr = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", checkGPUCmd}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + + if execErr == nil { + break + } + t.Logf("Exec attempt %d/15 failed: %v", i+1, execErr) + time.Sleep(1 * time.Second) + } + if execErr != nil { + // Print console log for debugging + p := paths.New(tmpDir) + consoleLogPath := p.InstanceAppLog(inst.Id) + if consoleLog, err := os.ReadFile(consoleLogPath); err == nil { + t.Logf("=== VM Console Log ===\n%s\n=== End Console Log ===", string(consoleLog)) + } else { + t.Logf("Could not read console log: %v", err) + } + } + require.NoError(t, execErr, "exec should succeed") + + pciOutput := stdout.String() + t.Logf("PCI vendor check output:\n%s", pciOutput) + + // Verify NVIDIA device is visible (vendor ID 0x10de) + assert.True(t, + strings.Contains(pciOutput, "NVIDIA_FOUND") || + strings.Contains(strings.ToLower(pciOutput), "10de"), + "NVIDIA GPU (vendor 0x10de) should be visible in guest") + + t.Log("✅ GPU passthrough test 
PASSED!") +} + +// checkGPUTestPrerequisites checks if GPU passthrough test can run. +// Returns empty string if all prerequisites are met, otherwise returns skip reason. +func checkGPUTestPrerequisites() string { + // Check KVM + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + return "GPU passthrough test requires /dev/kvm" + } + + // Check VFIO modules + if _, err := os.Stat("/dev/vfio/vfio"); os.IsNotExist(err) { + return "GPU passthrough test requires VFIO (modprobe vfio_pci vfio_iommu_type1)" + } + + // Check IOMMU is enabled by looking for IOMMU groups + groups, err := os.ReadDir("/sys/kernel/iommu_groups") + if err != nil || len(groups) == 0 { + return "GPU passthrough test requires IOMMU (intel_iommu=on or amd_iommu=on)" + } + + // Check for NVIDIA GPU + available, err := devices.DiscoverAvailableDevices() + if err != nil { + return "GPU passthrough test failed to discover devices: " + err.Error() + } + + hasNvidiaGPU := false + for _, d := range available { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + hasNvidiaGPU = true + break + } + } + if !hasNvidiaGPU { + return "GPU passthrough test requires an NVIDIA GPU" + } + + // GPU passthrough requires root (euid=0) for sysfs driver bind/unbind operations. + // Unlike network operations which can use CAP_NET_ADMIN, sysfs file writes are + // protected by standard Unix DAC (file permissions), not just capabilities. + // The files in /sys/bus/pci/drivers/ are owned by root with mode 0200. 
+ if os.Geteuid() != 0 { + return "GPU passthrough test requires root (sudo) for sysfs driver operations" + } + + return "" // All prerequisites met +} + +func waitForInstanceReady(ctx context.Context, t *testing.T, mgr instances.Manager, id string, timeout time.Duration) error { + t.Helper() + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + inst, err := mgr.GetInstance(ctx, id) + if err != nil { + time.Sleep(500 * time.Millisecond) + continue + } + + if inst.State == instances.StateRunning { + // Additional check: wait a bit for exec agent + time.Sleep(2 * time.Second) + return nil + } + + time.Sleep(500 * time.Millisecond) + } + + return context.DeadlineExceeded +} + +type outputBuffer struct { + buf bytes.Buffer +} + +func (b *outputBuffer) Write(p []byte) (n int, err error) { + return b.buf.Write(p) +} + +func (b *outputBuffer) String() string { + return b.buf.String() +} diff --git a/lib/devices/gpu_inference_test.go b/lib/devices/gpu_inference_test.go new file mode 100644 index 0000000..0749b84 --- /dev/null +++ b/lib/devices/gpu_inference_test.go @@ -0,0 +1,536 @@ +package devices_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + osExec "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/go-chi/chi/v5" + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/daemon" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/registry" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// persistentTestDataDir is used to persist volumes between test runs. +// This allows the ollama model cache to survive across test executions. +// Note: Uses /var/lib instead of /tmp because /tmp often has limited space +// and the custom CUDA+Ollama image is ~4GB. +const persistentTestDataDir = "/var/lib/hypeman-gpu-inference-test" + +// ollamaCudaDockerImage is the name we use for the custom CUDA+Ollama image +const ollamaCudaDockerImage = "ollama-cuda:test" + +// TestGPUInference is an E2E test that verifies Ollama GPU inference works with VFIO passthrough. +// +// This test: +// 1. Builds a custom Docker image with NVIDIA CUDA runtime + Ollama +// 2. Pushes the image to hypeman's test registry +// 3. Launches a VM with GPU passthrough + the image +// 4. Runs `ollama run tinyllama` to perform GPU-accelerated inference +// 5. Verifies the model generates output +// +// The custom image bundles CUDA libraries, enabling Ollama to detect and use the GPU +// without needing nvidia-docker/nvidia-container-toolkit. +// +// Prerequisites: +// - NVIDIA GPU on host +// - IOMMU enabled +// - VFIO modules loaded (modprobe vfio_pci) +// - Docker installed (for building custom image) +// - Running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestGPUInference -timeout 30m ./lib/devices/... 
+// +// To clean up: +// +// sudo rm -rf /var/lib/hypeman-gpu-inference-test +// docker rmi ollama-cuda:test +func TestGPUInference(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + // Check Docker is available + if _, err := osExec.LookPath("docker"); err != nil { + t.Skip("Docker not installed - required for building custom CUDA image") + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("GPU inference test prerequisites met: %d IOMMU groups found", len(groups)) + + // Use persistent directory for volume storage (survives between test runs) + if err := os.MkdirAll(persistentTestDataDir, 0755); err != nil { + t.Fatalf("Failed to create persistent test directory: %v", err) + } + p := paths.New(persistentTestDataDir) + + cfg := &config.Config{ + DataDir: persistentTestDataDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 100*1024*1024*1024, nil) + limits := instances.ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, + } + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Build custom CUDA+Ollama image + t.Log("Step 1: Building custom CUDA+Ollama Docker image...") + dockerfilePath := getDockerfilePath(t) + buildCustomCudaImage(t, dockerfilePath, ollamaCudaDockerImage) + + // Step 2: Set up test registry and push the image + t.Log("Step 2: Pushing custom image to hypeman registry...") + reg, err := registry.New(p, imageMgr) + require.NoError(t, err) + + router := chi.NewRouter() + router.Mount("/v2", reg.Handler()) 
+ ts := httptest.NewServer(router) + t.Cleanup(ts.Close) + + serverHost := strings.TrimPrefix(ts.URL, "http://") + pushLocalDockerImage(t, ollamaCudaDockerImage, serverHost) + t.Log("Push complete") + + // Wait for image conversion - find image by listing since digest may change during Docker->OCI conversion + t.Log("Waiting for image conversion...") + var img *images.Image + var imageName string + for i := 0; i < 300; i++ { // 5 minutes for large CUDA image + // List images and find our ollama-cuda image + allImages, listErr := imageMgr.ListImages(ctx) + if listErr == nil { + for _, candidate := range allImages { + if strings.Contains(candidate.Name, "ollama-cuda") { + img = &candidate + imageName = candidate.Name + break + } + } + } + if img != nil && img.Status == images.StatusReady { + break + } + if img != nil && img.Status == images.StatusFailed { + errMsg := "unknown" + if img.Error != nil { + errMsg = *img.Error + } + t.Fatalf("Image conversion failed: %s", errMsg) + } + if i%30 == 0 { + status := "not found" + if img != nil { + status = string(img.Status) + } + t.Logf("Waiting for image conversion... 
(%d/300, status=%s)", i+1, status) + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist after 5 minutes") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Logf("Image ready: %s (digest: %s)", imageName, img.Digest) + + // Step 3: Discover and register GPU + t.Log("Step 3: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound - may need reboot") + } + + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing - GPU in broken state") + } + + // Register GPU + t.Log("Step 4: Registering GPU...") + device, err := deviceMgr.GetDevice(ctx, "inference-gpu") + if err != nil { + device, err = deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "inference-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered new device: %s (ID: %s)", device.Name, device.Id) + } else { + t.Logf("Using existing device: %s (ID: %s)", device.Name, device.Id) + } + + originalDriver := driverStr + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + probePath := "/sys/bus/pci/drivers_probe" + os.WriteFile(probePath, 
[]byte(targetGPU.PCIAddress), 0200) + } + }) + + // Step 5: Initialize network and create volume + t.Log("Step 5: Initializing network...") + err = networkMgr.Initialize(ctx, []string{}) + require.NoError(t, err) + + t.Log("Step 6: Setting up persistent volume for Ollama models...") + vol, err := volumeMgr.GetVolumeByName(ctx, "ollama-models") + if err != nil { + vol, err = volumeMgr.CreateVolume(ctx, volumes.CreateVolumeRequest{ + Name: "ollama-models", + SizeGb: 5, + }) + require.NoError(t, err) + t.Logf("Created new volume: %s", vol.Name) + } else { + t.Logf("Using existing volume: %s", vol.Name) + } + + // Step 7: Ensure system files + t.Log("Step 7: Ensuring system files...") + err = systemMgr.EnsureSystemFiles(ctx) + require.NoError(t, err) + + // Step 8: Create instance with GPU + t.Log("Step 8: Creating instance with GPU and custom CUDA image...") + createCtx, createCancel := context.WithTimeout(ctx, 120*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "gpu-inference-test", + Image: imageName, + Size: 8 * 1024 * 1024 * 1024, // 8GB RAM for CUDA + HotplugSize: 8 * 1024 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 4, + Env: map[string]string{ + "OLLAMA_HOST": "0.0.0.0", + "OLLAMA_MODELS": "/data/models", + }, + NetworkEnabled: true, + Devices: []string{"inference-gpu"}, + Volumes: []instances.VolumeAttachment{ + {VolumeID: vol.Id, MountPath: "/data/models", Readonly: false}, + }, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Step 9: Wait for instance + t.Log("Step 9: Waiting for instance to be ready...") + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 60*time.Second) + require.NoError(t, err) + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Step 10: Wait for 
Ollama server + t.Log("Step 10: Waiting for Ollama server to be ready...") + ollamaReady := false + for i := 0; i < 60; i++ { // 60 seconds for CUDA init + healthCtx, healthCancel := context.WithTimeout(ctx, 5*time.Second) + var healthStdout, healthStderr inferenceOutputBuffer + + _, err = exec.ExecIntoInstance(healthCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, + Stdout: &healthStdout, + Stderr: &healthStderr, + }) + healthCancel() + + output := healthStdout.String() + if err == nil && !strings.Contains(output, "could not connect") { + t.Logf("Ollama is ready (attempt %d)", i+1) + ollamaReady = true + break + } + if i%10 == 0 { + t.Logf("Waiting for Ollama (attempt %d/60)...", i+1) + } + time.Sleep(time.Second) + } + require.True(t, ollamaReady, "Ollama server should become ready") + + // Step 11: Check GPU detection + t.Log("Step 11: Checking GPU detection...") + gpuCheckCtx, gpuCheckCancel := context.WithTimeout(ctx, 10*time.Second) + defer gpuCheckCancel() + + // Check nvidia-smi (should work now with CUDA image) + var nvidiaSmiStdout, nvidiaSmiStderr inferenceOutputBuffer + _, _ = exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "nvidia-smi 2>&1 || echo 'nvidia-smi failed'"}, + Stdout: &nvidiaSmiStdout, + Stderr: &nvidiaSmiStderr, + }) + nvidiaSmiOutput := nvidiaSmiStdout.String() + if strings.Contains(nvidiaSmiOutput, "NVIDIA-SMI") { + t.Logf("✓ nvidia-smi works! 
GPU detected:\n%s", truncateHead(nvidiaSmiOutput, 500)) + } else { + t.Logf("nvidia-smi output: %s", nvidiaSmiOutput) + } + + // Check NVIDIA kernel modules + var modulesStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia"}, + Stdout: &modulesStdout, + }) + if modulesStdout.String() != "" { + t.Logf("✓ NVIDIA kernel modules loaded:\n%s", modulesStdout.String()) + } + + // Check device nodes + var devStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1"}, + Stdout: &devStdout, + }) + if !strings.Contains(devStdout.String(), "No such file") { + t.Logf("✓ NVIDIA device nodes:\n%s", devStdout.String()) + } + + // Step 12: Pull model via exec (needed for first time) + t.Log("Step 12: Ensuring TinyLlama model is available...") + + var listStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, + Stdout: &listStdout, + }) + + if !strings.Contains(listStdout.String(), "tinyllama") { + t.Log("Model not cached - pulling now...") + pullCtx, pullCancel := context.WithTimeout(ctx, 10*time.Minute) + defer pullCancel() + + var pullStdout inferenceOutputBuffer + _, pullErr := exec.ExecIntoInstance(pullCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama pull tinyllama 2>&1"}, + Stdout: &pullStdout, + }) + t.Logf("Pull output: %s", truncateTail(pullStdout.String(), 500)) + require.NoError(t, pullErr, "ollama pull should succeed") + } else { + t.Log("Model already cached") + } + + // Step 13: Test inference via HTTP API using the VM's private IP + // This is much faster than using `ollama run` CLI + t.Log("Step 13: Running inference via Ollama API...") + require.NotEmpty(t, actualInst.IP, 
"Instance should have a private IP") + ollamaURL := fmt.Sprintf("http://%s:11434/api/generate", actualInst.IP) + t.Logf("Calling Ollama API at %s", ollamaURL) + + // Create the inference request + inferenceReq := map[string]interface{}{ + "model": "tinyllama", + "prompt": "Say hello in 3 words", + "stream": false, + } + reqBody, err := json.Marshal(inferenceReq) + require.NoError(t, err) + + // Make the HTTP request with timeout + httpClient := &http.Client{Timeout: 2 * time.Minute} + start := time.Now() + resp, err := httpClient.Post(ollamaURL, "application/json", bytes.NewReader(reqBody)) + elapsed := time.Since(start) + + if err != nil { + // Log console for debugging + consoleLogPath := p.InstanceAppLog(inst.Id) + if consoleLog, readErr := os.ReadFile(consoleLogPath); readErr == nil { + t.Logf("=== VM Console Log ===\n%s\n=== End ===", truncateTail(string(consoleLog), 3000)) + } + } + require.NoError(t, err, "HTTP request to Ollama should succeed") + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode, "Ollama should return 200") + + // Parse response + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var ollamaResp struct { + Response string `json:"response"` + Done bool `json:"done"` + TotalDuration int64 `json:"total_duration"` // nanoseconds + EvalDuration int64 `json:"eval_duration"` // nanoseconds + EvalCount int `json:"eval_count"` // tokens generated + } + err = json.Unmarshal(body, &ollamaResp) + require.NoError(t, err) + + // Log results + t.Logf("Inference response: %s", ollamaResp.Response) + t.Logf("Total time: %v (API reported: %dms)", elapsed, ollamaResp.TotalDuration/1e6) + if ollamaResp.EvalCount > 0 && ollamaResp.EvalDuration > 0 { + tokensPerSec := float64(ollamaResp.EvalCount) / (float64(ollamaResp.EvalDuration) / 1e9) + t.Logf("Generation speed: %.1f tokens/sec (%d tokens in %dms)", + tokensPerSec, ollamaResp.EvalCount, ollamaResp.EvalDuration/1e6) + } + + // Verify output + assert.True(t, 
ollamaResp.Done, "Inference should complete") + assert.NotEmpty(t, ollamaResp.Response, "Model should generate output") + assert.True(t, len(ollamaResp.Response) > 5, "Model output should be substantive") + + // GPU inference should be fast (< 5 seconds for this small prompt) + assert.Less(t, elapsed, 30*time.Second, "GPU inference should be fast") + + t.Log("✅ GPU inference test PASSED!") +} + +// getDockerfilePath returns the path to the CUDA+Ollama Dockerfile +func getDockerfilePath(t *testing.T) string { + _, thisFile, _, ok := runtime.Caller(0) + require.True(t, ok, "Could not get current file path") + return filepath.Join(filepath.Dir(thisFile), "testdata", "ollama-cuda", "Dockerfile") +} + +// buildCustomCudaImage builds the custom CUDA+Ollama Docker image +func buildCustomCudaImage(t *testing.T, dockerfilePath, imageName string) { + t.Helper() + + // Check if image already exists + checkCmd := osExec.Command("docker", "image", "inspect", imageName) + if checkCmd.Run() == nil { + t.Logf("Docker image %s already exists, skipping build", imageName) + return + } + + t.Logf("Building Docker image %s (this may take several minutes)...", imageName) + dockerfileDir := filepath.Dir(dockerfilePath) + + cmd := osExec.Command("docker", "build", "-t", imageName, dockerfileDir) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + err := cmd.Run() + require.NoError(t, err, "Docker build should succeed") + t.Logf("Docker image %s built successfully", imageName) +} + +// pushLocalDockerImage loads an image from local Docker and pushes to hypeman's test registry +func pushLocalDockerImage(t *testing.T, dockerImage, serverHost string) { + t.Helper() + + t.Log("Loading image from Docker daemon...") + srcRef, err := name.ParseReference(dockerImage) + require.NoError(t, err, "Parse source image reference") + + img, err := daemon.Image(srcRef) + require.NoError(t, err, "Load image from Docker daemon") + + // Check image size for progress context + layers, _ := img.Layers() + var 
totalSize int64 + for _, layer := range layers { + if size, err := layer.Size(); err == nil { + totalSize += size + } + } + t.Logf("Image has %d layers, ~%.1f GB total", len(layers), float64(totalSize)/1e9) + + // Push to test registry with a tag (not just digest) so ListImages can find it + targetRef := fmt.Sprintf("%s/test/ollama-cuda:latest", serverHost) + t.Logf("Pushing to %s", targetRef) + + dstRef, err := name.ParseReference(targetRef, name.Insecure) + require.NoError(t, err, "Parse target reference") + + err = remote.Write(dstRef, img) + require.NoError(t, err, "Push to registry") +} + +// inferenceOutputBuffer is a simple buffer for capturing command output +type inferenceOutputBuffer struct { + buf bytes.Buffer +} + +func (b *inferenceOutputBuffer) Write(p []byte) (n int, err error) { + return b.buf.Write(p) +} + +func (b *inferenceOutputBuffer) String() string { + return b.buf.String() +} + +// truncateTail returns the last n characters of s +func truncateTail(s string, n int) string { + if len(s) <= n { + return s + } + return "..." + s[len(s)-n:] +} + +// truncateHead returns the first n characters of s +func truncateHead(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "..." 
+} diff --git a/lib/devices/gpu_module_test.go b/lib/devices/gpu_module_test.go new file mode 100644 index 0000000..841faed --- /dev/null +++ b/lib/devices/gpu_module_test.go @@ -0,0 +1,505 @@ +package devices_test + +import ( + "context" + "fmt" + "log" + "net/http" + "net/http/httptest" + "os" + osexec "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/daemon" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/registry" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + "github.com/stretchr/testify/require" +) + +// TestNVIDIAModuleLoading verifies that NVIDIA kernel modules load correctly in the VM. +// +// This is a simpler test than TestGPUInference that just verifies: +// 1. NVIDIA kernel modules (nvidia.ko, nvidia-uvm.ko, etc.) load during init +// 2. GSP firmware is found and loaded +// 3. /dev/nvidia* device nodes are created +// +// Prerequisites: +// - NVIDIA GPU on host +// - IOMMU enabled +// - VFIO modules loaded (modprobe vfio_pci) +// - Running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestNVIDIAModuleLoading -timeout 5m ./lib/devices/... 
+func TestNVIDIAModuleLoading(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("Test prerequisites met: %d IOMMU groups found", len(groups)) + + // Setup test infrastructure + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) + limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Find an NVIDIA GPU + t.Log("Step 1: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound - may need reboot") + } + + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing - GPU in broken 
state") + } + + // Step 2: Register the GPU + t.Log("Step 2: Registering GPU...") + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "module-test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered device: %s (ID: %s)", device.Name, device.Id) + + originalDriver := driverStr + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + probePath := "/sys/bus/pci/drivers_probe" + os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200) + } + }) + + // Step 3: Ensure system files + t.Log("Step 3: Ensuring system files...") + require.NoError(t, systemMgr.EnsureSystemFiles(ctx)) + + // Step 4: Pull nginx:alpine (stays running unlike plain alpine) + t.Log("Step 4: Pulling nginx:alpine image...") + createdImg, err := imageMgr.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + require.NoError(t, err) + t.Logf("CreateImage returned: name=%s, status=%s", createdImg.Name, createdImg.Status) + + // Wait for image to be ready + var img *images.Image + for i := 0; i < 90; i++ { + img, _ = imageMgr.GetImage(ctx, createdImg.Name) + if img != nil && img.Status == images.StatusReady { + break + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Log("Image ready") + + // Step 5: Create instance with GPU + t.Log("Step 5: Creating instance with GPU...") + + // Initialize network first + require.NoError(t, networkMgr.Initialize(ctx, []string{})) + + createCtx, createCancel := context.WithTimeout(ctx, 60*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "nvidia-module-test", + Image: createdImg.Name, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs 
extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 2, + NetworkEnabled: false, + Devices: []string{"module-test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Wait for instance to be running + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 30*time.Second) + require.NoError(t, err) + t.Log("Instance is ready") + + // Wait for init script to complete (module loading happens early in boot) + time.Sleep(5 * time.Second) + + // Step 6: Check module loading via dmesg + t.Log("Step 6: Checking NVIDIA module loading in VM...") + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Check dmesg for NVIDIA messages + var stdout, stderr outputBuffer + dmesgCmd := "dmesg | grep -i nvidia | head -50" + + for i := 0; i < 10; i++ { + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", dmesgCmd}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + if err == nil { + break + } + time.Sleep(time.Second) + } + require.NoError(t, err, "dmesg command should succeed") + + dmesgOutput := stdout.String() + t.Logf("=== NVIDIA dmesg output ===\n%s", dmesgOutput) + + // Check for key error indicators + firmwareMissing := strings.Contains(dmesgOutput, "No firmware image found") + initFailed := strings.Contains(dmesgOutput, "RmInitAdapter failed") + + if firmwareMissing { + t.Errorf("✗ GSP firmware not found - firmware not included in initrd") + } + if initFailed { + t.Errorf("✗ NVIDIA driver RmInitAdapter failed - GPU initialization error") + } + + // Check lsmod for nvidia modules + stdout = 
outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia || echo 'No nvidia modules loaded'"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + require.NoError(t, err) + modulesOutput := stdout.String() + t.Logf("=== Loaded nvidia modules ===\n%s", modulesOutput) + + hasModules := !strings.Contains(modulesOutput, "No nvidia modules loaded") + if !hasModules { + t.Errorf("✗ NVIDIA modules not loaded in VM") + } else { + t.Log("✓ NVIDIA kernel modules are loaded") + } + + // Check for /dev/nvidia* devices + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1 || echo 'No nvidia devices found'"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + require.NoError(t, err) + devicesOutput := stdout.String() + t.Logf("=== NVIDIA device nodes ===\n%s", devicesOutput) + + hasDevices := !strings.Contains(devicesOutput, "No nvidia devices found") && !strings.Contains(devicesOutput, "No such file") + if hasDevices { + t.Log("✓ /dev/nvidia* device nodes exist") + } else { + t.Log("✗ /dev/nvidia* device nodes not found (expected if init failed)") + } + + // Final verdict + if !firmwareMissing && !initFailed && hasModules { + t.Log("\n=== SUCCESS: NVIDIA kernel modules loaded correctly ===") + } else { + t.Errorf("\n=== FAILURE: NVIDIA module loading has issues ===") + } +} + +// TestNVMLDetection tests if NVML can detect the GPU from userspace. +// This uses the custom CUDA+Ollama image and runs a Python NVML test. +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestNVMLDetection -timeout 10m ./lib/devices/... 
+func TestNVMLDetection(t *testing.T) { + ctx := context.Background() + + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("Test prerequisites met: %d IOMMU groups found", len(groups)) + + // Use persistent test directory for image caching + const persistentTestDataDir = "/var/lib/hypeman-gpu-inference-test" + if err := os.MkdirAll(persistentTestDataDir, 0755); err != nil { + t.Fatalf("Failed to create persistent test dir: %v", err) + } + + p := paths.New(persistentTestDataDir) + cfg := &config.Config{ + DataDir: persistentTestDataDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) + limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Check if ollama-cuda:test image exists in Docker + t.Log("Step 1: Checking for ollama-cuda:test Docker image...") + checkCmd := osexec.Command("docker", "image", "inspect", "ollama-cuda:test") + if err := checkCmd.Run(); err != nil { + t.Fatal("Docker image ollama-cuda:test not found. 
Build it first with:\n" + + " cd lib/devices/testdata/ollama-cuda && docker build -t ollama-cuda:test .") + } + t.Log("Docker image ollama-cuda:test exists") + + // Step 2: Start registry and push image + t.Log("Step 2: Starting registry and pushing image...") + reg, err := registry.New(p, imageMgr) + require.NoError(t, err) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s %s", r.Method, r.URL.Path) + reg.Handler().ServeHTTP(w, r) + })) + defer server.Close() + + serverHost := strings.TrimPrefix(server.URL, "http://") + pushLocalDockerImageForTest(t, "ollama-cuda:test", serverHost) + t.Log("Push complete") + + // Wait for image conversion + t.Log("Waiting for image conversion...") + var img *images.Image + var imageName string + for i := 0; i < 180; i++ { // 3 minutes max + allImages, listErr := imageMgr.ListImages(ctx) + if listErr == nil { + for _, candidate := range allImages { + if strings.Contains(candidate.Name, "ollama-cuda") { + img = &candidate + imageName = candidate.Name + break + } + } + } + if img != nil && img.Status == images.StatusReady { + break + } + if i%30 == 0 { + status := "not found" + if img != nil { + status = string(img.Status) + } + t.Logf("Waiting for image... 
(%d/180, status=%s)", i+1, status) + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist after 3 minutes") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Logf("Image ready: %s", imageName) + + // Step 3: Find and register GPU + t.Log("Step 3: Discovering GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + t.Logf("Found GPU: %s at %s", targetGPU.DeviceName, targetGPU.PCIAddress) + + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "nvml-test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Cleanup(func() { + deviceMgr.DeleteDevice(ctx, device.Id) + }) + + // Step 4: Initialize network and system + require.NoError(t, networkMgr.Initialize(ctx, []string{})) + require.NoError(t, systemMgr.EnsureSystemFiles(ctx)) + + // Step 5: Create instance + t.Log("Step 4: Creating instance with CUDA image...") + inst, err := instanceMgr.CreateInstance(ctx, instances.CreateInstanceRequest{ + Name: "nvml-test", + Image: imageName, + Size: 2 * 1024 * 1024 * 1024, + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 2, + NetworkEnabled: true, + Devices: []string{"nvml-test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 60*time.Second) + require.NoError(t, err) + time.Sleep(5 * time.Second) + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Step 5: Run NVML test + t.Log("Step 5: Running NVML 
detection test...") + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + var stdout, stderr outputBuffer + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-nvml.py 2>&1"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + + t.Logf("NVML test output:\n%s", stdout.String()) + if stderr.String() != "" { + t.Logf("NVML test stderr:\n%s", stderr.String()) + } + + require.NoError(t, err, "NVML test command should succeed") + + output := stdout.String() + if strings.Contains(output, "GPU DETECTED") { + t.Log("✓ SUCCESS: NVML detected the GPU!") + } else if strings.Contains(output, "NVML_ERROR_LIB_RM_VERSION_MISMATCH") { + t.Log("✗ NVML version mismatch - container NVML library doesn't match kernel driver version") + t.Log(" Container has: 570.195.03") + t.Log(" Kernel driver: 570.86.16") + t.FailNow() + } else if strings.Contains(output, "NVML_ERROR_DRIVER_NOT_LOADED") { + t.Log("✗ NVML reports driver not loaded (but kernel modules are loaded)") + t.FailNow() + } else { + t.Errorf("✗ NVML test failed: %s", output) + } + + // Step 6: Run CUDA test + t.Log("Step 6: Running CUDA driver test...") + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-cuda.py 2>&1"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + + t.Logf("CUDA test output:\n%s", stdout.String()) + if strings.Contains(stdout.String(), "CUDA WORKS") { + t.Log("✓ SUCCESS: CUDA driver works!") + } else { + t.Logf("CUDA test may have issues: %s", stdout.String()) + } +} + +// pushLocalDockerImageForTest is a test helper that pushes a local Docker image to the registry +func pushLocalDockerImageForTest(t *testing.T, dockerImage, serverHost string) { + t.Helper() + + srcRef, err := name.ParseReference(dockerImage) + 
require.NoError(t, err) + + img, err := daemon.Image(srcRef) + require.NoError(t, err) + + targetRef := fmt.Sprintf("%s/test/ollama-cuda:latest", serverHost) + t.Logf("Pushing to %s", targetRef) + + dstRef, err := name.ParseReference(targetRef, name.Insecure) + require.NoError(t, err) + + err = remote.Write(dstRef, img) + require.NoError(t, err) +} diff --git a/lib/devices/manager.go b/lib/devices/manager.go new file mode 100644 index 0000000..cc004cd --- /dev/null +++ b/lib/devices/manager.go @@ -0,0 +1,784 @@ +package devices + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + "sync" + "time" + + "github.com/nrednav/cuid2" + "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/paths" +) + +// InstanceLivenessChecker provides a way to check if an instance is running. +// This interface allows devices to query instance state without a circular dependency. +type InstanceLivenessChecker interface { + // IsInstanceRunning returns true if the instance exists and is in a running state + // (i.e., has an active VMM process). Returns false if the instance doesn't exist + // or is stopped/standby/unknown. + IsInstanceRunning(ctx context.Context, instanceID string) bool + + // GetInstanceDevices returns the list of device IDs attached to an instance. + // Returns nil if the instance doesn't exist. + GetInstanceDevices(ctx context.Context, instanceID string) []string + + // ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances. + ListAllInstanceDevices(ctx context.Context) map[string][]string + + // DetectSuspiciousVMMProcesses finds cloud-hypervisor processes that don't match + // known instances and logs warnings. Returns the count of suspicious processes found. 
+ DetectSuspiciousVMMProcesses(ctx context.Context) int +} + +// Manager provides device management operations +type Manager interface { + // ListDevices returns all registered devices + ListDevices(ctx context.Context) ([]Device, error) + + // ListAvailableDevices discovers passthrough-capable devices on the host + ListAvailableDevices(ctx context.Context) ([]AvailableDevice, error) + + // CreateDevice registers a new device for passthrough + CreateDevice(ctx context.Context, req CreateDeviceRequest) (*Device, error) + + // GetDevice returns a device by ID or name + GetDevice(ctx context.Context, idOrName string) (*Device, error) + + // DeleteDevice unregisters a device + DeleteDevice(ctx context.Context, id string) error + + // BindToVFIO binds a device to vfio-pci driver + BindToVFIO(ctx context.Context, id string) error + + // UnbindFromVFIO unbinds a device from vfio-pci driver + UnbindFromVFIO(ctx context.Context, id string) error + + // MarkAttached marks a device as attached to an instance + MarkAttached(ctx context.Context, deviceID, instanceID string) error + + // MarkDetached marks a device as detached from an instance + MarkDetached(ctx context.Context, deviceID string) error + + // ReconcileDevices cleans up stale device state on startup. + // It detects devices with AttachedTo referencing non-existent instances + // and clears the orphaned attachment state. + ReconcileDevices(ctx context.Context) error + + // SetLivenessChecker sets the instance liveness checker after construction. + // This allows breaking the circular dependency between device and instance managers. + SetLivenessChecker(checker InstanceLivenessChecker) +} + +type manager struct { + paths *paths.Paths + vfioBinder *VFIOBinder + livenessChecker InstanceLivenessChecker + mu sync.RWMutex +} + +// NewManager creates a new device manager. +// Use SetLivenessChecker after construction to enable accurate orphan detection. 
+func NewManager(p *paths.Paths) Manager { + return &manager{ + paths: p, + vfioBinder: NewVFIOBinder(), + } +} + +// SetLivenessChecker sets the instance liveness checker. +// This enables accurate orphan detection during reconciliation. +// If not set, orphan detection falls back to checking if the instance directory exists. +func (m *manager) SetLivenessChecker(checker InstanceLivenessChecker) { + m.mu.Lock() + defer m.mu.Unlock() + m.livenessChecker = checker +} + +func (m *manager) ListDevices(ctx context.Context) ([]Device, error) { + // RLock protects against concurrent directory modifications (CreateDevice/DeleteDevice) + // during iteration. While individual file reads are atomic, directory iteration could + // see inconsistent state if a device is being created or deleted concurrently. + m.mu.RLock() + defer m.mu.RUnlock() + + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + if os.IsNotExist(err) { + return []Device{}, nil + } + return nil, fmt.Errorf("read devices dir: %w", err) + } + + var devices []Device + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := m.loadDevice(entry.Name()) + if err != nil { + continue + } + + // Update VFIO binding status from system state + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + + devices = append(devices, *device) + } + + return devices, nil +} + +func (m *manager) ListAvailableDevices(ctx context.Context) ([]AvailableDevice, error) { + return DiscoverAvailableDevices() +} + +func (m *manager) CreateDevice(ctx context.Context, req CreateDeviceRequest) (*Device, error) { + log := logger.FromContext(ctx) + + // Validate PCI address format (required) + if !ValidatePCIAddress(req.PCIAddress) { + return nil, ErrInvalidPCIAddress + } + + // Get device info from sysfs + deviceInfo, err := GetDeviceInfo(req.PCIAddress) + if err != nil { + return nil, fmt.Errorf("get device info: %w", err) + } + + // Generate ID + id := cuid2.Generate() + + 
// Handle optional name: if not provided, generate one from PCI address + name := req.Name + if name == "" { + // Generate name from PCI address: 0000:a2:00.0 -> pci-0000-a2-00-0 + name = "pci-" + strings.ReplaceAll(strings.ReplaceAll(req.PCIAddress, ":", "-"), ".", "-") + } + + // Validate name format + if !ValidateDeviceName(name) { + return nil, ErrInvalidName + } + + m.mu.Lock() + defer m.mu.Unlock() + + // Check if name already exists + if _, err := m.findByName(name); err == nil { + return nil, ErrNameExists + } + + // Check if PCI address already registered + if _, err := m.findByPCIAddress(req.PCIAddress); err == nil { + return nil, ErrAlreadyExists + } + + // Create device + device := &Device{ + Id: id, + Name: name, + Type: DetermineDeviceType(deviceInfo), + PCIAddress: req.PCIAddress, + VendorID: deviceInfo.VendorID, + DeviceID: deviceInfo.DeviceID, + IOMMUGroup: deviceInfo.IOMMUGroup, + BoundToVFIO: m.vfioBinder.IsDeviceBoundToVFIO(req.PCIAddress), + AttachedTo: nil, + CreatedAt: time.Now(), + } + + // Ensure directories exist + if err := os.MkdirAll(m.paths.DeviceDir(id), 0755); err != nil { + return nil, fmt.Errorf("create device dir: %w", err) + } + + // Save device metadata + if err := m.saveDevice(device); err != nil { + os.RemoveAll(m.paths.DeviceDir(id)) + return nil, fmt.Errorf("save device: %w", err) + } + + log.InfoContext(ctx, "registered device", + "id", id, + "name", name, + "pci_address", req.PCIAddress, + "type", device.Type, + ) + + return device, nil +} + +func (m *manager) GetDevice(ctx context.Context, idOrName string) (*Device, error) { + // RLock protects against concurrent modifications while looking up by name, + // which requires iterating the devices directory. 
+ m.mu.RLock() + defer m.mu.RUnlock() + + // Try by ID first + device, err := m.loadDevice(idOrName) + if err == nil { + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + return device, nil + } + + // Try by name + device, err = m.findByName(idOrName) + if err == nil { + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + return device, nil + } + + return nil, ErrNotFound +} + +func (m *manager) DeleteDevice(ctx context.Context, id string) error { + log := logger.FromContext(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(id) + if err != nil { + // Try by name + device, err = m.findByName(id) + if err != nil { + return ErrNotFound + } + id = device.Id + } + + // Check if device is attached + if device.AttachedTo != nil { + return ErrInUse + } + + // Remove device directory + if err := os.RemoveAll(m.paths.DeviceDir(id)); err != nil { + return fmt.Errorf("remove device dir: %w", err) + } + + log.InfoContext(ctx, "unregistered device", + "id", id, + "name", device.Name, + "pci_address", device.PCIAddress, + ) + + return nil +} + +func (m *manager) BindToVFIO(ctx context.Context, id string) error { + log := logger.FromContext(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(id) + if err != nil { + // Try by name + device, err = m.findByName(id) + if err != nil { + return ErrNotFound + } + } + + // Check IOMMU group safety + if err := m.vfioBinder.CheckIOMMUGroupSafe(device.PCIAddress, []string{device.PCIAddress}); err != nil { + return err + } + + // Bind to VFIO + if err := m.vfioBinder.BindToVFIO(device.PCIAddress); err != nil { + return err + } + + // Update device state + device.BoundToVFIO = true + if err := m.saveDevice(device); err != nil { + return fmt.Errorf("save device: %w", err) + } + + log.InfoContext(ctx, "bound device to VFIO", + "id", device.Id, + "name", device.Name, + "pci_address", device.PCIAddress, + ) + + return nil +} + +func (m *manager) 
UnbindFromVFIO(ctx context.Context, id string) error { + log := logger.FromContext(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(id) + if err != nil { + // Try by name + device, err = m.findByName(id) + if err != nil { + return ErrNotFound + } + } + + // Check if device is attached + if device.AttachedTo != nil { + return ErrInUse + } + + // Unbind from VFIO + if err := m.vfioBinder.UnbindFromVFIO(device.PCIAddress); err != nil { + return err + } + + // Update device state + device.BoundToVFIO = false + if err := m.saveDevice(device); err != nil { + return fmt.Errorf("save device: %w", err) + } + + log.InfoContext(ctx, "unbound device from VFIO", + "id", device.Id, + "name", device.Name, + "pci_address", device.PCIAddress, + ) + + return nil +} + +func (m *manager) MarkAttached(ctx context.Context, deviceID, instanceID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(deviceID) + if err != nil { + device, err = m.findByName(deviceID) + if err != nil { + return ErrNotFound + } + } + + if device.AttachedTo != nil { + return ErrInUse + } + + device.AttachedTo = &instanceID + return m.saveDevice(device) +} + +func (m *manager) MarkDetached(ctx context.Context, deviceID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(deviceID) + if err != nil { + device, err = m.findByName(deviceID) + if err != nil { + return ErrNotFound + } + } + + device.AttachedTo = nil + return m.saveDevice(device) +} + +// ReconcileDevices cleans up stale device state on startup. +// It performs safe-by-default reconciliation: +// 1. Detects orphaned device attachments (instance missing or not running) +// 2. Clears orphaned AttachedTo metadata +// 3. Runs GPU-reset-lite for orphaned devices (unbind VFIO, clear override, probe driver) +// 4. Logs mismatches between instance→device and device→instance references +// 5. 
Detects suspicious cloud-hypervisor processes +func (m *manager) ReconcileDevices(ctx context.Context) error { + log := logger.FromContext(ctx) + log.InfoContext(ctx, "reconciling device state") + + // Validate GPU prerequisites and log warnings + m.validatePrerequisites(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + if os.IsNotExist(err) { + // No devices directory yet, nothing to reconcile + return nil + } + return fmt.Errorf("read devices dir: %w", err) + } + + // Load all devices + var allDevices []*Device + deviceByID := make(map[string]*Device) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := m.loadDevice(entry.Name()) + if err != nil { + log.WarnContext(ctx, "failed to load device during reconciliation", + "device_id", entry.Name(), + "error", err, + ) + continue + } + // Update VFIO binding status from system state + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + allDevices = append(allDevices, device) + deviceByID[device.Id] = device + } + + // Build instance→device map if we have a liveness checker + var instanceDevices map[string][]string + if m.livenessChecker != nil { + instanceDevices = m.livenessChecker.ListAllInstanceDevices(ctx) + } + + // Track stats + var stats reconcileStats + + // Phase 1: Detect and handle orphaned device attachments + for _, device := range allDevices { + if device.AttachedTo == nil { + continue + } + + instanceID := *device.AttachedTo + orphaned := m.isInstanceOrphaned(ctx, instanceID) + + if orphaned { + log.WarnContext(ctx, "detected orphaned device attachment", + "device_id", device.Id, + "device_name", device.Name, + "pci_address", device.PCIAddress, + "orphaned_instance_id", instanceID, + ) + + // Clear the orphaned attachment + device.AttachedTo = nil + if err := m.saveDevice(device); err != nil { + log.ErrorContext(ctx, "failed to save device after clearing attachment", + 
"device_id", device.Id, + "error", err, + ) + stats.errors++ + continue + } + stats.orphanedCleared++ + + // Run GPU-reset-lite for orphaned device + m.resetOrphanedDevice(ctx, device, &stats) + } + } + + // Phase 2: Two-way reconciliation (log-only for mismatches) + if instanceDevices != nil { + for instanceID, deviceIDs := range instanceDevices { + for _, deviceID := range deviceIDs { + device, exists := deviceByID[deviceID] + if !exists { + // Instance references a device that doesn't exist in device metadata + log.WarnContext(ctx, "instance references unknown device (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + ) + stats.mismatches++ + continue + } + + // Check if device's AttachedTo matches + if device.AttachedTo == nil { + log.WarnContext(ctx, "instance references device but device.AttachedTo is nil (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + "device_name", device.Name, + ) + stats.mismatches++ + } else if *device.AttachedTo != instanceID { + log.WarnContext(ctx, "instance references device but device.AttachedTo points elsewhere (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + "device_name", device.Name, + "device_attached_to", *device.AttachedTo, + ) + stats.mismatches++ + } + + // Check VFIO binding state - if instance is running, device should be bound + if m.livenessChecker != nil && m.livenessChecker.IsInstanceRunning(ctx, instanceID) { + if !device.BoundToVFIO { + log.WarnContext(ctx, "running instance has device not bound to VFIO (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + "device_name", device.Name, + "pci_address", device.PCIAddress, + ) + stats.mismatches++ + } + } + } + } + } + + // Phase 3: Detect suspicious cloud-hypervisor processes (log-only) + if m.livenessChecker != nil { + stats.suspiciousVMM = m.livenessChecker.DetectSuspiciousVMMProcesses(ctx) + } + + // Log summary + log.InfoContext(ctx, "device reconciliation complete", + "orphaned_cleared", 
stats.orphanedCleared, + "reset_attempted", stats.resetAttempted, + "reset_succeeded", stats.resetSucceeded, + "reset_failed", stats.resetFailed, + "mismatches", stats.mismatches, + "suspicious_vmm", stats.suspiciousVMM, + "errors", stats.errors, + ) + + return nil +} + +// validatePrerequisites checks GPU passthrough prerequisites and logs warnings. +// This helps operators debug configuration issues. +func (m *manager) validatePrerequisites(ctx context.Context) { + log := logger.FromContext(ctx) + + // Check IOMMU availability + iommuGroupsDir := "/sys/kernel/iommu_groups" + entries, err := os.ReadDir(iommuGroupsDir) + if err != nil { + log.WarnContext(ctx, "IOMMU not available - GPU passthrough will not work", + "error", err, + "hint", "enable IOMMU in BIOS and kernel (intel_iommu=on or amd_iommu=on)", + ) + } else if len(entries) == 0 { + log.WarnContext(ctx, "no IOMMU groups found - GPU passthrough will not work", + "hint", "enable IOMMU in BIOS and kernel (intel_iommu=on or amd_iommu=on)", + ) + } + + // Check VFIO modules + vfioModules := []string{"vfio_pci", "vfio_iommu_type1"} + for _, module := range vfioModules { + modulePath := "/sys/module/" + module + if _, err := os.Stat(modulePath); os.IsNotExist(err) { + log.WarnContext(ctx, "VFIO module not loaded - GPU passthrough will not work", + "module", module, + "hint", "run: modprobe "+module, + ) + } + } + + // Check huge pages (info-level hint if devices exist but no huge pages) + hugePagesPath := "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" + if data, err := os.ReadFile(hugePagesPath); err == nil { + count := strings.TrimSpace(string(data)) + if count == "0" || count == "" { + // Only warn if we have registered devices + if devicesDir := m.paths.DevicesDir(); devicesDir != "" { + if entries, err := os.ReadDir(devicesDir); err == nil && len(entries) > 0 { + log.InfoContext(ctx, "huge pages not configured - GPU performance may be reduced", + "hint", "run: echo 1024 > 
/proc/sys/vm/nr_hugepages", + ) + } + } + } + } +} + +// reconcileStats tracks reconciliation metrics +type reconcileStats struct { + orphanedCleared int + resetAttempted int + resetSucceeded int + resetFailed int + mismatches int + suspiciousVMM int + errors int +} + +// isInstanceOrphaned checks if an instance should be considered orphaned +// (device attachment should be cleared). +func (m *manager) isInstanceOrphaned(ctx context.Context, instanceID string) bool { + // If we have a liveness checker, use it for more accurate detection + if m.livenessChecker != nil { + // Instance is orphaned if it's not running (stopped, standby, unknown, or missing) + return !m.livenessChecker.IsInstanceRunning(ctx, instanceID) + } + + // Fallback: just check if instance directory exists + instanceDir := m.paths.InstanceDir(instanceID) + _, err := os.Stat(instanceDir) + return os.IsNotExist(err) +} + +// resetOrphanedDevice performs GPU-reset-lite for an orphaned device. +// This is safe because we've already confirmed the device is orphaned. +// Steps mirror gpu-reset.sh but are per-device and non-destructive. 
+func (m *manager) resetOrphanedDevice(ctx context.Context, device *Device, stats *reconcileStats) { + log := logger.FromContext(ctx) + stats.resetAttempted++ + + log.InfoContext(ctx, "running GPU-reset-lite for orphaned device", + "device_id", device.Id, + "device_name", device.Name, + "pci_address", device.PCIAddress, + "bound_to_vfio", device.BoundToVFIO, + ) + + // Step 1: If bound to VFIO, unbind + if device.BoundToVFIO { + log.DebugContext(ctx, "unbinding orphaned device from VFIO", "pci_address", device.PCIAddress) + if err := m.vfioBinder.unbindFromDriver(device.PCIAddress, "vfio-pci"); err != nil { + log.WarnContext(ctx, "failed to unbind device from VFIO during reset", + "device_id", device.Id, + "pci_address", device.PCIAddress, + "error", err, + ) + // Continue with other steps + } + } + + // Step 2: Clear driver_override + log.DebugContext(ctx, "clearing driver_override", "pci_address", device.PCIAddress) + if err := m.vfioBinder.setDriverOverride(device.PCIAddress, ""); err != nil { + log.WarnContext(ctx, "failed to clear driver_override during reset", + "device_id", device.Id, + "pci_address", device.PCIAddress, + "error", err, + ) + // Continue with other steps + } + + // Step 3: Trigger driver probe to rebind to original driver + log.DebugContext(ctx, "triggering driver probe", "pci_address", device.PCIAddress) + if err := m.vfioBinder.triggerDriverProbe(device.PCIAddress); err != nil { + log.WarnContext(ctx, "failed to trigger driver probe during reset", + "device_id", device.Id, + "pci_address", device.PCIAddress, + "error", err, + ) + } + + // Step 4: For NVIDIA devices, restart nvidia-persistenced + if device.VendorID == "10de" { + log.DebugContext(ctx, "restarting nvidia-persistenced", "pci_address", device.PCIAddress) + if err := m.vfioBinder.startNvidiaPersistenced(); err != nil { + log.WarnContext(ctx, "failed to restart nvidia-persistenced during reset", + "device_id", device.Id, + "error", err, + ) + } + } + + // Verify the device is now 
unbound from VFIO + stillBoundToVFIO := m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + if stillBoundToVFIO { + log.WarnContext(ctx, "device still bound to VFIO after reset-lite", + "device_id", device.Id, + "pci_address", device.PCIAddress, + ) + stats.resetFailed++ + } else { + log.InfoContext(ctx, "GPU-reset-lite completed for orphaned device", + "device_id", device.Id, + "device_name", device.Name, + "pci_address", device.PCIAddress, + ) + stats.resetSucceeded++ + } + + // Update device metadata to reflect new VFIO state + device.BoundToVFIO = stillBoundToVFIO + if err := m.saveDevice(device); err != nil { + log.WarnContext(ctx, "failed to save device after reset-lite", + "device_id", device.Id, + "error", err, + ) + } +} + +// Helper methods + +func (m *manager) loadDevice(id string) (*Device, error) { + data, err := os.ReadFile(m.paths.DeviceMetadata(id)) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + + var device Device + if err := json.Unmarshal(data, &device); err != nil { + return nil, err + } + + return &device, nil +} + +func (m *manager) saveDevice(device *Device) error { + data, err := json.MarshalIndent(device, "", " ") + if err != nil { + return err + } + + return os.WriteFile(m.paths.DeviceMetadata(device.Id), data, 0644) +} + +func (m *manager) findByName(name string) (*Device, error) { + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + return nil, ErrNotFound + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := m.loadDevice(entry.Name()) + if err != nil { + continue + } + + if device.Name == name { + return device, nil + } + } + + return nil, ErrNotFound +} + +func (m *manager) findByPCIAddress(pciAddress string) (*Device, error) { + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + return nil, ErrNotFound + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := 
m.loadDevice(entry.Name()) + if err != nil { + continue + } + + if device.PCIAddress == pciAddress { + return device, nil + } + } + + return nil, ErrNotFound +} diff --git a/lib/devices/manager_test.go b/lib/devices/manager_test.go new file mode 100644 index 0000000..bb6a167 --- /dev/null +++ b/lib/devices/manager_test.go @@ -0,0 +1,165 @@ +package devices + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateDeviceName(t *testing.T) { + tests := []struct { + name string + input string + expected bool + }{ + {"valid alphanumeric", "l4gpu", true}, + {"valid with underscore", "my_gpu", true}, + {"valid with dash", "gpu-1", true}, + {"valid with dot", "nvidia.l4", true}, + {"valid mixed", "my-gpu_01.test", true}, + {"valid starting with number", "1gpu", true}, + {"invalid empty", "", false}, + {"invalid single char", "a", false}, // pattern requires at least 2 chars + {"invalid starts with dash", "-gpu", false}, + {"invalid starts with underscore", "_gpu", false}, + {"invalid starts with dot", ".gpu", false}, + {"invalid contains space", "my gpu", false}, + {"invalid contains special char", "gpu@1", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ValidateDeviceName(tt.input) + assert.Equal(t, tt.expected, result, "ValidateDeviceName(%q)", tt.input) + }) + } +} + +func TestValidatePCIAddress(t *testing.T) { + tests := []struct { + name string + input string + expected bool + }{ + {"valid standard", "0000:00:00.0", true}, + {"valid with letters", "0000:a2:00.0", true}, + {"valid uppercase", "0000:A2:00.0", true}, + {"valid mixed case", "0000:aB:c1.2", true}, + {"invalid too short", "0000:00:0.0", false}, + {"invalid no domain", "00:00.0", false}, + {"invalid missing colon", "000000:00.0", false}, + {"invalid missing dot", "0000:00:000", false}, + {"invalid extra segment", "0000:00:00:00.0", false}, + {"invalid empty", "", false}, + } + + for _, tt := 
range tests { + t.Run(tt.name, func(t *testing.T) { + result := ValidatePCIAddress(tt.input) + assert.Equal(t, tt.expected, result, "ValidatePCIAddress(%q)", tt.input) + }) + } +} + +func TestDetermineDeviceType(t *testing.T) { + // This test is limited since it reads from sysfs + // We test the function structure but can't mock sysfs easily + t.Run("returns generic for nil device", func(t *testing.T) { + device := &AvailableDevice{ + PCIAddress: "0000:99:99.0", // Non-existent device + } + deviceType := DetermineDeviceType(device) + assert.Equal(t, DeviceTypeGeneric, deviceType) + }) +} + +func TestGetDeviceSysfsPath(t *testing.T) { + tests := []struct { + pciAddress string + expected string + }{ + {"0000:a2:00.0", "/sys/bus/pci/devices/0000:a2:00.0/"}, + {"0000:00:1f.0", "/sys/bus/pci/devices/0000:00:1f.0/"}, + } + + for _, tt := range tests { + t.Run(tt.pciAddress, func(t *testing.T) { + result := GetDeviceSysfsPath(tt.pciAddress) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestGetVendorName(t *testing.T) { + tests := []struct { + vendorID string + expected string + }{ + {"10de", "NVIDIA Corporation"}, + {"1002", "AMD/ATI"}, + {"8086", "Intel Corporation"}, + {"1234", "Unknown Vendor"}, + } + + for _, tt := range tests { + t.Run(tt.vendorID, func(t *testing.T) { + result := getVendorName(tt.vendorID) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestGetDeviceName(t *testing.T) { + tests := []struct { + name string + vendorID string + deviceID string + classCode string + expected string + }{ + {"NVIDIA L4", "10de", "27b8", "0x030200", "L4"}, + {"NVIDIA RTX 4090", "10de", "2684", "0x030000", "RTX 4090"}, + {"Unknown NVIDIA", "10de", "9999", "0x030000", "VGA Controller"}, + {"Generic VGA", "1234", "5678", "0x030000", "VGA Controller"}, + {"Generic 3D", "1234", "5678", "0x030200", "3D Controller"}, + {"Audio device", "1234", "5678", "0x040300", "Audio Device"}, + {"Unknown class", "1234", "5678", "0x999999", "PCI Device"}, + } + + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := getDeviceName(tt.vendorID, tt.deviceID, tt.classCode) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestVFIOBinderIsVFIOAvailable(t *testing.T) { + binder := NewVFIOBinder() + // Just test that it doesn't panic + _ = binder.IsVFIOAvailable() +} + +func TestDeviceTypes(t *testing.T) { + t.Run("device type constants", func(t *testing.T) { + require.Equal(t, DeviceType("gpu"), DeviceTypeGPU) + require.Equal(t, DeviceType("pci"), DeviceTypeGeneric) + }) +} + +func TestErrors(t *testing.T) { + t.Run("error types are distinct", func(t *testing.T) { + assert.NotEqual(t, ErrNotFound, ErrInUse) + assert.NotEqual(t, ErrNotBound, ErrAlreadyBound) + assert.NotEqual(t, ErrAlreadyExists, ErrNameExists) + }) + + t.Run("error messages are meaningful", func(t *testing.T) { + assert.Contains(t, ErrNotFound.Error(), "not found") + assert.Contains(t, ErrInUse.Error(), "in use") + assert.Contains(t, ErrInvalidName.Error(), "pattern") + }) +} + + diff --git a/lib/devices/reconcile_test.go b/lib/devices/reconcile_test.go new file mode 100644 index 0000000..7c2bdae --- /dev/null +++ b/lib/devices/reconcile_test.go @@ -0,0 +1,612 @@ +package devices + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/onkernel/hypeman/lib/paths" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockLivenessChecker implements InstanceLivenessChecker for testing +type mockLivenessChecker struct { + runningInstances map[string]bool // instanceID -> isRunning + instanceDevices map[string][]string // instanceID -> deviceIDs +} + +func newMockLivenessChecker() *mockLivenessChecker { + return &mockLivenessChecker{ + runningInstances: make(map[string]bool), + instanceDevices: make(map[string][]string), + } +} + +func (m *mockLivenessChecker) IsInstanceRunning(ctx context.Context, instanceID string) bool { + return m.runningInstances[instanceID] 
+} + +func (m *mockLivenessChecker) GetInstanceDevices(ctx context.Context, instanceID string) []string { + return m.instanceDevices[instanceID] +} + +func (m *mockLivenessChecker) ListAllInstanceDevices(ctx context.Context) map[string][]string { + return m.instanceDevices +} + +func (m *mockLivenessChecker) DetectSuspiciousVMMProcesses(ctx context.Context) int { + return 0 // Mock returns no suspicious processes +} + +func (m *mockLivenessChecker) setRunning(instanceID string, running bool) { + m.runningInstances[instanceID] = running +} + +func (m *mockLivenessChecker) setInstanceDevices(instanceID string, deviceIDs []string) { + m.instanceDevices[instanceID] = deviceIDs +} + +// setupTestManager creates a manager with a temporary directory for testing +func setupTestManager(t *testing.T) (*manager, *paths.Paths, string) { + t.Helper() + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + // Create devices directory + require.NoError(t, os.MkdirAll(p.DevicesDir(), 0755)) + + mgr := &manager{ + paths: p, + vfioBinder: NewVFIOBinder(), + } + + return mgr, p, tmpDir +} + +// createTestDevice creates a device in the test directory +func createTestDevice(t *testing.T, p *paths.Paths, device *Device) { + t.Helper() + deviceDir := p.DeviceDir(device.Id) + require.NoError(t, os.MkdirAll(deviceDir, 0755)) + + data, err := json.MarshalIndent(device, "", " ") + require.NoError(t, err) + + require.NoError(t, os.WriteFile(p.DeviceMetadata(device.Id), data, 0644)) +} + +// createTestInstanceDir creates an instance directory (simulating instance existence) +func createTestInstanceDir(t *testing.T, p *paths.Paths, instanceID string) { + t.Helper() + instanceDir := p.InstanceDir(instanceID) + require.NoError(t, os.MkdirAll(instanceDir, 0755)) +} + +func TestReconcileDevices_NoDevices(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) +} + +func 
TestReconcileDevices_OrphanedAttachment_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + instanceID := "orphaned-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo pointing to non-existent instance + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", // Non-existent for test + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Don't create the instance directory - it's orphaned + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was cleared + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "AttachedTo should be cleared for orphaned device") +} + +func TestReconcileDevices_ValidAttachment_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + instanceID := "valid-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo pointing to existing instance + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create the instance directory - it exists + createTestInstanceDir(t, p, instanceID) + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was NOT cleared (instance exists) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo, "AttachedTo should NOT be cleared for valid device") + assert.Equal(t, instanceID, *updatedDevice.AttachedTo) +} + +func TestReconcileDevices_OrphanedAttachment_WithLivenessChecker(t *testing.T) { + mgr, p, _ := 
setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "stopped-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create instance directory but mark as NOT running + createTestInstanceDir(t, p, instanceID) + liveness.setRunning(instanceID, false) // Stopped/standby + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was cleared (instance not running) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "AttachedTo should be cleared for non-running instance") +} + +func TestReconcileDevices_ValidAttachment_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "running-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create instance and mark as running + createTestInstanceDir(t, p, instanceID) + liveness.setRunning(instanceID, true) // Running + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was NOT cleared (instance is running) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo, "AttachedTo should 
NOT be cleared for running instance") + assert.Equal(t, instanceID, *updatedDevice.AttachedTo) +} + +func TestReconcileDevices_TwoWayMismatch_InstanceRefsUnknownDevice(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker with instance that references unknown device + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "instance-with-ghost-device" + unknownDeviceID := "device-that-doesnt-exist" + + // Instance references a device that doesn't exist + liveness.setInstanceDevices(instanceID, []string{unknownDeviceID}) + liveness.setRunning(instanceID, true) + + // Run reconciliation - should not error, just log the mismatch + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: We can't easily verify log output, but the test ensures no panic/error +} + +func TestReconcileDevices_TwoWayMismatch_DeviceAttachedToNil(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "instance-123" + deviceID := "device-abc" + + // Create device with NO AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: nil, // Not attached according to device metadata + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Instance claims to have this device + liveness.setInstanceDevices(instanceID, []string{deviceID}) + liveness.setRunning(instanceID, true) + + // Run reconciliation - should log mismatch but not error + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: This is a log-only mismatch, device state should remain unchanged + + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "Device should remain unattached 
(log-only mismatch)") +} + +func TestReconcileDevices_TwoWayMismatch_DeviceAttachedToWrongInstance(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID1 := "instance-1" + instanceID2 := "instance-2" + deviceID := "device-abc" + + // Create device attached to instance-1 + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID1, // Attached to instance-1 + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Both instances exist and are running + createTestInstanceDir(t, p, instanceID1) + createTestInstanceDir(t, p, instanceID2) + liveness.setRunning(instanceID1, true) + liveness.setRunning(instanceID2, true) + + // instance-2 claims to have this device (mismatch!) + liveness.setInstanceDevices(instanceID2, []string{deviceID}) + + // Run reconciliation - should log mismatch but not error + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: This is a log-only mismatch, device state should remain unchanged + + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo) + assert.Equal(t, instanceID1, *updatedDevice.AttachedTo, "Device should remain attached to original instance (log-only mismatch)") +} + +func TestReconcileDevices_MultipleDevices(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + runningInstanceID := "running-instance" + stoppedInstanceID := "stopped-instance" + orphanedInstanceID := "orphaned-instance" + + // Device 1: Attached to running instance - should stay attached + device1 := &Device{ + Id: "device-1", + Name: "gpu-1", + Type: DeviceTypeGPU, + PCIAddress: 
"0000:01:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &runningInstanceID, + CreatedAt: time.Now(), + } + + // Device 2: Attached to stopped instance - should be cleared + device2 := &Device{ + Id: "device-2", + Name: "gpu-2", + Type: DeviceTypeGPU, + PCIAddress: "0000:02:00.0", + VendorID: "10de", + DeviceID: "5678", + AttachedTo: &stoppedInstanceID, + CreatedAt: time.Now(), + } + + // Device 3: Attached to non-existent instance - should be cleared + device3 := &Device{ + Id: "device-3", + Name: "gpu-3", + Type: DeviceTypeGPU, + PCIAddress: "0000:03:00.0", + VendorID: "10de", + DeviceID: "9abc", + AttachedTo: &orphanedInstanceID, + CreatedAt: time.Now(), + } + + // Device 4: Not attached - should stay unattached + device4 := &Device{ + Id: "device-4", + Name: "gpu-4", + Type: DeviceTypeGPU, + PCIAddress: "0000:04:00.0", + VendorID: "10de", + DeviceID: "def0", + AttachedTo: nil, + CreatedAt: time.Now(), + } + + createTestDevice(t, p, device1) + createTestDevice(t, p, device2) + createTestDevice(t, p, device3) + createTestDevice(t, p, device4) + + // Set up instance states + createTestInstanceDir(t, p, runningInstanceID) + createTestInstanceDir(t, p, stoppedInstanceID) + // Don't create orphanedInstanceID directory + + liveness.setRunning(runningInstanceID, true) + liveness.setRunning(stoppedInstanceID, false) + // orphanedInstanceID doesn't exist in liveness checker + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify device 1 stays attached (running instance) + d1, err := mgr.loadDevice("device-1") + require.NoError(t, err) + require.NotNil(t, d1.AttachedTo) + assert.Equal(t, runningInstanceID, *d1.AttachedTo) + + // Verify device 2 is cleared (stopped instance) + d2, err := mgr.loadDevice("device-2") + require.NoError(t, err) + assert.Nil(t, d2.AttachedTo) + + // Verify device 3 is cleared (orphaned instance) + d3, err := mgr.loadDevice("device-3") + require.NoError(t, err) + assert.Nil(t, 
d3.AttachedTo) + + // Verify device 4 stays unattached + d4, err := mgr.loadDevice("device-4") + require.NoError(t, err) + assert.Nil(t, d4.AttachedTo) +} + +func TestSetLivenessChecker(t *testing.T) { + mgr, _, _ := setupTestManager(t) + + // Initially nil + assert.Nil(t, mgr.livenessChecker) + + // Set liveness checker + liveness := newMockLivenessChecker() + mgr.SetLivenessChecker(liveness) + + // Verify it was set + assert.Equal(t, liveness, mgr.livenessChecker) +} + +func TestIsInstanceOrphaned_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + existingInstanceID := "existing-instance" + missingInstanceID := "missing-instance" + + // Create one instance directory + createTestInstanceDir(t, p, existingInstanceID) + + // Existing instance is NOT orphaned + assert.False(t, mgr.isInstanceOrphaned(ctx, existingInstanceID)) + + // Missing instance IS orphaned + assert.True(t, mgr.isInstanceOrphaned(ctx, missingInstanceID)) +} + +func TestIsInstanceOrphaned_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + runningInstanceID := "running-instance" + stoppedInstanceID := "stopped-instance" + + // Both instances have directories + createTestInstanceDir(t, p, runningInstanceID) + createTestInstanceDir(t, p, stoppedInstanceID) + + liveness.setRunning(runningInstanceID, true) + liveness.setRunning(stoppedInstanceID, false) + + // Running instance is NOT orphaned + assert.False(t, mgr.isInstanceOrphaned(ctx, runningInstanceID)) + + // Stopped instance IS orphaned (even though directory exists) + assert.True(t, mgr.isInstanceOrphaned(ctx, stoppedInstanceID)) +} + +func TestReconcileDevices_NoDevicesDirectory(t *testing.T) { + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + // Don't create devices directory + + mgr := &manager{ + paths: p, + vfioBinder: 
NewVFIOBinder(), + } + + ctx := context.Background() + + // Should not error when directory doesn't exist + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) +} + +func TestReconcileStats(t *testing.T) { + // Verify stats struct has expected fields + stats := reconcileStats{} + + stats.orphanedCleared = 1 + stats.resetAttempted = 2 + stats.resetSucceeded = 3 + stats.resetFailed = 4 + stats.mismatches = 5 + stats.suspiciousVMM = 6 + stats.errors = 7 + + assert.Equal(t, 1, stats.orphanedCleared) + assert.Equal(t, 2, stats.resetAttempted) + assert.Equal(t, 3, stats.resetSucceeded) + assert.Equal(t, 4, stats.resetFailed) + assert.Equal(t, 5, stats.mismatches) + assert.Equal(t, 6, stats.suspiciousVMM) + assert.Equal(t, 7, stats.errors) +} + +// TestResetOrphanedDevice_NonExistentPCIAddress tests that reset-lite +// handles non-existent PCI addresses gracefully (doesn't panic) +func TestResetOrphanedDevice_NonExistentPCIAddress(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Create device with fake PCI address that doesn't exist + device := &Device{ + Id: "test-device", + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:ff:ff.f", // Non-existent + VendorID: "10de", // NVIDIA vendor ID + DeviceID: "1234", + BoundToVFIO: true, // Claim it's bound to VFIO + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + stats := &reconcileStats{} + + // Should not panic, should handle errors gracefully + mgr.resetOrphanedDevice(ctx, device, stats) + + // Reset was attempted + assert.Equal(t, 1, stats.resetAttempted) + + // May fail due to non-existent device, that's expected + // The key is it doesn't panic +} + +// Helper function for testing: verify device directory structure +func verifyDeviceDir(t *testing.T, p *paths.Paths, deviceID string) bool { + t.Helper() + metadataPath := p.DeviceMetadata(deviceID) + _, err := os.Stat(metadataPath) + return err == nil +} + +// 
TestReconcileDevices_CorruptedDeviceMetadata tests handling of +// corrupted device metadata files +func TestReconcileDevices_CorruptedDeviceMetadata(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Create a valid device + validDevice := &Device{ + Id: "valid-device", + Name: "valid-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:01:00.0", + VendorID: "10de", + DeviceID: "1234", + CreatedAt: time.Now(), + } + createTestDevice(t, p, validDevice) + + // Create a corrupted device directory with invalid JSON + corruptedID := "corrupted-device" + corruptedDir := p.DeviceDir(corruptedID) + require.NoError(t, os.MkdirAll(corruptedDir, 0755)) + corruptedPath := filepath.Join(corruptedDir, "metadata.json") + require.NoError(t, os.WriteFile(corruptedPath, []byte("not valid json{{{"), 0644)) + + // Should not error - should skip corrupted device and continue + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Valid device should still be loadable + d, err := mgr.loadDevice("valid-device") + require.NoError(t, err) + assert.Equal(t, "valid-gpu", d.Name) +} + diff --git a/lib/devices/scripts/gpu-reset.sh b/lib/devices/scripts/gpu-reset.sh new file mode 100755 index 0000000..37006f7 --- /dev/null +++ b/lib/devices/scripts/gpu-reset.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# +# gpu-reset.sh - Reset GPU state after failed passthrough tests or hangs +# +# This script handles common GPU recovery scenarios: +# 1. Killing any stuck cloud-hypervisor processes holding the GPU +# 2. Unbinding from vfio-pci if still bound +# 3. Clearing driver_override +# 4. Triggering driver probe to rebind to nvidia driver +# 5. Restarting nvidia-persistenced +# +# Usage: +# sudo ./gpu-reset.sh # Reset all NVIDIA GPUs +# sudo ./gpu-reset.sh 0000:a2:00.0 # Reset specific GPU by PCI address +# +# Must be run as root. 
+ +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if running as root +if [[ $EUID -ne 0 ]]; then + log_error "This script must be run as root (sudo)" + exit 1 +fi + +# Get PCI address from argument or find all NVIDIA GPUs +if [[ -n "$1" ]]; then + PCI_ADDRESSES=("$1") +else + # Find all NVIDIA GPUs (vendor 10de) + PCI_ADDRESSES=() + for dev in /sys/bus/pci/devices/*; do + if [[ -f "$dev/vendor" ]]; then + vendor=$(cat "$dev/vendor" 2>/dev/null) + class=$(cat "$dev/class" 2>/dev/null) + # Check for NVIDIA vendor (0x10de) and display/3D controller class (0x03xxxx) + if [[ "$vendor" == "0x10de" && "$class" == 0x03* ]]; then + addr=$(basename "$dev") + PCI_ADDRESSES+=("$addr") + fi + fi + done +fi + +if [[ ${#PCI_ADDRESSES[@]} -eq 0 ]]; then + log_error "No NVIDIA GPUs found" + exit 1 +fi + +log_info "Found ${#PCI_ADDRESSES[@]} GPU(s) to reset: ${PCI_ADDRESSES[*]}" + +# Step 1: Kill any cloud-hypervisor processes that might be holding GPUs +log_info "Step 1: Checking for stuck cloud-hypervisor processes..." +if pgrep -f "cloud-hypervisor" > /dev/null 2>&1; then + log_warn "Found cloud-hypervisor processes, killing them..." + pkill -9 -f "cloud-hypervisor" 2>/dev/null || true + sleep 2 + if pgrep -f "cloud-hypervisor" > /dev/null 2>&1; then + log_error "Failed to kill cloud-hypervisor processes" + ps aux | grep cloud-hypervisor | grep -v grep + else + log_info "Killed cloud-hypervisor processes" + fi +else + log_info "No cloud-hypervisor processes found" +fi + +# Process each GPU +for PCI_ADDR in "${PCI_ADDRESSES[@]}"; do + log_info "Processing GPU at $PCI_ADDR..." + + DEVICE_PATH="/sys/bus/pci/devices/$PCI_ADDR" + + if [[ ! 
-d "$DEVICE_PATH" ]]; then + log_error "Device $PCI_ADDR not found at $DEVICE_PATH" + continue + fi + + # Get current driver + CURRENT_DRIVER="" + if [[ -L "$DEVICE_PATH/driver" ]]; then + CURRENT_DRIVER=$(basename "$(readlink "$DEVICE_PATH/driver")") + fi + log_info " Current driver: ${CURRENT_DRIVER:-none}" + + # Step 2: If bound to vfio-pci, unbind + if [[ "$CURRENT_DRIVER" == "vfio-pci" ]]; then + log_info " Step 2: Unbinding from vfio-pci..." + echo "$PCI_ADDR" > /sys/bus/pci/drivers/vfio-pci/unbind 2>/dev/null || true + sleep 1 + else + log_info " Step 2: Not bound to vfio-pci, skipping unbind" + fi + + # Step 3: Clear driver_override + log_info " Step 3: Clearing driver_override..." + if [[ -f "$DEVICE_PATH/driver_override" ]]; then + OVERRIDE=$(cat "$DEVICE_PATH/driver_override" 2>/dev/null) + if [[ -n "$OVERRIDE" && "$OVERRIDE" != "(null)" ]]; then + log_info " Current override: $OVERRIDE" + echo > "$DEVICE_PATH/driver_override" 2>/dev/null || true + log_info " Cleared driver_override" + else + log_info " No driver_override set" + fi + fi + + # Step 4: Trigger driver probe to rebind to nvidia + log_info " Step 4: Triggering driver probe..." + echo "$PCI_ADDR" > /sys/bus/pci/drivers_probe 2>/dev/null || true + sleep 2 + + # Check new driver + NEW_DRIVER="" + if [[ -L "$DEVICE_PATH/driver" ]]; then + NEW_DRIVER=$(basename "$(readlink "$DEVICE_PATH/driver")") + fi + log_info " New driver: ${NEW_DRIVER:-none}" + + if [[ "$NEW_DRIVER" == "nvidia" ]]; then + log_info " ✓ GPU successfully rebound to nvidia driver" + elif [[ -z "$NEW_DRIVER" ]]; then + log_warn " GPU has no driver bound - may need manual intervention or reboot" + else + log_warn " GPU bound to $NEW_DRIVER (expected nvidia)" + fi +done + +# Step 5: Restart nvidia-persistenced +log_info "Step 5: Restarting nvidia-persistenced..." 
+if systemctl is-active nvidia-persistenced > /dev/null 2>&1; then + log_info " nvidia-persistenced is already running" +else + if systemctl start nvidia-persistenced 2>/dev/null; then + log_info " Started nvidia-persistenced" + else + log_warn " Failed to start nvidia-persistenced (may not be installed or GPU not ready)" + fi +fi + +# Final verification +log_info "" +log_info "=== Final GPU State ===" +for PCI_ADDR in "${PCI_ADDRESSES[@]}"; do + echo "" + lspci -nnks "$PCI_ADDR" 2>/dev/null || echo "Could not query $PCI_ADDR" +done + +echo "" +log_info "=== nvidia-smi ===" +if command -v nvidia-smi &> /dev/null; then + nvidia-smi 2>&1 | head -20 || log_warn "nvidia-smi failed (GPU may need more time or reboot)" +else + log_warn "nvidia-smi not found" +fi + +echo "" +log_info "GPU reset complete!" +log_info "If GPUs are still in a bad state, a system reboot may be required." + diff --git a/lib/devices/testdata/ollama-cuda/Dockerfile b/lib/devices/testdata/ollama-cuda/Dockerfile new file mode 100644 index 0000000..d31107f --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/Dockerfile @@ -0,0 +1,29 @@ +# Minimal CUDA image for GPU inference testing +# +# NO NVIDIA DRIVER INSTALLATION NEEDED! +# hypeman automatically injects the matching driver libraries at VM boot time. +# See lib/devices/GPU.md for documentation on driver injection. +# +# This image demonstrates that standard CUDA runtime images work out of the box +# with hypeman's GPU passthrough - no driver version matching required. 
+ +FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 + +# Install dependencies and Ollama +# Note: We use the runtime image (not devel) since we don't need CUDA compilation tools +RUN apt-get update && \ + apt-get install -y curl ca-certificates python3 && \ + curl -fsSL https://ollama.com/install.sh | sh && \ + rm -rf /var/lib/apt/lists/* + +# Add test scripts for verifying GPU access +COPY test-nvml.py /usr/local/bin/test-nvml.py +COPY test-cuda.py /usr/local/bin/test-cuda.py +RUN chmod +x /usr/local/bin/test-nvml.py /usr/local/bin/test-cuda.py + +# Ensure libraries are in the path (hypeman injects to /usr/lib/x86_64-linux-gnu) +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH} +ENV PATH=/usr/local/cuda/bin:/usr/bin:${PATH} + +EXPOSE 11434 +CMD ["ollama", "serve"] diff --git a/lib/devices/testdata/ollama-cuda/test-cuda.py b/lib/devices/testdata/ollama-cuda/test-cuda.py new file mode 100644 index 0000000..9e9c0eb --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/test-cuda.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Test basic CUDA operations.""" +import ctypes +import os +import sys + +def test_cuda(): + """Try to use the CUDA driver API.""" + print("=== CUDA Driver Test ===") + print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'not set')}") + + # Try loading libcuda + try: + cuda = ctypes.CDLL("libcuda.so") + print("✓ Loaded libcuda.so") + except OSError as e: + print(f"✗ Failed to load libcuda.so: {e}") + return False + + # Initialize CUDA + ret = cuda.cuInit(0) + if ret != 0: + print(f"✗ cuInit failed with code: {ret}") + return False + print("✓ cuInit succeeded") + + # Get device count + count = ctypes.c_int() + ret = cuda.cuDeviceGetCount(ctypes.byref(count)) + if ret != 0: + print(f"✗ cuDeviceGetCount failed with code: {ret}") + return False + print(f"✓ Found {count.value} CUDA device(s)") + + if count.value == 0: + return False + + # Get device name + device = ctypes.c_int() + ret = 
cuda.cuDeviceGet(ctypes.byref(device), 0) + if ret != 0: + print(f"✗ cuDeviceGet failed: {ret}") + return False + + name = ctypes.create_string_buffer(256) + ret = cuda.cuDeviceGetName(name, 256, device) + if ret == 0: + print(f"✓ Device 0: {name.value.decode()}") + + # Get total memory + total_mem = ctypes.c_size_t() + ret = cuda.cuDeviceTotalMem_v2(ctypes.byref(total_mem), device) + if ret == 0: + print(f"✓ Total memory: {total_mem.value / (1024**3):.1f} GB") + + return True + +if __name__ == "__main__": + success = test_cuda() + print() + print("Result:", "CUDA WORKS" if success else "CUDA FAILED") + sys.exit(0 if success else 1) + diff --git a/lib/devices/testdata/ollama-cuda/test-nvml.py b/lib/devices/testdata/ollama-cuda/test-nvml.py new file mode 100644 index 0000000..42e9882 --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/test-nvml.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +"""Test NVML GPU detection - matches what Ollama does internally.""" +import ctypes +import os + +def test_nvml(): + """Try to initialize NVML and detect GPUs.""" + # Try different library paths + lib_paths = [ + "libnvidia-ml.so.1", + "libnvidia-ml.so", + "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1", + ] + + nvml = None + for path in lib_paths: + try: + nvml = ctypes.CDLL(path) + print(f"✓ Loaded NVML from: {path}") + break + except OSError as e: + print(f"✗ Failed to load {path}: {e}") + + if nvml is None: + print("ERROR: Could not load NVML library") + return False + + # Try to initialize + try: + ret = nvml.nvmlInit_v2() + if ret != 0: + print(f"✗ nvmlInit_v2 failed with code: {ret}") + # Error codes: 1=uninitialized, 2=invalid argument, 3=not supported, + # 9=driver not loaded, 12=library not found + error_names = { + 1: "NVML_ERROR_UNINITIALIZED", + 2: "NVML_ERROR_INVALID_ARGUMENT", + 3: "NVML_ERROR_NOT_SUPPORTED", + 9: "NVML_ERROR_DRIVER_NOT_LOADED", + 12: "NVML_ERROR_LIB_RM_VERSION_MISMATCH", + 255: "NVML_ERROR_UNKNOWN", + } + print(f" Error name: {error_names.get(ret, 
'UNKNOWN')}") + return False + print("✓ nvmlInit_v2 succeeded") + except Exception as e: + print(f"✗ nvmlInit_v2 exception: {e}") + return False + + # Get device count + try: + count = ctypes.c_uint() + ret = nvml.nvmlDeviceGetCount_v2(ctypes.byref(count)) + if ret != 0: + print(f"✗ nvmlDeviceGetCount failed with code: {ret}") + return False + print(f"✓ Found {count.value} GPU(s)") + except Exception as e: + print(f"✗ nvmlDeviceGetCount exception: {e}") + return False + + # Shutdown + nvml.nvmlShutdown() + return count.value > 0 + +if __name__ == "__main__": + print("=== NVML GPU Detection Test ===") + print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'not set')}") + print() + + # Check device nodes + print("Device nodes:") + for dev in ["/dev/nvidia0", "/dev/nvidiactl", "/dev/nvidia-uvm"]: + exists = os.path.exists(dev) + print(f" {dev}: {'exists' if exists else 'MISSING'}") + print() + + success = test_nvml() + print() + print("Result:", "GPU DETECTED" if success else "NO GPU FOUND") + exit(0 if success else 1) + + diff --git a/lib/devices/types.go b/lib/devices/types.go new file mode 100644 index 0000000..ca7b68e --- /dev/null +++ b/lib/devices/types.go @@ -0,0 +1,56 @@ +package devices + +import ( + "regexp" + "time" +) + +// DeviceType represents the type of PCI device +type DeviceType string + +const ( + DeviceTypeGPU DeviceType = "gpu" + DeviceTypeGeneric DeviceType = "pci" +) + +// Device represents a registered PCI device for passthrough +type Device struct { + Id string `json:"id"` // cuid2 identifier + Name string `json:"name"` // user-provided globally unique name + Type DeviceType `json:"type"` // gpu or pci + PCIAddress string `json:"pci_address"` // e.g., "0000:a2:00.0" + VendorID string `json:"vendor_id"` // e.g., "10de" + DeviceID string `json:"device_id"` // e.g., "27b8" + IOMMUGroup int `json:"iommu_group"` // IOMMU group number + BoundToVFIO bool `json:"bound_to_vfio"` // whether device is bound to vfio-pci + AttachedTo *string 
`json:"attached_to"` // instance ID if attached, nil otherwise + CreatedAt time.Time `json:"created_at"` +} + +// CreateDeviceRequest is the request to register a new device +type CreateDeviceRequest struct { + Name string `json:"name,omitempty"` // optional: globally unique name (auto-generated if not provided) + PCIAddress string `json:"pci_address"` // required: PCI address (e.g., "0000:a2:00.0") +} + +// AvailableDevice represents a PCI device discovered on the host +type AvailableDevice struct { + PCIAddress string `json:"pci_address"` + VendorID string `json:"vendor_id"` + DeviceID string `json:"device_id"` + VendorName string `json:"vendor_name"` + DeviceName string `json:"device_name"` + IOMMUGroup int `json:"iommu_group"` + CurrentDriver *string `json:"current_driver"` // nil if no driver bound +} + +// DeviceNamePattern is the regex pattern for valid device names +// Must start with alphanumeric, followed by alphanumeric, underscore, dot, or dash +var DeviceNamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_.-]+$`) + +// ValidateDeviceName validates that a device name matches the required pattern +func ValidateDeviceName(name string) bool { + return DeviceNamePattern.MatchString(name) +} + + diff --git a/lib/devices/vfio.go b/lib/devices/vfio.go new file mode 100644 index 0000000..38606f5 --- /dev/null +++ b/lib/devices/vfio.go @@ -0,0 +1,310 @@ +package devices + +import ( + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +const ( + vfioDriverPath = "/sys/bus/pci/drivers/vfio-pci" + pciDriversPath = "/sys/bus/pci/drivers" + vfioDevicePath = "/dev/vfio" +) + +// VFIOBinder handles binding and unbinding devices to/from VFIO +type VFIOBinder struct{} + +// NewVFIOBinder creates a new VFIOBinder +func NewVFIOBinder() *VFIOBinder { + return &VFIOBinder{} +} + +// IsVFIOAvailable checks if VFIO is available on the system +func (v *VFIOBinder) IsVFIOAvailable() bool { + _, err := os.Stat(vfioDriverPath) + return err == 
nil +} + +// IsDeviceBoundToVFIO checks if a device is currently bound to vfio-pci +func (v *VFIOBinder) IsDeviceBoundToVFIO(pciAddress string) bool { + driver := readCurrentDriver(pciAddress) + return driver != nil && *driver == "vfio-pci" +} + +// BindToVFIO binds a PCI device to the vfio-pci driver +// This requires: +// 1. Stopping any processes using the device (e.g., nvidia-persistenced for NVIDIA GPUs) +// 2. Unbinding the device from its current driver (if any) +// 3. Binding it to vfio-pci +func (v *VFIOBinder) BindToVFIO(pciAddress string) error { + if !v.IsVFIOAvailable() { + return ErrVFIONotAvailable + } + + if v.IsDeviceBoundToVFIO(pciAddress) { + return ErrAlreadyBound + } + + // Get device info for vendor/device IDs + deviceInfo, err := GetDeviceInfo(pciAddress) + if err != nil { + return fmt.Errorf("get device info: %w", err) + } + + // For NVIDIA GPUs, stop nvidia-persistenced which holds the device open + // This is required because the service keeps /dev/nvidia* open, blocking driver unbind + isNvidia := deviceInfo.VendorID == "10de" + stoppedNvidiaPersistenced := false + if isNvidia { + if err := v.stopNvidiaPersistenced(); err != nil { + slog.Warn("failed to stop nvidia-persistenced", "error", err) + // Continue anyway - it might not be running + } else { + stoppedNvidiaPersistenced = true + } + } + + // Use defer to ensure nvidia-persistenced is restarted on any error + // after we successfully stopped it + bindSucceeded := false + defer func() { + if stoppedNvidiaPersistenced && !bindSucceeded { + _ = v.startNvidiaPersistenced() + } + }() + + // Unbind from current driver if bound + currentDriver := readCurrentDriver(pciAddress) + if currentDriver != nil && *currentDriver != "" { + if err := v.unbindFromDriver(pciAddress, *currentDriver); err != nil { + return fmt.Errorf("unbind from %s: %w", *currentDriver, err) + } + } + + // Override driver to vfio-pci + if err := v.setDriverOverride(pciAddress, "vfio-pci"); err != nil { + return 
fmt.Errorf("set driver override: %w", err) + } + + // Bind to vfio-pci using the bind method (more reliable than new_id) + if err := v.bindDeviceToVFIO(pciAddress); err != nil { + return fmt.Errorf("bind to vfio-pci: %w", err) + } + + bindSucceeded = true + return nil +} + +// UnbindFromVFIO unbinds a device from vfio-pci and restores the original driver +func (v *VFIOBinder) UnbindFromVFIO(pciAddress string) error { + if !v.IsDeviceBoundToVFIO(pciAddress) { + return ErrNotBound + } + + // Get device info to check if it's NVIDIA + deviceInfo, err := GetDeviceInfo(pciAddress) + if err != nil { + return fmt.Errorf("get device info: %w", err) + } + isNvidia := deviceInfo.VendorID == "10de" + + // Clear driver override first + if err := v.setDriverOverride(pciAddress, ""); err != nil { + // Non-fatal, continue with unbind + } + + // Unbind from vfio-pci + if err := v.unbindFromDriver(pciAddress, "vfio-pci"); err != nil { + return fmt.Errorf("unbind from vfio-pci: %w", err) + } + + // Trigger driver probe to rebind to original driver + if err := v.triggerDriverProbe(pciAddress); err != nil { + slog.Warn("failed to trigger driver probe", "pci_address", pciAddress, "error", err) + } + + // For NVIDIA GPUs, restart nvidia-persistenced after rebinding + if isNvidia { + if err := v.startNvidiaPersistenced(); err != nil { + slog.Warn("failed to start nvidia-persistenced", "error", err) + } + } + + return nil +} + +// unbindFromDriver unbinds a device from its current driver +func (v *VFIOBinder) unbindFromDriver(pciAddress, driver string) error { + unbindPath := filepath.Join(pciDriversPath, driver, "unbind") + return os.WriteFile(unbindPath, []byte(pciAddress), 0200) +} + +// setDriverOverride sets the driver_override for a device +func (v *VFIOBinder) setDriverOverride(pciAddress, driver string) error { + overridePath := filepath.Join(sysfsDevicesPath, pciAddress, "driver_override") + + // Empty string clears the override + content := driver + if driver == "" { + content = 
"\n" // Writing newline clears the override + } + + return os.WriteFile(overridePath, []byte(content), 0200) +} + + +// bindDeviceToVFIO binds a specific device to vfio-pci using bind +func (v *VFIOBinder) bindDeviceToVFIO(pciAddress string) error { + bindPath := filepath.Join(vfioDriverPath, "bind") + return os.WriteFile(bindPath, []byte(pciAddress), 0200) +} + +// triggerDriverProbe triggers the kernel to probe for drivers for a device +func (v *VFIOBinder) triggerDriverProbe(pciAddress string) error { + probePath := "/sys/bus/pci/drivers_probe" + return os.WriteFile(probePath, []byte(pciAddress), 0200) +} + +// stopNvidiaPersistenced stops the nvidia-persistenced service +// This service keeps /dev/nvidia* open and blocks driver unbind +func (v *VFIOBinder) stopNvidiaPersistenced() error { + slog.Debug("stopping nvidia-persistenced service") + + // Try systemctl first (works as root) + cmd := exec.Command("systemctl", "stop", "nvidia-persistenced") + if err := cmd.Run(); err == nil { + return nil + } + + // Fall back to killing the process directly (works with CAP_KILL or as root) + // This is less clean but allows running with capabilities instead of full root + cmd = exec.Command("pkill", "-TERM", "nvidia-persistenced") + if err := cmd.Run(); err != nil { + // Check if process even exists + checkCmd := exec.Command("pgrep", "nvidia-persistenced") + if checkCmd.Run() != nil { + // Process doesn't exist, that's fine + return nil + } + return fmt.Errorf("failed to stop nvidia-persistenced (try: sudo systemctl stop nvidia-persistenced)") + } + + // Wait for process to exit with polling instead of arbitrary sleep + return v.waitForProcessExit("nvidia-persistenced", 2*time.Second) +} + +// waitForProcessExit polls for a process to exit, with timeout +func (v *VFIOBinder) waitForProcessExit(processName string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + pollInterval := 100 * time.Millisecond + + for time.Now().Before(deadline) { + checkCmd 
:= exec.Command("pgrep", processName) + if checkCmd.Run() != nil { + // Process no longer exists + return nil + } + time.Sleep(pollInterval) + } + + // Timeout - process still running + slog.Warn("timeout waiting for process to exit", "process", processName, "timeout", timeout) + return nil // Continue anyway, the bind might still work +} + +// startNvidiaPersistenced starts the nvidia-persistenced service +func (v *VFIOBinder) startNvidiaPersistenced() error { + slog.Debug("starting nvidia-persistenced service") + + // Try systemctl first (works as root) + cmd := exec.Command("systemctl", "start", "nvidia-persistenced") + if err := cmd.Run(); err != nil { + // If we can't start it, just log - not critical for test cleanup + slog.Warn("could not restart nvidia-persistenced", "error", err) + } + return nil +} + +// GetVFIOGroupPath returns the path to the VFIO group device for a PCI device +func (v *VFIOBinder) GetVFIOGroupPath(pciAddress string) (string, error) { + iommuGroup, err := readIOMMUGroup(pciAddress) + if err != nil { + return "", fmt.Errorf("read iommu group: %w", err) + } + + groupPath := filepath.Join(vfioDevicePath, fmt.Sprintf("%d", iommuGroup)) + if _, err := os.Stat(groupPath); os.IsNotExist(err) { + return "", fmt.Errorf("vfio group device not found: %s", groupPath) + } + + return groupPath, nil +} + +// CheckIOMMUGroupSafe checks if all devices in the IOMMU group are safe to pass through +// Returns an error if there are other devices in the group that aren't being passed through +func (v *VFIOBinder) CheckIOMMUGroupSafe(pciAddress string, allowedDevices []string) error { + iommuGroup, err := readIOMMUGroup(pciAddress) + if err != nil { + return fmt.Errorf("read iommu group: %w", err) + } + + groupDevices, err := GetIOMMUGroupDevices(iommuGroup) + if err != nil { + return fmt.Errorf("get iommu group devices: %w", err) + } + + // Build a set of allowed devices + allowed := make(map[string]bool) + for _, addr := range allowedDevices { + 
allowed[addr] = true + } + + // Check each device in the group + for _, device := range groupDevices { + if allowed[device] { + continue + } + + // Check if device is already bound to vfio-pci or is a bridge + driver := readCurrentDriver(device) + if driver != nil && *driver == "vfio-pci" { + continue + } + + // Check if it's a PCI bridge (these are usually okay to leave) + if v.isPCIBridge(device) { + continue + } + + // Found a device that's not allowed and not safe + return fmt.Errorf("%w: device %s in IOMMU group %d is not included", + ErrIOMMUGroupConflict, device, iommuGroup) + } + + return nil +} + +// isPCIBridge checks if a device is a PCI bridge +func (v *VFIOBinder) isPCIBridge(pciAddress string) bool { + classPath := filepath.Join(sysfsDevicesPath, pciAddress, "class") + classCode, err := readSysfsFile(classPath) + if err != nil { + return false + } + + classCode = strings.TrimPrefix(classCode, "0x") + // Class 06 = Bridge, Subclass 04 = PCI bridge + return len(classCode) >= 4 && classCode[:2] == "06" +} + +// GetDeviceSysfsPath returns the sysfs path for a PCI device (used by cloud-hypervisor) +func GetDeviceSysfsPath(pciAddress string) string { + return filepath.Join(sysfsDevicesPath, pciAddress) + "/" +} + + diff --git a/lib/instances/configdisk.go b/lib/instances/configdisk.go index dfc6bdb..fb9305a 100644 --- a/lib/instances/configdisk.go +++ b/lib/instances/configdisk.go @@ -1,6 +1,7 @@ package instances import ( + "context" "encoding/json" "fmt" "os" @@ -8,6 +9,7 @@ import ( "strconv" "strings" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" ) @@ -16,7 +18,7 @@ import ( // The disk contains: // - /config.sh - Shell script sourced by init // - /metadata.json - JSON metadata for programmatic access -func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { +func (m *manager) createConfigDisk(ctx context.Context, 
inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { // Create temporary directory for config files tmpDir, err := os.MkdirTemp("", "hypeman-config-*") if err != nil { @@ -25,7 +27,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC defer os.RemoveAll(tmpDir) // Generate config.sh - configScript := m.generateConfigScript(inst, imageInfo, netConfig) + configScript := m.generateConfigScript(ctx, inst, imageInfo, netConfig) configPath := filepath.Join(tmpDir, "config.sh") if err := os.WriteFile(configPath, []byte(configScript), 0644); err != nil { return fmt.Errorf("write config.sh: %w", err) @@ -53,7 +55,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC // Create ext4 disk with config files // Use ext4 for now (can switch to erofs when kernel supports it) diskPath := m.paths.InstanceConfigDisk(inst.Id) - + // Calculate size (config files are tiny, use 1MB minimum) _, err = images.ExportRootfs(tmpDir, diskPath, images.FormatExt4) if err != nil { @@ -64,32 +66,32 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC } // generateConfigScript creates the shell script that will be sourced by init -func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { +func (m *manager) generateConfigScript(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { // Prepare entrypoint value entrypoint := "" if len(imageInfo.Entrypoint) > 0 { entrypoint = shellQuoteArray(imageInfo.Entrypoint) } - + // Prepare cmd value cmd := "" if len(imageInfo.Cmd) > 0 { cmd = shellQuoteArray(imageInfo.Cmd) } - + // Prepare workdir value workdir := shellQuote("/") if imageInfo.WorkingDir != "" { workdir = shellQuote(imageInfo.WorkingDir) } - + // Build environment variable exports var envLines strings.Builder mergedEnv := mergeEnv(imageInfo.Env, inst.Env) for 
key, value := range mergedEnv { envLines.WriteString(fmt.Sprintf("export %s=%s\n", key, shellQuote(value))) } - + // Build network configuration section // Use netConfig directly instead of trying to derive it (VM hasn't started yet) networkSection := "" @@ -105,6 +107,17 @@ GUEST_DNS="%s" `, netConfig.IP, cidr, netConfig.Gateway, netConfig.DNS) } + // GPU passthrough configuration + // Only set HAS_GPU=1 if at least one attached device is actually a GPU + gpuSection := "" + for _, deviceID := range inst.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceID) + if err == nil && device.Type == devices.DeviceTypeGPU { + gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n" + break + } + } + // Build volume mounts section // Volumes are attached as /dev/vdd, /dev/vde, etc. (after vda=rootfs, vdb=overlay, vdc=config) // For overlay volumes, two devices are used: base + overlay disk @@ -137,7 +150,7 @@ GUEST_DNS="%s" volumeLines.WriteString("\"\n") volumeSection = volumeLines.String() } - + // Generate script as a readable template block // ENTRYPOINT and CMD contain shell-quoted arrays that will be eval'd in init script := fmt.Sprintf(`#!/bin/sh @@ -149,7 +162,7 @@ CMD="%s" WORKDIR=%s # Environment variables -%s%s%s`, +%s%s%s%s`, inst.Id, entrypoint, cmd, @@ -157,8 +170,9 @@ WORKDIR=%s envLines.String(), networkSection, volumeSection, + gpuSection, ) - + return script } diff --git a/lib/instances/create.go b/lib/instances/create.go index 0e02307..f133c0f 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -9,6 +9,7 @@ import ( "time" "github.com/nrednav/cuid2" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" @@ -141,7 +142,7 @@ func (m *manager) createInstance( return nil, ErrAlreadyExists } - // 5. Apply defaults + // 6. 
Apply defaults size := req.Size if size == 0 { size = 1 * 1024 * 1024 * 1024 // 1GB default @@ -191,16 +192,70 @@ func (m *manager) createInstance( req.Env = make(map[string]string) } - // 6. Determine network based on NetworkEnabled flag + // 7. Determine network based on NetworkEnabled flag networkName := "" if req.NetworkEnabled { networkName = "default" } - // 7. Get default kernel version + // 8. Get default kernel version kernelVer := m.systemManager.GetDefaultKernelVersion() - // 8. Create instance metadata + // 9. Validate, resolve, and auto-bind devices (GPU passthrough) + // Track devices we've marked as attached for cleanup on error. + // The cleanup closure captures this slice by reference, so it will see + // whatever devices have been attached when cleanup runs. + var attachedDeviceIDs []string + var resolvedDeviceIDs []string + + // Setup cleanup stack early so device attachment errors trigger cleanup + cu := cleanup.Make(func() { + log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) + m.deleteInstanceData(id) + }) + defer cu.Clean() + + // Add device detachment cleanup - closure captures attachedDeviceIDs by reference + if m.deviceManager != nil { + cu.Add(func() { + for _, deviceID := range attachedDeviceIDs { + log.DebugContext(ctx, "detaching device on cleanup", "instance_id", id, "device", deviceID) + m.deviceManager.MarkDetached(ctx, deviceID) + } + }) + } + + if len(req.Devices) > 0 && m.deviceManager != nil { + for _, deviceRef := range req.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceRef) + if err != nil { + log.ErrorContext(ctx, "failed to get device", "device", deviceRef, "error", err) + return nil, fmt.Errorf("device %s: %w", deviceRef, err) + } + if device.AttachedTo != nil { + log.ErrorContext(ctx, "device already attached", "device", deviceRef, "instance", *device.AttachedTo) + return nil, fmt.Errorf("device %s is already attached to instance %s", deviceRef, *device.AttachedTo) + } + // 
Auto-bind to VFIO if not already bound + if !device.BoundToVFIO { + log.InfoContext(ctx, "auto-binding device to VFIO", "device", deviceRef, "pci_address", device.PCIAddress) + if err := m.deviceManager.BindToVFIO(ctx, device.Id); err != nil { + log.ErrorContext(ctx, "failed to bind device to VFIO", "device", deviceRef, "error", err) + return nil, fmt.Errorf("bind device %s to VFIO: %w", deviceRef, err) + } + } + // Mark device as attached to this instance + if err := m.deviceManager.MarkAttached(ctx, device.Id, id); err != nil { + log.ErrorContext(ctx, "failed to mark device as attached", "device", deviceRef, "error", err) + return nil, fmt.Errorf("mark device %s as attached: %w", deviceRef, err) + } + attachedDeviceIDs = append(attachedDeviceIDs, device.Id) + resolvedDeviceIDs = append(resolvedDeviceIDs, device.Id) + } + log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs) + } + + // 10. Create instance metadata stored := &StoredMetadata{ Id: id, Name: req.Name, @@ -220,30 +275,24 @@ func (m *manager) createInstance( DataDir: m.paths.InstanceDir(id), VsockCID: vsockCID, VsockSocket: vsockSocket, + Devices: resolvedDeviceIDs, } - // Setup cleanup stack for automatic rollback on errors - cu := cleanup.Make(func() { - log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) - m.deleteInstanceData(id) - }) - defer cu.Clean() - - // 8. Ensure directories + // 11. Ensure directories log.DebugContext(ctx, "creating instance directories", "instance_id", id) if err := m.ensureDirectories(id); err != nil { log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err) return nil, fmt.Errorf("ensure directories: %w", err) } - // 9. Create overlay disk with specified size + // 12. 
Create overlay disk with specified size log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize) if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil { log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create overlay disk: %w", err) } - // 10. Allocate network (if network enabled) + // 13. Allocate network (if network enabled) var netConfig *network.NetworkConfig if networkName != "" { log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName) @@ -268,7 +317,7 @@ func (m *manager) createInstance( }) } - // 10.5. Validate and attach volumes + // 14. Validate and attach volumes if len(req.Volumes) > 0 { log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes)) for _, volAttach := range req.Volumes { @@ -308,15 +357,15 @@ func (m *manager) createInstance( stored.Volumes = req.Volumes } - // 11. Create config disk (needs Instance for buildVMConfig) + // 15. Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "instance_id", id) - if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { + if err := m.createConfigDisk(ctx, inst, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } - // 12. Save metadata + // 16. Save metadata log.DebugContext(ctx, "saving instance metadata", "instance_id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { @@ -324,14 +373,14 @@ func (m *manager) createInstance( return nil, fmt.Errorf("save metadata: %w", err) } - // 13. Start VMM and boot VM + // 17. 
Start VMM and boot VM log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err } - // 14. Update timestamp after VM is running + // 18. Update timestamp after VM is running now := time.Now() stored.StartedAt = &now @@ -487,7 +536,7 @@ func (m *manager) startAndBootVM( // Build VM configuration matching Cloud Hypervisor VmConfig inst := &Instance{StoredMetadata: *stored} - vmConfig, err := m.buildVMConfig(inst, imageInfo, netConfig) + vmConfig, err := m.buildVMConfig(ctx, inst, imageInfo, netConfig) if err != nil { return fmt.Errorf("build vm config: %w", err) } @@ -537,7 +586,7 @@ func (m *manager) startAndBootVM( } // buildVMConfig creates the Cloud Hypervisor VmConfig -func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { +func (m *manager) buildVMConfig(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { // Get system file paths kernelPath, _ := m.systemManager.GetKernelPath(system.KernelVersion(inst.KernelVersion)) initrdPath, _ := m.systemManager.GetInitrdPath() @@ -644,6 +693,22 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf Socket: inst.VsockSocket, } + // Device passthrough configuration (GPU, etc.) 
+ var deviceConfigs *[]vmm.DeviceConfig + if len(inst.Devices) > 0 && m.deviceManager != nil { + configs := make([]vmm.DeviceConfig, 0, len(inst.Devices)) + for _, deviceID := range inst.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceID) + if err != nil { + return vmm.VmConfig{}, fmt.Errorf("get device %s: %w", deviceID, err) + } + configs = append(configs, vmm.DeviceConfig{ + Path: devices.GetDeviceSysfsPath(device.PCIAddress), + }) + } + deviceConfigs = &configs + } + return vmm.VmConfig{ Payload: payload, Cpus: &cpus, @@ -653,6 +718,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf Console: &console, Net: nets, Vsock: &vsock, + Devices: deviceConfigs, }, nil } diff --git a/lib/instances/delete.go b/lib/instances/delete.go index 24d8ddb..06bc50c 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -59,7 +59,24 @@ func (m *manager) deleteInstance( } } - // 5. Detach volumes + // 5. Detach and auto-unbind devices from VFIO + if len(inst.Devices) > 0 && m.deviceManager != nil { + for _, deviceID := range inst.Devices { + log.DebugContext(ctx, "detaching device", "id", id, "device", deviceID) + // Mark device as detached + if err := m.deviceManager.MarkDetached(ctx, deviceID); err != nil { + log.WarnContext(ctx, "failed to mark device as detached", "id", id, "device", deviceID, "error", err) + } + // Auto-unbind from VFIO so native driver can reclaim it + log.InfoContext(ctx, "auto-unbinding device from VFIO", "id", id, "device", deviceID) + if err := m.deviceManager.UnbindFromVFIO(ctx, deviceID); err != nil { + // Log but continue - device might already be unbound or in use by another instance + log.WarnContext(ctx, "failed to unbind device from VFIO", "id", id, "device", deviceID, "error", err) + } + } + } + + // 5b. 
Detach volumes if len(inst.Volumes) > 0 { log.DebugContext(ctx, "detaching volumes", "instance_id", id, "count", len(inst.Volumes)) for _, volAttach := range inst.Volumes { diff --git a/lib/instances/exec_test.go b/lib/instances/exec_test.go index f0e6966..d3dbfde 100644 --- a/lib/instances/exec_test.go +++ b/lib/instances/exec_test.go @@ -75,7 +75,7 @@ func TestExecConcurrent(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "exec-test", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 2, // More vCPUs for concurrency diff --git a/lib/instances/liveness.go b/lib/instances/liveness.go new file mode 100644 index 0000000..19d3d20 --- /dev/null +++ b/lib/instances/liveness.go @@ -0,0 +1,155 @@ +package instances + +import ( + "context" + "os/exec" + "strings" + + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/logger" +) + +// Ensure instanceLivenessAdapter implements the interface +var _ devices.InstanceLivenessChecker = (*instanceLivenessAdapter)(nil) + +// instanceLivenessAdapter adapts instances.Manager to devices.InstanceLivenessChecker +type instanceLivenessAdapter struct { + manager *manager +} + +// NewLivenessChecker creates a new InstanceLivenessChecker that wraps the instances manager. +// This adapter allows the devices package to query instance state without a circular import. +func NewLivenessChecker(m Manager) devices.InstanceLivenessChecker { + // Type assert to get the concrete manager type + mgr, ok := m.(*manager) + if !ok { + return nil + } + return &instanceLivenessAdapter{manager: mgr} +} + +// IsInstanceRunning returns true if the instance exists and is in a running state +// (i.e., has an active VMM process). Returns false if the instance doesn't exist +// or is stopped/standby/unknown. 
+func (a *instanceLivenessAdapter) IsInstanceRunning(ctx context.Context, instanceID string) bool { + if a.manager == nil { + return false + } + inst, err := a.manager.getInstance(ctx, instanceID) + if err != nil { + return false + } + + // Consider instance "running" if the VMM is active (any of these states means VM is using the device) + switch inst.State { + case StateRunning, StatePaused, StateCreated: + return true + default: + // StateStopped, StateStandby, StateShutdown, StateUnknown + return false + } +} + +// GetInstanceDevices returns the list of device IDs attached to an instance. +// Returns nil if the instance doesn't exist. +func (a *instanceLivenessAdapter) GetInstanceDevices(ctx context.Context, instanceID string) []string { + if a.manager == nil { + return nil + } + inst, err := a.manager.getInstance(ctx, instanceID) + if err != nil { + return nil + } + return inst.Devices +} + +// ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances. +func (a *instanceLivenessAdapter) ListAllInstanceDevices(ctx context.Context) map[string][]string { + if a.manager == nil { + return nil + } + instances, err := a.manager.listInstances(ctx) + if err != nil { + return nil + } + + result := make(map[string][]string) + for _, inst := range instances { + if len(inst.Devices) > 0 { + result[inst.Id] = inst.Devices + } + } + return result +} + +// DetectSuspiciousVMMProcesses finds cloud-hypervisor processes that don't match +// known instances and logs warnings. Returns the count of suspicious processes found. +// This uses ListInstances (all instances) rather than ListAllInstanceDevices to avoid +// false positives for instances without GPU devices attached. 
+func (a *instanceLivenessAdapter) DetectSuspiciousVMMProcesses(ctx context.Context) int { + log := logger.FromContext(ctx) + + if a.manager == nil { + return 0 + } + + // Find all cloud-hypervisor processes + cmd := exec.Command("pgrep", "-a", "cloud-hypervisor") + output, err := cmd.Output() + if err != nil { + // pgrep returns exit code 1 if no processes found - that's fine + return 0 + } + + lines := strings.Split(strings.TrimSpace(string(output)), "\n") + if len(lines) == 0 || (len(lines) == 1 && lines[0] == "") { + return 0 + } + + suspiciousCount := 0 + for _, line := range lines { + if line == "" { + continue + } + + // Try to extract socket path from command line to match against known instances + // cloud-hypervisor command typically includes --api-socket + socketPath := "" + parts := strings.Fields(line) + for i, part := range parts { + if part == "--api-socket" && i+1 < len(parts) { + socketPath = parts[i+1] + break + } + } + + // Check if this socket path matches any known instance + matched := false + if socketPath != "" { + // Socket path is typically like /var/lib/hypeman/guests//ch.sock + // Try to extract instance ID + if strings.Contains(socketPath, "/guests/") { + pathParts := strings.Split(socketPath, "/guests/") + if len(pathParts) > 1 { + instancePath := pathParts[1] + instanceID := strings.Split(instancePath, "/")[0] + if a.IsInstanceRunning(ctx, instanceID) { + matched = true + } + } + } + } + + if !matched { + log.WarnContext(ctx, "detected untracked cloud-hypervisor process", + "process_info", line, + "socket_path", socketPath, + "remediation", "Run lib/devices/scripts/gpu-reset.sh for manual recovery if needed", + ) + suspiciousCount++ + } + } + + return suspiciousCount +} + diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 3b95a1c..7244c01 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/onkernel/hypeman/lib/devices" 
"github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -46,6 +47,7 @@ type manager struct { imageManager images.Manager systemManager system.Manager networkManager network.Manager + deviceManager devices.Manager volumeManager volumes.Manager limits ResourceLimits instanceLocks sync.Map // map[string]*sync.RWMutex - per-instance locks @@ -55,12 +57,13 @@ type manager struct { // NewManager creates a new instances manager. // If meter is nil, metrics are disabled. -func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { +func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { m := &manager{ paths: p, imageManager: imageManager, systemManager: systemManager, networkManager: networkManager, + deviceManager: deviceManager, volumeManager: volumeManager, limits: limits, instanceLocks: sync.Map{}, diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 2ee0a7f..95b9917 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -17,6 +17,7 @@ import ( "github.com/joho/godotenv" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/exec" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" @@ -46,6 +47,7 @@ func setupTestManager(t *testing.T) (*manager, string) { systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := ResourceLimits{ 
MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB @@ -54,7 +56,7 @@ func setupTestManager(t *testing.T) (*manager, string) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - mgr := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Register cleanup to kill any orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -246,7 +248,7 @@ func TestBasicEndToEnd(t *testing.T) { req := CreateInstanceRequest{ Name: "test-nginx", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, // 512MB + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB Vcpus: 1, @@ -754,6 +756,7 @@ func TestStorageOperations(t *testing.T) { imageManager, _ := images.NewManager(p, 1, nil) systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB @@ -762,7 +765,7 @@ func TestStorageOperations(t *testing.T) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - manager := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + manager := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Test metadata doesn't exist initially _, err := manager.loadMetadata("nonexistent") @@ -859,7 +862,7 @@ func TestStandbyAndRestore(t *testing.T) { req := CreateInstanceRequest{ Name: "test-standby", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA 
libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go index 579b4b8..419115e 100644 --- a/lib/instances/network_test.go +++ b/lib/instances/network_test.go @@ -63,7 +63,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-net-instance", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 5 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/resource_limits_test.go b/lib/instances/resource_limits_test.go index 8393042..91dc4a4 100644 --- a/lib/instances/resource_limits_test.go +++ b/lib/instances/resource_limits_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -159,9 +160,10 @@ func createTestManager(t *testing.T, limits ResourceLimits) *manager { systemMgr := system.NewManager(p) networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) - return NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil).(*manager) + return NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil).(*manager) } func TestResourceLimits_StructValues(t *testing.T) { @@ -251,20 +253,21 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // Set small aggregate limits: // - MaxTotalVcpus: 2 (first VM gets 1, second wants 2 -> denied) - // - MaxTotalMemory: 2GB 
(first VM gets 1GB, second wants 1.5GB -> denied) + // - MaxTotalMemory: 6GB (first VM gets 2.5GB, second wants 4GB -> denied) limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB MaxVcpusPerInstance: 4, // per-instance limit (high) - MaxMemoryPerInstance: 4 * 1024 * 1024 * 1024, // 4GB per-instance (high) + MaxMemoryPerInstance: 8 * 1024 * 1024 * 1024, // 8GB per-instance (high) MaxTotalVcpus: 2, // aggregate: only 2 total - MaxTotalMemory: 2 * 1024 * 1024 * 1024, // aggregate: only 2GB total + MaxTotalMemory: 6 * 1024 * 1024 * 1024, // aggregate: only 6GB total (allows first 2.5GB VM) } - mgr := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Cleanup any orphaned processes on test end t.Cleanup(func() { @@ -303,14 +306,14 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { assert.Equal(t, 0, usage.TotalVcpus, "Initial vCPUs should be 0") assert.Equal(t, int64(0), usage.TotalMemory, "Initial memory should be 0") - // Create first VM: 1 vCPU, 512MB + 512MB = 1GB memory - t.Log("Creating first instance (1 vCPU, 1GB memory)...") + // Create first VM: 1 vCPU, 2GB + 512MB = 2.5GB memory + t.Log("Creating first instance (1 vCPU, 2.5GB memory)...") inst1, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "small-vm-1", Image: "docker.io/library/alpine:latest", Vcpus: 1, - Size: 512 * 1024 * 1024, // 512MB - HotplugSize: 512 * 1024 * 1024, // 512MB (total 1GB) + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 1 * 1024 * 1024 * 1024, NetworkEnabled: false, }) @@ -322,7 +325,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { usage, err = mgr.calculateAggregateUsage(ctx) require.NoError(t, err) assert.Equal(t, 1, usage.TotalVcpus, "Should have 1 
vCPU in use") - assert.Equal(t, int64(1024*1024*1024), usage.TotalMemory, "Should have 1GB memory in use") + assert.Equal(t, int64(2*1024*1024*1024+512*1024*1024), usage.TotalMemory, "Should have 2.5GB memory in use") t.Logf("Aggregate usage after first VM: %d vCPUs, %d bytes memory", usage.TotalVcpus, usage.TotalMemory) // Try to create second VM: 2 vCPUs (would exceed MaxTotalVcpus=2) diff --git a/lib/instances/start.go b/lib/instances/start.go index a29c3ad..1687d59 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -84,7 +84,7 @@ func (m *manager) startInstance( // 5. Regenerate config disk with new network configuration instForConfig := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "regenerating config disk", "instance_id", id) - if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil { + if err := m.createConfigDisk(ctx, instForConfig, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } diff --git a/lib/instances/types.go b/lib/instances/types.go index e46372a..6320c23 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -67,6 +67,9 @@ type StoredMetadata struct { // vsock configuration VsockCID int64 // Guest vsock Context ID VsockSocket string // Host-side vsock socket path + + // Attached devices (GPU passthrough) + Devices []string // Device IDs attached to this instance } // Instance represents a virtual machine instance with derived runtime state @@ -89,6 +92,7 @@ type CreateInstanceRequest struct { Vcpus int // Default 2 Env map[string]string // Optional environment variables NetworkEnabled bool // Whether to enable networking (uses default network) + Devices []string // Device IDs or names to attach (GPU passthrough) Volumes []VolumeAttachment // Volumes to attach at creation time } diff --git a/lib/instances/volumes_test.go b/lib/instances/volumes_test.go index 
abb760f..3237db3 100644 --- a/lib/instances/volumes_test.go +++ b/lib/instances/volumes_test.go @@ -93,7 +93,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { writerInst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "writer", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -135,7 +135,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader1, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-1", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -151,7 +151,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader2, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-2-overlay", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -270,7 +270,7 @@ func TestOverlayDiskCleanupOnDelete(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "overlay-cleanup-test", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -394,7 +394,7 @@ func TestVolumeFromArchive(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "archive-reader", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, 
OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index 4709a15..0401b78 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -29,6 +29,12 @@ const ( BearerAuthScopes = "bearerAuth.Scopes" ) +// Defines values for DeviceType. +const ( + Gpu DeviceType = "gpu" + Pci DeviceType = "pci" +) + // Defines values for HealthStatus. const ( Ok HealthStatus = "ok" @@ -70,6 +76,39 @@ type AttachVolumeRequest struct { Readonly *bool `json:"readonly,omitempty"` } +// AvailableDevice defines model for AvailableDevice. +type AvailableDevice struct { + // CurrentDriver Currently bound driver (null if none) + CurrentDriver *string `json:"current_driver"` + + // DeviceId PCI device ID (hex) + DeviceId string `json:"device_id"` + + // DeviceName Human-readable device name + DeviceName *string `json:"device_name,omitempty"` + + // IommuGroup IOMMU group number + IommuGroup int `json:"iommu_group"` + + // PciAddress PCI address + PciAddress string `json:"pci_address"` + + // VendorId PCI vendor ID (hex) + VendorId string `json:"vendor_id"` + + // VendorName Human-readable vendor name + VendorName *string `json:"vendor_name,omitempty"` +} + +// CreateDeviceRequest defines model for CreateDeviceRequest. +type CreateDeviceRequest struct { + // Name Optional globally unique device name. If not provided, a name is auto-generated from the PCI address (e.g., "pci-0000-a2-00-0") + Name *string `json:"name,omitempty"` + + // PciAddress PCI address of the device (required, e.g., "0000:a2:00.0") + PciAddress string `json:"pci_address"` +} + // CreateImageRequest defines model for CreateImageRequest. type CreateImageRequest struct { // Name OCI image reference (e.g., docker.io/library/nginx:latest) @@ -87,6 +126,9 @@ type CreateIngressRequest struct { // CreateInstanceRequest defines model for CreateInstanceRequest. 
type CreateInstanceRequest struct { + // Devices Device IDs or names to attach for GPU/PCI passthrough + Devices *[]string `json:"devices,omitempty"` + // Env Environment variables Env *map[string]string `json:"env,omitempty"` @@ -130,6 +172,44 @@ type CreateVolumeRequest struct { SizeGb int `json:"size_gb"` } +// Device defines model for Device. +type Device struct { + // AttachedTo Instance ID if attached + AttachedTo *string `json:"attached_to"` + + // BoundToVfio Whether the device is currently bound to the vfio-pci driver, which is required for VM passthrough. + // - true: Device is bound to vfio-pci and ready for (or currently in use by) a VM. The device's native driver has been unloaded. + // - false: Device is using its native driver (e.g., nvidia) or no driver. Hypeman will automatically bind to vfio-pci when attaching to an instance. + BoundToVfio bool `json:"bound_to_vfio"` + + // CreatedAt Registration timestamp (RFC3339) + CreatedAt time.Time `json:"created_at"` + + // DeviceId PCI device ID (hex) + DeviceId string `json:"device_id"` + + // Id Auto-generated unique identifier (CUID2 format) + Id string `json:"id"` + + // IommuGroup IOMMU group number + IommuGroup int `json:"iommu_group"` + + // Name Device name (user-provided or auto-generated from PCI address) + Name *string `json:"name,omitempty"` + + // PciAddress PCI address + PciAddress string `json:"pci_address"` + + // Type Type of PCI device + Type DeviceType `json:"type"` + + // VendorId PCI vendor ID (hex) + VendorId string `json:"vendor_id"` +} + +// DeviceType Type of PCI device +type DeviceType string + // Error defines model for Error. type Error struct { // Code Application-specific error code (machine-readable) @@ -415,6 +495,9 @@ type CreateVolumeMultipartBody struct { SizeGb int `json:"size_gb"` } +// CreateDeviceJSONRequestBody defines body for CreateDevice for application/json ContentType. 
+type CreateDeviceJSONRequestBody = CreateDeviceRequest + // CreateImageJSONRequestBody defines body for CreateImage for application/json ContentType. type CreateImageJSONRequestBody = CreateImageRequest @@ -506,6 +589,23 @@ func WithRequestEditorFn(fn RequestEditorFn) ClientOption { // The interface specification for the client above. type ClientInterface interface { + // ListDevices request + ListDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) + + // CreateDeviceWithBody request with any body + CreateDeviceWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + + CreateDevice(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) + + // ListAvailableDevices request + ListAvailableDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) + + // DeleteDevice request + DeleteDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) + + // GetDevice request + GetDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) + // GetHealth request GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -589,6 +689,78 @@ type ClientInterface interface { GetVolume(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) } +func (c *Client) ListDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewListDevicesRequest(c.Server) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) CreateDeviceWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewCreateDeviceRequestWithBody(c.Server, 
contentType, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) CreateDevice(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewCreateDeviceRequest(c.Server, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) ListAvailableDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewListAvailableDevicesRequest(c.Server) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) DeleteDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewDeleteDeviceRequest(c.Server, id) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) GetDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewGetDeviceRequest(c.Server, id) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + func (c *Client) GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewGetHealthRequest(c.Server) if err != nil { @@ -937,6 +1109,168 @@ func (c *Client) GetVolume(ctx context.Context, id string, reqEditors ...Request return c.Client.Do(req) } +// NewListDevicesRequest generates requests for 
ListDevices +func NewListDevicesRequest(server string) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices") + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewCreateDeviceRequest calls the generic CreateDevice builder with application/json body +func NewCreateDeviceRequest(server string, body CreateDeviceJSONRequestBody) (*http.Request, error) { + var bodyReader io.Reader + buf, err := json.Marshal(body) + if err != nil { + return nil, err + } + bodyReader = bytes.NewReader(buf) + return NewCreateDeviceRequestWithBody(server, "application/json", bodyReader) +} + +// NewCreateDeviceRequestWithBody generates requests for CreateDevice with any type of body +func NewCreateDeviceRequestWithBody(server string, contentType string, body io.Reader) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices") + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", queryURL.String(), body) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", contentType) + + return req, nil +} + +// NewListAvailableDevicesRequest generates requests for ListAvailableDevices +func NewListAvailableDevicesRequest(server string) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/available") + if operationPath[0] == '/' { + operationPath = "." 
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewDeleteDeviceRequest generates requests for DeleteDevice +func NewDeleteDeviceRequest(server string, id string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("DELETE", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewGetDeviceRequest generates requests for GetDevice +func NewGetDeviceRequest(server string, id string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." 
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + // NewGetHealthRequest generates requests for GetHealth func NewGetHealthRequest(server string) (*http.Request, error) { var err error @@ -1866,6 +2200,23 @@ func WithBaseURL(baseURL string) ClientOption { // ClientWithResponsesInterface is the interface specification for the client with responses above. type ClientWithResponsesInterface interface { + // ListDevicesWithResponse request + ListDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListDevicesResponse, error) + + // CreateDeviceWithBodyWithResponse request with any body + CreateDeviceWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) + + CreateDeviceWithResponse(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) + + // ListAvailableDevicesWithResponse request + ListAvailableDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListAvailableDevicesResponse, error) + + // DeleteDeviceWithResponse request + DeleteDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*DeleteDeviceResponse, error) + + // GetDeviceWithResponse request + GetDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetDeviceResponse, error) + // GetHealthWithResponse request GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) @@ -1949,14 +2300,16 @@ type ClientWithResponsesInterface interface { GetVolumeWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetVolumeResponse, error) } -type GetHealthResponse struct { +type ListDevicesResponse struct { Body []byte 
HTTPResponse *http.Response - JSON200 *Health + JSON200 *[]Device + JSON401 *Error + JSON500 *Error } // Status returns HTTPResponse.Status -func (r GetHealthResponse) Status() string { +func (r ListDevicesResponse) Status() string { if r.HTTPResponse != nil { return r.HTTPResponse.Status } @@ -1964,17 +2317,138 @@ func (r GetHealthResponse) Status() string { } // StatusCode returns HTTPResponse.StatusCode -func (r GetHealthResponse) StatusCode() int { +func (r ListDevicesResponse) StatusCode() int { if r.HTTPResponse != nil { return r.HTTPResponse.StatusCode } return 0 } -type ListImagesResponse struct { +type CreateDeviceResponse struct { Body []byte HTTPResponse *http.Response - JSON200 *[]Image + JSON201 *Device + JSON400 *Error + JSON401 *Error + JSON404 *Error + JSON409 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r CreateDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r CreateDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type ListAvailableDevicesResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *[]AvailableDevice + JSON401 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r ListAvailableDevicesResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r ListAvailableDevicesResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type DeleteDeviceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON404 *Error + JSON409 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r DeleteDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return 
r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r DeleteDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetDeviceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *Device + JSON404 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r GetDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetHealthResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *Health +} + +// Status returns HTTPResponse.Status +func (r GetHealthResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetHealthResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type ListImagesResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *[]Image JSON401 *Error JSON500 *Error } @@ -2533,6 +3007,59 @@ func (r GetVolumeResponse) StatusCode() int { return 0 } +// ListDevicesWithResponse request returning *ListDevicesResponse +func (c *ClientWithResponses) ListDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListDevicesResponse, error) { + rsp, err := c.ListDevices(ctx, reqEditors...) 
+ if err != nil { + return nil, err + } + return ParseListDevicesResponse(rsp) +} + +// CreateDeviceWithBodyWithResponse request with arbitrary body returning *CreateDeviceResponse +func (c *ClientWithResponses) CreateDeviceWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) { + rsp, err := c.CreateDeviceWithBody(ctx, contentType, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseCreateDeviceResponse(rsp) +} + +func (c *ClientWithResponses) CreateDeviceWithResponse(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) { + rsp, err := c.CreateDevice(ctx, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseCreateDeviceResponse(rsp) +} + +// ListAvailableDevicesWithResponse request returning *ListAvailableDevicesResponse +func (c *ClientWithResponses) ListAvailableDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListAvailableDevicesResponse, error) { + rsp, err := c.ListAvailableDevices(ctx, reqEditors...) + if err != nil { + return nil, err + } + return ParseListAvailableDevicesResponse(rsp) +} + +// DeleteDeviceWithResponse request returning *DeleteDeviceResponse +func (c *ClientWithResponses) DeleteDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*DeleteDeviceResponse, error) { + rsp, err := c.DeleteDevice(ctx, id, reqEditors...) + if err != nil { + return nil, err + } + return ParseDeleteDeviceResponse(rsp) +} + +// GetDeviceWithResponse request returning *GetDeviceResponse +func (c *ClientWithResponses) GetDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetDeviceResponse, error) { + rsp, err := c.GetDevice(ctx, id, reqEditors...) 
+ if err != nil { + return nil, err + } + return ParseGetDeviceResponse(rsp) +} + // GetHealthWithResponse request returning *GetHealthResponse func (c *ClientWithResponses) GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) { rsp, err := c.GetHealth(ctx, reqEditors...) @@ -2789,6 +3316,227 @@ func (c *ClientWithResponses) GetVolumeWithResponse(ctx context.Context, id stri return ParseGetVolumeResponse(rsp) } +// ParseListDevicesResponse parses an HTTP response from a ListDevicesWithResponse call +func ParseListDevicesResponse(rsp *http.Response) (*ListDevicesResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ListDevicesResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest []Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseCreateDeviceResponse parses an HTTP response from a CreateDeviceWithResponse call +func ParseCreateDeviceResponse(rsp *http.Response) (*CreateDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &CreateDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), 
"json") && rsp.StatusCode == 201: + var dest Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON201 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 400: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseListAvailableDevicesResponse parses an HTTP response from a ListAvailableDevicesWithResponse call +func ParseListAvailableDevicesResponse(rsp *http.Response) (*ListAvailableDevicesResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ListAvailableDevicesResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest []AvailableDevice + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case 
strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseDeleteDeviceResponse parses an HTTP response from a DeleteDeviceWithResponse call +func ParseDeleteDeviceResponse(rsp *http.Response) (*DeleteDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &DeleteDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseGetDeviceResponse parses an HTTP response from a GetDeviceWithResponse call +func ParseGetDeviceResponse(rsp *http.Response) (*GetDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &GetDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && 
rsp.StatusCode == 200: + var dest Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + // ParseGetHealthResponse parses an HTTP response from a GetHealthWithResponse call func ParseGetHealthResponse(rsp *http.Response) (*GetHealthResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) @@ -3807,6 +4555,21 @@ func ParseGetVolumeResponse(rsp *http.Response) (*GetVolumeResponse, error) { // ServerInterface represents all server handlers. type ServerInterface interface { + // List registered devices + // (GET /devices) + ListDevices(w http.ResponseWriter, r *http.Request) + // Register a device for passthrough + // (POST /devices) + CreateDevice(w http.ResponseWriter, r *http.Request) + // Discover passthrough-capable devices on host + // (GET /devices/available) + ListAvailableDevices(w http.ResponseWriter, r *http.Request) + // Unregister device + // (DELETE /devices/{id}) + DeleteDevice(w http.ResponseWriter, r *http.Request, id string) + // Get device details + // (GET /devices/{id}) + GetDevice(w http.ResponseWriter, r *http.Request, id string) // Health check // (GET /health) GetHealth(w http.ResponseWriter, r *http.Request) @@ -3885,6 +4648,36 @@ type ServerInterface interface { type Unimplemented struct{} +// List registered devices +// (GET /devices) +func (_ Unimplemented) ListDevices(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Register a device for passthrough +// (POST /devices) +func 
(_ Unimplemented) CreateDevice(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Discover passthrough-capable devices on host +// (GET /devices/available) +func (_ Unimplemented) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Unregister device +// (DELETE /devices/{id}) +func (_ Unimplemented) DeleteDevice(w http.ResponseWriter, r *http.Request, id string) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Get device details +// (GET /devices/{id}) +func (_ Unimplemented) GetDevice(w http.ResponseWriter, r *http.Request, id string) { + w.WriteHeader(http.StatusNotImplemented) +} + // Health check // (GET /health) func (_ Unimplemented) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -4038,6 +4831,128 @@ type ServerInterfaceWrapper struct { type MiddlewareFunc func(http.Handler) http.Handler +// ListDevices operation middleware +func (siw *ServerInterfaceWrapper) ListDevices(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ListDevices(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// CreateDevice operation middleware +func (siw *ServerInterfaceWrapper) CreateDevice(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.CreateDevice(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// ListAvailableDevices operation middleware +func (siw *ServerInterfaceWrapper) 
ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ListAvailableDevices(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// DeleteDevice operation middleware +func (siw *ServerInterfaceWrapper) DeleteDevice(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.DeleteDevice(w, r, id) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// GetDevice operation middleware +func (siw *ServerInterfaceWrapper) GetDevice(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler 
:= http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.GetDevice(w, r, id) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // GetHealth operation middleware func (siw *ServerInterfaceWrapper) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -4835,6 +5750,21 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl ErrorHandlerFunc: options.ErrorHandlerFunc, } + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices", wrapper.ListDevices) + }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/devices", wrapper.CreateDevice) + }) + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices/available", wrapper.ListAvailableDevices) + }) + r.Group(func(r chi.Router) { + r.Delete(options.BaseURL+"/devices/{id}", wrapper.DeleteDevice) + }) + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices/{id}", wrapper.GetDevice) + }) r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/health", wrapper.GetHealth) }) @@ -4911,6 +5841,214 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl return r } +type ListDevicesRequestObject struct { +} + +type ListDevicesResponseObject interface { + VisitListDevicesResponse(w http.ResponseWriter) error +} + +type ListDevices200JSONResponse []Device + +func (response ListDevices200JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ListDevices401JSONResponse Error + +func (response ListDevices401JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ListDevices500JSONResponse Error + +func (response ListDevices500JSONResponse) 
VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDeviceRequestObject struct { + Body *CreateDeviceJSONRequestBody +} + +type CreateDeviceResponseObject interface { + VisitCreateDeviceResponse(w http.ResponseWriter) error +} + +type CreateDevice201JSONResponse Device + +func (response CreateDevice201JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(201) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice400JSONResponse Error + +func (response CreateDevice400JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice401JSONResponse Error + +func (response CreateDevice401JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice404JSONResponse Error + +func (response CreateDevice404JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice409JSONResponse Error + +func (response CreateDevice409JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice500JSONResponse Error + +func (response CreateDevice500JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type 
ListAvailableDevicesRequestObject struct { +} + +type ListAvailableDevicesResponseObject interface { + VisitListAvailableDevicesResponse(w http.ResponseWriter) error +} + +type ListAvailableDevices200JSONResponse []AvailableDevice + +func (response ListAvailableDevices200JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevices401JSONResponse Error + +func (response ListAvailableDevices401JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevices500JSONResponse Error + +func (response ListAvailableDevices500JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDeviceRequestObject struct { + Id string `json:"id"` +} + +type DeleteDeviceResponseObject interface { + VisitDeleteDeviceResponse(w http.ResponseWriter) error +} + +type DeleteDevice204Response struct { +} + +func (response DeleteDevice204Response) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.WriteHeader(204) + return nil +} + +type DeleteDevice404JSONResponse Error + +func (response DeleteDevice404JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDevice409JSONResponse Error + +func (response DeleteDevice409JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDevice500JSONResponse 
Error + +func (response DeleteDevice500JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type GetDeviceRequestObject struct { + Id string `json:"id"` +} + +type GetDeviceResponseObject interface { + VisitGetDeviceResponse(w http.ResponseWriter) error +} + +type GetDevice200JSONResponse Device + +func (response GetDevice200JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type GetDevice404JSONResponse Error + +func (response GetDevice404JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type GetDevice500JSONResponse Error + +func (response GetDevice500JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + type GetHealthRequestObject struct { } @@ -5876,6 +7014,21 @@ func (response GetVolume500JSONResponse) VisitGetVolumeResponse(w http.ResponseW // StrictServerInterface represents all server handlers. 
type StrictServerInterface interface { + // List registered devices + // (GET /devices) + ListDevices(ctx context.Context, request ListDevicesRequestObject) (ListDevicesResponseObject, error) + // Register a device for passthrough + // (POST /devices) + CreateDevice(ctx context.Context, request CreateDeviceRequestObject) (CreateDeviceResponseObject, error) + // Discover passthrough-capable devices on host + // (GET /devices/available) + ListAvailableDevices(ctx context.Context, request ListAvailableDevicesRequestObject) (ListAvailableDevicesResponseObject, error) + // Unregister device + // (DELETE /devices/{id}) + DeleteDevice(ctx context.Context, request DeleteDeviceRequestObject) (DeleteDeviceResponseObject, error) + // Get device details + // (GET /devices/{id}) + GetDevice(ctx context.Context, request GetDeviceRequestObject) (GetDeviceResponseObject, error) // Health check // (GET /health) GetHealth(ctx context.Context, request GetHealthRequestObject) (GetHealthResponseObject, error) @@ -5979,6 +7132,137 @@ type strictHandler struct { options StrictHTTPServerOptions } +// ListDevices operation middleware +func (sh *strictHandler) ListDevices(w http.ResponseWriter, r *http.Request) { + var request ListDevicesRequestObject + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ListDevices(ctx, request.(ListDevicesRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ListDevices") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ListDevicesResponseObject); ok { + if err := validResponse.VisitListDevicesResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} 
+ +// CreateDevice operation middleware +func (sh *strictHandler) CreateDevice(w http.ResponseWriter, r *http.Request) { + var request CreateDeviceRequestObject + + var body CreateDeviceJSONRequestBody + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + sh.options.RequestErrorHandlerFunc(w, r, fmt.Errorf("can't decode JSON body: %w", err)) + return + } + request.Body = &body + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.CreateDevice(ctx, request.(CreateDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "CreateDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(CreateDeviceResponseObject); ok { + if err := validResponse.VisitCreateDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// ListAvailableDevices operation middleware +func (sh *strictHandler) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + var request ListAvailableDevicesRequestObject + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ListAvailableDevices(ctx, request.(ListAvailableDevicesRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ListAvailableDevices") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ListAvailableDevicesResponseObject); ok { + if err := validResponse.VisitListAvailableDevicesResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + 
} else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// DeleteDevice operation middleware +func (sh *strictHandler) DeleteDevice(w http.ResponseWriter, r *http.Request, id string) { + var request DeleteDeviceRequestObject + + request.Id = id + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.DeleteDevice(ctx, request.(DeleteDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "DeleteDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(DeleteDeviceResponseObject); ok { + if err := validResponse.VisitDeleteDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// GetDevice operation middleware +func (sh *strictHandler) GetDevice(w http.ResponseWriter, r *http.Request, id string) { + var request GetDeviceRequestObject + + request.Id = id + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.GetDevice(ctx, request.(GetDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "GetDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(GetDeviceResponseObject); ok { + if err := validResponse.VisitGetDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", 
response)) + } +} + // GetHealth operation middleware func (sh *strictHandler) GetHealth(w http.ResponseWriter, r *http.Request) { var request GetHealthRequestObject @@ -6637,90 +7921,104 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xdC3PTyJb+K6e0d2qdXfmRBLjgW1tbmQSYTBFIEcjsXcyGtnRs99DqFt0tJ4bKf9/q", - "h2TJkh8ZEkMuVFFFbPXrvL8+fVr+EkQiSQVHrlXQ/xKoaIIJsX8eaE2iyblgWYKv8VOGSpuvUylSlJqi", - "bZSIjOuLlOiJ+RSjiiRNNRU86AenRE/gcoISYWpHATURGYthiGD7YRyEAV6RJGUY9INuwnU3JpoEYaBn", - "qflKaUn5OLgOA4kkFpzN3DQjkjEd9EeEKQwXpj0xQwNRYLq0bZ9ivKEQDAkPru2InzIqMQ7678pkvC8a", - "i+GfGGkz+aFEovE4IePlnOAkwToPXh0eAzX9QOIIJfIIoYWdcSeEWEQfUXao6DI6lETOunxM+VWfEY1K", - "71RYs7ptnV8L5Nm1rSCMjyUqdUPSfssSwtuGyWTIEEwjaDFxiTIiCoGh1ihVCDEdU61CIDyGmKgJKjBC", - "+QdEhHOhQWkiNQgJyGO4pHoCxLarciCZtUlK29QtNQiDhFy9QD42ivdoPwxSYqYz6/q/d6T9udd+8r7l", - "/2i//4/8q53//lujcmXMUVql8LXINOVjsI9hJCToCVUwXwPVmNh+f5M4CvrBv3Xn1tT1ptTNuZsxNHMl", - "lB+7brvFSoiUZNYstXxxq6SnNOHRcs1EPjX/kTimhjDCTiuPa9yoMuEpn1IpeIJcw5RIaoStyqL5Erx8", - "dfT04unL86BvZo6zyHYNg9NXr98E/WC/1+uZcWvrnwidsmx8oehnrNh1sP/812BxIQfF+iHBRMiZlYgf", - "A1qTqjqOhEyIBkY/IgzMeIMghEGw+3wQVBVrz05VY4I12o3seY2hEpZSjkstNfxerOtSyI9MkLi9e8vG", - "xVGbseskvnQPIBJ8RMeZJOZ7b2YI1Kt1ENbU2XAkriiMllktDvwxQT1BCVoAsaGsGNJ8Zabw3SFfYYkj", - "bsCGqFFTYjFFycisQYl3ew1a/Iek2krU94OYqo9gOq9RYTOa0+GHvboS95q1uGFRDWv61WiUt6lNVlIs", - "ZHfvxP+5t6ldTaM0U5Ul7S0u52WWDFGCGMGUSp0RBoenbysuZ68YmHKNY5R2ZIsxGty4gzCqpAhe/oU+", - "EA2R8aVG/zS1Xncj1+5GtoCj5OBWenPnV5Z78zV4i8YNPin1bjHKlBYJ0Bi5piOKElok06I9Ro6SaIyB", - "jsA4hVSKKY0xrkpsKljbwC/rATZ0U2654ImrOBQ7lBPKMtW8GA/rQ54ZDaQcxnRMhjNdDTa7vbromxmd", - "j9/E6qdSCllnbiTiBhIP0pTRyCpHW6UY0RGNAM0IYDpAKyHRhHIszKXK1SGJL6QXZ9gUbDWhrEFrS+HO", - "TeZbQst4yCRjmqYM3TO1s6nGWsqP7Eh1jQ0DyjnKC8zZc4ORElSqMWIuBLKclqKJdfgxDrPx2LCkzLoT", - "qpTFX166MKLI4r4LwGtBr5XmfGFL9cDTsKE2vDAhuM1wiqysBM6izGITIREKPXFCq1BF+ZQwGl9QnmaN", - 
"KrGUlc8yaSOaGxTIUGTaOjInsPIkdq9ibX0kMh43MqvGjt+QMLeRq3JCaaIzH3uzxPBWfDT8nE8nPq4V", - "hx+kSQzHOdZaEEDS4OwOT45gJEViUIMmlKOEBDXx28ZiRe8Cu0EKwqBtdCommAgOYjT6h1lBYSp1L5cx", - "ZvR0AQEUBmLDBMYXRDcsrRxClCZJCq3Xzw739/efLEbrvYft3m579+Gb3V6/Z/79bxAGLsoaEEk0tn0c", - "qjsMOvaRYWGzgkqwKcaQEE5HqDT4luWZ1YTsPXzUJ8Nod28/xtGDh486nU7TNMi1nKWC8oapnhbPNhNF", - "16Hi9nzMjpp8nRzuYE+zCS1fgtODN78F/aCbKdllIiKsq4aU90ufi4/zB/YP93FIeeNeqPC5Cyu1LsZ7", - "BBO+nRkBVTAilC1kUNKMMf9931DCMSoUUlhns4Sv68L8S6OajH7GGBozGpqMzR7DadzXpS7C4FOGGV6k", - "QlE3ey2v5J8YkDDMKIvB9oCWIS6HOParKsDZW0p+CUVa2OBgR23iowKqm5lNGz9nxjVlNt80q8z4cP/R", - "47/3nuzulYybcv3oQbDRUgq3uwDXLc3+aVj45BR57CKoUQP3VyT41FiF/WDXZ/yMU5yKA8+f1YRhNkaU", - "jy9i2qCdf7iHEFOJkbZb8vU2FHRJmq5XxWZUV/i0gvySR26MLT5ZU48u39yTN2H5gypezzj9lGEJ0Vdn", - "fzX+/dP/qNO//7n76cX5+T+nz38/ekn/ec5OX31VomF14u2bZs9WbrGo8YaVrNmm6nFCdNQAfCZC6SVc", - "80/MVjIxnTtwSDgMsT/gbXhBNUrC+jAISEo7npmdSCSDAFp4RSLteoHgYIaCCZIY5Y7pfOqSLabzl3yH", - "er04RjzjJKERSM/kIVEYm9FUNoxFQijfGfAB92NBToiCTDkZxxCRVGcSjUQgyiSbwVCSyPg0l5+eTx7C", - "F5Km1zsDridEA15paShIidTKbNMNBM1nsIL2q+rAGwNOXXOMYUpYhgoiy6gBL+JHbJZgBtFEjlF3in25", - "xfudAa8oZDNTmhQzFVJXUg2Pe2GDHMG0M4JkVGnkUCShqLLKC608UfS4VzH/x73H67ejhQ6tUD+r3fWz", - "lVwpN7APp8B2aueMLyZap+sPS6y/cTYCv715c2rYYP4/g3ygOS8KEbcEZzMgZl+MCi4nyEEzi0l8bm6n", - "4cQlDJx0NyTojWtsujG1no6ndmJ48+IMNMqEcue/W5Fh58hs3xFsLoQqlRlVpAQODk+e7nQ2OByyvC3W", - "v0KObwoKF7I2eS6z5kdcj3kmyvA3hOOj0MApb6FzoNUx/uGZkMCcg5nbdR/eKqwmtayohITjIx+f2Sw3", - "7YH36oNgJx8xXfQUfXhd4DtSLMXG94q9F0PO7dIOO+B/GMXI7A6+NnpYXauxtHz/4l0bYUbHNPjciQ3F", - "y13BavNv4Li1ecEXU803s+1yjtpM1qwac9nfOQLZvxkCuZuTofo5D1EXipNUTUQDqXmenkDeBvCKKl3B", - "DHUBeTdQdzP1U6Wqw3fnRSvS3ZudD/0F8Aatw7fHR3s+nV6dRn9+QJ48vroi+skjeqmefE6GcvznPrkn", - "Z1MrT5O+9kjIQ4zNToSaVKvsZ9wRAMZ/+RAoDGjaIHul6JhjDMenQOLYRIPyhjQfvir03Sd7nd1Hjzu7", - "vV5nt7fJ9jwh0Yq5Tw4ON5+8t+c2LH0y7EdxH0dfkR7wYnMBgbBLMlMwyEP2IHAYoQQOSkrpw/pGCcr6", - "WdtfO1pbkMLaw7ObHJZt5D3sqewS139mT2xv7vcfLvX7a6VqNtO4Hpg5IzqzjfNeFzdJXCFEImMx/3cN", - 
"Q2N5Dqph7BGlQu00xbWlCt7yj1xc8irpLn9h7PdThnIG5ycnlWyXxFGmbGZsA8JFmi6Vg0hvJIa9NeF3", - "7WpKZ6PbOA9d9IQl/3rrp5/lrXl+fOO0boMtelnvatTkj53S2O23O0mN+0YzwI8Ow0xDURBhVO6QiSyG", - "32YpyilVQoLZM0zRIuLXGeeUj80INmZE5gmbgXTfr+58Soz65X1T+2l1j7NJpmNxyW0fNck0mE92yYYE", - "D4dWD+E0uQ8vhe3jVxoa97+Aq1xzwuPhrN58EYO13G7dwHMtJMZ2Mm+WfXhWmGJhzN54Wwr9n85D+GM+", - "e4S542C8z1t6aQVh4LkehIFjYRAGOWfMn45C+5ddfBAGfiElvZlbk1PPOux2ap/kFY8Lx3tUaWNpUSal", - "wbWlxtDCJNWzPMGcW8/OzczloBiw6fz1trcEvSe3kZR8uzIL+S9SNlD2UPkka31TTaZLt/4XTYw9PlrE", - "tm6/48tlq2h14RBZ6bY7xms8Ql5RluvqY82zPO02zhbPCW9QittYcjXBkuW43f28FnfdFm7JJvvCSqhE", - "WWkly2XjwtNX1i1TlRcs/0WWeUS6Po/lnCGkKNuFSuRw1njQS0ntsZNnkGOsYcF/GUzRnHdbjZpPyFUx", - "g8WzRMFC+ZejY57psQVgOx14nddl0FE+hF1GpwqvmyHw5gXduVbVhbGqwjsHQI2G5/3PCo+2zLYWlHM+", - "R7i6iNy4LowySfXszAQEp4ZDJBLlQebU0EYKS4T9ej65zeVeX9sCnZGok/McOUoawcHpsdWShHAyNiI7", - "PwFGRxjNIoY+FVcDEbYo9NXhcdudIeQ7d5vJodoyxLROCDfjB2EwRancvL3OXseW9ooUOUlp0A/2O7sd", - "s5M1bLAkdidFVYnPjRo7tJHsOLZr177uxHBWpYIrx5u9Xs+V4XDtPSuZV2J1/1TuaNhF13Wx189gWbgQ", - "NgwbXGbALdRhV5UlCZEzQ7v9FqIJRh/to67Fr2opQQZCHLsmX0nRZgdkFk3X8XeN0hza+OVfh8GD3u6t", - "cdiV1DVM+5aTTE+EpJ8xNpM+vEWxLp30mGuUnDBQKKcofYFU2QiD/ruq+b17f/2+LHfLrjmvUqEaZF26", - "GBI4x4BK/yri2a2R2HD15LrqhIzHva5p2t6trcArWAOTba5xmFcjuF0VUTMe7Tjt2oKgfyVxfkLwzTT6", - "Qe/BFjR6oaDvHlnSacaYvZ/gq1HmJURlf9r9YsD3tQtuDN1Wv2ptR/b73NpSIklidp3KrmBBRq9ftJFH", - "IjboxLHOJx/MUw8f3d6kqBioWFRYYtwiBHhfs7YHDdjezupI+akmG6iJk26uGOFStPAV8ncQdn7z7pe9", - "Z/4o5Je9Z+4w5Jf9g/kFvLtRlt62XHNeW/5T+dYq33P0wX7ONOua3Ln+OrRXtNoK4PMFVDeBfMUCf6K+", - "TVBfmV0rgV9Ry3aH0K96OXcj8Hd7Ai6UrYnb9lGeYP/BIN+Tu5/0UPARo5GGdq6Rbq9uU4Q2nBFm63bz", - "XL29DuvLciiHTOF9Mj2f+qKFxpX9b/cLjTfBhoVBrkQHueoeH4W+4srVSaUSR/SqOfDbFM9tY0S/jq2j", - "RD9vJVRvRacPkiEdZyJT5bIYW2CHan6preKA7xt+nYfnpQj2O9bS3jZDx9YB6k+9vyPovChQ57zdAcY6", - "8Jy32g549jUON0LP+Qp/oueN0HOJXavRc1Fvcpfwufp2lK3j51zfmhjuD/h+RAR9z1Ap4T7HPa+Sqvq4", - "jQHqvIZ1dez3unF8BLbQZ1ny8m5gqZ98+7g0v7twH3NItmDQvocnR4LzWLMcCn5v+tDbru/bPgS8zyr2", - 
"vHxXqBlsWUfUZWJchl2Lla0SSTK/VgGmNRAFZ3Zh7TPkGp5ODVWdAc8vFn1QIpMRfoBCUUELUMgw0nA5", - "odHEjGO/s+PbUswPJE0/FBfodvrw3F7mKXHXTd5SKClhEAmuBHMljR+mSfKhXy9YOD85sZ1sm4krTfjQ", - "h7xIobAxZVoN+IC/Rp1JriwVjCgNL4FRjgpaRuBSMIYxDGfwwfCzRN+OvaVkRnS3Y9hswE0PyjNUnkrK", - "x8Dx0g9IR/BhJBgTl7YQ5oO7sLTU6l8YKX0jyw+Xlxk7WrQAaRnnLn2hfYuIndeWXc8n9m84mU9VlPPs", - "9hrr7r7UE12Wp40sJSNtb3BQbfRDZNq9NaVpIY7zzUtZWmtWf8HLGJymL6gySdNN1dcv02rxNElW6DC0", - "JvMvlY5Fpv9T6RiluwvstXuZckOLRO6DJh/dzdXKfSlXaNvEKkdhM6sCdz8/r891n6ZJEoSBX09Dve0G", - "kUTjle6icSttx9aqT10csL4fM5KxHaF1dvZ052fM2BCWWJZVnb1nYEPk8IXetmSycfP22jX44ZFLXhH/", - "jdVw+0cRpVVQe1eHx8OZle38qsF9MhCv0HPKbLzzdDXaSP5sqY34Gwo/vI3M9eMHt5JISPsyBZVfnrs/", - "xVulHUfJ3Fv2XtP8vlCY73rPT052lhmNu4+/1GTkz+2wr6P84WOKvep1/6zF3d0lBQGrkoVd02iVPYj0", - "pzn4O38/g8e9DB42I1pQ0xpLEuEoY/aKa2zvdTfZhb+43P3i/jhel1ef/0zFd5NL8feL1k2TE3gvjNLT", - "FKO7kbh9mxTFFbB7Wt9s3z3uSbB7jPIJQXMUKP8Iy4+j3bd/GNz0YzYbHQVv1bby277fjW1tO/L5NeR1", - "jWV+3Bczd5qWU6LFAgYsvZVjaUmMf0HHVgpivGu5QTlMTsHPyoENimFKzModfNOFbwXEHnm45h04y9JU", - "SK1AXwpIRIzKHkH8fvbqJQxFPOtD0Y+Dey2FVzj/PgH/Anqzh6Kf0fQ9sUVmZnsyEjIpDZD3TCW2U5Fm", - "zL4uxVYaex67YEVAE9kZfwYiowmdYsPRVvknLO60qmfRkYdBkpPXNeTZt0hUB118uX+xlqo8qjTCiDLM", - "32dM+djy1vMrH6L0Zo0h5UTONn2txuLvdkyLsHoff7bjhFzRJEuK92M//xVa/tWv9lce7G9X0FGhU3gV", - "IcbKHljtfN1PfISFOBvu3W+13Cv3pksj/Dcs9YKW/+UJMCI2ET9Xci0EMCLHuPPDXKjwtja/T3F8tHCb", - "4h4WqU1z7ZvjjA3L0jbbYGyI+++iJK3YfG63IO38+8HEpZcB3cNbEdMCZi6rhPu+VLC3vZCw7Qq483uc", - "Q3mOOaQuVb/ZAcyITQrzQkSEQYxTZCK1b6NybYMwyCTz79bpd91vt0yE0vbNz8H1++v/DwAA///8wcL2", - "G3cAAA==", + "H4sIAAAAAAAC/+x9C3MTO7L/V+ma/26t81+/kgAL3rp1KycBjk8RSBHIubsn3CDPtG0dZqRB0jgxVL77", + "LT3mafkRIIYsqaIKx6ORulu/bnW3WvLnIORJyhkyJYPB50CGU0yI+XigFAmnZzzOEnyNHzOUSn+dCp6i", + "UBRNo4RnTF2kRE31XxHKUNBUUc6CQXBC1BQupygQZqYXkFOexRGMEMx7GAXtAK9IksYYDIJewlQvIooE", + "7UDNU/2VVIKySXDdDgSSiLN4bocZkyxWwWBMYontxrDHumsgEvQrHfNO0d+I8xgJC65Njx8zKjAKBn9U", + "2XhXNOajPzFUevCDGaExGcV4hDMa4qIYwkwIZOoiEnSGYlEUh/Z5PIcRz1gEth20WBbHQMfAOMOdmjDY", + 
"jEZUS0I30UMHAyUy9EgmMjRd0MgzA4dDsI9heAStKV7VB9n7x+hxsLxLRhJc7PTXLCGso4Wrycr7N22r", + "fb944OuZ8iTJLiaCZ+liz8NXx8dvwTwEliUjFNUeH+8V/VGmcIJCd5iG9IJEkUAp/fznD6u09fv9/oDs", + "Dfr9bt9H5QxZxMVSkdrHfpHu9iNc0eVGInX9L4j05dnwaHgAh1ykXBDz7sJIDWBXxVPlqwqb+qz48H8o", + "kCgH/qWmwM/aK/OBxDCJ+YjE8RwyRj9mNdx0YahVQEEq+IxGGLWBmAdAJZBM8c4EGQqiMIKx4AmoKUJl", + "bqGF3Um3Deea3Y6e3A7Z6/T7nf55UJ+d+EFnkmZBO0iJUig0gf/7B+l8Ouj8u9958q78eNHtvPv7X3wT", + "uSnggI8NnY7PVj4rbciJraKwSehqhK6Y5OXTN0zI5MazdzgEqt8DgWMUyDQnlv6Ihx9QdCnvxXQkiJj3", + "2ISyq0FMFEpV52Z127X8GdpWMMYmmvUbstbQOQO3VswvUYREIsSoASLbENEJVbINRJttIqcoQa8p/4SQ", + "MI1ZqYhQwAUgi+CSqikQ064ugWTeISntUEtq0A4ScvUC2USvm4/2F/CowdhyHzrv/n/+1c5/eyEpshg9", + "YHzNM0XZBMxjGHMBakollDRQhYl57y8Cx8Eg+H+90hnoOU+gl0s3i1GPlVA2tK/tFpQQIcjcP2s5catm", + "TyrCVtgVq0Ae/o7ylU2Cs5YSFAdi/BbD7/OTtz2tkimRUk0FzybT6qz8kduDdxVZLEi3zmQ7QDbT7UgU", + "UWvaTmrkehbTKtFP2YwKzhJkCmZEUA2+2uL0OXj56ujpxdOXZ8FASyLKQmfpT169fhMMgv1+v1+hq5Tn", + "lKs0ziYXkn7CmpsU7D//JWgSclDQDwkmXMyNxFwf0JrW1WPMRUIUxPQDwrnu7zzQJmz3edNw7ZmhFtd9", + "bUQ2si9rDAeJU8pwqeVo/yjafsnFh5iTqLP7jZWdodJ9L7L40j6AkLMxnWTWQXBqj0Cdmum1r4ZXZFoi", + "UQ0w1tOsd//7FNUURUXD8i71V3alM69DTmFFIjXXteqEL4CYz1DEZO4B8W7fg+LfBVVmRt17EFH5AfTL", + "ayCse7MYfthfBHHfj2IPUR6aftGIcjq1CSUFIbt7x+7j3qZ6NQvTTNZI2muS89J40todmVGhMhLD4cnb", + "msnxOtY2ZPOYXRsRVk2tm/8CD0RBqG27xp+iZhXYaKmxPZv4bdHw+lcXa1eWry5rwlefh194rGEmFU+A", + "RsgUHVMdrzWcUVp3W+szNuNxR0ezxgJsaKYsuYuefzK3XdlJWQbNi8losctTjUDKYEInZDRX9cVmt784", + "9X5B5/37RL0sKrbwwOhCcU+wl6NleKTlmLfdJOI1MfSF4hezMfX0XFiq0vumEsJGCO5Aq7vopCF1IXkb", + "LqdU2zYJuRCMCT07rjoR3XPWAU3cAI6KAYpuiy71IqKV3i6tLS4qRFAGmUQYzXeAwNlxF94U1P5NAiOK", + "zjBPE0yJhBEig4zpJQUjM75JflQJyKT29qhqvu4cdptR2DG+EnfPuvDrPMWEMLikcWxirYQoGppAbUQb", + "/FxOkbmJ0iNpA8AKre+esyqyXGqmafLbgbEMGF0Q5fFYcUKlEqXlkIokKbRePzvc399/0jTSew87/d3O", + "7sM3u/1BX//7d9AOrHHVvgNR2HHmZxtJE19fB3V74ULfqkU5fDs82nMrQn0c9ekBefL46oqoJ4/opXzy", + "KRmJyZ/7ZCtpFb95OipjdmhlEkUnN30aVb5IvRIQL4nEvzjAvlFGx36xevmx3L3RLW8jB9SwqybxYpq0", + 
"vyBL0zSCNb1abqPfODHU+dHfav+gRL5mh2WJptPlS0Ja6baU61MhuPDkQ3nkGecgTWMaGu3uyBRDOqYh", + "oO4B9AvQSoxlwcJTqot1RKIL4VZyr0orQmMPZiqRjh3MtYSWNstJFiuaxmifGZRu5KwYzo9MT74okTKG", + "4gJz8dygpwSl9AZLjRgm56VoYlaZCEfZZKJFUhXdMZVmcSjXNIpxNLCx11qomtksCfPBq8rDhmh4oaOv", + "TowzjKsgsBZFE5twgVDgxE5ajSvKZiSm0QVlaeaFxFJRPsuEcRFsp0BGPFPGHbATVh3EpM2MmzfWGucV", + "1oI4fkUS2y2RuiSkIipzYZdVL/5By7Mcjn9YOx2uE980DPMwuzEBiceKHR4fWRsdcqYIZSggQUXcBkwl", + "SWJydUE76GhMRQQTzoCPx/9cnTZZ4sUVCrLKDzisRg+35wPQiQsKml6I5PEMI0gIo2OUClzL6shySvYe", + "PhqQUbi7tx/h+MHDR91u1zcMMiXmKafMM9TT4tlmU9GzCZFO2WdXTr9uHm4hnbUJL5+Dk4M3vwaDoJdJ", + "0Yt5SOKeHFE2qPxd/Fk+MB/snyPKvGmwwuY2KDUmxlkEHXFYNdKO85jQuLEXmWZx7L4faE4YhgUguTE2", + "a6MUvwv1UkMzpp8wAm9yXZGJ9qUs4r4ui94OPmaY4UXKJbWjLzgy7omORkYZjSMwb1T3JZX9qh7b7i1l", + "v+JCmojRRpyLjmSRpdEj6zZuzIwpGtugqTbiw/1Hj//Rf7K7V1FuytSjB8FGpBRmt5GpMTy7p6XLkyKL", + "7AqqYWA/hZzNtFaYPwx92s5Y4NQMeP5sYTIuufhA2eQioh50/m4fQkQFhspkY9frUNAjaboeiv6AvrBp", + "BftrPEi3EeBZXb67Jf+S0Ks++qvJbx//R57848/djy/Ozv41e/7b0Uv6r7P45NVX5ZhX7wF9142cldk1", + "E2/UNnA2hccxUaHH8ZlyqZZIzT0BxSHRL3fhkDAY4eCcdeAFVShIPIDzgKS064TZDXlyHkALr0io7FvA", + "GeiuYIokQrGjXz6xeXb98uc8TXHd7COaM5LQEIQT8ohIHc4ykNko4gmhbOecnTPXF+SMSJO+0Z8iCEmq", + "MoF6RiDMRDyHkSAhFvvS5eBt+EzS9HrnnKkpUYBXSmgOUiJUsWGcj2Am2lFl00OuOUYwI3GGEkIjqHNW", + "rB+RJkF3ooiYoOoWKVnj7zdSNEuE4o3JuVC1LPPjftszj6Db6YmMqVTIoNh/oNKAF1r5HsHjfk39H/cf", + "r89EFhhaAT+D7sUqpRyUG+iHBbAZ2hrji6lS6fqyI2NvrI7Ar2/enGgx6P9PIe+olEUxxS3O4jkQHRej", + "tPk1FRufxG3L7AS+HJqd3Q0ZemMb69diuZ6Pp2ZgePPiFBSKhDJrv1uhFudYh+9oMz1UykxDkRI4ODx+", + "utPdoMzKyLagf8U8vik4bCTs822sxSSGeaPchNDybcPwqK3dKaehpaNlMqjPuIDYGphSrwfwVmJ9P8NM", + "lU322JmM52XJibXq58FO3mPatBQDeF34d6QgpShkKcGQd1nqpen2nP2ugWHTuwu9t+u0msS1i1+caTPJ", + "XKLA5U7MUrzcFKxWf4/Ejc5z1txlvJluV7cn9WB+aJRzf+seyP7NPJDbKQpY3OIn8kIyksopV8s3Pgjk", + "bQCvqFQ1n2Fxgpam6hcLCuoG35YKrNjp3Kw04HvmzX+8soSVhQRfWw3gXIzNigF80KramXzL7ov3/9sB", + "9WxXHEhJJwwjGJ6UBX5lQJp330i5P9nr7j563N3t97u7/U3C84SEK8Y+PjjcfPD+ng1YBmQ0CKMBjr8i", + 
"PeCmzS4IJL4kcwnn+ZJ9HlgfoeIcVEDplvWNEpSLZRZfVlXR3PhYVzdxkzqJjayHKchZYvpPTbHOze3+", + "w6V2f+2s6mAa1ztmVolOTeP8rYubJK4QQp7FEfubgpHWPOuqYeQ8SonKIsW2pRLesg+MX7I66zZ/ofX3", + "Y4ZiDmfHx7Vsl8BxJjfbk5eKp+nSeeDpjaZhb83yu5aaSlnMNkphmpawYl+/eeFLNTTPt28s6jYI0au4", + "W16RYboz4bctookGGhngeodRpqCohdOQO4x5FplKAjGj0tRiKjpD4xG/zhijbKJ7MGtGqJ/EcxD2+9Uv", + "nxANv/zd1Py1+o3TaaYifsnMO3KaKdB/GZI1C84dWt2FRfIAXnLzjqO0rc1/w6+yzQmLRvPF5k0frGWj", + "de2eKy4wMoM5tRzAs0IVC2V2ytuS6D5aC+G2+cwW5o51413e0s1W0A6c1IN2YEUYtINcMvqj5dB8MsQH", + "7cAR4t3ktfBcVt+T5GeHGtt7VCqtaa7kBSqNoYVJquZ5gjnXnp2bqctB0aFv//VbhwT9J98iKfl2ZRby", + "P6RirGqh8kHW2qaFOV0a+ntLMoZHTd/Wxjvu4FndW21sIkvVsdt43i3kFQfc7Ekz/SxPu02y5j7hDQ61", + "LathKzXHRvflqbZ1IdySINuWk1Q4q1CyfG7s8vSVJwCpzI/+faHInEe6Po9ljSGkKDoFJHJ3VlvQS0HN", + "tpMTkBWsFsF/aZ/Cn3db7TUfk6tiBOPPEgmNyl/LR5npMbW/O114nddl0HHehSGjW3ev/S7w5kcjc1Qt", + "Tsaqs5K5A+RVPGd/Vli0ZbrVAGc5Rnv1cUxtujDMBFXzU70gWBiOkAgUB5mFoVkpDBPm63Jwk8u9vjYF", + "OmNP7ehzZChoCAcnQ4OShDAy0VN2dgwxHWM4D2N0qbgFJ8KcB3h1OOzYPYQ8cjeZHKqMQPJyy4OToan0", + "EtKO2+/udc2pDp4iIykNBsF+d9fUsmkxGBZ7lZMwLjmqFdEsZcPILblHro0Wrkw5k7b9Xr9vK3GYcsaV", + "lMVYvT+l3R22C6wxtpusw67ud9FjXUhw5c6AMAWeqJGeM3PdDh70d29E3NpiKh8JbxnJ1JQL+gkjPejD", + "G0rkiwYdMoWCkRgkihkKV15UhXAw+KMO3j/eXb9rBzJLEiLmuej8cku59KCgelgzsDqGUv3Co/k349d3", + "HvS6rtDael0vgPDbzXOOvUWZuyLVUmQWYluY7V9IVCTZW644rdg8qFXCfi/QP+g/uP1BKwXURdkccLtl", + "YYl4cvtEHHI2jmmooJPT4s4IAoltTX4dIHfFHLx2VAPJ+RqbPafygKPuLl8qeiS/uWDlotG432A7q0fz", + "UoUbLCMFV5WS5fuVZB10jqgMtXNZRUsnJGnlGgdZ6mkVRZ9pdG19pRht5qiOoSPzfbHkpESQBBUKaWha", + "cmoXyisOqH7gIhEb5togsr6ctCsybPqS7xYQ+2Dp8YWMNdeGLRjFo4ZB/I6GsLF1UznDc5fQ/LaYxfzM", + "wnXbb+Geo/qxoNnfnheUH4r4njC/K4h6jipXkUJs2gpOi2L+ZfBy5f63ONFuBA/jpzr6tFptCbVbBiVb", + "9lUIpxh+sAyZbYPVYeTQNtmGH2DPLNxg9Xfk3y/3GwSOpaxWBYtDt490e7Fi7fKZjULFvW9GgQOYR8im", + "xGOUF4HbzSwi5yzc+R4x4392VNg8R3WHNOkki2NzmNsdAihPblTtae+z9g828JNzbVvpi7x9/aKDLOQR", + "Rq4UablDkhdqf1tv2U6YZeUeJpvEV0ZUOTCWO6NfMf9256C8e+uve89cBdpf957ZGrS/7h+UV3DdDlj6", + 
"2zLN2/Ze7zD4tPNK60IzpsmWU6/z9opWW3H43LmVm7h8BYH3Xt8mXl9VXCsdv+II0S26fvXr+ba8T1CA", + "zSdt8yiva/rJXL7tpp4cIu0WqanMqOXiXYmUuYDOnYawF+TcJdVzFQe0QFzV/m6YQy0VcqV3kEN3eNR2", + "B13s8ZRU4JhebS+jmtOxdS/Rjbv9dOpBMqKTjGeyehrBnGtCWd4lUjPAd81/LZfnpR7sD4zS/jaXjq07", + "qPe4vyXXuTmh1njbbZF1znPeajvOc7lVs7n3nFN47z1v5D1XxLXaey7K/G/Tfa7fj7x1/znHm0/grq7y", + "Z/Sg75hXSpjLcVc2e2s2bmMHtTw6uHrtL6/33PpGfzH49v3S/Mj4XcwhmXNa5ubr3BMs15rlruCPhof+", + "dm3f9l3Auwyx59UrGvzOljFEvZhPqm5X80ChQJKUp9lBtwYi4dQQ1jlFpuDpTHPVPWf5fQ7vJc9EiO+h", + "ACooDhJjDJW79jfm5lpbafo3J+DekzR9X9xbsjOA56a8syJdO3hLoqAkhpAzyWN7kuz9LEneDxbrxM+O", + "j81Lps3UVoS/HxRX8RY6JnWrc3bOXqPKBJOGi5hIBS8hpgwltPSECx7HGMFoDu+1PCv87ZjLIXSP9lKC", + "eH7O9BuUZSgdl5RNgOGl65CO4f2YxzG/NOcP3tt7IpZq/Qs9S99J89vLT3daXhQHYQRn79pAc3mjGdec", + "di0HdhdLlkMVpyh2+97jTp8XE11Gpl6RkrEyB+ep0vjgmbKXVfoIsZL3k7L0iM/ivZoTsEhvQJmk6abw", + "dWQaFM+SZAWGoTUtv5Qq4pn6u1QRCnsFk0P3MnBDi4T2D0U+2AuDatdU2PONPlFZDv2iCuy1aPmxSPvX", + "LEmCduDo8Rxz3GAlUXileqjNSseKtW5Tmx0uxmN6ZsyL0Do9fbpzv2Zs6JYYkdWNvROgZ+Vw52vNSTVv", + "8PbaNvjpPZf8IPJ3huH2tyIqVFBzRQKLRnN3L35xu86dOhNgJrLkzKx3ji+vjuTPluqIOxj+0+tIiY+f", + "XEtCLswddjK/s+TuFG9VIo6KurfMdRLlNQ3tPOo9Oz7eWaY09hq0pSoj7sNhV0f5068p5oaNu6ct9sok", + "UjCwKlnY041W6QNP79XBXbVyv3jcycXDZEQLbloTQUIcZ7G5WSgy12n59MLdF9X7bD8M1+XVy9/Z/mFy", + "Ke5ah3XD5AzeCaV0PEXofmls6zrJi5s37mh9s/m1P8eCiTGqOwT+VaD6K/I/D7q//Waw79f4N9oK3qpu", + "Fb/i96Po1rZXPkdDXtdYlcddUXOLtJwTxRs+YOUyxKUlMe5exK0UxDjTcoNymJyD+8qBDYphKsLKDbzv", + "ni0JxGx52OZdOM3SlAslQV1ySHiE0mxB/Hb66iWMeDQfQPEeA3sboAOcu8bN/e6XjqHoJ9TvHpsiMx2e", + "jLlIKh3kb6YCOylPs9jcUmkqjZ2M7WJFQBHRnXwCIsIpnaFna6v6o7G3WtXTNOTtIMnZ62n2zOV99U6b", + "v6lW0FKfjzqPMKYx5j8jY360c1rcxZZ3UbnQcEQZEfNNbzNs/lLurFhW7+IP5R6TK5pkSfGzRM9/gZb7", + "xQ3z43rmJwPpuMAUXoWIkTQbVjtf96O67WI6PdedbbXcK7emS1f471jqVd6ppKfY/OKoA7niHGIiJrjz", + "0xyocLpWnqcYHjVOU9zBIrVZjr7Sz9iwLG2zAGNDv/82StKK4HO7BWlnP45PXLl25g6eipgVbuaySrgf", + "C4L97S0J266AO7vDOZTnmLvUleo304Hu0QeYFzwkMUQ4w5in5hJg2zZoB5mI3ZWmg579ycwpl8r84E5w", + 
"/e76/wIAAP//cHRGgNyPAAA=", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/paths/paths.go b/lib/paths/paths.go index 65e66a9..ce06aeb 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -1,5 +1,4 @@ // Package paths provides centralized path construction for hypeman data directory. - package paths import "path/filepath" @@ -196,6 +195,23 @@ func (p *Paths) GuestsDir() string { return filepath.Join(p.dataDir, "guests") } +// Device path methods + +// DevicesDir returns the root devices directory. +func (p *Paths) DevicesDir() string { + return filepath.Join(p.dataDir, "devices") +} + +// DeviceDir returns the directory for a device. +func (p *Paths) DeviceDir(id string) string { + return filepath.Join(p.DevicesDir(), id) +} + +// DeviceMetadata returns the path to device metadata.json. +func (p *Paths) DeviceMetadata(id string) string { + return filepath.Join(p.DeviceDir(id), "metadata.json") +} + // Volume path methods // VolumesDir returns the root volumes directory. 
diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 6252306..ecbeb70 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -8,6 +8,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -70,8 +71,13 @@ func ProvideNetworkManager(p *paths.Paths, cfg *config.Config) network.Manager { return network.NewManager(p, cfg, meter) } +// ProvideDeviceManager provides the device manager +func ProvideDeviceManager(p *paths.Paths) devices.Manager { + return devices.NewManager(p) +} + // ProvideInstanceManager provides the instance manager -func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, volumeManager volumes.Manager) (instances.Manager, error) { +func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager) (instances.Manager, error) { // Parse max overlay size from config var maxOverlaySize datasize.ByteSize if err := maxOverlaySize.UnmarshalText([]byte(cfg.MaxOverlaySize)); err != nil { @@ -108,7 +114,7 @@ func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager ima meter := otel.GetMeterProvider().Meter("hypeman") tracer := otel.GetTracerProvider().Tracer("hypeman") - return instances.NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, meter, tracer), nil + return instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, meter, tracer), nil } // ProvideVolumeManager provides the volume manager diff --git a/lib/system/init_script.go b/lib/system/init_script.go index 
2c6a5e9..ebe9f8b 100644 --- a/lib/system/init_script.go +++ b/lib/system/init_script.go @@ -7,8 +7,12 @@ package system // 1. Mounts essential filesystems (proc, sys, dev) // 2. Sets up overlay filesystem (lowerdir=rootfs, upperdir=overlay disk) // 3. Mounts and sources config disk (/dev/vdc) -// 4. Configures networking (if enabled) -// 5. Executes container entrypoint +// 4. Loads NVIDIA kernel modules (if HAS_GPU=1 in config.sh) +// 5. Configures networking (if enabled) +// 6. Executes container entrypoint +// +// GPU support: When HAS_GPU=1 is set in the instance's config.sh, the init script +// will load NVIDIA kernel modules before launching the container entrypoint. func GenerateInitScript() string { return `#!/bin/sh set -xe @@ -71,6 +75,95 @@ else exit 1 fi +# Load NVIDIA kernel modules for GPU passthrough (if HAS_GPU=1) +if [ "${HAS_GPU:-0}" = "1" ]; then + echo "overlay-init: loading NVIDIA kernel modules for GPU passthrough" + if [ -d /lib/modules ]; then + # Find the kernel version directory + KVER=$(ls /lib/modules/ 2>/dev/null | head -1) + if [ -n "$KVER" ] && [ -d "/lib/modules/$KVER/kernel/drivers/gpu" ]; then + # Load modules in order (dependencies first) + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia.ko 2>&1 || echo "overlay-init: nvidia.ko load failed" + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-uvm.ko 2>&1 || echo "overlay-init: nvidia-uvm.ko load failed" + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-modeset.ko 2>&1 || echo "overlay-init: nvidia-modeset.ko load failed" + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-drm.ko modeset=1 2>&1 || echo "overlay-init: nvidia-drm.ko load failed" + echo "overlay-init: NVIDIA modules loaded for kernel $KVER" + + # Use nvidia-modprobe to create device nodes with correct major/minor numbers. + # nvidia-modprobe is the official NVIDIA utility that: + # 1. Loads kernel modules if needed (already done above) + # 2. 
Creates /dev/nvidiactl and /dev/nvidia0 with correct permissions + # 3. Creates /dev/nvidia-uvm and /dev/nvidia-uvm-tools + if [ -x /usr/bin/nvidia-modprobe ]; then + echo "overlay-init: running nvidia-modprobe to create device nodes" + /usr/bin/nvidia-modprobe 2>&1 || echo "overlay-init: nvidia-modprobe failed" + /usr/bin/nvidia-modprobe -u -c=0 2>&1 || echo "overlay-init: nvidia-modprobe -u failed" + echo "overlay-init: nvidia-modprobe completed" + ls -la /dev/nvidia* 2>/dev/null || true + else + echo "overlay-init: nvidia-modprobe not found, falling back to manual mknod" + # Fallback: Manual device node creation + NVIDIA_MAJOR=$(awk '/nvidia-frontend|^[0-9]+ nvidia$/ {print $1}' /proc/devices 2>/dev/null | head -1) + NVIDIA_UVM_MAJOR=$(awk '/nvidia-uvm/ {print $1}' /proc/devices 2>/dev/null) + + if [ -n "$NVIDIA_MAJOR" ]; then + mknod -m 666 /dev/nvidiactl c $NVIDIA_MAJOR 255 + mknod -m 666 /dev/nvidia0 c $NVIDIA_MAJOR 0 + echo "overlay-init: created /dev/nvidiactl and /dev/nvidia0 (major $NVIDIA_MAJOR)" + fi + + if [ -n "$NVIDIA_UVM_MAJOR" ]; then + mknod -m 666 /dev/nvidia-uvm c $NVIDIA_UVM_MAJOR 0 + mknod -m 666 /dev/nvidia-uvm-tools c $NVIDIA_UVM_MAJOR 1 + echo "overlay-init: created /dev/nvidia-uvm* (major $NVIDIA_UVM_MAJOR)" + fi + fi + else + echo "overlay-init: NVIDIA modules not found in /lib/modules/$KVER" + fi + else + echo "overlay-init: /lib/modules not found, skipping NVIDIA module loading" + fi + + # Inject NVIDIA userspace driver libraries into container rootfs + # This allows containers to use standard CUDA images without bundled drivers + # See lib/devices/GPU.md for documentation + if [ -d /usr/lib/nvidia ]; then + echo "overlay-init: injecting NVIDIA driver libraries into container" + + DRIVER_VERSION=$(cat /usr/lib/nvidia/version 2>/dev/null || echo "unknown") + LIB_DST="/overlay/newroot/usr/lib/x86_64-linux-gnu" + BIN_DST="/overlay/newroot/usr/bin" + + mkdir -p "$LIB_DST" "$BIN_DST" + + # Copy all driver libraries and create symlinks + for 
lib in /usr/lib/nvidia/*.so.*; do + if [ -f "$lib" ]; then + libname=$(basename "$lib") + cp "$lib" "$LIB_DST/" + + # Create standard symlinks: libfoo.so.VERSION -> libfoo.so.1 -> libfoo.so + base=$(echo "$libname" | sed 's/\.so\..*//') + ln -sf "$libname" "$LIB_DST/${base}.so.1" 2>/dev/null || true + ln -sf "${base}.so.1" "$LIB_DST/${base}.so" 2>/dev/null || true + fi + done + + # Copy nvidia-smi and nvidia-modprobe binaries + for bin in nvidia-smi nvidia-modprobe; do + if [ -x /usr/bin/$bin ]; then + cp /usr/bin/$bin "$BIN_DST/" + fi + done + + # Update ldconfig cache so applications can find the libraries + chroot /overlay/newroot ldconfig 2>/dev/null || true + + echo "overlay-init: NVIDIA driver libraries injected (version: $DRIVER_VERSION)" + fi +fi + # Mount attached volumes (from config: VOLUME_MOUNTS="device:path:mode[:overlay_device] ...") # Modes: ro (read-only), rw (read-write), overlay (base ro + per-instance overlay) if [ -n "${VOLUME_MOUNTS:-}" ]; then diff --git a/lib/system/initrd.go b/lib/system/initrd.go index c409ec7..09f286c 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -1,16 +1,21 @@ package system import ( + "archive/tar" + "compress/gzip" "context" "crypto/sha256" "encoding/hex" "fmt" + "io" + "net/http" "os" "path/filepath" "strconv" "time" "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/logger" ) const alpineBaseImage = "alpine:3.22" @@ -49,12 +54,19 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) if err := os.MkdirAll(binDir, 0755); err != nil { return "", fmt.Errorf("create bin dir: %w", err) } - + agentPath := filepath.Join(binDir, "exec-agent") if err := os.WriteFile(agentPath, ExecAgentBinary, 0755); err != nil { return "", fmt.Errorf("write exec-agent: %w", err) } + // Add NVIDIA kernel modules (for GPU passthrough support) + if err := m.addNvidiaModules(ctx, rootfsDir, arch); err != nil { + // Log but don't fail - NVIDIA modules are optional (not 
available on all architectures) + log := logger.FromContext(ctx) + log.InfoContext(ctx, "skipping NVIDIA modules", "error", err) + } + // Write generated init script initScript := GenerateInitScript() initPath := filepath.Join(rootfsDir, "init") @@ -64,13 +76,13 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) // Generate timestamp for this build timestamp := strconv.FormatInt(time.Now().Unix(), 10) - + // Package as cpio.gz outputPath := m.paths.SystemInitrdTimestamp(timestamp, arch) if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { return "", fmt.Errorf("create output dir: %w", err) } - + if _, err := images.ExportRootfs(rootfsDir, outputPath, images.FormatCpio); err != nil { return "", fmt.Errorf("export initrd: %w", err) } @@ -135,10 +147,167 @@ func (m *manager) isInitrdStale(initrdPath string) bool { return string(storedHash) != currentHash } -// computeInitrdHash computes a hash of the embedded binary and init script +// computeInitrdHash computes a hash of the embedded binary, init script, and NVIDIA assets func computeInitrdHash() string { h := sha256.New() h.Write(ExecAgentBinary) h.Write([]byte(GenerateInitScript())) + // Include NVIDIA driver version in hash so initrd is rebuilt when driver changes + if ver, ok := NvidiaDriverVersion[DefaultKernelVersion]; ok { + h.Write([]byte(ver)) + } + // Include driver libs URL so initrd is rebuilt when the libs tarball changes + if archURLs, ok := NvidiaDriverLibURLs[DefaultKernelVersion]; ok { + if url, ok := archURLs["x86_64"]; ok { + h.Write([]byte(url)) + } + } return hex.EncodeToString(h.Sum(nil))[:16] } + +// addNvidiaModules downloads and extracts NVIDIA kernel modules into the rootfs +func (m *manager) addNvidiaModules(ctx context.Context, rootfsDir, arch string) error { + // Check if NVIDIA modules are available for this architecture + archURLs, ok := NvidiaModuleURLs[DefaultKernelVersion] + if !ok { + return fmt.Errorf("no NVIDIA modules for kernel 
version %s", DefaultKernelVersion) + } + url, ok := archURLs[arch] + if !ok { + return fmt.Errorf("no NVIDIA modules for architecture %s", arch) + } + + // Download the tarball + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return nil // Follow redirects + }, + } + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("download nvidia modules: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download failed with status %d", resp.StatusCode) + } + + // Extract tarball directly into rootfs + if err := extractTarGz(resp.Body, rootfsDir); err != nil { + return fmt.Errorf("extract nvidia modules: %w", err) + } + + // Add userspace driver libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) + // These are injected into containers at boot time - see lib/devices/GPU.md + if err := m.addNvidiaDriverLibs(ctx, rootfsDir, arch); err != nil { + log := logger.FromContext(ctx) + log.WarnContext(ctx, "could not add nvidia driver libs", "error", err) + // Don't fail - kernel modules can still work, but containers won't have driver libs + } + + return nil +} + +// addNvidiaDriverLibs downloads and extracts NVIDIA userspace driver libraries +// These libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) are injected +// into containers at boot time, eliminating the need for containers to bundle +// matching driver versions. See lib/devices/GPU.md for documentation. 
+func (m *manager) addNvidiaDriverLibs(ctx context.Context, rootfsDir, arch string) error { + archURLs, ok := NvidiaDriverLibURLs[DefaultKernelVersion] + if !ok { + return fmt.Errorf("no NVIDIA driver libs for kernel version %s", DefaultKernelVersion) + } + url, ok := archURLs[arch] + if !ok { + return fmt.Errorf("no NVIDIA driver libs for architecture %s", arch) + } + + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return nil // Follow redirects + }, + } + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("download nvidia driver libs: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download failed with status %d", resp.StatusCode) + } + + // Extract tarball directly into rootfs + if err := extractTarGz(resp.Body, rootfsDir); err != nil { + return fmt.Errorf("extract nvidia driver libs: %w", err) + } + + log := logger.FromContext(ctx) + log.InfoContext(ctx, "added NVIDIA driver libraries", "url", url) + return nil +} + +// extractTarGz extracts a gzipped tarball into the destination directory +func extractTarGz(r io.Reader, destDir string) error { + gzr, err := gzip.NewReader(r) + if err != nil { + return fmt.Errorf("create gzip reader: %w", err) + } + defer gzr.Close() + + tr := tar.NewReader(gzr) + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("read tar: %w", err) + } + + // Calculate destination path + destPath := filepath.Join(destDir, header.Name) + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(destPath, os.FileMode(header.Mode)); err != nil { + return fmt.Errorf("create directory %s: %w", destPath, err) + } + case tar.TypeReg: + // Ensure parent directory exists + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != 
nil { + return fmt.Errorf("create parent dir: %w", err) + } + + outFile, err := os.Create(destPath) + if err != nil { + return fmt.Errorf("create file %s: %w", destPath, err) + } + + if _, err := io.Copy(outFile, tr); err != nil { + outFile.Close() + return fmt.Errorf("write file %s: %w", destPath, err) + } + outFile.Close() + + if err := os.Chmod(destPath, os.FileMode(header.Mode)); err != nil { + return fmt.Errorf("chmod %s: %w", destPath, err) + } + } + } + + return nil +} diff --git a/lib/system/versions.go b/lib/system/versions.go index 1aca99e..aaca2bf 100644 --- a/lib/system/versions.go +++ b/lib/system/versions.go @@ -6,19 +6,21 @@ import "runtime" type KernelVersion string const ( - // Kernel versions from Kernel linux build + // Kernel versions from onkernel/linux releases Kernel_202511182 KernelVersion = "ch-6.12.8-kernel-1-202511182" Kernel_20251211 KernelVersion = "ch-6.12.8-kernel-1.1-20251211" + Kernel_20251213 KernelVersion = "ch-6.12.8-kernel-1.2-20251213" // NVIDIA module + driver lib support + networking configs ) var ( // DefaultKernelVersion is the kernel version used for new instances - DefaultKernelVersion = Kernel_20251211 + DefaultKernelVersion = Kernel_20251213 // SupportedKernelVersions lists all supported kernel versions SupportedKernelVersions = []KernelVersion{ Kernel_202511182, Kernel_20251211, + Kernel_20251213, // Add future versions here } ) @@ -33,9 +35,39 @@ var KernelDownloadURLs = map[KernelVersion]map[string]string{ "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.1-20251211/vmlinux-x86_64", "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.1-20251211/Image-arm64", }, + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/vmlinux-x86_64", + "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/Image-arm64", + }, // Add future versions here } +// NvidiaModuleURLs 
maps kernel versions and architectures to NVIDIA module tarball URLs +// These tarballs contain pre-built NVIDIA kernel modules that match the kernel version +var NvidiaModuleURLs = map[KernelVersion]map[string]string{ + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/nvidia-modules-x86_64.tar.gz", + // Note: NVIDIA open-gpu-kernel-modules does not support arm64 yet + }, + // Kernel_202511182 and Kernel_20251211 do not have NVIDIA modules (pre-module-support kernels) +} + +// NvidiaDriverLibURLs maps kernel versions and architectures to driver library tarball URLs +// These tarballs contain userspace NVIDIA libraries (libcuda.so, libnvidia-ml.so, etc.) +// that match the kernel modules and are injected into containers at boot time. +// See lib/devices/GPU.md for documentation on driver injection. +var NvidiaDriverLibURLs = map[KernelVersion]map[string]string{ + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/nvidia-driver-libs-x86_64.tar.gz", + }, +} + +// NvidiaDriverVersion tracks the NVIDIA driver version bundled with each kernel +var NvidiaDriverVersion = map[KernelVersion]string{ + Kernel_20251213: "570.86.16", + // Kernel_202511182 and Kernel_20251211 do not have NVIDIA modules +} + // GetArch returns the architecture string for the current platform func GetArch() string { arch := runtime.GOARCH diff --git a/openapi.yaml b/openapi.yaml index 19e39ee..a096083 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -149,6 +149,12 @@ components: description: Whether to attach instance to the default network default: true example: true + devices: + type: array + items: + type: string + description: Device IDs or names to attach for GPU/PCI passthrough + example: ["l4-gpu"] volumes: type: array description: Volumes to attach to the instance at creation time @@ -502,6 +508,107 @@ components: description: Creation timestamp (RFC3339) example: 
"2025-01-15T10:00:00Z" + DeviceType: + type: string + enum: [gpu, pci] + description: Type of PCI device + + CreateDeviceRequest: + type: object + required: [pci_address] + properties: + name: + type: string + description: Optional globally unique device name. If not provided, a name is auto-generated from the PCI address (e.g., "pci-0000-a2-00-0") + pattern: ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$ + example: l4-gpu + pci_address: + type: string + description: PCI address of the device (required, e.g., "0000:a2:00.0") + example: "0000:a2:00.0" + + Device: + type: object + required: [id, type, pci_address, vendor_id, device_id, iommu_group, bound_to_vfio, created_at] + properties: + id: + type: string + description: Auto-generated unique identifier (CUID2 format) + example: tz4a98xxat96iws9zmbrgj3a + name: + type: string + description: Device name (user-provided or auto-generated from PCI address) + example: l4-gpu + type: + $ref: "#/components/schemas/DeviceType" + pci_address: + type: string + description: PCI address + example: "0000:a2:00.0" + vendor_id: + type: string + description: PCI vendor ID (hex) + example: "10de" + device_id: + type: string + description: PCI device ID (hex) + example: "27b8" + iommu_group: + type: integer + description: IOMMU group number + example: 82 + bound_to_vfio: + type: boolean + description: | + Whether the device is currently bound to the vfio-pci driver, which is required for VM passthrough. + - true: Device is bound to vfio-pci and ready for (or currently in use by) a VM. The device's native driver has been unloaded. + - false: Device is using its native driver (e.g., nvidia) or no driver. Hypeman will automatically bind to vfio-pci when attaching to an instance. 
+ example: false + attached_to: + type: string + description: Instance ID if attached + nullable: true + example: null + created_at: + type: string + format: date-time + description: Registration timestamp (RFC3339) + example: "2025-01-15T10:00:00Z" + + AvailableDevice: + type: object + required: [pci_address, vendor_id, device_id, iommu_group] + properties: + pci_address: + type: string + description: PCI address + example: "0000:a2:00.0" + vendor_id: + type: string + description: PCI vendor ID (hex) + example: "10de" + device_id: + type: string + description: PCI device ID (hex) + example: "27b8" + vendor_name: + type: string + description: Human-readable vendor name + example: "NVIDIA Corporation" + device_name: + type: string + description: Human-readable device name + example: "L4" + iommu_group: + type: integer + description: IOMMU group number + example: 82 + current_driver: + type: string + description: Currently bound driver (null if none) + nullable: true + example: "nvidia" + paths: /health: get: @@ -1246,6 +1353,176 @@ paths: schema: $ref: "#/components/schemas/Error" + /devices: + get: + summary: List registered devices + operationId: listDevices + security: + - bearerAuth: [] + responses: + 200: + description: List of registered devices + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Device" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + post: + summary: Register a device for passthrough + operationId: createDevice + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateDeviceRequest" + responses: + 201: + description: Device registered + content: + application/json: + schema: + $ref: "#/components/schemas/Device" + 400: + description: 
Bad request (invalid name or PCI address) + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 404: + description: PCI device not found on host + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Conflict - device or name already registered + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /devices/available: + get: + summary: Discover passthrough-capable devices on host + operationId: listAvailableDevices + security: + - bearerAuth: [] + responses: + 200: + description: List of available PCI devices + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/AvailableDevice" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /devices/{id}: + get: + summary: Get device details + operationId: getDevice + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Device ID or name + responses: + 200: + description: Device details + content: + application/json: + schema: + $ref: "#/components/schemas/Device" + 404: + description: Device not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + delete: + summary: Unregister device + operationId: deleteDevice + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Device ID or name + 
responses: + 204: + description: Device unregistered + 404: + description: Device not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Conflict - device is attached to an instance + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /ingresses: get: summary: List ingresses @@ -1388,3 +1665,4 @@ paths: schema: $ref: "#/components/schemas/Error" + diff --git a/stainless.yaml b/stainless.yaml index 6aac3c9..4e27148 100644 --- a/stainless.yaml +++ b/stainless.yaml @@ -103,6 +103,18 @@ resources: get: get /volumes/{id} delete: delete /volumes/{id} + devices: + models: + device: '#/components/schemas/Device' + available_device: '#/components/schemas/AvailableDevice' + device_type: '#/components/schemas/DeviceType' + methods: + list: get /devices + create: post /devices + retrieve: get /devices/{id} + delete: delete /devices/{id} + list_available: get /devices/available + ingresses: models: ingress: '#/components/schemas/Ingress'