diff --git a/Makefile b/Makefile index 07b2bf0..58e6715 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean +.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries build-preview-cli release-prep clean # Directory where local binaries will be installed BIN_DIR ?= $(CURDIR)/bin diff --git a/cmd/api/api/api.go b/cmd/api/api/api.go index 5fd5033..f511cbf 100644 --- a/cmd/api/api/api.go +++ b/cmd/api/api/api.go @@ -2,6 +2,7 @@ package api import ( "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -17,6 +18,7 @@ type ApiService struct { InstanceManager instances.Manager VolumeManager volumes.Manager NetworkManager network.Manager + DeviceManager devices.Manager IngressManager ingress.Manager } @@ -29,6 +31,7 @@ func New( instanceManager instances.Manager, volumeManager volumes.Manager, networkManager network.Manager, + deviceManager devices.Manager, ingressManager ingress.Manager, ) *ApiService { return &ApiService{ @@ -37,6 +40,7 @@ func New( InstanceManager: instanceManager, VolumeManager: volumeManager, NetworkManager: networkManager, + DeviceManager: deviceManager, IngressManager: ingressManager, } } diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index 8aaa406..c5984fd 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" 
"github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" mw "github.com/onkernel/hypeman/lib/middleware" @@ -34,11 +35,12 @@ func newTestService(t *testing.T) *ApiService { systemMgr := system.NewManager(p) networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := instances.ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB } - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) // Register cleanup for orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -50,6 +52,7 @@ func newTestService(t *testing.T) *ApiService { ImageManager: imageMgr, InstanceManager: instanceMgr, VolumeManager: volumeMgr, + DeviceManager: deviceMgr, } } diff --git a/cmd/api/api/devices.go b/cmd/api/api/devices.go new file mode 100644 index 0000000..d7d2dd2 --- /dev/null +++ b/cmd/api/api/devices.go @@ -0,0 +1,167 @@ +package api + +import ( + "context" + "errors" + + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/oapi" +) + +// ListDevices returns all registered devices +func (s *ApiService) ListDevices(ctx context.Context, request oapi.ListDevicesRequestObject) (oapi.ListDevicesResponseObject, error) { + deviceList, err := s.DeviceManager.ListDevices(ctx) + if err != nil { + return oapi.ListDevices500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + result := make([]oapi.Device, len(deviceList)) + for i, d := range deviceList { + result[i] = deviceToOAPI(d) + } + + return oapi.ListDevices200JSONResponse(result), nil +} + +// ListAvailableDevices discovers passthrough-capable devices on the host +func (s *ApiService) ListAvailableDevices(ctx context.Context, request oapi.ListAvailableDevicesRequestObject) 
(oapi.ListAvailableDevicesResponseObject, error) { + available, err := s.DeviceManager.ListAvailableDevices(ctx) + if err != nil { + return oapi.ListAvailableDevices500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + result := make([]oapi.AvailableDevice, len(available)) + for i, d := range available { + result[i] = availableDeviceToOAPI(d) + } + + return oapi.ListAvailableDevices200JSONResponse(result), nil +} + +// CreateDevice registers a new device for passthrough +func (s *ApiService) CreateDevice(ctx context.Context, request oapi.CreateDeviceRequestObject) (oapi.CreateDeviceResponseObject, error) { + var name string + if request.Body.Name != nil { + name = *request.Body.Name + } + req := devices.CreateDeviceRequest{ + Name: name, + PCIAddress: request.Body.PciAddress, + } + + device, err := s.DeviceManager.CreateDevice(ctx, req) + if err != nil { + switch { + case errors.Is(err, devices.ErrInvalidName): + return oapi.CreateDevice400JSONResponse{ + Code: "invalid_name", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrInvalidPCIAddress): + return oapi.CreateDevice400JSONResponse{ + Code: "invalid_pci_address", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrDeviceNotFound): + return oapi.CreateDevice404JSONResponse{ + Code: "device_not_found", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrAlreadyExists), errors.Is(err, devices.ErrNameExists): + return oapi.CreateDevice409JSONResponse{ + Code: "conflict", + Message: err.Error(), + }, nil + default: + return oapi.CreateDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + return oapi.CreateDevice201JSONResponse(deviceToOAPI(*device)), nil +} + +// GetDevice returns a device by ID or name +func (s *ApiService) GetDevice(ctx context.Context, request oapi.GetDeviceRequestObject) (oapi.GetDeviceResponseObject, error) { + device, err := s.DeviceManager.GetDevice(ctx, request.Id) + if err != 
nil { + if errors.Is(err, devices.ErrNotFound) { + return oapi.GetDevice404JSONResponse{ + Code: "not_found", + Message: "device not found", + }, nil + } + return oapi.GetDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + return oapi.GetDevice200JSONResponse(deviceToOAPI(*device)), nil +} + +// DeleteDevice unregisters a device +func (s *ApiService) DeleteDevice(ctx context.Context, request oapi.DeleteDeviceRequestObject) (oapi.DeleteDeviceResponseObject, error) { + err := s.DeviceManager.DeleteDevice(ctx, request.Id) + if err != nil { + switch { + case errors.Is(err, devices.ErrNotFound): + return oapi.DeleteDevice404JSONResponse{ + Code: "not_found", + Message: "device not found", + }, nil + case errors.Is(err, devices.ErrInUse): + return oapi.DeleteDevice409JSONResponse{ + Code: "in_use", + Message: "device is attached to an instance", + }, nil + default: + return oapi.DeleteDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + return oapi.DeleteDevice204Response{}, nil +} + +// Helper functions + +func deviceToOAPI(d devices.Device) oapi.Device { + deviceType := oapi.DeviceType(d.Type) + return oapi.Device{ + Id: d.Id, + Name: &d.Name, + Type: deviceType, + PciAddress: d.PCIAddress, + VendorId: d.VendorID, + DeviceId: d.DeviceID, + IommuGroup: d.IOMMUGroup, + BoundToVfio: d.BoundToVFIO, + AttachedTo: d.AttachedTo, + CreatedAt: d.CreatedAt, + } +} + +func availableDeviceToOAPI(d devices.AvailableDevice) oapi.AvailableDevice { + return oapi.AvailableDevice{ + PciAddress: d.PCIAddress, + VendorId: d.VendorID, + DeviceId: d.DeviceID, + VendorName: &d.VendorName, + DeviceName: &d.DeviceName, + IommuGroup: d.IOMMUGroup, + CurrentDriver: d.CurrentDriver, + } +} + + diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 8adb8c8..acbd37c 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -96,6 +96,12 @@ func (s *ApiService) CreateInstance(ctx 
context.Context, request oapi.CreateInst networkEnabled = *request.Body.Network.Enabled } + // Parse devices (GPU passthrough) + var deviceRefs []string + if request.Body.Devices != nil { + deviceRefs = *request.Body.Devices + } + // Parse volumes var volumes []instances.VolumeAttachment if request.Body.Volumes != nil { @@ -139,6 +145,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst Vcpus: vcpus, Env: env, NetworkEnabled: networkEnabled, + Devices: deviceRefs, Volumes: volumes, } diff --git a/cmd/api/main.go b/cmd/api/main.go index e2cb704..48c9e31 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -172,6 +172,18 @@ func run() error { } logger.Info("Network manager initialized") + // Reconcile device state (clears orphaned attachments from crashed VMs) + // Set up liveness checker so device reconciliation can accurately detect orphaned attachments + logger.Info("Reconciling device state...") + livenessChecker := instances.NewLivenessChecker(app.InstanceManager) + if livenessChecker != nil { + app.DeviceManager.SetLivenessChecker(livenessChecker) + } + if err := app.DeviceManager.ReconcileDevices(app.Ctx); err != nil { + logger.Error("failed to reconcile device state", "error", err) + return fmt.Errorf("reconcile device state: %w", err) + } + // Initialize ingress manager (starts Caddy daemon and DNS server for dynamic upstreams) logger.Info("Initializing ingress manager...") if err := app.IngressManager.Initialize(app.Ctx); err != nil { diff --git a/cmd/api/wire.go b/cmd/api/wire.go index 21f9ddf..dfa2fc1 100644 --- a/cmd/api/wire.go +++ b/cmd/api/wire.go @@ -9,6 +9,7 @@ import ( "github.com/google/wire" "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -27,6 +28,7 @@ type application struct { ImageManager 
images.Manager SystemManager system.Manager NetworkManager network.Manager + DeviceManager devices.Manager InstanceManager instances.Manager VolumeManager volumes.Manager IngressManager ingress.Manager @@ -44,6 +46,7 @@ func initializeApp() (*application, func(), error) { providers.ProvideImageManager, providers.ProvideSystemManager, providers.ProvideNetworkManager, + providers.ProvideDeviceManager, providers.ProvideInstanceManager, providers.ProvideVolumeManager, providers.ProvideIngressManager, diff --git a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go index 5a94276..6b3e81a 100644 --- a/cmd/api/wire_gen.go +++ b/cmd/api/wire_gen.go @@ -8,8 +8,11 @@ package main import ( "context" + "log/slog" + "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -18,10 +21,7 @@ import ( "github.com/onkernel/hypeman/lib/registry" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/volumes" - "log/slog" -) -import ( _ "embed" ) @@ -39,11 +39,12 @@ func initializeApp() (*application, func(), error) { } systemManager := providers.ProvideSystemManager(paths) networkManager := providers.ProvideNetworkManager(paths, config) + devicesManager := providers.ProvideDeviceManager(paths) volumesManager, err := providers.ProvideVolumeManager(paths, config) if err != nil { return nil, nil, err } - instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, networkManager, volumesManager) + instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, networkManager, devicesManager, volumesManager) if err != nil { return nil, nil, err } @@ -55,7 +56,7 @@ func initializeApp() (*application, func(), error) { if err != nil { return nil, nil, err } - apiService := api.New(config, manager, instancesManager, 
volumesManager, networkManager, ingressManager) + apiService := api.New(config, manager, instancesManager, volumesManager, networkManager, devicesManager, ingressManager) mainApplication := &application{ Ctx: context, Logger: logger, @@ -63,6 +64,7 @@ func initializeApp() (*application, func(), error) { ImageManager: manager, SystemManager: systemManager, NetworkManager: networkManager, + DeviceManager: devicesManager, InstanceManager: instancesManager, VolumeManager: volumesManager, IngressManager: ingressManager, @@ -83,6 +85,7 @@ type application struct { ImageManager images.Manager SystemManager system.Manager NetworkManager network.Manager + DeviceManager devices.Manager InstanceManager instances.Manager VolumeManager volumes.Manager IngressManager ingress.Manager diff --git a/go.mod b/go.mod index da8f315..0359d7b 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,6 @@ require ( go.opentelemetry.io/otel/trace v1.38.0 golang.org/x/sync v0.17.0 golang.org/x/sys v0.38.0 - golang.org/x/term v0.37.0 google.golang.org/grpc v1.77.0 google.golang.org/protobuf v1.36.10 gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54 @@ -49,15 +48,22 @@ require ( require ( github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apex/log v1.9.0 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/docker/cli v28.2.2+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/docker v28.2.2+incompatible // 
indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect + github.com/docker/go-connections v0.5.0 // indirect + github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect @@ -65,6 +71,7 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect github.com/go-test/deep v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/mux v1.8.1 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect @@ -74,6 +81,8 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mdlayher/socket v0.5.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/sys/sequential v0.6.0 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect @@ -92,6 +101,7 @@ require ( github.com/vishvananda/netns v0.0.5 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect go.opentelemetry.io/otel/log v0.14.0 // indirect go.opentelemetry.io/proto/otlp v1.7.1 // indirect diff --git a/go.sum b/go.sum index 0ee9efd..6772c9e 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,9 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c 
h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= @@ -15,8 +19,17 @@ github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2y github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= +github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= +github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= +github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod 
h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8= github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= @@ -33,8 +46,14 @@ github.com/docker/cli v28.2.2+incompatible h1:qzx5BNUDFqlvyq4AHzdNB7gSyVTmU4cgsy github.com/docker/cli v28.2.2+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= +github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= +github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= +github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -59,6 +78,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-test/deep v1.1.1 
h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U= github.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -90,6 +111,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= @@ -117,12 +140,22 @@ github.com/miekg/dns v1.1.68 h1:jsSRkNozw7G/mnmXULynzMNIsgY2dHC8LO6U6Ij2JEA= github.com/miekg/dns v1.1.68/go.mod h1:fujopn7TB3Pu3JM69XaawiU0wqjpL9/8xGop5UrTPps= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= 
+github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw= +github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/nrednav/cuid2 v1.1.0 h1:Y2P9Fo1Iz7lKuwcn+fS0mbxkNvEqoNLUtm0+moHCnYc= github.com/nrednav/cuid2 v1.1.0/go.mod h1:jBjkJAI+QLM4EUGvtwGDHC1cP1QQrRNfLo/A7qJFDhA= github.com/oapi-codegen/nethttp-middleware v1.1.2 h1:TQwEU3WM6ifc7ObBEtiJgbRPaCe513tvJpiMJjypVPA= @@ -198,10 +231,14 @@ github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zd github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= +github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0 h1:bwnLpizECbPr1RrQ27waeY2SPIPeccCx/xLuoYADZ9s= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0/go.mod h1:3nWlOiiqA9UtUnrcNk82mYasNxD8ehOspL0gOfEo6Y4= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 h1:PeBoRj6af6xMI7qCupwFvTbbnd49V7n5YpG6pg8iDYQ= go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0/go.mod h1:ingqBCtMCe8I4vpz/UVzCW6sxoqgZB37nao91mLQ3Bw= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= @@ -214,6 +251,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZF go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= go.opentelemetry.io/otel/metric v1.38.0 
h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= @@ -234,37 +273,55 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 h1:6/3JGEh1C88g7m+qzzTbl3A0FtsLguXieqofVLU/JAo= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= -golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.30.0 
h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4= diff --git a/lib/devices/GPU.md b/lib/devices/GPU.md new file mode 100644 index 0000000..55c7367 --- /dev/null +++ b/lib/devices/GPU.md @@ -0,0 +1,177 @@ +# GPU Passthrough Support + +This document covers NVIDIA GPU passthrough specifics. For general device passthrough, see [README.md](README.md). 
+ +## How GPU Passthrough Works + +hypeman supports NVIDIA GPU passthrough via VFIO, with automatic driver injection: + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ hypeman Initrd (built at startup) │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ /lib/modules//kernel/drivers/gpu/ │ │ +│ │ ├── nvidia.ko │ │ +│ │ ├── nvidia-uvm.ko │ │ +│ │ ├── nvidia-modeset.ko │ │ +│ │ └── nvidia-drm.ko │ │ +│ ├──────────────────────────────────────────────────────────────┤ │ +│ │ /usr/lib/nvidia/ │ │ +│ │ ├── libcuda.so.570.86.16 │ │ +│ │ ├── libnvidia-ml.so.570.86.16 │ │ +│ │ ├── libnvidia-ptxjitcompiler.so.570.86.16 │ │ +│ │ └── ... (other driver libraries) │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ (at VM boot, if HAS_GPU=1) +┌─────────────────────────────────────────────────────────────────────┐ +│ Guest VM │ +│ 1. Load kernel modules (modprobe nvidia, etc.) │ +│ 2. Create device nodes (/dev/nvidia0, /dev/nvidiactl, etc.) │ +│ 3. Copy driver libs to container rootfs │ +│ 4. Run ldconfig to update library cache │ +│ 5. Container can now use GPU! │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Container Image Requirements + +With driver injection, containers **do not need** to bundle NVIDIA driver libraries. + +**Minimal CUDA image example:** + +```dockerfile +FROM nvidia/cuda:12.4-runtime-ubuntu22.04 +# Your application - no driver installation needed! 
+RUN pip install torch +CMD ["python", "train.py"] +``` + +hypeman injects the following at boot: + +- `libcuda.so` - CUDA driver API +- `libnvidia-ml.so` - NVML (nvidia-smi, monitoring) +- `libnvidia-ptxjitcompiler.so` - PTX JIT compilation +- `libnvidia-nvvm.so` - NVVM compiler +- `libnvidia-gpucomp.so` - GPU compute library +- `nvidia-smi` binary +- `nvidia-modprobe` binary + +## Driver Version Compatibility + +The driver libraries injected by hypeman are pinned to a specific version that matches the kernel modules. This version is tracked in: + +- **Kernel release:** `onkernel/linux` GitHub releases (e.g., `ch-6.12.8-kernel-2-20251211`) +- **hypeman config:** `lib/system/versions.go` - `NvidiaDriverVersion` map + +### Current Driver Version + +| Kernel Version | Driver Version | Release Date | +|---------------|----------------|--------------| +| ch-6.12.8-kernel-2-20251211 | 570.86.16 | 2025-12-11 | + +### CUDA Compatibility + +Driver 570.86.16 supports CUDA 12.8 and earlier. Check [NVIDIA's compatibility matrix](https://docs.nvidia.com/deploy/cuda-compatibility/) for details. + +## Upgrading the Driver + +To upgrade the NVIDIA driver version: + +1. **Choose a new version** from [NVIDIA's Linux drivers](https://www.nvidia.com/Download/index.aspx) + +2. **Update onkernel/linux:** + - Edit `.github/workflows/release.yaml` + - Change `DRIVER_VERSION=` in all locations (search for the current version) + - The workflow file contains comments explaining what to update + - Create a new release tag (e.g., `ch-6.12.8-kernel-2-YYYYMMDD`) + +3. **Update hypeman:** + - Edit `lib/system/versions.go` + - Add new `KernelVersion` constant + - Update `DefaultKernelVersion` + - Update `NvidiaDriverVersion` map entry + - Update `NvidiaModuleURLs` with new release URL + - Update `NvidiaDriverLibURLs` with new release URL + +4.
**Test thoroughly** before deploying: + - Run GPU passthrough E2E tests + - Verify with real CUDA workloads (e.g., ollama inference) + +## Supported GPUs + +All NVIDIA datacenter GPUs supported by the open-gpu-kernel-modules are supported: + +- NVIDIA H100, H200 +- NVIDIA L4, L40, L40S +- NVIDIA A100, A10, A30 +- NVIDIA T4 +- And other Turing/Ampere/Hopper/Ada Lovelace architecture GPUs + +Consumer GPUs (GeForce) are **not** supported by the open kernel modules. + +## Troubleshooting + +### nvidia-smi shows wrong driver version + +The driver version shown by nvidia-smi should match hypeman's configured version. If it differs, the container may have its own driver libraries that are taking precedence. Either: + +- Use a minimal CUDA runtime image without driver libs +- Or ensure the container's driver version matches + +### CUDA initialization failed + +Check that: + +1. Kernel modules are loaded: `cat /proc/modules | grep nvidia` +2. Device nodes exist: `ls -la /dev/nvidia*` +3. Libraries are in LD_LIBRARY_PATH: `ldconfig -p | grep nvidia` + +### Driver/library version mismatch + +Error like `NVML_ERROR_LIB_RM_VERSION_MISMATCH` means the userspace library version doesn't match the kernel module version. This shouldn't happen with hypeman's automatic injection, but can occur if the container has its own driver libraries. + +**Solution:** Use a base image that doesn't include driver libraries, or ensure any bundled libraries match the hypeman driver version. + +### GPU not detected in container + +1. Verify the GPU was attached to the instance: + ```bash + hypeman instance get <instance-id> | jq .devices + ``` + +2. Check the VM console log for module loading errors: + ```bash + cat /var/lib/hypeman/instances/<instance-id>/console.log | grep -i nvidia + ``` + +3.
Verify VFIO binding on the host: + ```bash + ls -la /sys/bus/pci/devices/<pci-address>/driver + ``` + +## Performance Tuning + +### Huge Pages + +For best GPU performance, enable huge pages on the host: + +```bash +echo 1024 > /proc/sys/vm/nr_hugepages +``` + +### IOMMU Configuration + +Ensure IOMMU is properly configured: + +```bash +# Intel +intel_iommu=on iommu=pt + +# AMD +amd_iommu=on iommu=pt +``` + +The `iommu=pt` (passthrough) option improves performance for devices not using VFIO. + diff --git a/lib/devices/README.md new file mode 100644 index 0000000..0e34e66 --- /dev/null +++ b/lib/devices/README.md @@ -0,0 +1,451 @@ +# Device Passthrough + +This package provides GPU and PCI device passthrough for virtual machines using the Linux VFIO (Virtual Function I/O) framework. + +## Overview + +Device passthrough allows a VM to have direct, near-native access to physical hardware (GPUs, network cards, etc.) by bypassing the host's device drivers and giving the guest exclusive control. For a deep dive into the VFIO framework, see the [kernel documentation](https://docs.kernel.org/driver-api/vfio.html).
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ Host │ +│ ┌─────────────┐ ┌─────────────────────────────────┐ │ +│ │ hypeman │ │ VFIO Driver │ │ +│ │ (VMM) │────▶│ /dev/vfio/ │ │ +│ └─────────────┘ └─────────────────────────────────┘ │ +│ │ │ +│ ┌───────────────────────────┼──────────────────────────┐ │ +│ │ IOMMU (hardware) ▼ │ │ +│ │ - Translates guest physical → host physical │ │ +│ │ - Isolates DMA (device can only access VM memory) │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────┐ │ +│ │ GPU (PCIe) │ │ +│ └──────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Package Structure + +``` +lib/devices/ +├── types.go # Device, AvailableDevice, CreateDeviceRequest +├── errors.go # Error definitions +├── discovery.go # PCI device discovery from sysfs +├── vfio.go # VFIO bind/unbind operations +├── manager.go # Manager interface and implementation +├── manager_test.go # Unit tests +├── gpu_e2e_test.go # End-to-end GPU passthrough test (auto-skips if no GPU) +└── scripts/ + └── gpu-reset.sh # GPU recovery script (see Troubleshooting) +``` + +## Example: Full Workflow + +```bash +# 1. Discover available devices +curl localhost:8080/devices/available +# → [{"pci_address": "0000:a2:00.0", "vendor_name": "NVIDIA Corporation", ...}] + +# 2. Register the GPU +curl -X POST localhost:8080/devices \ + -d '{"name": "l4-gpu", "pci_address": "0000:a2:00.0"}' + +# 3. Create instance with GPU (auto-binds to VFIO) +curl -X POST localhost:8080/instances \ + -d '{"name": "ml-training", "image": "nvidia/cuda:12.0-base", "devices": ["l4-gpu"]}' + +# 4. Inside VM: verify GPU +lspci | grep -i nvidia +nvidia-smi + +# 5. Delete instance (auto-unbinds from VFIO) +curl -X DELETE localhost:8080/instances/{id} +# GPU returns to host control +``` + +## Device Lifecycle + +### 1. 
Discovery + +Discover passthrough-capable devices on the host: + +``` +GET /devices/available +``` + +Returns PCI devices that are candidates for passthrough (GPUs, 3D controllers). Each device includes its PCI address, vendor/device IDs, IOMMU group, and current driver. + +### 2. Registration + +Register a device with a unique name: + +``` +POST /devices +{ + "name": "l4-gpu", + "pci_address": "0000:a2:00.0" +} +``` + +Registration does not modify the device's driver binding. The device remains usable by the host until an instance requests it. + +### 3. Instance Creation (Auto-Bind) + +When an instance is created with devices: + +``` +POST /instances +{ + "name": "gpu-workload", + "image": "docker.io/nvidia/cuda:12.0-base", + "devices": ["l4-gpu"] +} +``` + +The system automatically: +1. **Validates** the device exists and isn't attached to another instance +2. **Binds to VFIO** if not already bound (unbinds native driver like `nvidia`) +3. **Passes to cloud-hypervisor** via the `--device` flag +4. **Marks as attached** to prevent concurrent use + +### 4. Instance Deletion (Auto-Unbind) + +When an instance is deleted, the system automatically: +1. **Marks device as detached** +2. **Unbinds from VFIO** (triggers kernel driver probe to restore native driver) + +This returns the device to host control so it can be used by other processes or a new instance. + +### 5. Unregistration + +``` +DELETE /devices/{id} +``` + +Removes the device from hypeman's registry. Fails if the device is currently attached to an instance. + +## Cloud Hypervisor Integration + +Cloud-hypervisor receives device passthrough configuration via the `VmConfig.Devices` field: + +```go +vmConfig.Devices = &[]vmm.DeviceConfig{ + { + Path: "/sys/bus/pci/devices/0000:a2:00.0/", + }, +} +``` + +Cloud-hypervisor then: +1. Opens the VFIO group file (`/dev/vfio/`) +2. Maps device BARs (memory regions) into guest physical address space +3. Configures interrupt routing (MSI/MSI-X) to the guest +4. 
The guest sees a real PCIe device and loads native drivers + +### NVIDIA-Specific Options + +For multi-GPU configurations, cloud-hypervisor supports GPUDirect P2P: + +```go +DeviceConfig{ + Path: "/sys/bus/pci/devices/0000:a2:00.0/", + XNvGpudirectClique: ptr(int8(0)), // Enable P2P within clique 0 +} +``` + +This is not currently exposed through the hypeman API but could be added for HPC workloads. + +## Constraints and Limitations + +### IOMMU Requirements + +- **IOMMU must be enabled** in BIOS and kernel (`intel_iommu=on` or `amd_iommu=on`) +- All devices in an IOMMU group must be passed through together +- Some motherboards place many devices in the same group (ACS override may help) + +### VFIO Module Requirements + +The following kernel modules must be loaded: +```bash +modprobe vfio_pci +modprobe vfio_iommu_type1 +``` + +### Driver Binding + +- Binding to VFIO **unloads the native driver** (e.g., `nvidia`, `amdgpu`) +- Host processes using the device will lose access +- Some drivers (like NVIDIA) may resist unbinding if in use + +### Single Attachment + +A device can only be attached to one instance at a time. Attempts to attach an already-attached device will fail. + +### No Hot-Plug + +Devices must be specified at instance creation time. Hot-adding devices to a running VM is not currently supported (though cloud-hypervisor has this capability). 
+ +### Guest Driver Requirements + +The guest must have appropriate drivers: +- **NVIDIA GPUs**: Install NVIDIA drivers in the guest image +- **AMD GPUs**: Install amdgpu/ROCm in the guest image + +### Performance Considerations + +- **ACS (Access Control Services)**: Required for proper isolation on some systems +- **Huge Pages**: Recommended for GPU workloads (`hugepages=on` in cloud-hypervisor) +- **CPU Pinning**: Can improve latency for GPU compute workloads + +## Troubleshooting + +### GPU Reset Script + +If GPU passthrough tests fail or hang, the GPU may be left in a bad state (still bound to vfio-pci, or stuck without a driver). Use the provided reset script: + +```bash +# Reset all NVIDIA GPUs to their native driver +sudo ./lib/devices/scripts/gpu-reset.sh + +# Reset a specific GPU +sudo ./lib/devices/scripts/gpu-reset.sh 0000:a2:00.0 +``` + +The script will: +1. Kill any stuck cloud-hypervisor processes holding the GPU +2. Unbind from vfio-pci if still bound +3. Clear `driver_override` +4. Trigger driver probe to rebind to the nvidia driver +5. Restart `nvidia-persistenced` + +### Common Issues + +#### VFIO Bind Hangs + +**Symptom**: `BindToVFIO` hangs indefinitely. + +**Cause**: The `nvidia-persistenced` service keeps `/dev/nvidia*` open, preventing driver unbind. + +**Solution**: The code now automatically stops `nvidia-persistenced` before unbinding. If you're testing manually: +```bash +sudo systemctl stop nvidia-persistenced +# ... do VFIO bind/unbind ... +sudo systemctl start nvidia-persistenced +``` + +#### VM Exec Fails After Boot + +**Symptom**: VM boots but exec commands time out. + +**Cause**: Usually the container's main process exited (e.g., `alpine` image runs `/bin/sh` which exits immediately), causing init to exit and the VM to kernel panic. + +**Solution**: Use an image with a long-running process (e.g., `nginx:alpine`) or ensure your container has a persistent entrypoint. 
+ +#### GPU Not Restored After Test + +**Symptom**: GPU has no driver bound, `nvidia-smi` fails. + +**Solution**: +```bash +# Trigger kernel driver probe +sudo sh -c 'echo 0000:a2:00.0 > /sys/bus/pci/drivers_probe' +# Restart nvidia-persistenced +sudo systemctl start nvidia-persistenced +# Verify +nvidia-smi +``` + +If that fails, a system **reboot** may be necessary. + +#### VFIO Modules Not Loaded + +**Symptom**: `ErrVFIONotAvailable` error. + +**Solution**: +```bash +sudo modprobe vfio_pci vfio_iommu_type1 +# Verify +ls /dev/vfio/ +``` + +Add to `/etc/modules-load.d/vfio.conf` for persistence across reboots. + +#### IOMMU Not Enabled + +**Symptom**: No IOMMU groups found, passthrough fails. + +**Solution**: Add kernel parameter to bootloader: +- Intel: `intel_iommu=on iommu=pt` +- AMD: `amd_iommu=on iommu=pt` + +Then reboot. + +### Running the E2E Test + +The GPU passthrough E2E test **automatically detects** GPU availability and skips if prerequisites aren't met. + +**Why GPU tests require root**: Unlike network tests which can use Linux capabilities (`CAP_NET_ADMIN`), GPU passthrough requires writing to sysfs files (`/sys/bus/pci/drivers/*/unbind`, etc.) which are protected by standard Unix file permissions (owned by root, mode 0200). Capabilities don't bypass DAC (discretionary access control) for file writes. + +Prerequisites for the test to run (not skip): +- **Root permissions** (sudo) - required for sysfs driver operations +- NVIDIA GPU on host +- IOMMU enabled (`intel_iommu=on` or `amd_iommu=on`) +- `vfio_pci` and `vfio_iommu_type1` modules loaded +- `/sbin` in PATH (for `mkfs.ext4`) + +```bash +# Prepare the environment +sudo modprobe vfio_pci vfio_iommu_type1 + +# Run via make - test auto-skips if not root or no GPU +make test + +# Or run directly with sudo +sudo env PATH=$PATH:/sbin:/usr/sbin \ + go test -v -run TestGPUPassthrough -timeout 5m ./lib/devices/... +``` + +The test will: +1. 
Check prerequisites and skip if not met (not root, no GPU, no IOMMU, etc.) +2. Discover available NVIDIA GPUs +3. Register the first GPU found +4. Create a VM with GPU passthrough +5. Verify the GPU is visible inside the VM +6. Clean up (delete VM, unbind from VFIO, restore nvidia driver) + +## Future Plans: GPU Sharing Across Multiple VMs + +### The Problem + +With current VFIO passthrough, a GPU is assigned **exclusively** to one VM. To share a single GPU across multiple VMs (e.g., give each VM a "slice"), you need NVIDIA's **vGPU (GRID)** technology. + +### Why MIG Alone Doesn't Help + +**MIG (Multi-Instance GPU)** partitions a GPU into isolated instances at the hardware level, but: + +- MIG partitions are **not separate PCI devices**—the GPU remains one PCI endpoint +- MIG partitions are accessed via CUDA APIs (`CUDA_VISIBLE_DEVICES=MIG-`) +- You can only VFIO-passthrough the **whole GPU** to one VM +- MIG is useful for workload isolation **within** a single host or VM, not for multi-VM sharing + +``` +Physical GPU (0000:a2:00.0) ─── still ONE PCI device + └── MIG partitions (logical, not separate devices) + ├── MIG Instance 0 ─┐ + ├── MIG Instance 1 ─┼── All accessed via CUDA on the same GPU + └── MIG Instance 2 ─┘ +``` + +**Supported MIG Hardware**: A100, A30, H100, H200 (NOT L4 or consumer GPUs) + +### vGPU/mdev: The Only Path to Multi-VM GPU Sharing + +To assign GPU shares to **separate VMs**, NVIDIA requires their **vGPU (GRID)** technology, which uses the Linux mediated device (mdev) framework. + +#### Cloud-Hypervisor mdev Support Status + +Cloud-hypervisor **does** support mdev passthrough: + +```bash +cloud-hypervisor --device path=/sys/bus/mdev/devices// +``` + +However, NVIDIA's proprietary vGPU manager has a QEMU-specific quirk: it reads the VMM process's `/proc//cmdline` looking for a `-uuid` argument to map mdev UUIDs to VMs. This doesn't work out-of-the-box with cloud-hypervisor. 
+ +**Workarounds** (from [cloud-hypervisor#5319](https://github.com/cloud-hypervisor/cloud-hypervisor/issues/5319)): +- Patch CH to accept a dummy `-uuid` flag +- Use wrapper scripts that inject the UUID into the process name +- Wait for NVIDIA to fix their driver's VMM assumptions + +#### vGPU Requirements + +- **Hardware**: Datacenter GPUs (A100, L40, etc.) +- **Licensing**: NVIDIA GRID subscription ($$/GPU/year) +- **Host Software**: NVIDIA vGPU Manager installed on host +- **Guest Drivers**: vGPU-aware guest drivers + +### Design Changes for mdev/vGPU Support + +#### 1. New Device Type: `MdevDevice` + +```go +type MdevDevice struct { + UUID string // mdev instance UUID + ParentGPU string // PCI address of parent GPU + Type string // vGPU type (e.g., "nvidia-256") + Available bool // Not assigned to a VM +} +``` + +#### 2. Discovery Extensions + +```go +// List mdev types supported by a GPU +func (m *manager) ListMdevTypes(ctx context.Context, pciAddress string) ([]MdevType, error) + +// List existing mdev instances +func (m *manager) ListMdevInstances(ctx context.Context) ([]MdevDevice, error) + +// Create an mdev instance +func (m *manager) CreateMdevInstance(ctx context.Context, pciAddress, mdevType string) (*MdevDevice, error) + +// Destroy an mdev instance +func (m *manager) DestroyMdevInstance(ctx context.Context, uuid string) error +``` + +#### 3. Passthrough Mechanism + +mdev devices use a different sysfs path: + +``` +# mdev device path +/sys/bus/mdev/devices// + +# vs VFIO-PCI (current) +/sys/bus/pci/devices/0000:a2:00.0/ +``` + +Cloud-hypervisor's `--device` flag already accepts mdev paths. + +#### 4. 
NVIDIA vGPU Workaround + +To work around NVIDIA's QEMU-specific UUID detection, we may need to: +- Add a `--platform uuid=` option to cloud-hypervisor invocation +- Or use a wrapper that sets the process name appropriately + +### Implementation Phases + +**Phase 1**: mdev Discovery & Passthrough +- Detect mdev-capable GPUs +- List available mdev types and instances +- Pass mdev devices to VMs (path already works) + +**Phase 2**: mdev Lifecycle Management +- Create/destroy mdev instances via sysfs +- API endpoints for mdev management + +**Phase 3**: NVIDIA vGPU Integration +- Implement UUID workaround for NVIDIA's driver +- Test with GRID licensing +- Document guest driver requirements + +### How vGPU + MIG Work Together + +vGPU creates mdev devices that can be backed by MIG partitions, giving you both hardware isolation (MIG) and multi-VM assignment (vGPU): + +``` +Physical GPU (one PCI device) + │ + ├── Without vGPU: VFIO passthrough gives whole GPU to ONE VM + │ + └── With vGPU (GRID license required): + └── MIG Mode enabled on host + ├── MIG Instance 0 ──→ vGPU mdev A ──→ VM 1 + ├── MIG Instance 1 ──→ vGPU mdev B ──→ VM 2 + └── MIG Instance 2 ──→ vGPU mdev C ──→ VM 3 +``` + +Without vGPU, MIG is only useful for workload isolation on the host or within a single VM that owns the whole GPU. 
diff --git a/lib/devices/discovery.go b/lib/devices/discovery.go new file mode 100644 index 0000000..b04213c --- /dev/null +++ b/lib/devices/discovery.go @@ -0,0 +1,279 @@ +package devices + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" +) + +const ( + sysfsDevicesPath = "/sys/bus/pci/devices" + sysfsIOMMUPath = "/sys/kernel/iommu_groups" +) + +// pciAddressPattern matches PCI addresses like "0000:a2:00.0" +var pciAddressPattern = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`) + +// ValidatePCIAddress validates that a string is a valid PCI address format +func ValidatePCIAddress(addr string) bool { + return pciAddressPattern.MatchString(addr) +} + +// DiscoverAvailableDevices scans sysfs for PCI devices that can be used for passthrough +// It filters for devices that are likely candidates (GPUs, network cards, etc.) +func DiscoverAvailableDevices() ([]AvailableDevice, error) { + entries, err := os.ReadDir(sysfsDevicesPath) + if err != nil { + return nil, fmt.Errorf("read sysfs devices: %w", err) + } + + var devices []AvailableDevice + for _, entry := range entries { + addr := entry.Name() + if !ValidatePCIAddress(addr) { + continue + } + + device, err := readDeviceInfo(addr) + if err != nil { + // Skip devices we can't read + continue + } + + // Filter for passthrough-capable devices (GPUs, 3D controllers, etc.) 
+ if isPassthroughCandidate(device) { + devices = append(devices, *device) + } + } + + return devices, nil +} + +// GetDeviceInfo reads information about a specific PCI device +func GetDeviceInfo(pciAddress string) (*AvailableDevice, error) { + if !ValidatePCIAddress(pciAddress) { + return nil, ErrInvalidPCIAddress + } + + devicePath := filepath.Join(sysfsDevicesPath, pciAddress) + if _, err := os.Stat(devicePath); os.IsNotExist(err) { + return nil, ErrDeviceNotFound + } + + return readDeviceInfo(pciAddress) +} + +// readDeviceInfo reads device information from sysfs +func readDeviceInfo(pciAddress string) (*AvailableDevice, error) { + devicePath := filepath.Join(sysfsDevicesPath, pciAddress) + + vendorID, err := readSysfsFile(filepath.Join(devicePath, "vendor")) + if err != nil { + return nil, fmt.Errorf("read vendor: %w", err) + } + vendorID = strings.TrimPrefix(vendorID, "0x") + + deviceID, err := readSysfsFile(filepath.Join(devicePath, "device")) + if err != nil { + return nil, fmt.Errorf("read device: %w", err) + } + deviceID = strings.TrimPrefix(deviceID, "0x") + + iommuGroup, err := readIOMMUGroup(pciAddress) + if err != nil { + return nil, fmt.Errorf("read iommu group: %w", err) + } + + driver := readCurrentDriver(pciAddress) + + // Get device class to determine type + classCode, _ := readSysfsFile(filepath.Join(devicePath, "class")) + + return &AvailableDevice{ + PCIAddress: pciAddress, + VendorID: vendorID, + DeviceID: deviceID, + VendorName: getVendorName(vendorID), + DeviceName: getDeviceName(vendorID, deviceID, classCode), + IOMMUGroup: iommuGroup, + CurrentDriver: driver, + }, nil +} + +// readSysfsFile reads and trims a sysfs file +func readSysfsFile(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + return strings.TrimSpace(string(data)), nil +} + +// readIOMMUGroup reads the IOMMU group number for a device +func readIOMMUGroup(pciAddress string) (int, error) { + iommuLink := 
filepath.Join(sysfsDevicesPath, pciAddress, "iommu_group") + target, err := os.Readlink(iommuLink) + if err != nil { + return -1, fmt.Errorf("read iommu_group link: %w", err) + } + + // Target is like "../../../../kernel/iommu_groups/82" + groupStr := filepath.Base(target) + group, err := strconv.Atoi(groupStr) + if err != nil { + return -1, fmt.Errorf("parse iommu group: %w", err) + } + + return group, nil +} + +// readCurrentDriver reads the current driver bound to the device +func readCurrentDriver(pciAddress string) *string { + driverLink := filepath.Join(sysfsDevicesPath, pciAddress, "driver") + target, err := os.Readlink(driverLink) + if err != nil { + // No driver bound + return nil + } + + driver := filepath.Base(target) + return &driver +} + +// GetIOMMUGroupDevices returns all PCI devices in the same IOMMU group +func GetIOMMUGroupDevices(iommuGroup int) ([]string, error) { + groupPath := filepath.Join(sysfsIOMMUPath, strconv.Itoa(iommuGroup), "devices") + entries, err := os.ReadDir(groupPath) + if err != nil { + return nil, fmt.Errorf("read iommu group devices: %w", err) + } + + var devices []string + for _, entry := range entries { + devices = append(devices, entry.Name()) + } + return devices, nil +} + +// isPassthroughCandidate determines if a device is a good candidate for passthrough +func isPassthroughCandidate(device *AvailableDevice) bool { + // Check class code for GPUs and 3D controllers + // Class 0x03 = Display controller + // Subclass 0x00 = VGA controller + // Subclass 0x02 = 3D controller (like NVIDIA compute GPUs) + devicePath := filepath.Join(sysfsDevicesPath, device.PCIAddress) + classCode, err := readSysfsFile(filepath.Join(devicePath, "class")) + if err != nil { + return false + } + + classCode = strings.TrimPrefix(classCode, "0x") + if len(classCode) >= 4 { + classPrefix := classCode[:4] + // 0300 = VGA controller, 0302 = 3D controller + if classPrefix == "0300" || classPrefix == "0302" { + return true + } + } + + // Also include 
NVIDIA devices by vendor ID + if device.VendorID == "10de" { + return true + } + + return false +} + +// getVendorName returns a human-readable vendor name +func getVendorName(vendorID string) string { + vendors := map[string]string{ + "10de": "NVIDIA Corporation", + "1002": "AMD/ATI", + "8086": "Intel Corporation", + } + if name, ok := vendors[vendorID]; ok { + return name + } + return "Unknown Vendor" +} + +// getDeviceName returns a human-readable device name based on class and IDs +func getDeviceName(vendorID, deviceID, classCode string) string { + // For NVIDIA, provide some common device names. + // Sources: + // - NVIDIA Driver README, Appendix A "Supported NVIDIA GPU Products": + // https://download.nvidia.com/XFree86/Linux-x86_64/570.133.07/README/supportedchips.html + // - PCI ID Database: https://pci-ids.ucw.cz/read/PC/10de + if vendorID == "10de" { + nvidiaDevices := map[string]string{ + // H100 series + "2321": "H100 NVL", + "2330": "H100 SXM5 80GB", + "2331": "H100 PCIe", + "2339": "H100", + // H200 series + "2335": "H200", + // L4 + "27b8": "L4", + // L40 series + "26b5": "L40", + "26b9": "L40S", + // A100 series + "20b0": "A100 SXM4 40GB", + "20b2": "A100 SXM4 80GB", + "20b5": "A100 PCIe 40GB", + "20f1": "A100 PCIe 80GB", + // A30/A40 + "20b7": "A30", + "2235": "A40", + // RTX 4000 series (datacenter) + "2684": "RTX 4090", + "27b0": "RTX 4090 D", + // V100 series + "1db4": "V100 PCIe 16GB", + "1db5": "V100 SXM2 16GB", + "1db6": "V100 PCIe 32GB", + } + if name, ok := nvidiaDevices[deviceID]; ok { + return name + } + } + + // Fall back to class-based description + classCode = strings.TrimPrefix(classCode, "0x") + if len(classCode) >= 4 { + switch classCode[:4] { + case "0300": + return "VGA Controller" + case "0302": + return "3D Controller" + case "0403": + return "Audio Device" + } + } + + return "PCI Device" +} + +// DetermineDeviceType determines the DeviceType based on device properties +func DetermineDeviceType(device *AvailableDevice) 
DeviceType { + devicePath := filepath.Join(sysfsDevicesPath, device.PCIAddress) + classCode, err := readSysfsFile(filepath.Join(devicePath, "class")) + if err != nil { + return DeviceTypeGeneric + } + + classCode = strings.TrimPrefix(classCode, "0x") + if len(classCode) >= 4 { + classPrefix := classCode[:4] + // 0300 = VGA controller, 0302 = 3D controller + if classPrefix == "0300" || classPrefix == "0302" { + return DeviceTypeGPU + } + } + + return DeviceTypeGeneric +} diff --git a/lib/devices/errors.go b/lib/devices/errors.go new file mode 100644 index 0000000..afacaf2 --- /dev/null +++ b/lib/devices/errors.go @@ -0,0 +1,40 @@ +package devices + +import "errors" + +var ( + // ErrNotFound is returned when a device is not found + ErrNotFound = errors.New("device not found") + + // ErrInUse is returned when a device is currently attached to an instance + ErrInUse = errors.New("device is in use") + + // ErrNotBound is returned when a VFIO operation requires the device to be bound + ErrNotBound = errors.New("device is not bound to VFIO") + + // ErrAlreadyBound is returned when trying to bind a device that's already bound to VFIO + ErrAlreadyBound = errors.New("device is already bound to VFIO") + + // ErrAlreadyExists is returned when trying to register a device that already exists + ErrAlreadyExists = errors.New("device already exists") + + // ErrInvalidName is returned when the device name doesn't match the required pattern + ErrInvalidName = errors.New("device name must match pattern ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$") + + // ErrNameExists is returned when a device with the same name already exists + ErrNameExists = errors.New("device name already exists") + + // ErrInvalidPCIAddress is returned when the PCI address format is invalid + ErrInvalidPCIAddress = errors.New("invalid PCI address format") + + // ErrDeviceNotFound is returned when the PCI device doesn't exist on the host + ErrDeviceNotFound = errors.New("PCI device not found on host") + + // ErrVFIONotAvailable 
is returned when VFIO modules are not loaded + ErrVFIONotAvailable = errors.New("VFIO is not available (modules not loaded)") + + // ErrIOMMUGroupConflict is returned when not all devices in IOMMU group can be passed through + ErrIOMMUGroupConflict = errors.New("IOMMU group contains other devices that must also be passed through") +) + + diff --git a/lib/devices/gpu_e2e_test.go b/lib/devices/gpu_e2e_test.go new file mode 100644 index 0000000..4348ebd --- /dev/null +++ b/lib/devices/gpu_e2e_test.go @@ -0,0 +1,353 @@ +package devices_test + +import ( + "bytes" + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestGPUPassthrough is an E2E test that verifies GPU passthrough works. +// +// This test automatically detects GPU availability and skips if: +// - No NVIDIA GPU is found +// - IOMMU is not enabled +// - VFIO modules are not loaded +// - Not running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestGPUPassthrough ./lib/devices/... +// +// WARNING: This test will unbind the GPU from the nvidia driver, which may +// disrupt other processes using the GPU. The test attempts to restore the +// nvidia driver binding on cleanup. 
+func TestGPUPassthrough(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + // Log that prerequisites passed + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("GPU test prerequisites met: %d IOMMU groups found", len(groups)) + + // Setup test infrastructure + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers (nil meter/tracer disables metrics/tracing) + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 100*1024*1024*1024, nil) // 100GB max volume storage + limits := instances.ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB + } + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Discover available GPUs + t.Log("Step 1: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + // Find an NVIDIA GPU + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found on this system") + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + // Check GPU is in a usable state (has a driver bound) + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound 
- may need reboot to recover. Run: sudo reboot") + } + + // Verify the driver path exists (GPU not in broken state) + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing at %s - GPU in broken state, reboot required", driverPath) + } + + // Step 2: Register the GPU + t.Log("Step 2: Registering GPU...") + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered device: %s (ID: %s)", device.Name, device.Id) + + // Store original driver for cleanup + originalDriver := driverStr + + // Cleanup: always unregister device and try to restore original driver + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + + // Try to restore original driver binding via driver_probe + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + t.Logf("Cleanup: Triggering driver probe to restore %s driver...", originalDriver) + // Use driver_probe to let the kernel find and bind the right driver + probePath := "/sys/bus/pci/drivers_probe" + if err := os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200); err != nil { + t.Logf("Warning: Could not trigger driver probe: %v (may need reboot)", err) + } else { + t.Logf("Cleanup: Driver probe triggered for %s", targetGPU.PCIAddress) + } + } + }) + + // Step 3: Ensure system files (kernel, initrd) + t.Log("Step 3: Ensuring system files...") + err = systemMgr.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Step 4: Pull nginx:alpine (nginx keeps running unlike plain alpine which exits immediately) + t.Log("Step 4: Pulling nginx:alpine image...") + createdImg, createErr := imageMgr.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + 
require.NoError(t, createErr, "CreateImage should succeed") + t.Logf("CreateImage returned: name=%s, status=%s", createdImg.Name, createdImg.Status) + + // Use the name returned from CreateImage (it may be normalized) + imageName := createdImg.Name + + // Wait for image to be ready + var img *images.Image + for i := 0; i < 90; i++ { + img, err = imageMgr.GetImage(ctx, imageName) + if err != nil { + if i < 5 || i%10 == 0 { + t.Logf("GetImage attempt %d: error=%v", i+1, err) + } + } else { + if i < 5 || i%10 == 0 { + t.Logf("GetImage attempt %d: status=%s", i+1, img.Status) + } + if img.Status == images.StatusReady { + break + } + if img.Status == images.StatusFailed { + errMsg := "unknown" + if img.Error != nil { + errMsg = *img.Error + } + t.Fatalf("Image build failed: %s", errMsg) + } + } + time.Sleep(1 * time.Second) + } + require.NotNil(t, img, "Image should exist after 90 seconds") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Log("Image ready") + + // Step 5: Create instance with GPU (with timeout to prevent hang on VFIO issues) + t.Log("Step 5: Creating instance with GPU...") + createCtx, createCancel := context.WithTimeout(ctx, 60*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "gpu-test", + Image: "docker.io/library/nginx:alpine", + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Devices: []string{"test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + // Cleanup: always delete instance + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Step 6: Wait for instance to be ready + t.Log("Step 6: Waiting for instance to be ready...") + err = waitForInstanceReady(ctx, t, 
instanceMgr, inst.Id, 30*time.Second) + require.NoError(t, err) + t.Log("Instance is ready") + + // Step 7: Verify GPU is visible inside VM + // Note: Alpine doesn't have lspci, so we check /sys/bus/pci directly for NVIDIA vendor ID (0x10de) + t.Log("Step 7: Verifying GPU visibility inside VM...") + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Create a context with timeout for exec operations + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Retry exec a few times (exec agent may need time to start) + var stdout, stderr outputBuffer + var execErr error + // Command to find NVIDIA devices by checking vendor IDs (0x10de = NVIDIA) + checkGPUCmd := "cat /sys/bus/pci/devices/*/vendor 2>/dev/null | grep -i 10de && echo 'NVIDIA_FOUND'" + + for i := 0; i < 15; i++ { + stdout = outputBuffer{} + stderr = outputBuffer{} + + _, execErr = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", checkGPUCmd}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + + if execErr == nil { + break + } + t.Logf("Exec attempt %d/15 failed: %v", i+1, execErr) + time.Sleep(1 * time.Second) + } + if execErr != nil { + // Print console log for debugging + p := paths.New(tmpDir) + consoleLogPath := p.InstanceAppLog(inst.Id) + if consoleLog, err := os.ReadFile(consoleLogPath); err == nil { + t.Logf("=== VM Console Log ===\n%s\n=== End Console Log ===", string(consoleLog)) + } else { + t.Logf("Could not read console log: %v", err) + } + } + require.NoError(t, execErr, "exec should succeed") + + pciOutput := stdout.String() + t.Logf("PCI vendor check output:\n%s", pciOutput) + + // Verify NVIDIA device is visible (vendor ID 0x10de) + assert.True(t, + strings.Contains(pciOutput, "NVIDIA_FOUND") || + strings.Contains(strings.ToLower(pciOutput), "10de"), + "NVIDIA GPU (vendor 0x10de) should be visible in guest") + + t.Log("✅ GPU passthrough test 
PASSED!") +} + +// checkGPUTestPrerequisites checks if GPU passthrough test can run. +// Returns empty string if all prerequisites are met, otherwise returns skip reason. +func checkGPUTestPrerequisites() string { + // Check KVM + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + return "GPU passthrough test requires /dev/kvm" + } + + // Check VFIO modules + if _, err := os.Stat("/dev/vfio/vfio"); os.IsNotExist(err) { + return "GPU passthrough test requires VFIO (modprobe vfio_pci vfio_iommu_type1)" + } + + // Check IOMMU is enabled by looking for IOMMU groups + groups, err := os.ReadDir("/sys/kernel/iommu_groups") + if err != nil || len(groups) == 0 { + return "GPU passthrough test requires IOMMU (intel_iommu=on or amd_iommu=on)" + } + + // Check for NVIDIA GPU + available, err := devices.DiscoverAvailableDevices() + if err != nil { + return "GPU passthrough test failed to discover devices: " + err.Error() + } + + hasNvidiaGPU := false + for _, d := range available { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + hasNvidiaGPU = true + break + } + } + if !hasNvidiaGPU { + return "GPU passthrough test requires an NVIDIA GPU" + } + + // GPU passthrough requires root (euid=0) for sysfs driver bind/unbind operations. + // Unlike network operations which can use CAP_NET_ADMIN, sysfs file writes are + // protected by standard Unix DAC (file permissions), not just capabilities. + // The files in /sys/bus/pci/drivers/ are owned by root with mode 0200. 
+ if os.Geteuid() != 0 { + return "GPU passthrough test requires root (sudo) for sysfs driver operations" + } + + return "" // All prerequisites met +} + +func waitForInstanceReady(ctx context.Context, t *testing.T, mgr instances.Manager, id string, timeout time.Duration) error { + t.Helper() + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + inst, err := mgr.GetInstance(ctx, id) + if err != nil { + time.Sleep(500 * time.Millisecond) + continue + } + + if inst.State == instances.StateRunning { + // Additional check: wait a bit for exec agent + time.Sleep(2 * time.Second) + return nil + } + + time.Sleep(500 * time.Millisecond) + } + + return context.DeadlineExceeded +} + +type outputBuffer struct { + buf bytes.Buffer +} + +func (b *outputBuffer) Write(p []byte) (n int, err error) { + return b.buf.Write(p) +} + +func (b *outputBuffer) String() string { + return b.buf.String() +} diff --git a/lib/devices/gpu_inference_test.go b/lib/devices/gpu_inference_test.go new file mode 100644 index 0000000..0749b84 --- /dev/null +++ b/lib/devices/gpu_inference_test.go @@ -0,0 +1,536 @@ +package devices_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + osExec "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/go-chi/chi/v5" + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/daemon" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/registry" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// persistentTestDataDir is used to persist volumes between test runs. +// This allows the ollama model cache to survive across test executions. +// Note: Uses /var/lib instead of /tmp because /tmp often has limited space +// and the custom CUDA+Ollama image is ~4GB. +const persistentTestDataDir = "/var/lib/hypeman-gpu-inference-test" + +// ollamaCudaDockerImage is the name we use for the custom CUDA+Ollama image +const ollamaCudaDockerImage = "ollama-cuda:test" + +// TestGPUInference is an E2E test that verifies Ollama GPU inference works with VFIO passthrough. +// +// This test: +// 1. Builds a custom Docker image with NVIDIA CUDA runtime + Ollama +// 2. Pushes the image to hypeman's test registry +// 3. Launches a VM with GPU passthrough + the image +// 4. Runs `ollama run tinyllama` to perform GPU-accelerated inference +// 5. Verifies the model generates output +// +// The custom image bundles CUDA libraries, enabling Ollama to detect and use the GPU +// without needing nvidia-docker/nvidia-container-toolkit. +// +// Prerequisites: +// - NVIDIA GPU on host +// - IOMMU enabled +// - VFIO modules loaded (modprobe vfio_pci) +// - Docker installed (for building custom image) +// - Running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestGPUInference -timeout 30m ./lib/devices/... 
+// +// To clean up: +// +// sudo rm -rf /var/lib/hypeman-gpu-inference-test +// docker rmi ollama-cuda:test +func TestGPUInference(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + // Check Docker is available + if _, err := osExec.LookPath("docker"); err != nil { + t.Skip("Docker not installed - required for building custom CUDA image") + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("GPU inference test prerequisites met: %d IOMMU groups found", len(groups)) + + // Use persistent directory for volume storage (survives between test runs) + if err := os.MkdirAll(persistentTestDataDir, 0755); err != nil { + t.Fatalf("Failed to create persistent test directory: %v", err) + } + p := paths.New(persistentTestDataDir) + + cfg := &config.Config{ + DataDir: persistentTestDataDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 100*1024*1024*1024, nil) + limits := instances.ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, + } + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Build custom CUDA+Ollama image + t.Log("Step 1: Building custom CUDA+Ollama Docker image...") + dockerfilePath := getDockerfilePath(t) + buildCustomCudaImage(t, dockerfilePath, ollamaCudaDockerImage) + + // Step 2: Set up test registry and push the image + t.Log("Step 2: Pushing custom image to hypeman registry...") + reg, err := registry.New(p, imageMgr) + require.NoError(t, err) + + router := chi.NewRouter() + router.Mount("/v2", reg.Handler()) 
+ ts := httptest.NewServer(router) + t.Cleanup(ts.Close) + + serverHost := strings.TrimPrefix(ts.URL, "http://") + pushLocalDockerImage(t, ollamaCudaDockerImage, serverHost) + t.Log("Push complete") + + // Wait for image conversion - find image by listing since digest may change during Docker->OCI conversion + t.Log("Waiting for image conversion...") + var img *images.Image + var imageName string + for i := 0; i < 300; i++ { // 5 minutes for large CUDA image + // List images and find our ollama-cuda image + allImages, listErr := imageMgr.ListImages(ctx) + if listErr == nil { + for _, candidate := range allImages { + if strings.Contains(candidate.Name, "ollama-cuda") { + img = &candidate + imageName = candidate.Name + break + } + } + } + if img != nil && img.Status == images.StatusReady { + break + } + if img != nil && img.Status == images.StatusFailed { + errMsg := "unknown" + if img.Error != nil { + errMsg = *img.Error + } + t.Fatalf("Image conversion failed: %s", errMsg) + } + if i%30 == 0 { + status := "not found" + if img != nil { + status = string(img.Status) + } + t.Logf("Waiting for image conversion... 
(%d/300, status=%s)", i+1, status) + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist after 5 minutes") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Logf("Image ready: %s (digest: %s)", imageName, img.Digest) + + // Step 3: Discover and register GPU + t.Log("Step 3: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound - may need reboot") + } + + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing - GPU in broken state") + } + + // Register GPU + t.Log("Step 4: Registering GPU...") + device, err := deviceMgr.GetDevice(ctx, "inference-gpu") + if err != nil { + device, err = deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "inference-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered new device: %s (ID: %s)", device.Name, device.Id) + } else { + t.Logf("Using existing device: %s (ID: %s)", device.Name, device.Id) + } + + originalDriver := driverStr + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + probePath := "/sys/bus/pci/drivers_probe" + os.WriteFile(probePath, 
[]byte(targetGPU.PCIAddress), 0200) + } + }) + + // Step 5: Initialize network and create volume + t.Log("Step 5: Initializing network...") + err = networkMgr.Initialize(ctx, []string{}) + require.NoError(t, err) + + t.Log("Step 6: Setting up persistent volume for Ollama models...") + vol, err := volumeMgr.GetVolumeByName(ctx, "ollama-models") + if err != nil { + vol, err = volumeMgr.CreateVolume(ctx, volumes.CreateVolumeRequest{ + Name: "ollama-models", + SizeGb: 5, + }) + require.NoError(t, err) + t.Logf("Created new volume: %s", vol.Name) + } else { + t.Logf("Using existing volume: %s", vol.Name) + } + + // Step 7: Ensure system files + t.Log("Step 7: Ensuring system files...") + err = systemMgr.EnsureSystemFiles(ctx) + require.NoError(t, err) + + // Step 8: Create instance with GPU + t.Log("Step 8: Creating instance with GPU and custom CUDA image...") + createCtx, createCancel := context.WithTimeout(ctx, 120*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "gpu-inference-test", + Image: imageName, + Size: 8 * 1024 * 1024 * 1024, // 8GB RAM for CUDA + HotplugSize: 8 * 1024 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 4, + Env: map[string]string{ + "OLLAMA_HOST": "0.0.0.0", + "OLLAMA_MODELS": "/data/models", + }, + NetworkEnabled: true, + Devices: []string{"inference-gpu"}, + Volumes: []instances.VolumeAttachment{ + {VolumeID: vol.Id, MountPath: "/data/models", Readonly: false}, + }, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Step 9: Wait for instance + t.Log("Step 9: Waiting for instance to be ready...") + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 60*time.Second) + require.NoError(t, err) + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Step 10: Wait for 
Ollama server + t.Log("Step 10: Waiting for Ollama server to be ready...") + ollamaReady := false + for i := 0; i < 60; i++ { // 60 seconds for CUDA init + healthCtx, healthCancel := context.WithTimeout(ctx, 5*time.Second) + var healthStdout, healthStderr inferenceOutputBuffer + + _, err = exec.ExecIntoInstance(healthCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, + Stdout: &healthStdout, + Stderr: &healthStderr, + }) + healthCancel() + + output := healthStdout.String() + if err == nil && !strings.Contains(output, "could not connect") { + t.Logf("Ollama is ready (attempt %d)", i+1) + ollamaReady = true + break + } + if i%10 == 0 { + t.Logf("Waiting for Ollama (attempt %d/60)...", i+1) + } + time.Sleep(time.Second) + } + require.True(t, ollamaReady, "Ollama server should become ready") + + // Step 11: Check GPU detection + t.Log("Step 11: Checking GPU detection...") + gpuCheckCtx, gpuCheckCancel := context.WithTimeout(ctx, 10*time.Second) + defer gpuCheckCancel() + + // Check nvidia-smi (should work now with CUDA image) + var nvidiaSmiStdout, nvidiaSmiStderr inferenceOutputBuffer + _, _ = exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "nvidia-smi 2>&1 || echo 'nvidia-smi failed'"}, + Stdout: &nvidiaSmiStdout, + Stderr: &nvidiaSmiStderr, + }) + nvidiaSmiOutput := nvidiaSmiStdout.String() + if strings.Contains(nvidiaSmiOutput, "NVIDIA-SMI") { + t.Logf("✓ nvidia-smi works! 
GPU detected:\n%s", truncateHead(nvidiaSmiOutput, 500)) + } else { + t.Logf("nvidia-smi output: %s", nvidiaSmiOutput) + } + + // Check NVIDIA kernel modules + var modulesStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia"}, + Stdout: &modulesStdout, + }) + if modulesStdout.String() != "" { + t.Logf("✓ NVIDIA kernel modules loaded:\n%s", modulesStdout.String()) + } + + // Check device nodes + var devStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1"}, + Stdout: &devStdout, + }) + if !strings.Contains(devStdout.String(), "No such file") { + t.Logf("✓ NVIDIA device nodes:\n%s", devStdout.String()) + } + + // Step 12: Pull model via exec (needed for first time) + t.Log("Step 12: Ensuring TinyLlama model is available...") + + var listStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, + Stdout: &listStdout, + }) + + if !strings.Contains(listStdout.String(), "tinyllama") { + t.Log("Model not cached - pulling now...") + pullCtx, pullCancel := context.WithTimeout(ctx, 10*time.Minute) + defer pullCancel() + + var pullStdout inferenceOutputBuffer + _, pullErr := exec.ExecIntoInstance(pullCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama pull tinyllama 2>&1"}, + Stdout: &pullStdout, + }) + t.Logf("Pull output: %s", truncateTail(pullStdout.String(), 500)) + require.NoError(t, pullErr, "ollama pull should succeed") + } else { + t.Log("Model already cached") + } + + // Step 13: Test inference via HTTP API using the VM's private IP + // This is much faster than using `ollama run` CLI + t.Log("Step 13: Running inference via Ollama API...") + require.NotEmpty(t, actualInst.IP, 
"Instance should have a private IP") + ollamaURL := fmt.Sprintf("http://%s:11434/api/generate", actualInst.IP) + t.Logf("Calling Ollama API at %s", ollamaURL) + + // Create the inference request + inferenceReq := map[string]interface{}{ + "model": "tinyllama", + "prompt": "Say hello in 3 words", + "stream": false, + } + reqBody, err := json.Marshal(inferenceReq) + require.NoError(t, err) + + // Make the HTTP request with timeout + httpClient := &http.Client{Timeout: 2 * time.Minute} + start := time.Now() + resp, err := httpClient.Post(ollamaURL, "application/json", bytes.NewReader(reqBody)) + elapsed := time.Since(start) + + if err != nil { + // Log console for debugging + consoleLogPath := p.InstanceAppLog(inst.Id) + if consoleLog, readErr := os.ReadFile(consoleLogPath); readErr == nil { + t.Logf("=== VM Console Log ===\n%s\n=== End ===", truncateTail(string(consoleLog), 3000)) + } + } + require.NoError(t, err, "HTTP request to Ollama should succeed") + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode, "Ollama should return 200") + + // Parse response + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var ollamaResp struct { + Response string `json:"response"` + Done bool `json:"done"` + TotalDuration int64 `json:"total_duration"` // nanoseconds + EvalDuration int64 `json:"eval_duration"` // nanoseconds + EvalCount int `json:"eval_count"` // tokens generated + } + err = json.Unmarshal(body, &ollamaResp) + require.NoError(t, err) + + // Log results + t.Logf("Inference response: %s", ollamaResp.Response) + t.Logf("Total time: %v (API reported: %dms)", elapsed, ollamaResp.TotalDuration/1e6) + if ollamaResp.EvalCount > 0 && ollamaResp.EvalDuration > 0 { + tokensPerSec := float64(ollamaResp.EvalCount) / (float64(ollamaResp.EvalDuration) / 1e9) + t.Logf("Generation speed: %.1f tokens/sec (%d tokens in %dms)", + tokensPerSec, ollamaResp.EvalCount, ollamaResp.EvalDuration/1e6) + } + + // Verify output + assert.True(t, 
ollamaResp.Done, "Inference should complete") + assert.NotEmpty(t, ollamaResp.Response, "Model should generate output") + assert.True(t, len(ollamaResp.Response) > 5, "Model output should be substantive") + + // GPU inference should be fast (< 5 seconds for this small prompt) + assert.Less(t, elapsed, 30*time.Second, "GPU inference should be fast") + + t.Log("✅ GPU inference test PASSED!") +} + +// getDockerfilePath returns the path to the CUDA+Ollama Dockerfile +func getDockerfilePath(t *testing.T) string { + _, thisFile, _, ok := runtime.Caller(0) + require.True(t, ok, "Could not get current file path") + return filepath.Join(filepath.Dir(thisFile), "testdata", "ollama-cuda", "Dockerfile") +} + +// buildCustomCudaImage builds the custom CUDA+Ollama Docker image +func buildCustomCudaImage(t *testing.T, dockerfilePath, imageName string) { + t.Helper() + + // Check if image already exists + checkCmd := osExec.Command("docker", "image", "inspect", imageName) + if checkCmd.Run() == nil { + t.Logf("Docker image %s already exists, skipping build", imageName) + return + } + + t.Logf("Building Docker image %s (this may take several minutes)...", imageName) + dockerfileDir := filepath.Dir(dockerfilePath) + + cmd := osExec.Command("docker", "build", "-t", imageName, dockerfileDir) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + err := cmd.Run() + require.NoError(t, err, "Docker build should succeed") + t.Logf("Docker image %s built successfully", imageName) +} + +// pushLocalDockerImage loads an image from local Docker and pushes to hypeman's test registry +func pushLocalDockerImage(t *testing.T, dockerImage, serverHost string) { + t.Helper() + + t.Log("Loading image from Docker daemon...") + srcRef, err := name.ParseReference(dockerImage) + require.NoError(t, err, "Parse source image reference") + + img, err := daemon.Image(srcRef) + require.NoError(t, err, "Load image from Docker daemon") + + // Check image size for progress context + layers, _ := img.Layers() + var 
totalSize int64 + for _, layer := range layers { + if size, err := layer.Size(); err == nil { + totalSize += size + } + } + t.Logf("Image has %d layers, ~%.1f GB total", len(layers), float64(totalSize)/1e9) + + // Push to test registry with a tag (not just digest) so ListImages can find it + targetRef := fmt.Sprintf("%s/test/ollama-cuda:latest", serverHost) + t.Logf("Pushing to %s", targetRef) + + dstRef, err := name.ParseReference(targetRef, name.Insecure) + require.NoError(t, err, "Parse target reference") + + err = remote.Write(dstRef, img) + require.NoError(t, err, "Push to registry") +} + +// inferenceOutputBuffer is a simple buffer for capturing command output +type inferenceOutputBuffer struct { + buf bytes.Buffer +} + +func (b *inferenceOutputBuffer) Write(p []byte) (n int, err error) { + return b.buf.Write(p) +} + +func (b *inferenceOutputBuffer) String() string { + return b.buf.String() +} + +// truncateTail returns the last n characters of s +func truncateTail(s string, n int) string { + if len(s) <= n { + return s + } + return "..." + s[len(s)-n:] +} + +// truncateHead returns the first n characters of s +func truncateHead(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "..." 
+} diff --git a/lib/devices/gpu_module_test.go b/lib/devices/gpu_module_test.go new file mode 100644 index 0000000..841faed --- /dev/null +++ b/lib/devices/gpu_module_test.go @@ -0,0 +1,505 @@ +package devices_test + +import ( + "context" + "fmt" + "log" + "net/http" + "net/http/httptest" + "os" + osexec "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/daemon" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/registry" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + "github.com/stretchr/testify/require" +) + +// TestNVIDIAModuleLoading verifies that NVIDIA kernel modules load correctly in the VM. +// +// This is a simpler test than TestGPUInference that just verifies: +// 1. NVIDIA kernel modules (nvidia.ko, nvidia-uvm.ko, etc.) load during init +// 2. GSP firmware is found and loaded +// 3. /dev/nvidia* device nodes are created +// +// Prerequisites: +// - NVIDIA GPU on host +// - IOMMU enabled +// - VFIO modules loaded (modprobe vfio_pci) +// - Running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestNVIDIAModuleLoading -timeout 5m ./lib/devices/... 
+func TestNVIDIAModuleLoading(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("Test prerequisites met: %d IOMMU groups found", len(groups)) + + // Setup test infrastructure + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) + limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Find an NVIDIA GPU + t.Log("Step 1: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound - may need reboot") + } + + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing - GPU in broken 
state") + } + + // Step 2: Register the GPU + t.Log("Step 2: Registering GPU...") + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "module-test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered device: %s (ID: %s)", device.Name, device.Id) + + originalDriver := driverStr + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + probePath := "/sys/bus/pci/drivers_probe" + os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200) + } + }) + + // Step 3: Ensure system files + t.Log("Step 3: Ensuring system files...") + require.NoError(t, systemMgr.EnsureSystemFiles(ctx)) + + // Step 4: Pull nginx:alpine (stays running unlike plain alpine) + t.Log("Step 4: Pulling nginx:alpine image...") + createdImg, err := imageMgr.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + require.NoError(t, err) + t.Logf("CreateImage returned: name=%s, status=%s", createdImg.Name, createdImg.Status) + + // Wait for image to be ready + var img *images.Image + for i := 0; i < 90; i++ { + img, _ = imageMgr.GetImage(ctx, createdImg.Name) + if img != nil && img.Status == images.StatusReady { + break + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Log("Image ready") + + // Step 5: Create instance with GPU + t.Log("Step 5: Creating instance with GPU...") + + // Initialize network first + require.NoError(t, networkMgr.Initialize(ctx, []string{})) + + createCtx, createCancel := context.WithTimeout(ctx, 60*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "nvidia-module-test", + Image: createdImg.Name, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs 
extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 2, + NetworkEnabled: false, + Devices: []string{"module-test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Wait for instance to be running + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 30*time.Second) + require.NoError(t, err) + t.Log("Instance is ready") + + // Wait for init script to complete (module loading happens early in boot) + time.Sleep(5 * time.Second) + + // Step 6: Check module loading via dmesg + t.Log("Step 6: Checking NVIDIA module loading in VM...") + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Check dmesg for NVIDIA messages + var stdout, stderr outputBuffer + dmesgCmd := "dmesg | grep -i nvidia | head -50" + + for i := 0; i < 10; i++ { + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", dmesgCmd}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + if err == nil { + break + } + time.Sleep(time.Second) + } + require.NoError(t, err, "dmesg command should succeed") + + dmesgOutput := stdout.String() + t.Logf("=== NVIDIA dmesg output ===\n%s", dmesgOutput) + + // Check for key error indicators + firmwareMissing := strings.Contains(dmesgOutput, "No firmware image found") + initFailed := strings.Contains(dmesgOutput, "RmInitAdapter failed") + + if firmwareMissing { + t.Errorf("✗ GSP firmware not found - firmware not included in initrd") + } + if initFailed { + t.Errorf("✗ NVIDIA driver RmInitAdapter failed - GPU initialization error") + } + + // Check lsmod for nvidia modules + stdout = 
outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia || echo 'No nvidia modules loaded'"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + require.NoError(t, err) + modulesOutput := stdout.String() + t.Logf("=== Loaded nvidia modules ===\n%s", modulesOutput) + + hasModules := !strings.Contains(modulesOutput, "No nvidia modules loaded") + if !hasModules { + t.Errorf("✗ NVIDIA modules not loaded in VM") + } else { + t.Log("✓ NVIDIA kernel modules are loaded") + } + + // Check for /dev/nvidia* devices + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1 || echo 'No nvidia devices found'"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + require.NoError(t, err) + devicesOutput := stdout.String() + t.Logf("=== NVIDIA device nodes ===\n%s", devicesOutput) + + hasDevices := !strings.Contains(devicesOutput, "No nvidia devices found") && !strings.Contains(devicesOutput, "No such file") + if hasDevices { + t.Log("✓ /dev/nvidia* device nodes exist") + } else { + t.Log("✗ /dev/nvidia* device nodes not found (expected if init failed)") + } + + // Final verdict + if !firmwareMissing && !initFailed && hasModules { + t.Log("\n=== SUCCESS: NVIDIA kernel modules loaded correctly ===") + } else { + t.Errorf("\n=== FAILURE: NVIDIA module loading has issues ===") + } +} + +// TestNVMLDetection tests if NVML can detect the GPU from userspace. +// This uses the custom CUDA+Ollama image and runs a Python NVML test. +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestNVMLDetection -timeout 10m ./lib/devices/... 
+func TestNVMLDetection(t *testing.T) { + ctx := context.Background() + + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("Test prerequisites met: %d IOMMU groups found", len(groups)) + + // Use persistent test directory for image caching + const persistentTestDataDir = "/var/lib/hypeman-gpu-inference-test" + if err := os.MkdirAll(persistentTestDataDir, 0755); err != nil { + t.Fatalf("Failed to create persistent test dir: %v", err) + } + + p := paths.New(persistentTestDataDir) + cfg := &config.Config{ + DataDir: persistentTestDataDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) + limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Check if ollama-cuda:test image exists in Docker + t.Log("Step 1: Checking for ollama-cuda:test Docker image...") + checkCmd := osexec.Command("docker", "image", "inspect", "ollama-cuda:test") + if err := checkCmd.Run(); err != nil { + t.Fatal("Docker image ollama-cuda:test not found. 
Build it first with:\n" + + " cd lib/devices/testdata/ollama-cuda && docker build -t ollama-cuda:test .") + } + t.Log("Docker image ollama-cuda:test exists") + + // Step 2: Start registry and push image + t.Log("Step 2: Starting registry and pushing image...") + reg, err := registry.New(p, imageMgr) + require.NoError(t, err) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s %s", r.Method, r.URL.Path) + reg.Handler().ServeHTTP(w, r) + })) + defer server.Close() + + serverHost := strings.TrimPrefix(server.URL, "http://") + pushLocalDockerImageForTest(t, "ollama-cuda:test", serverHost) + t.Log("Push complete") + + // Wait for image conversion + t.Log("Waiting for image conversion...") + var img *images.Image + var imageName string + for i := 0; i < 180; i++ { // 3 minutes max + allImages, listErr := imageMgr.ListImages(ctx) + if listErr == nil { + for _, candidate := range allImages { + if strings.Contains(candidate.Name, "ollama-cuda") { + img = &candidate + imageName = candidate.Name + break + } + } + } + if img != nil && img.Status == images.StatusReady { + break + } + if i%30 == 0 { + status := "not found" + if img != nil { + status = string(img.Status) + } + t.Logf("Waiting for image... 
(%d/180, status=%s)", i+1, status) + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist after 3 minutes") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Logf("Image ready: %s", imageName) + + // Step 3: Find and register GPU + t.Log("Step 3: Discovering GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + t.Logf("Found GPU: %s at %s", targetGPU.DeviceName, targetGPU.PCIAddress) + + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "nvml-test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Cleanup(func() { + deviceMgr.DeleteDevice(ctx, device.Id) + }) + + // Step 4: Initialize network and system + require.NoError(t, networkMgr.Initialize(ctx, []string{})) + require.NoError(t, systemMgr.EnsureSystemFiles(ctx)) + + // Step 5: Create instance + t.Log("Step 4: Creating instance with CUDA image...") + inst, err := instanceMgr.CreateInstance(ctx, instances.CreateInstanceRequest{ + Name: "nvml-test", + Image: imageName, + Size: 2 * 1024 * 1024 * 1024, + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 2, + NetworkEnabled: true, + Devices: []string{"nvml-test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 60*time.Second) + require.NoError(t, err) + time.Sleep(5 * time.Second) + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Step 5: Run NVML test + t.Log("Step 5: Running NVML 
detection test...") + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + var stdout, stderr outputBuffer + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-nvml.py 2>&1"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + + t.Logf("NVML test output:\n%s", stdout.String()) + if stderr.String() != "" { + t.Logf("NVML test stderr:\n%s", stderr.String()) + } + + require.NoError(t, err, "NVML test command should succeed") + + output := stdout.String() + if strings.Contains(output, "GPU DETECTED") { + t.Log("✓ SUCCESS: NVML detected the GPU!") + } else if strings.Contains(output, "NVML_ERROR_LIB_RM_VERSION_MISMATCH") { + t.Log("✗ NVML version mismatch - container NVML library doesn't match kernel driver version") + t.Log(" Container has: 570.195.03") + t.Log(" Kernel driver: 570.86.16") + t.FailNow() + } else if strings.Contains(output, "NVML_ERROR_DRIVER_NOT_LOADED") { + t.Log("✗ NVML reports driver not loaded (but kernel modules are loaded)") + t.FailNow() + } else { + t.Errorf("✗ NVML test failed: %s", output) + } + + // Step 6: Run CUDA test + t.Log("Step 6: Running CUDA driver test...") + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-cuda.py 2>&1"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + + t.Logf("CUDA test output:\n%s", stdout.String()) + if strings.Contains(stdout.String(), "CUDA WORKS") { + t.Log("✓ SUCCESS: CUDA driver works!") + } else { + t.Logf("CUDA test may have issues: %s", stdout.String()) + } +} + +// pushLocalDockerImageForTest is a test helper that pushes a local Docker image to the registry +func pushLocalDockerImageForTest(t *testing.T, dockerImage, serverHost string) { + t.Helper() + + srcRef, err := name.ParseReference(dockerImage) + 
require.NoError(t, err) + + img, err := daemon.Image(srcRef) + require.NoError(t, err) + + targetRef := fmt.Sprintf("%s/test/ollama-cuda:latest", serverHost) + t.Logf("Pushing to %s", targetRef) + + dstRef, err := name.ParseReference(targetRef, name.Insecure) + require.NoError(t, err) + + err = remote.Write(dstRef, img) + require.NoError(t, err) +} diff --git a/lib/devices/manager.go b/lib/devices/manager.go new file mode 100644 index 0000000..cc004cd --- /dev/null +++ b/lib/devices/manager.go @@ -0,0 +1,784 @@ +package devices + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + "sync" + "time" + + "github.com/nrednav/cuid2" + "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/paths" +) + +// InstanceLivenessChecker provides a way to check if an instance is running. +// This interface allows devices to query instance state without a circular dependency. +type InstanceLivenessChecker interface { + // IsInstanceRunning returns true if the instance exists and is in a running state + // (i.e., has an active VMM process). Returns false if the instance doesn't exist + // or is stopped/standby/unknown. + IsInstanceRunning(ctx context.Context, instanceID string) bool + + // GetInstanceDevices returns the list of device IDs attached to an instance. + // Returns nil if the instance doesn't exist. + GetInstanceDevices(ctx context.Context, instanceID string) []string + + // ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances. + ListAllInstanceDevices(ctx context.Context) map[string][]string + + // DetectSuspiciousVMMProcesses finds cloud-hypervisor processes that don't match + // known instances and logs warnings. Returns the count of suspicious processes found. 
+ DetectSuspiciousVMMProcesses(ctx context.Context) int +} + +// Manager provides device management operations +type Manager interface { + // ListDevices returns all registered devices + ListDevices(ctx context.Context) ([]Device, error) + + // ListAvailableDevices discovers passthrough-capable devices on the host + ListAvailableDevices(ctx context.Context) ([]AvailableDevice, error) + + // CreateDevice registers a new device for passthrough + CreateDevice(ctx context.Context, req CreateDeviceRequest) (*Device, error) + + // GetDevice returns a device by ID or name + GetDevice(ctx context.Context, idOrName string) (*Device, error) + + // DeleteDevice unregisters a device + DeleteDevice(ctx context.Context, id string) error + + // BindToVFIO binds a device to vfio-pci driver + BindToVFIO(ctx context.Context, id string) error + + // UnbindFromVFIO unbinds a device from vfio-pci driver + UnbindFromVFIO(ctx context.Context, id string) error + + // MarkAttached marks a device as attached to an instance + MarkAttached(ctx context.Context, deviceID, instanceID string) error + + // MarkDetached marks a device as detached from an instance + MarkDetached(ctx context.Context, deviceID string) error + + // ReconcileDevices cleans up stale device state on startup. + // It detects devices with AttachedTo referencing non-existent instances + // and clears the orphaned attachment state. + ReconcileDevices(ctx context.Context) error + + // SetLivenessChecker sets the instance liveness checker after construction. + // This allows breaking the circular dependency between device and instance managers. + SetLivenessChecker(checker InstanceLivenessChecker) +} + +type manager struct { + paths *paths.Paths + vfioBinder *VFIOBinder + livenessChecker InstanceLivenessChecker + mu sync.RWMutex +} + +// NewManager creates a new device manager. +// Use SetLivenessChecker after construction to enable accurate orphan detection. 
+func NewManager(p *paths.Paths) Manager { + return &manager{ + paths: p, + vfioBinder: NewVFIOBinder(), + } +} + +// SetLivenessChecker sets the instance liveness checker. +// This enables accurate orphan detection during reconciliation. +// If not set, orphan detection falls back to checking if the instance directory exists. +func (m *manager) SetLivenessChecker(checker InstanceLivenessChecker) { + m.mu.Lock() + defer m.mu.Unlock() + m.livenessChecker = checker +} + +func (m *manager) ListDevices(ctx context.Context) ([]Device, error) { + // RLock protects against concurrent directory modifications (CreateDevice/DeleteDevice) + // during iteration. While individual file reads are atomic, directory iteration could + // see inconsistent state if a device is being created or deleted concurrently. + m.mu.RLock() + defer m.mu.RUnlock() + + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + if os.IsNotExist(err) { + return []Device{}, nil + } + return nil, fmt.Errorf("read devices dir: %w", err) + } + + var devices []Device + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := m.loadDevice(entry.Name()) + if err != nil { + continue + } + + // Update VFIO binding status from system state + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + + devices = append(devices, *device) + } + + return devices, nil +} + +func (m *manager) ListAvailableDevices(ctx context.Context) ([]AvailableDevice, error) { + return DiscoverAvailableDevices() +} + +func (m *manager) CreateDevice(ctx context.Context, req CreateDeviceRequest) (*Device, error) { + log := logger.FromContext(ctx) + + // Validate PCI address format (required) + if !ValidatePCIAddress(req.PCIAddress) { + return nil, ErrInvalidPCIAddress + } + + // Get device info from sysfs + deviceInfo, err := GetDeviceInfo(req.PCIAddress) + if err != nil { + return nil, fmt.Errorf("get device info: %w", err) + } + + // Generate ID + id := cuid2.Generate() + + 
// Handle optional name: if not provided, generate one from PCI address + name := req.Name + if name == "" { + // Generate name from PCI address: 0000:a2:00.0 -> pci-0000-a2-00-0 + name = "pci-" + strings.ReplaceAll(strings.ReplaceAll(req.PCIAddress, ":", "-"), ".", "-") + } + + // Validate name format + if !ValidateDeviceName(name) { + return nil, ErrInvalidName + } + + m.mu.Lock() + defer m.mu.Unlock() + + // Check if name already exists + if _, err := m.findByName(name); err == nil { + return nil, ErrNameExists + } + + // Check if PCI address already registered + if _, err := m.findByPCIAddress(req.PCIAddress); err == nil { + return nil, ErrAlreadyExists + } + + // Create device + device := &Device{ + Id: id, + Name: name, + Type: DetermineDeviceType(deviceInfo), + PCIAddress: req.PCIAddress, + VendorID: deviceInfo.VendorID, + DeviceID: deviceInfo.DeviceID, + IOMMUGroup: deviceInfo.IOMMUGroup, + BoundToVFIO: m.vfioBinder.IsDeviceBoundToVFIO(req.PCIAddress), + AttachedTo: nil, + CreatedAt: time.Now(), + } + + // Ensure directories exist + if err := os.MkdirAll(m.paths.DeviceDir(id), 0755); err != nil { + return nil, fmt.Errorf("create device dir: %w", err) + } + + // Save device metadata + if err := m.saveDevice(device); err != nil { + os.RemoveAll(m.paths.DeviceDir(id)) + return nil, fmt.Errorf("save device: %w", err) + } + + log.InfoContext(ctx, "registered device", + "id", id, + "name", name, + "pci_address", req.PCIAddress, + "type", device.Type, + ) + + return device, nil +} + +func (m *manager) GetDevice(ctx context.Context, idOrName string) (*Device, error) { + // RLock protects against concurrent modifications while looking up by name, + // which requires iterating the devices directory. 
+ m.mu.RLock() + defer m.mu.RUnlock() + + // Try by ID first + device, err := m.loadDevice(idOrName) + if err == nil { + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + return device, nil + } + + // Try by name + device, err = m.findByName(idOrName) + if err == nil { + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + return device, nil + } + + return nil, ErrNotFound +} + +func (m *manager) DeleteDevice(ctx context.Context, id string) error { + log := logger.FromContext(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(id) + if err != nil { + // Try by name + device, err = m.findByName(id) + if err != nil { + return ErrNotFound + } + id = device.Id + } + + // Check if device is attached + if device.AttachedTo != nil { + return ErrInUse + } + + // Remove device directory + if err := os.RemoveAll(m.paths.DeviceDir(id)); err != nil { + return fmt.Errorf("remove device dir: %w", err) + } + + log.InfoContext(ctx, "unregistered device", + "id", id, + "name", device.Name, + "pci_address", device.PCIAddress, + ) + + return nil +} + +func (m *manager) BindToVFIO(ctx context.Context, id string) error { + log := logger.FromContext(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(id) + if err != nil { + // Try by name + device, err = m.findByName(id) + if err != nil { + return ErrNotFound + } + } + + // Check IOMMU group safety + if err := m.vfioBinder.CheckIOMMUGroupSafe(device.PCIAddress, []string{device.PCIAddress}); err != nil { + return err + } + + // Bind to VFIO + if err := m.vfioBinder.BindToVFIO(device.PCIAddress); err != nil { + return err + } + + // Update device state + device.BoundToVFIO = true + if err := m.saveDevice(device); err != nil { + return fmt.Errorf("save device: %w", err) + } + + log.InfoContext(ctx, "bound device to VFIO", + "id", device.Id, + "name", device.Name, + "pci_address", device.PCIAddress, + ) + + return nil +} + +func (m *manager) 
UnbindFromVFIO(ctx context.Context, id string) error { + log := logger.FromContext(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(id) + if err != nil { + // Try by name + device, err = m.findByName(id) + if err != nil { + return ErrNotFound + } + } + + // Check if device is attached + if device.AttachedTo != nil { + return ErrInUse + } + + // Unbind from VFIO + if err := m.vfioBinder.UnbindFromVFIO(device.PCIAddress); err != nil { + return err + } + + // Update device state + device.BoundToVFIO = false + if err := m.saveDevice(device); err != nil { + return fmt.Errorf("save device: %w", err) + } + + log.InfoContext(ctx, "unbound device from VFIO", + "id", device.Id, + "name", device.Name, + "pci_address", device.PCIAddress, + ) + + return nil +} + +func (m *manager) MarkAttached(ctx context.Context, deviceID, instanceID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(deviceID) + if err != nil { + device, err = m.findByName(deviceID) + if err != nil { + return ErrNotFound + } + } + + if device.AttachedTo != nil { + return ErrInUse + } + + device.AttachedTo = &instanceID + return m.saveDevice(device) +} + +func (m *manager) MarkDetached(ctx context.Context, deviceID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + device, err := m.loadDevice(deviceID) + if err != nil { + device, err = m.findByName(deviceID) + if err != nil { + return ErrNotFound + } + } + + device.AttachedTo = nil + return m.saveDevice(device) +} + +// ReconcileDevices cleans up stale device state on startup. +// It performs safe-by-default reconciliation: +// 1. Detects orphaned device attachments (instance missing or not running) +// 2. Clears orphaned AttachedTo metadata +// 3. Runs GPU-reset-lite for orphaned devices (unbind VFIO, clear override, probe driver) +// 4. Logs mismatches between instance→device and device→instance references +// 5. 
Detects suspicious cloud-hypervisor processes +func (m *manager) ReconcileDevices(ctx context.Context) error { + log := logger.FromContext(ctx) + log.InfoContext(ctx, "reconciling device state") + + // Validate GPU prerequisites and log warnings + m.validatePrerequisites(ctx) + + m.mu.Lock() + defer m.mu.Unlock() + + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + if os.IsNotExist(err) { + // No devices directory yet, nothing to reconcile + return nil + } + return fmt.Errorf("read devices dir: %w", err) + } + + // Load all devices + var allDevices []*Device + deviceByID := make(map[string]*Device) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := m.loadDevice(entry.Name()) + if err != nil { + log.WarnContext(ctx, "failed to load device during reconciliation", + "device_id", entry.Name(), + "error", err, + ) + continue + } + // Update VFIO binding status from system state + device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + allDevices = append(allDevices, device) + deviceByID[device.Id] = device + } + + // Build instance→device map if we have a liveness checker + var instanceDevices map[string][]string + if m.livenessChecker != nil { + instanceDevices = m.livenessChecker.ListAllInstanceDevices(ctx) + } + + // Track stats + var stats reconcileStats + + // Phase 1: Detect and handle orphaned device attachments + for _, device := range allDevices { + if device.AttachedTo == nil { + continue + } + + instanceID := *device.AttachedTo + orphaned := m.isInstanceOrphaned(ctx, instanceID) + + if orphaned { + log.WarnContext(ctx, "detected orphaned device attachment", + "device_id", device.Id, + "device_name", device.Name, + "pci_address", device.PCIAddress, + "orphaned_instance_id", instanceID, + ) + + // Clear the orphaned attachment + device.AttachedTo = nil + if err := m.saveDevice(device); err != nil { + log.ErrorContext(ctx, "failed to save device after clearing attachment", + 
"device_id", device.Id, + "error", err, + ) + stats.errors++ + continue + } + stats.orphanedCleared++ + + // Run GPU-reset-lite for orphaned device + m.resetOrphanedDevice(ctx, device, &stats) + } + } + + // Phase 2: Two-way reconciliation (log-only for mismatches) + if instanceDevices != nil { + for instanceID, deviceIDs := range instanceDevices { + for _, deviceID := range deviceIDs { + device, exists := deviceByID[deviceID] + if !exists { + // Instance references a device that doesn't exist in device metadata + log.WarnContext(ctx, "instance references unknown device (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + ) + stats.mismatches++ + continue + } + + // Check if device's AttachedTo matches + if device.AttachedTo == nil { + log.WarnContext(ctx, "instance references device but device.AttachedTo is nil (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + "device_name", device.Name, + ) + stats.mismatches++ + } else if *device.AttachedTo != instanceID { + log.WarnContext(ctx, "instance references device but device.AttachedTo points elsewhere (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + "device_name", device.Name, + "device_attached_to", *device.AttachedTo, + ) + stats.mismatches++ + } + + // Check VFIO binding state - if instance is running, device should be bound + if m.livenessChecker != nil && m.livenessChecker.IsInstanceRunning(ctx, instanceID) { + if !device.BoundToVFIO { + log.WarnContext(ctx, "running instance has device not bound to VFIO (mismatch)", + "instance_id", instanceID, + "device_id", deviceID, + "device_name", device.Name, + "pci_address", device.PCIAddress, + ) + stats.mismatches++ + } + } + } + } + } + + // Phase 3: Detect suspicious cloud-hypervisor processes (log-only) + if m.livenessChecker != nil { + stats.suspiciousVMM = m.livenessChecker.DetectSuspiciousVMMProcesses(ctx) + } + + // Log summary + log.InfoContext(ctx, "device reconciliation complete", + "orphaned_cleared", 
stats.orphanedCleared, + "reset_attempted", stats.resetAttempted, + "reset_succeeded", stats.resetSucceeded, + "reset_failed", stats.resetFailed, + "mismatches", stats.mismatches, + "suspicious_vmm", stats.suspiciousVMM, + "errors", stats.errors, + ) + + return nil +} + +// validatePrerequisites checks GPU passthrough prerequisites and logs warnings. +// This helps operators debug configuration issues. +func (m *manager) validatePrerequisites(ctx context.Context) { + log := logger.FromContext(ctx) + + // Check IOMMU availability + iommuGroupsDir := "/sys/kernel/iommu_groups" + entries, err := os.ReadDir(iommuGroupsDir) + if err != nil { + log.WarnContext(ctx, "IOMMU not available - GPU passthrough will not work", + "error", err, + "hint", "enable IOMMU in BIOS and kernel (intel_iommu=on or amd_iommu=on)", + ) + } else if len(entries) == 0 { + log.WarnContext(ctx, "no IOMMU groups found - GPU passthrough will not work", + "hint", "enable IOMMU in BIOS and kernel (intel_iommu=on or amd_iommu=on)", + ) + } + + // Check VFIO modules + vfioModules := []string{"vfio_pci", "vfio_iommu_type1"} + for _, module := range vfioModules { + modulePath := "/sys/module/" + module + if _, err := os.Stat(modulePath); os.IsNotExist(err) { + log.WarnContext(ctx, "VFIO module not loaded - GPU passthrough will not work", + "module", module, + "hint", "run: modprobe "+module, + ) + } + } + + // Check huge pages (info-level hint if devices exist but no huge pages) + hugePagesPath := "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" + if data, err := os.ReadFile(hugePagesPath); err == nil { + count := strings.TrimSpace(string(data)) + if count == "0" || count == "" { + // Only warn if we have registered devices + if devicesDir := m.paths.DevicesDir(); devicesDir != "" { + if entries, err := os.ReadDir(devicesDir); err == nil && len(entries) > 0 { + log.InfoContext(ctx, "huge pages not configured - GPU performance may be reduced", + "hint", "run: echo 1024 > 
/proc/sys/vm/nr_hugepages", + ) + } + } + } + } +} + +// reconcileStats tracks reconciliation metrics +type reconcileStats struct { + orphanedCleared int + resetAttempted int + resetSucceeded int + resetFailed int + mismatches int + suspiciousVMM int + errors int +} + +// isInstanceOrphaned checks if an instance should be considered orphaned +// (device attachment should be cleared). +func (m *manager) isInstanceOrphaned(ctx context.Context, instanceID string) bool { + // If we have a liveness checker, use it for more accurate detection + if m.livenessChecker != nil { + // Instance is orphaned if it's not running (stopped, standby, unknown, or missing) + return !m.livenessChecker.IsInstanceRunning(ctx, instanceID) + } + + // Fallback: just check if instance directory exists + instanceDir := m.paths.InstanceDir(instanceID) + _, err := os.Stat(instanceDir) + return os.IsNotExist(err) +} + +// resetOrphanedDevice performs GPU-reset-lite for an orphaned device. +// This is safe because we've already confirmed the device is orphaned. +// Steps mirror gpu-reset.sh but are per-device and non-destructive. 
+func (m *manager) resetOrphanedDevice(ctx context.Context, device *Device, stats *reconcileStats) { + log := logger.FromContext(ctx) + stats.resetAttempted++ + + log.InfoContext(ctx, "running GPU-reset-lite for orphaned device", + "device_id", device.Id, + "device_name", device.Name, + "pci_address", device.PCIAddress, + "bound_to_vfio", device.BoundToVFIO, + ) + + // Step 1: If bound to VFIO, unbind + if device.BoundToVFIO { + log.DebugContext(ctx, "unbinding orphaned device from VFIO", "pci_address", device.PCIAddress) + if err := m.vfioBinder.unbindFromDriver(device.PCIAddress, "vfio-pci"); err != nil { + log.WarnContext(ctx, "failed to unbind device from VFIO during reset", + "device_id", device.Id, + "pci_address", device.PCIAddress, + "error", err, + ) + // Continue with other steps + } + } + + // Step 2: Clear driver_override + log.DebugContext(ctx, "clearing driver_override", "pci_address", device.PCIAddress) + if err := m.vfioBinder.setDriverOverride(device.PCIAddress, ""); err != nil { + log.WarnContext(ctx, "failed to clear driver_override during reset", + "device_id", device.Id, + "pci_address", device.PCIAddress, + "error", err, + ) + // Continue with other steps + } + + // Step 3: Trigger driver probe to rebind to original driver + log.DebugContext(ctx, "triggering driver probe", "pci_address", device.PCIAddress) + if err := m.vfioBinder.triggerDriverProbe(device.PCIAddress); err != nil { + log.WarnContext(ctx, "failed to trigger driver probe during reset", + "device_id", device.Id, + "pci_address", device.PCIAddress, + "error", err, + ) + } + + // Step 4: For NVIDIA devices, restart nvidia-persistenced + if device.VendorID == "10de" { + log.DebugContext(ctx, "restarting nvidia-persistenced", "pci_address", device.PCIAddress) + if err := m.vfioBinder.startNvidiaPersistenced(); err != nil { + log.WarnContext(ctx, "failed to restart nvidia-persistenced during reset", + "device_id", device.Id, + "error", err, + ) + } + } + + // Verify the device is now 
unbound from VFIO + stillBoundToVFIO := m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress) + if stillBoundToVFIO { + log.WarnContext(ctx, "device still bound to VFIO after reset-lite", + "device_id", device.Id, + "pci_address", device.PCIAddress, + ) + stats.resetFailed++ + } else { + log.InfoContext(ctx, "GPU-reset-lite completed for orphaned device", + "device_id", device.Id, + "device_name", device.Name, + "pci_address", device.PCIAddress, + ) + stats.resetSucceeded++ + } + + // Update device metadata to reflect new VFIO state + device.BoundToVFIO = stillBoundToVFIO + if err := m.saveDevice(device); err != nil { + log.WarnContext(ctx, "failed to save device after reset-lite", + "device_id", device.Id, + "error", err, + ) + } +} + +// Helper methods + +func (m *manager) loadDevice(id string) (*Device, error) { + data, err := os.ReadFile(m.paths.DeviceMetadata(id)) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + + var device Device + if err := json.Unmarshal(data, &device); err != nil { + return nil, err + } + + return &device, nil +} + +func (m *manager) saveDevice(device *Device) error { + data, err := json.MarshalIndent(device, "", " ") + if err != nil { + return err + } + + return os.WriteFile(m.paths.DeviceMetadata(device.Id), data, 0644) +} + +func (m *manager) findByName(name string) (*Device, error) { + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + return nil, ErrNotFound + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := m.loadDevice(entry.Name()) + if err != nil { + continue + } + + if device.Name == name { + return device, nil + } + } + + return nil, ErrNotFound +} + +func (m *manager) findByPCIAddress(pciAddress string) (*Device, error) { + entries, err := os.ReadDir(m.paths.DevicesDir()) + if err != nil { + return nil, ErrNotFound + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + device, err := 
m.loadDevice(entry.Name()) + if err != nil { + continue + } + + if device.PCIAddress == pciAddress { + return device, nil + } + } + + return nil, ErrNotFound +} diff --git a/lib/devices/manager_test.go b/lib/devices/manager_test.go new file mode 100644 index 0000000..bb6a167 --- /dev/null +++ b/lib/devices/manager_test.go @@ -0,0 +1,165 @@ +package devices + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateDeviceName(t *testing.T) { + tests := []struct { + name string + input string + expected bool + }{ + {"valid alphanumeric", "l4gpu", true}, + {"valid with underscore", "my_gpu", true}, + {"valid with dash", "gpu-1", true}, + {"valid with dot", "nvidia.l4", true}, + {"valid mixed", "my-gpu_01.test", true}, + {"valid starting with number", "1gpu", true}, + {"invalid empty", "", false}, + {"invalid single char", "a", false}, // pattern requires at least 2 chars + {"invalid starts with dash", "-gpu", false}, + {"invalid starts with underscore", "_gpu", false}, + {"invalid starts with dot", ".gpu", false}, + {"invalid contains space", "my gpu", false}, + {"invalid contains special char", "gpu@1", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ValidateDeviceName(tt.input) + assert.Equal(t, tt.expected, result, "ValidateDeviceName(%q)", tt.input) + }) + } +} + +func TestValidatePCIAddress(t *testing.T) { + tests := []struct { + name string + input string + expected bool + }{ + {"valid standard", "0000:00:00.0", true}, + {"valid with letters", "0000:a2:00.0", true}, + {"valid uppercase", "0000:A2:00.0", true}, + {"valid mixed case", "0000:aB:c1.2", true}, + {"invalid too short", "0000:00:0.0", false}, + {"invalid no domain", "00:00.0", false}, + {"invalid missing colon", "000000:00.0", false}, + {"invalid missing dot", "0000:00:000", false}, + {"invalid extra segment", "0000:00:00:00.0", false}, + {"invalid empty", "", false}, + } + + for _, tt := 
range tests { + t.Run(tt.name, func(t *testing.T) { + result := ValidatePCIAddress(tt.input) + assert.Equal(t, tt.expected, result, "ValidatePCIAddress(%q)", tt.input) + }) + } +} + +func TestDetermineDeviceType(t *testing.T) { + // This test is limited since it reads from sysfs + // We test the function structure but can't mock sysfs easily + t.Run("returns generic for nil device", func(t *testing.T) { + device := &AvailableDevice{ + PCIAddress: "0000:99:99.0", // Non-existent device + } + deviceType := DetermineDeviceType(device) + assert.Equal(t, DeviceTypeGeneric, deviceType) + }) +} + +func TestGetDeviceSysfsPath(t *testing.T) { + tests := []struct { + pciAddress string + expected string + }{ + {"0000:a2:00.0", "/sys/bus/pci/devices/0000:a2:00.0/"}, + {"0000:00:1f.0", "/sys/bus/pci/devices/0000:00:1f.0/"}, + } + + for _, tt := range tests { + t.Run(tt.pciAddress, func(t *testing.T) { + result := GetDeviceSysfsPath(tt.pciAddress) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestGetVendorName(t *testing.T) { + tests := []struct { + vendorID string + expected string + }{ + {"10de", "NVIDIA Corporation"}, + {"1002", "AMD/ATI"}, + {"8086", "Intel Corporation"}, + {"1234", "Unknown Vendor"}, + } + + for _, tt := range tests { + t.Run(tt.vendorID, func(t *testing.T) { + result := getVendorName(tt.vendorID) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestGetDeviceName(t *testing.T) { + tests := []struct { + name string + vendorID string + deviceID string + classCode string + expected string + }{ + {"NVIDIA L4", "10de", "27b8", "0x030200", "L4"}, + {"NVIDIA RTX 4090", "10de", "2684", "0x030000", "RTX 4090"}, + {"Unknown NVIDIA", "10de", "9999", "0x030000", "VGA Controller"}, + {"Generic VGA", "1234", "5678", "0x030000", "VGA Controller"}, + {"Generic 3D", "1234", "5678", "0x030200", "3D Controller"}, + {"Audio device", "1234", "5678", "0x040300", "Audio Device"}, + {"Unknown class", "1234", "5678", "0x999999", "PCI Device"}, + } + + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := getDeviceName(tt.vendorID, tt.deviceID, tt.classCode) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestVFIOBinderIsVFIOAvailable(t *testing.T) { + binder := NewVFIOBinder() + // Just test that it doesn't panic + _ = binder.IsVFIOAvailable() +} + +func TestDeviceTypes(t *testing.T) { + t.Run("device type constants", func(t *testing.T) { + require.Equal(t, DeviceType("gpu"), DeviceTypeGPU) + require.Equal(t, DeviceType("pci"), DeviceTypeGeneric) + }) +} + +func TestErrors(t *testing.T) { + t.Run("error types are distinct", func(t *testing.T) { + assert.NotEqual(t, ErrNotFound, ErrInUse) + assert.NotEqual(t, ErrNotBound, ErrAlreadyBound) + assert.NotEqual(t, ErrAlreadyExists, ErrNameExists) + }) + + t.Run("error messages are meaningful", func(t *testing.T) { + assert.Contains(t, ErrNotFound.Error(), "not found") + assert.Contains(t, ErrInUse.Error(), "in use") + assert.Contains(t, ErrInvalidName.Error(), "pattern") + }) +} + + diff --git a/lib/devices/reconcile_test.go b/lib/devices/reconcile_test.go new file mode 100644 index 0000000..7c2bdae --- /dev/null +++ b/lib/devices/reconcile_test.go @@ -0,0 +1,612 @@ +package devices + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/onkernel/hypeman/lib/paths" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockLivenessChecker implements InstanceLivenessChecker for testing +type mockLivenessChecker struct { + runningInstances map[string]bool // instanceID -> isRunning + instanceDevices map[string][]string // instanceID -> deviceIDs +} + +func newMockLivenessChecker() *mockLivenessChecker { + return &mockLivenessChecker{ + runningInstances: make(map[string]bool), + instanceDevices: make(map[string][]string), + } +} + +func (m *mockLivenessChecker) IsInstanceRunning(ctx context.Context, instanceID string) bool { + return m.runningInstances[instanceID] 
+} + +func (m *mockLivenessChecker) GetInstanceDevices(ctx context.Context, instanceID string) []string { + return m.instanceDevices[instanceID] +} + +func (m *mockLivenessChecker) ListAllInstanceDevices(ctx context.Context) map[string][]string { + return m.instanceDevices +} + +func (m *mockLivenessChecker) DetectSuspiciousVMMProcesses(ctx context.Context) int { + return 0 // Mock returns no suspicious processes +} + +func (m *mockLivenessChecker) setRunning(instanceID string, running bool) { + m.runningInstances[instanceID] = running +} + +func (m *mockLivenessChecker) setInstanceDevices(instanceID string, deviceIDs []string) { + m.instanceDevices[instanceID] = deviceIDs +} + +// setupTestManager creates a manager with a temporary directory for testing +func setupTestManager(t *testing.T) (*manager, *paths.Paths, string) { + t.Helper() + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + // Create devices directory + require.NoError(t, os.MkdirAll(p.DevicesDir(), 0755)) + + mgr := &manager{ + paths: p, + vfioBinder: NewVFIOBinder(), + } + + return mgr, p, tmpDir +} + +// createTestDevice creates a device in the test directory +func createTestDevice(t *testing.T, p *paths.Paths, device *Device) { + t.Helper() + deviceDir := p.DeviceDir(device.Id) + require.NoError(t, os.MkdirAll(deviceDir, 0755)) + + data, err := json.MarshalIndent(device, "", " ") + require.NoError(t, err) + + require.NoError(t, os.WriteFile(p.DeviceMetadata(device.Id), data, 0644)) +} + +// createTestInstanceDir creates an instance directory (simulating instance existence) +func createTestInstanceDir(t *testing.T, p *paths.Paths, instanceID string) { + t.Helper() + instanceDir := p.InstanceDir(instanceID) + require.NoError(t, os.MkdirAll(instanceDir, 0755)) +} + +func TestReconcileDevices_NoDevices(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) +} + +func 
TestReconcileDevices_OrphanedAttachment_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + instanceID := "orphaned-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo pointing to non-existent instance + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", // Non-existent for test + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Don't create the instance directory - it's orphaned + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was cleared + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "AttachedTo should be cleared for orphaned device") +} + +func TestReconcileDevices_ValidAttachment_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + instanceID := "valid-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo pointing to existing instance + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create the instance directory - it exists + createTestInstanceDir(t, p, instanceID) + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was NOT cleared (instance exists) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo, "AttachedTo should NOT be cleared for valid device") + assert.Equal(t, instanceID, *updatedDevice.AttachedTo) +} + +func TestReconcileDevices_OrphanedAttachment_WithLivenessChecker(t *testing.T) { + mgr, p, _ := 
setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "stopped-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create instance directory but mark as NOT running + createTestInstanceDir(t, p, instanceID) + liveness.setRunning(instanceID, false) // Stopped/standby + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was cleared (instance not running) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "AttachedTo should be cleared for non-running instance") +} + +func TestReconcileDevices_ValidAttachment_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "running-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create instance and mark as running + createTestInstanceDir(t, p, instanceID) + liveness.setRunning(instanceID, true) // Running + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was NOT cleared (instance is running) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo, "AttachedTo should 
NOT be cleared for running instance") + assert.Equal(t, instanceID, *updatedDevice.AttachedTo) +} + +func TestReconcileDevices_TwoWayMismatch_InstanceRefsUnknownDevice(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker with instance that references unknown device + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "instance-with-ghost-device" + unknownDeviceID := "device-that-doesnt-exist" + + // Instance references a device that doesn't exist + liveness.setInstanceDevices(instanceID, []string{unknownDeviceID}) + liveness.setRunning(instanceID, true) + + // Run reconciliation - should not error, just log the mismatch + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: We can't easily verify log output, but the test ensures no panic/error +} + +func TestReconcileDevices_TwoWayMismatch_DeviceAttachedToNil(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "instance-123" + deviceID := "device-abc" + + // Create device with NO AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: nil, // Not attached according to device metadata + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Instance claims to have this device + liveness.setInstanceDevices(instanceID, []string{deviceID}) + liveness.setRunning(instanceID, true) + + // Run reconciliation - should log mismatch but not error + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: This is a log-only mismatch, device state should remain unchanged + + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "Device should remain unattached 
(log-only mismatch)") +} + +func TestReconcileDevices_TwoWayMismatch_DeviceAttachedToWrongInstance(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID1 := "instance-1" + instanceID2 := "instance-2" + deviceID := "device-abc" + + // Create device attached to instance-1 + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID1, // Attached to instance-1 + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Both instances exist and are running + createTestInstanceDir(t, p, instanceID1) + createTestInstanceDir(t, p, instanceID2) + liveness.setRunning(instanceID1, true) + liveness.setRunning(instanceID2, true) + + // instance-2 claims to have this device (mismatch!) + liveness.setInstanceDevices(instanceID2, []string{deviceID}) + + // Run reconciliation - should log mismatch but not error + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: This is a log-only mismatch, device state should remain unchanged + + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo) + assert.Equal(t, instanceID1, *updatedDevice.AttachedTo, "Device should remain attached to original instance (log-only mismatch)") +} + +func TestReconcileDevices_MultipleDevices(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + runningInstanceID := "running-instance" + stoppedInstanceID := "stopped-instance" + orphanedInstanceID := "orphaned-instance" + + // Device 1: Attached to running instance - should stay attached + device1 := &Device{ + Id: "device-1", + Name: "gpu-1", + Type: DeviceTypeGPU, + PCIAddress: 
"0000:01:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &runningInstanceID, + CreatedAt: time.Now(), + } + + // Device 2: Attached to stopped instance - should be cleared + device2 := &Device{ + Id: "device-2", + Name: "gpu-2", + Type: DeviceTypeGPU, + PCIAddress: "0000:02:00.0", + VendorID: "10de", + DeviceID: "5678", + AttachedTo: &stoppedInstanceID, + CreatedAt: time.Now(), + } + + // Device 3: Attached to non-existent instance - should be cleared + device3 := &Device{ + Id: "device-3", + Name: "gpu-3", + Type: DeviceTypeGPU, + PCIAddress: "0000:03:00.0", + VendorID: "10de", + DeviceID: "9abc", + AttachedTo: &orphanedInstanceID, + CreatedAt: time.Now(), + } + + // Device 4: Not attached - should stay unattached + device4 := &Device{ + Id: "device-4", + Name: "gpu-4", + Type: DeviceTypeGPU, + PCIAddress: "0000:04:00.0", + VendorID: "10de", + DeviceID: "def0", + AttachedTo: nil, + CreatedAt: time.Now(), + } + + createTestDevice(t, p, device1) + createTestDevice(t, p, device2) + createTestDevice(t, p, device3) + createTestDevice(t, p, device4) + + // Set up instance states + createTestInstanceDir(t, p, runningInstanceID) + createTestInstanceDir(t, p, stoppedInstanceID) + // Don't create orphanedInstanceID directory + + liveness.setRunning(runningInstanceID, true) + liveness.setRunning(stoppedInstanceID, false) + // orphanedInstanceID doesn't exist in liveness checker + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify device 1 stays attached (running instance) + d1, err := mgr.loadDevice("device-1") + require.NoError(t, err) + require.NotNil(t, d1.AttachedTo) + assert.Equal(t, runningInstanceID, *d1.AttachedTo) + + // Verify device 2 is cleared (stopped instance) + d2, err := mgr.loadDevice("device-2") + require.NoError(t, err) + assert.Nil(t, d2.AttachedTo) + + // Verify device 3 is cleared (orphaned instance) + d3, err := mgr.loadDevice("device-3") + require.NoError(t, err) + assert.Nil(t, 
d3.AttachedTo) + + // Verify device 4 stays unattached + d4, err := mgr.loadDevice("device-4") + require.NoError(t, err) + assert.Nil(t, d4.AttachedTo) +} + +func TestSetLivenessChecker(t *testing.T) { + mgr, _, _ := setupTestManager(t) + + // Initially nil + assert.Nil(t, mgr.livenessChecker) + + // Set liveness checker + liveness := newMockLivenessChecker() + mgr.SetLivenessChecker(liveness) + + // Verify it was set + assert.Equal(t, liveness, mgr.livenessChecker) +} + +func TestIsInstanceOrphaned_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + existingInstanceID := "existing-instance" + missingInstanceID := "missing-instance" + + // Create one instance directory + createTestInstanceDir(t, p, existingInstanceID) + + // Existing instance is NOT orphaned + assert.False(t, mgr.isInstanceOrphaned(ctx, existingInstanceID)) + + // Missing instance IS orphaned + assert.True(t, mgr.isInstanceOrphaned(ctx, missingInstanceID)) +} + +func TestIsInstanceOrphaned_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + runningInstanceID := "running-instance" + stoppedInstanceID := "stopped-instance" + + // Both instances have directories + createTestInstanceDir(t, p, runningInstanceID) + createTestInstanceDir(t, p, stoppedInstanceID) + + liveness.setRunning(runningInstanceID, true) + liveness.setRunning(stoppedInstanceID, false) + + // Running instance is NOT orphaned + assert.False(t, mgr.isInstanceOrphaned(ctx, runningInstanceID)) + + // Stopped instance IS orphaned (even though directory exists) + assert.True(t, mgr.isInstanceOrphaned(ctx, stoppedInstanceID)) +} + +func TestReconcileDevices_NoDevicesDirectory(t *testing.T) { + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + // Don't create devices directory + + mgr := &manager{ + paths: p, + vfioBinder: 
NewVFIOBinder(), + } + + ctx := context.Background() + + // Should not error when directory doesn't exist + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) +} + +func TestReconcileStats(t *testing.T) { + // Verify stats struct has expected fields + stats := reconcileStats{} + + stats.orphanedCleared = 1 + stats.resetAttempted = 2 + stats.resetSucceeded = 3 + stats.resetFailed = 4 + stats.mismatches = 5 + stats.suspiciousVMM = 6 + stats.errors = 7 + + assert.Equal(t, 1, stats.orphanedCleared) + assert.Equal(t, 2, stats.resetAttempted) + assert.Equal(t, 3, stats.resetSucceeded) + assert.Equal(t, 4, stats.resetFailed) + assert.Equal(t, 5, stats.mismatches) + assert.Equal(t, 6, stats.suspiciousVMM) + assert.Equal(t, 7, stats.errors) +} + +// TestResetOrphanedDevice_NonExistentPCIAddress tests that reset-lite +// handles non-existent PCI addresses gracefully (doesn't panic) +func TestResetOrphanedDevice_NonExistentPCIAddress(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Create device with fake PCI address that doesn't exist + device := &Device{ + Id: "test-device", + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:ff:ff.f", // Non-existent + VendorID: "10de", // NVIDIA vendor ID + DeviceID: "1234", + BoundToVFIO: true, // Claim it's bound to VFIO + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + stats := &reconcileStats{} + + // Should not panic, should handle errors gracefully + mgr.resetOrphanedDevice(ctx, device, stats) + + // Reset was attempted + assert.Equal(t, 1, stats.resetAttempted) + + // May fail due to non-existent device, that's expected + // The key is it doesn't panic +} + +// Helper function for testing: verify device directory structure +func verifyDeviceDir(t *testing.T, p *paths.Paths, deviceID string) bool { + t.Helper() + metadataPath := p.DeviceMetadata(deviceID) + _, err := os.Stat(metadataPath) + return err == nil +} + +// 
TestReconcileDevices_CorruptedDeviceMetadata tests handling of +// corrupted device metadata files +func TestReconcileDevices_CorruptedDeviceMetadata(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Create a valid device + validDevice := &Device{ + Id: "valid-device", + Name: "valid-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:01:00.0", + VendorID: "10de", + DeviceID: "1234", + CreatedAt: time.Now(), + } + createTestDevice(t, p, validDevice) + + // Create a corrupted device directory with invalid JSON + corruptedID := "corrupted-device" + corruptedDir := p.DeviceDir(corruptedID) + require.NoError(t, os.MkdirAll(corruptedDir, 0755)) + corruptedPath := filepath.Join(corruptedDir, "metadata.json") + require.NoError(t, os.WriteFile(corruptedPath, []byte("not valid json{{{"), 0644)) + + // Should not error - should skip corrupted device and continue + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Valid device should still be loadable + d, err := mgr.loadDevice("valid-device") + require.NoError(t, err) + assert.Equal(t, "valid-gpu", d.Name) +} + diff --git a/lib/devices/scripts/gpu-reset.sh b/lib/devices/scripts/gpu-reset.sh new file mode 100755 index 0000000..37006f7 --- /dev/null +++ b/lib/devices/scripts/gpu-reset.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# +# gpu-reset.sh - Reset GPU state after failed passthrough tests or hangs +# +# This script handles common GPU recovery scenarios: +# 1. Killing any stuck cloud-hypervisor processes holding the GPU +# 2. Unbinding from vfio-pci if still bound +# 3. Clearing driver_override +# 4. Triggering driver probe to rebind to nvidia driver +# 5. Restarting nvidia-persistenced +# +# Usage: +# sudo ./gpu-reset.sh # Reset all NVIDIA GPUs +# sudo ./gpu-reset.sh 0000:a2:00.0 # Reset specific GPU by PCI address +# +# Must be run as root. 
+ +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if running as root +if [[ $EUID -ne 0 ]]; then + log_error "This script must be run as root (sudo)" + exit 1 +fi + +# Get PCI address from argument or find all NVIDIA GPUs +if [[ -n "$1" ]]; then + PCI_ADDRESSES=("$1") +else + # Find all NVIDIA GPUs (vendor 10de) + PCI_ADDRESSES=() + for dev in /sys/bus/pci/devices/*; do + if [[ -f "$dev/vendor" ]]; then + vendor=$(cat "$dev/vendor" 2>/dev/null) + class=$(cat "$dev/class" 2>/dev/null) + # Check for NVIDIA vendor (0x10de) and display/3D controller class (0x03xxxx) + if [[ "$vendor" == "0x10de" && "$class" == 0x03* ]]; then + addr=$(basename "$dev") + PCI_ADDRESSES+=("$addr") + fi + fi + done +fi + +if [[ ${#PCI_ADDRESSES[@]} -eq 0 ]]; then + log_error "No NVIDIA GPUs found" + exit 1 +fi + +log_info "Found ${#PCI_ADDRESSES[@]} GPU(s) to reset: ${PCI_ADDRESSES[*]}" + +# Step 1: Kill any cloud-hypervisor processes that might be holding GPUs +log_info "Step 1: Checking for stuck cloud-hypervisor processes..." +if pgrep -f "cloud-hypervisor" > /dev/null 2>&1; then + log_warn "Found cloud-hypervisor processes, killing them..." + pkill -9 -f "cloud-hypervisor" 2>/dev/null || true + sleep 2 + if pgrep -f "cloud-hypervisor" > /dev/null 2>&1; then + log_error "Failed to kill cloud-hypervisor processes" + ps aux | grep cloud-hypervisor | grep -v grep + else + log_info "Killed cloud-hypervisor processes" + fi +else + log_info "No cloud-hypervisor processes found" +fi + +# Process each GPU +for PCI_ADDR in "${PCI_ADDRESSES[@]}"; do + log_info "Processing GPU at $PCI_ADDR..." + + DEVICE_PATH="/sys/bus/pci/devices/$PCI_ADDR" + + if [[ ! 
-d "$DEVICE_PATH" ]]; then + log_error "Device $PCI_ADDR not found at $DEVICE_PATH" + continue + fi + + # Get current driver + CURRENT_DRIVER="" + if [[ -L "$DEVICE_PATH/driver" ]]; then + CURRENT_DRIVER=$(basename "$(readlink "$DEVICE_PATH/driver")") + fi + log_info " Current driver: ${CURRENT_DRIVER:-none}" + + # Step 2: If bound to vfio-pci, unbind + if [[ "$CURRENT_DRIVER" == "vfio-pci" ]]; then + log_info " Step 2: Unbinding from vfio-pci..." + echo "$PCI_ADDR" > /sys/bus/pci/drivers/vfio-pci/unbind 2>/dev/null || true + sleep 1 + else + log_info " Step 2: Not bound to vfio-pci, skipping unbind" + fi + + # Step 3: Clear driver_override + log_info " Step 3: Clearing driver_override..." + if [[ -f "$DEVICE_PATH/driver_override" ]]; then + OVERRIDE=$(cat "$DEVICE_PATH/driver_override" 2>/dev/null) + if [[ -n "$OVERRIDE" && "$OVERRIDE" != "(null)" ]]; then + log_info " Current override: $OVERRIDE" + echo > "$DEVICE_PATH/driver_override" 2>/dev/null || true + log_info " Cleared driver_override" + else + log_info " No driver_override set" + fi + fi + + # Step 4: Trigger driver probe to rebind to nvidia + log_info " Step 4: Triggering driver probe..." + echo "$PCI_ADDR" > /sys/bus/pci/drivers_probe 2>/dev/null || true + sleep 2 + + # Check new driver + NEW_DRIVER="" + if [[ -L "$DEVICE_PATH/driver" ]]; then + NEW_DRIVER=$(basename "$(readlink "$DEVICE_PATH/driver")") + fi + log_info " New driver: ${NEW_DRIVER:-none}" + + if [[ "$NEW_DRIVER" == "nvidia" ]]; then + log_info " ✓ GPU successfully rebound to nvidia driver" + elif [[ -z "$NEW_DRIVER" ]]; then + log_warn " GPU has no driver bound - may need manual intervention or reboot" + else + log_warn " GPU bound to $NEW_DRIVER (expected nvidia)" + fi +done + +# Step 5: Restart nvidia-persistenced +log_info "Step 5: Restarting nvidia-persistenced..." 
+if systemctl is-active nvidia-persistenced > /dev/null 2>&1; then + log_info " nvidia-persistenced is already running" +else + if systemctl start nvidia-persistenced 2>/dev/null; then + log_info " Started nvidia-persistenced" + else + log_warn " Failed to start nvidia-persistenced (may not be installed or GPU not ready)" + fi +fi + +# Final verification +log_info "" +log_info "=== Final GPU State ===" +for PCI_ADDR in "${PCI_ADDRESSES[@]}"; do + echo "" + lspci -nnks "$PCI_ADDR" 2>/dev/null || echo "Could not query $PCI_ADDR" +done + +echo "" +log_info "=== nvidia-smi ===" +if command -v nvidia-smi &> /dev/null; then + nvidia-smi 2>&1 | head -20 || log_warn "nvidia-smi failed (GPU may need more time or reboot)" +else + log_warn "nvidia-smi not found" +fi + +echo "" +log_info "GPU reset complete!" +log_info "If GPUs are still in a bad state, a system reboot may be required." + diff --git a/lib/devices/testdata/ollama-cuda/Dockerfile b/lib/devices/testdata/ollama-cuda/Dockerfile new file mode 100644 index 0000000..d31107f --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/Dockerfile @@ -0,0 +1,29 @@ +# Minimal CUDA image for GPU inference testing +# +# NO NVIDIA DRIVER INSTALLATION NEEDED! +# hypeman automatically injects the matching driver libraries at VM boot time. +# See lib/devices/GPU.md for documentation on driver injection. +# +# This image demonstrates that standard CUDA runtime images work out of the box +# with hypeman's GPU passthrough - no driver version matching required. 
+ +FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 + +# Install dependencies and Ollama +# Note: We use the runtime image (not devel) since we don't need CUDA compilation tools +RUN apt-get update && \ + apt-get install -y curl ca-certificates python3 && \ + curl -fsSL https://ollama.com/install.sh | sh && \ + rm -rf /var/lib/apt/lists/* + +# Add test scripts for verifying GPU access +COPY test-nvml.py /usr/local/bin/test-nvml.py +COPY test-cuda.py /usr/local/bin/test-cuda.py +RUN chmod +x /usr/local/bin/test-nvml.py /usr/local/bin/test-cuda.py + +# Ensure libraries are in the path (hypeman injects to /usr/lib/x86_64-linux-gnu) +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH} +ENV PATH=/usr/local/cuda/bin:/usr/bin:${PATH} + +EXPOSE 11434 +CMD ["ollama", "serve"] diff --git a/lib/devices/testdata/ollama-cuda/test-cuda.py b/lib/devices/testdata/ollama-cuda/test-cuda.py new file mode 100644 index 0000000..9e9c0eb --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/test-cuda.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Test basic CUDA operations.""" +import ctypes +import os +import sys + +def test_cuda(): + """Try to use the CUDA driver API.""" + print("=== CUDA Driver Test ===") + print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'not set')}") + + # Try loading libcuda + try: + cuda = ctypes.CDLL("libcuda.so") + print("✓ Loaded libcuda.so") + except OSError as e: + print(f"✗ Failed to load libcuda.so: {e}") + return False + + # Initialize CUDA + ret = cuda.cuInit(0) + if ret != 0: + print(f"✗ cuInit failed with code: {ret}") + return False + print("✓ cuInit succeeded") + + # Get device count + count = ctypes.c_int() + ret = cuda.cuDeviceGetCount(ctypes.byref(count)) + if ret != 0: + print(f"✗ cuDeviceGetCount failed with code: {ret}") + return False + print(f"✓ Found {count.value} CUDA device(s)") + + if count.value == 0: + return False + + # Get device name + device = ctypes.c_int() + ret = 
cuda.cuDeviceGet(ctypes.byref(device), 0) + if ret != 0: + print(f"✗ cuDeviceGet failed: {ret}") + return False + + name = ctypes.create_string_buffer(256) + ret = cuda.cuDeviceGetName(name, 256, device) + if ret == 0: + print(f"✓ Device 0: {name.value.decode()}") + + # Get total memory + total_mem = ctypes.c_size_t() + ret = cuda.cuDeviceTotalMem_v2(ctypes.byref(total_mem), device) + if ret == 0: + print(f"✓ Total memory: {total_mem.value / (1024**3):.1f} GB") + + return True + +if __name__ == "__main__": + success = test_cuda() + print() + print("Result:", "CUDA WORKS" if success else "CUDA FAILED") + sys.exit(0 if success else 1) + diff --git a/lib/devices/testdata/ollama-cuda/test-nvml.py b/lib/devices/testdata/ollama-cuda/test-nvml.py new file mode 100644 index 0000000..42e9882 --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/test-nvml.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +"""Test NVML GPU detection - matches what Ollama does internally.""" +import ctypes +import os + +def test_nvml(): + """Try to initialize NVML and detect GPUs.""" + # Try different library paths + lib_paths = [ + "libnvidia-ml.so.1", + "libnvidia-ml.so", + "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1", + ] + + nvml = None + for path in lib_paths: + try: + nvml = ctypes.CDLL(path) + print(f"✓ Loaded NVML from: {path}") + break + except OSError as e: + print(f"✗ Failed to load {path}: {e}") + + if nvml is None: + print("ERROR: Could not load NVML library") + return False + + # Try to initialize + try: + ret = nvml.nvmlInit_v2() + if ret != 0: + print(f"✗ nvmlInit_v2 failed with code: {ret}") + # Error codes: 1=uninitialized, 2=invalid argument, 3=not supported, + # 9=driver not loaded, 12=library not found + error_names = { + 1: "NVML_ERROR_UNINITIALIZED", + 2: "NVML_ERROR_INVALID_ARGUMENT", + 3: "NVML_ERROR_NOT_SUPPORTED", + 9: "NVML_ERROR_DRIVER_NOT_LOADED", + 12: "NVML_ERROR_LIB_RM_VERSION_MISMATCH", + 255: "NVML_ERROR_UNKNOWN", + } + print(f" Error name: {error_names.get(ret, 
'UNKNOWN')}") + return False + print("✓ nvmlInit_v2 succeeded") + except Exception as e: + print(f"✗ nvmlInit_v2 exception: {e}") + return False + + # Get device count + try: + count = ctypes.c_uint() + ret = nvml.nvmlDeviceGetCount_v2(ctypes.byref(count)) + if ret != 0: + print(f"✗ nvmlDeviceGetCount failed with code: {ret}") + return False + print(f"✓ Found {count.value} GPU(s)") + except Exception as e: + print(f"✗ nvmlDeviceGetCount exception: {e}") + return False + + # Shutdown + nvml.nvmlShutdown() + return count.value > 0 + +if __name__ == "__main__": + print("=== NVML GPU Detection Test ===") + print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'not set')}") + print() + + # Check device nodes + print("Device nodes:") + for dev in ["/dev/nvidia0", "/dev/nvidiactl", "/dev/nvidia-uvm"]: + exists = os.path.exists(dev) + print(f" {dev}: {'exists' if exists else 'MISSING'}") + print() + + success = test_nvml() + print() + print("Result:", "GPU DETECTED" if success else "NO GPU FOUND") + exit(0 if success else 1) + + diff --git a/lib/devices/types.go b/lib/devices/types.go new file mode 100644 index 0000000..ca7b68e --- /dev/null +++ b/lib/devices/types.go @@ -0,0 +1,56 @@ +package devices + +import ( + "regexp" + "time" +) + +// DeviceType represents the type of PCI device +type DeviceType string + +const ( + DeviceTypeGPU DeviceType = "gpu" + DeviceTypeGeneric DeviceType = "pci" +) + +// Device represents a registered PCI device for passthrough +type Device struct { + Id string `json:"id"` // cuid2 identifier + Name string `json:"name"` // user-provided globally unique name + Type DeviceType `json:"type"` // gpu or pci + PCIAddress string `json:"pci_address"` // e.g., "0000:a2:00.0" + VendorID string `json:"vendor_id"` // e.g., "10de" + DeviceID string `json:"device_id"` // e.g., "27b8" + IOMMUGroup int `json:"iommu_group"` // IOMMU group number + BoundToVFIO bool `json:"bound_to_vfio"` // whether device is bound to vfio-pci + AttachedTo *string 
`json:"attached_to"` // instance ID if attached, nil otherwise + CreatedAt time.Time `json:"created_at"` +} + +// CreateDeviceRequest is the request to register a new device +type CreateDeviceRequest struct { + Name string `json:"name,omitempty"` // optional: globally unique name (auto-generated if not provided) + PCIAddress string `json:"pci_address"` // required: PCI address (e.g., "0000:a2:00.0") +} + +// AvailableDevice represents a PCI device discovered on the host +type AvailableDevice struct { + PCIAddress string `json:"pci_address"` + VendorID string `json:"vendor_id"` + DeviceID string `json:"device_id"` + VendorName string `json:"vendor_name"` + DeviceName string `json:"device_name"` + IOMMUGroup int `json:"iommu_group"` + CurrentDriver *string `json:"current_driver"` // nil if no driver bound +} + +// DeviceNamePattern is the regex pattern for valid device names +// Must start with alphanumeric, followed by alphanumeric, underscore, dot, or dash +var DeviceNamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_.-]+$`) + +// ValidateDeviceName validates that a device name matches the required pattern +func ValidateDeviceName(name string) bool { + return DeviceNamePattern.MatchString(name) +} + + diff --git a/lib/devices/vfio.go b/lib/devices/vfio.go new file mode 100644 index 0000000..38606f5 --- /dev/null +++ b/lib/devices/vfio.go @@ -0,0 +1,310 @@ +package devices + +import ( + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +const ( + vfioDriverPath = "/sys/bus/pci/drivers/vfio-pci" + pciDriversPath = "/sys/bus/pci/drivers" + vfioDevicePath = "/dev/vfio" +) + +// VFIOBinder handles binding and unbinding devices to/from VFIO +type VFIOBinder struct{} + +// NewVFIOBinder creates a new VFIOBinder +func NewVFIOBinder() *VFIOBinder { + return &VFIOBinder{} +} + +// IsVFIOAvailable checks if VFIO is available on the system +func (v *VFIOBinder) IsVFIOAvailable() bool { + _, err := os.Stat(vfioDriverPath) + return err == 
nil +} + +// IsDeviceBoundToVFIO checks if a device is currently bound to vfio-pci +func (v *VFIOBinder) IsDeviceBoundToVFIO(pciAddress string) bool { + driver := readCurrentDriver(pciAddress) + return driver != nil && *driver == "vfio-pci" +} + +// BindToVFIO binds a PCI device to the vfio-pci driver +// This requires: +// 1. Stopping any processes using the device (e.g., nvidia-persistenced for NVIDIA GPUs) +// 2. Unbinding the device from its current driver (if any) +// 3. Binding it to vfio-pci +func (v *VFIOBinder) BindToVFIO(pciAddress string) error { + if !v.IsVFIOAvailable() { + return ErrVFIONotAvailable + } + + if v.IsDeviceBoundToVFIO(pciAddress) { + return ErrAlreadyBound + } + + // Get device info for vendor/device IDs + deviceInfo, err := GetDeviceInfo(pciAddress) + if err != nil { + return fmt.Errorf("get device info: %w", err) + } + + // For NVIDIA GPUs, stop nvidia-persistenced which holds the device open + // This is required because the service keeps /dev/nvidia* open, blocking driver unbind + isNvidia := deviceInfo.VendorID == "10de" + stoppedNvidiaPersistenced := false + if isNvidia { + if err := v.stopNvidiaPersistenced(); err != nil { + slog.Warn("failed to stop nvidia-persistenced", "error", err) + // Continue anyway - it might not be running + } else { + stoppedNvidiaPersistenced = true + } + } + + // Use defer to ensure nvidia-persistenced is restarted on any error + // after we successfully stopped it + bindSucceeded := false + defer func() { + if stoppedNvidiaPersistenced && !bindSucceeded { + _ = v.startNvidiaPersistenced() + } + }() + + // Unbind from current driver if bound + currentDriver := readCurrentDriver(pciAddress) + if currentDriver != nil && *currentDriver != "" { + if err := v.unbindFromDriver(pciAddress, *currentDriver); err != nil { + return fmt.Errorf("unbind from %s: %w", *currentDriver, err) + } + } + + // Override driver to vfio-pci + if err := v.setDriverOverride(pciAddress, "vfio-pci"); err != nil { + return 
fmt.Errorf("set driver override: %w", err) + } + + // Bind to vfio-pci using the bind method (more reliable than new_id) + if err := v.bindDeviceToVFIO(pciAddress); err != nil { + return fmt.Errorf("bind to vfio-pci: %w", err) + } + + bindSucceeded = true + return nil +} + +// UnbindFromVFIO unbinds a device from vfio-pci and restores the original driver +func (v *VFIOBinder) UnbindFromVFIO(pciAddress string) error { + if !v.IsDeviceBoundToVFIO(pciAddress) { + return ErrNotBound + } + + // Get device info to check if it's NVIDIA + deviceInfo, err := GetDeviceInfo(pciAddress) + if err != nil { + return fmt.Errorf("get device info: %w", err) + } + isNvidia := deviceInfo.VendorID == "10de" + + // Clear driver override first + if err := v.setDriverOverride(pciAddress, ""); err != nil { + // Non-fatal, continue with unbind + } + + // Unbind from vfio-pci + if err := v.unbindFromDriver(pciAddress, "vfio-pci"); err != nil { + return fmt.Errorf("unbind from vfio-pci: %w", err) + } + + // Trigger driver probe to rebind to original driver + if err := v.triggerDriverProbe(pciAddress); err != nil { + slog.Warn("failed to trigger driver probe", "pci_address", pciAddress, "error", err) + } + + // For NVIDIA GPUs, restart nvidia-persistenced after rebinding + if isNvidia { + if err := v.startNvidiaPersistenced(); err != nil { + slog.Warn("failed to start nvidia-persistenced", "error", err) + } + } + + return nil +} + +// unbindFromDriver unbinds a device from its current driver +func (v *VFIOBinder) unbindFromDriver(pciAddress, driver string) error { + unbindPath := filepath.Join(pciDriversPath, driver, "unbind") + return os.WriteFile(unbindPath, []byte(pciAddress), 0200) +} + +// setDriverOverride sets the driver_override for a device +func (v *VFIOBinder) setDriverOverride(pciAddress, driver string) error { + overridePath := filepath.Join(sysfsDevicesPath, pciAddress, "driver_override") + + // Empty string clears the override + content := driver + if driver == "" { + content = 
"\n" // Writing newline clears the override + } + + return os.WriteFile(overridePath, []byte(content), 0200) +} + + +// bindDeviceToVFIO binds a specific device to vfio-pci using bind +func (v *VFIOBinder) bindDeviceToVFIO(pciAddress string) error { + bindPath := filepath.Join(vfioDriverPath, "bind") + return os.WriteFile(bindPath, []byte(pciAddress), 0200) +} + +// triggerDriverProbe triggers the kernel to probe for drivers for a device +func (v *VFIOBinder) triggerDriverProbe(pciAddress string) error { + probePath := "/sys/bus/pci/drivers_probe" + return os.WriteFile(probePath, []byte(pciAddress), 0200) +} + +// stopNvidiaPersistenced stops the nvidia-persistenced service +// This service keeps /dev/nvidia* open and blocks driver unbind +func (v *VFIOBinder) stopNvidiaPersistenced() error { + slog.Debug("stopping nvidia-persistenced service") + + // Try systemctl first (works as root) + cmd := exec.Command("systemctl", "stop", "nvidia-persistenced") + if err := cmd.Run(); err == nil { + return nil + } + + // Fall back to killing the process directly (works with CAP_KILL or as root) + // This is less clean but allows running with capabilities instead of full root + cmd = exec.Command("pkill", "-TERM", "nvidia-persistenced") + if err := cmd.Run(); err != nil { + // Check if process even exists + checkCmd := exec.Command("pgrep", "nvidia-persistenced") + if checkCmd.Run() != nil { + // Process doesn't exist, that's fine + return nil + } + return fmt.Errorf("failed to stop nvidia-persistenced (try: sudo systemctl stop nvidia-persistenced)") + } + + // Wait for process to exit with polling instead of arbitrary sleep + return v.waitForProcessExit("nvidia-persistenced", 2*time.Second) +} + +// waitForProcessExit polls for a process to exit, with timeout +func (v *VFIOBinder) waitForProcessExit(processName string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + pollInterval := 100 * time.Millisecond + + for time.Now().Before(deadline) { + checkCmd 
:= exec.Command("pgrep", processName) + if checkCmd.Run() != nil { + // Process no longer exists + return nil + } + time.Sleep(pollInterval) + } + + // Timeout - process still running + slog.Warn("timeout waiting for process to exit", "process", processName, "timeout", timeout) + return nil // Continue anyway, the bind might still work +} + +// startNvidiaPersistenced starts the nvidia-persistenced service +func (v *VFIOBinder) startNvidiaPersistenced() error { + slog.Debug("starting nvidia-persistenced service") + + // Try systemctl first (works as root) + cmd := exec.Command("systemctl", "start", "nvidia-persistenced") + if err := cmd.Run(); err != nil { + // If we can't start it, just log - not critical for test cleanup + slog.Warn("could not restart nvidia-persistenced", "error", err) + } + return nil +} + +// GetVFIOGroupPath returns the path to the VFIO group device for a PCI device +func (v *VFIOBinder) GetVFIOGroupPath(pciAddress string) (string, error) { + iommuGroup, err := readIOMMUGroup(pciAddress) + if err != nil { + return "", fmt.Errorf("read iommu group: %w", err) + } + + groupPath := filepath.Join(vfioDevicePath, fmt.Sprintf("%d", iommuGroup)) + if _, err := os.Stat(groupPath); os.IsNotExist(err) { + return "", fmt.Errorf("vfio group device not found: %s", groupPath) + } + + return groupPath, nil +} + +// CheckIOMMUGroupSafe checks if all devices in the IOMMU group are safe to pass through +// Returns an error if there are other devices in the group that aren't being passed through +func (v *VFIOBinder) CheckIOMMUGroupSafe(pciAddress string, allowedDevices []string) error { + iommuGroup, err := readIOMMUGroup(pciAddress) + if err != nil { + return fmt.Errorf("read iommu group: %w", err) + } + + groupDevices, err := GetIOMMUGroupDevices(iommuGroup) + if err != nil { + return fmt.Errorf("get iommu group devices: %w", err) + } + + // Build a set of allowed devices + allowed := make(map[string]bool) + for _, addr := range allowedDevices { + 
allowed[addr] = true + } + + // Check each device in the group + for _, device := range groupDevices { + if allowed[device] { + continue + } + + // Check if device is already bound to vfio-pci or is a bridge + driver := readCurrentDriver(device) + if driver != nil && *driver == "vfio-pci" { + continue + } + + // Check if it's a PCI bridge (these are usually okay to leave) + if v.isPCIBridge(device) { + continue + } + + // Found a device that's not allowed and not safe + return fmt.Errorf("%w: device %s in IOMMU group %d is not included", + ErrIOMMUGroupConflict, device, iommuGroup) + } + + return nil +} + +// isPCIBridge checks if a device is a PCI bridge +func (v *VFIOBinder) isPCIBridge(pciAddress string) bool { + classPath := filepath.Join(sysfsDevicesPath, pciAddress, "class") + classCode, err := readSysfsFile(classPath) + if err != nil { + return false + } + + classCode = strings.TrimPrefix(classCode, "0x") + // Class 06 = Bridge, Subclass 04 = PCI bridge + return len(classCode) >= 4 && classCode[:2] == "06" +} + +// GetDeviceSysfsPath returns the sysfs path for a PCI device (used by cloud-hypervisor) +func GetDeviceSysfsPath(pciAddress string) string { + return filepath.Join(sysfsDevicesPath, pciAddress) + "/" +} + + diff --git a/lib/instances/configdisk.go b/lib/instances/configdisk.go index dfc6bdb..fb9305a 100644 --- a/lib/instances/configdisk.go +++ b/lib/instances/configdisk.go @@ -1,6 +1,7 @@ package instances import ( + "context" "encoding/json" "fmt" "os" @@ -8,6 +9,7 @@ import ( "strconv" "strings" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" ) @@ -16,7 +18,7 @@ import ( // The disk contains: // - /config.sh - Shell script sourced by init // - /metadata.json - JSON metadata for programmatic access -func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { +func (m *manager) createConfigDisk(ctx context.Context, 
inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { // Create temporary directory for config files tmpDir, err := os.MkdirTemp("", "hypeman-config-*") if err != nil { @@ -25,7 +27,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC defer os.RemoveAll(tmpDir) // Generate config.sh - configScript := m.generateConfigScript(inst, imageInfo, netConfig) + configScript := m.generateConfigScript(ctx, inst, imageInfo, netConfig) configPath := filepath.Join(tmpDir, "config.sh") if err := os.WriteFile(configPath, []byte(configScript), 0644); err != nil { return fmt.Errorf("write config.sh: %w", err) @@ -53,7 +55,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC // Create ext4 disk with config files // Use ext4 for now (can switch to erofs when kernel supports it) diskPath := m.paths.InstanceConfigDisk(inst.Id) - + // Calculate size (config files are tiny, use 1MB minimum) _, err = images.ExportRootfs(tmpDir, diskPath, images.FormatExt4) if err != nil { @@ -64,32 +66,32 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC } // generateConfigScript creates the shell script that will be sourced by init -func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { +func (m *manager) generateConfigScript(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { // Prepare entrypoint value entrypoint := "" if len(imageInfo.Entrypoint) > 0 { entrypoint = shellQuoteArray(imageInfo.Entrypoint) } - + // Prepare cmd value cmd := "" if len(imageInfo.Cmd) > 0 { cmd = shellQuoteArray(imageInfo.Cmd) } - + // Prepare workdir value workdir := shellQuote("/") if imageInfo.WorkingDir != "" { workdir = shellQuote(imageInfo.WorkingDir) } - + // Build environment variable exports var envLines strings.Builder mergedEnv := mergeEnv(imageInfo.Env, inst.Env) for 
key, value := range mergedEnv { envLines.WriteString(fmt.Sprintf("export %s=%s\n", key, shellQuote(value))) } - + // Build network configuration section // Use netConfig directly instead of trying to derive it (VM hasn't started yet) networkSection := "" @@ -105,6 +107,17 @@ GUEST_DNS="%s" `, netConfig.IP, cidr, netConfig.Gateway, netConfig.DNS) } + // GPU passthrough configuration + // Only set HAS_GPU=1 if at least one attached device is actually a GPU + gpuSection := "" + for _, deviceID := range inst.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceID) + if err == nil && device.Type == devices.DeviceTypeGPU { + gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n" + break + } + } + // Build volume mounts section // Volumes are attached as /dev/vdd, /dev/vde, etc. (after vda=rootfs, vdb=overlay, vdc=config) // For overlay volumes, two devices are used: base + overlay disk @@ -137,7 +150,7 @@ GUEST_DNS="%s" volumeLines.WriteString("\"\n") volumeSection = volumeLines.String() } - + // Generate script as a readable template block // ENTRYPOINT and CMD contain shell-quoted arrays that will be eval'd in init script := fmt.Sprintf(`#!/bin/sh @@ -149,7 +162,7 @@ CMD="%s" WORKDIR=%s # Environment variables -%s%s%s`, +%s%s%s%s`, inst.Id, entrypoint, cmd, @@ -157,8 +170,9 @@ WORKDIR=%s envLines.String(), networkSection, volumeSection, + gpuSection, ) - + return script } diff --git a/lib/instances/create.go b/lib/instances/create.go index 0e02307..f133c0f 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -9,6 +9,7 @@ import ( "time" "github.com/nrednav/cuid2" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" @@ -141,7 +142,7 @@ func (m *manager) createInstance( return nil, ErrAlreadyExists } - // 5. Apply defaults + // 6. 
Apply defaults size := req.Size if size == 0 { size = 1 * 1024 * 1024 * 1024 // 1GB default @@ -191,16 +192,70 @@ func (m *manager) createInstance( req.Env = make(map[string]string) } - // 6. Determine network based on NetworkEnabled flag + // 7. Determine network based on NetworkEnabled flag networkName := "" if req.NetworkEnabled { networkName = "default" } - // 7. Get default kernel version + // 8. Get default kernel version kernelVer := m.systemManager.GetDefaultKernelVersion() - // 8. Create instance metadata + // 9. Validate, resolve, and auto-bind devices (GPU passthrough) + // Track devices we've marked as attached for cleanup on error. + // The cleanup closure captures this slice by reference, so it will see + // whatever devices have been attached when cleanup runs. + var attachedDeviceIDs []string + var resolvedDeviceIDs []string + + // Setup cleanup stack early so device attachment errors trigger cleanup + cu := cleanup.Make(func() { + log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) + m.deleteInstanceData(id) + }) + defer cu.Clean() + + // Add device detachment cleanup - closure captures attachedDeviceIDs by reference + if m.deviceManager != nil { + cu.Add(func() { + for _, deviceID := range attachedDeviceIDs { + log.DebugContext(ctx, "detaching device on cleanup", "instance_id", id, "device", deviceID) + m.deviceManager.MarkDetached(ctx, deviceID) + } + }) + } + + if len(req.Devices) > 0 && m.deviceManager != nil { + for _, deviceRef := range req.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceRef) + if err != nil { + log.ErrorContext(ctx, "failed to get device", "device", deviceRef, "error", err) + return nil, fmt.Errorf("device %s: %w", deviceRef, err) + } + if device.AttachedTo != nil { + log.ErrorContext(ctx, "device already attached", "device", deviceRef, "instance", *device.AttachedTo) + return nil, fmt.Errorf("device %s is already attached to instance %s", deviceRef, *device.AttachedTo) + } + // 
Auto-bind to VFIO if not already bound + if !device.BoundToVFIO { + log.InfoContext(ctx, "auto-binding device to VFIO", "device", deviceRef, "pci_address", device.PCIAddress) + if err := m.deviceManager.BindToVFIO(ctx, device.Id); err != nil { + log.ErrorContext(ctx, "failed to bind device to VFIO", "device", deviceRef, "error", err) + return nil, fmt.Errorf("bind device %s to VFIO: %w", deviceRef, err) + } + } + // Mark device as attached to this instance + if err := m.deviceManager.MarkAttached(ctx, device.Id, id); err != nil { + log.ErrorContext(ctx, "failed to mark device as attached", "device", deviceRef, "error", err) + return nil, fmt.Errorf("mark device %s as attached: %w", deviceRef, err) + } + attachedDeviceIDs = append(attachedDeviceIDs, device.Id) + resolvedDeviceIDs = append(resolvedDeviceIDs, device.Id) + } + log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs) + } + + // 10. Create instance metadata stored := &StoredMetadata{ Id: id, Name: req.Name, @@ -220,30 +275,24 @@ func (m *manager) createInstance( DataDir: m.paths.InstanceDir(id), VsockCID: vsockCID, VsockSocket: vsockSocket, + Devices: resolvedDeviceIDs, } - // Setup cleanup stack for automatic rollback on errors - cu := cleanup.Make(func() { - log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) - m.deleteInstanceData(id) - }) - defer cu.Clean() - - // 8. Ensure directories + // 11. Ensure directories log.DebugContext(ctx, "creating instance directories", "instance_id", id) if err := m.ensureDirectories(id); err != nil { log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err) return nil, fmt.Errorf("ensure directories: %w", err) } - // 9. Create overlay disk with specified size + // 12. 
Create overlay disk with specified size log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize) if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil { log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create overlay disk: %w", err) } - // 10. Allocate network (if network enabled) + // 13. Allocate network (if network enabled) var netConfig *network.NetworkConfig if networkName != "" { log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName) @@ -268,7 +317,7 @@ func (m *manager) createInstance( }) } - // 10.5. Validate and attach volumes + // 14. Validate and attach volumes if len(req.Volumes) > 0 { log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes)) for _, volAttach := range req.Volumes { @@ -308,15 +357,15 @@ func (m *manager) createInstance( stored.Volumes = req.Volumes } - // 11. Create config disk (needs Instance for buildVMConfig) + // 15. Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "instance_id", id) - if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { + if err := m.createConfigDisk(ctx, inst, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } - // 12. Save metadata + // 16. Save metadata log.DebugContext(ctx, "saving instance metadata", "instance_id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { @@ -324,14 +373,14 @@ func (m *manager) createInstance( return nil, fmt.Errorf("save metadata: %w", err) } - // 13. Start VMM and boot VM + // 17. 
Start VMM and boot VM log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err } - // 14. Update timestamp after VM is running + // 18. Update timestamp after VM is running now := time.Now() stored.StartedAt = &now @@ -487,7 +536,7 @@ func (m *manager) startAndBootVM( // Build VM configuration matching Cloud Hypervisor VmConfig inst := &Instance{StoredMetadata: *stored} - vmConfig, err := m.buildVMConfig(inst, imageInfo, netConfig) + vmConfig, err := m.buildVMConfig(ctx, inst, imageInfo, netConfig) if err != nil { return fmt.Errorf("build vm config: %w", err) } @@ -537,7 +586,7 @@ func (m *manager) startAndBootVM( } // buildVMConfig creates the Cloud Hypervisor VmConfig -func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { +func (m *manager) buildVMConfig(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { // Get system file paths kernelPath, _ := m.systemManager.GetKernelPath(system.KernelVersion(inst.KernelVersion)) initrdPath, _ := m.systemManager.GetInitrdPath() @@ -644,6 +693,22 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf Socket: inst.VsockSocket, } + // Device passthrough configuration (GPU, etc.) 
+ var deviceConfigs *[]vmm.DeviceConfig + if len(inst.Devices) > 0 && m.deviceManager != nil { + configs := make([]vmm.DeviceConfig, 0, len(inst.Devices)) + for _, deviceID := range inst.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceID) + if err != nil { + return vmm.VmConfig{}, fmt.Errorf("get device %s: %w", deviceID, err) + } + configs = append(configs, vmm.DeviceConfig{ + Path: devices.GetDeviceSysfsPath(device.PCIAddress), + }) + } + deviceConfigs = &configs + } + return vmm.VmConfig{ Payload: payload, Cpus: &cpus, @@ -653,6 +718,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf Console: &console, Net: nets, Vsock: &vsock, + Devices: deviceConfigs, }, nil } diff --git a/lib/instances/delete.go b/lib/instances/delete.go index 24d8ddb..06bc50c 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -59,7 +59,24 @@ func (m *manager) deleteInstance( } } - // 5. Detach volumes + // 5. Detach and auto-unbind devices from VFIO + if len(inst.Devices) > 0 && m.deviceManager != nil { + for _, deviceID := range inst.Devices { + log.DebugContext(ctx, "detaching device", "id", id, "device", deviceID) + // Mark device as detached + if err := m.deviceManager.MarkDetached(ctx, deviceID); err != nil { + log.WarnContext(ctx, "failed to mark device as detached", "id", id, "device", deviceID, "error", err) + } + // Auto-unbind from VFIO so native driver can reclaim it + log.InfoContext(ctx, "auto-unbinding device from VFIO", "id", id, "device", deviceID) + if err := m.deviceManager.UnbindFromVFIO(ctx, deviceID); err != nil { + // Log but continue - device might already be unbound or in use by another instance + log.WarnContext(ctx, "failed to unbind device from VFIO", "id", id, "device", deviceID, "error", err) + } + } + } + + // 5b. 
Detach volumes if len(inst.Volumes) > 0 { log.DebugContext(ctx, "detaching volumes", "instance_id", id, "count", len(inst.Volumes)) for _, volAttach := range inst.Volumes { diff --git a/lib/instances/exec_test.go b/lib/instances/exec_test.go index f0e6966..d3dbfde 100644 --- a/lib/instances/exec_test.go +++ b/lib/instances/exec_test.go @@ -75,7 +75,7 @@ func TestExecConcurrent(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "exec-test", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 2, // More vCPUs for concurrency diff --git a/lib/instances/liveness.go b/lib/instances/liveness.go new file mode 100644 index 0000000..19d3d20 --- /dev/null +++ b/lib/instances/liveness.go @@ -0,0 +1,155 @@ +package instances + +import ( + "context" + "os/exec" + "strings" + + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/logger" +) + +// Ensure instanceLivenessAdapter implements the interface +var _ devices.InstanceLivenessChecker = (*instanceLivenessAdapter)(nil) + +// instanceLivenessAdapter adapts instances.Manager to devices.InstanceLivenessChecker +type instanceLivenessAdapter struct { + manager *manager +} + +// NewLivenessChecker creates a new InstanceLivenessChecker that wraps the instances manager. +// This adapter allows the devices package to query instance state without a circular import. +func NewLivenessChecker(m Manager) devices.InstanceLivenessChecker { + // Type assert to get the concrete manager type + mgr, ok := m.(*manager) + if !ok { + return nil + } + return &instanceLivenessAdapter{manager: mgr} +} + +// IsInstanceRunning returns true if the instance exists and is in a running state +// (i.e., has an active VMM process). Returns false if the instance doesn't exist +// or is stopped/standby/unknown. 
+func (a *instanceLivenessAdapter) IsInstanceRunning(ctx context.Context, instanceID string) bool { + if a.manager == nil { + return false + } + inst, err := a.manager.getInstance(ctx, instanceID) + if err != nil { + return false + } + + // Consider instance "running" if the VMM is active (any of these states means VM is using the device) + switch inst.State { + case StateRunning, StatePaused, StateCreated: + return true + default: + // StateStopped, StateStandby, StateShutdown, StateUnknown + return false + } +} + +// GetInstanceDevices returns the list of device IDs attached to an instance. +// Returns nil if the instance doesn't exist. +func (a *instanceLivenessAdapter) GetInstanceDevices(ctx context.Context, instanceID string) []string { + if a.manager == nil { + return nil + } + inst, err := a.manager.getInstance(ctx, instanceID) + if err != nil { + return nil + } + return inst.Devices +} + +// ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances. +func (a *instanceLivenessAdapter) ListAllInstanceDevices(ctx context.Context) map[string][]string { + if a.manager == nil { + return nil + } + instances, err := a.manager.listInstances(ctx) + if err != nil { + return nil + } + + result := make(map[string][]string) + for _, inst := range instances { + if len(inst.Devices) > 0 { + result[inst.Id] = inst.Devices + } + } + return result +} + +// DetectSuspiciousVMMProcesses finds cloud-hypervisor processes that don't match +// known instances and logs warnings. Returns the count of suspicious processes found. +// This uses ListInstances (all instances) rather than ListAllInstanceDevices to avoid +// false positives for instances without GPU devices attached. 
+func (a *instanceLivenessAdapter) DetectSuspiciousVMMProcesses(ctx context.Context) int { + log := logger.FromContext(ctx) + + if a.manager == nil { + return 0 + } + + // Find all cloud-hypervisor processes + cmd := exec.Command("pgrep", "-a", "cloud-hypervisor") + output, err := cmd.Output() + if err != nil { + // pgrep returns exit code 1 if no processes found - that's fine + return 0 + } + + lines := strings.Split(strings.TrimSpace(string(output)), "\n") + if len(lines) == 0 || (len(lines) == 1 && lines[0] == "") { + return 0 + } + + suspiciousCount := 0 + for _, line := range lines { + if line == "" { + continue + } + + // Try to extract socket path from command line to match against known instances + // cloud-hypervisor command typically includes --api-socket + socketPath := "" + parts := strings.Fields(line) + for i, part := range parts { + if part == "--api-socket" && i+1 < len(parts) { + socketPath = parts[i+1] + break + } + } + + // Check if this socket path matches any known instance + matched := false + if socketPath != "" { + // Socket path is typically like /var/lib/hypeman/guests//ch.sock + // Try to extract instance ID + if strings.Contains(socketPath, "/guests/") { + pathParts := strings.Split(socketPath, "/guests/") + if len(pathParts) > 1 { + instancePath := pathParts[1] + instanceID := strings.Split(instancePath, "/")[0] + if a.IsInstanceRunning(ctx, instanceID) { + matched = true + } + } + } + } + + if !matched { + log.WarnContext(ctx, "detected untracked cloud-hypervisor process", + "process_info", line, + "socket_path", socketPath, + "remediation", "Run lib/devices/scripts/gpu-reset.sh for manual recovery if needed", + ) + suspiciousCount++ + } + } + + return suspiciousCount +} + diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 3b95a1c..7244c01 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/onkernel/hypeman/lib/devices" 
"github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -46,6 +47,7 @@ type manager struct { imageManager images.Manager systemManager system.Manager networkManager network.Manager + deviceManager devices.Manager volumeManager volumes.Manager limits ResourceLimits instanceLocks sync.Map // map[string]*sync.RWMutex - per-instance locks @@ -55,12 +57,13 @@ type manager struct { // NewManager creates a new instances manager. // If meter is nil, metrics are disabled. -func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { +func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { m := &manager{ paths: p, imageManager: imageManager, systemManager: systemManager, networkManager: networkManager, + deviceManager: deviceManager, volumeManager: volumeManager, limits: limits, instanceLocks: sync.Map{}, diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 2ee0a7f..95b9917 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -17,6 +17,7 @@ import ( "github.com/joho/godotenv" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/exec" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" @@ -46,6 +47,7 @@ func setupTestManager(t *testing.T) (*manager, string) { systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := ResourceLimits{ 
MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB @@ -54,7 +56,7 @@ func setupTestManager(t *testing.T) (*manager, string) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - mgr := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Register cleanup to kill any orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -246,7 +248,7 @@ func TestBasicEndToEnd(t *testing.T) { req := CreateInstanceRequest{ Name: "test-nginx", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, // 512MB + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB Vcpus: 1, @@ -754,6 +756,7 @@ func TestStorageOperations(t *testing.T) { imageManager, _ := images.NewManager(p, 1, nil) systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB @@ -762,7 +765,7 @@ func TestStorageOperations(t *testing.T) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - manager := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + manager := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Test metadata doesn't exist initially _, err := manager.loadMetadata("nonexistent") @@ -859,7 +862,7 @@ func TestStandbyAndRestore(t *testing.T) { req := CreateInstanceRequest{ Name: "test-standby", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA 
libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go index 579b4b8..419115e 100644 --- a/lib/instances/network_test.go +++ b/lib/instances/network_test.go @@ -63,7 +63,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-net-instance", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 5 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/resource_limits_test.go b/lib/instances/resource_limits_test.go index 8393042..91dc4a4 100644 --- a/lib/instances/resource_limits_test.go +++ b/lib/instances/resource_limits_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -159,9 +160,10 @@ func createTestManager(t *testing.T, limits ResourceLimits) *manager { systemMgr := system.NewManager(p) networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) - return NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil).(*manager) + return NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil).(*manager) } func TestResourceLimits_StructValues(t *testing.T) { @@ -251,20 +253,21 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // Set small aggregate limits: // - MaxTotalVcpus: 2 (first VM gets 1, second wants 2 -> denied) - // - MaxTotalMemory: 2GB 
(first VM gets 1GB, second wants 1.5GB -> denied) + // - MaxTotalMemory: 6GB (first VM gets 2.5GB, second wants 4GB -> denied) limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB MaxVcpusPerInstance: 4, // per-instance limit (high) - MaxMemoryPerInstance: 4 * 1024 * 1024 * 1024, // 4GB per-instance (high) + MaxMemoryPerInstance: 8 * 1024 * 1024 * 1024, // 8GB per-instance (high) MaxTotalVcpus: 2, // aggregate: only 2 total - MaxTotalMemory: 2 * 1024 * 1024 * 1024, // aggregate: only 2GB total + MaxTotalMemory: 6 * 1024 * 1024 * 1024, // aggregate: only 6GB total (allows first 2.5GB VM) } - mgr := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Cleanup any orphaned processes on test end t.Cleanup(func() { @@ -303,14 +306,14 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { assert.Equal(t, 0, usage.TotalVcpus, "Initial vCPUs should be 0") assert.Equal(t, int64(0), usage.TotalMemory, "Initial memory should be 0") - // Create first VM: 1 vCPU, 512MB + 512MB = 1GB memory - t.Log("Creating first instance (1 vCPU, 1GB memory)...") + // Create first VM: 1 vCPU, 2GB + 512MB = 2.5GB memory + t.Log("Creating first instance (1 vCPU, 2.5GB memory)...") inst1, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "small-vm-1", Image: "docker.io/library/alpine:latest", Vcpus: 1, - Size: 512 * 1024 * 1024, // 512MB - HotplugSize: 512 * 1024 * 1024, // 512MB (total 1GB) + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 1 * 1024 * 1024 * 1024, NetworkEnabled: false, }) @@ -322,7 +325,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { usage, err = mgr.calculateAggregateUsage(ctx) require.NoError(t, err) assert.Equal(t, 1, usage.TotalVcpus, "Should have 1 
vCPU in use") - assert.Equal(t, int64(1024*1024*1024), usage.TotalMemory, "Should have 1GB memory in use") + assert.Equal(t, int64(2*1024*1024*1024+512*1024*1024), usage.TotalMemory, "Should have 2.5GB memory in use") t.Logf("Aggregate usage after first VM: %d vCPUs, %d bytes memory", usage.TotalVcpus, usage.TotalMemory) // Try to create second VM: 2 vCPUs (would exceed MaxTotalVcpus=2) diff --git a/lib/instances/start.go b/lib/instances/start.go index a29c3ad..1687d59 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -84,7 +84,7 @@ func (m *manager) startInstance( // 5. Regenerate config disk with new network configuration instForConfig := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "regenerating config disk", "instance_id", id) - if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil { + if err := m.createConfigDisk(ctx, instForConfig, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } diff --git a/lib/instances/types.go b/lib/instances/types.go index e46372a..6320c23 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -67,6 +67,9 @@ type StoredMetadata struct { // vsock configuration VsockCID int64 // Guest vsock Context ID VsockSocket string // Host-side vsock socket path + + // Attached devices (GPU passthrough) + Devices []string // Device IDs attached to this instance } // Instance represents a virtual machine instance with derived runtime state @@ -89,6 +92,7 @@ type CreateInstanceRequest struct { Vcpus int // Default 2 Env map[string]string // Optional environment variables NetworkEnabled bool // Whether to enable networking (uses default network) + Devices []string // Device IDs or names to attach (GPU passthrough) Volumes []VolumeAttachment // Volumes to attach at creation time } diff --git a/lib/instances/volumes_test.go b/lib/instances/volumes_test.go index 
abb760f..3237db3 100644 --- a/lib/instances/volumes_test.go +++ b/lib/instances/volumes_test.go @@ -93,7 +93,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { writerInst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "writer", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -135,7 +135,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader1, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-1", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -151,7 +151,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader2, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-2-overlay", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -270,7 +270,7 @@ func TestOverlayDiskCleanupOnDelete(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "overlay-cleanup-test", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -394,7 +394,7 @@ func TestVolumeFromArchive(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "archive-reader", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, 
OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index 4709a15..0401b78 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -29,6 +29,12 @@ const ( BearerAuthScopes = "bearerAuth.Scopes" ) +// Defines values for DeviceType. +const ( + Gpu DeviceType = "gpu" + Pci DeviceType = "pci" +) + // Defines values for HealthStatus. const ( Ok HealthStatus = "ok" @@ -70,6 +76,39 @@ type AttachVolumeRequest struct { Readonly *bool `json:"readonly,omitempty"` } +// AvailableDevice defines model for AvailableDevice. +type AvailableDevice struct { + // CurrentDriver Currently bound driver (null if none) + CurrentDriver *string `json:"current_driver"` + + // DeviceId PCI device ID (hex) + DeviceId string `json:"device_id"` + + // DeviceName Human-readable device name + DeviceName *string `json:"device_name,omitempty"` + + // IommuGroup IOMMU group number + IommuGroup int `json:"iommu_group"` + + // PciAddress PCI address + PciAddress string `json:"pci_address"` + + // VendorId PCI vendor ID (hex) + VendorId string `json:"vendor_id"` + + // VendorName Human-readable vendor name + VendorName *string `json:"vendor_name,omitempty"` +} + +// CreateDeviceRequest defines model for CreateDeviceRequest. +type CreateDeviceRequest struct { + // Name Optional globally unique device name. If not provided, a name is auto-generated from the PCI address (e.g., "pci-0000-a2-00-0") + Name *string `json:"name,omitempty"` + + // PciAddress PCI address of the device (required, e.g., "0000:a2:00.0") + PciAddress string `json:"pci_address"` +} + // CreateImageRequest defines model for CreateImageRequest. type CreateImageRequest struct { // Name OCI image reference (e.g., docker.io/library/nginx:latest) @@ -87,6 +126,9 @@ type CreateIngressRequest struct { // CreateInstanceRequest defines model for CreateInstanceRequest. 
type CreateInstanceRequest struct { + // Devices Device IDs or names to attach for GPU/PCI passthrough + Devices *[]string `json:"devices,omitempty"` + // Env Environment variables Env *map[string]string `json:"env,omitempty"` @@ -130,6 +172,44 @@ type CreateVolumeRequest struct { SizeGb int `json:"size_gb"` } +// Device defines model for Device. +type Device struct { + // AttachedTo Instance ID if attached + AttachedTo *string `json:"attached_to"` + + // BoundToVfio Whether the device is currently bound to the vfio-pci driver, which is required for VM passthrough. + // - true: Device is bound to vfio-pci and ready for (or currently in use by) a VM. The device's native driver has been unloaded. + // - false: Device is using its native driver (e.g., nvidia) or no driver. Hypeman will automatically bind to vfio-pci when attaching to an instance. + BoundToVfio bool `json:"bound_to_vfio"` + + // CreatedAt Registration timestamp (RFC3339) + CreatedAt time.Time `json:"created_at"` + + // DeviceId PCI device ID (hex) + DeviceId string `json:"device_id"` + + // Id Auto-generated unique identifier (CUID2 format) + Id string `json:"id"` + + // IommuGroup IOMMU group number + IommuGroup int `json:"iommu_group"` + + // Name Device name (user-provided or auto-generated from PCI address) + Name *string `json:"name,omitempty"` + + // PciAddress PCI address + PciAddress string `json:"pci_address"` + + // Type Type of PCI device + Type DeviceType `json:"type"` + + // VendorId PCI vendor ID (hex) + VendorId string `json:"vendor_id"` +} + +// DeviceType Type of PCI device +type DeviceType string + // Error defines model for Error. type Error struct { // Code Application-specific error code (machine-readable) @@ -415,6 +495,9 @@ type CreateVolumeMultipartBody struct { SizeGb int `json:"size_gb"` } +// CreateDeviceJSONRequestBody defines body for CreateDevice for application/json ContentType. 
+type CreateDeviceJSONRequestBody = CreateDeviceRequest + // CreateImageJSONRequestBody defines body for CreateImage for application/json ContentType. type CreateImageJSONRequestBody = CreateImageRequest @@ -506,6 +589,23 @@ func WithRequestEditorFn(fn RequestEditorFn) ClientOption { // The interface specification for the client above. type ClientInterface interface { + // ListDevices request + ListDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) + + // CreateDeviceWithBody request with any body + CreateDeviceWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + + CreateDevice(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) + + // ListAvailableDevices request + ListAvailableDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) + + // DeleteDevice request + DeleteDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) + + // GetDevice request + GetDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) + // GetHealth request GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -589,6 +689,78 @@ type ClientInterface interface { GetVolume(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) } +func (c *Client) ListDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewListDevicesRequest(c.Server) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) CreateDeviceWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewCreateDeviceRequestWithBody(c.Server, 
contentType, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) CreateDevice(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewCreateDeviceRequest(c.Server, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) ListAvailableDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewListAvailableDevicesRequest(c.Server) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) DeleteDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewDeleteDeviceRequest(c.Server, id) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) GetDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewGetDeviceRequest(c.Server, id) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + func (c *Client) GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewGetHealthRequest(c.Server) if err != nil { @@ -937,6 +1109,168 @@ func (c *Client) GetVolume(ctx context.Context, id string, reqEditors ...Request return c.Client.Do(req) } +// NewListDevicesRequest generates requests for 
ListDevices +func NewListDevicesRequest(server string) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices") + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewCreateDeviceRequest calls the generic CreateDevice builder with application/json body +func NewCreateDeviceRequest(server string, body CreateDeviceJSONRequestBody) (*http.Request, error) { + var bodyReader io.Reader + buf, err := json.Marshal(body) + if err != nil { + return nil, err + } + bodyReader = bytes.NewReader(buf) + return NewCreateDeviceRequestWithBody(server, "application/json", bodyReader) +} + +// NewCreateDeviceRequestWithBody generates requests for CreateDevice with any type of body +func NewCreateDeviceRequestWithBody(server string, contentType string, body io.Reader) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices") + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", queryURL.String(), body) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", contentType) + + return req, nil +} + +// NewListAvailableDevicesRequest generates requests for ListAvailableDevices +func NewListAvailableDevicesRequest(server string) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/available") + if operationPath[0] == '/' { + operationPath = "." 
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewDeleteDeviceRequest generates requests for DeleteDevice +func NewDeleteDeviceRequest(server string, id string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("DELETE", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewGetDeviceRequest generates requests for GetDevice +func NewGetDeviceRequest(server string, id string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." 
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + // NewGetHealthRequest generates requests for GetHealth func NewGetHealthRequest(server string) (*http.Request, error) { var err error @@ -1866,6 +2200,23 @@ func WithBaseURL(baseURL string) ClientOption { // ClientWithResponsesInterface is the interface specification for the client with responses above. type ClientWithResponsesInterface interface { + // ListDevicesWithResponse request + ListDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListDevicesResponse, error) + + // CreateDeviceWithBodyWithResponse request with any body + CreateDeviceWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) + + CreateDeviceWithResponse(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) + + // ListAvailableDevicesWithResponse request + ListAvailableDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListAvailableDevicesResponse, error) + + // DeleteDeviceWithResponse request + DeleteDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*DeleteDeviceResponse, error) + + // GetDeviceWithResponse request + GetDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetDeviceResponse, error) + // GetHealthWithResponse request GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) @@ -1949,14 +2300,16 @@ type ClientWithResponsesInterface interface { GetVolumeWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetVolumeResponse, error) } -type GetHealthResponse struct { +type ListDevicesResponse struct { Body []byte 
HTTPResponse *http.Response - JSON200 *Health + JSON200 *[]Device + JSON401 *Error + JSON500 *Error } // Status returns HTTPResponse.Status -func (r GetHealthResponse) Status() string { +func (r ListDevicesResponse) Status() string { if r.HTTPResponse != nil { return r.HTTPResponse.Status } @@ -1964,17 +2317,138 @@ func (r GetHealthResponse) Status() string { } // StatusCode returns HTTPResponse.StatusCode -func (r GetHealthResponse) StatusCode() int { +func (r ListDevicesResponse) StatusCode() int { if r.HTTPResponse != nil { return r.HTTPResponse.StatusCode } return 0 } -type ListImagesResponse struct { +type CreateDeviceResponse struct { Body []byte HTTPResponse *http.Response - JSON200 *[]Image + JSON201 *Device + JSON400 *Error + JSON401 *Error + JSON404 *Error + JSON409 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r CreateDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r CreateDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type ListAvailableDevicesResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *[]AvailableDevice + JSON401 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r ListAvailableDevicesResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r ListAvailableDevicesResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type DeleteDeviceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON404 *Error + JSON409 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r DeleteDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return 
r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r DeleteDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetDeviceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *Device + JSON404 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r GetDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetHealthResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *Health +} + +// Status returns HTTPResponse.Status +func (r GetHealthResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetHealthResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type ListImagesResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *[]Image JSON401 *Error JSON500 *Error } @@ -2533,6 +3007,59 @@ func (r GetVolumeResponse) StatusCode() int { return 0 } +// ListDevicesWithResponse request returning *ListDevicesResponse +func (c *ClientWithResponses) ListDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListDevicesResponse, error) { + rsp, err := c.ListDevices(ctx, reqEditors...) 
+ if err != nil { + return nil, err + } + return ParseListDevicesResponse(rsp) +} + +// CreateDeviceWithBodyWithResponse request with arbitrary body returning *CreateDeviceResponse +func (c *ClientWithResponses) CreateDeviceWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) { + rsp, err := c.CreateDeviceWithBody(ctx, contentType, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseCreateDeviceResponse(rsp) +} + +func (c *ClientWithResponses) CreateDeviceWithResponse(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) { + rsp, err := c.CreateDevice(ctx, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseCreateDeviceResponse(rsp) +} + +// ListAvailableDevicesWithResponse request returning *ListAvailableDevicesResponse +func (c *ClientWithResponses) ListAvailableDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListAvailableDevicesResponse, error) { + rsp, err := c.ListAvailableDevices(ctx, reqEditors...) + if err != nil { + return nil, err + } + return ParseListAvailableDevicesResponse(rsp) +} + +// DeleteDeviceWithResponse request returning *DeleteDeviceResponse +func (c *ClientWithResponses) DeleteDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*DeleteDeviceResponse, error) { + rsp, err := c.DeleteDevice(ctx, id, reqEditors...) + if err != nil { + return nil, err + } + return ParseDeleteDeviceResponse(rsp) +} + +// GetDeviceWithResponse request returning *GetDeviceResponse +func (c *ClientWithResponses) GetDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetDeviceResponse, error) { + rsp, err := c.GetDevice(ctx, id, reqEditors...) 
+ if err != nil { + return nil, err + } + return ParseGetDeviceResponse(rsp) +} + // GetHealthWithResponse request returning *GetHealthResponse func (c *ClientWithResponses) GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) { rsp, err := c.GetHealth(ctx, reqEditors...) @@ -2789,6 +3316,227 @@ func (c *ClientWithResponses) GetVolumeWithResponse(ctx context.Context, id stri return ParseGetVolumeResponse(rsp) } +// ParseListDevicesResponse parses an HTTP response from a ListDevicesWithResponse call +func ParseListDevicesResponse(rsp *http.Response) (*ListDevicesResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ListDevicesResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest []Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseCreateDeviceResponse parses an HTTP response from a CreateDeviceWithResponse call +func ParseCreateDeviceResponse(rsp *http.Response) (*CreateDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &CreateDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), 
"json") && rsp.StatusCode == 201: + var dest Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON201 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 400: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseListAvailableDevicesResponse parses an HTTP response from a ListAvailableDevicesWithResponse call +func ParseListAvailableDevicesResponse(rsp *http.Response) (*ListAvailableDevicesResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ListAvailableDevicesResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest []AvailableDevice + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case 
strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseDeleteDeviceResponse parses an HTTP response from a DeleteDeviceWithResponse call +func ParseDeleteDeviceResponse(rsp *http.Response) (*DeleteDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &DeleteDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseGetDeviceResponse parses an HTTP response from a GetDeviceWithResponse call +func ParseGetDeviceResponse(rsp *http.Response) (*GetDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &GetDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && 
rsp.StatusCode == 200: + var dest Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + // ParseGetHealthResponse parses an HTTP response from a GetHealthWithResponse call func ParseGetHealthResponse(rsp *http.Response) (*GetHealthResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) @@ -3807,6 +4555,21 @@ func ParseGetVolumeResponse(rsp *http.Response) (*GetVolumeResponse, error) { // ServerInterface represents all server handlers. type ServerInterface interface { + // List registered devices + // (GET /devices) + ListDevices(w http.ResponseWriter, r *http.Request) + // Register a device for passthrough + // (POST /devices) + CreateDevice(w http.ResponseWriter, r *http.Request) + // Discover passthrough-capable devices on host + // (GET /devices/available) + ListAvailableDevices(w http.ResponseWriter, r *http.Request) + // Unregister device + // (DELETE /devices/{id}) + DeleteDevice(w http.ResponseWriter, r *http.Request, id string) + // Get device details + // (GET /devices/{id}) + GetDevice(w http.ResponseWriter, r *http.Request, id string) // Health check // (GET /health) GetHealth(w http.ResponseWriter, r *http.Request) @@ -3885,6 +4648,36 @@ type ServerInterface interface { type Unimplemented struct{} +// List registered devices +// (GET /devices) +func (_ Unimplemented) ListDevices(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Register a device for passthrough +// (POST /devices) +func 
(_ Unimplemented) CreateDevice(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Discover passthrough-capable devices on host +// (GET /devices/available) +func (_ Unimplemented) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Unregister device +// (DELETE /devices/{id}) +func (_ Unimplemented) DeleteDevice(w http.ResponseWriter, r *http.Request, id string) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Get device details +// (GET /devices/{id}) +func (_ Unimplemented) GetDevice(w http.ResponseWriter, r *http.Request, id string) { + w.WriteHeader(http.StatusNotImplemented) +} + // Health check // (GET /health) func (_ Unimplemented) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -4038,6 +4831,128 @@ type ServerInterfaceWrapper struct { type MiddlewareFunc func(http.Handler) http.Handler +// ListDevices operation middleware +func (siw *ServerInterfaceWrapper) ListDevices(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ListDevices(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// CreateDevice operation middleware +func (siw *ServerInterfaceWrapper) CreateDevice(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.CreateDevice(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// ListAvailableDevices operation middleware +func (siw *ServerInterfaceWrapper) 
ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ListAvailableDevices(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// DeleteDevice operation middleware +func (siw *ServerInterfaceWrapper) DeleteDevice(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.DeleteDevice(w, r, id) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// GetDevice operation middleware +func (siw *ServerInterfaceWrapper) GetDevice(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler 
:= http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.GetDevice(w, r, id) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // GetHealth operation middleware func (siw *ServerInterfaceWrapper) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -4835,6 +5750,21 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl ErrorHandlerFunc: options.ErrorHandlerFunc, } + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices", wrapper.ListDevices) + }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/devices", wrapper.CreateDevice) + }) + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices/available", wrapper.ListAvailableDevices) + }) + r.Group(func(r chi.Router) { + r.Delete(options.BaseURL+"/devices/{id}", wrapper.DeleteDevice) + }) + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices/{id}", wrapper.GetDevice) + }) r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/health", wrapper.GetHealth) }) @@ -4911,6 +5841,214 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl return r } +type ListDevicesRequestObject struct { +} + +type ListDevicesResponseObject interface { + VisitListDevicesResponse(w http.ResponseWriter) error +} + +type ListDevices200JSONResponse []Device + +func (response ListDevices200JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ListDevices401JSONResponse Error + +func (response ListDevices401JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ListDevices500JSONResponse Error + +func (response ListDevices500JSONResponse) 
VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDeviceRequestObject struct { + Body *CreateDeviceJSONRequestBody +} + +type CreateDeviceResponseObject interface { + VisitCreateDeviceResponse(w http.ResponseWriter) error +} + +type CreateDevice201JSONResponse Device + +func (response CreateDevice201JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(201) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice400JSONResponse Error + +func (response CreateDevice400JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice401JSONResponse Error + +func (response CreateDevice401JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice404JSONResponse Error + +func (response CreateDevice404JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice409JSONResponse Error + +func (response CreateDevice409JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice500JSONResponse Error + +func (response CreateDevice500JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type 
ListAvailableDevicesRequestObject struct { +} + +type ListAvailableDevicesResponseObject interface { + VisitListAvailableDevicesResponse(w http.ResponseWriter) error +} + +type ListAvailableDevices200JSONResponse []AvailableDevice + +func (response ListAvailableDevices200JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevices401JSONResponse Error + +func (response ListAvailableDevices401JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevices500JSONResponse Error + +func (response ListAvailableDevices500JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDeviceRequestObject struct { + Id string `json:"id"` +} + +type DeleteDeviceResponseObject interface { + VisitDeleteDeviceResponse(w http.ResponseWriter) error +} + +type DeleteDevice204Response struct { +} + +func (response DeleteDevice204Response) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.WriteHeader(204) + return nil +} + +type DeleteDevice404JSONResponse Error + +func (response DeleteDevice404JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDevice409JSONResponse Error + +func (response DeleteDevice409JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDevice500JSONResponse 
Error + +func (response DeleteDevice500JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type GetDeviceRequestObject struct { + Id string `json:"id"` +} + +type GetDeviceResponseObject interface { + VisitGetDeviceResponse(w http.ResponseWriter) error +} + +type GetDevice200JSONResponse Device + +func (response GetDevice200JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type GetDevice404JSONResponse Error + +func (response GetDevice404JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type GetDevice500JSONResponse Error + +func (response GetDevice500JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + type GetHealthRequestObject struct { } @@ -5876,6 +7014,21 @@ func (response GetVolume500JSONResponse) VisitGetVolumeResponse(w http.ResponseW // StrictServerInterface represents all server handlers. 
type StrictServerInterface interface { + // List registered devices + // (GET /devices) + ListDevices(ctx context.Context, request ListDevicesRequestObject) (ListDevicesResponseObject, error) + // Register a device for passthrough + // (POST /devices) + CreateDevice(ctx context.Context, request CreateDeviceRequestObject) (CreateDeviceResponseObject, error) + // Discover passthrough-capable devices on host + // (GET /devices/available) + ListAvailableDevices(ctx context.Context, request ListAvailableDevicesRequestObject) (ListAvailableDevicesResponseObject, error) + // Unregister device + // (DELETE /devices/{id}) + DeleteDevice(ctx context.Context, request DeleteDeviceRequestObject) (DeleteDeviceResponseObject, error) + // Get device details + // (GET /devices/{id}) + GetDevice(ctx context.Context, request GetDeviceRequestObject) (GetDeviceResponseObject, error) // Health check // (GET /health) GetHealth(ctx context.Context, request GetHealthRequestObject) (GetHealthResponseObject, error) @@ -5979,6 +7132,137 @@ type strictHandler struct { options StrictHTTPServerOptions } +// ListDevices operation middleware +func (sh *strictHandler) ListDevices(w http.ResponseWriter, r *http.Request) { + var request ListDevicesRequestObject + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ListDevices(ctx, request.(ListDevicesRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ListDevices") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ListDevicesResponseObject); ok { + if err := validResponse.VisitListDevicesResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} 
+ +// CreateDevice operation middleware +func (sh *strictHandler) CreateDevice(w http.ResponseWriter, r *http.Request) { + var request CreateDeviceRequestObject + + var body CreateDeviceJSONRequestBody + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + sh.options.RequestErrorHandlerFunc(w, r, fmt.Errorf("can't decode JSON body: %w", err)) + return + } + request.Body = &body + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.CreateDevice(ctx, request.(CreateDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "CreateDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(CreateDeviceResponseObject); ok { + if err := validResponse.VisitCreateDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// ListAvailableDevices operation middleware +func (sh *strictHandler) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + var request ListAvailableDevicesRequestObject + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ListAvailableDevices(ctx, request.(ListAvailableDevicesRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ListAvailableDevices") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ListAvailableDevicesResponseObject); ok { + if err := validResponse.VisitListAvailableDevicesResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + 
} else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// DeleteDevice operation middleware +func (sh *strictHandler) DeleteDevice(w http.ResponseWriter, r *http.Request, id string) { + var request DeleteDeviceRequestObject + + request.Id = id + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.DeleteDevice(ctx, request.(DeleteDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "DeleteDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(DeleteDeviceResponseObject); ok { + if err := validResponse.VisitDeleteDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// GetDevice operation middleware +func (sh *strictHandler) GetDevice(w http.ResponseWriter, r *http.Request, id string) { + var request GetDeviceRequestObject + + request.Id = id + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.GetDevice(ctx, request.(GetDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "GetDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(GetDeviceResponseObject); ok { + if err := validResponse.VisitGetDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", 
response)) + } +} + // GetHealth operation middleware func (sh *strictHandler) GetHealth(w http.ResponseWriter, r *http.Request) { var request GetHealthRequestObject @@ -6637,90 +7921,104 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xdC3PTyJb+K6e0d2qdXfmRBLjgW1tbmQSYTBFIEcjsXcyGtnRs99DqFt0tJ4bKf9/q", - "h2TJkh8ZEkMuVFFFbPXrvL8+fVr+EkQiSQVHrlXQ/xKoaIIJsX8eaE2iyblgWYKv8VOGSpuvUylSlJqi", - "bZSIjOuLlOiJ+RSjiiRNNRU86AenRE/gcoISYWpHATURGYthiGD7YRyEAV6RJGUY9INuwnU3JpoEYaBn", - "qflKaUn5OLgOA4kkFpzN3DQjkjEd9EeEKQwXpj0xQwNRYLq0bZ9ivKEQDAkPru2InzIqMQ7678pkvC8a", - "i+GfGGkz+aFEovE4IePlnOAkwToPXh0eAzX9QOIIJfIIoYWdcSeEWEQfUXao6DI6lETOunxM+VWfEY1K", - "71RYs7ptnV8L5Nm1rSCMjyUqdUPSfssSwtuGyWTIEEwjaDFxiTIiCoGh1ihVCDEdU61CIDyGmKgJKjBC", - "+QdEhHOhQWkiNQgJyGO4pHoCxLarciCZtUlK29QtNQiDhFy9QD42ivdoPwxSYqYz6/q/d6T9udd+8r7l", - "/2i//4/8q53//lujcmXMUVql8LXINOVjsI9hJCToCVUwXwPVmNh+f5M4CvrBv3Xn1tT1ptTNuZsxNHMl", - "lB+7brvFSoiUZNYstXxxq6SnNOHRcs1EPjX/kTimhjDCTiuPa9yoMuEpn1IpeIJcw5RIaoStyqL5Erx8", - "dfT04unL86BvZo6zyHYNg9NXr98E/WC/1+uZcWvrnwidsmx8oehnrNh1sP/812BxIQfF+iHBRMiZlYgf", - "A1qTqjqOhEyIBkY/IgzMeIMghEGw+3wQVBVrz05VY4I12o3seY2hEpZSjkstNfxerOtSyI9MkLi9e8vG", - "xVGbseskvnQPIBJ8RMeZJOZ7b2YI1Kt1ENbU2XAkriiMllktDvwxQT1BCVoAsaGsGNJ8Zabw3SFfYYkj", - "bsCGqFFTYjFFycisQYl3ew1a/Iek2krU94OYqo9gOq9RYTOa0+GHvboS95q1uGFRDWv61WiUt6lNVlIs", - "ZHfvxP+5t6ldTaM0U5Ul7S0u52WWDFGCGMGUSp0RBoenbysuZ68YmHKNY5R2ZIsxGty4gzCqpAhe/oU+", - "EA2R8aVG/zS1Xncj1+5GtoCj5OBWenPnV5Z78zV4i8YNPin1bjHKlBYJ0Bi5piOKElok06I9Ro6SaIyB", - "jsA4hVSKKY0xrkpsKljbwC/rATZ0U2654ImrOBQ7lBPKMtW8GA/rQ54ZDaQcxnRMhjNdDTa7vbromxmd", - "j9/E6qdSCllnbiTiBhIP0pTRyCpHW6UY0RGNAM0IYDpAKyHRhHIszKXK1SGJL6QXZ9gUbDWhrEFrS+HO", - "TeZbQst4yCRjmqYM3TO1s6nGWsqP7Eh1jQ0DyjnKC8zZc4ORElSqMWIuBLKclqKJdfgxDrPx2LCkzLoT", - "qpTFX166MKLI4r4LwGtBr5XmfGFL9cDTsKE2vDAhuM1wiqysBM6izGITIREKPXFCq1BF+ZQwGl9QnmaN", - 
"KrGUlc8yaSOaGxTIUGTaOjInsPIkdq9ibX0kMh43MqvGjt+QMLeRq3JCaaIzH3uzxPBWfDT8nE8nPq4V", - "hx+kSQzHOdZaEEDS4OwOT45gJEViUIMmlKOEBDXx28ZiRe8Cu0EKwqBtdCommAgOYjT6h1lBYSp1L5cx", - "ZvR0AQEUBmLDBMYXRDcsrRxClCZJCq3Xzw739/efLEbrvYft3m579+Gb3V6/Z/79bxAGLsoaEEk0tn0c", - "qjsMOvaRYWGzgkqwKcaQEE5HqDT4luWZ1YTsPXzUJ8Nod28/xtGDh486nU7TNMi1nKWC8oapnhbPNhNF", - "16Hi9nzMjpp8nRzuYE+zCS1fgtODN78F/aCbKdllIiKsq4aU90ufi4/zB/YP93FIeeNeqPC5Cyu1LsZ7", - "BBO+nRkBVTAilC1kUNKMMf9931DCMSoUUlhns4Sv68L8S6OajH7GGBozGpqMzR7DadzXpS7C4FOGGV6k", - "QlE3ey2v5J8YkDDMKIvB9oCWIS6HOParKsDZW0p+CUVa2OBgR23iowKqm5lNGz9nxjVlNt80q8z4cP/R", - "47/3nuzulYybcv3oQbDRUgq3uwDXLc3+aVj45BR57CKoUQP3VyT41FiF/WDXZ/yMU5yKA8+f1YRhNkaU", - "jy9i2qCdf7iHEFOJkbZb8vU2FHRJmq5XxWZUV/i0gvySR26MLT5ZU48u39yTN2H5gypezzj9lGEJ0Vdn", - "fzX+/dP/qNO//7n76cX5+T+nz38/ekn/ec5OX31VomF14u2bZs9WbrGo8YaVrNmm6nFCdNQAfCZC6SVc", - "80/MVjIxnTtwSDgMsT/gbXhBNUrC+jAISEo7npmdSCSDAFp4RSLteoHgYIaCCZIY5Y7pfOqSLabzl3yH", - "er04RjzjJKERSM/kIVEYm9FUNoxFQijfGfAB92NBToiCTDkZxxCRVGcSjUQgyiSbwVCSyPg0l5+eTx7C", - "F5Km1zsDridEA15paShIidTKbNMNBM1nsIL2q+rAGwNOXXOMYUpYhgoiy6gBL+JHbJZgBtFEjlF3in25", - "xfudAa8oZDNTmhQzFVJXUg2Pe2GDHMG0M4JkVGnkUCShqLLKC608UfS4VzH/x73H67ejhQ6tUD+r3fWz", - "lVwpN7APp8B2aueMLyZap+sPS6y/cTYCv715c2rYYP4/g3ygOS8KEbcEZzMgZl+MCi4nyEEzi0l8bm6n", - "4cQlDJx0NyTojWtsujG1no6ndmJ48+IMNMqEcue/W5Fh58hs3xFsLoQqlRlVpAQODk+e7nQ2OByyvC3W", - "v0KObwoKF7I2eS6z5kdcj3kmyvA3hOOj0MApb6FzoNUx/uGZkMCcg5nbdR/eKqwmtayohITjIx+f2Sw3", - "7YH36oNgJx8xXfQUfXhd4DtSLMXG94q9F0PO7dIOO+B/GMXI7A6+NnpYXauxtHz/4l0bYUbHNPjciQ3F", - "y13BavNv4Li1ecEXU803s+1yjtpM1qwac9nfOQLZvxkCuZuTofo5D1EXipNUTUQDqXmenkDeBvCKKl3B", - "DHUBeTdQdzP1U6Wqw3fnRSvS3ZudD/0F8Aatw7fHR3s+nV6dRn9+QJ48vroi+skjeqmefE6GcvznPrkn", - "Z1MrT5O+9kjIQ4zNToSaVKvsZ9wRAMZ/+RAoDGjaIHul6JhjDMenQOLYRIPyhjQfvir03Sd7nd1Hjzu7", - "vV5nt7fJ9jwh0Yq5Tw4ON5+8t+c2LH0y7EdxH0dfkR7wYnMBgbBLMlMwyEP2IHAYoQQOSkrpw/pGCcr6", - "WdtfO1pbkMLaw7ObHJZt5D3sqewS139mT2xv7vcfLvX7a6VqNtO4Hpg5IzqzjfNeFzdJXCFEImMx/3cN", - 
"Q2N5Dqph7BGlQu00xbWlCt7yj1xc8irpLn9h7PdThnIG5ycnlWyXxFGmbGZsA8JFmi6Vg0hvJIa9NeF3", - "7WpKZ6PbOA9d9IQl/3rrp5/lrXl+fOO0boMtelnvatTkj53S2O23O0mN+0YzwI8Ow0xDURBhVO6QiSyG", - "32YpyilVQoLZM0zRIuLXGeeUj80INmZE5gmbgXTfr+58Soz65X1T+2l1j7NJpmNxyW0fNck0mE92yYYE", - "D4dWD+E0uQ8vhe3jVxoa97+Aq1xzwuPhrN58EYO13G7dwHMtJMZ2Mm+WfXhWmGJhzN54Wwr9n85D+GM+", - "e4S542C8z1t6aQVh4LkehIFjYRAGOWfMn45C+5ddfBAGfiElvZlbk1PPOux2ap/kFY8Lx3tUaWNpUSal", - "wbWlxtDCJNWzPMGcW8/OzczloBiw6fz1trcEvSe3kZR8uzIL+S9SNlD2UPkka31TTaZLt/4XTYw9PlrE", - "tm6/48tlq2h14RBZ6bY7xms8Ql5RluvqY82zPO02zhbPCW9QittYcjXBkuW43f28FnfdFm7JJvvCSqhE", - "WWkly2XjwtNX1i1TlRcs/0WWeUS6Po/lnCGkKNuFSuRw1njQS0ntsZNnkGOsYcF/GUzRnHdbjZpPyFUx", - "g8WzRMFC+ZejY57psQVgOx14nddl0FE+hF1GpwqvmyHw5gXduVbVhbGqwjsHQI2G5/3PCo+2zLYWlHM+", - "R7i6iNy4LowySfXszAQEp4ZDJBLlQebU0EYKS4T9ej65zeVeX9sCnZGok/McOUoawcHpsdWShHAyNiI7", - "PwFGRxjNIoY+FVcDEbYo9NXhcdudIeQ7d5vJodoyxLROCDfjB2EwRancvL3OXseW9ooUOUlp0A/2O7sd", - "s5M1bLAkdidFVYnPjRo7tJHsOLZr177uxHBWpYIrx5u9Xs+V4XDtPSuZV2J1/1TuaNhF13Wx189gWbgQ", - "NgwbXGbALdRhV5UlCZEzQ7v9FqIJRh/to67Fr2opQQZCHLsmX0nRZgdkFk3X8XeN0hza+OVfh8GD3u6t", - "cdiV1DVM+5aTTE+EpJ8xNpM+vEWxLp30mGuUnDBQKKcofYFU2QiD/ruq+b17f/2+LHfLrjmvUqEaZF26", - "GBI4x4BK/yri2a2R2HD15LrqhIzHva5p2t6trcArWAOTba5xmFcjuF0VUTMe7Tjt2oKgfyVxfkLwzTT6", - "Qe/BFjR6oaDvHlnSacaYvZ/gq1HmJURlf9r9YsD3tQtuDN1Wv2ptR/b73NpSIklidp3KrmBBRq9ftJFH", - "IjboxLHOJx/MUw8f3d6kqBioWFRYYtwiBHhfs7YHDdjezupI+akmG6iJk26uGOFStPAV8ncQdn7z7pe9", - "Z/4o5Je9Z+4w5Jf9g/kFvLtRlt62XHNeW/5T+dYq33P0wX7ONOua3Ln+OrRXtNoK4PMFVDeBfMUCf6K+", - "TVBfmV0rgV9Ry3aH0K96OXcj8Hd7Ai6UrYnb9lGeYP/BIN+Tu5/0UPARo5GGdq6Rbq9uU4Q2nBFm63bz", - "XL29DuvLciiHTOF9Mj2f+qKFxpX9b/cLjTfBhoVBrkQHueoeH4W+4srVSaUSR/SqOfDbFM9tY0S/jq2j", - "RD9vJVRvRacPkiEdZyJT5bIYW2CHan6preKA7xt+nYfnpQj2O9bS3jZDx9YB6k+9vyPovChQ57zdAcY6", - "8Jy32g549jUON0LP+Qp/oueN0HOJXavRc1Fvcpfwufp2lK3j51zfmhjuD/h+RAR9z1Ap4T7HPa+Sqvq4", - "jQHqvIZ1dez3unF8BLbQZ1ny8m5gqZ98+7g0v7twH3NItmDQvocnR4LzWLMcCn5v+tDbru/bPgS8zyr2", - 
"vHxXqBlsWUfUZWJchl2Lla0SSTK/VgGmNRAFZ3Zh7TPkGp5ODVWdAc8vFn1QIpMRfoBCUUELUMgw0nA5", - "odHEjGO/s+PbUswPJE0/FBfodvrw3F7mKXHXTd5SKClhEAmuBHMljR+mSfKhXy9YOD85sZ1sm4krTfjQ", - "h7xIobAxZVoN+IC/Rp1JriwVjCgNL4FRjgpaRuBSMIYxDGfwwfCzRN+OvaVkRnS3Y9hswE0PyjNUnkrK", - "x8Dx0g9IR/BhJBgTl7YQ5oO7sLTU6l8YKX0jyw+Xlxk7WrQAaRnnLn2hfYuIndeWXc8n9m84mU9VlPPs", - "9hrr7r7UE12Wp40sJSNtb3BQbfRDZNq9NaVpIY7zzUtZWmtWf8HLGJymL6gySdNN1dcv02rxNElW6DC0", - "JvMvlY5Fpv9T6RiluwvstXuZckOLRO6DJh/dzdXKfSlXaNvEKkdhM6sCdz8/r891n6ZJEoSBX09Dve0G", - "kUTjle6icSttx9aqT10csL4fM5KxHaF1dvZ052fM2BCWWJZVnb1nYEPk8IXetmSycfP22jX44ZFLXhH/", - "jdVw+0cRpVVQe1eHx8OZle38qsF9MhCv0HPKbLzzdDXaSP5sqY34Gwo/vI3M9eMHt5JISPsyBZVfnrs/", - "xVulHUfJ3Fv2XtP8vlCY73rPT052lhmNu4+/1GTkz+2wr6P84WOKvep1/6zF3d0lBQGrkoVd02iVPYj0", - "pzn4O38/g8e9DB42I1pQ0xpLEuEoY/aKa2zvdTfZhb+43P3i/jhel1ef/0zFd5NL8feL1k2TE3gvjNLT", - "FKO7kbh9mxTFFbB7Wt9s3z3uSbB7jPIJQXMUKP8Iy4+j3bd/GNz0YzYbHQVv1bby277fjW1tO/L5NeR1", - "jWV+3Bczd5qWU6LFAgYsvZVjaUmMf0HHVgpivGu5QTlMTsHPyoENimFKzModfNOFbwXEHnm45h04y9JU", - "SK1AXwpIRIzKHkH8fvbqJQxFPOtD0Y+Dey2FVzj/PgH/Anqzh6Kf0fQ9sUVmZnsyEjIpDZD3TCW2U5Fm", - "zL4uxVYaex67YEVAE9kZfwYiowmdYsPRVvknLO60qmfRkYdBkpPXNeTZt0hUB118uX+xlqo8qjTCiDLM", - "32dM+djy1vMrH6L0Zo0h5UTONn2txuLvdkyLsHoff7bjhFzRJEuK92M//xVa/tWv9lce7G9X0FGhU3gV", - "IcbKHljtfN1PfISFOBvu3W+13Cv3pksj/Dcs9YKW/+UJMCI2ET9Xci0EMCLHuPPDXKjwtja/T3F8tHCb", - "4h4WqU1z7ZvjjA3L0jbbYGyI+++iJK3YfG63IO38+8HEpZcB3cNbEdMCZi6rhPu+VLC3vZCw7Qq483uc", - "Q3mOOaQuVb/ZAcyITQrzQkSEQYxTZCK1b6NybYMwyCTz79bpd91vt0yE0vbNz8H1++v/DwAA///8wcL2", - "G3cAAA==", + "H4sIAAAAAAAC/+x9C3MTO7L/V+ma/26t81+/kgAL3rp1KycBjk8RSBHIubsn3CDPtG0dZqRB0jgxVL77", + "LT3mafkRIIYsqaIKx6ORulu/bnW3WvLnIORJyhkyJYPB50CGU0yI+XigFAmnZzzOEnyNHzOUSn+dCp6i", + "UBRNo4RnTF2kRE31XxHKUNBUUc6CQXBC1BQupygQZqYXkFOexRGMEMx7GAXtAK9IksYYDIJewlQvIooE", + "7UDNU/2VVIKySXDdDgSSiLN4bocZkyxWwWBMYontxrDHumsgEvQrHfNO0d+I8xgJC65Njx8zKjAKBn9U", + "2XhXNOajPzFUevCDGaExGcV4hDMa4qIYwkwIZOoiEnSGYlEUh/Z5PIcRz1gEth20WBbHQMfAOMOdmjDY", + 
"jEZUS0I30UMHAyUy9EgmMjRd0MgzA4dDsI9heAStKV7VB9n7x+hxsLxLRhJc7PTXLCGso4Wrycr7N22r", + "fb944OuZ8iTJLiaCZ+liz8NXx8dvwTwEliUjFNUeH+8V/VGmcIJCd5iG9IJEkUAp/fznD6u09fv9/oDs", + "Dfr9bt9H5QxZxMVSkdrHfpHu9iNc0eVGInX9L4j05dnwaHgAh1ykXBDz7sJIDWBXxVPlqwqb+qz48H8o", + "kCgH/qWmwM/aK/OBxDCJ+YjE8RwyRj9mNdx0YahVQEEq+IxGGLWBmAdAJZBM8c4EGQqiMIKx4AmoKUJl", + "bqGF3Um3Deea3Y6e3A7Z6/T7nf55UJ+d+EFnkmZBO0iJUig0gf/7B+l8Ouj8u9958q78eNHtvPv7X3wT", + "uSnggI8NnY7PVj4rbciJraKwSehqhK6Y5OXTN0zI5MazdzgEqt8DgWMUyDQnlv6Ihx9QdCnvxXQkiJj3", + "2ISyq0FMFEpV52Z127X8GdpWMMYmmvUbstbQOQO3VswvUYREIsSoASLbENEJVbINRJttIqcoQa8p/4SQ", + "MI1ZqYhQwAUgi+CSqikQ064ugWTeISntUEtq0A4ScvUC2USvm4/2F/CowdhyHzrv/n/+1c5/eyEpshg9", + "YHzNM0XZBMxjGHMBakollDRQhYl57y8Cx8Eg+H+90hnoOU+gl0s3i1GPlVA2tK/tFpQQIcjcP2s5catm", + "TyrCVtgVq0Ae/o7ylU2Cs5YSFAdi/BbD7/OTtz2tkimRUk0FzybT6qz8kduDdxVZLEi3zmQ7QDbT7UgU", + "UWvaTmrkehbTKtFP2YwKzhJkCmZEUA2+2uL0OXj56ujpxdOXZ8FASyLKQmfpT169fhMMgv1+v1+hq5Tn", + "lKs0ziYXkn7CmpsU7D//JWgSclDQDwkmXMyNxFwf0JrW1WPMRUIUxPQDwrnu7zzQJmz3edNw7ZmhFtd9", + "bUQ2si9rDAeJU8pwqeVo/yjafsnFh5iTqLP7jZWdodJ9L7L40j6AkLMxnWTWQXBqj0Cdmum1r4ZXZFoi", + "UQ0w1tOsd//7FNUURUXD8i71V3alM69DTmFFIjXXteqEL4CYz1DEZO4B8W7fg+LfBVVmRt17EFH5AfTL", + "ayCse7MYfthfBHHfj2IPUR6aftGIcjq1CSUFIbt7x+7j3qZ6NQvTTNZI2muS89J40todmVGhMhLD4cnb", + "msnxOtY2ZPOYXRsRVk2tm/8CD0RBqG27xp+iZhXYaKmxPZv4bdHw+lcXa1eWry5rwlefh194rGEmFU+A", + "RsgUHVMdrzWcUVp3W+szNuNxR0ezxgJsaKYsuYuefzK3XdlJWQbNi8losctTjUDKYEInZDRX9cVmt784", + "9X5B5/37RL0sKrbwwOhCcU+wl6NleKTlmLfdJOI1MfSF4hezMfX0XFiq0vumEsJGCO5Aq7vopCF1IXkb", + "LqdU2zYJuRCMCT07rjoR3XPWAU3cAI6KAYpuiy71IqKV3i6tLS4qRFAGmUQYzXeAwNlxF94U1P5NAiOK", + "zjBPE0yJhBEig4zpJQUjM75JflQJyKT29qhqvu4cdptR2DG+EnfPuvDrPMWEMLikcWxirYQoGppAbUQb", + "/FxOkbmJ0iNpA8AKre+esyqyXGqmafLbgbEMGF0Q5fFYcUKlEqXlkIokKbRePzvc399/0jTSew87/d3O", + "7sM3u/1BX//7d9AOrHHVvgNR2HHmZxtJE19fB3V74ULfqkU5fDs82nMrQn0c9ekBefL46oqoJ4/opXzy", + "KRmJyZ/7ZCtpFb95OipjdmhlEkUnN30aVb5IvRIQL4nEvzjAvlFGx36xevmx3L3RLW8jB9SwqybxYpq0", + 
"vyBL0zSCNb1abqPfODHU+dHfav+gRL5mh2WJptPlS0Ja6baU61MhuPDkQ3nkGecgTWMaGu3uyBRDOqYh", + "oO4B9AvQSoxlwcJTqot1RKIL4VZyr0orQmMPZiqRjh3MtYSWNstJFiuaxmifGZRu5KwYzo9MT74okTKG", + "4gJz8dygpwSl9AZLjRgm56VoYlaZCEfZZKJFUhXdMZVmcSjXNIpxNLCx11qomtksCfPBq8rDhmh4oaOv", + "TowzjKsgsBZFE5twgVDgxE5ajSvKZiSm0QVlaeaFxFJRPsuEcRFsp0BGPFPGHbATVh3EpM2MmzfWGucV", + "1oI4fkUS2y2RuiSkIipzYZdVL/5By7Mcjn9YOx2uE980DPMwuzEBiceKHR4fWRsdcqYIZSggQUXcBkwl", + "SWJydUE76GhMRQQTzoCPx/9cnTZZ4sUVCrLKDzisRg+35wPQiQsKml6I5PEMI0gIo2OUClzL6shySvYe", + "PhqQUbi7tx/h+MHDR91u1zcMMiXmKafMM9TT4tlmU9GzCZFO2WdXTr9uHm4hnbUJL5+Dk4M3vwaDoJdJ", + "0Yt5SOKeHFE2qPxd/Fk+MB/snyPKvGmwwuY2KDUmxlkEHXFYNdKO85jQuLEXmWZx7L4faE4YhgUguTE2", + "a6MUvwv1UkMzpp8wAm9yXZGJ9qUs4r4ui94OPmaY4UXKJbWjLzgy7omORkYZjSMwb1T3JZX9qh7b7i1l", + "v+JCmojRRpyLjmSRpdEj6zZuzIwpGtugqTbiw/1Hj//Rf7K7V1FuytSjB8FGpBRmt5GpMTy7p6XLkyKL", + "7AqqYWA/hZzNtFaYPwx92s5Y4NQMeP5sYTIuufhA2eQioh50/m4fQkQFhspkY9frUNAjaboeiv6AvrBp", + "BftrPEi3EeBZXb67Jf+S0Ks++qvJbx//R57848/djy/Ozv41e/7b0Uv6r7P45NVX5ZhX7wF9142cldk1", + "E2/UNnA2hccxUaHH8ZlyqZZIzT0BxSHRL3fhkDAY4eCcdeAFVShIPIDzgKS064TZDXlyHkALr0io7FvA", + "GeiuYIokQrGjXz6xeXb98uc8TXHd7COaM5LQEIQT8ohIHc4ykNko4gmhbOecnTPXF+SMSJO+0Z8iCEmq", + "MoF6RiDMRDyHkSAhFvvS5eBt+EzS9HrnnKkpUYBXSmgOUiJUsWGcj2Am2lFl00OuOUYwI3GGEkIjqHNW", + "rB+RJkF3ooiYoOoWKVnj7zdSNEuE4o3JuVC1LPPjftszj6Db6YmMqVTIoNh/oNKAF1r5HsHjfk39H/cf", + "r89EFhhaAT+D7sUqpRyUG+iHBbAZ2hrji6lS6fqyI2NvrI7Ar2/enGgx6P9PIe+olEUxxS3O4jkQHRej", + "tPk1FRufxG3L7AS+HJqd3Q0ZemMb69diuZ6Pp2ZgePPiFBSKhDJrv1uhFudYh+9oMz1UykxDkRI4ODx+", + "utPdoMzKyLagf8U8vik4bCTs822sxSSGeaPchNDybcPwqK3dKaehpaNlMqjPuIDYGphSrwfwVmJ9P8NM", + "lU322JmM52XJibXq58FO3mPatBQDeF34d6QgpShkKcGQd1nqpen2nP2ugWHTuwu9t+u0msS1i1+caTPJ", + "XKLA5U7MUrzcFKxWf4/Ejc5z1txlvJluV7cn9WB+aJRzf+seyP7NPJDbKQpY3OIn8kIyksopV8s3Pgjk", + "bQCvqFQ1n2Fxgpam6hcLCuoG35YKrNjp3Kw04HvmzX+8soSVhQRfWw3gXIzNigF80KramXzL7ov3/9sB", + "9WxXHEhJJwwjGJ6UBX5lQJp330i5P9nr7j563N3t97u7/U3C84SEK8Y+PjjcfPD+ng1YBmQ0CKMBjr8i", + 
"PeCmzS4IJL4kcwnn+ZJ9HlgfoeIcVEDplvWNEpSLZRZfVlXR3PhYVzdxkzqJjayHKchZYvpPTbHOze3+", + "w6V2f+2s6mAa1ztmVolOTeP8rYubJK4QQp7FEfubgpHWPOuqYeQ8SonKIsW2pRLesg+MX7I66zZ/ofX3", + "Y4ZiDmfHx7Vsl8BxJjfbk5eKp+nSeeDpjaZhb83yu5aaSlnMNkphmpawYl+/eeFLNTTPt28s6jYI0au4", + "W16RYboz4bctookGGhngeodRpqCohdOQO4x5FplKAjGj0tRiKjpD4xG/zhijbKJ7MGtGqJ/EcxD2+9Uv", + "nxANv/zd1Py1+o3TaaYifsnMO3KaKdB/GZI1C84dWt2FRfIAXnLzjqO0rc1/w6+yzQmLRvPF5k0frGWj", + "de2eKy4wMoM5tRzAs0IVC2V2ytuS6D5aC+G2+cwW5o51413e0s1W0A6c1IN2YEUYtINcMvqj5dB8MsQH", + "7cAR4t3ktfBcVt+T5GeHGtt7VCqtaa7kBSqNoYVJquZ5gjnXnp2bqctB0aFv//VbhwT9J98iKfl2ZRby", + "P6RirGqh8kHW2qaFOV0a+ntLMoZHTd/Wxjvu4FndW21sIkvVsdt43i3kFQfc7Ekz/SxPu02y5j7hDQ61", + "LathKzXHRvflqbZ1IdySINuWk1Q4q1CyfG7s8vSVJwCpzI/+faHInEe6Po9ljSGkKDoFJHJ3VlvQS0HN", + "tpMTkBWsFsF/aZ/Cn3db7TUfk6tiBOPPEgmNyl/LR5npMbW/O114nddl0HHehSGjW3ev/S7w5kcjc1Qt", + "Tsaqs5K5A+RVPGd/Vli0ZbrVAGc5Rnv1cUxtujDMBFXzU70gWBiOkAgUB5mFoVkpDBPm63Jwk8u9vjYF", + "OmNP7ehzZChoCAcnQ4OShDAy0VN2dgwxHWM4D2N0qbgFJ8KcB3h1OOzYPYQ8cjeZHKqMQPJyy4OToan0", + "EtKO2+/udc2pDp4iIykNBsF+d9fUsmkxGBZ7lZMwLjmqFdEsZcPILblHro0Wrkw5k7b9Xr9vK3GYcsaV", + "lMVYvT+l3R22C6wxtpusw67ud9FjXUhw5c6AMAWeqJGeM3PdDh70d29E3NpiKh8JbxnJ1JQL+gkjPejD", + "G0rkiwYdMoWCkRgkihkKV15UhXAw+KMO3j/eXb9rBzJLEiLmuej8cku59KCgelgzsDqGUv3Co/k349d3", + "HvS6rtDael0vgPDbzXOOvUWZuyLVUmQWYluY7V9IVCTZW644rdg8qFXCfi/QP+g/uP1BKwXURdkccLtl", + "YYl4cvtEHHI2jmmooJPT4s4IAoltTX4dIHfFHLx2VAPJ+RqbPafygKPuLl8qeiS/uWDlotG432A7q0fz", + "UoUbLCMFV5WS5fuVZB10jqgMtXNZRUsnJGnlGgdZ6mkVRZ9pdG19pRht5qiOoSPzfbHkpESQBBUKaWha", + "cmoXyisOqH7gIhEb5togsr6ctCsybPqS7xYQ+2Dp8YWMNdeGLRjFo4ZB/I6GsLF1UznDc5fQ/LaYxfzM", + "wnXbb+Geo/qxoNnfnheUH4r4njC/K4h6jipXkUJs2gpOi2L+ZfBy5f63ONFuBA/jpzr6tFptCbVbBiVb", + "9lUIpxh+sAyZbYPVYeTQNtmGH2DPLNxg9Xfk3y/3GwSOpaxWBYtDt490e7Fi7fKZjULFvW9GgQOYR8im", + "xGOUF4HbzSwi5yzc+R4x4392VNg8R3WHNOkki2NzmNsdAihPblTtae+z9g828JNzbVvpi7x9/aKDLOQR", + "Rq4UablDkhdqf1tv2U6YZeUeJpvEV0ZUOTCWO6NfMf9256C8e+uve89cBdpf957ZGrS/7h+UV3DdDlj6", + 
"2zLN2/Ze7zD4tPNK60IzpsmWU6/z9opWW3H43LmVm7h8BYH3Xt8mXl9VXCsdv+II0S26fvXr+ba8T1CA", + "zSdt8yiva/rJXL7tpp4cIu0WqanMqOXiXYmUuYDOnYawF+TcJdVzFQe0QFzV/m6YQy0VcqV3kEN3eNR2", + "B13s8ZRU4JhebS+jmtOxdS/Rjbv9dOpBMqKTjGeyehrBnGtCWd4lUjPAd81/LZfnpR7sD4zS/jaXjq07", + "qPe4vyXXuTmh1njbbZF1znPeajvOc7lVs7n3nFN47z1v5D1XxLXaey7K/G/Tfa7fj7x1/znHm0/grq7y", + "Z/Sg75hXSpjLcVc2e2s2bmMHtTw6uHrtL6/33PpGfzH49v3S/Mj4XcwhmXNa5ubr3BMs15rlruCPhof+", + "dm3f9l3Auwyx59UrGvzOljFEvZhPqm5X80ChQJKUp9lBtwYi4dQQ1jlFpuDpTHPVPWf5fQ7vJc9EiO+h", + "ACooDhJjDJW79jfm5lpbafo3J+DekzR9X9xbsjOA56a8syJdO3hLoqAkhpAzyWN7kuz9LEneDxbrxM+O", + "j81Lps3UVoS/HxRX8RY6JnWrc3bOXqPKBJOGi5hIBS8hpgwltPSECx7HGMFoDu+1PCv87ZjLIXSP9lKC", + "eH7O9BuUZSgdl5RNgOGl65CO4f2YxzG/NOcP3tt7IpZq/Qs9S99J89vLT3daXhQHYQRn79pAc3mjGdec", + "di0HdhdLlkMVpyh2+97jTp8XE11Gpl6RkrEyB+ep0vjgmbKXVfoIsZL3k7L0iM/ivZoTsEhvQJmk6abw", + "dWQaFM+SZAWGoTUtv5Qq4pn6u1QRCnsFk0P3MnBDi4T2D0U+2AuDatdU2PONPlFZDv2iCuy1aPmxSPvX", + "LEmCduDo8Rxz3GAlUXileqjNSseKtW5Tmx0uxmN6ZsyL0Do9fbpzv2Zs6JYYkdWNvROgZ+Vw52vNSTVv", + "8PbaNvjpPZf8IPJ3huH2tyIqVFBzRQKLRnN3L35xu86dOhNgJrLkzKx3ji+vjuTPluqIOxj+0+tIiY+f", + "XEtCLswddjK/s+TuFG9VIo6KurfMdRLlNQ3tPOo9Oz7eWaY09hq0pSoj7sNhV0f5068p5oaNu6ct9sok", + "UjCwKlnY041W6QNP79XBXbVyv3jcycXDZEQLbloTQUIcZ7G5WSgy12n59MLdF9X7bD8M1+XVy9/Z/mFy", + "Ke5ah3XD5AzeCaV0PEXofmls6zrJi5s37mh9s/m1P8eCiTGqOwT+VaD6K/I/D7q//Waw79f4N9oK3qpu", + "Fb/i96Po1rZXPkdDXtdYlcddUXOLtJwTxRs+YOUyxKUlMe5exK0UxDjTcoNymJyD+8qBDYphKsLKDbzv", + "ni0JxGx52OZdOM3SlAslQV1ySHiE0mxB/Hb66iWMeDQfQPEeA3sboAOcu8bN/e6XjqHoJ9TvHpsiMx2e", + "jLlIKh3kb6YCOylPs9jcUmkqjZ2M7WJFQBHRnXwCIsIpnaFna6v6o7G3WtXTNOTtIMnZ62n2zOV99U6b", + "v6lW0FKfjzqPMKYx5j8jY360c1rcxZZ3UbnQcEQZEfNNbzNs/lLurFhW7+IP5R6TK5pkSfGzRM9/gZb7", + "xQ3z43rmJwPpuMAUXoWIkTQbVjtf96O67WI6PdedbbXcK7emS1f471jqVd6ppKfY/OKoA7niHGIiJrjz", + "0xyocLpWnqcYHjVOU9zBIrVZjr7Sz9iwLG2zAGNDv/82StKK4HO7BWlnP45PXLl25g6eipgVbuaySrgf", + "C4L97S0J266AO7vDOZTnmLvUleo304Hu0QeYFzwkMUQ4w5in5hJg2zZoB5mI3ZWmg579ycwpl8r84E5w", + 
"/e76/wIAAP//cHRGgNyPAAA=", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/paths/paths.go b/lib/paths/paths.go index 65e66a9..ce06aeb 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -1,5 +1,4 @@ // Package paths provides centralized path construction for hypeman data directory. - package paths import "path/filepath" @@ -196,6 +195,23 @@ func (p *Paths) GuestsDir() string { return filepath.Join(p.dataDir, "guests") } +// Device path methods + +// DevicesDir returns the root devices directory. +func (p *Paths) DevicesDir() string { + return filepath.Join(p.dataDir, "devices") +} + +// DeviceDir returns the directory for a device. +func (p *Paths) DeviceDir(id string) string { + return filepath.Join(p.DevicesDir(), id) +} + +// DeviceMetadata returns the path to device metadata.json. +func (p *Paths) DeviceMetadata(id string) string { + return filepath.Join(p.DeviceDir(id), "metadata.json") +} + // Volume path methods // VolumesDir returns the root volumes directory. 
diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 6252306..ecbeb70 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -8,6 +8,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -70,8 +71,13 @@ func ProvideNetworkManager(p *paths.Paths, cfg *config.Config) network.Manager { return network.NewManager(p, cfg, meter) } +// ProvideDeviceManager provides the device manager +func ProvideDeviceManager(p *paths.Paths) devices.Manager { + return devices.NewManager(p) +} + // ProvideInstanceManager provides the instance manager -func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, volumeManager volumes.Manager) (instances.Manager, error) { +func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager) (instances.Manager, error) { // Parse max overlay size from config var maxOverlaySize datasize.ByteSize if err := maxOverlaySize.UnmarshalText([]byte(cfg.MaxOverlaySize)); err != nil { @@ -108,7 +114,7 @@ func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager ima meter := otel.GetMeterProvider().Meter("hypeman") tracer := otel.GetTracerProvider().Tracer("hypeman") - return instances.NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, meter, tracer), nil + return instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, meter, tracer), nil } // ProvideVolumeManager provides the volume manager diff --git a/lib/system/init_script.go b/lib/system/init_script.go index 
2c6a5e9..ebe9f8b 100644 --- a/lib/system/init_script.go +++ b/lib/system/init_script.go @@ -7,8 +7,12 @@ package system // 1. Mounts essential filesystems (proc, sys, dev) // 2. Sets up overlay filesystem (lowerdir=rootfs, upperdir=overlay disk) // 3. Mounts and sources config disk (/dev/vdc) -// 4. Configures networking (if enabled) -// 5. Executes container entrypoint +// 4. Loads NVIDIA kernel modules (if HAS_GPU=1 in config.sh) +// 5. Configures networking (if enabled) +// 6. Executes container entrypoint +// +// GPU support: When HAS_GPU=1 is set in the instance's config.sh, the init script +// will load NVIDIA kernel modules before launching the container entrypoint. func GenerateInitScript() string { return `#!/bin/sh set -xe @@ -71,6 +75,95 @@ else exit 1 fi +# Load NVIDIA kernel modules for GPU passthrough (if HAS_GPU=1) +if [ "${HAS_GPU:-0}" = "1" ]; then + echo "overlay-init: loading NVIDIA kernel modules for GPU passthrough" + if [ -d /lib/modules ]; then + # Find the kernel version directory + KVER=$(ls /lib/modules/ 2>/dev/null | head -1) + if [ -n "$KVER" ] && [ -d "/lib/modules/$KVER/kernel/drivers/gpu" ]; then + # Load modules in order (dependencies first) + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia.ko 2>&1 || echo "overlay-init: nvidia.ko load failed" + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-uvm.ko 2>&1 || echo "overlay-init: nvidia-uvm.ko load failed" + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-modeset.ko 2>&1 || echo "overlay-init: nvidia-modeset.ko load failed" + insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-drm.ko modeset=1 2>&1 || echo "overlay-init: nvidia-drm.ko load failed" + echo "overlay-init: NVIDIA modules loaded for kernel $KVER" + + # Use nvidia-modprobe to create device nodes with correct major/minor numbers. + # nvidia-modprobe is the official NVIDIA utility that: + # 1. Loads kernel modules if needed (already done above) + # 2. 
Creates /dev/nvidiactl and /dev/nvidia0 with correct permissions + # 3. Creates /dev/nvidia-uvm and /dev/nvidia-uvm-tools + if [ -x /usr/bin/nvidia-modprobe ]; then + echo "overlay-init: running nvidia-modprobe to create device nodes" + /usr/bin/nvidia-modprobe 2>&1 || echo "overlay-init: nvidia-modprobe failed" + /usr/bin/nvidia-modprobe -u -c=0 2>&1 || echo "overlay-init: nvidia-modprobe -u failed" + echo "overlay-init: nvidia-modprobe completed" + ls -la /dev/nvidia* 2>/dev/null || true + else + echo "overlay-init: nvidia-modprobe not found, falling back to manual mknod" + # Fallback: Manual device node creation + NVIDIA_MAJOR=$(awk '/nvidia-frontend|^[0-9]+ nvidia$/ {print $1}' /proc/devices 2>/dev/null | head -1) + NVIDIA_UVM_MAJOR=$(awk '/nvidia-uvm/ {print $1}' /proc/devices 2>/dev/null) + + if [ -n "$NVIDIA_MAJOR" ]; then + mknod -m 666 /dev/nvidiactl c $NVIDIA_MAJOR 255 + mknod -m 666 /dev/nvidia0 c $NVIDIA_MAJOR 0 + echo "overlay-init: created /dev/nvidiactl and /dev/nvidia0 (major $NVIDIA_MAJOR)" + fi + + if [ -n "$NVIDIA_UVM_MAJOR" ]; then + mknod -m 666 /dev/nvidia-uvm c $NVIDIA_UVM_MAJOR 0 + mknod -m 666 /dev/nvidia-uvm-tools c $NVIDIA_UVM_MAJOR 1 + echo "overlay-init: created /dev/nvidia-uvm* (major $NVIDIA_UVM_MAJOR)" + fi + fi + else + echo "overlay-init: NVIDIA modules not found in /lib/modules/$KVER" + fi + else + echo "overlay-init: /lib/modules not found, skipping NVIDIA module loading" + fi + + # Inject NVIDIA userspace driver libraries into container rootfs + # This allows containers to use standard CUDA images without bundled drivers + # See lib/devices/GPU.md for documentation + if [ -d /usr/lib/nvidia ]; then + echo "overlay-init: injecting NVIDIA driver libraries into container" + + DRIVER_VERSION=$(cat /usr/lib/nvidia/version 2>/dev/null || echo "unknown") + LIB_DST="/overlay/newroot/usr/lib/x86_64-linux-gnu" + BIN_DST="/overlay/newroot/usr/bin" + + mkdir -p "$LIB_DST" "$BIN_DST" + + # Copy all driver libraries and create symlinks + for 
lib in /usr/lib/nvidia/*.so.*; do + if [ -f "$lib" ]; then + libname=$(basename "$lib") + cp "$lib" "$LIB_DST/" + + # Create standard symlinks: libfoo.so.VERSION -> libfoo.so.1 -> libfoo.so + base=$(echo "$libname" | sed 's/\.so\..*//') + ln -sf "$libname" "$LIB_DST/${base}.so.1" 2>/dev/null || true + ln -sf "${base}.so.1" "$LIB_DST/${base}.so" 2>/dev/null || true + fi + done + + # Copy nvidia-smi and nvidia-modprobe binaries + for bin in nvidia-smi nvidia-modprobe; do + if [ -x /usr/bin/$bin ]; then + cp /usr/bin/$bin "$BIN_DST/" + fi + done + + # Update ldconfig cache so applications can find the libraries + chroot /overlay/newroot ldconfig 2>/dev/null || true + + echo "overlay-init: NVIDIA driver libraries injected (version: $DRIVER_VERSION)" + fi +fi + # Mount attached volumes (from config: VOLUME_MOUNTS="device:path:mode[:overlay_device] ...") # Modes: ro (read-only), rw (read-write), overlay (base ro + per-instance overlay) if [ -n "${VOLUME_MOUNTS:-}" ]; then diff --git a/lib/system/initrd.go b/lib/system/initrd.go index c409ec7..09f286c 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -1,16 +1,21 @@ package system import ( + "archive/tar" + "compress/gzip" "context" "crypto/sha256" "encoding/hex" "fmt" + "io" + "net/http" "os" "path/filepath" "strconv" "time" "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/logger" ) const alpineBaseImage = "alpine:3.22" @@ -49,12 +54,19 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) if err := os.MkdirAll(binDir, 0755); err != nil { return "", fmt.Errorf("create bin dir: %w", err) } - + agentPath := filepath.Join(binDir, "exec-agent") if err := os.WriteFile(agentPath, ExecAgentBinary, 0755); err != nil { return "", fmt.Errorf("write exec-agent: %w", err) } + // Add NVIDIA kernel modules (for GPU passthrough support) + if err := m.addNvidiaModules(ctx, rootfsDir, arch); err != nil { + // Log but don't fail - NVIDIA modules are optional (not 
available on all architectures) + log := logger.FromContext(ctx) + log.InfoContext(ctx, "skipping NVIDIA modules", "error", err) + } + // Write generated init script initScript := GenerateInitScript() initPath := filepath.Join(rootfsDir, "init") @@ -64,13 +76,13 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) // Generate timestamp for this build timestamp := strconv.FormatInt(time.Now().Unix(), 10) - + // Package as cpio.gz outputPath := m.paths.SystemInitrdTimestamp(timestamp, arch) if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { return "", fmt.Errorf("create output dir: %w", err) } - + if _, err := images.ExportRootfs(rootfsDir, outputPath, images.FormatCpio); err != nil { return "", fmt.Errorf("export initrd: %w", err) } @@ -135,10 +147,167 @@ func (m *manager) isInitrdStale(initrdPath string) bool { return string(storedHash) != currentHash } -// computeInitrdHash computes a hash of the embedded binary and init script +// computeInitrdHash computes a hash of the embedded binary, init script, and NVIDIA assets func computeInitrdHash() string { h := sha256.New() h.Write(ExecAgentBinary) h.Write([]byte(GenerateInitScript())) + // Include NVIDIA driver version in hash so initrd is rebuilt when driver changes + if ver, ok := NvidiaDriverVersion[DefaultKernelVersion]; ok { + h.Write([]byte(ver)) + } + // Include driver libs URL so initrd is rebuilt when the libs tarball changes + if archURLs, ok := NvidiaDriverLibURLs[DefaultKernelVersion]; ok { + if url, ok := archURLs["x86_64"]; ok { + h.Write([]byte(url)) + } + } return hex.EncodeToString(h.Sum(nil))[:16] } + +// addNvidiaModules downloads and extracts NVIDIA kernel modules into the rootfs +func (m *manager) addNvidiaModules(ctx context.Context, rootfsDir, arch string) error { + // Check if NVIDIA modules are available for this architecture + archURLs, ok := NvidiaModuleURLs[DefaultKernelVersion] + if !ok { + return fmt.Errorf("no NVIDIA modules for kernel 
version %s", DefaultKernelVersion) + } + url, ok := archURLs[arch] + if !ok { + return fmt.Errorf("no NVIDIA modules for architecture %s", arch) + } + + // Download the tarball + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return nil // Follow redirects + }, + } + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("download nvidia modules: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download failed with status %d", resp.StatusCode) + } + + // Extract tarball directly into rootfs + if err := extractTarGz(resp.Body, rootfsDir); err != nil { + return fmt.Errorf("extract nvidia modules: %w", err) + } + + // Add userspace driver libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) + // These are injected into containers at boot time - see lib/devices/GPU.md + if err := m.addNvidiaDriverLibs(ctx, rootfsDir, arch); err != nil { + log := logger.FromContext(ctx) + log.WarnContext(ctx, "could not add nvidia driver libs", "error", err) + // Don't fail - kernel modules can still work, but containers won't have driver libs + } + + return nil +} + +// addNvidiaDriverLibs downloads and extracts NVIDIA userspace driver libraries +// These libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) are injected +// into containers at boot time, eliminating the need for containers to bundle +// matching driver versions. See lib/devices/GPU.md for documentation. 
+func (m *manager) addNvidiaDriverLibs(ctx context.Context, rootfsDir, arch string) error { + archURLs, ok := NvidiaDriverLibURLs[DefaultKernelVersion] + if !ok { + return fmt.Errorf("no NVIDIA driver libs for kernel version %s", DefaultKernelVersion) + } + url, ok := archURLs[arch] + if !ok { + return fmt.Errorf("no NVIDIA driver libs for architecture %s", arch) + } + + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return nil // Follow redirects + }, + } + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("download nvidia driver libs: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download failed with status %d", resp.StatusCode) + } + + // Extract tarball directly into rootfs + if err := extractTarGz(resp.Body, rootfsDir); err != nil { + return fmt.Errorf("extract nvidia driver libs: %w", err) + } + + log := logger.FromContext(ctx) + log.InfoContext(ctx, "added NVIDIA driver libraries", "url", url) + return nil +} + +// extractTarGz extracts a gzipped tarball into the destination directory +func extractTarGz(r io.Reader, destDir string) error { + gzr, err := gzip.NewReader(r) + if err != nil { + return fmt.Errorf("create gzip reader: %w", err) + } + defer gzr.Close() + + tr := tar.NewReader(gzr) + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("read tar: %w", err) + } + + // Calculate destination path + destPath := filepath.Join(destDir, header.Name) + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(destPath, os.FileMode(header.Mode)); err != nil { + return fmt.Errorf("create directory %s: %w", destPath, err) + } + case tar.TypeReg: + // Ensure parent directory exists + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != 
nil { + return fmt.Errorf("create parent dir: %w", err) + } + + outFile, err := os.Create(destPath) + if err != nil { + return fmt.Errorf("create file %s: %w", destPath, err) + } + + if _, err := io.Copy(outFile, tr); err != nil { + outFile.Close() + return fmt.Errorf("write file %s: %w", destPath, err) + } + outFile.Close() + + if err := os.Chmod(destPath, os.FileMode(header.Mode)); err != nil { + return fmt.Errorf("chmod %s: %w", destPath, err) + } + } + } + + return nil +} diff --git a/lib/system/versions.go b/lib/system/versions.go index 1aca99e..aaca2bf 100644 --- a/lib/system/versions.go +++ b/lib/system/versions.go @@ -6,19 +6,21 @@ import "runtime" type KernelVersion string const ( - // Kernel versions from Kernel linux build + // Kernel versions from onkernel/linux releases Kernel_202511182 KernelVersion = "ch-6.12.8-kernel-1-202511182" Kernel_20251211 KernelVersion = "ch-6.12.8-kernel-1.1-20251211" + Kernel_20251213 KernelVersion = "ch-6.12.8-kernel-1.2-20251213" // NVIDIA module + driver lib support + networking configs ) var ( // DefaultKernelVersion is the kernel version used for new instances - DefaultKernelVersion = Kernel_20251211 + DefaultKernelVersion = Kernel_20251213 // SupportedKernelVersions lists all supported kernel versions SupportedKernelVersions = []KernelVersion{ Kernel_202511182, Kernel_20251211, + Kernel_20251213, // Add future versions here } ) @@ -33,9 +35,39 @@ var KernelDownloadURLs = map[KernelVersion]map[string]string{ "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.1-20251211/vmlinux-x86_64", "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.1-20251211/Image-arm64", }, + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/vmlinux-x86_64", + "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/Image-arm64", + }, // Add future versions here } +// NvidiaModuleURLs 
maps kernel versions and architectures to NVIDIA module tarball URLs +// These tarballs contain pre-built NVIDIA kernel modules that match the kernel version +var NvidiaModuleURLs = map[KernelVersion]map[string]string{ + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/nvidia-modules-x86_64.tar.gz", + // Note: NVIDIA open-gpu-kernel-modules does not support arm64 yet + }, + // Kernel_202511182 and Kernel_20251211 do not have NVIDIA modules (pre-module-support kernels) +} + +// NvidiaDriverLibURLs maps kernel versions and architectures to driver library tarball URLs +// These tarballs contain userspace NVIDIA libraries (libcuda.so, libnvidia-ml.so, etc.) +// that match the kernel modules and are injected into containers at boot time. +// See lib/devices/GPU.md for documentation on driver injection. +var NvidiaDriverLibURLs = map[KernelVersion]map[string]string{ + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/nvidia-driver-libs-x86_64.tar.gz", + }, +} + +// NvidiaDriverVersion tracks the NVIDIA driver version bundled with each kernel +var NvidiaDriverVersion = map[KernelVersion]string{ + Kernel_20251213: "570.86.16", + // Kernel_202511182 and Kernel_20251211 do not have NVIDIA modules +} + // GetArch returns the architecture string for the current platform func GetArch() string { arch := runtime.GOARCH diff --git a/openapi.yaml b/openapi.yaml index 19e39ee..a096083 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -149,6 +149,12 @@ components: description: Whether to attach instance to the default network default: true example: true + devices: + type: array + items: + type: string + description: Device IDs or names to attach for GPU/PCI passthrough + example: ["l4-gpu"] volumes: type: array description: Volumes to attach to the instance at creation time @@ -502,6 +508,107 @@ components: description: Creation timestamp (RFC3339) example: 
"2025-01-15T10:00:00Z" + DeviceType: + type: string + enum: [gpu, pci] + description: Type of PCI device + + CreateDeviceRequest: + type: object + required: [pci_address] + properties: + name: + type: string + description: Optional globally unique device name. If not provided, a name is auto-generated from the PCI address (e.g., "pci-0000-a2-00-0") + pattern: ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$ + example: l4-gpu + pci_address: + type: string + description: PCI address of the device (required, e.g., "0000:a2:00.0") + example: "0000:a2:00.0" + + Device: + type: object + required: [id, type, pci_address, vendor_id, device_id, iommu_group, bound_to_vfio, created_at] + properties: + id: + type: string + description: Auto-generated unique identifier (CUID2 format) + example: tz4a98xxat96iws9zmbrgj3a + name: + type: string + description: Device name (user-provided or auto-generated from PCI address) + example: l4-gpu + type: + $ref: "#/components/schemas/DeviceType" + pci_address: + type: string + description: PCI address + example: "0000:a2:00.0" + vendor_id: + type: string + description: PCI vendor ID (hex) + example: "10de" + device_id: + type: string + description: PCI device ID (hex) + example: "27b8" + iommu_group: + type: integer + description: IOMMU group number + example: 82 + bound_to_vfio: + type: boolean + description: | + Whether the device is currently bound to the vfio-pci driver, which is required for VM passthrough. + - true: Device is bound to vfio-pci and ready for (or currently in use by) a VM. The device's native driver has been unloaded. + - false: Device is using its native driver (e.g., nvidia) or no driver. Hypeman will automatically bind to vfio-pci when attaching to an instance. 
+ example: false + attached_to: + type: string + description: Instance ID if attached + nullable: true + example: null + created_at: + type: string + format: date-time + description: Registration timestamp (RFC3339) + example: "2025-01-15T10:00:00Z" + + AvailableDevice: + type: object + required: [pci_address, vendor_id, device_id, iommu_group] + properties: + pci_address: + type: string + description: PCI address + example: "0000:a2:00.0" + vendor_id: + type: string + description: PCI vendor ID (hex) + example: "10de" + device_id: + type: string + description: PCI device ID (hex) + example: "27b8" + vendor_name: + type: string + description: Human-readable vendor name + example: "NVIDIA Corporation" + device_name: + type: string + description: Human-readable device name + example: "L4" + iommu_group: + type: integer + description: IOMMU group number + example: 82 + current_driver: + type: string + description: Currently bound driver (null if none) + nullable: true + example: "nvidia" + paths: /health: get: @@ -1246,6 +1353,176 @@ paths: schema: $ref: "#/components/schemas/Error" + /devices: + get: + summary: List registered devices + operationId: listDevices + security: + - bearerAuth: [] + responses: + 200: + description: List of registered devices + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Device" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + post: + summary: Register a device for passthrough + operationId: createDevice + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateDeviceRequest" + responses: + 201: + description: Device registered + content: + application/json: + schema: + $ref: "#/components/schemas/Device" + 400: + description: 
Bad request (invalid name or PCI address) + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 404: + description: PCI device not found on host + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Conflict - device or name already registered + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /devices/available: + get: + summary: Discover passthrough-capable devices on host + operationId: listAvailableDevices + security: + - bearerAuth: [] + responses: + 200: + description: List of available PCI devices + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/AvailableDevice" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /devices/{id}: + get: + summary: Get device details + operationId: getDevice + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Device ID or name + responses: + 200: + description: Device details + content: + application/json: + schema: + $ref: "#/components/schemas/Device" + 404: + description: Device not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + delete: + summary: Unregister device + operationId: deleteDevice + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Device ID or name + 
responses: + 204: + description: Device unregistered + 404: + description: Device not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Conflict - device is attached to an instance + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /ingresses: get: summary: List ingresses @@ -1388,3 +1665,4 @@ paths: schema: $ref: "#/components/schemas/Error" + diff --git a/stainless.yaml b/stainless.yaml index 6aac3c9..4e27148 100644 --- a/stainless.yaml +++ b/stainless.yaml @@ -103,6 +103,18 @@ resources: get: get /volumes/{id} delete: delete /volumes/{id} + devices: + models: + device: '#/components/schemas/Device' + available_device: '#/components/schemas/AvailableDevice' + device_type: '#/components/schemas/DeviceType' + methods: + list: get /devices + create: post /devices + retrieve: get /devices/{id} + delete: delete /devices/{id} + list_available: get /devices/available + ingresses: models: ingress: '#/components/schemas/Ingress'