Skip to content

Commit 712e328

Browse files
author
Rafael Garcia
committed
fix(configdisk): only set HAS_GPU=1 for actual GPU devices
The HAS_GPU flag was being set whenever any device was attached, regardless of device type. This would trigger NVIDIA module loading in the VM init script even for non-GPU PCI devices. The config script generator now iterates through the attached devices and checks each device's type, setting HAS_GPU=1 only if at least one device is of type DeviceTypeGPU.
1 parent 4fa81c4 commit 712e328

File tree

3 files changed

+14
-8
lines changed

3 files changed

+14
-8
lines changed

lib/instances/configdisk.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
package instances
22

33
import (
4+
"context"
45
"encoding/json"
56
"fmt"
67
"os"
78
"path/filepath"
89
"strconv"
910
"strings"
1011

12+
"github.com/onkernel/hypeman/lib/devices"
1113
"github.com/onkernel/hypeman/lib/images"
1214
"github.com/onkernel/hypeman/lib/network"
1315
)
@@ -16,7 +18,7 @@ import (
1618
// The disk contains:
1719
// - /config.sh - Shell script sourced by init
1820
// - /metadata.json - JSON metadata for programmatic access
19-
func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error {
21+
func (m *manager) createConfigDisk(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error {
2022
// Create temporary directory for config files
2123
tmpDir, err := os.MkdirTemp("", "hypeman-config-*")
2224
if err != nil {
@@ -25,7 +27,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC
2527
defer os.RemoveAll(tmpDir)
2628

2729
// Generate config.sh
28-
configScript := m.generateConfigScript(inst, imageInfo, netConfig)
30+
configScript := m.generateConfigScript(ctx, inst, imageInfo, netConfig)
2931
configPath := filepath.Join(tmpDir, "config.sh")
3032
if err := os.WriteFile(configPath, []byte(configScript), 0644); err != nil {
3133
return fmt.Errorf("write config.sh: %w", err)
@@ -64,7 +66,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC
6466
}
6567

6668
// generateConfigScript creates the shell script that will be sourced by init
67-
func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string {
69+
func (m *manager) generateConfigScript(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string {
6870
// Prepare entrypoint value
6971
entrypoint := ""
7072
if len(imageInfo.Entrypoint) > 0 {
@@ -106,10 +108,14 @@ GUEST_DNS="%s"
106108
}
107109

108110
// GPU passthrough configuration
109-
// When devices are attached, set HAS_GPU=1 to trigger NVIDIA module loading in init
111+
// Only set HAS_GPU=1 if at least one attached device is actually a GPU
110112
gpuSection := ""
111-
if len(inst.Devices) > 0 {
112-
gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n"
113+
for _, deviceID := range inst.Devices {
114+
device, err := m.deviceManager.GetDevice(ctx, deviceID)
115+
if err == nil && device.Type == devices.DeviceTypeGPU {
116+
gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n"
117+
break
118+
}
113119
}
114120

115121
// Build volume mounts section

lib/instances/create.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ func (m *manager) createInstance(
360360
// 15. Create config disk (needs Instance for buildVMConfig)
361361
inst := &Instance{StoredMetadata: *stored}
362362
log.DebugContext(ctx, "creating config disk", "instance_id", id)
363-
if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil {
363+
if err := m.createConfigDisk(ctx, inst, imageInfo, netConfig); err != nil {
364364
log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err)
365365
return nil, fmt.Errorf("create config disk: %w", err)
366366
}

lib/instances/start.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ func (m *manager) startInstance(
8484
// 5. Regenerate config disk with new network configuration
8585
instForConfig := &Instance{StoredMetadata: *stored}
8686
log.DebugContext(ctx, "regenerating config disk", "instance_id", id)
87-
if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil {
87+
if err := m.createConfigDisk(ctx, instForConfig, imageInfo, netConfig); err != nil {
8888
log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err)
8989
return nil, fmt.Errorf("create config disk: %w", err)
9090
}

0 commit comments

Comments
 (0)