Skip to content

Commit 8358088

Browse files
committed
Fix socket name too long
1 parent 1230c4e commit 8358088

File tree

8 files changed

+73
-25
lines changed

8 files changed

+73
-25
lines changed

lib/hypervisor/cloudhypervisor/process.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ import (
99
"github.com/onkernel/hypeman/lib/vmm"
1010
)
1111

12+
func init() {
13+
hypervisor.RegisterSocketName(hypervisor.TypeCloudHypervisor, "ch.sock")
14+
}
15+
1216
// ProcessManager implements hypervisor.ProcessManager for Cloud Hypervisor.
1317
type ProcessManager struct{}
1418

@@ -20,6 +24,11 @@ func NewProcessManager() *ProcessManager {
2024
// Verify ProcessManager implements the interface
2125
var _ hypervisor.ProcessManager = (*ProcessManager)(nil)
2226

27+
// SocketName returns the socket filename for Cloud Hypervisor; it must match the name registered with hypervisor.RegisterSocketName in this package's init().
28+
func (p *ProcessManager) SocketName() string {
29+
return "ch.sock"
30+
}
31+
2332
// StartProcess launches a Cloud Hypervisor VMM process.
2433
func (p *ProcessManager) StartProcess(ctx context.Context, paths *paths.Paths, version string, socketPath string) (int, error) {
2534
chVersion := vmm.CHVersion(version)

lib/hypervisor/hypervisor.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,25 @@ const (
1919
// Future: TypeQEMU Type = "qemu"
2020
)
2121

22+
// socketNames maps hypervisor types to their socket filenames.
23+
// Registered by each hypervisor package's init() function.
24+
var socketNames = make(map[Type]string)
25+
26+
// RegisterSocketName registers the socket filename for a hypervisor type.
27+
// Called by each hypervisor implementation's init() function.
28+
func RegisterSocketName(t Type, name string) {
29+
socketNames[t] = name
30+
}
31+
32+
// SocketNameForType returns the socket filename for a hypervisor type.
33+
// Falls back to string(t) + ".sock" when no name has been registered for t (for example, if the implementation package's init() has not run).
34+
func SocketNameForType(t Type) string {
35+
if name, ok := socketNames[t]; ok {
36+
return name
37+
}
38+
return string(t) + ".sock"
39+
}
40+
2241
// Hypervisor defines the interface for VM management operations.
2342
// All hypervisor implementations must implement this interface.
2443
type Hypervisor interface {
@@ -92,6 +111,10 @@ type Capabilities struct {
92111
// This is separate from the Hypervisor interface because process management
93112
// happens before/after the VMM socket is available.
94113
type ProcessManager interface {
114+
// SocketName returns the socket filename for this hypervisor.
115+
// Implementations return a short name so the full socket path fits in sockaddr_un.sun_path (108 bytes on Linux, including the trailing NUL).
116+
SocketName() string
117+
95118
// StartProcess launches the hypervisor process.
96119
// Returns the process ID of the started hypervisor.
97120
StartProcess(ctx context.Context, p *paths.Paths, version string, socketPath string) (pid int, err error)

lib/instances/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Manages VM instance lifecycle using Cloud Hypervisor.
4040
metadata.json # State, versions, timestamps
4141
overlay.raw # 50GB sparse writable overlay
4242
config.erofs # Compressed config disk
43-
cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type)
43+
ch.sock # Hypervisor API socket (abbreviated for SUN_LEN limit)
4444
logs/
4545
app.log # Guest application log (serial console output)
4646
vmm.log # Hypervisor log (stdout+stderr)

lib/instances/create.go

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,15 @@ func (m *manager) createInstance(
202202
// 8. Get default kernel version
203203
kernelVer := m.systemManager.GetDefaultKernelVersion()
204204

205-
// 9. Validate, resolve, and auto-bind devices (GPU passthrough)
205+
// 9. Get process manager for hypervisor type (needed for socket name)
206+
hvType := hypervisor.TypeCloudHypervisor
207+
pm, err := m.getProcessManager(hvType)
208+
if err != nil {
209+
log.ErrorContext(ctx, "failed to get process manager", "error", err)
210+
return nil, fmt.Errorf("get process manager: %w", err)
211+
}
212+
213+
// 10. Validate, resolve, and auto-bind devices (GPU passthrough)
206214
// Track devices we've marked as attached for cleanup on error.
207215
// The cleanup closure captures this slice by reference, so it will see
208216
// whatever devices have been attached when cleanup runs.
@@ -256,7 +264,7 @@ func (m *manager) createInstance(
256264
log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs)
257265
}
258266

259-
// 10. Create instance metadata
267+
// 11. Create instance metadata
260268
stored := &StoredMetadata{
261269
Id: id,
262270
Name: req.Name,
@@ -271,30 +279,30 @@ func (m *manager) createInstance(
271279
StartedAt: nil,
272280
StoppedAt: nil,
273281
KernelVersion: string(kernelVer),
274-
HypervisorType: hypervisor.TypeCloudHypervisor,
282+
HypervisorType: hvType,
275283
HypervisorVersion: string(vmm.V49_0), // Use latest
276-
SocketPath: m.paths.InstanceSocket(id, string(hypervisor.TypeCloudHypervisor)),
284+
SocketPath: m.paths.InstanceSocket(id, pm.SocketName()),
277285
DataDir: m.paths.InstanceDir(id),
278286
VsockCID: vsockCID,
279287
VsockSocket: vsockSocket,
280288
Devices: resolvedDeviceIDs,
281289
}
282290

283-
// 11. Ensure directories
291+
// 12. Ensure directories
284292
log.DebugContext(ctx, "creating instance directories", "instance_id", id)
285293
if err := m.ensureDirectories(id); err != nil {
286294
log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err)
287295
return nil, fmt.Errorf("ensure directories: %w", err)
288296
}
289297

290-
// 12. Create overlay disk with specified size
298+
// 13. Create overlay disk with specified size
291299
log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize)
292300
if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil {
293301
log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err)
294302
return nil, fmt.Errorf("create overlay disk: %w", err)
295303
}
296304

297-
// 13. Allocate network (if network enabled)
305+
// 14. Allocate network (if network enabled)
298306
var netConfig *network.NetworkConfig
299307
if networkName != "" {
300308
log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName)
@@ -319,7 +327,7 @@ func (m *manager) createInstance(
319327
})
320328
}
321329

322-
// 14. Validate and attach volumes
330+
// 15. Validate and attach volumes
323331
if len(req.Volumes) > 0 {
324332
log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes))
325333
for _, volAttach := range req.Volumes {
@@ -359,30 +367,30 @@ func (m *manager) createInstance(
359367
stored.Volumes = req.Volumes
360368
}
361369

362-
// 15. Create config disk (needs Instance for buildVMConfig)
370+
// 16. Create config disk (needs Instance for buildVMConfig)
363371
inst := &Instance{StoredMetadata: *stored}
364372
log.DebugContext(ctx, "creating config disk", "instance_id", id)
365373
if err := m.createConfigDisk(ctx, inst, imageInfo, netConfig); err != nil {
366374
log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err)
367375
return nil, fmt.Errorf("create config disk: %w", err)
368376
}
369377

370-
// 16. Save metadata
378+
// 17. Save metadata
371379
log.DebugContext(ctx, "saving instance metadata", "instance_id", id)
372380
meta := &metadata{StoredMetadata: *stored}
373381
if err := m.saveMetadata(meta); err != nil {
374382
log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err)
375383
return nil, fmt.Errorf("save metadata: %w", err)
376384
}
377385

378-
// 17. Start VMM and boot VM
386+
// 18. Start VMM and boot VM
379387
log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id)
380388
if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil {
381389
log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err)
382390
return nil, err
383391
}
384392

385-
// 18. Update timestamp after VM is running
393+
// 19. Update timestamp after VM is running
386394
now := time.Now()
387395
stored.StartedAt = &now
388396

lib/instances/storage.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ import (
1111

1212
// Filesystem structure:
1313
// {dataDir}/guests/{instance-id}/
14-
// metadata.json # Instance metadata
15-
// overlay.raw # Configurable sparse overlay disk (default 10GB)
16-
// config.ext4 # Read-only config disk (generated)
17-
// cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type)
14+
// metadata.json # Instance metadata
15+
// overlay.raw # Configurable sparse overlay disk (default 10GB)
16+
// config.ext4 # Read-only config disk (generated)
17+
// ch.sock # Hypervisor API socket (abbreviated name for SUN_LEN limit)
1818
// logs/
19-
// app.log # Guest application log (serial console output)
20-
// vmm.log # Hypervisor log (stdout+stderr combined)
21-
// hypeman.log # Hypeman operations log (actions taken on this instance)
19+
// app.log # Guest application log (serial console output)
20+
// vmm.log # Hypervisor log (stdout+stderr combined)
21+
// hypeman.log # Hypeman operations log (actions taken on this instance)
2222
// snapshots/
23-
// snapshot-latest/ # Snapshot directory
23+
// snapshot-latest/ # Snapshot directory
2424
// config.json
2525
// memory-ranges
2626

lib/network/derive.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net"
88
"os"
99

10+
"github.com/onkernel/hypeman/lib/hypervisor"
1011
"github.com/onkernel/hypeman/lib/logger"
1112
"github.com/onkernel/hypeman/lib/vmm"
1213
)
@@ -49,7 +50,7 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All
4950
netmask := fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3])
5051

5152
// 4. Try to derive from running VM first
52-
socketPath := m.paths.InstanceSocket(instanceID, meta.HypervisorType)
53+
socketPath := m.paths.InstanceSocket(instanceID, hypervisor.SocketNameForType(hypervisor.Type(meta.HypervisorType)))
5354
if fileExists(socketPath) {
5455
client, err := vmm.NewVMM(socketPath)
5556
if err == nil {

lib/paths/paths.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,11 @@ func (p *Paths) InstanceVolumeOverlaysDir(instanceID string) string {
144144
return filepath.Join(p.InstanceDir(instanceID), "vol-overlays")
145145
}
146146

147-
// InstanceSocket returns the path to instance API socket for the given hypervisor type.
148-
func (p *Paths) InstanceSocket(id string, hypervisorType string) string {
149-
return filepath.Join(p.InstanceDir(id), hypervisorType+".sock")
147+
// InstanceSocket returns the path to instance API socket.
148+
// The socketName should be obtained from hypervisor.ProcessManager.SocketName() or hypervisor.SocketNameForType() to ensure
149+
// it stays within Unix socket path length limits (sockaddr_un.sun_path is 108 bytes on Linux).
150+
func (p *Paths) InstanceSocket(id string, socketName string) string {
151+
return filepath.Join(p.InstanceDir(id), socketName)
150152
}
151153

152154
// InstanceVsockSocket returns the path to instance vsock socket.

lib/vmm/client.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,11 @@ func StartProcessWithArgs(ctx context.Context, p *paths.Paths, version CHVersion
152152
defer cancel()
153153

154154
if err := waitForSocket(waitCtx, socketPath, 5*time.Second); err != nil {
155+
// Read vmm.log to understand why socket wasn't created
156+
vmmLogPath := filepath.Join(logsDir, "vmm.log")
157+
if logData, readErr := os.ReadFile(vmmLogPath); readErr == nil && len(logData) > 0 {
158+
return 0, fmt.Errorf("%w; vmm.log: %s", err, string(logData))
159+
}
155160
return 0, err
156161
}
157162

0 commit comments

Comments
 (0)