Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cmd/api/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func newTestService(t *testing.T) *ApiService {
}
}

// cleanupOrphanedProcesses kills Cloud Hypervisor processes from metadata files
// cleanupOrphanedProcesses kills hypervisor processes from metadata files
func cleanupOrphanedProcesses(t *testing.T, dataDir string) {
p := paths.New(dataDir)
guestsDir := p.GuestsDir()
Expand All @@ -77,21 +77,21 @@ func cleanupOrphanedProcesses(t *testing.T, dataDir string) {
continue
}

// Parse just the CHPID field
// Parse just the HypervisorPID field
var meta struct {
CHPID *int `json:"CHPID"`
HypervisorPID *int `json:"HypervisorPID"`
}
if err := json.Unmarshal(data, &meta); err != nil {
continue
}

// If metadata has a PID, try to kill it
if meta.CHPID != nil {
pid := *meta.CHPID
if meta.HypervisorPID != nil {
pid := *meta.HypervisorPID

// Check if process exists
if err := syscall.Kill(pid, 0); err == nil {
t.Logf("Cleaning up orphaned Cloud Hypervisor process: PID %d", pid)
t.Logf("Cleaning up orphaned hypervisor process: PID %d", pid)
syscall.Kill(pid, syscall.SIGKILL)
}
}
Expand Down
35 changes: 35 additions & 0 deletions lib/hypervisor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Hypervisor Abstraction

Provides a common interface for VM management across different hypervisors.

## Purpose

Hypeman originally supported only Cloud Hypervisor. This abstraction layer allows supporting multiple hypervisors (e.g., QEMU) through a unified interface, enabling:

- **Hypervisor choice per instance** - Different instances can use different hypervisors
- **Feature parity where possible** - Common operations work the same way
- **Graceful degradation** - Features unsupported by a hypervisor can be detected and handled

## How It Works

The abstraction defines two key interfaces:

1. **Hypervisor** - VM lifecycle operations (create, boot, pause, resume, snapshot, restore, shutdown)
2. **ProcessManager** - Hypervisor process lifecycle (start binary, get binary path)

Each hypervisor implementation translates the generic configuration and operations to its native format. For example, Cloud Hypervisor uses an HTTP API over a Unix socket, while QEMU would use QMP.

Before using optional features, callers check capabilities:

```go
if hv.Capabilities().SupportsSnapshot {
hv.Snapshot(ctx, path)
}
```

## Hypervisor Switching

Instances store their hypervisor type in metadata. An instance can switch hypervisors only when stopped (no running VM, no snapshot), since:

- Disk images are hypervisor-agnostic
- Snapshots are hypervisor-specific and cannot be restored by a different hypervisor
190 changes: 190 additions & 0 deletions lib/hypervisor/cloudhypervisor/cloudhypervisor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Package cloudhypervisor implements the hypervisor.Hypervisor interface
// for Cloud Hypervisor VMM.
package cloudhypervisor

import (
"context"
"fmt"

"github.com/onkernel/hypeman/lib/hypervisor"
"github.com/onkernel/hypeman/lib/vmm"
)

// CloudHypervisor implements hypervisor.Hypervisor for Cloud Hypervisor VMM.
type CloudHypervisor struct {
client *vmm.VMM
socketPath string
}

// New creates a new Cloud Hypervisor client for an existing VMM socket.
func New(socketPath string) (*CloudHypervisor, error) {
client, err := vmm.NewVMM(socketPath)
if err != nil {
return nil, fmt.Errorf("create vmm client: %w", err)
}
return &CloudHypervisor{
client: client,
socketPath: socketPath,
}, nil
}

// Capabilities returns the features supported by Cloud Hypervisor.
func (c *CloudHypervisor) Capabilities() hypervisor.Capabilities {
return hypervisor.Capabilities{
SupportsSnapshot: true,
SupportsHotplugMemory: true,
SupportsPause: true,
SupportsVsock: true,
SupportsGPUPassthrough: true,
}
}

// CreateVM configures the VM in Cloud Hypervisor.
func (c *CloudHypervisor) CreateVM(ctx context.Context, config hypervisor.VMConfig) error {
vmConfig := ToVMConfig(config)
resp, err := c.client.CreateVMWithResponse(ctx, vmConfig)
if err != nil {
return fmt.Errorf("create vm: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("create vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
}
return nil
}

// BootVM starts the configured VM.
func (c *CloudHypervisor) BootVM(ctx context.Context) error {
resp, err := c.client.BootVMWithResponse(ctx)
if err != nil {
return fmt.Errorf("boot vm: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("boot vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
}
return nil
}

// DeleteVM removes the VM configuration from Cloud Hypervisor.
func (c *CloudHypervisor) DeleteVM(ctx context.Context) error {
resp, err := c.client.DeleteVMWithResponse(ctx)
if err != nil {
return fmt.Errorf("delete vm: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("delete vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
}
return nil
}

// Shutdown stops the VMM process gracefully.
func (c *CloudHypervisor) Shutdown(ctx context.Context) error {
resp, err := c.client.ShutdownVMMWithResponse(ctx)
if err != nil {
return fmt.Errorf("shutdown vmm: %w", err)
}
// ShutdownVMM may return various codes, 204 is success
if resp.StatusCode() != 204 {
return fmt.Errorf("shutdown vmm failed with status %d", resp.StatusCode())
}
return nil
}

// GetVMInfo returns current VM state.
func (c *CloudHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) {
resp, err := c.client.GetVmInfoWithResponse(ctx)
if err != nil {
return nil, fmt.Errorf("get vm info: %w", err)
}
if resp.StatusCode() != 200 || resp.JSON200 == nil {
return nil, fmt.Errorf("get vm info failed with status %d", resp.StatusCode())
}

// Map Cloud Hypervisor state to hypervisor.VMState
var state hypervisor.VMState
switch resp.JSON200.State {
case vmm.Created:
state = hypervisor.StateCreated
case vmm.Running:
state = hypervisor.StateRunning
case vmm.Paused:
state = hypervisor.StatePaused
case vmm.Shutdown:
state = hypervisor.StateShutdown
default:
return nil, fmt.Errorf("unknown vm state: %s", resp.JSON200.State)
}

return &hypervisor.VMInfo{State: state}, nil
}

// Pause suspends VM execution.
func (c *CloudHypervisor) Pause(ctx context.Context) error {
resp, err := c.client.PauseVMWithResponse(ctx)
if err != nil {
return fmt.Errorf("pause vm: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("pause vm failed with status %d", resp.StatusCode())
}
return nil
}

// Resume continues VM execution.
func (c *CloudHypervisor) Resume(ctx context.Context) error {
resp, err := c.client.ResumeVMWithResponse(ctx)
if err != nil {
return fmt.Errorf("resume vm: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("resume vm failed with status %d", resp.StatusCode())
}
return nil
}

// Snapshot creates a VM snapshot.
func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string) error {
snapshotURL := "file://" + destPath
snapshotConfig := vmm.VmSnapshotConfig{DestinationUrl: &snapshotURL}
resp, err := c.client.PutVmSnapshotWithResponse(ctx, snapshotConfig)
if err != nil {
return fmt.Errorf("snapshot: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("snapshot failed with status %d", resp.StatusCode())
}
return nil
}

// Restore loads a VM from snapshot.
func (c *CloudHypervisor) Restore(ctx context.Context, sourcePath string) error {
sourceURL := "file://" + sourcePath
restoreConfig := vmm.RestoreConfig{
SourceUrl: sourceURL,
Prefault: ptr(false),
}
resp, err := c.client.PutVmRestoreWithResponse(ctx, restoreConfig)
if err != nil {
return fmt.Errorf("restore: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("restore failed with status %d", resp.StatusCode())
}
return nil
}

// ResizeMemory changes the VM's memory allocation.
func (c *CloudHypervisor) ResizeMemory(ctx context.Context, bytes int64) error {
resizeConfig := vmm.VmResize{DesiredRam: &bytes}
resp, err := c.client.PutVmResizeWithResponse(ctx, resizeConfig)
if err != nil {
return fmt.Errorf("resize memory: %w", err)
}
if resp.StatusCode() != 204 {
return fmt.Errorf("resize memory failed with status %d", resp.StatusCode())
}
return nil
}

func ptr[T any](v T) *T {
return &v
}
112 changes: 112 additions & 0 deletions lib/hypervisor/cloudhypervisor/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package cloudhypervisor

import (
"github.com/onkernel/hypeman/lib/hypervisor"
"github.com/onkernel/hypeman/lib/vmm"
)

// ToVMConfig converts hypervisor.VMConfig to Cloud Hypervisor's vmm.VmConfig.
func ToVMConfig(cfg hypervisor.VMConfig) vmm.VmConfig {
// Payload configuration (kernel + initramfs)
payload := vmm.PayloadConfig{
Kernel: ptr(cfg.KernelPath),
Cmdline: ptr(cfg.KernelArgs),
Initramfs: ptr(cfg.InitrdPath),
}

// CPU configuration
cpus := vmm.CpusConfig{
BootVcpus: cfg.VCPUs,
MaxVcpus: cfg.VCPUs,
}

// Add topology if provided
if cfg.Topology != nil {
cpus.Topology = &vmm.CpuTopology{
ThreadsPerCore: ptr(cfg.Topology.ThreadsPerCore),
CoresPerDie: ptr(cfg.Topology.CoresPerDie),
DiesPerPackage: ptr(cfg.Topology.DiesPerPackage),
Packages: ptr(cfg.Topology.Packages),
}
}

// Memory configuration
memory := vmm.MemoryConfig{
Size: cfg.MemoryBytes,
}
if cfg.HotplugBytes > 0 {
memory.HotplugSize = &cfg.HotplugBytes
memory.HotplugMethod = ptr("VirtioMem")
}

// Disk configuration
var disks []vmm.DiskConfig
for _, d := range cfg.Disks {
disk := vmm.DiskConfig{
Path: ptr(d.Path),
}
if d.Readonly {
disk.Readonly = ptr(true)
}
disks = append(disks, disk)
}

// Serial console configuration
serial := vmm.ConsoleConfig{
Mode: vmm.ConsoleConfigMode("File"),
File: ptr(cfg.SerialLogPath),
}

// Console off (we use serial)
console := vmm.ConsoleConfig{
Mode: vmm.ConsoleConfigMode("Off"),
}

// Network configuration
var nets *[]vmm.NetConfig
if len(cfg.Networks) > 0 {
netConfigs := make([]vmm.NetConfig, 0, len(cfg.Networks))
for _, n := range cfg.Networks {
netConfigs = append(netConfigs, vmm.NetConfig{
Tap: ptr(n.TAPDevice),
Ip: ptr(n.IP),
Mac: ptr(n.MAC),
Mask: ptr(n.Netmask),
})
}
nets = &netConfigs
}

// Vsock configuration
var vsock *vmm.VsockConfig
if cfg.VsockCID > 0 {
vsock = &vmm.VsockConfig{
Cid: cfg.VsockCID,
Socket: cfg.VsockSocket,
}
}

// Device passthrough configuration
var devices *[]vmm.DeviceConfig
if len(cfg.PCIDevices) > 0 {
deviceConfigs := make([]vmm.DeviceConfig, 0, len(cfg.PCIDevices))
for _, path := range cfg.PCIDevices {
deviceConfigs = append(deviceConfigs, vmm.DeviceConfig{
Path: path,
})
}
devices = &deviceConfigs
}

return vmm.VmConfig{
Payload: payload,
Cpus: &cpus,
Memory: &memory,
Disks: &disks,
Serial: &serial,
Console: &console,
Net: nets,
Vsock: vsock,
Devices: devices,
}
}
Loading
Loading