kernel · sjmiller609 · Dec 21, 2025 · Dec 20, 2025 · Dec 20, 2025 · Dec 20, 2025
diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go
@@ -56,7 +56,7 @@ func newTestService(t *testing.T) *ApiService {
 	}
 }
 
-// cleanupOrphanedProcesses kills Cloud Hypervisor processes from metadata files
+// cleanupOrphanedProcesses kills hypervisor processes from metadata files
 func cleanupOrphanedProcesses(t *testing.T, dataDir string) {
 	p := paths.New(dataDir)
 	guestsDir := p.GuestsDir()
@@ -77,21 +77,21 @@ func cleanupOrphanedProcesses(t *testing.T, dataDir string) {
 			continue
 		}
 
-		// Parse just the CHPID field
+		// Parse just the HypervisorPID field
 		var meta struct {
-			CHPID *int `json:"CHPID"`
+			HypervisorPID *int `json:"HypervisorPID"`
 		}
 		if err := json.Unmarshal(data, &meta); err != nil {
 			continue
 		}
 
 		// If metadata has a PID, try to kill it
-		if meta.CHPID != nil {
-			pid := *meta.CHPID
+		if meta.HypervisorPID != nil {
+			pid := *meta.HypervisorPID
 
 			// Check if process exists
 			if err := syscall.Kill(pid, 0); err == nil {
-				t.Logf("Cleaning up orphaned Cloud Hypervisor process: PID %d", pid)
+				t.Logf("Cleaning up orphaned hypervisor process: PID %d", pid)
 				syscall.Kill(pid, syscall.SIGKILL)
 			}
 		}

diff --git a/lib/hypervisor/README.md b/lib/hypervisor/README.md
@@ -0,0 +1,35 @@
+# Hypervisor Abstraction
+
+Provides a common interface for VM management across different hypervisors.
+
+## Purpose
+
+Hypeman originally supported only Cloud Hypervisor. This abstraction layer allows supporting multiple hypervisors (e.g., QEMU) through a unified interface, enabling:
+
+- **Hypervisor choice per instance** - Different instances can use different hypervisors
+- **Feature parity where possible** - Common operations work the same way
+- **Graceful degradation** - Features unsupported by a hypervisor can be detected and handled
+
+## How It Works
+
+The abstraction defines two key interfaces:
+
+1. **Hypervisor** - VM lifecycle operations (create, boot, pause, resume, snapshot, restore, shutdown)
+2. **ProcessManager** - Hypervisor process lifecycle (start binary, get binary path)
+
+Each hypervisor implementation translates the generic configuration and operations to its native format. For example, Cloud Hypervisor uses an HTTP API over a Unix socket, while QEMU would use QMP.
+
+Before using optional features, callers check capabilities:
+
+```go
+if hv.Capabilities().SupportsSnapshot {
+    hv.Snapshot(ctx, path)
+}
+```
+
+## Hypervisor Switching
+
+Instances store their hypervisor type in metadata. An instance can switch hypervisors only when stopped (no running VM, no snapshot), since:
+
+- Disk images are hypervisor-agnostic
+- Snapshots are hypervisor-specific and cannot be restored by a different hypervisor
diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go
@@ -0,0 +1,245 @@
+// Package cloudhypervisor implements the hypervisor.Hypervisor interface
+// for Cloud Hypervisor VMM.
+package cloudhypervisor
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/onkernel/hypeman/lib/hypervisor"
+	"github.com/onkernel/hypeman/lib/vmm"
+)
+
+// CloudHypervisor implements hypervisor.Hypervisor for Cloud Hypervisor VMM.
+type CloudHypervisor struct {
+	client *vmm.VMM
+}
+
+// New creates a new Cloud Hypervisor client for an existing VMM socket.
+func New(socketPath string) (*CloudHypervisor, error) {
+	client, err := vmm.NewVMM(socketPath)
+	if err != nil {
+		return nil, fmt.Errorf("create vmm client: %w", err)
+	}
+	return &CloudHypervisor{
+		client: client,
+	}, nil
+}
+
+// Capabilities returns the features supported by Cloud Hypervisor.
+func (c *CloudHypervisor) Capabilities() hypervisor.Capabilities {
+	return hypervisor.Capabilities{
+		SupportsSnapshot:       true,
+		SupportsHotplugMemory:  true,
+		SupportsPause:          true,
+		SupportsVsock:          true,
+		SupportsGPUPassthrough: true,
+	}
+}
+
+// CreateVM configures the VM in Cloud Hypervisor.
+func (c *CloudHypervisor) CreateVM(ctx context.Context, config hypervisor.VMConfig) error {
+	vmConfig := ToVMConfig(config)
+	resp, err := c.client.CreateVMWithResponse(ctx, vmConfig)
+	if err != nil {
+		return fmt.Errorf("create vm: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("create vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
+	}
+	return nil
+}
+
+// BootVM starts the configured VM.
+func (c *CloudHypervisor) BootVM(ctx context.Context) error {
+	resp, err := c.client.BootVMWithResponse(ctx)
+	if err != nil {
+		return fmt.Errorf("boot vm: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("boot vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
+	}
+	return nil
+}
+
+// DeleteVM removes the VM configuration from Cloud Hypervisor.
+func (c *CloudHypervisor) DeleteVM(ctx context.Context) error {
+	resp, err := c.client.DeleteVMWithResponse(ctx)
+	if err != nil {
+		return fmt.Errorf("delete vm: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("delete vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
+	}
+	return nil
+}
+
+// Shutdown stops the VMM process gracefully.
+func (c *CloudHypervisor) Shutdown(ctx context.Context) error {
+	resp, err := c.client.ShutdownVMMWithResponse(ctx)
+	if err != nil {
+		return fmt.Errorf("shutdown vmm: %w", err)
+	}
+	// ShutdownVMM may return various codes, 204 is success
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("shutdown vmm failed with status %d", resp.StatusCode())
+	}
+	return nil
+}
+
+// GetVMInfo returns current VM state.
+func (c *CloudHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) {
+	resp, err := c.client.GetVmInfoWithResponse(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("get vm info: %w", err)
+	}
+	if resp.StatusCode() != 200 || resp.JSON200 == nil {
+		return nil, fmt.Errorf("get vm info failed with status %d", resp.StatusCode())
+	}
+
+	// Map Cloud Hypervisor state to hypervisor.VMState
+	var state hypervisor.VMState
+	switch resp.JSON200.State {
+	case vmm.Created:
+		state = hypervisor.StateCreated
+	case vmm.Running:
+		state = hypervisor.StateRunning
+	case vmm.Paused:
+		state = hypervisor.StatePaused
+	case vmm.Shutdown:
+		state = hypervisor.StateShutdown
+	default:
+		return nil, fmt.Errorf("unknown vm state: %s", resp.JSON200.State)
+	}
+
+	return &hypervisor.VMInfo{
+		State:            state,
+		MemoryActualSize: resp.JSON200.MemoryActualSize,
+	}, nil
+}
+
+// Pause suspends VM execution.
+func (c *CloudHypervisor) Pause(ctx context.Context) error {
+	resp, err := c.client.PauseVMWithResponse(ctx)
+	if err != nil {
+		return fmt.Errorf("pause vm: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("pause vm failed with status %d", resp.StatusCode())
+	}
+	return nil
+}
+
+// Resume continues VM execution.
+func (c *CloudHypervisor) Resume(ctx context.Context) error {
+	resp, err := c.client.ResumeVMWithResponse(ctx)
+	if err != nil {
+		return fmt.Errorf("resume vm: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("resume vm failed with status %d", resp.StatusCode())
+	}
+	return nil
+}
+
+// Snapshot creates a VM snapshot.
+func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string) error {
+	snapshotURL := "file://" + destPath
+	snapshotConfig := vmm.VmSnapshotConfig{DestinationUrl: &snapshotURL}
+	resp, err := c.client.PutVmSnapshotWithResponse(ctx, snapshotConfig)
+	if err != nil {
+		return fmt.Errorf("snapshot: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("snapshot failed with status %d", resp.StatusCode())
+	}
+	return nil
+}
+
+// Restore loads a VM from snapshot.
+func (c *CloudHypervisor) Restore(ctx context.Context, sourcePath string) error {
+	sourceURL := "file://" + sourcePath
+	restoreConfig := vmm.RestoreConfig{
+		SourceUrl: sourceURL,
+		Prefault:  ptr(false),
+	}
+	resp, err := c.client.PutVmRestoreWithResponse(ctx, restoreConfig)
+	if err != nil {
+		return fmt.Errorf("restore: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("restore failed with status %d", resp.StatusCode())
+	}
+	return nil
+}
+
+// ResizeMemory changes the VM's memory allocation.
+func (c *CloudHypervisor) ResizeMemory(ctx context.Context, bytes int64) error {
+	resizeConfig := vmm.VmResize{DesiredRam: &bytes}
+	resp, err := c.client.PutVmResizeWithResponse(ctx, resizeConfig)
+	if err != nil {
+		return fmt.Errorf("resize memory: %w", err)
+	}
+	if resp.StatusCode() != 204 {
+		return fmt.Errorf("resize memory failed with status %d", resp.StatusCode())
+	}
+	return nil
+}
+
+// ResizeMemoryAndWait changes the VM's memory allocation and waits for it to stabilize.
+// It polls until the actual memory size stabilizes (stops changing) or timeout is reached.
+func (c *CloudHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error {
+	// First, request the resize
+	if err := c.ResizeMemory(ctx, bytes); err != nil {
+		return err
+	}
+
+	// Poll until memory stabilizes
+	const pollInterval = 20 * time.Millisecond
+	deadline := time.Now().Add(timeout)
+
+	var lastSize int64 = -1
+	stableCount := 0
+	const requiredStableChecks = 3 // Require 3 consecutive stable readings
+
+	for time.Now().Before(deadline) {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		info, err := c.GetVMInfo(ctx)
+		if err != nil {
+			return fmt.Errorf("poll memory size: %w", err)
+		}
+
+		if info.MemoryActualSize == nil {
+			// No memory info available, just return after resize
+			return nil
+		}
+
+		currentSize := *info.MemoryActualSize
+
+		if currentSize == lastSize {
+			stableCount++
+			if stableCount >= requiredStableChecks {
+				// Memory has stabilized
+				return nil
+			}
+		} else {
+			stableCount = 0
+			lastSize = currentSize
+		}
+
+		time.Sleep(pollInterval)
+	}
+
+	// Timeout reached, but resize was requested successfully
+	return nil
+}
+
+func ptr[T any](v T) *T {
+	return &v
+}