From 52982d0ec71cce51a0226a86a9803b3b362931f4 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 20 Dec 2025 13:39:50 -0500 Subject: [PATCH 1/4] Move vmm calls behind hypervisor interface --- cmd/api/api/api_test.go | 12 +- .../cloudhypervisor/cloudhypervisor.go | 190 +++++++++++++ lib/hypervisor/cloudhypervisor/config.go | 112 ++++++++ lib/hypervisor/cloudhypervisor/process.go | 48 ++++ lib/hypervisor/config.go | 73 +++++ lib/hypervisor/hypervisor.go | 98 +++++++ lib/instances/create.go | 250 ++++++++---------- lib/instances/delete.go | 32 +-- lib/instances/manager.go | 29 +- lib/instances/manager_test.go | 32 +-- lib/instances/query.go | 53 ++-- lib/instances/resource_limits_test.go | 8 +- lib/instances/restore.go | 63 ++--- lib/instances/standby.go | 175 ++++-------- lib/instances/start.go | 6 +- lib/instances/stop.go | 10 +- lib/instances/types.go | 11 +- 17 files changed, 805 insertions(+), 397 deletions(-) create mode 100644 lib/hypervisor/cloudhypervisor/cloudhypervisor.go create mode 100644 lib/hypervisor/cloudhypervisor/config.go create mode 100644 lib/hypervisor/cloudhypervisor/process.go create mode 100644 lib/hypervisor/config.go create mode 100644 lib/hypervisor/hypervisor.go diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index c5984fd..fda6127 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -56,7 +56,7 @@ func newTestService(t *testing.T) *ApiService { } } -// cleanupOrphanedProcesses kills Cloud Hypervisor processes from metadata files +// cleanupOrphanedProcesses kills hypervisor processes from metadata files func cleanupOrphanedProcesses(t *testing.T, dataDir string) { p := paths.New(dataDir) guestsDir := p.GuestsDir() @@ -77,21 +77,21 @@ func cleanupOrphanedProcesses(t *testing.T, dataDir string) { continue } - // Parse just the CHPID field + // Parse just the HypervisorPID field var meta struct { - CHPID *int `json:"CHPID"` + HypervisorPID *int `json:"HypervisorPID"` } if err := json.Unmarshal(data, &meta); err != nil { continue } // If metadata has a PID, try to kill it - if meta.CHPID != nil { - pid := *meta.CHPID + if meta.HypervisorPID != nil { + pid := *meta.HypervisorPID // Check if process exists if err := syscall.Kill(pid, 0); err == nil { - t.Logf("Cleaning up orphaned Cloud Hypervisor process: PID %d", pid) + t.Logf("Cleaning up orphaned hypervisor process: PID %d", pid) syscall.Kill(pid, syscall.SIGKILL) } } diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go new file mode 100644 index 0000000..394d628 --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go @@ -0,0 +1,190 @@ +// Package cloudhypervisor implements the hypervisor.Hypervisor interface +// for Cloud Hypervisor VMM. +package cloudhypervisor + +import ( + "context" + "fmt" + + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/onkernel/hypeman/lib/vmm" +) + +// CloudHypervisor implements hypervisor.Hypervisor for Cloud Hypervisor VMM. +type CloudHypervisor struct { + client *vmm.VMM + socketPath string +} + +// New creates a new Cloud Hypervisor client for an existing VMM socket. +func New(socketPath string) (*CloudHypervisor, error) { + client, err := vmm.NewVMM(socketPath) + if err != nil { + return nil, fmt.Errorf("create vmm client: %w", err) + } + return &CloudHypervisor{ + client: client, + socketPath: socketPath, + }, nil +} + +// Capabilities returns the features supported by Cloud Hypervisor. 
+func (c *CloudHypervisor) Capabilities() hypervisor.Capabilities { + return hypervisor.Capabilities{ + SupportsSnapshot: true, + SupportsHotplugMemory: true, + SupportsPause: true, + SupportsVsock: true, + SupportsGPUPassthrough: true, + } +} + +// CreateVM configures the VM in Cloud Hypervisor. +func (c *CloudHypervisor) CreateVM(ctx context.Context, config hypervisor.VMConfig) error { + vmConfig := ToVMConfig(config) + resp, err := c.client.CreateVMWithResponse(ctx, vmConfig) + if err != nil { + return fmt.Errorf("create vm: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("create vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) + } + return nil +} + +// BootVM starts the configured VM. +func (c *CloudHypervisor) BootVM(ctx context.Context) error { + resp, err := c.client.BootVMWithResponse(ctx) + if err != nil { + return fmt.Errorf("boot vm: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("boot vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) + } + return nil +} + +// DeleteVM removes the VM configuration from Cloud Hypervisor. +func (c *CloudHypervisor) DeleteVM(ctx context.Context) error { + resp, err := c.client.DeleteVMWithResponse(ctx) + if err != nil { + return fmt.Errorf("delete vm: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("delete vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) + } + return nil +} + +// Shutdown stops the VMM process gracefully. +func (c *CloudHypervisor) Shutdown(ctx context.Context) error { + resp, err := c.client.ShutdownVMMWithResponse(ctx) + if err != nil { + return fmt.Errorf("shutdown vmm: %w", err) + } + // ShutdownVMM may return various codes, 204 is success + if resp.StatusCode() != 204 { + return fmt.Errorf("shutdown vmm failed with status %d", resp.StatusCode()) + } + return nil +} + +// GetVMInfo returns current VM state. +func (c *CloudHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) { + resp, err := c.client.GetVmInfoWithResponse(ctx) + if err != nil { + return nil, fmt.Errorf("get vm info: %w", err) + } + if resp.StatusCode() != 200 || resp.JSON200 == nil { + return nil, fmt.Errorf("get vm info failed with status %d", resp.StatusCode()) + } + + // Map Cloud Hypervisor state to hypervisor.VMState + var state hypervisor.VMState + switch resp.JSON200.State { + case vmm.Created: + state = hypervisor.StateCreated + case vmm.Running: + state = hypervisor.StateRunning + case vmm.Paused: + state = hypervisor.StatePaused + case vmm.Shutdown: + state = hypervisor.StateShutdown + default: + return nil, fmt.Errorf("unknown vm state: %s", resp.JSON200.State) + } + + return &hypervisor.VMInfo{State: state}, nil +} + +// Pause suspends VM execution. +func (c *CloudHypervisor) Pause(ctx context.Context) error { + resp, err := c.client.PauseVMWithResponse(ctx) + if err != nil { + return fmt.Errorf("pause vm: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("pause vm failed with status %d", resp.StatusCode()) + } + return nil +} + +// Resume continues VM execution. +func (c *CloudHypervisor) Resume(ctx context.Context) error { + resp, err := c.client.ResumeVMWithResponse(ctx) + if err != nil { + return fmt.Errorf("resume vm: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("resume vm failed with status %d", resp.StatusCode()) + } + return nil +} + +// Snapshot creates a VM snapshot. 
+func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string) error { + snapshotURL := "file://" + destPath + snapshotConfig := vmm.VmSnapshotConfig{DestinationUrl: &snapshotURL} + resp, err := c.client.PutVmSnapshotWithResponse(ctx, snapshotConfig) + if err != nil { + return fmt.Errorf("snapshot: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("snapshot failed with status %d", resp.StatusCode()) + } + return nil +} + +// Restore loads a VM from snapshot. +func (c *CloudHypervisor) Restore(ctx context.Context, sourcePath string) error { + sourceURL := "file://" + sourcePath + restoreConfig := vmm.RestoreConfig{ + SourceUrl: sourceURL, + Prefault: ptr(false), + } + resp, err := c.client.PutVmRestoreWithResponse(ctx, restoreConfig) + if err != nil { + return fmt.Errorf("restore: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("restore failed with status %d", resp.StatusCode()) + } + return nil +} + +// ResizeMemory changes the VM's memory allocation. +func (c *CloudHypervisor) ResizeMemory(ctx context.Context, bytes int64) error { + resizeConfig := vmm.VmResize{DesiredRam: &bytes} + resp, err := c.client.PutVmResizeWithResponse(ctx, resizeConfig) + if err != nil { + return fmt.Errorf("resize memory: %w", err) + } + if resp.StatusCode() != 204 { + return fmt.Errorf("resize memory failed with status %d", resp.StatusCode()) + } + return nil +} + +func ptr[T any](v T) *T { + return &v +} diff --git a/lib/hypervisor/cloudhypervisor/config.go b/lib/hypervisor/cloudhypervisor/config.go new file mode 100644 index 0000000..7dcd9a7 --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/config.go @@ -0,0 +1,112 @@ +package cloudhypervisor + +import ( + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/onkernel/hypeman/lib/vmm" +) + +// ToVMConfig converts hypervisor.VMConfig to Cloud Hypervisor's vmm.VmConfig. 
+func ToVMConfig(cfg hypervisor.VMConfig) vmm.VmConfig { + // Payload configuration (kernel + initramfs) + payload := vmm.PayloadConfig{ + Kernel: ptr(cfg.KernelPath), + Cmdline: ptr(cfg.KernelArgs), + Initramfs: ptr(cfg.InitrdPath), + } + + // CPU configuration + cpus := vmm.CpusConfig{ + BootVcpus: cfg.VCPUs, + MaxVcpus: cfg.VCPUs, + } + + // Add topology if provided + if cfg.Topology != nil { + cpus.Topology = &vmm.CpuTopology{ + ThreadsPerCore: ptr(cfg.Topology.ThreadsPerCore), + CoresPerDie: ptr(cfg.Topology.CoresPerDie), + DiesPerPackage: ptr(cfg.Topology.DiesPerPackage), + Packages: ptr(cfg.Topology.Packages), + } + } + + // Memory configuration + memory := vmm.MemoryConfig{ + Size: cfg.MemoryBytes, + } + if cfg.HotplugBytes > 0 { + memory.HotplugSize = &cfg.HotplugBytes + memory.HotplugMethod = ptr("VirtioMem") + } + + // Disk configuration + var disks []vmm.DiskConfig + for _, d := range cfg.Disks { + disk := vmm.DiskConfig{ + Path: ptr(d.Path), + } + if d.Readonly { + disk.Readonly = ptr(true) + } + disks = append(disks, disk) + } + + // Serial console configuration + serial := vmm.ConsoleConfig{ + Mode: vmm.ConsoleConfigMode("File"), + File: ptr(cfg.SerialLogPath), + } + + // Console off (we use serial) + console := vmm.ConsoleConfig{ + Mode: vmm.ConsoleConfigMode("Off"), + } + + // Network configuration + var nets *[]vmm.NetConfig + if len(cfg.Networks) > 0 { + netConfigs := make([]vmm.NetConfig, 0, len(cfg.Networks)) + for _, n := range cfg.Networks { + netConfigs = append(netConfigs, vmm.NetConfig{ + Tap: ptr(n.TAPDevice), + Ip: ptr(n.IP), + Mac: ptr(n.MAC), + Mask: ptr(n.Netmask), + }) + } + nets = &netConfigs + } + + // Vsock configuration + var vsock *vmm.VsockConfig + if cfg.VsockCID > 0 { + vsock = &vmm.VsockConfig{ + Cid: cfg.VsockCID, + Socket: cfg.VsockSocket, + } + } + + // Device passthrough configuration + var devices *[]vmm.DeviceConfig + if len(cfg.PCIDevices) > 0 { + deviceConfigs := make([]vmm.DeviceConfig, 0, len(cfg.PCIDevices)) + for _, path := range cfg.PCIDevices { + deviceConfigs = append(deviceConfigs, vmm.DeviceConfig{ + Path: path, + }) + } + devices = &deviceConfigs + } + + return vmm.VmConfig{ + Payload: payload, + Cpus: &cpus, + Memory: &memory, + Disks: &disks, + Serial: &serial, + Console: &console, + Net: nets, + Vsock: vsock, + Devices: devices, + } +} diff --git a/lib/hypervisor/cloudhypervisor/process.go b/lib/hypervisor/cloudhypervisor/process.go new file mode 100644 index 0000000..69d0744 --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/process.go @@ -0,0 +1,48 @@ +package cloudhypervisor + +import ( + "context" + "fmt" + + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/vmm" +) + +// ProcessManager implements hypervisor.ProcessManager for Cloud Hypervisor. +type ProcessManager struct{} + +// NewProcessManager creates a new Cloud Hypervisor process manager. +func NewProcessManager() *ProcessManager { + return &ProcessManager{} +} + +// Verify ProcessManager implements the interface +var _ hypervisor.ProcessManager = (*ProcessManager)(nil) + +// StartProcess launches a Cloud Hypervisor VMM process. 
+func (p *ProcessManager) StartProcess(ctx context.Context, paths *paths.Paths, version string, socketPath string) (int, error) { + chVersion := vmm.CHVersion(version) + if !vmm.IsVersionSupported(chVersion) { + return 0, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + } + return vmm.StartProcess(ctx, paths, chVersion, socketPath) +} + +// StartProcessWithArgs launches a Cloud Hypervisor VMM process with extra arguments. +func (p *ProcessManager) StartProcessWithArgs(ctx context.Context, paths *paths.Paths, version string, socketPath string, extraArgs []string) (int, error) { + chVersion := vmm.CHVersion(version) + if !vmm.IsVersionSupported(chVersion) { + return 0, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + } + return vmm.StartProcessWithArgs(ctx, paths, chVersion, socketPath, extraArgs) +} + +// GetBinaryPath returns the path to the Cloud Hypervisor binary. +func (p *ProcessManager) GetBinaryPath(paths *paths.Paths, version string) (string, error) { + chVersion := vmm.CHVersion(version) + if !vmm.IsVersionSupported(chVersion) { + return "", fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + } + return vmm.GetBinaryPath(paths, chVersion) +} diff --git a/lib/hypervisor/config.go b/lib/hypervisor/config.go new file mode 100644 index 0000000..9d56ebb --- /dev/null +++ b/lib/hypervisor/config.go @@ -0,0 +1,73 @@ +package hypervisor + +// VMConfig is the hypervisor-agnostic VM configuration. +// Each hypervisor implementation translates this to its native format. +type VMConfig struct { + // Compute resources + VCPUs int + MemoryBytes int64 + HotplugBytes int64 + Topology *CPUTopology + + // Storage + Disks []DiskConfig + + // Network + Networks []NetworkConfig + + // Console + SerialLogPath string + + // Vsock + VsockCID int64 + VsockSocket string + + // PCI device passthrough (GPU, etc.) + PCIDevices []string + + // Boot configuration + KernelPath string + InitrdPath string + KernelArgs string +} + +// CPUTopology defines the virtual CPU topology +type CPUTopology struct { + ThreadsPerCore int + CoresPerDie int + DiesPerPackage int + Packages int +} + +// DiskConfig represents a disk attached to the VM +type DiskConfig struct { + Path string + Readonly bool +} + +// NetworkConfig represents a network interface attached to the VM +type NetworkConfig struct { + TAPDevice string + IP string + MAC string + Netmask string +} + +// VMInfo contains current VM state information +type VMInfo struct { + State VMState +} + +// VMState represents the VM execution state +type VMState string + +const ( + // StateCreated means the VM is configured but not running + StateCreated VMState = "created" + // StateRunning means the VM is actively executing + StateRunning VMState = "running" + // StatePaused means the VM execution is suspended + StatePaused VMState = "paused" + // StateShutdown means the VM has stopped but VMM exists + StateShutdown VMState = "shutdown" +) diff --git a/lib/hypervisor/hypervisor.go b/lib/hypervisor/hypervisor.go new file mode 100644 index 0000000..7b1c0fb --- /dev/null +++ b/lib/hypervisor/hypervisor.go @@ -0,0 +1,98 @@ +// Package hypervisor provides an abstraction layer for virtual machine managers. +// This allows the instances package to work with different hypervisors +// (e.g., Cloud Hypervisor, QEMU) through a common interface. 
+package hypervisor + +import ( + "context" + + "github.com/onkernel/hypeman/lib/paths" +) + +// Type identifies the hypervisor implementation +type Type string + +const ( + // TypeCloudHypervisor is the Cloud Hypervisor VMM + TypeCloudHypervisor Type = "cloud-hypervisor" + // Future: TypeQEMU Type = "qemu" +) + +// Hypervisor defines the interface for VM management operations. +// All hypervisor implementations must implement this interface. +type Hypervisor interface { + // CreateVM configures the VM with the given configuration. + // The VM is not started yet after this call. + CreateVM(ctx context.Context, config VMConfig) error + + // BootVM starts the configured VM. + // Must be called after CreateVM. + BootVM(ctx context.Context) error + + // DeleteVM removes the VM configuration. + // The VMM process may still be running after this call. + DeleteVM(ctx context.Context) error + + // Shutdown stops the VMM process gracefully. + Shutdown(ctx context.Context) error + + // GetVMInfo returns current VM state information. + GetVMInfo(ctx context.Context) (*VMInfo, error) + + // Pause suspends VM execution. + // Check Capabilities().SupportsPause before calling. + Pause(ctx context.Context) error + + // Resume continues VM execution after pause. + // Check Capabilities().SupportsPause before calling. + Resume(ctx context.Context) error + + // Snapshot creates a VM snapshot at the given path. + // Check Capabilities().SupportsSnapshot before calling. + Snapshot(ctx context.Context, destPath string) error + + // Restore loads a VM from a snapshot at the given path. + // Check Capabilities().SupportsSnapshot before calling. + Restore(ctx context.Context, sourcePath string) error + + // ResizeMemory changes the VM's memory allocation. + // Check Capabilities().SupportsHotplugMemory before calling. + ResizeMemory(ctx context.Context, bytes int64) error + + // Capabilities returns what features this hypervisor supports. + Capabilities() Capabilities +} + +// Capabilities indicates which optional features a hypervisor supports. +// Callers should check these before calling optional methods. +type Capabilities struct { + // SupportsSnapshot indicates if Snapshot/Restore are available + SupportsSnapshot bool + + // SupportsHotplugMemory indicates if ResizeMemory is available + SupportsHotplugMemory bool + + // SupportsPause indicates if Pause/Resume are available + SupportsPause bool + + // SupportsVsock indicates if vsock communication is available + SupportsVsock bool + + // SupportsGPUPassthrough indicates if PCI device passthrough is available + SupportsGPUPassthrough bool +} + +// ProcessManager handles hypervisor process lifecycle. +// This is separate from the Hypervisor interface because process management +// happens before/after the VMM socket is available. +type ProcessManager interface { + // StartProcess launches the hypervisor process. + // Returns the process ID of the started hypervisor. + StartProcess(ctx context.Context, p *paths.Paths, version string, socketPath string) (pid int, err error) + + // StartProcessWithArgs launches the hypervisor process with extra arguments. + StartProcessWithArgs(ctx context.Context, p *paths.Paths, version string, socketPath string, extraArgs []string) (pid int, err error) + + // GetBinaryPath returns the path to the hypervisor binary, extracting if needed. 
+ GetBinaryPath(p *paths.Paths, version string) (string, error) +} diff --git a/lib/instances/create.go b/lib/instances/create.go index f133c0f..71e6ded 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -10,6 +10,7 @@ import ( "github.com/nrednav/cuid2" "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" @@ -257,25 +258,26 @@ func (m *manager) createInstance( // 10. Create instance metadata stored := &StoredMetadata{ - Id: id, - Name: req.Name, - Image: req.Image, - Size: size, - HotplugSize: hotplugSize, - OverlaySize: overlaySize, - Vcpus: vcpus, - Env: req.Env, - NetworkEnabled: req.NetworkEnabled, - CreatedAt: time.Now(), - StartedAt: nil, - StoppedAt: nil, - KernelVersion: string(kernelVer), - CHVersion: vmm.V49_0, // Use latest - SocketPath: m.paths.InstanceSocket(id), - DataDir: m.paths.InstanceDir(id), - VsockCID: vsockCID, - VsockSocket: vsockSocket, - Devices: resolvedDeviceIDs, + Id: id, + Name: req.Name, + Image: req.Image, + Size: size, + HotplugSize: hotplugSize, + OverlaySize: overlaySize, + Vcpus: vcpus, + Env: req.Env, + NetworkEnabled: req.NetworkEnabled, + CreatedAt: time.Now(), + StartedAt: nil, + StoppedAt: nil, + KernelVersion: string(kernelVer), + HypervisorType: hypervisor.TypeCloudHypervisor, + HypervisorVersion: string(vmm.V49_0), // Use latest + SocketPath: m.paths.InstanceSocket(id), + DataDir: m.paths.InstanceDir(id), + VsockCID: vsockCID, + VsockSocket: vsockSocket, + Devices: resolvedDeviceIDs, } // 11. Ensure directories @@ -517,67 +519,57 @@ func (m *manager) startAndBootVM( ) error { log := logger.FromContext(ctx) + // Get process manager for this hypervisor type + pm, err := m.getProcessManager(stored.HypervisorType) + if err != nil { + return fmt.Errorf("get process manager: %w", err) + } + // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process", "instance_id", stored.Id, "version", stored.CHVersion) - pid, err := vmm.StartProcess(ctx, m.paths, stored.CHVersion, stored.SocketPath) + log.DebugContext(ctx, "starting VMM process", "instance_id", stored.Id, "hypervisor", stored.HypervisorType, "version", stored.HypervisorVersion) + pid, err := pm.StartProcess(ctx, m.paths, stored.HypervisorVersion, stored.SocketPath) if err != nil { return fmt.Errorf("start vmm: %w", err) } // Store the PID for later cleanup - stored.CHPID = &pid + stored.HypervisorPID = &pid log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) - // Create VMM client - client, err := vmm.NewVMM(stored.SocketPath) + // Create hypervisor client + hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) if err != nil { - return fmt.Errorf("create vmm client: %w", err) + return fmt.Errorf("create hypervisor client: %w", err) } - // Build VM configuration matching Cloud Hypervisor VmConfig + // Build VM configuration inst := &Instance{StoredMetadata: *stored} - vmConfig, err := m.buildVMConfig(ctx, inst, imageInfo, netConfig) + vmConfig, err := m.buildHypervisorConfig(ctx, inst, imageInfo, netConfig) if err != nil { return fmt.Errorf("build vm config: %w", err) } - // Create VM in VMM - log.DebugContext(ctx, "creating VM in VMM", "instance_id", stored.Id) - createResp, err := client.CreateVMWithResponse(ctx, vmConfig) - if err != nil { + // Create VM in hypervisor + log.DebugContext(ctx, "creating VM in hypervisor", "instance_id", stored.Id) + if err 
:= hv.CreateVM(ctx, vmConfig); err != nil { return fmt.Errorf("create vm: %w", err) } - if createResp.StatusCode() != 204 { - // Include response body for debugging - body := string(createResp.Body) - log.ErrorContext(ctx, "create VM failed", "instance_id", stored.Id, "status", createResp.StatusCode(), "body", body) - return fmt.Errorf("create vm failed with status %d: %s", createResp.StatusCode(), body) - } // Transition: Created → Running (boot VM) log.DebugContext(ctx, "booting VM", "instance_id", stored.Id) - bootResp, err := client.BootVMWithResponse(ctx) - if err != nil { + if err := hv.BootVM(ctx); err != nil { // Try to cleanup - client.DeleteVMWithResponse(ctx) - client.ShutdownVMMWithResponse(ctx) + hv.DeleteVM(ctx) + hv.Shutdown(ctx) return fmt.Errorf("boot vm: %w", err) } - if bootResp.StatusCode() != 204 { - client.DeleteVMWithResponse(ctx) - client.ShutdownVMMWithResponse(ctx) - body := string(bootResp.Body) - log.ErrorContext(ctx, "boot VM failed", "instance_id", stored.Id, "status", bootResp.StatusCode(), "body", body) - return fmt.Errorf("boot vm failed with status %d: %s", bootResp.StatusCode(), body) - } // Optional: Expand memory to max if hotplug configured - if inst.HotplugSize > 0 { + if inst.HotplugSize > 0 && hv.Capabilities().SupportsHotplugMemory { totalBytes := inst.Size + inst.HotplugSize log.DebugContext(ctx, "expanding VM memory", "instance_id", stored.Id, "total_bytes", totalBytes) - resizeConfig := vmm.VmResize{DesiredRam: &totalBytes} // Best effort, ignore errors - if resp, err := client.PutVmResizeWithResponse(ctx, resizeConfig); err != nil || resp.StatusCode() != 204 { + if err := hv.ResizeMemory(ctx, totalBytes); err != nil { log.WarnContext(ctx, "failed to expand VM memory", "instance_id", stored.Id, "error", err) } } @@ -585,140 +577,106 @@ func (m *manager) startAndBootVM( return nil } -// buildVMConfig creates the Cloud Hypervisor VmConfig -func (m *manager) buildVMConfig(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { +// buildHypervisorConfig creates a hypervisor-agnostic VM configuration +func (m *manager) buildHypervisorConfig(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (hypervisor.VMConfig, error) { // Get system file paths kernelPath, _ := m.systemManager.GetKernelPath(system.KernelVersion(inst.KernelVersion)) initrdPath, _ := m.systemManager.GetInitrdPath() - // Payload configuration (kernel + initramfs) - payload := vmm.PayloadConfig{ - Kernel: ptr(kernelPath), - Cmdline: ptr("console=ttyS0"), - Initramfs: ptr(initrdPath), - } - - // CPU configuration - cpus := vmm.CpusConfig{ - BootVcpus: inst.Vcpus, - MaxVcpus: inst.Vcpus, - } - - // Calculate and set guest topology based on host topology - if topology := calculateGuestTopology(inst.Vcpus, m.hostTopology); topology != nil { - cpus.Topology = topology - } - - // Memory configuration - memory := vmm.MemoryConfig{ - Size: inst.Size, - } - if inst.HotplugSize > 0 { - memory.HotplugSize = &inst.HotplugSize - memory.HotplugMethod = ptr("VirtioMem") // PascalCase, not kebab-case - } - // Disk configuration // Get rootfs disk path from image manager rootfsPath, err := images.GetDiskPath(m.paths, imageInfo.Name, imageInfo.Digest) if err != nil { - return vmm.VmConfig{}, err + return hypervisor.VMConfig{}, err } - disks := []vmm.DiskConfig{ + disks := []hypervisor.DiskConfig{ // Rootfs (from image, read-only) - { - Path: &rootfsPath, - Readonly: ptr(true), - }, + {Path: 
rootfsPath, Readonly: true}, // Overlay disk (writable) - { - Path: ptr(m.paths.InstanceOverlay(inst.Id)), - }, + {Path: m.paths.InstanceOverlay(inst.Id), Readonly: false}, // Config disk (read-only) - { - Path: ptr(m.paths.InstanceConfigDisk(inst.Id)), - Readonly: ptr(true), - }, + {Path: m.paths.InstanceConfigDisk(inst.Id), Readonly: true}, } // Add attached volumes as additional disks - // For overlay volumes, add both base (readonly) and overlay disk for _, volAttach := range inst.Volumes { volumePath := m.volumeManager.GetVolumePath(volAttach.VolumeID) if volAttach.Overlay { // Base volume is always read-only when overlay is enabled - disks = append(disks, vmm.DiskConfig{ - Path: &volumePath, - Readonly: ptr(true), + disks = append(disks, hypervisor.DiskConfig{ + Path: volumePath, + Readonly: true, }) // Overlay disk is writable overlayPath := m.paths.InstanceVolumeOverlay(inst.Id, volAttach.VolumeID) - disks = append(disks, vmm.DiskConfig{ - Path: &overlayPath, + disks = append(disks, hypervisor.DiskConfig{ + Path: overlayPath, + Readonly: false, }) } else { - disks = append(disks, vmm.DiskConfig{ - Path: &volumePath, - Readonly: ptr(volAttach.Readonly), + disks = append(disks, hypervisor.DiskConfig{ + Path: volumePath, + Readonly: volAttach.Readonly, }) } } - // Serial console configuration - serial := vmm.ConsoleConfig{ - Mode: vmm.ConsoleConfigMode("File"), - File: ptr(m.paths.InstanceAppLog(inst.Id)), - } - - // Console off (we use serial) - console := vmm.ConsoleConfig{ - Mode: vmm.ConsoleConfigMode("Off"), - } - - // Network configuration (optional, use passed config) - var nets *[]vmm.NetConfig + // Network configuration + var networks []hypervisor.NetworkConfig if netConfig != nil { - nets = &[]vmm.NetConfig{{ - Tap: &netConfig.TAPDevice, - Ip: &netConfig.IP, - Mac: &netConfig.MAC, - Mask: &netConfig.Netmask, - }} - } - - // vsock configuration for remote exec - vsock := vmm.VsockConfig{ - Cid: inst.VsockCID, - Socket: inst.VsockSocket, + networks = append(networks, hypervisor.NetworkConfig{ + TAPDevice: netConfig.TAPDevice, + IP: netConfig.IP, + MAC: netConfig.MAC, + Netmask: netConfig.Netmask, + }) } // Device passthrough configuration (GPU, etc.) 
- var deviceConfigs *[]vmm.DeviceConfig + var pciDevices []string if len(inst.Devices) > 0 && m.deviceManager != nil { - configs := make([]vmm.DeviceConfig, 0, len(inst.Devices)) for _, deviceID := range inst.Devices { device, err := m.deviceManager.GetDevice(ctx, deviceID) if err != nil { - return vmm.VmConfig{}, fmt.Errorf("get device %s: %w", deviceID, err) + return hypervisor.VMConfig{}, fmt.Errorf("get device %s: %w", deviceID, err) } - configs = append(configs, vmm.DeviceConfig{ - Path: devices.GetDeviceSysfsPath(device.PCIAddress), - }) + pciDevices = append(pciDevices, devices.GetDeviceSysfsPath(device.PCIAddress)) } - deviceConfigs = &configs - } - - return vmm.VmConfig{ - Payload: payload, - Cpus: &cpus, - Memory: &memory, - Disks: &disks, - Serial: &serial, - Console: &console, - Net: nets, - Vsock: &vsock, - Devices: deviceConfigs, + } + + // Build topology if available + var topology *hypervisor.CPUTopology + if hostTopo := calculateGuestTopology(inst.Vcpus, m.hostTopology); hostTopo != nil { + topology = &hypervisor.CPUTopology{} + if hostTopo.ThreadsPerCore != nil { + topology.ThreadsPerCore = *hostTopo.ThreadsPerCore + } + if hostTopo.CoresPerDie != nil { + topology.CoresPerDie = *hostTopo.CoresPerDie + } + if hostTopo.DiesPerPackage != nil { + topology.DiesPerPackage = *hostTopo.DiesPerPackage + } + if hostTopo.Packages != nil { + topology.Packages = *hostTopo.Packages + } + } + + return hypervisor.VMConfig{ + VCPUs: inst.Vcpus, + MemoryBytes: inst.Size, + HotplugBytes: inst.HotplugSize, + Topology: topology, + Disks: disks, + Networks: networks, + SerialLogPath: m.paths.InstanceAppLog(inst.Id), + VsockCID: inst.VsockCID, + VsockSocket: inst.VsockSocket, + PCIDevices: pciDevices, + KernelPath: kernelPath, + InitrdPath: initrdPath, + KernelArgs: "console=ttyS0", }, nil } diff --git a/lib/instances/delete.go b/lib/instances/delete.go index 06bc50c..d0ddb8c 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -39,14 +39,14 @@ func (m *manager) deleteInstance( } } - // 3. If VMM might be running, force kill it - // Also attempt kill for StateUnknown since we can't be sure if VMM is running + // 3. If hypervisor might be running, force kill it + // Also attempt kill for StateUnknown since we can't be sure if hypervisor is running if inst.State.RequiresVMM() || inst.State == StateUnknown { - log.DebugContext(ctx, "stopping VMM", "instance_id", id, "state", inst.State) - if err := m.killVMM(ctx, &inst); err != nil { + log.DebugContext(ctx, "stopping hypervisor", "instance_id", id, "state", inst.State) + if err := m.killHypervisor(ctx, &inst); err != nil { // Log error but continue with cleanup - // Best effort to clean up even if VMM is unresponsive - log.WarnContext(ctx, "failed to kill VMM, continuing with cleanup", "instance_id", id, "error", err) + // Best effort to clean up even if hypervisor is unresponsive + log.WarnContext(ctx, "failed to kill hypervisor, continuing with cleanup", "instance_id", id, "error", err) } } @@ -98,23 +98,23 @@ func (m *manager) deleteInstance( return nil } -// killVMM force kills the VMM process without graceful shutdown +// killHypervisor force kills the hypervisor process without graceful shutdown // Used only for delete operations where we're removing all data anyway. -// For operations that need graceful shutdown (like standby), use the VMM API directly. -func (m *manager) killVMM(ctx context.Context, inst *Instance) error { +// For operations that need graceful shutdown (like standby), use the hypervisor API directly. 
+func (m *manager) killHypervisor(ctx context.Context, inst *Instance) error { log := logger.FromContext(ctx) // If we have a PID, kill the process immediately - if inst.CHPID != nil { - pid := *inst.CHPID + if inst.HypervisorPID != nil { + pid := *inst.HypervisorPID // Check if process exists if err := syscall.Kill(pid, 0); err == nil { // Process exists - kill it immediately with SIGKILL // No graceful shutdown needed since we're deleting all data - log.DebugContext(ctx, "killing VMM process", "instance_id", inst.Id, "pid", pid) + log.DebugContext(ctx, "killing hypervisor process", "instance_id", inst.Id, "pid", pid) if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { - log.WarnContext(ctx, "failed to kill VMM process", "instance_id", inst.Id, "pid", pid, "error", err) + log.WarnContext(ctx, "failed to kill hypervisor process", "instance_id", inst.Id, "pid", pid, "error", err) } // Wait for process to die and reap it to prevent zombies @@ -124,16 +124,16 @@ func (m *manager) killVMM(ctx context.Context, inst *Instance) error { wpid, err := syscall.Wait4(pid, &wstatus, syscall.WNOHANG, nil) if err != nil || wpid == pid { // Process reaped successfully or error (likely ECHILD if already reaped) - log.DebugContext(ctx, "VMM process killed and reaped", "instance_id", inst.Id, "pid", pid) + log.DebugContext(ctx, "hypervisor process killed and reaped", "instance_id", inst.Id, "pid", pid) break } if i == 49 { - log.WarnContext(ctx, "VMM process did not exit in time", "instance_id", inst.Id, "pid", pid) + log.WarnContext(ctx, "hypervisor process did not exit in time", "instance_id", inst.Id, "pid", pid) } time.Sleep(100 * time.Millisecond) } } else { - log.DebugContext(ctx, "VMM process not running", "instance_id", inst.Id, "pid", pid) + log.DebugContext(ctx, "hypervisor process not running", "instance_id", inst.Id, "pid", pid) } } diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 7244c01..08980e8 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -6,6 +6,8 @@ import ( "sync" "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/onkernel/hypeman/lib/hypervisor/cloudhypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -53,6 +55,9 @@ type manager struct { instanceLocks sync.Map // map[string]*sync.RWMutex - per-instance locks hostTopology *HostTopology // Cached host CPU topology metrics *Metrics + + // Hypervisor support + processManagers map[hypervisor.Type]hypervisor.ProcessManager } // NewManager creates a new instances manager. @@ -68,6 +73,9 @@ func NewManager(p *paths.Paths, imageManager images.Manager, systemManager syste limits: limits, instanceLocks: sync.Map{}, hostTopology: detectHostTopology(), // Detect and cache host topology + processManagers: map[hypervisor.Type]hypervisor.ProcessManager{ + hypervisor.TypeCloudHypervisor: cloudhypervisor.NewProcessManager(), + }, } // Initialize metrics if meter is provided @@ -81,6 +89,25 @@ func NewManager(p *paths.Paths, imageManager images.Manager, systemManager syste return m } +// getHypervisor creates a hypervisor client for the given socket and type. 
+func (m *manager) getHypervisor(socketPath string, hvType hypervisor.Type) (hypervisor.Hypervisor, error) { + switch hvType { + case hypervisor.TypeCloudHypervisor: + return cloudhypervisor.New(socketPath) + default: + return nil, fmt.Errorf("unsupported hypervisor type: %s", hvType) + } +} + +// getProcessManager returns the process manager for the given hypervisor type. +func (m *manager) getProcessManager(hvType hypervisor.Type) (hypervisor.ProcessManager, error) { + pm, ok := m.processManagers[hvType] + if !ok { + return nil, fmt.Errorf("no process manager for hypervisor type: %s", hvType) + } + return pm, nil +} + // getInstanceLock returns or creates a lock for a specific instance func (m *manager) getInstanceLock(id string) *sync.RWMutex { lock, _ := m.instanceLocks.LoadOrStore(id, &sync.RWMutex{}) @@ -224,7 +251,7 @@ func (m *manager) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) m.paths.InstanceHypemanLog(inst.Id), } for _, logPath := range logPaths { - if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { + if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { lastErr = err // Continue with other logs, but track error } } diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 95b9917..b279533 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -19,6 +19,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/network" @@ -157,12 +158,12 @@ func cleanupOrphanedProcesses(t *testing.T, mgr *manager) { } // If metadata has a PID, try to kill it - if meta.CHPID != nil { - pid := *meta.CHPID + if meta.HypervisorPID != nil { + pid := *meta.HypervisorPID // Check if process exists if err := syscall.Kill(pid, 0); err == nil { - t.Logf("Cleaning up orphaned Cloud Hypervisor process: PID %d (instance %s)", pid, id) + t.Logf("Cleaning up orphaned hypervisor process: PID %d (instance %s)", pid, id) syscall.Kill(pid, syscall.SIGKILL) // Wait for process to exit @@ -773,18 +774,19 @@ func TestStorageOperations(t *testing.T) { // Create instance metadata (stored fields only) stored := &StoredMetadata{ - Id: "test-123", - Name: "test", - Image: "test:latest", - Size: 1024 * 1024 * 1024, - HotplugSize: 2048 * 1024 * 1024, - OverlaySize: 10 * 1024 * 1024 * 1024, - Vcpus: 2, - Env: map[string]string{"TEST": "value"}, - CreatedAt: time.Now(), - CHVersion: vmm.V49_0, - SocketPath: "/tmp/test.sock", - DataDir: paths.New(tmpDir).InstanceDir("test-123"), + Id: "test-123", + Name: "test", + Image: "test:latest", + Size: 1024 * 1024 * 1024, + HotplugSize: 2048 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 2, + Env: map[string]string{"TEST": "value"}, + CreatedAt: time.Now(), + HypervisorType: hypervisor.TypeCloudHypervisor, + HypervisorVersion: string(vmm.V49_0), + SocketPath: "/tmp/test.sock", + DataDir: paths.New(tmpDir).InstanceDir("test-123"), } // Ensure directories diff --git a/lib/instances/query.go b/lib/instances/query.go index 819ba1a..3ccf25c 100644 --- a/lib/instances/query.go +++ b/lib/instances/query.go @@ -6,8 +6,8 @@ import ( "os" "path/filepath" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/logger" - "github.com/onkernel/hypeman/lib/vmm" ) // stateResult holds the result of state derivation 
@@ -16,8 +16,8 @@ type stateResult struct { Error *string // Non-nil if state couldn't be determined } -// deriveState determines instance state by checking socket and querying VMM. -// Returns StateUnknown with an error message if the socket exists but VMM is unreachable. +// deriveState determines instance state by checking socket and querying the hypervisor. +// Returns StateUnknown with an error message if the socket exists but hypervisor is unreachable. func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) stateResult { log := logger.FromContext(ctx) @@ -30,11 +30,11 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state return stateResult{State: StateStopped} } - // 2. Socket exists - query VMM for actual state - client, err := vmm.NewVMM(stored.SocketPath) + // 2. Socket exists - query hypervisor for actual state + hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) if err != nil { // Failed to create client - this is unexpected if socket exists - errMsg := fmt.Sprintf("failed to create VMM client: %v", err) + errMsg := fmt.Sprintf("failed to create hypervisor client: %v", err) log.WarnContext(ctx, "failed to determine instance state", "instance_id", stored.Id, "socket", stored.SocketPath, @@ -43,11 +43,11 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state return stateResult{State: StateUnknown, Error: &errMsg} } - resp, err := client.GetVmInfoWithResponse(ctx) + info, err := hv.GetVMInfo(ctx) if err != nil { - // Socket exists but VMM is unreachable - this is unexpected - errMsg := fmt.Sprintf("failed to query VMM: %v", err) - log.WarnContext(ctx, "failed to query VMM state", + // Socket exists but hypervisor is unreachable - this is unexpected + errMsg := fmt.Sprintf("failed to query hypervisor: %v", err) + log.WarnContext(ctx, "failed to query hypervisor state", "instance_id", stored.Id, "socket", stored.SocketPath, "error", err, @@ -55,35 +55,22 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state return stateResult{State: StateUnknown, Error: &errMsg} } - if resp.StatusCode() != 200 || resp.JSON200 == nil { - // VMM returned an error - log it and return Unknown - body := string(resp.Body) - errMsg := fmt.Sprintf("VMM returned error (status %d): %s", resp.StatusCode(), body) - log.WarnContext(ctx, "VMM returned error response", - "instance_id", stored.Id, - "socket", stored.SocketPath, - "status_code", resp.StatusCode(), - "body", body, - ) - return stateResult{State: StateUnknown, Error: &errMsg} - } - - // 3. Map CH state to our state - switch resp.JSON200.State { - case vmm.Created: + // 3. 
Map hypervisor state to our state + switch info.State { + case hypervisor.StateCreated: return stateResult{State: StateCreated} - case vmm.Running: + case hypervisor.StateRunning: return stateResult{State: StateRunning} - case vmm.Paused: + case hypervisor.StatePaused: return stateResult{State: StatePaused} - case vmm.Shutdown: + case hypervisor.StateShutdown: return stateResult{State: StateShutdown} default: - // Unknown CH state - log and return Unknown - errMsg := fmt.Sprintf("unexpected VMM state: %s", resp.JSON200.State) - log.WarnContext(ctx, "VMM returned unexpected state", + // Unknown state - log and return Unknown + errMsg := fmt.Sprintf("unexpected hypervisor state: %s", info.State) + log.WarnContext(ctx, "hypervisor returned unexpected state", "instance_id", stored.Id, - "vmm_state", resp.JSON200.State, + "hypervisor_state", info.State, ) return stateResult{State: StateUnknown, Error: &errMsg} } diff --git a/lib/instances/resource_limits_test.go b/lib/instances/resource_limits_test.go index 91dc4a4..9392ee0 100644 --- a/lib/instances/resource_limits_test.go +++ b/lib/instances/resource_limits_test.go @@ -313,7 +313,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { Image: "docker.io/library/alpine:latest", Vcpus: 1, Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) - HotplugSize: 512 * 1024 * 1024, // 512MB + HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 1 * 1024 * 1024 * 1024, NetworkEnabled: false, }) @@ -369,10 +369,10 @@ func cleanupTestProcesses(t *testing.T, mgr *manager) { return } for _, inst := range instances { - if inst.StoredMetadata.CHPID != nil { - pid := *inst.StoredMetadata.CHPID + if inst.StoredMetadata.HypervisorPID != nil { + pid := *inst.StoredMetadata.HypervisorPID if err := syscall.Kill(pid, 0); err == nil { - t.Logf("Cleaning up CH process: PID %d", pid) + t.Logf("Cleaning up hypervisor process: PID %d", pid) syscall.Kill(pid, syscall.SIGKILL) } } diff --git a/lib/instances/restore.go b/lib/instances/restore.go index 9ec95be..d4698bf 100644 --- a/lib/instances/restore.go +++ b/lib/instances/restore.go @@ -7,7 +7,6 @@ import ( "time" "github.com/onkernel/hypeman/lib/logger" - "github.com/onkernel/hypeman/lib/vmm" "go.opentelemetry.io/otel/trace" ) @@ -63,14 +62,11 @@ func (m *manager) restoreInstance( } } - // 5. Transition: Standby → Paused (start VMM + restore) + // 5. Transition: Standby → Paused (start hypervisor + restore) log.DebugContext(ctx, "restoring from snapshot", "instance_id", id, "snapshot_dir", snapshotDir) if err := m.restoreFromSnapshot(ctx, stored, snapshotDir); err != nil { log.ErrorContext(ctx, "failed to restore from snapshot", "instance_id", id, "error", err) // Cleanup network on failure - // Note: Network cleanup is explicitly called on failure paths to ensure TAP devices - // are removed. In production, stale TAP devices from unexpected failures (e.g., - // power loss) would require manual cleanup or host reboot. if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) m.networkManager.ReleaseAllocation(ctx, netAlloc) @@ -78,22 +74,21 @@ func (m *manager) restoreInstance( return nil, err } - // 6. Create client for resumed VM - client, err := vmm.NewVMM(stored.SocketPath) + // 6. 
Create hypervisor client for resumed VM + hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) if err != nil { - log.ErrorContext(ctx, "failed to create VMM client", "instance_id", id, "error", err) + log.ErrorContext(ctx, "failed to create hypervisor client", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) m.networkManager.ReleaseAllocation(ctx, netAlloc) } - return nil, fmt.Errorf("create vmm client: %w", err) + return nil, fmt.Errorf("create hypervisor client: %w", err) } // 7. Transition: Paused → Running (resume) log.DebugContext(ctx, "resuming VM", "instance_id", id) - resumeResp, err := client.ResumeVMWithResponse(ctx) - if err != nil || resumeResp.StatusCode() != 204 { + if err := hv.Resume(ctx); err != nil { log.ErrorContext(ctx, "failed to resume VM", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { @@ -129,7 +124,7 @@ func (m *manager) restoreInstance( return &finalInst, nil } -// restoreFromSnapshot starts VMM and restores from snapshot +// restoreFromSnapshot starts the hypervisor and restores from snapshot func (m *manager) restoreFromSnapshot( ctx context.Context, stored *StoredMetadata, @@ -137,41 +132,35 @@ func (m *manager) restoreFromSnapshot( ) error { log := logger.FromContext(ctx) - // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process for restore", "instance_id", stored.Id, "version", stored.CHVersion) - pid, err := vmm.StartProcess(ctx, m.paths, stored.CHVersion, stored.SocketPath) + // Get process manager for this hypervisor type + pm, err := m.getProcessManager(stored.HypervisorType) if err != nil { - return fmt.Errorf("start vmm: %w", err) + return fmt.Errorf("get process manager: %w", err) + } + + // Start hypervisor process and capture PID + log.DebugContext(ctx, "starting hypervisor process for restore", "instance_id", stored.Id, "hypervisor", stored.HypervisorType, "version", stored.HypervisorVersion) + pid, err := pm.StartProcess(ctx, m.paths, stored.HypervisorVersion, stored.SocketPath) + if err != nil { + return fmt.Errorf("start hypervisor: %w", err) } // Store the PID for later cleanup - stored.CHPID = &pid - log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) + stored.HypervisorPID = &pid + log.DebugContext(ctx, "hypervisor process started", "instance_id", stored.Id, "pid", pid) - // Create client - client, err := vmm.NewVMM(stored.SocketPath) + // Create hypervisor client + hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) if err != nil { - return fmt.Errorf("create vmm client: %w", err) + return fmt.Errorf("create hypervisor client: %w", err) } // Restore from snapshot - sourceURL := "file://" + snapshotDir - restoreConfig := vmm.RestoreConfig{ - SourceUrl: sourceURL, - Prefault: ptr(false), // Don't prefault pages for faster restore - } - - log.DebugContext(ctx, "invoking VMM restore API", "instance_id", stored.Id, "source_url", sourceURL) - resp, err := client.PutVmRestoreWithResponse(ctx, restoreConfig) - if err != nil { + log.DebugContext(ctx, "invoking hypervisor restore API", "instance_id", stored.Id, "snapshot_dir", snapshotDir) + if err := hv.Restore(ctx, snapshotDir); err != nil { log.ErrorContext(ctx, "restore API call failed", "instance_id", stored.Id, "error", err) - client.ShutdownVMMWithResponse(ctx) // Cleanup - return fmt.Errorf("restore api call: %w", err) - } - if resp.StatusCode() != 204 { - 
log.ErrorContext(ctx, "restore API returned error", "instance_id", stored.Id, "status", resp.StatusCode()) - client.ShutdownVMMWithResponse(ctx) // Cleanup - return fmt.Errorf("restore failed with status %d", resp.StatusCode()) + hv.Shutdown(ctx) // Cleanup + return fmt.Errorf("restore: %w", err) } log.DebugContext(ctx, "VM restored from snapshot successfully", "instance_id", stored.Id) diff --git a/lib/instances/standby.go b/lib/instances/standby.go index 8c2a262..a073066 100644 --- a/lib/instances/standby.go +++ b/lib/instances/standby.go @@ -6,9 +6,9 @@ import ( "os" "time" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" - "github.com/onkernel/hypeman/lib/vmm" "go.opentelemetry.io/otel/trace" ) @@ -58,46 +58,53 @@ func (m *manager) standbyInstance( } } - // 4. Create VMM client - client, err := vmm.NewVMM(inst.SocketPath) + // 4. Create hypervisor client + hv, err := m.getHypervisor(inst.SocketPath, stored.HypervisorType) if err != nil { - log.ErrorContext(ctx, "failed to create VMM client", "instance_id", id, "error", err) - return nil, fmt.Errorf("create vmm client: %w", err) + log.ErrorContext(ctx, "failed to create hypervisor client", "instance_id", id, "error", err) + return nil, fmt.Errorf("create hypervisor client: %w", err) } - // 5. Reduce memory to base size (virtio-mem hotplug) - log.DebugContext(ctx, "reducing VM memory before snapshot", "instance_id", id, "base_size", inst.Size) - if err := reduceMemory(ctx, client, inst.Size); err != nil { - // Log warning but continue - snapshot will just be larger - log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "instance_id", id, "error", err) + // 5. Check if snapshot is supported + if !hv.Capabilities().SupportsSnapshot { + log.ErrorContext(ctx, "hypervisor does not support snapshots", "instance_id", id, "hypervisor", stored.HypervisorType) + return nil, fmt.Errorf("hypervisor %s does not support standby (snapshots)", stored.HypervisorType) } - // 6. Transition: Running → Paused + // 6. Reduce memory to base size (virtio-mem hotplug) if supported + if hv.Capabilities().SupportsHotplugMemory { + log.DebugContext(ctx, "reducing VM memory before snapshot", "instance_id", id, "base_size", inst.Size) + if err := hv.ResizeMemory(ctx, inst.Size); err != nil { + // Log warning but continue - snapshot will just be larger + log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "instance_id", id, "error", err) + } + } + + // 7. Transition: Running → Paused log.DebugContext(ctx, "pausing VM", "instance_id", id) - pauseResp, err := client.PauseVMWithResponse(ctx) - if err != nil || pauseResp.StatusCode() != 204 { + if err := hv.Pause(ctx); err != nil { log.ErrorContext(ctx, "failed to pause VM", "instance_id", id, "error", err) return nil, fmt.Errorf("pause vm failed: %w", err) } - // 7. Create snapshot + // 8. Create snapshot snapshotDir := m.paths.InstanceSnapshotLatest(id) log.DebugContext(ctx, "creating snapshot", "instance_id", id, "snapshot_dir", snapshotDir) - if err := createSnapshot(ctx, client, snapshotDir); err != nil { + if err := createSnapshot(ctx, hv, snapshotDir); err != nil { // Snapshot failed - try to resume VM log.ErrorContext(ctx, "snapshot failed, attempting to resume VM", "instance_id", id, "error", err) - client.ResumeVMWithResponse(ctx) + hv.Resume(ctx) return nil, fmt.Errorf("create snapshot: %w", err) } - // 8. 
Stop VMM gracefully (snapshot is complete) - log.DebugContext(ctx, "shutting down VMM", "instance_id", id) - if err := m.shutdownVMM(ctx, &inst); err != nil { + // 9. Stop VMM gracefully (snapshot is complete) + log.DebugContext(ctx, "shutting down hypervisor", "instance_id", id) + if err := m.shutdownHypervisor(ctx, &inst); err != nil { // Log but continue - snapshot was created successfully - log.WarnContext(ctx, "failed to shutdown VMM gracefully, snapshot still valid", "instance_id", id, "error", err) + log.WarnContext(ctx, "failed to shutdown hypervisor gracefully, snapshot still valid", "instance_id", id, "error", err) } - // 9. Release network allocation (delete TAP device) + // 10. Release network allocation (delete TAP device) // TAP devices with explicit Owner/Group fields do NOT auto-delete when VMM exits // They must be explicitly deleted if inst.NetworkEnabled { @@ -108,10 +115,10 @@ func (m *manager) standbyInstance( } } - // 10. Update timestamp and clear PID (VMM no longer running) + // 11. Update timestamp and clear PID (hypervisor no longer running) now := time.Now() stored.StoppedAt = &now - stored.CHPID = nil + stored.HypervisorPID = nil meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { @@ -131,86 +138,8 @@ func (m *manager) standbyInstance( return &finalInst, nil } -// reduceMemory attempts to reduce VM memory to minimize snapshot size -func reduceMemory(ctx context.Context, client *vmm.VMM, targetBytes int64) error { - resizeConfig := vmm.VmResize{DesiredRam: &targetBytes} - if resp, err := client.PutVmResizeWithResponse(ctx, resizeConfig); err != nil || resp.StatusCode() != 204 { - return fmt.Errorf("memory resize failed") - } - - // Poll actual memory usage until it reaches target size - return pollVMMemory(ctx, client, targetBytes, 5*time.Second) -} - -// pollVMMemory polls VM memory usage until it reduces and stabilizes -func pollVMMemory(ctx context.Context, client *vmm.VMM, targetBytes int64, timeout time.Duration) error { - log := logger.FromContext(ctx) - deadline := time.Now().Add(timeout) - - // Use 20ms for fast response with minimal overhead - const pollInterval = 20 * time.Millisecond - const stabilityThreshold = 3 // Memory unchanged for 3 checks = stable - - var previousSize *int64 - unchangedCount := 0 - - for time.Now().Before(deadline) { - infoResp, err := client.GetVmInfoWithResponse(ctx) - if err != nil { - time.Sleep(pollInterval) - continue - } - - if infoResp.StatusCode() != 200 || infoResp.JSON200 == nil { - time.Sleep(pollInterval) - continue - } - - actualSize := infoResp.JSON200.MemoryActualSize - if actualSize == nil { - time.Sleep(pollInterval) - continue - } - - currentSize := *actualSize - - // Best case: reached target or below - if currentSize <= targetBytes { - log.DebugContext(ctx, "memory reduced to target", - "target_mb", targetBytes/(1024*1024), - "actual_mb", currentSize/(1024*1024)) - return nil - } - - // Check if memory has stopped shrinking (stabilized above target) - if previousSize != nil { - if currentSize == *previousSize { - unchangedCount++ - if unchangedCount >= stabilityThreshold { - // Memory has stabilized but above target - // Guest OS couldn't free more memory - accept this as "done" - log.WarnContext(ctx, "memory reduction stabilized above target", - "target_mb", targetBytes/(1024*1024), - "actual_mb", currentSize/(1024*1024), - "diff_mb", (currentSize-targetBytes)/(1024*1024)) - return nil // Not an error - snapshot will just be larger - } - } else if currentSize < 
*previousSize {
-			// Still shrinking - reset counter
-			unchangedCount = 0
-		}
-	}
-
-		previousSize = &currentSize
-		time.Sleep(pollInterval)
-	}
-
-	// Timeout - memory never stabilized
-	return fmt.Errorf("memory reduction did not complete within %v", timeout)
-}
-
-// createSnapshot creates a Cloud Hypervisor snapshot
-func createSnapshot(ctx context.Context, client *vmm.VMM, snapshotDir string) error {
+// createSnapshot creates a snapshot using the hypervisor interface
+func createSnapshot(ctx context.Context, hv hypervisor.Hypervisor, snapshotDir string) error {
 	log := logger.FromContext(ctx)
 
 	// Remove old snapshot
@@ -221,46 +150,38 @@ func createSnapshot(ctx context.Context, client *vmm.VMM, snapshotDir string) er
 		return fmt.Errorf("create snapshot dir: %w", err)
 	}
 
-	// Create snapshot via VMM API
-	snapshotURL := "file://" + snapshotDir
-	snapshotConfig := vmm.VmSnapshotConfig{DestinationUrl: &snapshotURL}
-
-	log.DebugContext(ctx, "invoking VMM snapshot API", "snapshot_url", snapshotURL)
-	resp, err := client.PutVmSnapshotWithResponse(ctx, snapshotConfig)
-	if err != nil {
-		return fmt.Errorf("snapshot api call: %w", err)
-	}
-	if resp.StatusCode() != 204 {
-		log.ErrorContext(ctx, "snapshot API returned error", "status", resp.StatusCode())
-		return fmt.Errorf("snapshot failed with status %d", resp.StatusCode())
+	// Create snapshot via hypervisor API
+	log.DebugContext(ctx, "invoking hypervisor snapshot API", "snapshot_dir", snapshotDir)
+	if err := hv.Snapshot(ctx, snapshotDir); err != nil {
+		return fmt.Errorf("snapshot: %w", err)
 	}
 
 	log.DebugContext(ctx, "snapshot created successfully", "snapshot_dir", snapshotDir)
 	return nil
 }
 
-// shutdownVMM gracefully shuts down the VMM process via API
-func (m *manager) shutdownVMM(ctx context.Context, inst *Instance) error {
+// shutdownHypervisor gracefully shuts down the hypervisor process via API
+func (m *manager) shutdownHypervisor(ctx context.Context, inst *Instance) error {
 	log := logger.FromContext(ctx)
 
-	// Try to connect to VMM
-	client, err := vmm.NewVMM(inst.SocketPath)
+	// Try to connect to hypervisor
+	hv, err := m.getHypervisor(inst.SocketPath, inst.HypervisorType)
 	if err != nil {
-		// Can't connect - VMM might already be stopped
-		log.DebugContext(ctx, "could not connect to VMM, may already be stopped", "instance_id", inst.Id)
+		// Can't connect - hypervisor might already be stopped
+		log.DebugContext(ctx, "could not connect to hypervisor, may already be stopped", "instance_id", inst.Id)
 		return nil
 	}
 
 	// Try graceful shutdown
-	log.DebugContext(ctx, "sending shutdown command to VMM", "instance_id", inst.Id)
-	client.ShutdownVMMWithResponse(ctx)
+	log.DebugContext(ctx, "sending shutdown command to hypervisor", "instance_id", inst.Id)
+	hv.Shutdown(ctx)
 
 	// Wait for process to exit
-	if inst.CHPID != nil {
-		if !WaitForProcessExit(*inst.CHPID, 2*time.Second) {
-			log.WarnContext(ctx, "VMM did not exit gracefully in time", "instance_id", inst.Id, "pid", *inst.CHPID)
+	if inst.HypervisorPID != nil {
+		if !WaitForProcessExit(*inst.HypervisorPID, 2*time.Second) {
+			log.WarnContext(ctx, "hypervisor did not exit gracefully in time", "instance_id", inst.Id, "pid", *inst.HypervisorPID)
 		} else {
-			log.DebugContext(ctx, "VMM shutdown gracefully", "instance_id", inst.Id, "pid", *inst.CHPID)
+			log.DebugContext(ctx, "hypervisor shutdown gracefully", "instance_id", inst.Id, "pid", *inst.HypervisorPID)
 		}
 	}
 
diff --git a/lib/instances/start.go b/lib/instances/start.go
index 1687d59..b57fd12 100644
--- a/lib/instances/start.go
+++ 
b/lib/instances/start.go @@ -45,7 +45,7 @@ func (m *manager) startInstance( return nil, fmt.Errorf("%w: cannot start from state %s, must be Stopped", ErrInvalidState, inst.State) } - // 3. Get image info (needed for buildVMConfig) + // 3. Get image info (needed for buildHypervisorConfig) log.DebugContext(ctx, "getting image info", "instance_id", id, "image", stored.Image) imageInfo, err := m.imageManager.GetImage(ctx, stored.Image) if err != nil { @@ -89,8 +89,8 @@ func (m *manager) startInstance( return nil, fmt.Errorf("create config disk: %w", err) } - // 6. Start VMM and boot VM (reuses logic from create) - log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) + // 6. Start hypervisor and boot VM (reuses logic from create) + log.InfoContext(ctx, "starting hypervisor and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err diff --git a/lib/instances/stop.go b/lib/instances/stop.go index 47336ca..1d7ee11 100644 --- a/lib/instances/stop.go +++ b/lib/instances/stop.go @@ -54,12 +54,12 @@ func (m *manager) stopInstance( } } - // 4. Shutdown VMM process + // 4. Shutdown hypervisor process // TODO: Add graceful shutdown via vsock signal to allow app to clean up - log.DebugContext(ctx, "shutting down VMM", "instance_id", id) - if err := m.shutdownVMM(ctx, &inst); err != nil { + log.DebugContext(ctx, "shutting down hypervisor", "instance_id", id) + if err := m.shutdownHypervisor(ctx, &inst); err != nil { // Log but continue - try to clean up anyway - log.WarnContext(ctx, "failed to shutdown VMM gracefully", "instance_id", id, "error", err) + log.WarnContext(ctx, "failed to shutdown hypervisor gracefully", "instance_id", id, "error", err) } // 5. Release network allocation (delete TAP device) @@ -74,7 +74,7 @@ func (m *manager) stopInstance( // 6. 
Update metadata (clear PID, set StoppedAt) now := time.Now() stored.StoppedAt = &now - stored.CHPID = nil + stored.HypervisorPID = nil meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { diff --git a/lib/instances/types.go b/lib/instances/types.go index 6320c23..0f3356f 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -3,7 +3,7 @@ package instances import ( "time" - "github.com/onkernel/hypeman/lib/vmm" + "github.com/onkernel/hypeman/lib/hypervisor" ) // State represents the instance state @@ -56,9 +56,12 @@ type StoredMetadata struct { StoppedAt *time.Time // Last time VM was stopped // Versions - KernelVersion string // Kernel version (e.g., "ch-v6.12.9") - CHVersion vmm.CHVersion // Cloud Hypervisor version - CHPID *int // Cloud Hypervisor process ID (may be stale after host restart) + KernelVersion string // Kernel version (e.g., "ch-v6.12.9") + + // Hypervisor configuration + HypervisorType hypervisor.Type // Hypervisor type (e.g., "cloud-hypervisor") + HypervisorVersion string // Hypervisor version (e.g., "v49.0") + HypervisorPID *int // Hypervisor process ID (may be stale after host restart) // Paths SocketPath string // Path to API socket From 8ec12514079b3dc616ee9be66624fa5d9c0b026b Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 20 Dec 2025 13:42:16 -0500 Subject: [PATCH 2/4] Add README --- lib/hypervisor/README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 lib/hypervisor/README.md diff --git a/lib/hypervisor/README.md b/lib/hypervisor/README.md new file mode 100644 index 0000000..2bab53d --- /dev/null +++ b/lib/hypervisor/README.md @@ -0,0 +1,35 @@ +# Hypervisor Abstraction + +Provides a common interface for VM management across different hypervisors. + +## Purpose + +Hypeman originally supported only Cloud Hypervisor. This abstraction layer allows supporting multiple hypervisors (e.g., QEMU) through a unified interface, enabling: + +- **Hypervisor choice per instance** - Different instances can use different hypervisors +- **Feature parity where possible** - Common operations work the same way +- **Graceful degradation** - Features unsupported by a hypervisor can be detected and handled + +## How It Works + +The abstraction defines two key interfaces: + +1. **Hypervisor** - VM lifecycle operations (create, boot, pause, resume, snapshot, restore, shutdown) +2. **ProcessManager** - Hypervisor process lifecycle (start binary, get binary path) + +Each hypervisor implementation translates the generic configuration and operations to its native format. For example, Cloud Hypervisor uses an HTTP API over a Unix socket, while QEMU would use QMP. + +Before using optional features, callers check capabilities: + +```go +if hv.Capabilities().SupportsSnapshot { + hv.Snapshot(ctx, path) +} +``` + +## Hypervisor Switching + +Instances store their hypervisor type in metadata. 
An instance can switch hypervisors only when stopped (no running VM, no snapshot), since: + +- Disk images are hypervisor-agnostic +- Snapshots are hypervisor-specific and cannot be restored by a different hypervisor From 1230c4e3946fc91603945f9f1a6db8cb3c0ce027 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 20 Dec 2025 13:56:25 -0500 Subject: [PATCH 3/4] address code reviews --- .../cloudhypervisor/cloudhypervisor.go | 65 +++++++++++++++++-- lib/hypervisor/cloudhypervisor/config.go | 2 +- lib/hypervisor/config.go | 3 +- lib/hypervisor/hypervisor.go | 6 ++ lib/instances/README.md | 7 +- lib/instances/create.go | 2 +- lib/instances/standby.go | 3 +- lib/instances/storage.go | 16 ++--- lib/network/derive.go | 3 +- lib/paths/paths.go | 6 +- 10 files changed, 89 insertions(+), 24 deletions(-) diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go index 394d628..effcc59 100644 --- a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go +++ b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go @@ -5,6 +5,7 @@ package cloudhypervisor import ( "context" "fmt" + "time" "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/vmm" @@ -12,8 +13,7 @@ import ( // CloudHypervisor implements hypervisor.Hypervisor for Cloud Hypervisor VMM. type CloudHypervisor struct { - client *vmm.VMM - socketPath string + client *vmm.VMM } // New creates a new Cloud Hypervisor client for an existing VMM socket. @@ -23,8 +23,7 @@ func New(socketPath string) (*CloudHypervisor, error) { return nil, fmt.Errorf("create vmm client: %w", err) } return &CloudHypervisor{ - client: client, - socketPath: socketPath, + client: client, }, nil } @@ -114,7 +113,10 @@ func (c *CloudHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, er return nil, fmt.Errorf("unknown vm state: %s", resp.JSON200.State) } - return &hypervisor.VMInfo{State: state}, nil + return &hypervisor.VMInfo{ + State: state, + MemoryActualSize: resp.JSON200.MemoryActualSize, + }, nil } // Pause suspends VM execution. @@ -185,6 +187,59 @@ func (c *CloudHypervisor) ResizeMemory(ctx context.Context, bytes int64) error { return nil } +// ResizeMemoryAndWait changes the VM's memory allocation and waits for it to stabilize. +// It polls until the actual memory size stabilizes (stops changing) or timeout is reached. 
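+// Note that "stabilize" is deliberately weaker than "reach the target": the
+// guest may be unable to release memory all the way down to the requested
+// size, in which case a stable reading above the target is still treated as
+// success (the snapshot is simply larger). A timeout is likewise not an
+// error, because the resize request itself was accepted.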
+func (c *CloudHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error { + // First, request the resize + if err := c.ResizeMemory(ctx, bytes); err != nil { + return err + } + + // Poll until memory stabilizes + const pollInterval = 20 * time.Millisecond + deadline := time.Now().Add(timeout) + + var lastSize int64 = -1 + stableCount := 0 + const requiredStableChecks = 3 // Require 3 consecutive stable readings + + for time.Now().Before(deadline) { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + info, err := c.GetVMInfo(ctx) + if err != nil { + return fmt.Errorf("poll memory size: %w", err) + } + + if info.MemoryActualSize == nil { + // No memory info available, just return after resize + return nil + } + + currentSize := *info.MemoryActualSize + + if currentSize == lastSize { + stableCount++ + if stableCount >= requiredStableChecks { + // Memory has stabilized + return nil + } + } else { + stableCount = 0 + lastSize = currentSize + } + + time.Sleep(pollInterval) + } + + // Timeout reached, but resize was requested successfully + return nil +} + func ptr[T any](v T) *T { return &v } diff --git a/lib/hypervisor/cloudhypervisor/config.go b/lib/hypervisor/cloudhypervisor/config.go index 7dcd9a7..22d4a9a 100644 --- a/lib/hypervisor/cloudhypervisor/config.go +++ b/lib/hypervisor/cloudhypervisor/config.go @@ -40,7 +40,7 @@ func ToVMConfig(cfg hypervisor.VMConfig) vmm.VmConfig { } // Disk configuration - var disks []vmm.DiskConfig + disks := make([]vmm.DiskConfig, 0, len(cfg.Disks)) for _, d := range cfg.Disks { disk := vmm.DiskConfig{ Path: ptr(d.Path), diff --git a/lib/hypervisor/config.go b/lib/hypervisor/config.go index 9d56ebb..04f8040 100644 --- a/lib/hypervisor/config.go +++ b/lib/hypervisor/config.go @@ -55,7 +55,8 @@ type NetworkConfig struct { // VMInfo contains current VM state information type VMInfo struct { - State VMState + State VMState + MemoryActualSize *int64 // Current actual memory size in bytes (if available) } // VMState represents the VM execution state diff --git a/lib/hypervisor/hypervisor.go b/lib/hypervisor/hypervisor.go index 7b1c0fb..93d5cc1 100644 --- a/lib/hypervisor/hypervisor.go +++ b/lib/hypervisor/hypervisor.go @@ -5,6 +5,7 @@ package hypervisor import ( "context" + "time" "github.com/onkernel/hypeman/lib/paths" ) @@ -59,6 +60,11 @@ type Hypervisor interface { // Check Capabilities().SupportsHotplugMemory before calling. ResizeMemory(ctx context.Context, bytes int64) error + // ResizeMemoryAndWait changes the VM's memory allocation and waits for it to stabilize. + // This polls until the actual memory size matches the target or stabilizes. + // Check Capabilities().SupportsHotplugMemory before calling. + ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error + // Capabilities returns what features this hypervisor supports. Capabilities() Capabilities } diff --git a/lib/instances/README.md b/lib/instances/README.md index a5de128..6aa10bb 100644 --- a/lib/instances/README.md +++ b/lib/instances/README.md @@ -40,10 +40,11 @@ Manages VM instance lifecycle using Cloud Hypervisor. 
metadata.json # State, versions, timestamps overlay.raw # 50GB sparse writable overlay config.erofs # Compressed config disk - ch.sock # Cloud Hypervisor API socket - ch-stdout.log # CH process output + cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type) logs/ - console.log # Serial console (VM output) + app.log # Guest application log (serial console output) + vmm.log # Hypervisor log (stdout+stderr) + hypeman.log # Hypeman operations log snapshots/ snapshot-latest/ # Snapshot directory config.json # VM configuration diff --git a/lib/instances/create.go b/lib/instances/create.go index 71e6ded..7a411d7 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -273,7 +273,7 @@ func (m *manager) createInstance( KernelVersion: string(kernelVer), HypervisorType: hypervisor.TypeCloudHypervisor, HypervisorVersion: string(vmm.V49_0), // Use latest - SocketPath: m.paths.InstanceSocket(id), + SocketPath: m.paths.InstanceSocket(id, string(hypervisor.TypeCloudHypervisor)), DataDir: m.paths.InstanceDir(id), VsockCID: vsockCID, VsockSocket: vsockSocket, diff --git a/lib/instances/standby.go b/lib/instances/standby.go index a073066..b4391ff 100644 --- a/lib/instances/standby.go +++ b/lib/instances/standby.go @@ -72,9 +72,10 @@ func (m *manager) standbyInstance( } // 6. Reduce memory to base size (virtio-mem hotplug) if supported + // Wait for memory to stabilize so the snapshot is as small as possible if hv.Capabilities().SupportsHotplugMemory { log.DebugContext(ctx, "reducing VM memory before snapshot", "instance_id", id, "base_size", inst.Size) - if err := hv.ResizeMemory(ctx, inst.Size); err != nil { + if err := hv.ResizeMemoryAndWait(ctx, inst.Size, 5*time.Second); err != nil { // Log warning but continue - snapshot will just be larger log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "instance_id", id, "error", err) } diff --git a/lib/instances/storage.go b/lib/instances/storage.go index 1332137..e95691f 100644 --- a/lib/instances/storage.go +++ b/lib/instances/storage.go @@ -11,16 +11,16 @@ import ( // Filesystem structure: // {dataDir}/guests/{instance-id}/ -// metadata.json # Instance metadata -// overlay.raw # Configurable sparse overlay disk (default 10GB) -// config.ext4 # Read-only config disk (generated) -// ch.sock # Cloud Hypervisor API socket +// metadata.json # Instance metadata +// overlay.raw # Configurable sparse overlay disk (default 10GB) +// config.ext4 # Read-only config disk (generated) +// cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type) // logs/ -// app.log # Guest application log (serial console output) -// vmm.log # Cloud Hypervisor VMM log (stdout+stderr combined) -// hypeman.log # Hypeman operations log (actions taken on this instance) +// app.log # Guest application log (serial console output) +// vmm.log # Hypervisor log (stdout+stderr combined) +// hypeman.log # Hypeman operations log (actions taken on this instance) // snapshots/ -// snapshot-latest/ # Snapshot directory +// snapshot-latest/ # Snapshot directory // config.json // memory-ranges diff --git a/lib/network/derive.go b/lib/network/derive.go index 86c3bb2..7fb849c 100644 --- a/lib/network/derive.go +++ b/lib/network/derive.go @@ -16,6 +16,7 @@ import ( type instanceMetadata struct { Name string NetworkEnabled bool + HypervisorType string } // deriveAllocation derives network allocation from CH or snapshot @@ -48,7 +49,7 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All netmask := 
fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3]) // 4. Try to derive from running VM first - socketPath := m.paths.InstanceSocket(instanceID) + socketPath := m.paths.InstanceSocket(instanceID, meta.HypervisorType) if fileExists(socketPath) { client, err := vmm.NewVMM(socketPath) if err == nil { diff --git a/lib/paths/paths.go b/lib/paths/paths.go index ce06aeb..26ed46b 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -144,9 +144,9 @@ func (p *Paths) InstanceVolumeOverlaysDir(instanceID string) string { return filepath.Join(p.InstanceDir(instanceID), "vol-overlays") } -// InstanceSocket returns the path to instance API socket. -func (p *Paths) InstanceSocket(id string) string { - return filepath.Join(p.InstanceDir(id), "ch.sock") +// InstanceSocket returns the path to instance API socket for the given hypervisor type. +func (p *Paths) InstanceSocket(id string, hypervisorType string) string { + return filepath.Join(p.InstanceDir(id), hypervisorType+".sock") } // InstanceVsockSocket returns the path to instance vsock socket. From 8358088e5c0c17a90cd39578ee67391b27221d44 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 20 Dec 2025 14:19:46 -0500 Subject: [PATCH 4/4] Fix socket name too long --- lib/hypervisor/cloudhypervisor/process.go | 9 +++++++ lib/hypervisor/hypervisor.go | 23 ++++++++++++++++ lib/instances/README.md | 2 +- lib/instances/create.go | 32 ++++++++++++++--------- lib/instances/storage.go | 16 ++++++------ lib/network/derive.go | 3 ++- lib/paths/paths.go | 8 +++--- lib/vmm/client.go | 5 ++++ 8 files changed, 73 insertions(+), 25 deletions(-) diff --git a/lib/hypervisor/cloudhypervisor/process.go b/lib/hypervisor/cloudhypervisor/process.go index 69d0744..705c91c 100644 --- a/lib/hypervisor/cloudhypervisor/process.go +++ b/lib/hypervisor/cloudhypervisor/process.go @@ -9,6 +9,10 @@ import ( "github.com/onkernel/hypeman/lib/vmm" ) +func init() { + hypervisor.RegisterSocketName(hypervisor.TypeCloudHypervisor, "ch.sock") +} + // ProcessManager implements hypervisor.ProcessManager for Cloud Hypervisor. type ProcessManager struct{} @@ -20,6 +24,11 @@ func NewProcessManager() *ProcessManager { // Verify ProcessManager implements the interface var _ hypervisor.ProcessManager = (*ProcessManager)(nil) +// SocketName returns the socket filename for Cloud Hypervisor. +func (p *ProcessManager) SocketName() string { + return "ch.sock" +} + // StartProcess launches a Cloud Hypervisor VMM process. func (p *ProcessManager) StartProcess(ctx context.Context, paths *paths.Paths, version string, socketPath string) (int, error) { chVersion := vmm.CHVersion(version) diff --git a/lib/hypervisor/hypervisor.go b/lib/hypervisor/hypervisor.go index 93d5cc1..2b93b0b 100644 --- a/lib/hypervisor/hypervisor.go +++ b/lib/hypervisor/hypervisor.go @@ -19,6 +19,25 @@ const ( // Future: TypeQEMU Type = "qemu" ) +// socketNames maps hypervisor types to their socket filenames. +// Registered by each hypervisor package's init() function. +var socketNames = make(map[Type]string) + +// RegisterSocketName registers the socket filename for a hypervisor type. +// Called by each hypervisor implementation's init() function. +func RegisterSocketName(t Type, name string) { + socketNames[t] = name +} + +// SocketNameForType returns the socket filename for a hypervisor type. +// Falls back to type + ".sock" if not registered. 
+func SocketNameForType(t Type) string { + if name, ok := socketNames[t]; ok { + return name + } + return string(t) + ".sock" +} + // Hypervisor defines the interface for VM management operations. // All hypervisor implementations must implement this interface. type Hypervisor interface { @@ -92,6 +111,10 @@ type Capabilities struct { // This is separate from the Hypervisor interface because process management // happens before/after the VMM socket is available. type ProcessManager interface { + // SocketName returns the socket filename for this hypervisor. + // Uses short names to stay within Unix socket path length limits (SUN_LEN ~108 bytes). + SocketName() string + // StartProcess launches the hypervisor process. // Returns the process ID of the started hypervisor. StartProcess(ctx context.Context, p *paths.Paths, version string, socketPath string) (pid int, err error) diff --git a/lib/instances/README.md b/lib/instances/README.md index 6aa10bb..a2d4217 100644 --- a/lib/instances/README.md +++ b/lib/instances/README.md @@ -40,7 +40,7 @@ Manages VM instance lifecycle using Cloud Hypervisor. metadata.json # State, versions, timestamps overlay.raw # 50GB sparse writable overlay config.erofs # Compressed config disk - cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type) + ch.sock # Hypervisor API socket (abbreviated for SUN_LEN limit) logs/ app.log # Guest application log (serial console output) vmm.log # Hypervisor log (stdout+stderr) diff --git a/lib/instances/create.go b/lib/instances/create.go index 7a411d7..77b40ed 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -202,7 +202,15 @@ func (m *manager) createInstance( // 8. Get default kernel version kernelVer := m.systemManager.GetDefaultKernelVersion() - // 9. Validate, resolve, and auto-bind devices (GPU passthrough) + // 9. Get process manager for hypervisor type (needed for socket name) + hvType := hypervisor.TypeCloudHypervisor + pm, err := m.getProcessManager(hvType) + if err != nil { + log.ErrorContext(ctx, "failed to get process manager", "error", err) + return nil, fmt.Errorf("get process manager: %w", err) + } + + // 10. Validate, resolve, and auto-bind devices (GPU passthrough) // Track devices we've marked as attached for cleanup on error. // The cleanup closure captures this slice by reference, so it will see // whatever devices have been attached when cleanup runs. @@ -256,7 +264,7 @@ func (m *manager) createInstance( log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs) } - // 10. Create instance metadata + // 11. Create instance metadata stored := &StoredMetadata{ Id: id, Name: req.Name, @@ -271,30 +279,30 @@ func (m *manager) createInstance( StartedAt: nil, StoppedAt: nil, KernelVersion: string(kernelVer), - HypervisorType: hypervisor.TypeCloudHypervisor, + HypervisorType: hvType, HypervisorVersion: string(vmm.V49_0), // Use latest - SocketPath: m.paths.InstanceSocket(id, string(hypervisor.TypeCloudHypervisor)), + SocketPath: m.paths.InstanceSocket(id, pm.SocketName()), DataDir: m.paths.InstanceDir(id), VsockCID: vsockCID, VsockSocket: vsockSocket, Devices: resolvedDeviceIDs, } - // 11. Ensure directories + // 12. Ensure directories log.DebugContext(ctx, "creating instance directories", "instance_id", id) if err := m.ensureDirectories(id); err != nil { log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err) return nil, fmt.Errorf("ensure directories: %w", err) } - // 12. 
Create overlay disk with specified size + // 13. Create overlay disk with specified size log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize) if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil { log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create overlay disk: %w", err) } - // 13. Allocate network (if network enabled) + // 14. Allocate network (if network enabled) var netConfig *network.NetworkConfig if networkName != "" { log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName) @@ -319,7 +327,7 @@ func (m *manager) createInstance( }) } - // 14. Validate and attach volumes + // 15. Validate and attach volumes if len(req.Volumes) > 0 { log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes)) for _, volAttach := range req.Volumes { @@ -359,7 +367,7 @@ func (m *manager) createInstance( stored.Volumes = req.Volumes } - // 15. Create config disk (needs Instance for buildVMConfig) + // 16. Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "instance_id", id) if err := m.createConfigDisk(ctx, inst, imageInfo, netConfig); err != nil { @@ -367,7 +375,7 @@ func (m *manager) createInstance( return nil, fmt.Errorf("create config disk: %w", err) } - // 16. Save metadata + // 17. Save metadata log.DebugContext(ctx, "saving instance metadata", "instance_id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { @@ -375,14 +383,14 @@ func (m *manager) createInstance( return nil, fmt.Errorf("save metadata: %w", err) } - // 17. Start VMM and boot VM + // 18. Start VMM and boot VM log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err } - // 18. Update timestamp after VM is running + // 19. 
Update timestamp after VM is running
 	now := time.Now()
 	stored.StartedAt = &now
 
diff --git a/lib/instances/storage.go b/lib/instances/storage.go
index e95691f..95c77da 100644
--- a/lib/instances/storage.go
+++ b/lib/instances/storage.go
@@ -11,16 +11,16 @@ import (
 
 // Filesystem structure:
 // {dataDir}/guests/{instance-id}/
-//   metadata.json           # Instance metadata
-//   overlay.raw             # Configurable sparse overlay disk (default 10GB)
-//   config.ext4             # Read-only config disk (generated)
-//   cloud-hypervisor.sock   # Hypervisor API socket (named after hypervisor type)
+//   metadata.json   # Instance metadata
+//   overlay.raw     # Configurable sparse overlay disk (default 10GB)
+//   config.ext4     # Read-only config disk (generated)
+//   ch.sock         # Hypervisor API socket (abbreviated name for SUN_LEN limit)
 //   logs/
-//     app.log               # Guest application log (serial console output)
-//     vmm.log               # Hypervisor log (stdout+stderr combined)
-//     hypeman.log           # Hypeman operations log (actions taken on this instance)
+//     app.log       # Guest application log (serial console output)
+//     vmm.log       # Hypervisor log (stdout+stderr combined)
+//     hypeman.log   # Hypeman operations log (actions taken on this instance)
 //   snapshots/
-//     snapshot-latest/      # Snapshot directory
+//     snapshot-latest/   # Snapshot directory
 //       config.json
 //       memory-ranges
 
diff --git a/lib/network/derive.go b/lib/network/derive.go
index 7fb849c..f568c03 100644
--- a/lib/network/derive.go
+++ b/lib/network/derive.go
@@ -7,6 +7,7 @@ import (
 	"net"
 	"os"
 
+	"github.com/onkernel/hypeman/lib/hypervisor"
 	"github.com/onkernel/hypeman/lib/logger"
 	"github.com/onkernel/hypeman/lib/vmm"
 )
@@ -49,7 +50,7 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All
 	netmask := fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3])
 
 	// 4. Try to derive from running VM first
-	socketPath := m.paths.InstanceSocket(instanceID, meta.HypervisorType)
+	socketPath := m.paths.InstanceSocket(instanceID, hypervisor.SocketNameForType(hypervisor.Type(meta.HypervisorType)))
 	if fileExists(socketPath) {
 		client, err := vmm.NewVMM(socketPath)
 		if err == nil {
diff --git a/lib/paths/paths.go b/lib/paths/paths.go
index 26ed46b..3009773 100644
--- a/lib/paths/paths.go
+++ b/lib/paths/paths.go
@@ -144,9 +144,11 @@ func (p *Paths) InstanceVolumeOverlaysDir(instanceID string) string {
 	return filepath.Join(p.InstanceDir(instanceID), "vol-overlays")
 }
 
-// InstanceSocket returns the path to instance API socket for the given hypervisor type.
-func (p *Paths) InstanceSocket(id string, hypervisorType string) string {
-	return filepath.Join(p.InstanceDir(id), hypervisorType+".sock")
+// InstanceSocket returns the path to instance API socket.
+// The socketName should be obtained from ProcessManager.SocketName() or
+// hypervisor.SocketNameForType to stay within Unix socket path limits (SUN_LEN ~108 bytes).
+func (p *Paths) InstanceSocket(id string, socketName string) string {
+	return filepath.Join(p.InstanceDir(id), socketName)
 }
 
 // InstanceVsockSocket returns the path to instance vsock socket.
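The SUN_LEN constraint above is easy to trip over once UUID-length instance IDs sit under a deep data directory. A standalone sketch of the arithmetic (the 108-byte constant reflects Linux's `sun_path` size; the helper name and example prefix are illustrative, not part of this series):

```go
package main

import (
	"fmt"
	"path/filepath"
)

// sunPathMax approximates sizeof(sun_path) on Linux: 108 bytes,
// one of which is consumed by the trailing NUL terminator.
const sunPathMax = 108

// validateSocketPath reports whether a candidate Unix socket path
// fits within the kernel's sun_path buffer.
func validateSocketPath(path string) error {
	if len(path) >= sunPathMax {
		return fmt.Errorf("socket path is %d bytes, exceeds SUN_LEN limit of %d: %s",
			len(path), sunPathMax, path)
	}
	return nil
}

func main() {
	// A hypothetical CI-style data dir plus a UUID instance ID:
	// the long socket name overflows, the abbreviated one fits.
	dir := "/home/runner/work/hypeman/hypeman/integration/testdata/guests/0193e7a2-5f3c-7d2e-9b1a-4c8f6e2d1a0b"
	fmt.Println(validateSocketPath(filepath.Join(dir, "cloud-hypervisor.sock"))) // 120 bytes: error
	fmt.Println(validateSocketPath(filepath.Join(dir, "ch.sock")))               // 106 bytes: <nil>
}
```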
diff --git a/lib/vmm/client.go b/lib/vmm/client.go index 322dac8..9424188 100644 --- a/lib/vmm/client.go +++ b/lib/vmm/client.go @@ -152,6 +152,11 @@ func StartProcessWithArgs(ctx context.Context, p *paths.Paths, version CHVersion defer cancel() if err := waitForSocket(waitCtx, socketPath, 5*time.Second); err != nil { + // Read vmm.log to understand why socket wasn't created + vmmLogPath := filepath.Join(logsDir, "vmm.log") + if logData, readErr := os.ReadFile(vmmLogPath); readErr == nil && len(logData) > 0 { + return 0, fmt.Errorf("%w; vmm.log: %s", err, string(logData)) + } return 0, err }
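Taken together, the series leaves callers with a single capability-gated pattern for optional features. A minimal sketch of the standby-style flow driven from outside the package (the function name, paths, and 512 MiB base size are illustrative; the real flow in lib/instances/standby.go also updates metadata, releases TAP devices, and shuts the hypervisor down gracefully):

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/onkernel/hypeman/lib/hypervisor"
	"github.com/onkernel/hypeman/lib/hypervisor/cloudhypervisor"
)

// standbySketch shrinks guest memory (when supported) and then snapshots,
// mirroring the capability-gated flow introduced in this series.
func standbySketch(ctx context.Context, socketPath, snapshotDir string, baseBytes int64) error {
	ch, err := cloudhypervisor.New(socketPath)
	if err != nil {
		return fmt.Errorf("connect to hypervisor: %w", err)
	}
	var hv hypervisor.Hypervisor = ch

	caps := hv.Capabilities()

	// Best effort: a smaller resident set means a smaller memory snapshot.
	if caps.SupportsHotplugMemory {
		if err := hv.ResizeMemoryAndWait(ctx, baseBytes, 5*time.Second); err != nil {
			log.Printf("memory reduction failed, snapshot will be larger: %v", err)
		}
	}

	// Callers must check capabilities before optional operations.
	if !caps.SupportsSnapshot {
		return fmt.Errorf("hypervisor does not support snapshots")
	}
	return hv.Snapshot(ctx, snapshotDir)
}

func main() {
	ctx := context.Background()
	if err := standbySketch(ctx,
		"/var/lib/hypeman/guests/example/ch.sock",
		"/var/lib/hypeman/guests/example/snapshots/snapshot-latest",
		512<<20); err != nil {
		log.Fatal(err)
	}
}
```

Because `Hypervisor` is socket-scoped while `ProcessManager` is binary-scoped, a caller like this only needs the former once the VMM process is already running.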