Skip to content

Commit 1230c4e

Browse files
committed
address code reviews
1 parent 8ec1251 commit 1230c4e

File tree

10 files changed

+89
-24
lines changed

10 files changed

+89
-24
lines changed

lib/hypervisor/cloudhypervisor/cloudhypervisor.go

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ package cloudhypervisor
55
import (
66
"context"
77
"fmt"
8+
"time"
89

910
"github.com/onkernel/hypeman/lib/hypervisor"
1011
"github.com/onkernel/hypeman/lib/vmm"
1112
)
1213

1314
// CloudHypervisor implements hypervisor.Hypervisor for Cloud Hypervisor VMM.
1415
type CloudHypervisor struct {
15-
client *vmm.VMM
16-
socketPath string
16+
client *vmm.VMM
1717
}
1818

1919
// New creates a new Cloud Hypervisor client for an existing VMM socket.
@@ -23,8 +23,7 @@ func New(socketPath string) (*CloudHypervisor, error) {
2323
return nil, fmt.Errorf("create vmm client: %w", err)
2424
}
2525
return &CloudHypervisor{
26-
client: client,
27-
socketPath: socketPath,
26+
client: client,
2827
}, nil
2928
}
3029

@@ -114,7 +113,10 @@ func (c *CloudHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, er
114113
return nil, fmt.Errorf("unknown vm state: %s", resp.JSON200.State)
115114
}
116115

117-
return &hypervisor.VMInfo{State: state}, nil
116+
return &hypervisor.VMInfo{
117+
State: state,
118+
MemoryActualSize: resp.JSON200.MemoryActualSize,
119+
}, nil
118120
}
119121

120122
// Pause suspends VM execution.
@@ -185,6 +187,59 @@ func (c *CloudHypervisor) ResizeMemory(ctx context.Context, bytes int64) error {
185187
return nil
186188
}
187189

190+
// ResizeMemoryAndWait changes the VM's memory allocation and waits for it to stabilize.
191+
// It polls until the actual memory size stabilizes (stops changing) or timeout is reached.
192+
func (c *CloudHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error {
193+
// First, request the resize
194+
if err := c.ResizeMemory(ctx, bytes); err != nil {
195+
return err
196+
}
197+
198+
// Poll until memory stabilizes
199+
const pollInterval = 20 * time.Millisecond
200+
deadline := time.Now().Add(timeout)
201+
202+
var lastSize int64 = -1
203+
stableCount := 0
204+
const requiredStableChecks = 3 // Require 3 consecutive stable readings
205+
206+
for time.Now().Before(deadline) {
207+
select {
208+
case <-ctx.Done():
209+
return ctx.Err()
210+
default:
211+
}
212+
213+
info, err := c.GetVMInfo(ctx)
214+
if err != nil {
215+
return fmt.Errorf("poll memory size: %w", err)
216+
}
217+
218+
if info.MemoryActualSize == nil {
219+
// No memory info available, just return after resize
220+
return nil
221+
}
222+
223+
currentSize := *info.MemoryActualSize
224+
225+
if currentSize == lastSize {
226+
stableCount++
227+
if stableCount >= requiredStableChecks {
228+
// Memory has stabilized
229+
return nil
230+
}
231+
} else {
232+
stableCount = 0
233+
lastSize = currentSize
234+
}
235+
236+
time.Sleep(pollInterval)
237+
}
238+
239+
// Timeout reached, but resize was requested successfully
240+
return nil
241+
}
242+
188243
func ptr[T any](v T) *T {
189244
return &v
190245
}

lib/hypervisor/cloudhypervisor/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func ToVMConfig(cfg hypervisor.VMConfig) vmm.VmConfig {
4040
}
4141

4242
// Disk configuration
43-
var disks []vmm.DiskConfig
43+
disks := make([]vmm.DiskConfig, 0, len(cfg.Disks))
4444
for _, d := range cfg.Disks {
4545
disk := vmm.DiskConfig{
4646
Path: ptr(d.Path),

lib/hypervisor/config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ type NetworkConfig struct {
5555

5656
// VMInfo contains current VM state information
5757
type VMInfo struct {
58-
State VMState
58+
State VMState
59+
MemoryActualSize *int64 // Current actual memory size in bytes (if available)
5960
}
6061

6162
// VMState represents the VM execution state

lib/hypervisor/hypervisor.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package hypervisor
55

66
import (
77
"context"
8+
"time"
89

910
"github.com/onkernel/hypeman/lib/paths"
1011
)
@@ -59,6 +60,11 @@ type Hypervisor interface {
5960
// Check Capabilities().SupportsHotplugMemory before calling.
6061
ResizeMemory(ctx context.Context, bytes int64) error
6162

63+
// ResizeMemoryAndWait changes the VM's memory allocation and waits for it to stabilize.
64+
// This polls until the actual memory size matches the target or stabilizes.
65+
// Check Capabilities().SupportsHotplugMemory before calling.
66+
ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error
67+
6268
// Capabilities returns what features this hypervisor supports.
6369
Capabilities() Capabilities
6470
}

lib/instances/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,11 @@ Manages VM instance lifecycle using Cloud Hypervisor.
4040
metadata.json # State, versions, timestamps
4141
overlay.raw # 50GB sparse writable overlay
4242
config.erofs # Compressed config disk
43-
ch.sock # Cloud Hypervisor API socket
44-
ch-stdout.log # CH process output
43+
cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type)
4544
logs/
46-
console.log # Serial console (VM output)
45+
app.log # Guest application log (serial console output)
46+
vmm.log # Hypervisor log (stdout+stderr)
47+
hypeman.log # Hypeman operations log
4748
snapshots/
4849
snapshot-latest/ # Snapshot directory
4950
config.json # VM configuration

lib/instances/create.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ func (m *manager) createInstance(
273273
KernelVersion: string(kernelVer),
274274
HypervisorType: hypervisor.TypeCloudHypervisor,
275275
HypervisorVersion: string(vmm.V49_0), // Use latest
276-
SocketPath: m.paths.InstanceSocket(id),
276+
SocketPath: m.paths.InstanceSocket(id, string(hypervisor.TypeCloudHypervisor)),
277277
DataDir: m.paths.InstanceDir(id),
278278
VsockCID: vsockCID,
279279
VsockSocket: vsockSocket,

lib/instances/standby.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,10 @@ func (m *manager) standbyInstance(
7272
}
7373

7474
// 6. Reduce memory to base size (virtio-mem hotplug) if supported
75+
// Wait for memory to stabilize so the snapshot is as small as possible
7576
if hv.Capabilities().SupportsHotplugMemory {
7677
log.DebugContext(ctx, "reducing VM memory before snapshot", "instance_id", id, "base_size", inst.Size)
77-
if err := hv.ResizeMemory(ctx, inst.Size); err != nil {
78+
if err := hv.ResizeMemoryAndWait(ctx, inst.Size, 5*time.Second); err != nil {
7879
// Log warning but continue - snapshot will just be larger
7980
log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "instance_id", id, "error", err)
8081
}

lib/instances/storage.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ import (
1111

1212
// Filesystem structure:
1313
// {dataDir}/guests/{instance-id}/
14-
// metadata.json # Instance metadata
15-
// overlay.raw # Configurable sparse overlay disk (default 10GB)
16-
// config.ext4 # Read-only config disk (generated)
17-
// ch.sock # Cloud Hypervisor API socket
14+
// metadata.json # Instance metadata
15+
// overlay.raw # Configurable sparse overlay disk (default 10GB)
16+
// config.ext4 # Read-only config disk (generated)
17+
// cloud-hypervisor.sock # Hypervisor API socket (named after hypervisor type)
1818
// logs/
19-
// app.log # Guest application log (serial console output)
20-
// vmm.log # Cloud Hypervisor VMM log (stdout+stderr combined)
21-
// hypeman.log # Hypeman operations log (actions taken on this instance)
19+
// app.log # Guest application log (serial console output)
20+
// vmm.log # Hypervisor log (stdout+stderr combined)
21+
// hypeman.log # Hypeman operations log (actions taken on this instance)
2222
// snapshots/
23-
// snapshot-latest/ # Snapshot directory
23+
// snapshot-latest/ # Snapshot directory
2424
// config.json
2525
// memory-ranges
2626

lib/network/derive.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
type instanceMetadata struct {
1717
Name string
1818
NetworkEnabled bool
19+
HypervisorType string
1920
}
2021

2122
// deriveAllocation derives network allocation from CH or snapshot
@@ -48,7 +49,7 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All
4849
netmask := fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3])
4950

5051
// 4. Try to derive from running VM first
51-
socketPath := m.paths.InstanceSocket(instanceID)
52+
socketPath := m.paths.InstanceSocket(instanceID, meta.HypervisorType)
5253
if fileExists(socketPath) {
5354
client, err := vmm.NewVMM(socketPath)
5455
if err == nil {

lib/paths/paths.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,9 @@ func (p *Paths) InstanceVolumeOverlaysDir(instanceID string) string {
144144
return filepath.Join(p.InstanceDir(instanceID), "vol-overlays")
145145
}
146146

147-
// InstanceSocket returns the path to instance API socket.
148-
func (p *Paths) InstanceSocket(id string) string {
149-
return filepath.Join(p.InstanceDir(id), "ch.sock")
147+
// InstanceSocket returns the path to instance API socket for the given hypervisor type.
148+
func (p *Paths) InstanceSocket(id string, hypervisorType string) string {
149+
return filepath.Join(p.InstanceDir(id), hypervisorType+".sock")
150150
}
151151

152152
// InstanceVsockSocket returns the path to instance vsock socket.

0 commit comments

Comments
 (0)