Skip to content

Commit 3cfa5fd

Browse files
authored
Instance manager (#6)
* Instance manager * Kernel and initrd setup * Add instances manager README * Hotplug memory management only * Working on tests * Switch back to ext4 for rootfs, and instance booting working * Create cpio initrd with Golang * Safer versioning initrd * More readable config script formatting * Make overlay size configurable * Derive state instead of storing it * Per instance locks * Track process IDs * Clean up test vmms * Add logs * Testing for memory unplug * Improve test * Use paths module * Fix tests * Cuid2 * Handle CPU topology * Add test to confirm files not closed for CH * Without cancel * allow consecutive dashes
1 parent 4990e47 commit 3cfa5fd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+4757
-381
lines changed

cmd/api/api/api_test.go

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,88 @@ package api
22

33
import (
44
"context"
5+
"encoding/json"
6+
"os"
7+
"syscall"
58
"testing"
69

710
"github.com/onkernel/hypeman/cmd/api/config"
811
"github.com/onkernel/hypeman/lib/images"
912
"github.com/onkernel/hypeman/lib/instances"
13+
"github.com/onkernel/hypeman/lib/paths"
14+
"github.com/onkernel/hypeman/lib/system"
1015
"github.com/onkernel/hypeman/lib/volumes"
1116
)
1217

13-
// newTestService creates an ApiService for testing with temporary data directory
18+
// newTestService creates an ApiService for testing with automatic cleanup
1419
func newTestService(t *testing.T) *ApiService {
1520
cfg := &config.Config{
1621
DataDir: t.TempDir(),
1722
}
1823

19-
imageMgr, err := images.NewManager(cfg.DataDir, 1)
24+
p := paths.New(cfg.DataDir)
25+
imageMgr, err := images.NewManager(p, 1)
2026
if err != nil {
2127
t.Fatalf("failed to create image manager: %v", err)
2228
}
2329

30+
systemMgr := system.NewManager(p)
31+
maxOverlaySize := int64(100 * 1024 * 1024 * 1024) // 100GB for tests
32+
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, maxOverlaySize)
33+
volumeMgr := volumes.NewManager(p)
34+
35+
// Register cleanup for orphaned Cloud Hypervisor processes
36+
t.Cleanup(func() {
37+
cleanupOrphanedProcesses(t, cfg.DataDir)
38+
})
39+
2440
return &ApiService{
2541
Config: cfg,
2642
ImageManager: imageMgr,
27-
InstanceManager: instances.NewManager(cfg.DataDir),
28-
VolumeManager: volumes.NewManager(cfg.DataDir),
43+
InstanceManager: instanceMgr,
44+
VolumeManager: volumeMgr,
45+
}
46+
}
47+
48+
// cleanupOrphanedProcesses kills Cloud Hypervisor processes from metadata files
49+
func cleanupOrphanedProcesses(t *testing.T, dataDir string) {
50+
p := paths.New(dataDir)
51+
guestsDir := p.GuestsDir()
52+
53+
entries, err := os.ReadDir(guestsDir)
54+
if err != nil {
55+
return // No guests directory
56+
}
57+
58+
for _, entry := range entries {
59+
if !entry.IsDir() {
60+
continue
61+
}
62+
63+
metaPath := p.InstanceMetadata(entry.Name())
64+
data, err := os.ReadFile(metaPath)
65+
if err != nil {
66+
continue
67+
}
68+
69+
// Parse just the CHPID field
70+
var meta struct {
71+
CHPID *int `json:"CHPID"`
72+
}
73+
if err := json.Unmarshal(data, &meta); err != nil {
74+
continue
75+
}
76+
77+
// If metadata has a PID, try to kill it
78+
if meta.CHPID != nil {
79+
pid := *meta.CHPID
80+
81+
// Check if process exists
82+
if err := syscall.Kill(pid, 0); err == nil {
83+
t.Logf("Cleaning up orphaned Cloud Hypervisor process: PID %d", pid)
84+
syscall.Kill(pid, syscall.SIGKILL)
85+
}
86+
}
2987
}
3088
}
3189

cmd/api/api/instances.go

Lines changed: 106 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ package api
33
import (
44
"context"
55
"errors"
6+
"fmt"
67
"strings"
78

9+
"github.com/c2h5oh/datasize"
810
"github.com/onkernel/hypeman/lib/instances"
911
"github.com/onkernel/hypeman/lib/logger"
1012
"github.com/onkernel/hypeman/lib/oapi"
@@ -35,19 +37,85 @@ func (s *ApiService) ListInstances(ctx context.Context, request oapi.ListInstanc
3537
func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInstanceRequestObject) (oapi.CreateInstanceResponseObject, error) {
3638
log := logger.FromContext(ctx)
3739

40+
// Parse size (default: 1GB)
41+
size := int64(0)
42+
if request.Body.Size != nil && *request.Body.Size != "" {
43+
var sizeBytes datasize.ByteSize
44+
if err := sizeBytes.UnmarshalText([]byte(*request.Body.Size)); err != nil {
45+
return oapi.CreateInstance400JSONResponse{
46+
Code: "invalid_size",
47+
Message: fmt.Sprintf("invalid size format: %v", err),
48+
}, nil
49+
}
50+
size = int64(sizeBytes)
51+
}
52+
53+
// Parse hotplug_size (default: 3GB)
54+
hotplugSize := int64(0)
55+
if request.Body.HotplugSize != nil && *request.Body.HotplugSize != "" {
56+
var hotplugBytes datasize.ByteSize
57+
if err := hotplugBytes.UnmarshalText([]byte(*request.Body.HotplugSize)); err != nil {
58+
return oapi.CreateInstance400JSONResponse{
59+
Code: "invalid_hotplug_size",
60+
Message: fmt.Sprintf("invalid hotplug_size format: %v", err),
61+
}, nil
62+
}
63+
hotplugSize = int64(hotplugBytes)
64+
}
65+
66+
// Parse overlay_size (default: 10GB)
67+
overlaySize := int64(0)
68+
if request.Body.OverlaySize != nil && *request.Body.OverlaySize != "" {
69+
var overlayBytes datasize.ByteSize
70+
if err := overlayBytes.UnmarshalText([]byte(*request.Body.OverlaySize)); err != nil {
71+
return oapi.CreateInstance400JSONResponse{
72+
Code: "invalid_overlay_size",
73+
Message: fmt.Sprintf("invalid overlay_size format: %v", err),
74+
}, nil
75+
}
76+
overlaySize = int64(overlayBytes)
77+
}
78+
79+
vcpus := 2
80+
if request.Body.Vcpus != nil {
81+
vcpus = *request.Body.Vcpus
82+
}
83+
84+
env := make(map[string]string)
85+
if request.Body.Env != nil {
86+
env = *request.Body.Env
87+
}
88+
3889
domainReq := instances.CreateInstanceRequest{
39-
Id: request.Body.Id,
40-
Name: request.Body.Name,
41-
Image: request.Body.Image,
90+
Name: request.Body.Name,
91+
Image: request.Body.Image,
92+
Size: size,
93+
HotplugSize: hotplugSize,
94+
OverlaySize: overlaySize,
95+
Vcpus: vcpus,
96+
Env: env,
4297
}
4398

4499
inst, err := s.InstanceManager.CreateInstance(ctx, domainReq)
45100
if err != nil {
101+
switch {
102+
case errors.Is(err, instances.ErrImageNotReady):
103+
return oapi.CreateInstance400JSONResponse{
104+
Code: "image_not_ready",
105+
Message: err.Error(),
106+
}, nil
107+
case errors.Is(err, instances.ErrAlreadyExists):
108+
return oapi.CreateInstance400JSONResponse{
109+
Code: "already_exists",
110+
Message: "instance already exists",
111+
}, nil
112+
default:
46113
log.Error("failed to create instance", "error", err, "image", request.Body.Image)
47114
return oapi.CreateInstance500JSONResponse{
48115
Code: "internal_error",
49116
Message: "failed to create instance",
50117
}, nil
118+
}
51119
}
52120
return oapi.CreateInstance201JSONResponse(instanceToOAPI(*inst)), nil
53121
}
@@ -75,8 +143,6 @@ func (s *ApiService) GetInstance(ctx context.Context, request oapi.GetInstanceRe
75143
return oapi.GetInstance200JSONResponse(instanceToOAPI(*inst)), nil
76144
}
77145

78-
79-
80146
// DeleteInstance stops and deletes an instance
81147
func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInstanceRequestObject) (oapi.DeleteInstanceResponseObject, error) {
82148
log := logger.FromContext(ctx)
@@ -115,7 +181,7 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn
115181
case errors.Is(err, instances.ErrInvalidState):
116182
return oapi.StandbyInstance409JSONResponse{
117183
Code: "invalid_state",
118-
Message: "instance is not in a valid state for standby",
184+
Message: err.Error(),
119185
}, nil
120186
default:
121187
log.Error("failed to standby instance", "error", err, "id", request.Id)
@@ -143,7 +209,7 @@ func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreIn
143209
case errors.Is(err, instances.ErrInvalidState):
144210
return oapi.RestoreInstance409JSONResponse{
145211
Code: "invalid_state",
146-
Message: "instance is not in standby state",
212+
Message: err.Error(),
147213
}, nil
148214
default:
149215
log.Error("failed to restore instance", "error", err, "id", request.Id)
@@ -192,61 +258,48 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan
192258
}, nil
193259
}
194260

195-
// AttachVolume attaches a volume to an instance
261+
// AttachVolume attaches a volume to an instance (not yet implemented)
196262
func (s *ApiService) AttachVolume(ctx context.Context, request oapi.AttachVolumeRequestObject) (oapi.AttachVolumeResponseObject, error) {
197-
log := logger.FromContext(ctx)
198-
199-
domainReq := instances.AttachVolumeRequest{
200-
MountPath: request.Body.MountPath,
201-
}
202-
203-
inst, err := s.InstanceManager.AttachVolume(ctx, request.Id, request.VolumeId, domainReq)
204-
if err != nil {
205-
switch {
206-
case errors.Is(err, instances.ErrNotFound):
207-
return oapi.AttachVolume404JSONResponse{
208-
Code: "not_found",
209-
Message: "instance or volume not found",
210-
}, nil
211-
default:
212-
log.Error("failed to attach volume", "error", err, "instance_id", request.Id, "volume_id", request.VolumeId)
213263
return oapi.AttachVolume500JSONResponse{
214-
Code: "internal_error",
215-
Message: "failed to attach volume",
264+
Code: "not_implemented",
265+
Message: "volume attachment not yet implemented",
216266
}, nil
217-
}
218-
}
219-
return oapi.AttachVolume200JSONResponse(instanceToOAPI(*inst)), nil
220267
}
221268

222-
// DetachVolume detaches a volume from an instance
269+
// DetachVolume detaches a volume from an instance (not yet implemented)
223270
func (s *ApiService) DetachVolume(ctx context.Context, request oapi.DetachVolumeRequestObject) (oapi.DetachVolumeResponseObject, error) {
224-
log := logger.FromContext(ctx)
225-
226-
inst, err := s.InstanceManager.DetachVolume(ctx, request.Id, request.VolumeId)
227-
if err != nil {
228-
switch {
229-
case errors.Is(err, instances.ErrNotFound):
230-
return oapi.DetachVolume404JSONResponse{
231-
Code: "not_found",
232-
Message: "instance or volume not found",
233-
}, nil
234-
default:
235-
log.Error("failed to detach volume", "error", err, "instance_id", request.Id, "volume_id", request.VolumeId)
236271
return oapi.DetachVolume500JSONResponse{
237-
Code: "internal_error",
238-
Message: "failed to detach volume",
272+
Code: "not_implemented",
273+
Message: "volume detachment not yet implemented",
239274
}, nil
240-
}
241-
}
242-
return oapi.DetachVolume200JSONResponse(instanceToOAPI(*inst)), nil
243275
}
244276

277+
// instanceToOAPI converts domain Instance to OAPI Instance
245278
func instanceToOAPI(inst instances.Instance) oapi.Instance {
246-
return oapi.Instance{
247-
Id: inst.Id,
248-
Name: inst.Name,
249-
Image: inst.Image,
250-
CreatedAt: inst.CreatedAt,
279+
// Format sizes as human-readable strings with best precision
280+
// HR() returns format like "1.5 GB" with 1 decimal place
281+
sizeStr := datasize.ByteSize(inst.Size).HR()
282+
hotplugSizeStr := datasize.ByteSize(inst.HotplugSize).HR()
283+
overlaySizeStr := datasize.ByteSize(inst.OverlaySize).HR()
284+
285+
oapiInst := oapi.Instance{
286+
Id: inst.Id,
287+
Name: inst.Name,
288+
Image: inst.Image,
289+
State: oapi.InstanceState(inst.State),
290+
Size: &sizeStr,
291+
HotplugSize: &hotplugSizeStr,
292+
OverlaySize: &overlaySizeStr,
293+
Vcpus: &inst.Vcpus,
294+
CreatedAt: inst.CreatedAt,
295+
StartedAt: inst.StartedAt,
296+
StoppedAt: inst.StoppedAt,
297+
HasSnapshot: &inst.HasSnapshot,
298+
}
299+
300+
if len(inst.Env) > 0 {
301+
oapiInst.Env = &inst.Env
251302
}
252-
}
303+
304+
return oapiInst
305+
}

0 commit comments

Comments
 (0)