Skip to content

Commit 58df3eb

Browse files
authored
Resource accounting (#51)
* Account for resource limits
* Clean up providers
* Apply network rate limit on resume
* Bidirectional network
* Better storage accounting
* Fix tests
* Improved disk io accounting
* Appropriate defaults
* review fixes
* Include resources in instance response
* Download burst
* Review fixes
* Don't truncate bandwidth
* Fix test
* Fix tests
* address review comments
1 parent a08c2c8 commit 58df3eb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+3401
-220
lines changed

Makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,13 @@ dev: ensure-ch-binaries ensure-caddy-binaries build-embedded $(AIR)
192192
# Usage: make test - runs all tests
193193
# make test TEST=TestCreateInstanceWithNetwork - runs specific test
194194
test: ensure-ch-binaries ensure-caddy-binaries build-embedded
195-
@if [ -n "$(TEST)" ]; then \
195+
@VERBOSE_FLAG=""; \
196+
if [ -n "$(VERBOSE)" ]; then VERBOSE_FLAG="-v"; fi; \
197+
if [ -n "$(TEST)" ]; then \
196198
echo "Running specific test: $(TEST)"; \
197-
sudo env "PATH=$$PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp -run=$(TEST) -v -timeout=180s ./...; \
199+
sudo env "PATH=$$PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp -run=$(TEST) $$VERBOSE_FLAG -timeout=180s ./...; \
198200
else \
199-
sudo env "PATH=$$PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp -v -timeout=180s ./...; \
201+
sudo env "PATH=$$PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp $$VERBOSE_FLAG -timeout=180s ./...; \
200202
fi
201203

202204
# Generate JWT token for testing

cmd/api/api/api.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/onkernel/hypeman/lib/instances"
99
"github.com/onkernel/hypeman/lib/network"
1010
"github.com/onkernel/hypeman/lib/oapi"
11+
"github.com/onkernel/hypeman/lib/resources"
1112
"github.com/onkernel/hypeman/lib/volumes"
1213
)
1314

@@ -20,6 +21,7 @@ type ApiService struct {
2021
NetworkManager network.Manager
2122
DeviceManager devices.Manager
2223
IngressManager ingress.Manager
24+
ResourceManager *resources.Manager
2325
}
2426

2527
var _ oapi.StrictServerInterface = (*ApiService)(nil)
@@ -33,6 +35,7 @@ func New(
3335
networkManager network.Manager,
3436
deviceManager devices.Manager,
3537
ingressManager ingress.Manager,
38+
resourceManager *resources.Manager,
3639
) *ApiService {
3740
return &ApiService{
3841
Config: config,
@@ -42,5 +45,6 @@ func New(
4245
NetworkManager: networkManager,
4346
DeviceManager: deviceManager,
4447
IngressManager: ingressManager,
48+
ResourceManager: resourceManager,
4549
}
4650
}

cmd/api/api/api_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/onkernel/hypeman/lib/network"
1717
"github.com/onkernel/hypeman/lib/oapi"
1818
"github.com/onkernel/hypeman/lib/paths"
19+
"github.com/onkernel/hypeman/lib/resources"
1920
"github.com/onkernel/hypeman/lib/system"
2021
"github.com/onkernel/hypeman/lib/volumes"
2122
"github.com/stretchr/testify/require"
@@ -37,6 +38,7 @@ func newTestService(t *testing.T) *ApiService {
3738
networkMgr := network.NewManager(p, cfg, nil)
3839
deviceMgr := devices.NewManager(p)
3940
volumeMgr := volumes.NewManager(p, 0, nil) // 0 = unlimited storage
41+
resourceMgr := resources.NewManager(cfg, p)
4042
limits := instances.ResourceLimits{
4143
MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB
4244
}
@@ -53,6 +55,7 @@ func newTestService(t *testing.T) *ApiService {
5355
InstanceManager: instanceMgr,
5456
VolumeManager: volumeMgr,
5557
DeviceManager: deviceMgr,
58+
ResourceManager: resourceMgr,
5659
}
5760
}
5861

cmd/api/api/cp_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ func TestCpToAndFromInstance(t *testing.T) {
4646
Name: "cp-test",
4747
Image: "docker.io/library/nginx:alpine",
4848
Network: &struct {
49-
Enabled *bool `json:"enabled,omitempty"`
49+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
50+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
51+
Enabled *bool `json:"enabled,omitempty"`
5052
}{
5153
Enabled: &networkEnabled,
5254
},
@@ -182,7 +184,9 @@ func TestCpDirectoryToInstance(t *testing.T) {
182184
Name: "cp-dir-test",
183185
Image: "docker.io/library/nginx:alpine",
184186
Network: &struct {
185-
Enabled *bool `json:"enabled,omitempty"`
187+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
188+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
189+
Enabled *bool `json:"enabled,omitempty"`
186190
}{
187191
Enabled: &networkEnabled,
188192
},

cmd/api/api/exec_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ func TestExecInstanceNonTTY(t *testing.T) {
4747
Name: "exec-test",
4848
Image: "docker.io/library/nginx:alpine",
4949
Network: &struct {
50-
Enabled *bool `json:"enabled,omitempty"`
50+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
51+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
52+
Enabled *bool `json:"enabled,omitempty"`
5153
}{
5254
Enabled: &networkEnabled,
5355
},
@@ -185,7 +187,9 @@ func TestExecWithDebianMinimal(t *testing.T) {
185187
Name: "debian-exec-test",
186188
Image: "docker.io/library/debian:12-slim",
187189
Network: &struct {
188-
Enabled *bool `json:"enabled,omitempty"`
190+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
191+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
192+
Enabled *bool `json:"enabled,omitempty"`
189193
}{
190194
Enabled: &networkEnabled,
191195
},

cmd/api/api/instances.go

Lines changed: 104 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"net/http"
9+
"strings"
910

1011
"github.com/c2h5oh/datasize"
1112
"github.com/onkernel/hypeman/lib/guest"
@@ -15,6 +16,7 @@ import (
1516
mw "github.com/onkernel/hypeman/lib/middleware"
1617
"github.com/onkernel/hypeman/lib/network"
1718
"github.com/onkernel/hypeman/lib/oapi"
19+
"github.com/onkernel/hypeman/lib/resources"
1820
"github.com/samber/lo"
1921
)
2022

@@ -82,6 +84,23 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
8284
overlaySize = int64(overlayBytes)
8385
}
8486

87+
// Parse disk_io_bps (0 = auto/unlimited)
88+
diskIOBps := int64(0)
89+
if request.Body.DiskIoBps != nil && *request.Body.DiskIoBps != "" {
90+
var ioBpsBytes datasize.ByteSize
91+
// Strip a trailing rate suffix ("/s" or "ps") if present, so only the size is parsed
92+
ioStr := *request.Body.DiskIoBps
93+
ioStr = strings.TrimSuffix(ioStr, "/s")
94+
ioStr = strings.TrimSuffix(ioStr, "ps")
95+
if err := ioBpsBytes.UnmarshalText([]byte(ioStr)); err != nil {
96+
return oapi.CreateInstance400JSONResponse{
97+
Code: "invalid_disk_io_bps",
98+
Message: fmt.Sprintf("invalid disk_io_bps format: %v", err),
99+
}, nil
100+
}
101+
diskIOBps = int64(ioBpsBytes)
102+
}
103+
85104
vcpus := 2
86105
if request.Body.Vcpus != nil {
87106
vcpus = *request.Body.Vcpus
@@ -98,6 +117,33 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
98117
networkEnabled = *request.Body.Network.Enabled
99118
}
100119

120+
// Parse network bandwidth limits (0 = auto)
121+
// Supports both bit-based (e.g., "1Gbps") and byte-based (e.g., "125MB/s") formats
122+
var networkBandwidthDownload int64
123+
var networkBandwidthUpload int64
124+
if request.Body.Network != nil {
125+
if request.Body.Network.BandwidthDownload != nil && *request.Body.Network.BandwidthDownload != "" {
126+
bw, err := resources.ParseBandwidth(*request.Body.Network.BandwidthDownload)
127+
if err != nil {
128+
return oapi.CreateInstance400JSONResponse{
129+
Code: "invalid_bandwidth_download",
130+
Message: fmt.Sprintf("invalid bandwidth_download format: %v", err),
131+
}, nil
132+
}
133+
networkBandwidthDownload = bw
134+
}
135+
if request.Body.Network.BandwidthUpload != nil && *request.Body.Network.BandwidthUpload != "" {
136+
bw, err := resources.ParseBandwidth(*request.Body.Network.BandwidthUpload)
137+
if err != nil {
138+
return oapi.CreateInstance400JSONResponse{
139+
Code: "invalid_bandwidth_upload",
140+
Message: fmt.Sprintf("invalid bandwidth_upload format: %v", err),
141+
}, nil
142+
}
143+
networkBandwidthUpload = bw
144+
}
145+
}
146+
101147
// Parse devices (GPU passthrough)
102148
var deviceRefs []string
103149
if request.Body.Devices != nil {
@@ -144,18 +190,36 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
144190
hvType = hypervisor.Type(*request.Body.Hypervisor)
145191
}
146192

193+
// Calculate default resource limits when not specified (0 = auto)
194+
// Uses proportional allocation based on CPU: (vcpus / cpuCapacity) * resourceCapacity
195+
if diskIOBps == 0 {
196+
diskIOBps, _ = s.ResourceManager.DefaultDiskIOBandwidth(vcpus)
197+
}
198+
if networkBandwidthDownload == 0 || networkBandwidthUpload == 0 {
199+
defaultDown, defaultUp := s.ResourceManager.DefaultNetworkBandwidth(vcpus)
200+
if networkBandwidthDownload == 0 {
201+
networkBandwidthDownload = defaultDown
202+
}
203+
if networkBandwidthUpload == 0 {
204+
networkBandwidthUpload = defaultUp
205+
}
206+
}
207+
147208
domainReq := instances.CreateInstanceRequest{
148-
Name: request.Body.Name,
149-
Image: request.Body.Image,
150-
Size: size,
151-
HotplugSize: hotplugSize,
152-
OverlaySize: overlaySize,
153-
Vcpus: vcpus,
154-
Env: env,
155-
NetworkEnabled: networkEnabled,
156-
Devices: deviceRefs,
157-
Volumes: volumes,
158-
Hypervisor: hvType,
209+
Name: request.Body.Name,
210+
Image: request.Body.Image,
211+
Size: size,
212+
HotplugSize: hotplugSize,
213+
OverlaySize: overlaySize,
214+
Vcpus: vcpus,
215+
DiskIOBps: diskIOBps,
216+
NetworkBandwidthDownload: networkBandwidthDownload,
217+
NetworkBandwidthUpload: networkBandwidthUpload,
218+
Env: env,
219+
NetworkEnabled: networkEnabled,
220+
Devices: deviceRefs,
221+
Volumes: volumes,
222+
Hypervisor: hvType,
159223
}
160224

161225
inst, err := s.InstanceManager.CreateInstance(ctx, domainReq)
@@ -539,14 +603,29 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {
539603
hotplugSizeStr := datasize.ByteSize(inst.HotplugSize).HR()
540604
overlaySizeStr := datasize.ByteSize(inst.OverlaySize).HR()
541605

542-
// Build network object with ip/mac nested inside
606+
// Format bandwidth as human-readable (bytes/s to rate string)
607+
var downloadBwStr, uploadBwStr *string
608+
if inst.NetworkBandwidthDownload > 0 {
609+
s := datasize.ByteSize(inst.NetworkBandwidthDownload).HR() + "/s"
610+
downloadBwStr = &s
611+
}
612+
if inst.NetworkBandwidthUpload > 0 {
613+
s := datasize.ByteSize(inst.NetworkBandwidthUpload).HR() + "/s"
614+
uploadBwStr = &s
615+
}
616+
617+
// Build network object with ip/mac and bandwidth nested inside
543618
netObj := &struct {
544-
Enabled *bool `json:"enabled,omitempty"`
545-
Ip *string `json:"ip"`
546-
Mac *string `json:"mac"`
547-
Name *string `json:"name,omitempty"`
619+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
620+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
621+
Enabled *bool `json:"enabled,omitempty"`
622+
Ip *string `json:"ip"`
623+
Mac *string `json:"mac"`
624+
Name *string `json:"name,omitempty"`
548625
}{
549-
Enabled: lo.ToPtr(inst.NetworkEnabled),
626+
Enabled: lo.ToPtr(inst.NetworkEnabled),
627+
BandwidthDownload: downloadBwStr,
628+
BandwidthUpload: uploadBwStr,
550629
}
551630
if inst.NetworkEnabled {
552631
netObj.Name = lo.ToPtr("default")
@@ -557,6 +636,13 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {
557636
// Convert hypervisor type
558637
hvType := oapi.InstanceHypervisor(inst.HypervisorType)
559638

639+
// Format disk I/O as human-readable
640+
var diskIoBpsStr *string
641+
if inst.DiskIOBps > 0 {
642+
s := datasize.ByteSize(inst.DiskIOBps).HR() + "/s"
643+
diskIoBpsStr = &s
644+
}
645+
560646
oapiInst := oapi.Instance{
561647
Id: inst.Id,
562648
Name: inst.Name,
@@ -567,6 +653,7 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {
567653
HotplugSize: lo.ToPtr(hotplugSizeStr),
568654
OverlaySize: lo.ToPtr(overlaySizeStr),
569655
Vcpus: lo.ToPtr(inst.Vcpus),
656+
DiskIoBps: diskIoBpsStr,
570657
Network: netObj,
571658
CreatedAt: inst.CreatedAt,
572659
StartedAt: inst.StartedAt,

cmd/api/api/instances_test.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) {
6565
HotplugSize: &hotplugSize,
6666
OverlaySize: &overlaySize,
6767
Network: &struct {
68-
Enabled *bool `json:"enabled,omitempty"`
68+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
69+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
70+
Enabled *bool `json:"enabled,omitempty"`
6971
}{
7072
Enabled: &networkEnabled,
7173
},
@@ -109,7 +111,9 @@ func TestCreateInstance_InvalidSizeFormat(t *testing.T) {
109111
Image: "docker.io/library/alpine:latest",
110112
Size: &invalidSize,
111113
Network: &struct {
112-
Enabled *bool `json:"enabled,omitempty"`
114+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
115+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
116+
Enabled *bool `json:"enabled,omitempty"`
113117
}{
114118
Enabled: &networkEnabled,
115119
},
@@ -150,7 +154,9 @@ func TestInstanceLifecycle_StopStart(t *testing.T) {
150154
Name: "test-lifecycle",
151155
Image: "docker.io/library/nginx:alpine",
152156
Network: &struct {
153-
Enabled *bool `json:"enabled,omitempty"`
157+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
158+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
159+
Enabled *bool `json:"enabled,omitempty"`
154160
}{
155161
Enabled: &networkEnabled,
156162
},
@@ -208,11 +214,11 @@ func waitForState(t *testing.T, svc *ApiService, instanceID string, expectedStat
208214
inst, err := svc.InstanceManager.GetInstance(ctx(), instanceID)
209215
require.NoError(t, err)
210216

211-
if string(inst.State) == expectedState {
212-
t.Logf("Instance reached %s state", expectedState)
213-
return
214-
}
215-
t.Logf("Instance state: %s (waiting for %s)", inst.State, expectedState)
217+
if string(inst.State) == expectedState {
218+
t.Logf("Instance reached %s state", expectedState)
219+
return
220+
}
221+
t.Logf("Instance state: %s (waiting for %s)", inst.State, expectedState)
216222
time.Sleep(100 * time.Millisecond)
217223
}
218224
t.Fatalf("Timeout waiting for instance to reach %s state", expectedState)

cmd/api/api/registry_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ func TestRegistryPushAndCreateInstance(t *testing.T) {
135135
Name: "test-pushed-image",
136136
Image: imageName,
137137
Network: &struct {
138-
Enabled *bool `json:"enabled,omitempty"`
138+
BandwidthDownload *string `json:"bandwidth_download,omitempty"`
139+
BandwidthUpload *string `json:"bandwidth_upload,omitempty"`
140+
Enabled *bool `json:"enabled,omitempty"`
139141
}{
140142
Enabled: &networkEnabled,
141143
},

0 commit comments

Comments
 (0)