From 5945751f1f5e23829d12a90452cede7ee223ec77 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Thu, 11 Dec 2025 11:17:49 -0500 Subject: [PATCH 1/3] fix: allocate fresh network on start VM --- lib/instances/start.go | 49 ++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/lib/instances/start.go b/lib/instances/start.go index 149c1995..4f010bd0 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -8,6 +8,7 @@ import ( "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" "go.opentelemetry.io/otel/trace" + "gvisor.dev/gvisor/pkg/cleanup" ) // startInstance starts a stopped instance @@ -52,45 +53,41 @@ func (m *manager) startInstance( return nil, fmt.Errorf("get image: %w", err) } - // 4. Recreate network allocation if network enabled + // Set up cleanup stack for automatic rollback on errors + cu := cleanup.Make(func() {}) + defer cu.Clean() + + // 4. Allocate fresh network if network enabled var netConfig *network.NetworkConfig if stored.NetworkEnabled { - log.DebugContext(ctx, "recreating network for start", "instance_id", id, "network", "default") - if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { - log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err) - return nil, fmt.Errorf("recreate network: %w", err) - } - // Get the network config for VM configuration - netAlloc, err := m.networkManager.GetAllocation(ctx, id) + log.DebugContext(ctx, "allocating network for start", "instance_id", id, "network", "default") + netConfig, err = m.networkManager.CreateAllocation(ctx, network.AllocateRequest{ + InstanceID: id, + InstanceName: stored.Name, + }) if err != nil { - log.ErrorContext(ctx, "failed to get network allocation", "instance_id", id, "error", err) - // Cleanup network on failure - if netAlloc != nil { - m.networkManager.ReleaseAllocation(ctx, netAlloc) - } - return nil, fmt.Errorf("get network allocation: %w", err) 
- } - netConfig = &network.NetworkConfig{ - TAPDevice: netAlloc.TAPDevice, - IP: netAlloc.IP, - MAC: netAlloc.MAC, - Netmask: "255.255.255.0", // Default netmask + log.ErrorContext(ctx, "failed to allocate network", "instance_id", id, "error", err) + return nil, fmt.Errorf("allocate network: %w", err) } + // Add network cleanup to stack + cu.Add(func() { + m.networkManager.ReleaseAllocation(ctx, &network.Allocation{ + InstanceID: id, + TAPDevice: netConfig.TAPDevice, + }) + }) } // 5. Start VMM and boot VM (reuses logic from create) log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) - // Cleanup network on failure - if stored.NetworkEnabled { - if netAlloc, err := m.networkManager.GetAllocation(ctx, id); err == nil { - m.networkManager.ReleaseAllocation(ctx, netAlloc) - } - } return nil, err } + // Success - release cleanup stack (prevent cleanup) + cu.Release() + // 6. 
Update metadata (set PID, StartedAt) now := time.Now() stored.StartedAt = &now From dbb46ecf9c3a85d31b449028c4f57bb2a8036c60 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Thu, 11 Dec 2025 11:28:52 -0500 Subject: [PATCH 2/3] Update config disk on start --- lib/instances/start.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/instances/start.go b/lib/instances/start.go index 4f010bd0..a29c3ad0 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -69,6 +69,9 @@ func (m *manager) startInstance( log.ErrorContext(ctx, "failed to allocate network", "instance_id", id, "error", err) return nil, fmt.Errorf("allocate network: %w", err) } + // Update stored metadata with new IP/MAC + stored.IP = netConfig.IP + stored.MAC = netConfig.MAC // Add network cleanup to stack cu.Add(func() { m.networkManager.ReleaseAllocation(ctx, &network.Allocation{ @@ -78,7 +81,15 @@ func (m *manager) startInstance( }) } - // 5. Start VMM and boot VM (reuses logic from create) + // 5. Regenerate config disk with new network configuration + instForConfig := &Instance{StoredMetadata: *stored} + log.DebugContext(ctx, "regenerating config disk", "instance_id", id) + if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil { + log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) + return nil, fmt.Errorf("create config disk: %w", err) + } + + // 6. Start VMM and boot VM (reuses logic from create) log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) @@ -88,7 +99,7 @@ func (m *manager) startInstance( // Success - release cleanup stack (prevent cleanup) cu.Release() - // 6. Update metadata (set PID, StartedAt) + // 7. 
Update metadata (set PID, StartedAt) now := time.Now() stored.StartedAt = &now From 514591fed9f0caf9118767ad66f0568c631cf6f9 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Thu, 11 Dec 2025 11:52:05 -0500 Subject: [PATCH 3/3] Run clean in CI --- .github/workflows/test.yml | 3 +++ Makefile | 8 +++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4f9aa7da..00d46f8c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,6 +34,9 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} + - name: Clean cached binaries + run: make clean + - name: Build run: make build diff --git a/Makefile b/Makefile index 8b06a3cf..07b2bf0c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep +.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean # Directory where local binaries will be installed BIN_DIR ?= $(CURDIR)/bin @@ -192,10 +192,8 @@ gen-jwt: $(GODOTENV) # Clean generated files and binaries clean: rm -rf $(BIN_DIR) - rm -f lib/oapi/oapi.go - rm -f lib/vmm/vmm.go - rm -f lib/exec/exec.pb.go - rm -f lib/exec/exec_grpc.pb.go + rm -rf lib/vmm/binaries/cloud-hypervisor/ + rm -rf lib/ingress/binaries/ rm -f lib/system/exec_agent/exec-agent # Prepare for release build (called by GoReleaser)