Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}

- name: Clean cached binaries
run: make clean

- name: Build
run: make build

Expand Down
8 changes: 3 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SHELL := /bin/bash
.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep
.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean

# Directory where local binaries will be installed
BIN_DIR ?= $(CURDIR)/bin
Expand Down Expand Up @@ -192,10 +192,8 @@ gen-jwt: $(GODOTENV)
# Clean generated files and binaries
clean:
rm -rf $(BIN_DIR)
rm -f lib/oapi/oapi.go
rm -f lib/vmm/vmm.go
rm -f lib/exec/exec.pb.go
rm -f lib/exec/exec_grpc.pb.go
rm -rf lib/vmm/binaries/cloud-hypervisor/
rm -rf lib/ingress/binaries/
rm -f lib/system/exec_agent/exec-agent

# Prepare for release build (called by GoReleaser)
Expand Down
64 changes: 36 additions & 28 deletions lib/instances/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/onkernel/hypeman/lib/logger"
"github.com/onkernel/hypeman/lib/network"
"go.opentelemetry.io/otel/trace"
"gvisor.dev/gvisor/pkg/cleanup"
)

// startInstance starts a stopped instance
Expand Down Expand Up @@ -52,46 +53,53 @@ func (m *manager) startInstance(
return nil, fmt.Errorf("get image: %w", err)
}

// 4. Recreate network allocation if network enabled
// Setup cleanup stack for automatic rollback on errors
cu := cleanup.Make(func() {})
defer cu.Clean()

// 4. Allocate fresh network if network enabled
var netConfig *network.NetworkConfig
if stored.NetworkEnabled {
log.DebugContext(ctx, "recreating network for start", "instance_id", id, "network", "default")
if err := m.networkManager.RecreateAllocation(ctx, id); err != nil {
log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err)
return nil, fmt.Errorf("recreate network: %w", err)
}
// Get the network config for VM configuration
netAlloc, err := m.networkManager.GetAllocation(ctx, id)
log.DebugContext(ctx, "allocating network for start", "instance_id", id, "network", "default")
netConfig, err = m.networkManager.CreateAllocation(ctx, network.AllocateRequest{
InstanceID: id,
InstanceName: stored.Name,
})
if err != nil {
log.ErrorContext(ctx, "failed to get network allocation", "instance_id", id, "error", err)
// Cleanup network on failure
if netAlloc != nil {
m.networkManager.ReleaseAllocation(ctx, netAlloc)
}
return nil, fmt.Errorf("get network allocation: %w", err)
}
netConfig = &network.NetworkConfig{
TAPDevice: netAlloc.TAPDevice,
IP: netAlloc.IP,
MAC: netAlloc.MAC,
Netmask: "255.255.255.0", // Default netmask
log.ErrorContext(ctx, "failed to allocate network", "instance_id", id, "error", err)
return nil, fmt.Errorf("allocate network: %w", err)
}
// Update stored metadata with new IP/MAC
stored.IP = netConfig.IP
stored.MAC = netConfig.MAC
// Add network cleanup to stack
cu.Add(func() {
m.networkManager.ReleaseAllocation(ctx, &network.Allocation{
InstanceID: id,
TAPDevice: netConfig.TAPDevice,
})
})
}

// 5. Start VMM and boot VM (reuses logic from create)
// 5. Regenerate config disk with new network configuration
instForConfig := &Instance{StoredMetadata: *stored}
log.DebugContext(ctx, "regenerating config disk", "instance_id", id)
if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil {
log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err)
return nil, fmt.Errorf("create config disk: %w", err)
}

// 6. Start VMM and boot VM (reuses logic from create)
log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id)
if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil {
log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err)
// Cleanup network on failure
if stored.NetworkEnabled {
if netAlloc, err := m.networkManager.GetAllocation(ctx, id); err == nil {
m.networkManager.ReleaseAllocation(ctx, netAlloc)
}
}
return nil, err
}

// 6. Update metadata (set PID, StartedAt)
// Success - release cleanup stack (prevent cleanup)
cu.Release()

// 7. Update metadata (set PID, StartedAt)
now := time.Now()
stored.StartedAt = &now

Expand Down