diff --git a/packages/orchestrator/cmd/smoketest/smoke_test.go b/packages/orchestrator/cmd/smoketest/smoke_test.go index 27d00764c7..d3b2b6df1a 100644 --- a/packages/orchestrator/cmd/smoketest/smoke_test.go +++ b/packages/orchestrator/cmd/smoketest/smoke_test.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "testing" "time" @@ -285,7 +286,7 @@ func findOrBuildEnvd(t *testing.T) string { cmd := exec.CommandContext(t.Context(), "go", "build", "-o", binPath, ".") //nolint:gosec // trusted input cmd.Dir = envdDir - cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOOS=linux", "GOARCH=amd64") + cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOOS=linux", "GOARCH="+runtime.GOARCH) out, err := cmd.CombinedOutput() if err != nil { t.Skipf("failed to build envd: %v\n%s", err, out) diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go index 1af2db0ec8..46b82f5722 100644 --- a/packages/orchestrator/pkg/sandbox/fc/client.go +++ b/packages/orchestrator/pkg/sandbox/fc/client.go @@ -3,6 +3,7 @@ package fc import ( "context" "fmt" + "runtime" "github.com/bits-and-blooms/bitset" "github.com/firecracker-microvm/firecracker-go-sdk" @@ -18,6 +19,8 @@ import ( "github.com/e2b-dev/infra/packages/shared/pkg/utils" ) +const archARM64 = "arm64" + type apiClient struct { client *client.Firecracker } @@ -326,7 +329,14 @@ func (c *apiClient) setMachineConfig( memoryMB int64, hugePages bool, ) error { - smt := true + // SMT (Simultaneous Multi-Threading / Hyper-Threading) must be disabled on + // ARM64: ARM cores generally expose one hardware thread per core (topologies + // such as big.LITTLE mix core types instead), and Firecracker only allows + // smt=true on x86, rejecting the machine config on aarch64. 
+ // See: https://github.com/firecracker-microvm/firecracker/blob/main/docs/cpu_templates/cpu-features.md + // We use runtime.GOARCH (not TARGET_ARCH) because the orchestrator binary + // always runs on the same architecture as Firecracker. + smt := runtime.GOARCH != archARM64 trackDirtyPages := false machineConfig := &models.MachineConfiguration{ VcpuCount: &vCPUCount, diff --git a/packages/orchestrator/pkg/sandbox/uffd/testutils/page_mmap.go b/packages/orchestrator/pkg/sandbox/uffd/testutils/page_mmap.go index 929a396702..ac17b0788f 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/testutils/page_mmap.go +++ b/packages/orchestrator/pkg/sandbox/uffd/testutils/page_mmap.go @@ -1,6 +1,7 @@ package testutils import ( + "errors" "fmt" "math" "syscall" @@ -20,7 +21,16 @@ func NewPageMmap(t *testing.T, size, pagesize uint64) ([]byte, uintptr, error) { } if pagesize == header.HugepageSize { - return newMmap(t, size, header.HugepageSize, unix.MAP_HUGETLB|unix.MAP_HUGE_2MB) + b, addr, err := newMmap(t, size, header.HugepageSize, unix.MAP_HUGETLB|unix.MAP_HUGE_2MB) + // Hugepage allocation can fail with ENOMEM on CI runners that don't + // have enough (or any) hugepages pre-allocated in /proc/sys/vm/nr_hugepages. + // Skip gracefully rather than failing the test. 
+ if err != nil && errors.Is(err, syscall.ENOMEM) { + pages := int(math.Ceil(float64(size) / float64(header.HugepageSize))) + t.Skipf("skipping: hugepage mmap failed (need %d hugepages): %v", pages, err) + } + + return b, addr, err } return nil, 0, fmt.Errorf("unsupported page size: %d", pagesize) diff --git a/packages/orchestrator/pkg/service/machineinfo/main.go b/packages/orchestrator/pkg/service/machineinfo/main.go index 27d280da8b..0934880df7 100644 --- a/packages/orchestrator/pkg/service/machineinfo/main.go +++ b/packages/orchestrator/pkg/service/machineinfo/main.go @@ -22,13 +22,31 @@ func Detect() (MachineInfo, error) { } if len(info) > 0 { - if info[0].Family == "" || info[0].Model == "" { + family := info[0].Family + model := info[0].Model + + // On ARM64, gopsutil doesn't populate Family/Model from /proc/cpuinfo. + // Provide fallback values so callers don't get an error. + // NOTE: Using a generic "arm64" family treats all ARM64 CPUs as compatible. + // This works for same-host snapshot restore but cross-host restore between + // different ARM CPU implementations (e.g. Graviton2 vs Graviton3) may fail. + // For finer granularity, consider using MIDR_EL1 register values. + if runtime.GOARCH == "arm64" { + if family == "" { + family = "arm64" + } + if model == "" { + model = "0" + } + } + + if family == "" || model == "" { return MachineInfo{}, fmt.Errorf("unable to detect CPU platform from CPU info: %+v", info[0]) } return MachineInfo{ - Family: info[0].Family, - Model: info[0].Model, + Family: family, + Model: model, ModelName: info[0].ModelName, Flags: info[0].Flags, Arch: runtime.GOARCH,