From 53b7f9bfb8020513b13be77bc0817d9ce585fa55 Mon Sep 17 00:00:00 2001 From: Vaughn Dice Date: Mon, 5 May 2025 11:27:09 -0600 Subject: [PATCH 1/4] feat(installer/Dockerfile): switch to busybox base image Signed-off-by: Vaughn Dice --- images/installer/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/images/installer/Dockerfile b/images/installer/Dockerfile index f9b1fb97..151cbcdf 100644 --- a/images/installer/Dockerfile +++ b/images/installer/Dockerfile @@ -10,7 +10,8 @@ COPY . . RUN CGO_ENABLED=0 go build -o rcm-node-installer ./cmd/node-installer RUN /app/rcm-node-installer -h -FROM scratch +# Using busybox instead of scratch so that the nsenter utility is present, as used in restarter logic +FROM busybox COPY --from=builder /app/rcm-node-installer /rcm-node-installer ENTRYPOINT ["/rcm-node-installer"] From 872704f34a6b4632424a95850c3e42fa23d4a73d Mon Sep 17 00:00:00 2001 From: Vaughn Dice Date: Mon, 5 May 2025 11:27:37 -0600 Subject: [PATCH 2/4] feat(restarter): add k8s distro-specific restarters Signed-off-by: Vaughn Dice --- internal/containerd/restart_unix.go | 135 +++++++++++++++++++++++++--- internal/preset/preset.go | 15 +++- 2 files changed, 135 insertions(+), 15 deletions(-) diff --git a/internal/containerd/restart_unix.go b/internal/containerd/restart_unix.go index d13e42ba..3b8e2ba3 100644 --- a/internal/containerd/restart_unix.go +++ b/internal/containerd/restart_unix.go @@ -22,6 +22,9 @@ package containerd import ( "fmt" "log/slog" + "os" + "os/exec" + "regexp" "syscall" "github.com/mitchellh/go-ps" @@ -29,28 +32,136 @@ import ( var psProcesses = ps.Processes -type restarter struct{} +type defaultRestarter struct{} -func NewRestarter() Restarter { - return restarter{} +func NewDefaultRestarter() Restarter { + return defaultRestarter{} } -func (c restarter) Restart() error { - pid, err := getPid() +func (c defaultRestarter) Restart() error { + // If systemctl exists, use that, otherwise go pid + if UsesSystemd() { + out, err := nsenterCmd("systemctl", "restart", "containerd").CombinedOutput() + slog.Debug(string(out)) + if err != nil { + return fmt.Errorf("unable to restart containerd: %w", err) + } + } else { + pid, err := getPid("containerd") + if err != nil { + return err + } + slog.Debug("found containerd process", "pid", pid) + + err = syscall.Kill(pid, syscall.SIGHUP) + if err != nil { + return fmt.Errorf("failed to send SIGHUP to containerd: %w", err) + } + } + + return nil +} + +type K0sRestarter struct{} + +func (c K0sRestarter) Restart() error { + // First, collect systemd units to determine which mode k0s is running in, eg + // k0sworker or k0scontroller + units, err := nsenterCmd("systemctl", "list-units").CombinedOutput() + if err != nil { + return fmt.Errorf("unable to list systemd units: %w", err) + } + service := regexp.MustCompile("k0sworker|k0scontroller").FindString(string(units)) + + out, err := nsenterCmd("systemctl", "restart", service).CombinedOutput() + slog.Debug(string(out)) + if err != nil { + return fmt.Errorf("unable to restart %s: %w", service, err) + } + + return nil +} + +type K3sRestarter struct{} + +func (c K3sRestarter) Restart() error { + // This restarter will be used both for stock K3s distros + // using systemd as well as K3d, which does not. + if UsesSystemd() { + out, err := nsenterCmd("systemctl", "restart", "k3s").CombinedOutput() + slog.Debug(string(out)) + if err != nil { + return fmt.Errorf("unable to restart k3s: %w", err) + } + } else { + // TODO: this approach still leads to the behavior mentioned in https://github.com/spinframework/runtime-class-manager/issues/140: + // The first pod's provisioner container exits with code 255, leading to pod status Unknown, + // followed by the subsequent pod's provisioner container no-op-ing and finishing with status Completed. + pid, err := getPid("k3s") + if err != nil { + return err + } + slog.Debug("found k3s process", "pid", pid) + + err = syscall.Kill(pid, syscall.SIGHUP) + if err != nil { + return fmt.Errorf("failed to send SIGHUP to k3s: %w", err) + } + } + + return nil +} + +type MicroK8sRestarter struct{} + +func (c MicroK8sRestarter) Restart() error { + out, err := nsenterCmd("systemctl", "restart", "snap.microk8s.daemon-containerd").CombinedOutput() + slog.Debug(string(out)) if err != nil { - return err + return fmt.Errorf("unable to restart snap.microk8s.daemon-containerd: %w", err) } - slog.Debug("found containerd process", "pid", pid) - err = syscall.Kill(pid, syscall.SIGHUP) + return nil +} +type RKE2Restarter struct{} + +func (c RKE2Restarter) Restart() error { + // First, collect systemd units to determine which mode rke2 is running in, eg + // rke2-agent or rke2-server + units, err := nsenterCmd("systemctl", "list-units").CombinedOutput() if err != nil { - return fmt.Errorf("failed to send SIGHUP to containerd: %w", err) + return fmt.Errorf("unable to list systemd units: %w", err) } + service := regexp.MustCompile("rke2-agent|rke2-server").FindString(string(units)) + + out, err := nsenterCmd("systemctl", "restart", service).CombinedOutput() + slog.Debug(string(out)) + if err != nil { + return fmt.Errorf("unable to restart %s: %w", service, err) + } + return nil } -func getPid() (int, error) { +// TODO: lifted and amended from https://github.com/spinframework/runtime-class-manager/pull/387 +// +// UsesSystemd checks if the system is using systemd +func UsesSystemd() bool { + cmd := nsenterCmd("systemctl", "list-units", "|", "grep", "-q", "containerd.service") + if err := cmd.Run(); err != nil { + slog.Debug("Error with systemctl: \n", "error", err) + return false + } + return true +} + +func nsenterCmd(cmd ...string) *exec.Cmd { + return exec.Command("nsenter", + append([]string{fmt.Sprintf("-m/%s/proc/1/ns/mnt", os.Getenv("HOST_ROOT")), "--"}, cmd...)...) // #nosec G204 +} + +func getPid(executable string) (int, error) { processes, err := psProcesses() if err != nil { return 0, fmt.Errorf("could not get processes: %w", err) @@ -59,13 +170,13 @@ func getPid() (int, error) { var containerdProcesses = []ps.Process{} for _, process := range processes { - if process.Executable() == "containerd" { + if process.Executable() == executable { containerdProcesses = append(containerdProcesses, process) } } if len(containerdProcesses) != 1 { - return 0, fmt.Errorf("need exactly one containerd process, found: %d", len(containerdProcesses)) + return 0, fmt.Errorf("need exactly one %s process, found: %d", executable, len(containerdProcesses)) } return containerdProcesses[0].Pid(), nil diff --git a/internal/preset/preset.go b/internal/preset/preset.go index 7ab9b454..a4a703b7 100644 --- a/internal/preset/preset.go +++ b/internal/preset/preset.go @@ -24,7 +24,7 @@ type Env struct { var Default = Settings{ ConfigPath: "/etc/containerd/config.toml", Setup: func(_ Env) error { return nil }, - Restarter: containerd.NewRestarter(), + Restarter: containerd.NewDefaultRestarter(), } func (s Settings) WithConfigPath(path string) Settings { @@ -37,9 +37,16 @@ func (s Settings) WithSetup(setup func(env Env) error) Settings { return s } -var MicroK8s = Default.WithConfigPath("/var/snap/microk8s/current/args/containerd-template.toml") +func (s Settings) WithRestarter(restarter containerd.Restarter) Settings { + s.Restarter = restarter + return s +} + +var MicroK8s = Default.WithConfigPath("/var/snap/microk8s/current/args/containerd-template.toml"). + WithRestarter(containerd.MicroK8sRestarter{}) var RKE2 = Default.WithConfigPath("/var/lib/rancher/rke2/agent/etc/containerd/config.toml.tmpl"). + WithRestarter(containerd.RKE2Restarter{}). WithSetup(func(env Env) error { _, err := env.HostFs.Stat(env.ConfigPath) if err == nil { @@ -75,9 +82,11 @@ var RKE2 = Default.WithConfigPath("/var/lib/rancher/rke2/agent/etc/containerd/co return err }) -var K3s = RKE2.WithConfigPath("/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl") +var K3s = RKE2.WithConfigPath("/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl"). + WithRestarter(containerd.K3sRestarter{}) var K0s = Default.WithConfigPath("/etc/k0s/containerd.d/config.toml"). + WithRestarter(containerd.K0sRestarter{}). WithSetup(func(env Env) error { _, err := env.HostFs.Stat(env.ConfigPath) if err == nil { From 17531e801fd249ea014708004a7d2bc9bce5a631 Mon Sep 17 00:00:00 2001 From: Vaughn Dice Date: Thu, 8 May 2025 17:17:28 -0600 Subject: [PATCH 3/4] ci(.github): add installer pod status check to helm smoke test Signed-off-by: Vaughn Dice --- .github/workflows/helm-chart-smoketest.yml | 14 ++++++-------- internal/containerd/restart_unix_test.go | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/helm-chart-smoketest.yml b/.github/workflows/helm-chart-smoketest.yml index 51ff6050..3d451a1b 100644 --- a/.github/workflows/helm-chart-smoketest.yml +++ b/.github/workflows/helm-chart-smoketest.yml @@ -158,13 +158,11 @@ jobs: - name: label nodes run: kubectl label node --all spin=true - # MicroK8s runs directly on the host, so both the host's containerd process and MicroK8s' would - # otherwise be detected by runtime-class-manager. As of writing, rcm will fail if more than one - # containerd process is detected when attempting to restart. So, we stop the host process until - # the shim has been installed and the test app has been confirmed to run. - - name: stop system containerd - if: matrix.config.type == 'microk8s' - run: sudo systemctl stop containerd + - name: verify only one installer pod with Succeeded status + # TODO: provisioning on k3d still leads to the first installer pod finishing with provisioner status Unknown and phase Failed + if: matrix.config.type != 'k3d' + run: | + timeout 60s bash -c 'until [[ "$(kubectl -n rcm get $(kubectl get pods -n rcm --no-headers -o name | grep install | head -n1) -o jsonpath="{.status.phase}" 2>/dev/null)" == "Succeeded" ]]; do sleep 2; done' - name: run Spin App run: | @@ -186,7 +184,7 @@ jobs: kubectl describe runtimeclass wasmtime-spin-v2 # Get install pod logs - # Note: there may be multiple pods pending fix in https://github.com/spinkube/runtime-class-manager/issues/140 + # Note: there may be multiple pods pending k3d fix for issue https://github.com/spinkube/runtime-class-manager/issues/140 install_pod=$(kubectl get pods -n rcm --no-headers -o name | awk '{if ($1 ~ "-spin-v2-install") print $0}' | tail -n 1) kubectl describe -n rcm $install_pod || true kubectl logs -n rcm -c downloader $install_pod || true diff --git a/internal/containerd/restart_unix_test.go b/internal/containerd/restart_unix_test.go index 866f6847..82ba2192 100644 --- a/internal/containerd/restart_unix_test.go +++ b/internal/containerd/restart_unix_test.go @@ -57,7 +57,7 @@ func Test_getPid(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { psProcesses = tt.psProccessesMock - got, err := getPid() + got, err := getPid("containerd") if tt.wantErr { require.Error(t, err) From 7ca534a7a0504357a00bf988591d6052cb7f7e16 Mon Sep 17 00:00:00 2001 From: Vaughn Dice Date: Fri, 9 May 2025 13:08:06 -0600 Subject: [PATCH 4/4] ref(*): use listSystemdUnits as systemd check Signed-off-by: Vaughn Dice --- images/installer/Dockerfile | 2 +- internal/containerd/restart_unix.go | 27 ++++++++++----------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/images/installer/Dockerfile b/images/installer/Dockerfile index 151cbcdf..856026c3 100644 --- a/images/installer/Dockerfile +++ b/images/installer/Dockerfile @@ -11,7 +11,7 @@ RUN CGO_ENABLED=0 go build -o rcm-node-installer ./cmd/node-installer RUN /app/rcm-node-installer -h # Using busybox instead of scratch so that the nsenter utility is present, as used in restarter logic -FROM busybox +FROM busybox:1.37 COPY --from=builder /app/rcm-node-installer /rcm-node-installer ENTRYPOINT ["/rcm-node-installer"] diff --git a/internal/containerd/restart_unix.go b/internal/containerd/restart_unix.go index 3b8e2ba3..ffe2550a 100644 --- a/internal/containerd/restart_unix.go +++ b/internal/containerd/restart_unix.go @@ -39,8 +39,8 @@ func NewDefaultRestarter() Restarter { } func (c defaultRestarter) Restart() error { - // If systemctl exists, use that, otherwise go pid - if UsesSystemd() { + // If listing systemd units succeeds, prefer systemctl restart; otherwise kill pid + if _, err := listSystemdUnits(); err == nil { out, err := nsenterCmd("systemctl", "restart", "containerd").CombinedOutput() slog.Debug(string(out)) if err != nil { @@ -67,7 +67,7 @@ type K0sRestarter struct{} func (c K0sRestarter) Restart() error { // First, collect systemd units to determine which mode k0s is running in, eg // k0sworker or k0scontroller - units, err := nsenterCmd("systemctl", "list-units").CombinedOutput() + units, err := listSystemdUnits() if err != nil { return fmt.Errorf("unable to list systemd units: %w", err) } @@ -85,9 +85,10 @@ func (c K0sRestarter) Restart() error { type K3sRestarter struct{} func (c K3sRestarter) Restart() error { - // This restarter will be used both for stock K3s distros - // using systemd as well as K3d, which does not. - if UsesSystemd() { + // This restarter will be used both for stock K3s distros, which use systemd as well as K3d, which does not. + + // If listing systemd units succeeds, prefer systemctl restart; otherwise kill pid + if _, err := listSystemdUnits(); err == nil { out, err := nsenterCmd("systemctl", "restart", "k3s").CombinedOutput() slog.Debug(string(out)) if err != nil { @@ -129,7 +130,7 @@ type RKE2Restarter struct{} func (c RKE2Restarter) Restart() error { // First, collect systemd units to determine which mode rke2 is running in, eg // rke2-agent or rke2-server - units, err := nsenterCmd("systemctl", "list-units").CombinedOutput() + units, err := listSystemdUnits() if err != nil { return fmt.Errorf("unable to list systemd units: %w", err) } @@ -144,16 +145,8 @@ func (c RKE2Restarter) Restart() error { return nil } -// TODO: lifted and amended from https://github.com/spinframework/runtime-class-manager/pull/387 -// -// UsesSystemd checks if the system is using systemd -func UsesSystemd() bool { - cmd := nsenterCmd("systemctl", "list-units", "|", "grep", "-q", "containerd.service") - if err := cmd.Run(); err != nil { - slog.Debug("Error with systemctl: \n", "error", err) - return false - } - return true +func listSystemdUnits() ([]byte, error) { + return nsenterCmd("systemctl", "list-units", "--type", "service").CombinedOutput() } func nsenterCmd(cmd ...string) *exec.Cmd {