Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions .github/workflows/helm-chart-smoketest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,11 @@ jobs:
- name: label nodes
run: kubectl label node --all spin=true

# MicroK8s runs directly on the host, so both the host's containerd process and MicroK8s' would
# otherwise be detected by runtime-class-manager. As of writing, rcm will fail if more than one
# containerd process is detected when attempting to restart. So, we stop the host process until
# the shim has been installed and the test app has been confirmed to run.
- name: stop system containerd
if: matrix.config.type == 'microk8s'
run: sudo systemctl stop containerd
- name: verify only one installer pod with Succeeded status
# TODO: provisioning on k3d still leads to the first installer pod finishing with provisioner status Unknown and phase Failed
if: matrix.config.type != 'k3d'
run: |
timeout 60s bash -c 'until [[ "$(kubectl -n rcm get $(kubectl get pods -n rcm --no-headers -o name | grep install | head -n1) -o jsonpath="{.status.phase}" 2>/dev/null)" == "Succeeded" ]]; do sleep 2; done'

- name: run Spin App
run: |
Expand All @@ -186,7 +184,7 @@ jobs:
kubectl describe runtimeclass wasmtime-spin-v2

# Get install pod logs
# Note: there may be multiple pods pending fix in https://github.com/spinkube/runtime-class-manager/issues/140
# Note: there may be multiple install pods until the k3d fix for https://github.com/spinkube/runtime-class-manager/issues/140 lands
install_pod=$(kubectl get pods -n rcm --no-headers -o name | awk '{if ($1 ~ "-spin-v2-install") print $0}' | tail -n 1)
kubectl describe -n rcm $install_pod || true
kubectl logs -n rcm -c downloader $install_pod || true
Expand Down
3 changes: 2 additions & 1 deletion images/installer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ COPY . .
RUN CGO_ENABLED=0 go build -o rcm-node-installer ./cmd/node-installer
RUN /app/rcm-node-installer -h

FROM scratch
# Use busybox instead of scratch so that the nsenter utility, which the restarter logic relies on, is present in the image
FROM busybox:1.37
COPY --from=builder /app/rcm-node-installer /rcm-node-installer

ENTRYPOINT ["/rcm-node-installer"]
128 changes: 116 additions & 12 deletions internal/containerd/restart_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,35 +22,139 @@ package containerd
import (
"fmt"
"log/slog"
"os"
"os/exec"
"regexp"
"syscall"

"github.com/mitchellh/go-ps"
)

var psProcesses = ps.Processes

type restarter struct{}
type defaultRestarter struct{}

func NewRestarter() Restarter {
return restarter{}
func NewDefaultRestarter() Restarter {
return defaultRestarter{}
}

func (c restarter) Restart() error {
pid, err := getPid()
// Restart restarts containerd. When systemd units can be listed it runs
// "systemctl restart containerd" via nsenterCmd; when listing fails it falls
// back to locating the single containerd process and sending it SIGHUP.
func (c defaultRestarter) Restart() error {
	if _, listErr := listSystemdUnits(); listErr != nil {
		// systemd is not usable here, so signal the process directly.
		pid, err := getPid("containerd")
		if err != nil {
			return err
		}
		slog.Debug("found containerd process", "pid", pid)

		if killErr := syscall.Kill(pid, syscall.SIGHUP); killErr != nil {
			return fmt.Errorf("failed to send SIGHUP to containerd: %w", killErr)
		}
		return nil
	}

	// systemd answered, so let it perform the restart.
	output, err := nsenterCmd("systemctl", "restart", "containerd").CombinedOutput()
	slog.Debug(string(output))
	if err != nil {
		return fmt.Errorf("unable to restart containerd: %w", err)
	}
	return nil
}

// K0sRestarter restarts the systemd service that k0s runs as.
type K0sRestarter struct{}

// k0sServiceRe matches the name of the k0s systemd unit, which depends on the
// mode k0s is running in (k0sworker or k0scontroller). Compiled once at
// package scope instead of on every Restart call.
var k0sServiceRe = regexp.MustCompile("k0sworker|k0scontroller")

// Restart lists the systemd service units, picks the k0s unit out of the
// listing and restarts it with "systemctl restart" via nsenterCmd.
//
// It returns an error when the units cannot be listed, when no k0s unit is
// present in the listing, or when the restart command fails.
func (c K0sRestarter) Restart() error {
	// First, collect systemd units to determine which mode k0s is running in,
	// eg k0sworker or k0scontroller.
	units, err := listSystemdUnits()
	if err != nil {
		return fmt.Errorf("unable to list systemd units: %w", err)
	}

	service := k0sServiceRe.FindString(string(units))
	if service == "" {
		// FindString yields "" when nothing matches; bail out instead of
		// running `systemctl restart ""` and reporting a confusing failure.
		return fmt.Errorf("unable to find a k0sworker or k0scontroller systemd unit")
	}

	out, err := nsenterCmd("systemctl", "restart", service).CombinedOutput()
	slog.Debug(string(out))
	if err != nil {
		return fmt.Errorf("unable to restart %s: %w", service, err)
	}

	return nil
}

// K3sRestarter restarts k3s. It serves both stock K3s distros, which use
// systemd, and K3d, which does not.
type K3sRestarter struct{}

// Restart runs "systemctl restart k3s" via nsenterCmd when systemd units can
// be listed; otherwise it sends SIGHUP to the single k3s process.
func (c K3sRestarter) Restart() error {
	if _, listErr := listSystemdUnits(); listErr != nil {
		// TODO: this approach still leads to the behavior mentioned in https://github.com/spinframework/runtime-class-manager/issues/140:
		// The first pod's provisioner container exits with code 255, leading to pod status Unknown,
		// followed by the subsequent pod's provisioner container no-op-ing and finishing with status Completed.
		pid, err := getPid("k3s")
		if err != nil {
			return err
		}
		slog.Debug("found k3s process", "pid", pid)

		if killErr := syscall.Kill(pid, syscall.SIGHUP); killErr != nil {
			return fmt.Errorf("failed to send SIGHUP to k3s: %w", killErr)
		}
		return nil
	}

	// Listing units worked, so prefer a systemctl restart over killing the pid.
	output, err := nsenterCmd("systemctl", "restart", "k3s").CombinedOutput()
	slog.Debug(string(output))
	if err != nil {
		return fmt.Errorf("unable to restart k3s: %w", err)
	}
	return nil
}

// MicroK8sRestarter restarts the snap.microk8s.daemon-containerd systemd unit.
type MicroK8sRestarter struct{}

// Restart runs "systemctl restart snap.microk8s.daemon-containerd" via
// nsenterCmd, logs the command's combined output at debug level and returns a
// wrapped error when the command fails.
func (c MicroK8sRestarter) Restart() error {
	out, err := nsenterCmd("systemctl", "restart", "snap.microk8s.daemon-containerd").CombinedOutput()
	slog.Debug(string(out))
	if err != nil {
		return fmt.Errorf("unable to restart snap.microk8s.daemon-containerd: %w", err)
	}

	return nil
}

// RKE2Restarter restarts the systemd service that rke2 runs as.
type RKE2Restarter struct{}

// rke2ServiceRe matches the name of the rke2 systemd unit, which depends on
// the mode rke2 is running in (rke2-agent or rke2-server). Compiled once at
// package scope instead of on every Restart call.
var rke2ServiceRe = regexp.MustCompile("rke2-agent|rke2-server")

// Restart lists the systemd service units, picks the rke2 unit out of the
// listing and restarts it with "systemctl restart" via nsenterCmd.
//
// It returns an error when the units cannot be listed, when no rke2 unit is
// present in the listing, or when the restart command fails.
func (c RKE2Restarter) Restart() error {
	// First, collect systemd units to determine which mode rke2 is running in,
	// eg rke2-agent or rke2-server.
	units, err := listSystemdUnits()
	if err != nil {
		return fmt.Errorf("unable to list systemd units: %w", err)
	}

	service := rke2ServiceRe.FindString(string(units))
	if service == "" {
		// FindString yields "" when nothing matches; bail out instead of
		// running `systemctl restart ""` and reporting a confusing failure.
		return fmt.Errorf("unable to find an rke2-agent or rke2-server systemd unit")
	}

	out, err := nsenterCmd("systemctl", "restart", service).CombinedOutput()
	slog.Debug(string(out))
	if err != nil {
		return fmt.Errorf("unable to restart %s: %w", service, err)
	}

	return nil
}

func getPid() (int, error) {
// listSystemdUnits runs "systemctl list-units --type service" via nsenterCmd
// and returns the command's combined stdout/stderr along with its error.
func listSystemdUnits() ([]byte, error) {
	listCmd := nsenterCmd("systemctl", "list-units", "--type", "service")
	return listCmd.CombinedOutput()
}

// nsenterCmd builds (but does not start) an nsenter command that runs cmd
// inside the mount namespace found at $HOST_ROOT/proc/1/ns/mnt. The "--"
// separator keeps nsenter from interpreting cmd's own flags.
func nsenterCmd(cmd ...string) *exec.Cmd {
	mountNamespace := fmt.Sprintf("-m/%s/proc/1/ns/mnt", os.Getenv("HOST_ROOT"))

	args := make([]string, 0, len(cmd)+2)
	args = append(args, mountNamespace, "--")
	args = append(args, cmd...)

	return exec.Command("nsenter", args...) // #nosec G204
}

func getPid(executable string) (int, error) {
processes, err := psProcesses()
if err != nil {
return 0, fmt.Errorf("could not get processes: %w", err)
Expand All @@ -59,13 +163,13 @@ func getPid() (int, error) {
var containerdProcesses = []ps.Process{}

for _, process := range processes {
if process.Executable() == "containerd" {
if process.Executable() == executable {
containerdProcesses = append(containerdProcesses, process)
}
}

if len(containerdProcesses) != 1 {
return 0, fmt.Errorf("need exactly one containerd process, found: %d", len(containerdProcesses))
return 0, fmt.Errorf("need exactly one %s process, found: %d", executable, len(containerdProcesses))
}

return containerdProcesses[0].Pid(), nil
Expand Down
2 changes: 1 addition & 1 deletion internal/containerd/restart_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func Test_getPid(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
psProcesses = tt.psProccessesMock
got, err := getPid()
got, err := getPid("containerd")

if tt.wantErr {
require.Error(t, err)
Expand Down
15 changes: 12 additions & 3 deletions internal/preset/preset.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ type Env struct {
var Default = Settings{
ConfigPath: "/etc/containerd/config.toml",
Setup: func(_ Env) error { return nil },
Restarter: containerd.NewRestarter(),
Restarter: containerd.NewDefaultRestarter(),
}

func (s Settings) WithConfigPath(path string) Settings {
Expand All @@ -37,9 +37,16 @@ func (s Settings) WithSetup(setup func(env Env) error) Settings {
return s
}

var MicroK8s = Default.WithConfigPath("/var/snap/microk8s/current/args/containerd-template.toml")
// WithRestarter returns a copy of the settings whose Restarter field is set to
// restarter. The value receiver means the settings it is called on are left
// untouched.
func (s Settings) WithRestarter(restarter containerd.Restarter) Settings {
	updated := s
	updated.Restarter = restarter
	return updated
}

var MicroK8s = Default.WithConfigPath("/var/snap/microk8s/current/args/containerd-template.toml").
WithRestarter(containerd.MicroK8sRestarter{})

var RKE2 = Default.WithConfigPath("/var/lib/rancher/rke2/agent/etc/containerd/config.toml.tmpl").
WithRestarter(containerd.RKE2Restarter{}).
WithSetup(func(env Env) error {
_, err := env.HostFs.Stat(env.ConfigPath)
if err == nil {
Expand Down Expand Up @@ -75,9 +82,11 @@ var RKE2 = Default.WithConfigPath("/var/lib/rancher/rke2/agent/etc/containerd/co
return err
})

var K3s = RKE2.WithConfigPath("/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl")
var K3s = RKE2.WithConfigPath("/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl").
WithRestarter(containerd.K3sRestarter{})

var K0s = Default.WithConfigPath("/etc/k0s/containerd.d/config.toml").
WithRestarter(containerd.K0sRestarter{}).
WithSetup(func(env Env) error {
_, err := env.HostFs.Stat(env.ConfigPath)
if err == nil {
Expand Down
Loading