Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions pkg/domain/infra/abi/system_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package abi

import (
"context"
"errors"
"fmt"
"os"

Expand All @@ -14,6 +15,7 @@ import (
"go.podman.io/common/pkg/config"
"go.podman.io/common/pkg/systemd"
"go.podman.io/storage/pkg/unshare"
"golang.org/x/sys/unix"
)

// Default path for system runtime state
Expand Down Expand Up @@ -59,6 +61,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
}
}
}

// Return early: at this point we have either already re-exec'd or are root, so there is no need to join the rootless userns.
return nil
}

Expand All @@ -74,36 +78,41 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
if became {
os.Exit(ret)
}
if noMoveProcess {
return nil
}

// if there is no pid file, try to join existing containers, and create a pause process.
ctrs, err := ic.Libpod.GetRunningContainers()
if err != nil {
logrus.Error(err.Error())
os.Exit(1)
return err
}

paths := []string{}
paths := make([]string, 0, len(ctrs))
for _, ctr := range ctrs {
paths = append(paths, ctr.ConfigNoCopy().ConmonPidFile)
}

if len(paths) > 0 {
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
// TryJoinFromFilePaths fails with ESRCH when none of the PIDs are valid anymore.
// In this case, create a new userns.
if errors.Is(err, unix.ESRCH) {
logrus.Warnf("Failed to join existing conmon namespace, creating a new rootless podman user namespace. If there are existing container running please stop them with %q to reset the namespace", os.Args[0]+" system migrate")
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
}
} else {
logrus.Info("Creating a new rootless user namespace")
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
if err == nil {
systemd.MovePauseProcessToScope(pausePidPath)
}
}

if err != nil {
logrus.Error(fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err))
os.Exit(1)
return fmt.Errorf("fatal error, invalid internal status, unable to create a new pause process: %w. Try running %q and if that doesn't work reboot to recover", err, os.Args[0]+" system migrate")
}
if !noMoveProcess {
systemd.MovePauseProcessToScope(pausePidPath)
}
if became {
os.Exit(ret)
}

logrus.Error("Internal error, failed to re-exec podman into user namespace without error. This should never happen, if you see this please report a bug")
return nil
}
3 changes: 1 addition & 2 deletions pkg/rootless/rootless_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,7 @@ can_use_shortcut (char **argv)
|| strcmp (argv[argc], "version") == 0
|| strcmp (argv[argc], "context") == 0
|| strcmp (argv[argc], "search") == 0
|| strcmp (argv[argc], "compose") == 0
|| (strcmp (argv[argc], "system") == 0 && argv[argc+1] && strcmp (argv[argc+1], "service") != 0))
|| strcmp (argv[argc], "compose") == 0)
{
ret = false;
break;
Expand Down
27 changes: 27 additions & 0 deletions test/system/550-pause-process.bats
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,30 @@ function _check_pause_process() {
# This used to hang trying to unmount the netns.
run_podman rm -f -t0 $cname
}

# regression test for https://issues.redhat.com/browse/RHEL-130252
# Scenario: a container is running, then both the rootless pause process and
# the container's conmon are killed with SIGKILL. `podman system migrate` is
# then run and its output is checked for the "Failed to join existing conmon
# namespace" warning and for "conmon process killed"; finally the container
# must be removable.
@test "podman system migrate works with conmon being killed" {
# Only meaningful for local, rootless podman (see the skip messages).
skip_if_not_rootless "pause process is only used as rootless"
skip_if_remote "system migrate not supported via remote"

# Start a detached container that just sleeps, so a conmon process exists.
local cname=c-$(safename)
run_podman run --name $cname --stop-signal SIGKILL -d $IMAGE sleep 100

# Remember conmon's PID so it can be killed below.
run_podman inspect --format '{{.State.ConmonPid}}' $cname
conmon_pid="$output"

# check for pause pid and then kill it
# NOTE(review): $pause_pid is presumably set by _check_pause_process (defined
# earlier in this file, outside this view) -- confirm.
_check_pause_process
kill -9 $pause_pid

# kill conmon
kill -9 $conmon_pid

# With the pause process and conmon both gone, run system migrate.
# NOTE(review): the leading 125 is presumably the exit status run_podman is
# told to expect, i.e. the command itself is expected to fail -- confirm
# against the run_podman helper.
run_podman 125 system migrate
# The warning shows podman could not join the old conmon namespace and fell
# back to creating a new user namespace.
assert "$output" =~ "Failed to join existing conmon namespace" "fallback to userns creating"
# The output must also report that conmon was killed.
assert "$output" =~ "conmon process killed"

# Now the removal command should work fine without errors.
run_podman rm $cname
}