From 973ab340787afdfd3ff4cfcb5438fba0c982aff9 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 12:39:24 +0100 Subject: [PATCH 1/5] use return error handling in SetupRootless There is no good reason to use logrus and os.Exit() here, other parts of this function already return the error so do the same. The main podman process will exit then with the normal formatted error message. And also log an error about the last return which should never happen as we should have exited above if the re-exec worked or errored out. Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index 7acaa9bd19f..e65cc703818 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -59,6 +59,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } } } + + // return early as we are already re-exec or root here so no need to join the rootless userns. return nil } @@ -81,8 +83,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, // if there is no pid file, try to join existing containers, and create a pause process. ctrs, err := ic.Libpod.GetRunningContainers() if err != nil { - logrus.Error(err.Error()) - os.Exit(1) + return err } paths := []string{} @@ -99,11 +100,12 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } } if err != nil { - logrus.Error(fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err)) - os.Exit(1) + return fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err) } if became { os.Exit(ret) } + + logrus.Error("Internal error, failed to re-exec podman into user namespace without error. This should never happen, if you see this please report a bug") return nil } From 6a9ce66e5c7b40593c0d3e83253bf6f3e68cef4a Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 12:47:19 +0100 Subject: [PATCH 2/5] fix noMoveProcess in SetupRootless Based on the description in commit 63ef557 this was added so that the migrate command does not move the pause process into a separate cgroup. It should however not disable the rejoining of the userns when the pause process join failed. BEcause of this we end up calling migrate without a userns and that then can fail if there are actual contianer it tries to cleanup. Fixes: 63ef5576ed ("command: migrate doesn't move process to cgroup") Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index e65cc703818..8829e997ae7 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -76,9 +76,6 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, if became { os.Exit(ret) } - if noMoveProcess { - return nil - } // if there is no pid file, try to join existing containers, and create a pause process. ctrs, err := ic.Libpod.GetRunningContainers() @@ -95,7 +92,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths) } else { became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) - if err == nil { + if err == nil && !noMoveProcess { systemd.MovePauseProcessToScope(pausePidPath) } } From 118ec04065534fb12634f63f15fac95958dc8ee1 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 13:58:41 +0100 Subject: [PATCH 3/5] preallocate paths in SetupRootless Just a minor improvement as we know the size needed for the slice we can allocate it only once instead of the append having to resize it. Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index 8829e997ae7..e78f659a69e 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -83,7 +83,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, return err } - paths := []string{} + paths := make([]string, 0, len(ctrs)) for _, ctr := range ctrs { paths = append(paths, ctr.ConfigNoCopy().ConmonPidFile) } From 0647387bfed8a10b78b601b5d3805728300bdb37 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 15:34:16 +0100 Subject: [PATCH 4/5] SetupRootless handle case where conmon pid are not valid When trying to join the conmon pid to recreate the pause process based on the namespace it can be that the pid is no longer valid, i.e. when conmon crashed or was killed. Currently we have a big issue that can be reproduced using: $ podman run -d quay.io/libpod/testimage:20241011 sleep 100 $ killall -9 conmon $ killall catatonit All commands would fail as we keep trying to rejoin the namespace of the non existing conmon process. So to address that fall back to creating a new namespace if we fail to join the conmon pids. Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 18 ++++++++++++++---- test/system/550-pause-process.bats | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index e78f659a69e..7c345aeed3b 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -4,6 +4,7 @@ package abi import ( "context" + "errors" "fmt" "os" @@ -14,6 +15,7 @@ import ( "go.podman.io/common/pkg/config" "go.podman.io/common/pkg/systemd" "go.podman.io/storage/pkg/unshare" + "golang.org/x/sys/unix" ) // Default path for system runtime state @@ -90,14 +92,22 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, if len(paths) > 0 { became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths) + // TryJoinFromFilePaths fails with ESRCH when the PID are all not valid anymore + // In this case create a new userns. + if errors.Is(err, unix.ESRCH) { + logrus.Warnf("Failed to join existing conmon namespace, creating a new rootless podman user namespace. If there are existing container running please stop them with %q to reset the namespace", os.Args[0]+" system migrate") + became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) + } } else { + logrus.Info("Creating a new rootless user namespace") became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) - if err == nil && !noMoveProcess { - systemd.MovePauseProcessToScope(pausePidPath) - } } + if err != nil { - return fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err) + return fmt.Errorf("fatal error, invalid internal status, unable to create a new pause process: %w. Try running %q and if that doesn't work reboot to recover", err, os.Args[0]+" system migrate") + } + if !noMoveProcess { + systemd.MovePauseProcessToScope(pausePidPath) } if became { os.Exit(ret) diff --git a/test/system/550-pause-process.bats b/test/system/550-pause-process.bats index da657e77e1c..7818f36bb9f 100644 --- a/test/system/550-pause-process.bats +++ b/test/system/550-pause-process.bats @@ -149,3 +149,30 @@ function _check_pause_process() { # This used to hang trying to unmount the netns. run_podman rm -f -t0 $cname } + +# regression test for https://issues.redhat.com/browse/RHEL-130252 +@test "podman system migrate works with conmon being killed" { + skip_if_not_rootless "pause process is only used as rootless" + skip_if_remote "system migrate not supported via remote" + + local cname=c-$(safename) + run_podman run --name $cname --stop-signal SIGKILL -d $IMAGE sleep 100 + + run_podman inspect --format '{{.State.ConmonPid}}' $cname + conmon_pid="$output" + + # check for pause pid and then kill it + _check_pause_process + kill -9 $pause_pid + + # kill conmon + kill -9 $conmon_pid + + # Use podman system migrate to stop the currently running pause process + run_podman 125 system migrate + assert "$output" =~ "Failed to join existing conmon namespace" "fallback to userns creating" + assert "$output" =~ "conmon process killed" + + # Now the removal command should work fine without errors. + run_podman rm $cname +} From 9538a7d9760050763d0b3f5882939bb1b9ec7283 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 15:58:44 +0100 Subject: [PATCH 5/5] rootless_linux.c: use shortcut for system commands There doesn't seem any reason why the system commands should not join the userns. In particular the main commands use ParentNSRequired and UnshareNSRequired when they don't want to be joined to the main userns. Since the system command don't set these the go code does the join and re-exec anyway so might as well use the shortcut to speed that up. Signed-off-by: Paul Holzinger --- pkg/rootless/rootless_linux.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 3d74af6a6ca..644c8ef9a20 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -384,8 +384,7 @@ can_use_shortcut (char **argv) || strcmp (argv[argc], "version") == 0 || strcmp (argv[argc], "context") == 0 || strcmp (argv[argc], "search") == 0 - || strcmp (argv[argc], "compose") == 0 - || (strcmp (argv[argc], "system") == 0 && argv[argc+1] && strcmp (argv[argc+1], "service") != 0)) + || strcmp (argv[argc], "compose") == 0) { ret = false; break;