From c3f3dd36c4feac5c77c62cbb6a36d8677e99c7e6 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 12:39:24 +0100 Subject: [PATCH 1/5] use return error handling in SetupRootless There is no good reason to use logrus and os.Exit() here, other parts of this function already return the error so do the same. The main podman process will exit then with the normal formatted error message. And also log an error about the last return which should never happen as we should have exited above if the re-exec worked or errored out. Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index 5348a0d9722..4a9ebbd77f1 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -59,6 +59,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } } } + + // return early as we are already re-exec or root here so no need to join the rootless userns. return nil } @@ -81,8 +83,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, // if there is no pid file, try to join existing containers, and create a pause process. ctrs, err := ic.Libpod.GetRunningContainers() if err != nil { - logrus.Error(err.Error()) - os.Exit(1) + return err } paths := []string{} @@ -99,11 +100,12 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } } if err != nil { - logrus.Error(fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err)) - os.Exit(1) + return fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err) } if became { os.Exit(ret) } + + logrus.Error("Internal error, failed to re-exec podman into user namespace without error. This should never happen, if you see this please report a bug") return nil } From ed9d298fb492ed416dadfbab3704737fbb11480f Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 12:47:19 +0100 Subject: [PATCH 2/5] fix noMoveProcess in SetupRootless Based on the description in commit 63ef557 this was added so that the migrate command does not move the pause process into a separate cgroup. It should however not disable the rejoining of the userns when the pause process join failed. BEcause of this we end up calling migrate without a userns and that then can fail if there are actual contianer it tries to cleanup. Fixes: 63ef5576ed ("command: migrate doesn't move process to cgroup") Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index 4a9ebbd77f1..1cfbef3ab26 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -76,9 +76,6 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, if became { os.Exit(ret) } - if noMoveProcess { - return nil - } // if there is no pid file, try to join existing containers, and create a pause process. ctrs, err := ic.Libpod.GetRunningContainers() @@ -95,7 +92,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths) } else { became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) - if err == nil { + if err == nil && !noMoveProcess { systemd.MovePauseProcessToScope(pausePidPath) } } From 4833357c724f76e73af4d1eb9fe6e01a7d235751 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 13:58:41 +0100 Subject: [PATCH 3/5] preallocate paths in SetupRootless Just a minor improvement as we know the size needed for the slice we can allocate it only once instead of the append having to resize it. Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index 1cfbef3ab26..da16961e693 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -83,7 +83,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, return err } - paths := []string{} + paths := make([]string, 0, len(ctrs)) for _, ctr := range ctrs { paths = append(paths, ctr.ConfigNoCopy().ConmonPidFile) } From aaadb4726d99800cfde3af892377a65683f96321 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 15:34:16 +0100 Subject: [PATCH 4/5] SetupRootless handle case where conmon pid are not valid When trying to join the conmon pid to recreate the pause process based on the namespace it can be that the pid is no longer valid, i.e. when conmon crashed or was killed. Currently we have a big issue that can be reproduced using: $ podman run -d quay.io/libpod/testimage:20241011 sleep 100 $ killall -9 conmon $ killall catatonit All commands would fail as we keep trying to rejoin the namespace of the non existing conmon process. So to address that fall back to creating a new namespace if we fail to join the conmon pids. Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/system_linux.go | 18 ++++++++++++++---- test/system/550-pause-process.bats | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index da16961e693..6f1f6ff2299 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -4,6 +4,7 @@ package abi import ( "context" + "errors" "fmt" "os" @@ -14,6 +15,7 @@ import ( "go.podman.io/common/pkg/config" "go.podman.io/common/pkg/systemd" "go.podman.io/storage/pkg/unshare" + "golang.org/x/sys/unix" ) // Default path for system runtime state @@ -90,14 +92,22 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, if len(paths) > 0 { became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths) + // TryJoinFromFilePaths fails with ESRCH when the PID are all not valid anymore + // In this case create a new userns. + if errors.Is(err, unix.ESRCH) { + logrus.Warnf("Failed to join existing conmon namespace, creating a new rootless podman user namespace. If there are existing container running please stop them with %q to reset the namespace", os.Args[0]+" system migrate") + became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) + } } else { + logrus.Info("Creating a new rootless user namespace") became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) - if err == nil && !noMoveProcess { - systemd.MovePauseProcessToScope(pausePidPath) - } } + if err != nil { - return fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err) + return fmt.Errorf("fatal error, invalid internal status, unable to create a new pause process: %w. Try running %q and if that doesn't work reboot to recover", err, os.Args[0]+" system migrate") + } + if !noMoveProcess { + systemd.MovePauseProcessToScope(pausePidPath) } if became { os.Exit(ret) diff --git a/test/system/550-pause-process.bats b/test/system/550-pause-process.bats index da657e77e1c..7818f36bb9f 100644 --- a/test/system/550-pause-process.bats +++ b/test/system/550-pause-process.bats @@ -149,3 +149,30 @@ function _check_pause_process() { # This used to hang trying to unmount the netns. run_podman rm -f -t0 $cname } + +# regression test for https://issues.redhat.com/browse/RHEL-130252 +@test "podman system migrate works with conmon being killed" { + skip_if_not_rootless "pause process is only used as rootless" + skip_if_remote "system migrate not supported via remote" + + local cname=c-$(safename) + run_podman run --name $cname --stop-signal SIGKILL -d $IMAGE sleep 100 + + run_podman inspect --format '{{.State.ConmonPid}}' $cname + conmon_pid="$output" + + # check for pause pid and then kill it + _check_pause_process + kill -9 $pause_pid + + # kill conmon + kill -9 $conmon_pid + + # Use podman system migrate to stop the currently running pause process + run_podman 125 system migrate + assert "$output" =~ "Failed to join existing conmon namespace" "fallback to userns creating" + assert "$output" =~ "conmon process killed" + + # Now the removal command should work fine without errors. + run_podman rm $cname +} From b9a1f87b9ea80edf932ac456b575489525027aaa Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 25 Nov 2025 15:58:44 +0100 Subject: [PATCH 5/5] rootless_linux.c: use shortcut for system commands There doesn't seem any reason why the system commands should not join the userns. In particular the main commands use ParentNSRequired and UnshareNSRequired when they don't want to be joined to the main userns. Since the system command don't set these the go code does the join and re-exec anyway so might as well use the shortcut to speed that up. Signed-off-by: Paul Holzinger --- pkg/rootless/rootless_linux.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 3d74af6a6ca..644c8ef9a20 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -384,8 +384,7 @@ can_use_shortcut (char **argv) || strcmp (argv[argc], "version") == 0 || strcmp (argv[argc], "context") == 0 || strcmp (argv[argc], "search") == 0 - || strcmp (argv[argc], "compose") == 0 - || (strcmp (argv[argc], "system") == 0 && argv[argc+1] && strcmp (argv[argc+1], "service") != 0)) + || strcmp (argv[argc], "compose") == 0) { ret = false; break;