Skip to content

Commit 4ca9158

Browse files
Merge pull request #27604 from Luap99/migrate
podman system migrate fixes when pause process and conmon got killed
2 parents 90a03ca + b9a1f87 commit 4ca9158

File tree

3 files changed

+48
-13
lines changed

3 files changed

+48
-13
lines changed

pkg/domain/infra/abi/system_linux.go

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package abi
44

55
import (
66
"context"
7+
"errors"
78
"fmt"
89
"os"
910

@@ -14,6 +15,7 @@ import (
1415
"go.podman.io/common/pkg/config"
1516
"go.podman.io/common/pkg/systemd"
1617
"go.podman.io/storage/pkg/unshare"
18+
"golang.org/x/sys/unix"
1719
)
1820

1921
// Default path for system runtime state
@@ -59,6 +61,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
5961
}
6062
}
6163
}
64+
65+
// Return early: we are already re-exec'd or running as root here, so there is no need to join the rootless userns.
6266
return nil
6367
}
6468

@@ -74,36 +78,41 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
7478
if became {
7579
os.Exit(ret)
7680
}
77-
if noMoveProcess {
78-
return nil
79-
}
8081

8182
// if there is no pid file, try to join existing containers, and create a pause process.
8283
ctrs, err := ic.Libpod.GetRunningContainers()
8384
if err != nil {
84-
logrus.Error(err.Error())
85-
os.Exit(1)
85+
return err
8686
}
8787

88-
paths := []string{}
88+
paths := make([]string, 0, len(ctrs))
8989
for _, ctr := range ctrs {
9090
paths = append(paths, ctr.ConfigNoCopy().ConmonPidFile)
9191
}
9292

9393
if len(paths) > 0 {
9494
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
95+
// TryJoinFromFilePaths fails with ESRCH when none of the PIDs are valid anymore.
96+
// In this case create a new userns.
97+
if errors.Is(err, unix.ESRCH) {
98+
logrus.Warnf("Failed to join existing conmon namespace, creating a new rootless podman user namespace. If there are existing container running please stop them with %q to reset the namespace", os.Args[0]+" system migrate")
99+
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
100+
}
95101
} else {
102+
logrus.Info("Creating a new rootless user namespace")
96103
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
97-
if err == nil {
98-
systemd.MovePauseProcessToScope(pausePidPath)
99-
}
100104
}
105+
101106
if err != nil {
102-
logrus.Error(fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err))
103-
os.Exit(1)
107+
return fmt.Errorf("fatal error, invalid internal status, unable to create a new pause process: %w. Try running %q and if that doesn't work reboot to recover", err, os.Args[0]+" system migrate")
108+
}
109+
if !noMoveProcess {
110+
systemd.MovePauseProcessToScope(pausePidPath)
104111
}
105112
if became {
106113
os.Exit(ret)
107114
}
115+
116+
logrus.Error("Internal error, failed to re-exec podman into user namespace without error. This should never happen, if you see this please report a bug")
108117
return nil
109118
}

pkg/rootless/rootless_linux.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,7 @@ can_use_shortcut (char **argv)
384384
|| strcmp (argv[argc], "version") == 0
385385
|| strcmp (argv[argc], "context") == 0
386386
|| strcmp (argv[argc], "search") == 0
387-
|| strcmp (argv[argc], "compose") == 0
388-
|| (strcmp (argv[argc], "system") == 0 && argv[argc+1] && strcmp (argv[argc+1], "service") != 0))
387+
|| strcmp (argv[argc], "compose") == 0)
389388
{
390389
ret = false;
391390
break;

test/system/550-pause-process.bats

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,30 @@ function _check_pause_process() {
149149
# This used to hang trying to unmount the netns.
150150
run_podman rm -f -t0 $cname
151151
}
152+
153+
# regression test for https://issues.redhat.com/browse/RHEL-130252
154+
@test "podman system migrate works with conmon being killed" {
155+
skip_if_not_rootless "pause process is only used as rootless"
156+
skip_if_remote "system migrate not supported via remote"
157+
158+
local cname=c-$(safename)
159+
run_podman run --name $cname --stop-signal SIGKILL -d $IMAGE sleep 100
160+
161+
run_podman inspect --format '{{.State.ConmonPid}}' $cname
162+
conmon_pid="$output"
163+
164+
# check for pause pid and then kill it
165+
_check_pause_process
166+
kill -9 $pause_pid
167+
168+
# kill conmon
169+
kill -9 $conmon_pid
170+
171+
# Run podman system migrate now that both the pause process and conmon have been killed
172+
run_podman 125 system migrate
173+
assert "$output" =~ "Failed to join existing conmon namespace" "fallback to userns creating"
174+
assert "$output" =~ "conmon process killed"
175+
176+
# Now the removal command should work fine without errors.
177+
run_podman rm $cname
178+
}

0 commit comments

Comments
 (0)