Skip to content

Commit dcf1b73

Browse files
kolyshkinlifubang
andcommitted
runc kill: fix sending KILL to non-pidns container
Commit f8ad20f made it impossible to kill leftover processes in a stopped container that does not have its own PID namespace. In other words, if a container init is gone, it is no longer possible to use `runc kill` to kill the leftover processes. Fix this by moving the check if container init exists to after the special case of handling the container without own PID namespace. While at it, fix the minor issue introduced by commit 9583b3d: if signalAllProcesses is used, there is no need to thaw the container (as freeze/thaw is either done in signalAllProcesses already, or not needed at all). Also, make signalAllProcesses return an error early if the container cgroup does not exist (as it relies on it to do its job). This way, the error message returned is more generic and easier to understand ("container not running" instead of "can't open file"). Finally, add a test case. Fixes: f8ad20f Fixes: 9583b3d Co-authored-by: lifubang <[email protected]> Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 542cce0 commit dcf1b73

File tree

3 files changed

+68
-22
lines changed

3 files changed

+68
-22
lines changed

libcontainer/container_linux.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -364,25 +364,26 @@ func (c *Container) start(process *Process) (retErr error) {
364364
func (c *Container) Signal(s os.Signal) error {
365365
c.m.Lock()
366366
defer c.m.Unlock()
367-
// To avoid a PID reuse attack, don't kill non-running container.
368-
if !c.hasInit() {
369-
return ErrNotRunning
370-
}
371367

372368
// When a container has its own PID namespace, inside it the init PID
373369
// is 1, and thus it is handled specially by the kernel. In particular,
374370
// killing init with SIGKILL from an ancestor namespace will also kill
375371
// all other processes in that PID namespace (see pid_namespaces(7)).
376372
//
377373
// OTOH, if PID namespace is shared, we should kill all pids to avoid
378-
// leftover processes.
379-
var err error
374+
// leftover processes. Handle this special case here.
380375
if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
381-
err = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
382-
} else {
383-
err = c.initProcess.signal(s)
376+
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
377+
return fmt.Errorf("unable to kill all processes: %w", err)
378+
}
379+
return nil
384380
}
385-
if err != nil {
381+
382+
// To avoid a PID reuse attack, don't kill non-running container.
383+
if !c.hasInit() {
384+
return ErrNotRunning
385+
}
386+
if err := c.initProcess.signal(s); err != nil {
386387
return fmt.Errorf("unable to signal init: %w", err)
387388
}
388389
if s == unix.SIGKILL {

libcontainer/init_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,9 @@ func setupPersonality(config *configs.Config) error {
673673
// signalAllProcesses freezes then iterates over all the processes inside the
674674
// manager's cgroups sending the signal s to them.
675675
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
676+
if !m.Exists() {
677+
return ErrNotRunning
678+
}
676679
// Use cgroup.kill, if available.
677680
if s == unix.SIGKILL {
678681
if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid.

tests/integration/kill.bats

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ function teardown() {
1010
teardown_bundle
1111
}
1212

13+
# shellcheck disable=SC2030
1314
@test "kill detached busybox" {
1415
# run busybox detached
1516
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
@@ -35,16 +36,15 @@ function teardown() {
3536
[ "$status" -eq 0 ]
3637
}
3738

38-
# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration.
39-
@test "kill KILL [host pidns]" {
40-
# kill -a currently requires cgroup freezer.
39+
test_host_pidns_kill() {
4140
requires cgroups_freezer
4241

4342
update_config ' .linux.namespaces -= [{"type": "pid"}]'
4443
set_cgroups_path
4544
if [ $EUID -ne 0 ]; then
46-
# kill --all requires working cgroups.
4745
requires rootless_cgroup
46+
# Can't mount real /proc when rootless + no pidns,
47+
# so change it to a bind-mounted one from the host.
4848
update_config ' .mounts |= map((select(.type == "proc")
4949
| .type = "none"
5050
| .source = "/proc"
@@ -53,32 +53,74 @@ function teardown() {
5353
fi
5454

5555
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
56+
# shellcheck disable=SC2031
5657
[ "$status" -eq 0 ]
58+
cgpath=$(get_cgroup_path "pids")
59+
init_pid=$(cat "$cgpath"/cgroup.procs)
5760

5861
# Start a few more processes.
5962
for _ in 1 2 3 4 5; do
6063
__runc exec -d test_busybox sleep 1h
61-
[ "$status" -eq 0 ]
6264
done
6365

66+
if [ -v KILL_INIT ]; then
67+
# Now kill the container's init process. Since the container do
68+
# not have own PID ns, its init is no special and the container
69+
# will still be up and running (except for rootless container
70+
# AND systemd cgroup driver AND systemd > v245, when systemd
71+
# kills the container; see "kill KILL [host pidns + init gone]"
72+
# below).
73+
kill -9 "$init_pid"
74+
fi
75+
6476
# Get the list of all container processes.
65-
path=$(get_cgroup_path "pids")
66-
pids=$(cat "$path"/cgroup.procs)
77+
pids=$(cat "$cgpath"/cgroup.procs)
6778
echo "pids: $pids"
6879
# Sanity check -- make sure all processes exist.
6980
for p in $pids; do
7081
kill -0 "$p"
7182
done
7283

7384
runc kill test_busybox KILL
85+
# shellcheck disable=SC2031
7486
[ "$status" -eq 0 ]
7587
wait_for_container 10 1 test_busybox stopped
7688

7789
# Make sure all processes are gone.
78-
pids=$(cat "$path"/cgroup.procs) || true # OK if cgroup is gone
90+
pids=$(cat "$cgpath"/cgroup.procs) || true # OK if cgroup is gone
7991
echo "pids: $pids"
80-
for p in $pids; do
81-
run ! kill -0 "$p"
82-
[[ "$output" = *"No such process" ]]
83-
done
92+
[ -z "$pids" ]
93+
}
94+
95+
# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration.
96+
# The differences are:
97+
#
98+
# 1. Here we use separate processes to create and to kill a container, so the
99+
# processes inside a container are not children of "runc kill".
100+
#
101+
# 2. We hit different codepaths (nonChildProcess.signal rather than initProcess.signal).
102+
@test "kill KILL [host pidns]" {
103+
unset KILL_INIT
104+
test_host_pidns_kill
105+
}
106+
107+
# Same as above plus:
108+
#
109+
# 3. Test runc kill on a container whose init process is gone.
110+
#
111+
# Issue 4047, case "runc kill".
112+
@test "kill KILL [host pidns + init gone]" {
113+
# Apparently, for rootless test, when using systemd cgroup manager,
114+
# newer versions of systemd clean up the container as soon as its init
115+
# process is gone. This is all fine and dandy, except it prevents us to
116+
# test this case, thus we skip the test.
117+
#
118+
# It is not entirely clear which systemd version got this feature:
119+
# v245 works fine, and v249 does not.
120+
if [ $EUID -ne 0 ] && [ -v RUNC_USE_SYSTEMD ] && [ "$(systemd_version)" -gt 245 ]; then
121+
skip "rootless+systemd conflicts with systemd > 245"
122+
fi
123+
KILL_INIT=1
124+
test_host_pidns_kill
125+
unset KILL_INIT
84126
}

0 commit comments

Comments
 (0)