Skip to content

Commit 29283bb

Browse files
committed
runc delete -f: fix for no pidns + no init case
Commit f8ad20f moved the kill logic from container destroy to container kill (which is the right thing to do). Alas, it broke the use case of doing "runc delete -f" for a container which does not have its own private PID namespace, when its init process is gone. In this case, some processes may still be running, and runc delete -f should kill them (the same way as "runc kill" does). It does not do that because the container status is "stopped" (as runc considers the container with no init process as stopped), and so we only call "destroy" (which was doing the killing before). The fix is easy: if --force is set, call killContainer no matter what. Add a test case, similar to the one in the previous commit. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent dcf1b73 commit 29283bb

File tree

3 files changed

+72
-3
lines changed

3 files changed

+72
-3
lines changed

delete.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
6666
}
6767
return err
6868
}
69+
// When --force is given, we kill all container processes and
70+
// then destroy the container. This is done even for a stopped
71+
// container, because (in case it does not have its own PID
72+
// namespace) there may be some leftover processes in the
73+
// container's cgroup.
74+
if force {
75+
return killContainer(container)
76+
}
6977
s, err := container.Status()
7078
if err != nil {
7179
return err
@@ -76,9 +84,6 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
7684
case libcontainer.Created:
7785
return killContainer(container)
7886
default:
79-
if force {
80-
return killContainer(container)
81-
}
8287
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
8388
}
8489

tests/integration/delete.bats

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,69 @@ function teardown() {
6262
[ "$status" -eq 0 ]
6363
}
6464

65+
# Issue 4047, case "runc delete -f".
66+
# See also: "kill KILL [host pidns + init gone]" test in kill.bats.
67+
@test "runc delete --force [host pidns + init gone]" {
68+
requires cgroups_freezer
69+
70+
update_config ' .linux.namespaces -= [{"type": "pid"}]'
71+
set_cgroups_path
72+
if [ $EUID -ne 0 ]; then
73+
requires rootless_cgroup
74+
# Apparently, for rootless test, when using systemd cgroup manager,
75+
# newer versions of systemd clean up the container as soon as its init
76+
# process is gone. This is all fine and dandy, except it prevents us to
77+
# test this case, thus we skip the test.
78+
#
79+
# It is not entirely clear which systemd version got this feature:
80+
# v245 works fine, and v249 does not.
81+
if [ -v RUNC_USE_SYSTEMD ] && [ "$(systemd_version)" -gt 245 ]; then
82+
skip "rootless+systemd conflicts with systemd > 245"
83+
fi
84+
# Can't mount real /proc when rootless + no pidns,
85+
# so change it to a bind-mounted one from the host.
86+
update_config ' .mounts |= map((select(.type == "proc")
87+
| .type = "none"
88+
| .source = "/proc"
89+
| .options = ["rbind", "nosuid", "nodev", "noexec"]
90+
) // .)'
91+
fi
92+
93+
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
94+
[ "$status" -eq 0 ]
95+
cgpath=$(get_cgroup_path "pids")
96+
init_pid=$(cat "$cgpath"/cgroup.procs)
97+
98+
# Start a few more processes.
99+
for _ in 1 2 3 4 5; do
100+
__runc exec -d test_busybox sleep 1h
101+
done
102+
103+
# Now kill the container's init process. Since the container do
104+
# not have own PID ns, its init is no special and the container
105+
# will still be up and running.
106+
kill -9 "$init_pid"
107+
108+
# Get the list of all container processes.
109+
pids=$(cat "$cgpath"/cgroup.procs)
110+
echo "pids: $pids"
111+
# Sanity check -- make sure all processes exist.
112+
for p in $pids; do
113+
kill -0 "$p"
114+
done
115+
116+
runc delete -f test_busybox
117+
[ "$status" -eq 0 ]
118+
119+
runc state test_busybox
120+
[ "$status" -ne 0 ] # "Container does not exist"
121+
122+
# Make sure all processes are gone.
123+
pids=$(cat "$cgpath"/cgroup.procs) || true # OK if cgroup is gone
124+
echo "pids: $pids"
125+
[ -z "$pids" ]
126+
}
127+
65128
@test "runc delete --force [paused container]" {
66129
runc run -d --console-socket "$CONSOLE_SOCKET" ct1
67130
[ "$status" -eq 0 ]

tests/integration/kill.bats

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ test_host_pidns_kill() {
109109
# 3. Test runc kill on a container whose init process is gone.
110110
#
111111
# Issue 4047, case "runc kill".
112+
# See also: "runc delete --force [host pidns + init gone]" test in delete.bats.
112113
@test "kill KILL [host pidns + init gone]" {
113114
# Apparently, for rootless test, when using systemd cgroup manager,
114115
# newer versions of systemd clean up the container as soon as its init

0 commit comments

Comments
 (0)