runc delete -f: fix for no pidns + no init case

kolyshkin · kolyshkin · commit 29283bb7db78 · 2023-11-27T09:15:39.000-08:00
Commit f8ad20f moved the kill logic from container destroy to container kill (which is the right thing to do). Alas, it broke the use case of doing "runc delete -f" for a container which does not have its own private PID namespace, when its init process is gone. In this case, some processes may still be running, and runc delete -f should kill them (the same way as "runc kill" does). It does not do that because the container status is "stopped" (as runc considers the container with no init process as stopped), and so we only call "destroy" (which was doing the killing before). The fix is easy: if --force is set, call killContainer no matter what. Add a test case, similar to the one in the previous commit. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
diff --git a/delete.go b/delete.go
@@ -66,6 +66,14 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
 			}
 			return err
 		}
+		// When --force is given, we kill all container processes and
+		// then destroy the container. This is done even for a stopped
+		// container, because (in case it does not have its own PID
+		// namespace) there may be some leftover processes in the
+		// container's cgroup.
+		if force {
+			return killContainer(container)
+		}
 		s, err := container.Status()
 		if err != nil {
 			return err
@@ -76,9 +84,6 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
 		case libcontainer.Created:
 			return killContainer(container)
 		default:
-			if force {
-				return killContainer(container)
-			}
 			return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
 		}
 
diff --git a/tests/integration/delete.bats b/tests/integration/delete.bats
@@ -62,6 +62,69 @@ function teardown() {
 	[ "$status" -eq 0 ]
 }
 
+# Issue 4047, case "runc delete -f".
+# See also: "kill KILL [host pidns + init gone]" test in kill.bats.
+@test "runc delete --force [host pidns + init gone]" {
+	requires cgroups_freezer
+
+	update_config '	  .linux.namespaces -= [{"type": "pid"}]'
+	set_cgroups_path
+	if [ $EUID -ne 0 ]; then
+		requires rootless_cgroup
+		# Apparently, for rootless test, when using systemd cgroup manager,
+		# newer versions of systemd clean up the container as soon as its init
+		# process is gone. This is all fine and dandy, except it prevents us from
+		# testing this case, thus we skip the test.
+		#
+		# It is not entirely clear which systemd version got this feature:
+		# v245 works fine, and v249 does not.
+		if [ -v RUNC_USE_SYSTEMD ] && [ "$(systemd_version)" -gt 245 ]; then
+			skip "rootless+systemd conflicts with systemd > 245"
+		fi
+		# Can't mount real /proc when rootless + no pidns,
+		# so change it to a bind-mounted one from the host.
+		update_config '	  .mounts |= map((select(.type == "proc")
+					| .type = "none"
+					| .source = "/proc"
+					| .options = ["rbind", "nosuid", "nodev", "noexec"]
+				  ) // .)'
+	fi
+
+	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
+	[ "$status" -eq 0 ]
+	cgpath=$(get_cgroup_path "pids")
+	init_pid=$(cat "$cgpath"/cgroup.procs)
+
+	# Start a few more processes.
+	for _ in 1 2 3 4 5; do
+		__runc exec -d test_busybox sleep 1h
+	done
+
+	# Now kill the container's init process. Since the container does
+	# not have its own PID ns, its init is not special and the container
+	# will still be up and running.
+	kill -9 "$init_pid"
+
+	# Get the list of all container processes.
+	pids=$(cat "$cgpath"/cgroup.procs)
+	echo "pids: $pids"
+	# Sanity check -- make sure all processes exist.
+	for p in $pids; do
+		kill -0 "$p"
+	done
+
+	runc delete -f test_busybox
+	[ "$status" -eq 0 ]
+
+	runc state test_busybox
+	[ "$status" -ne 0 ] # "Container does not exist"
+
+	# Make sure all processes are gone.
+	pids=$(cat "$cgpath"/cgroup.procs) || true # OK if cgroup is gone
+	echo "pids: $pids"
+	[ -z "$pids" ]
+}
+
 @test "runc delete --force [paused container]" {
 	runc run -d --console-socket "$CONSOLE_SOCKET" ct1
 	[ "$status" -eq 0 ]
diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats
@@ -109,6 +109,7 @@ test_host_pidns_kill() {
 # 3. Test runc kill on a container whose init process is gone.
 #
 # Issue 4047, case "runc kill".
+# See also: "runc delete --force [host pidns + init gone]" test in delete.bats.
 @test "kill KILL [host pidns + init gone]" {
 	# Apparently, for rootless test, when using systemd cgroup manager,
 	# newer versions of systemd clean up the container as soon as its init

Original file line number	Diff line number	Diff line change
`@@ -109,6 +109,7 @@ test_host_pidns_kill() {`
`109`	`109`	`# 3. Test runc kill on a container whose init process is gone.`
`110`	`110`	`#`
`111`	`111`	`# Issue 4047, case "runc kill".`
	`112`	`+# See also: "runc delete --force [host pidns + init gone]" test in delete.bats.`
`112`	`113`	`@test "kill KILL [host pidns + init gone]" {`
`113`	`114`	`# Apparently, for rootless test, when using systemd cgroup manager,`
`114`	`115`	`# newer versions of systemd clean up the container as soon as its init`