Skip to content

Commit b69bb47

Browse files
shakeelbhtejun
authored andcommitted
cgroup: fix race between fork and cgroup.kill
Tejun reported the following race between fork() and cgroup.kill at [1]. Tejun: I was looking at cgroup.kill implementation and wondering whether there could be a race window. So, __cgroup_kill() does the following: k1. Set CGRP_KILL. k2. Iterate tasks and deliver SIGKILL. k3. Clear CGRP_KILL. The copy_process() does the following: c1. Copy a bunch of stuff. c2. Grab siglock. c3. Check fatal_signal_pending(). c4. Commit to forking. c5. Release siglock. c6. Call cgroup_post_fork() which puts the task on the css_set and tests CGRP_KILL. The intention seems to be that either a forking task gets SIGKILL and terminates on c3 or it sees CGRP_KILL on c6 and kills the child. However, I don't see what guarantees that k3 can't happen before c6. ie. After a forking task passes c5, k2 can take place and then before the forking task reaches c6, k3 can happen. Then, nobody would send SIGKILL to the child. What am I missing? This is indeed a race. One way to fix this race is by taking cgroup_threadgroup_rwsem in write mode in __cgroup_kill() as the fork() side takes cgroup_threadgroup_rwsem in read mode from cgroup_can_fork() to cgroup_post_fork(). However that would be heavy handed as this adds one more potential stall scenario for cgroup.kill which is usually called under extreme situation like memory pressure. To fix this race, let's maintain a sequence number per cgroup which gets incremented on __cgroup_kill() call. On the fork() side, the cgroup_can_fork() will cache the sequence number locally and recheck it against the cgroup's sequence number at cgroup_post_fork() site. If the sequence numbers mismatch, it means __cgroup_kill() can been called and we should send SIGKILL to the newly created task. Reported-by: Tejun Heo <[email protected]> Closes: https://lore.kernel.org/all/[email protected]/ [1] Fixes: 661ee62 ("cgroup: introduce cgroup.kill") Cc: [email protected] # v5.14+ Signed-off-by: Shakeel Butt <[email protected]> Reviewed-by: Michal Koutný <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent a86bf22 commit b69bb47

File tree

3 files changed

+16
-11
lines changed

3 files changed

+16
-11
lines changed

include/linux/cgroup-defs.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,6 @@ enum {
7171

7272
/* Cgroup is frozen. */
7373
CGRP_FROZEN,
74-
75-
/* Control group has to be killed. */
76-
CGRP_KILL,
7774
};
7875

7976
/* cgroup_root->flags */
@@ -461,6 +458,9 @@ struct cgroup {
461458

462459
int nr_threaded_children; /* # of live threaded child cgroups */
463460

461+
/* sequence number for cgroup.kill, serialized by css_set_lock. */
462+
unsigned int kill_seq;
463+
464464
struct kernfs_node *kn; /* cgroup kernfs entry */
465465
struct cgroup_file procs_file; /* handle for "cgroup.procs" */
466466
struct cgroup_file events_file; /* handle for "cgroup.events" */

include/linux/sched/task.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct kernel_clone_args {
4343
void *fn_arg;
4444
struct cgroup *cgrp;
4545
struct css_set *cset;
46+
unsigned int kill_seq;
4647
};
4748

4849
/*

kernel/cgroup/cgroup.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4013,7 +4013,7 @@ static void __cgroup_kill(struct cgroup *cgrp)
40134013
lockdep_assert_held(&cgroup_mutex);
40144014

40154015
spin_lock_irq(&css_set_lock);
4016-
set_bit(CGRP_KILL, &cgrp->flags);
4016+
cgrp->kill_seq++;
40174017
spin_unlock_irq(&css_set_lock);
40184018

40194019
css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it);
@@ -4029,10 +4029,6 @@ static void __cgroup_kill(struct cgroup *cgrp)
40294029
send_sig(SIGKILL, task, 0);
40304030
}
40314031
css_task_iter_end(&it);
4032-
4033-
spin_lock_irq(&css_set_lock);
4034-
clear_bit(CGRP_KILL, &cgrp->flags);
4035-
spin_unlock_irq(&css_set_lock);
40364032
}
40374033

40384034
static void cgroup_kill(struct cgroup *cgrp)
@@ -6488,6 +6484,10 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
64886484
spin_lock_irq(&css_set_lock);
64896485
cset = task_css_set(current);
64906486
get_css_set(cset);
6487+
if (kargs->cgrp)
6488+
kargs->kill_seq = kargs->cgrp->kill_seq;
6489+
else
6490+
kargs->kill_seq = cset->dfl_cgrp->kill_seq;
64916491
spin_unlock_irq(&css_set_lock);
64926492

64936493
if (!(kargs->flags & CLONE_INTO_CGROUP)) {
@@ -6668,6 +6668,7 @@ void cgroup_post_fork(struct task_struct *child,
66686668
struct kernel_clone_args *kargs)
66696669
__releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
66706670
{
6671+
unsigned int cgrp_kill_seq = 0;
66716672
unsigned long cgrp_flags = 0;
66726673
bool kill = false;
66736674
struct cgroup_subsys *ss;
@@ -6681,10 +6682,13 @@ void cgroup_post_fork(struct task_struct *child,
66816682

66826683
/* init tasks are special, only link regular threads */
66836684
if (likely(child->pid)) {
6684-
if (kargs->cgrp)
6685+
if (kargs->cgrp) {
66856686
cgrp_flags = kargs->cgrp->flags;
6686-
else
6687+
cgrp_kill_seq = kargs->cgrp->kill_seq;
6688+
} else {
66876689
cgrp_flags = cset->dfl_cgrp->flags;
6690+
cgrp_kill_seq = cset->dfl_cgrp->kill_seq;
6691+
}
66886692

66896693
WARN_ON_ONCE(!list_empty(&child->cg_list));
66906694
cset->nr_tasks++;
@@ -6719,7 +6723,7 @@ void cgroup_post_fork(struct task_struct *child,
67196723
* child down right after we finished preparing it for
67206724
* userspace.
67216725
*/
6722-
kill = test_bit(CGRP_KILL, &cgrp_flags);
6726+
kill = kargs->kill_seq != cgrp_kill_seq;
67236727
}
67246728

67256729
spin_unlock_irq(&css_set_lock);

0 commit comments

Comments
 (0)