Skip to content

Commit 5c1ee56

Browse files
committed
Merge branch 'for-5.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:

 - Fix for a subtle bug in the recent release_agent permission check update
 - Fix for a long-standing race condition between cpuset and cpu hotplug
 - Comment updates

* 'for-5.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cpuset: Fix kernel-doc
  cgroup-v1: Correct privileges check in release_agent writes
  cgroup: clarify cgroup_css_set_fork()
  cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug
2 parents 917bbdb + c70cd03 commit 5c1ee56

File tree

3 files changed

+25
-7
lines changed

3 files changed

+25
-7
lines changed

kernel/cgroup/cgroup-v1.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -546,15 +546,17 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
546546
char *buf, size_t nbytes, loff_t off)
547547
{
548548
struct cgroup *cgrp;
549+
struct cgroup_file_ctx *ctx;
549550

550551
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
551552

552553
/*
553554
* Release agent gets called with all capabilities,
554555
* require capabilities to set release agent.
555556
*/
556-
if ((of->file->f_cred->user_ns != &init_user_ns) ||
557-
!capable(CAP_SYS_ADMIN))
557+
ctx = of->priv;
558+
if ((ctx->ns->user_ns != &init_user_ns) ||
559+
!file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
558560
return -EPERM;
559561

560562
cgrp = cgroup_kn_lock_live(of->kn, false);

kernel/cgroup/cgroup.c

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6166,6 +6166,20 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
61666166
if (ret)
61676167
goto err;
61686168

6169+
/*
6170+
* Spawning a task directly into a cgroup works by passing a file
6171+
* descriptor to the target cgroup directory. This can even be an O_PATH
6172+
* file descriptor. But it can never be a cgroup.procs file descriptor.
6173+
* This was done on purpose so spawning into a cgroup could be
6174+
* conceptualized as an atomic
6175+
*
6176+
* fd = openat(dfd_cgroup, "cgroup.procs", ...);
6177+
* write(fd, <child-pid>, ...);
6178+
*
6179+
* sequence, i.e. it's a shorthand for the caller opening and writing
6180+
* cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us
6181+
* to always use the caller's credentials.
6182+
*/
61696183
ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
61706184
!(kargs->flags & CLONE_THREAD),
61716185
current->nsproxy->cgroup_ns);

kernel/cgroup/cpuset.c

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2289,6 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
22892289
cgroup_taskset_first(tset, &css);
22902290
cs = css_cs(css);
22912291

2292+
cpus_read_lock();
22922293
percpu_down_write(&cpuset_rwsem);
22932294

22942295
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
@@ -2342,6 +2343,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
23422343
wake_up(&cpuset_attach_wq);
23432344

23442345
percpu_up_write(&cpuset_rwsem);
2346+
cpus_read_unlock();
23452347
}
23462348

23472349
/* The various types of files and directories in a cpuset file system */
@@ -3522,8 +3524,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
35223524
return cs;
35233525
}
35243526

3525-
/**
3526-
* cpuset_node_allowed - Can we allocate on a memory node?
3527+
/*
3528+
* __cpuset_node_allowed - Can we allocate on a memory node?
35273529
* @node: is this an allowed node?
35283530
* @gfp_mask: memory allocation flags
35293531
*
@@ -3694,8 +3696,8 @@ void cpuset_print_current_mems_allowed(void)
36943696

36953697
int cpuset_memory_pressure_enabled __read_mostly;
36963698

3697-
/**
3698-
* cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
3699+
/*
3700+
* __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
36993701
*
37003702
* Keep a running average of the rate of synchronous (direct)
37013703
* page reclaim efforts initiated by tasks in each cpuset.
@@ -3710,7 +3712,7 @@ int cpuset_memory_pressure_enabled __read_mostly;
37103712
* "memory_pressure". Value displayed is an integer
37113713
* representing the recent rate of entry into the synchronous
37123714
* (direct) page reclaim by any task attached to the cpuset.
3713-
**/
3715+
*/
37143716

37153717
void __cpuset_memory_pressure_bump(void)
37163718
{

0 commit comments

Comments
 (0)