Skip to content

Commit d1587f7

Browse files
committed
Merge branch 'for-5.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
 "This contains the cgroup.procs permission check fixes so that they use
  the credentials at the time of open rather than write, which also
  fixes the cgroup namespace lifetime bug"

* 'for-5.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  selftests: cgroup: Test open-time cgroup namespace usage for migration checks
  selftests: cgroup: Test open-time credential usage for migration checks
  selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
  cgroup: Use open-time cgroup namespace for process migration perm checks
  cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
  cgroup: Use open-time credentials for process migration perm checks
2 parents 35632d9 + bf35a78 commit d1587f7

File tree

5 files changed

+263
-44
lines changed

5 files changed

+263
-44
lines changed

kernel/cgroup/cgroup-internal.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
6565
return container_of(kfc, struct cgroup_fs_context, kfc);
6666
}
6767

68+
struct cgroup_pidlist;
69+
70+
struct cgroup_file_ctx {
71+
struct cgroup_namespace *ns;
72+
73+
struct {
74+
void *trigger;
75+
} psi;
76+
77+
struct {
78+
bool started;
79+
struct css_task_iter iter;
80+
} procs;
81+
82+
struct {
83+
struct cgroup_pidlist *pidlist;
84+
} procs1;
85+
};
86+
6887
/*
6988
* A cgroup can be associated with multiple css_sets as different tasks may
7089
* belong to different cgroups on different hierarchies. In the other

kernel/cgroup/cgroup-v1.c

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
394394
* next pid to display, if any
395395
*/
396396
struct kernfs_open_file *of = s->private;
397+
struct cgroup_file_ctx *ctx = of->priv;
397398
struct cgroup *cgrp = seq_css(s)->cgroup;
398399
struct cgroup_pidlist *l;
399400
enum cgroup_filetype type = seq_cft(s)->private;
@@ -403,25 +404,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
403404
mutex_lock(&cgrp->pidlist_mutex);
404405

405406
/*
406-
* !NULL @of->priv indicates that this isn't the first start()
407-
* after open. If the matching pidlist is around, we can use that.
408-
* Look for it. Note that @of->priv can't be used directly. It
409-
* could already have been destroyed.
407+
* !NULL @ctx->procs1.pidlist indicates that this isn't the first
408+
* start() after open. If the matching pidlist is around, we can use
409+
* that. Look for it. Note that @ctx->procs1.pidlist can't be used
410+
* directly. It could already have been destroyed.
410411
*/
411-
if (of->priv)
412-
of->priv = cgroup_pidlist_find(cgrp, type);
412+
if (ctx->procs1.pidlist)
413+
ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);
413414

414415
/*
415416
* Either this is the first start() after open or the matching
416417
* pidlist has been destroyed inbetween. Create a new one.
417418
*/
418-
if (!of->priv) {
419-
ret = pidlist_array_load(cgrp, type,
420-
(struct cgroup_pidlist **)&of->priv);
419+
if (!ctx->procs1.pidlist) {
420+
ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
421421
if (ret)
422422
return ERR_PTR(ret);
423423
}
424-
l = of->priv;
424+
l = ctx->procs1.pidlist;
425425

426426
if (pid) {
427427
int end = l->length;
@@ -449,7 +449,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
449449
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
450450
{
451451
struct kernfs_open_file *of = s->private;
452-
struct cgroup_pidlist *l = of->priv;
452+
struct cgroup_file_ctx *ctx = of->priv;
453+
struct cgroup_pidlist *l = ctx->procs1.pidlist;
453454

454455
if (l)
455456
mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
@@ -460,7 +461,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
460461
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
461462
{
462463
struct kernfs_open_file *of = s->private;
463-
struct cgroup_pidlist *l = of->priv;
464+
struct cgroup_file_ctx *ctx = of->priv;
465+
struct cgroup_pidlist *l = ctx->procs1.pidlist;
464466
pid_t *p = v;
465467
pid_t *end = l->list + l->length;
466468
/*
@@ -504,10 +506,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
504506
goto out_unlock;
505507

506508
/*
507-
* Even if we're attaching all tasks in the thread group, we only
508-
* need to check permissions on one of them.
509+
* Even if we're attaching all tasks in the thread group, we only need
510+
* to check permissions on one of them. Check permissions using the
511+
* credentials from file open to protect against inherited fd attacks.
509512
*/
510-
cred = current_cred();
513+
cred = of->file->f_cred;
511514
tcred = get_task_cred(task);
512515
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
513516
!uid_eq(cred->euid, tcred->uid) &&

kernel/cgroup/cgroup.c

Lines changed: 60 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
36303630
static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
36313631
size_t nbytes, enum psi_res res)
36323632
{
3633+
struct cgroup_file_ctx *ctx = of->priv;
36333634
struct psi_trigger *new;
36343635
struct cgroup *cgrp;
36353636
struct psi_group *psi;
@@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
36483649
return PTR_ERR(new);
36493650
}
36503651

3651-
psi_trigger_replace(&of->priv, new);
3652+
psi_trigger_replace(&ctx->psi.trigger, new);
36523653

36533654
cgroup_put(cgrp);
36543655

@@ -3679,12 +3680,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
36793680
static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
36803681
poll_table *pt)
36813682
{
3682-
return psi_trigger_poll(&of->priv, of->file, pt);
3683+
struct cgroup_file_ctx *ctx = of->priv;
3684+
3685+
return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
36833686
}
36843687

36853688
static void cgroup_pressure_release(struct kernfs_open_file *of)
36863689
{
3687-
psi_trigger_replace(&of->priv, NULL);
3690+
struct cgroup_file_ctx *ctx = of->priv;
3691+
3692+
psi_trigger_replace(&ctx->psi.trigger, NULL);
36883693
}
36893694

36903695
bool cgroup_psi_enabled(void)
@@ -3811,24 +3816,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
38113816
static int cgroup_file_open(struct kernfs_open_file *of)
38123817
{
38133818
struct cftype *cft = of_cft(of);
3819+
struct cgroup_file_ctx *ctx;
3820+
int ret;
38143821

3815-
if (cft->open)
3816-
return cft->open(of);
3817-
return 0;
3822+
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
3823+
if (!ctx)
3824+
return -ENOMEM;
3825+
3826+
ctx->ns = current->nsproxy->cgroup_ns;
3827+
get_cgroup_ns(ctx->ns);
3828+
of->priv = ctx;
3829+
3830+
if (!cft->open)
3831+
return 0;
3832+
3833+
ret = cft->open(of);
3834+
if (ret) {
3835+
put_cgroup_ns(ctx->ns);
3836+
kfree(ctx);
3837+
}
3838+
return ret;
38183839
}
38193840

38203841
static void cgroup_file_release(struct kernfs_open_file *of)
38213842
{
38223843
struct cftype *cft = of_cft(of);
3844+
struct cgroup_file_ctx *ctx = of->priv;
38233845

38243846
if (cft->release)
38253847
cft->release(of);
3848+
put_cgroup_ns(ctx->ns);
3849+
kfree(ctx);
38263850
}
38273851

38283852
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
38293853
size_t nbytes, loff_t off)
38303854
{
3831-
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
3855+
struct cgroup_file_ctx *ctx = of->priv;
38323856
struct cgroup *cgrp = of->kn->parent->priv;
38333857
struct cftype *cft = of_cft(of);
38343858
struct cgroup_subsys_state *css;
@@ -3845,7 +3869,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
38453869
*/
38463870
if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
38473871
!(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3848-
ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
3872+
ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
38493873
return -EPERM;
38503874

38513875
if (cft->write)
@@ -4751,43 +4775,40 @@ void css_task_iter_end(struct css_task_iter *it)
47514775

47524776
static void cgroup_procs_release(struct kernfs_open_file *of)
47534777
{
4754-
if (of->priv) {
4755-
css_task_iter_end(of->priv);
4756-
kfree(of->priv);
4757-
}
4778+
struct cgroup_file_ctx *ctx = of->priv;
4779+
4780+
if (ctx->procs.started)
4781+
css_task_iter_end(&ctx->procs.iter);
47584782
}
47594783

47604784
static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
47614785
{
47624786
struct kernfs_open_file *of = s->private;
4763-
struct css_task_iter *it = of->priv;
4787+
struct cgroup_file_ctx *ctx = of->priv;
47644788

47654789
if (pos)
47664790
(*pos)++;
47674791

4768-
return css_task_iter_next(it);
4792+
return css_task_iter_next(&ctx->procs.iter);
47694793
}
47704794

47714795
static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
47724796
unsigned int iter_flags)
47734797
{
47744798
struct kernfs_open_file *of = s->private;
47754799
struct cgroup *cgrp = seq_css(s)->cgroup;
4776-
struct css_task_iter *it = of->priv;
4800+
struct cgroup_file_ctx *ctx = of->priv;
4801+
struct css_task_iter *it = &ctx->procs.iter;
47774802

47784803
/*
47794804
* When a seq_file is seeked, it's always traversed sequentially
47804805
* from position 0, so we can simply keep iterating on !0 *pos.
47814806
*/
4782-
if (!it) {
4807+
if (!ctx->procs.started) {
47834808
if (WARN_ON_ONCE((*pos)))
47844809
return ERR_PTR(-EINVAL);
4785-
4786-
it = kzalloc(sizeof(*it), GFP_KERNEL);
4787-
if (!it)
4788-
return ERR_PTR(-ENOMEM);
4789-
of->priv = it;
47904810
css_task_iter_start(&cgrp->self, iter_flags, it);
4811+
ctx->procs.started = true;
47914812
} else if (!(*pos)) {
47924813
css_task_iter_end(it);
47934814
css_task_iter_start(&cgrp->self, iter_flags, it);
@@ -4838,9 +4859,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
48384859

48394860
static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
48404861
struct cgroup *dst_cgrp,
4841-
struct super_block *sb)
4862+
struct super_block *sb,
4863+
struct cgroup_namespace *ns)
48424864
{
4843-
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
48444865
struct cgroup *com_cgrp = src_cgrp;
48454866
int ret;
48464867

@@ -4869,11 +4890,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
48694890

48704891
static int cgroup_attach_permissions(struct cgroup *src_cgrp,
48714892
struct cgroup *dst_cgrp,
4872-
struct super_block *sb, bool threadgroup)
4893+
struct super_block *sb, bool threadgroup,
4894+
struct cgroup_namespace *ns)
48734895
{
48744896
int ret = 0;
48754897

4876-
ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb);
4898+
ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
48774899
if (ret)
48784900
return ret;
48794901

@@ -4890,8 +4912,10 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
48904912
static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
48914913
bool threadgroup)
48924914
{
4915+
struct cgroup_file_ctx *ctx = of->priv;
48934916
struct cgroup *src_cgrp, *dst_cgrp;
48944917
struct task_struct *task;
4918+
const struct cred *saved_cred;
48954919
ssize_t ret;
48964920
bool locked;
48974921

@@ -4909,9 +4933,16 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
49094933
src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
49104934
spin_unlock_irq(&css_set_lock);
49114935

4912-
/* process and thread migrations follow same delegation rule */
4936+
/*
4937+
* Process and thread migrations follow same delegation rule. Check
4938+
* permissions using the credentials from file open to protect against
4939+
* inherited fd attacks.
4940+
*/
4941+
saved_cred = override_creds(of->file->f_cred);
49134942
ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
4914-
of->file->f_path.dentry->d_sb, threadgroup);
4943+
of->file->f_path.dentry->d_sb,
4944+
threadgroup, ctx->ns);
4945+
revert_creds(saved_cred);
49154946
if (ret)
49164947
goto out_finish;
49174948

@@ -6130,7 +6161,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
61306161
goto err;
61316162

61326163
ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
6133-
!(kargs->flags & CLONE_THREAD));
6164+
!(kargs->flags & CLONE_THREAD),
6165+
current->nsproxy->cgroup_ns);
61346166
if (ret)
61356167
goto err;
61366168

tools/testing/selftests/cgroup/cgroup_util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len)
221221

222222
int cg_create(const char *cgroup)
223223
{
224-
return mkdir(cgroup, 0644);
224+
return mkdir(cgroup, 0755);
225225
}
226226

227227
int cg_wait_for_proc_count(const char *cgroup, int count)

0 commit comments

Comments
 (0)