Skip to content

Commit a85373f

Browse files
committed
Merge branch 'for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:

 - The misc controller now reports allocation rejections through
   misc.events instead of logging them with printk
 - cgroup_mutex usage is reduced to improve scalability of some
   operations
 - vhost helper threads are now assigned to the right cgroup on cgroup2
 - Bug fixes

* 'for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: bpf: Move wrapper for __cgroup_bpf_*() to kernel/bpf/cgroup.c
  cgroup: Fix rootcg cpu.stat guest double counting
  cgroup: no need for cgroup_mutex for /proc/cgroups
  cgroup: remove cgroup_mutex from cgroupstats_build
  cgroup: reduce dependency on cgroup_mutex
  cgroup: cgroup-v1: do not exclude cgrp_dfl_root
  cgroup: Make rebind_subsystems() disable v2 controllers all at once
  docs/cgroup: add entry for misc.events
  misc_cgroup: remove error log to avoid log flood
  misc_cgroup: introduce misc.events to count failures
2 parents 4075409 + 588e5d8 commit a85373f

File tree

8 files changed

+144
-116
lines changed

8 files changed

+144
-116
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,6 +2318,16 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
23182318
Limits can be set higher than the capacity value in the misc.capacity
23192319
file.
23202320

2321+
misc.events
2322+
A read-only flat-keyed file which exists on non-root cgroups. The
2323+
following entries are defined. Unless specified otherwise, a value
2324+
change in this file generates a file modified event. All fields in
2325+
this file are hierarchical.
2326+
2327+
max
2328+
The number of times the cgroup's resource usage was
2329+
about to go over the max boundary.
2330+
23212331
Migration and Ownership
23222332
~~~~~~~~~~~~~~~~~~~~~~~
23232333

include/linux/bpf-cgroup.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -157,26 +157,6 @@ struct cgroup_bpf {
157157
int cgroup_bpf_inherit(struct cgroup *cgrp);
158158
void cgroup_bpf_offline(struct cgroup *cgrp);
159159

160-
int __cgroup_bpf_attach(struct cgroup *cgrp,
161-
struct bpf_prog *prog, struct bpf_prog *replace_prog,
162-
struct bpf_cgroup_link *link,
163-
enum bpf_attach_type type, u32 flags);
164-
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
165-
struct bpf_cgroup_link *link,
166-
enum bpf_attach_type type);
167-
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
168-
union bpf_attr __user *uattr);
169-
170-
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
171-
int cgroup_bpf_attach(struct cgroup *cgrp,
172-
struct bpf_prog *prog, struct bpf_prog *replace_prog,
173-
struct bpf_cgroup_link *link, enum bpf_attach_type type,
174-
u32 flags);
175-
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
176-
enum bpf_attach_type type);
177-
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
178-
union bpf_attr __user *uattr);
179-
180160
int __cgroup_bpf_run_filter_skb(struct sock *sk,
181161
struct sk_buff *skb,
182162
enum cgroup_bpf_attach_type atype);

include/linux/misc_cgroup.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ struct misc_cg;
3636
struct misc_res {
3737
unsigned long max;
3838
atomic_long_t usage;
39-
bool failed;
39+
atomic_long_t events;
4040
};
4141

4242
/**
@@ -46,6 +46,10 @@ struct misc_res {
4646
*/
4747
struct misc_cg {
4848
struct cgroup_subsys_state css;
49+
50+
/* misc.events */
51+
struct cgroup_file events_file;
52+
4953
struct misc_res res[MISC_CG_RES_TYPES];
5054
};
5155

kernel/bpf/cgroup.c

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -430,10 +430,10 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
430430
* Exactly one of @prog or @link can be non-null.
431431
* Must be called with cgroup_mutex held.
432432
*/
433-
int __cgroup_bpf_attach(struct cgroup *cgrp,
434-
struct bpf_prog *prog, struct bpf_prog *replace_prog,
435-
struct bpf_cgroup_link *link,
436-
enum bpf_attach_type type, u32 flags)
433+
static int __cgroup_bpf_attach(struct cgroup *cgrp,
434+
struct bpf_prog *prog, struct bpf_prog *replace_prog,
435+
struct bpf_cgroup_link *link,
436+
enum bpf_attach_type type, u32 flags)
437437
{
438438
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
439439
struct bpf_prog *old_prog = NULL;
@@ -523,6 +523,20 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
523523
return err;
524524
}
525525

526+
static int cgroup_bpf_attach(struct cgroup *cgrp,
527+
struct bpf_prog *prog, struct bpf_prog *replace_prog,
528+
struct bpf_cgroup_link *link,
529+
enum bpf_attach_type type,
530+
u32 flags)
531+
{
532+
int ret;
533+
534+
mutex_lock(&cgroup_mutex);
535+
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
536+
mutex_unlock(&cgroup_mutex);
537+
return ret;
538+
}
539+
526540
/* Swap updated BPF program for given link in effective program arrays across
527541
* all descendant cgroups. This function is guaranteed to succeed.
528542
*/
@@ -672,14 +686,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
672686
* propagate the change to descendants
673687
* @cgrp: The cgroup whose descendants to traverse
674688
* @prog: A program to detach or NULL
675-
* @prog: A link to detach or NULL
689+
* @link: A link to detach or NULL
676690
* @type: Type of detach operation
677691
*
678692
* At most one of @prog or @link can be non-NULL.
679693
* Must be called with cgroup_mutex held.
680694
*/
681-
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
682-
struct bpf_cgroup_link *link, enum bpf_attach_type type)
695+
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
696+
struct bpf_cgroup_link *link, enum bpf_attach_type type)
683697
{
684698
enum cgroup_bpf_attach_type atype;
685699
struct bpf_prog *old_prog;
@@ -730,9 +744,20 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
730744
return err;
731745
}
732746

747+
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
748+
enum bpf_attach_type type)
749+
{
750+
int ret;
751+
752+
mutex_lock(&cgroup_mutex);
753+
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
754+
mutex_unlock(&cgroup_mutex);
755+
return ret;
756+
}
757+
733758
/* Must be called with cgroup_mutex held to avoid races. */
734-
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
735-
union bpf_attr __user *uattr)
759+
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
760+
union bpf_attr __user *uattr)
736761
{
737762
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
738763
enum bpf_attach_type type = attr->query.attach_type;
@@ -789,6 +814,17 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
789814
return ret;
790815
}
791816

817+
static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
818+
union bpf_attr __user *uattr)
819+
{
820+
int ret;
821+
822+
mutex_lock(&cgroup_mutex);
823+
ret = __cgroup_bpf_query(cgrp, attr, uattr);
824+
mutex_unlock(&cgroup_mutex);
825+
return ret;
826+
}
827+
792828
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
793829
enum bpf_prog_type ptype, struct bpf_prog *prog)
794830
{

kernel/cgroup/cgroup-v1.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
6363
for_each_root(root) {
6464
struct cgroup *from_cgrp;
6565

66-
if (root == &cgrp_dfl_root)
67-
continue;
68-
6966
spin_lock_irq(&css_set_lock);
7067
from_cgrp = task_cgroup_from_root(from, root);
7168
spin_unlock_irq(&css_set_lock);
@@ -662,19 +659,16 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
662659

663660
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
664661
/*
665-
* ideally we don't want subsystems moving around while we do this.
666-
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
667-
* subsys/hierarchy state.
662+
* Grab the subsystem state racily; there is no need to add another
663+
* source of cgroup_mutex contention.
668664
*/
669-
mutex_lock(&cgroup_mutex);
670665

671666
for_each_subsys(ss, i)
672667
seq_printf(m, "%s\t%d\t%d\t%d\n",
673668
ss->legacy_name, ss->root->hierarchy_id,
674669
atomic_read(&ss->root->nr_cgrps),
675670
cgroup_ssid_enabled(i));
676671

677-
mutex_unlock(&cgroup_mutex);
678672
return 0;
679673
}
680674

@@ -701,18 +695,15 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
701695
kernfs_type(kn) != KERNFS_DIR)
702696
return -EINVAL;
703697

704-
mutex_lock(&cgroup_mutex);
705-
706698
/*
707699
* We aren't being called from kernfs and there's no guarantee on
708700
* @kn->priv's validity. For this and css_tryget_online_from_dir(),
709701
* @kn->priv is RCU safe. Let's do the RCU dancing.
710702
*/
711703
rcu_read_lock();
712704
cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
713-
if (!cgrp || cgroup_is_dead(cgrp)) {
705+
if (!cgrp || !cgroup_tryget(cgrp)) {
714706
rcu_read_unlock();
715-
mutex_unlock(&cgroup_mutex);
716707
return -ENOENT;
717708
}
718709
rcu_read_unlock();
@@ -740,7 +731,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
740731
}
741732
css_task_iter_end(&it);
742733

743-
mutex_unlock(&cgroup_mutex);
734+
cgroup_put(cgrp);
744735
return 0;
745736
}
746737

kernel/cgroup/cgroup.c

Lines changed: 56 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1740,6 +1740,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
17401740
struct cgroup *dcgrp = &dst_root->cgrp;
17411741
struct cgroup_subsys *ss;
17421742
int ssid, i, ret;
1743+
u16 dfl_disable_ss_mask = 0;
17431744

17441745
lockdep_assert_held(&cgroup_mutex);
17451746

@@ -1756,8 +1757,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
17561757
/* can't move between two non-dummy roots either */
17571758
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
17581759
return -EBUSY;
1760+
1761+
/*
1762+
* Collect ssid's that need to be disabled from default
1763+
* hierarchy.
1764+
*/
1765+
if (ss->root == &cgrp_dfl_root)
1766+
dfl_disable_ss_mask |= 1 << ssid;
1767+
17591768
} while_each_subsys_mask();
17601769

1770+
if (dfl_disable_ss_mask) {
1771+
struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
1772+
1773+
/*
1774+
* Controllers from default hierarchy that need to be rebound
1775+
* are all disabled together in one go.
1776+
*/
1777+
cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
1778+
WARN_ON(cgroup_apply_control(scgrp));
1779+
cgroup_finalize_control(scgrp, 0);
1780+
}
1781+
17611782
do_each_subsys_mask(ss, ssid, ss_mask) {
17621783
struct cgroup_root *src_root = ss->root;
17631784
struct cgroup *scgrp = &src_root->cgrp;
@@ -1766,10 +1787,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
17661787

17671788
WARN_ON(!css || cgroup_css(dcgrp, ss));
17681789

1769-
/* disable from the source */
1770-
src_root->subsys_mask &= ~(1 << ssid);
1771-
WARN_ON(cgroup_apply_control(scgrp));
1772-
cgroup_finalize_control(scgrp, 0);
1790+
if (src_root != &cgrp_dfl_root) {
1791+
/* disable from the source */
1792+
src_root->subsys_mask &= ~(1 << ssid);
1793+
WARN_ON(cgroup_apply_control(scgrp));
1794+
cgroup_finalize_control(scgrp, 0);
1795+
}
17731796

17741797
/* rebind */
17751798
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
@@ -5911,17 +5934,20 @@ struct cgroup *cgroup_get_from_id(u64 id)
59115934
struct kernfs_node *kn;
59125935
struct cgroup *cgrp = NULL;
59135936

5914-
mutex_lock(&cgroup_mutex);
59155937
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
59165938
if (!kn)
5917-
goto out_unlock;
5939+
goto out;
59185940

5919-
cgrp = kn->priv;
5920-
if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp))
5941+
rcu_read_lock();
5942+
5943+
cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
5944+
if (cgrp && !cgroup_tryget(cgrp))
59215945
cgrp = NULL;
5946+
5947+
rcu_read_unlock();
5948+
59225949
kernfs_put(kn);
5923-
out_unlock:
5924-
mutex_unlock(&cgroup_mutex);
5950+
out:
59255951
return cgrp;
59265952
}
59275953
EXPORT_SYMBOL_GPL(cgroup_get_from_id);
@@ -6474,30 +6500,34 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
64746500
*
64756501
* Find the cgroup at @path on the default hierarchy, increment its
64766502
* reference count and return it. Returns pointer to the found cgroup on
6477-
* success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
6478-
* if @path points to a non-directory.
6503+
* success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already
6504+
* been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
64796505
*/
64806506
struct cgroup *cgroup_get_from_path(const char *path)
64816507
{
64826508
struct kernfs_node *kn;
6483-
struct cgroup *cgrp;
6484-
6485-
mutex_lock(&cgroup_mutex);
6509+
struct cgroup *cgrp = ERR_PTR(-ENOENT);
64866510

64876511
kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
6488-
if (kn) {
6489-
if (kernfs_type(kn) == KERNFS_DIR) {
6490-
cgrp = kn->priv;
6491-
cgroup_get_live(cgrp);
6492-
} else {
6493-
cgrp = ERR_PTR(-ENOTDIR);
6494-
}
6495-
kernfs_put(kn);
6496-
} else {
6497-
cgrp = ERR_PTR(-ENOENT);
6512+
if (!kn)
6513+
goto out;
6514+
6515+
if (kernfs_type(kn) != KERNFS_DIR) {
6516+
cgrp = ERR_PTR(-ENOTDIR);
6517+
goto out_kernfs;
64986518
}
64996519

6500-
mutex_unlock(&cgroup_mutex);
6520+
rcu_read_lock();
6521+
6522+
cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
6523+
if (!cgrp || !cgroup_tryget(cgrp))
6524+
cgrp = ERR_PTR(-ENOENT);
6525+
6526+
rcu_read_unlock();
6527+
6528+
out_kernfs:
6529+
kernfs_put(kn);
6530+
out:
65016531
return cgrp;
65026532
}
65036533
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
@@ -6625,44 +6655,6 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
66256655

66266656
#endif /* CONFIG_SOCK_CGROUP_DATA */
66276657

6628-
#ifdef CONFIG_CGROUP_BPF
6629-
int cgroup_bpf_attach(struct cgroup *cgrp,
6630-
struct bpf_prog *prog, struct bpf_prog *replace_prog,
6631-
struct bpf_cgroup_link *link,
6632-
enum bpf_attach_type type,
6633-
u32 flags)
6634-
{
6635-
int ret;
6636-
6637-
mutex_lock(&cgroup_mutex);
6638-
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
6639-
mutex_unlock(&cgroup_mutex);
6640-
return ret;
6641-
}
6642-
6643-
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
6644-
enum bpf_attach_type type)
6645-
{
6646-
int ret;
6647-
6648-
mutex_lock(&cgroup_mutex);
6649-
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
6650-
mutex_unlock(&cgroup_mutex);
6651-
return ret;
6652-
}
6653-
6654-
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
6655-
union bpf_attr __user *uattr)
6656-
{
6657-
int ret;
6658-
6659-
mutex_lock(&cgroup_mutex);
6660-
ret = __cgroup_bpf_query(cgrp, attr, uattr);
6661-
mutex_unlock(&cgroup_mutex);
6662-
return ret;
6663-
}
6664-
#endif /* CONFIG_CGROUP_BPF */
6665-
66666658
#ifdef CONFIG_SYSFS
66676659
static ssize_t show_delegatable_files(struct cftype *files, char *buf,
66686660
ssize_t size, const char *prefix)

0 commit comments

Comments
 (0)