Skip to content

Commit 0595021

Browse files
committed
Merge tag 'cgroup-for-6.17-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo: "This contains two cgroup changes. Both are pretty low risk. - Fix deadlock in cgroup destruction when repeatedly mounting/unmounting perf_event and net_prio controllers. The issue occurs because cgroup_destroy_wq has max_active=1, causing root destruction to wait for CSS offline operations that are queued behind it. The fix splits cgroup_destroy_wq into three separate workqueues to eliminate the blocking. - Set of->priv to NULL upon file release to make potential bugs to manifest as NULL pointer dereferences rather than use-after-free errors" * tag 'cgroup-for-6.17-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup/psi: Set of->priv to NULL upon file release cgroup: split cgroup_destroy_wq into 3 workqueues
2 parents d4b7799 + 94a4acf commit 0595021

File tree

1 file changed

+37
-7
lines changed

1 file changed

+37
-7
lines changed

kernel/cgroup/cgroup.c

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,31 @@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
126126
* of concurrent destructions. Use a separate workqueue so that cgroup
127127
* destruction work items don't end up filling up max_active of system_wq
128128
* which may lead to deadlock.
129+
*
130+
* A cgroup destruction should enqueue work sequentially to:
131+
* cgroup_offline_wq: use for css offline work
132+
* cgroup_release_wq: use for css release work
133+
* cgroup_free_wq: use for free work
134+
*
135+
* Rationale for using separate workqueues:
136+
* The cgroup root free work may depend on completion of other css offline
137+
* operations. If all tasks were enqueued to a single workqueue, this could
138+
* create a deadlock scenario where:
139+
* - Free work waits for other css offline work to complete.
140+
* - But other css offline work is queued after free work in the same queue.
141+
*
142+
* Example deadlock scenario with single workqueue (cgroup_destroy_wq):
143+
* 1. umount net_prio
144+
* 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
145+
* 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
146+
* 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline.
147+
* 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
148+
* which can never complete as it's behind in the same queue and
149+
* workqueue's max_active is 1.
129150
*/
130-
static struct workqueue_struct *cgroup_destroy_wq;
151+
static struct workqueue_struct *cgroup_offline_wq;
152+
static struct workqueue_struct *cgroup_release_wq;
153+
static struct workqueue_struct *cgroup_free_wq;
131154

132155
/* generate an array of cgroup subsystem pointers */
133156
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@ -4159,6 +4182,7 @@ static void cgroup_file_release(struct kernfs_open_file *of)
41594182
cft->release(of);
41604183
put_cgroup_ns(ctx->ns);
41614184
kfree(ctx);
4185+
of->priv = NULL;
41624186
}
41634187

41644188
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
@@ -5558,7 +5582,7 @@ static void css_release_work_fn(struct work_struct *work)
55585582
cgroup_unlock();
55595583

55605584
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5561-
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5585+
queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
55625586
}
55635587

55645588
static void css_release(struct percpu_ref *ref)
@@ -5567,7 +5591,7 @@ static void css_release(struct percpu_ref *ref)
55675591
container_of(ref, struct cgroup_subsys_state, refcnt);
55685592

55695593
INIT_WORK(&css->destroy_work, css_release_work_fn);
5570-
queue_work(cgroup_destroy_wq, &css->destroy_work);
5594+
queue_work(cgroup_release_wq, &css->destroy_work);
55715595
}
55725596

55735597
static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5701,7 +5725,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
57015725
list_del_rcu(&css->sibling);
57025726
err_free_css:
57035727
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5704-
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5728+
queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
57055729
return ERR_PTR(err);
57065730
}
57075731

@@ -5939,7 +5963,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
59395963

59405964
if (atomic_dec_and_test(&css->online_cnt)) {
59415965
INIT_WORK(&css->destroy_work, css_killed_work_fn);
5942-
queue_work(cgroup_destroy_wq, &css->destroy_work);
5966+
queue_work(cgroup_offline_wq, &css->destroy_work);
59435967
}
59445968
}
59455969

@@ -6325,8 +6349,14 @@ static int __init cgroup_wq_init(void)
63256349
* We would prefer to do this in cgroup_init() above, but that
63266350
* is called before init_workqueues(): so leave this until after.
63276351
*/
6328-
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
6329-
BUG_ON(!cgroup_destroy_wq);
6352+
cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
6353+
BUG_ON(!cgroup_offline_wq);
6354+
6355+
cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
6356+
BUG_ON(!cgroup_release_wq);
6357+
6358+
cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
6359+
BUG_ON(!cgroup_free_wq);
63306360
return 0;
63316361
}
63326362
core_initcall(cgroup_wq_init);

0 commit comments

Comments
 (0)