Skip to content

Commit 5c5c662

Browse files
committed
cgroup/cpuset: Enforce at most one rebuild_sched_domains_locked() call per operation
jira NONE_AUTOMATION Rebuild_History Non-Buildable kernel-5.14.0-570.17.1.el9_6 commit-author Waiman Long <[email protected]> commit a040c35 Since commit ff0ce72 ("cgroup/cpuset: Eliminate unncessary sched domains rebuilds in hotplug"), there is only one rebuild_sched_domains_locked() call per hotplug operation. However, writing to the various cpuset control files may still cause more than one rebuild_sched_domains_locked() call to happen in some cases. Juri had found that two rebuild_sched_domains_locked() calls in update_prstate(), one from update_cpumasks_hier() and another one from update_partition_sd_lb(), could cause a cpuset partition to be created with null total_bw for DL tasks. IOW, DL tasks may not be scheduled correctly in such a partition. A sample command sequence that can reproduce null total_bw is as follows. # echo Y >/sys/kernel/debug/sched/verbose # echo +cpuset >/sys/fs/cgroup/cgroup.subtree_control # mkdir /sys/fs/cgroup/test # echo 0-7 > /sys/fs/cgroup/test/cpuset.cpus # echo 6-7 > /sys/fs/cgroup/test/cpuset.cpus.exclusive # echo root >/sys/fs/cgroup/test/cpuset.cpus.partition Fix this double rebuild_sched_domains_locked() call problem by replacing existing calls with cpuset_force_rebuild() except the rebuild_sched_domains_cpuslocked() call at the end of cpuset_handle_hotplug(). Checking of the force_sd_rebuild flag is now done at the end of cpuset_write_resmask() and update_prstate() to determine if rebuild_sched_domains_locked() should be called or not. The cpuset v1 code can still call rebuild_sched_domains_locked() directly as double rebuild_sched_domains_locked() calls are not possible. Reported-by: Juri Lelli <[email protected]> Closes: https://lore.kernel.org/lkml/[email protected]/ Signed-off-by: Waiman Long <[email protected]> Tested-by: Juri Lelli <[email protected]> Signed-off-by: Tejun Heo <[email protected]> (cherry picked from commit a040c35) Signed-off-by: Jonathan Maple <[email protected]>
1 parent e35c088 commit 5c5c662

File tree

1 file changed

+33
-16
lines changed

1 file changed

+33
-16
lines changed

kernel/cgroup/cpuset.c

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,19 @@ static bool have_boot_isolcpus;
234234
static struct list_head remote_children;
235235

236236
/*
237-
* A flag to force sched domain rebuild at the end of an operation while
238-
* inhibiting it in the intermediate stages when set. Currently it is only
239-
* set in hotplug code.
237+
* A flag to force sched domain rebuild at the end of an operation.
238+
* It can be set in
239+
* - update_partition_sd_lb()
240+
* - remote_partition_check()
241+
* - update_cpumasks_hier()
242+
* - cpuset_update_flag()
243+
* - cpuset_hotplug_update_tasks()
244+
* - cpuset_handle_hotplug()
245+
*
246+
* Protected by cpuset_mutex (with cpus_read_lock held) or cpus_write_lock.
247+
*
248+
* Note that update_relax_domain_level() in cpuset-v1.c can still call
249+
* rebuild_sched_domains_locked() directly without using this flag.
240250
*/
241251
static bool force_sd_rebuild;
242252

@@ -1273,6 +1283,7 @@ static void rebuild_sched_domains_locked(void)
12731283

12741284
lockdep_assert_cpus_held();
12751285
lockdep_assert_held(&cpuset_mutex);
1286+
force_sd_rebuild = false;
12761287

12771288
/*
12781289
* If we have raced with CPU hotplug, return early to avoid
@@ -1449,8 +1460,8 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
14491460
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
14501461
}
14511462

1452-
if (rebuild_domains && !force_sd_rebuild)
1453-
rebuild_sched_domains_locked();
1463+
if (rebuild_domains)
1464+
cpuset_force_rebuild();
14541465
}
14551466

14561467
/*
@@ -1797,8 +1808,8 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
17971808
remote_partition_disable(child, tmp);
17981809
disable_cnt++;
17991810
}
1800-
if (disable_cnt && !force_sd_rebuild)
1801-
rebuild_sched_domains_locked();
1811+
if (disable_cnt)
1812+
cpuset_force_rebuild();
18021813
}
18031814

18041815
/*
@@ -2391,8 +2402,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
23912402
}
23922403
rcu_read_unlock();
23932404

2394-
if (need_rebuild_sched_domains && !force_sd_rebuild)
2395-
rebuild_sched_domains_locked();
2405+
if (need_rebuild_sched_domains)
2406+
cpuset_force_rebuild();
23962407
}
23972408

23982409
/**
@@ -3047,9 +3058,13 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
30473058
cs->flags = trialcs->flags;
30483059
spin_unlock_irq(&callback_lock);
30493060

3050-
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
3051-
!force_sd_rebuild)
3052-
rebuild_sched_domains_locked();
3061+
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) {
3062+
if (!IS_ENABLED(CONFIG_CPUSETS_V1) ||
3063+
cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
3064+
cpuset_force_rebuild();
3065+
else
3066+
rebuild_sched_domains_locked();
3067+
}
30533068

30543069
if (spread_flag_changed)
30553070
update_tasks_flags(cs);
@@ -3169,6 +3184,8 @@ static int update_prstate(struct cpuset *cs, int new_prs)
31693184
update_partition_sd_lb(cs, old_prs);
31703185

31713186
notify_partition_change(cs, old_prs);
3187+
if (force_sd_rebuild)
3188+
rebuild_sched_domains_locked();
31723189
free_cpumasks(NULL, &tmpmask);
31733190
return 0;
31743191
}
@@ -3666,6 +3683,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
36663683
}
36673684

36683685
free_cpuset(trialcs);
3686+
if (force_sd_rebuild)
3687+
rebuild_sched_domains_locked();
36693688
out_unlock:
36703689
mutex_unlock(&cpuset_mutex);
36713690
cpus_read_unlock();
@@ -4652,11 +4671,9 @@ static void cpuset_handle_hotplug(void)
46524671
rcu_read_unlock();
46534672
}
46544673

4655-
/* rebuild sched domains if cpus_allowed has changed */
4656-
if (force_sd_rebuild) {
4657-
force_sd_rebuild = false;
4674+
/* rebuild sched domains if necessary */
4675+
if (force_sd_rebuild)
46584676
rebuild_sched_domains_cpuslocked();
4659-
}
46604677

46614678
free_cpumasks(NULL, ptmp);
46624679
}

0 commit comments

Comments
 (0)