Skip to content

Commit a040c35

Browse files
Waiman-Longhtejun
authored andcommitted
cgroup/cpuset: Enforce at most one rebuild_sched_domains_locked() call per operation
Since commit ff0ce72 ("cgroup/cpuset: Eliminate unncessary sched domains rebuilds in hotplug"), there is only one rebuild_sched_domains_locked() call per hotplug operation. However, writing to the various cpuset control files may still casue more than one rebuild_sched_domains_locked() call to happen in some cases. Juri had found that two rebuild_sched_domains_locked() calls in update_prstate(), one from update_cpumasks_hier() and another one from update_partition_sd_lb() could cause cpuset partition to be created with null total_bw for DL tasks. IOW, DL tasks may not be scheduled correctly in such a partition. A sample command sequence that can reproduce null total_bw is as follows. # echo Y >/sys/kernel/debug/sched/verbose # echo +cpuset >/sys/fs/cgroup/cgroup.subtree_control # mkdir /sys/fs/cgroup/test # echo 0-7 > /sys/fs/cgroup/test/cpuset.cpus # echo 6-7 > /sys/fs/cgroup/test/cpuset.cpus.exclusive # echo root >/sys/fs/cgroup/test/cpuset.cpus.partition Fix this double rebuild_sched_domains_locked() calls problem by replacing existing calls with cpuset_force_rebuild() except the rebuild_sched_domains_cpuslocked() call at the end of cpuset_handle_hotplug(). Checking of the force_sd_rebuild flag is now done at the end of cpuset_write_resmask() and update_prstate() to determine if rebuild_sched_domains_locked() should be called or not. The cpuset v1 code can still call rebuild_sched_domains_locked() directly as double rebuild_sched_domains_locked() calls is not possible. Reported-by: Juri Lelli <[email protected]> Closes: https://lore.kernel.org/lkml/[email protected]/ Signed-off-by: Waiman Long <[email protected]> Tested-by: Juri Lelli <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent bcd7012 commit a040c35

File tree

1 file changed

+33
-16
lines changed

1 file changed

+33
-16
lines changed

kernel/cgroup/cpuset.c

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,19 @@ static bool have_boot_isolcpus;
8484
static struct list_head remote_children;
8585

8686
/*
87-
* A flag to force sched domain rebuild at the end of an operation while
88-
* inhibiting it in the intermediate stages when set. Currently it is only
89-
* set in hotplug code.
87+
* A flag to force sched domain rebuild at the end of an operation.
88+
* It can be set in
89+
* - update_partition_sd_lb()
90+
* - remote_partition_check()
91+
* - update_cpumasks_hier()
92+
* - cpuset_update_flag()
93+
* - cpuset_hotplug_update_tasks()
94+
* - cpuset_handle_hotplug()
95+
*
96+
* Protected by cpuset_mutex (with cpus_read_lock held) or cpus_write_lock.
97+
*
98+
* Note that update_relax_domain_level() in cpuset-v1.c can still call
99+
* rebuild_sched_domains_locked() directly without using this flag.
90100
*/
91101
static bool force_sd_rebuild;
92102

@@ -990,6 +1000,7 @@ void rebuild_sched_domains_locked(void)
9901000

9911001
lockdep_assert_cpus_held();
9921002
lockdep_assert_held(&cpuset_mutex);
1003+
force_sd_rebuild = false;
9931004

9941005
/*
9951006
* If we have raced with CPU hotplug, return early to avoid
@@ -1164,8 +1175,8 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
11641175
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
11651176
}
11661177

1167-
if (rebuild_domains && !force_sd_rebuild)
1168-
rebuild_sched_domains_locked();
1178+
if (rebuild_domains)
1179+
cpuset_force_rebuild();
11691180
}
11701181

11711182
/*
@@ -1512,8 +1523,8 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
15121523
remote_partition_disable(child, tmp);
15131524
disable_cnt++;
15141525
}
1515-
if (disable_cnt && !force_sd_rebuild)
1516-
rebuild_sched_domains_locked();
1526+
if (disable_cnt)
1527+
cpuset_force_rebuild();
15171528
}
15181529

15191530
/*
@@ -2106,8 +2117,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
21062117
}
21072118
rcu_read_unlock();
21082119

2109-
if (need_rebuild_sched_domains && !force_sd_rebuild)
2110-
rebuild_sched_domains_locked();
2120+
if (need_rebuild_sched_domains)
2121+
cpuset_force_rebuild();
21112122
}
21122123

21132124
/**
@@ -2726,9 +2737,13 @@ int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
27262737
cs->flags = trialcs->flags;
27272738
spin_unlock_irq(&callback_lock);
27282739

2729-
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
2730-
!force_sd_rebuild)
2731-
rebuild_sched_domains_locked();
2740+
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) {
2741+
if (!IS_ENABLED(CONFIG_CPUSETS_V1) ||
2742+
cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
2743+
cpuset_force_rebuild();
2744+
else
2745+
rebuild_sched_domains_locked();
2746+
}
27322747

27332748
if (spread_flag_changed)
27342749
cpuset1_update_tasks_flags(cs);
@@ -2848,6 +2863,8 @@ static int update_prstate(struct cpuset *cs, int new_prs)
28482863
update_partition_sd_lb(cs, old_prs);
28492864

28502865
notify_partition_change(cs, old_prs);
2866+
if (force_sd_rebuild)
2867+
rebuild_sched_domains_locked();
28512868
free_cpumasks(NULL, &tmpmask);
28522869
return 0;
28532870
}
@@ -3141,6 +3158,8 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
31413158
}
31423159

31433160
free_cpuset(trialcs);
3161+
if (force_sd_rebuild)
3162+
rebuild_sched_domains_locked();
31443163
out_unlock:
31453164
mutex_unlock(&cpuset_mutex);
31463165
cpus_read_unlock();
@@ -3885,11 +3904,9 @@ static void cpuset_handle_hotplug(void)
38853904
rcu_read_unlock();
38863905
}
38873906

3888-
/* rebuild sched domains if cpus_allowed has changed */
3889-
if (force_sd_rebuild) {
3890-
force_sd_rebuild = false;
3907+
/* rebuild sched domains if necessary */
3908+
if (force_sd_rebuild)
38913909
rebuild_sched_domains_cpuslocked();
3892-
}
38933910

38943911
free_cpumasks(NULL, ptmp);
38953912
}

0 commit comments

Comments
 (0)