Skip to content

Commit 2ff899e

Browse files
jlelli authored and Peter Zijlstra committed
sched/deadline: Rebuild root domain accounting after every update
Rebuilding of root domain accounting information (total_bw) is currently broken in some cases, e.g. suspend/resume on aarch64. The problem is that the way we keep track of domain changes and try to add bandwidth back is convoluted and fragile. Fix it by simplifying things: make sure bandwidth accounting is cleared and completely restored after root domain changes (once root domains are stable again). To be sure we always call dl_rebuild_rd_accounting() while holding cpuset_mutex, also add a cpuset_reset_sched_domains() wrapper. Fixes: 53916d5 ("sched/deadline: Check bandwidth overflow earlier for hotplug") Reported-by: Jon Hunter <[email protected]> Co-developed-by: Waiman Long <[email protected]> Signed-off-by: Waiman Long <[email protected]> Signed-off-by: Juri Lelli <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Dietmar Eggemann <[email protected]> Tested-by: Dietmar Eggemann <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 45007c6 commit 2ff899e

File tree

7 files changed

+38
-15
lines changed

7 files changed

+38
-15
lines changed

include/linux/cpuset.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ extern bool current_cpuset_is_being_rebound(void);
128128
extern void rebuild_sched_domains(void);
129129

130130
extern void cpuset_print_current_mems_allowed(void);
131+
extern void cpuset_reset_sched_domains(void);
131132

132133
/*
133134
* read_mems_allowed_begin is required when making decisions involving
@@ -264,6 +265,11 @@ static inline void rebuild_sched_domains(void)
264265
partition_sched_domains(1, NULL, NULL);
265266
}
266267

268+
static inline void cpuset_reset_sched_domains(void)
269+
{
270+
partition_sched_domains(1, NULL, NULL);
271+
}
272+
267273
static inline void cpuset_print_current_mems_allowed(void)
268274
{
269275
}

include/linux/sched/deadline.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ static inline bool dl_time_before(u64 a, u64 b)
3434
struct root_domain;
3535
extern void dl_add_task_root_domain(struct task_struct *p);
3636
extern void dl_clear_root_domain(struct root_domain *rd);
37+
extern void dl_clear_root_domain_cpu(int cpu);
3738

3839
#endif /* CONFIG_SMP */
3940

include/linux/sched/topology.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
166166
return to_cpumask(sd->span);
167167
}
168168

169+
extern void dl_rebuild_rd_accounting(void);
170+
169171
extern void partition_sched_domains_locked(int ndoms_new,
170172
cpumask_var_t doms_new[],
171173
struct sched_domain_attr *dattr_new);

kernel/cgroup/cpuset.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -954,22 +954,25 @@ static void dl_update_tasks_root_domain(struct cpuset *cs)
954954
css_task_iter_end(&it);
955955
}
956956

957-
static void dl_rebuild_rd_accounting(void)
957+
void dl_rebuild_rd_accounting(void)
958958
{
959959
struct cpuset *cs = NULL;
960960
struct cgroup_subsys_state *pos_css;
961+
int cpu;
962+
u64 cookie = ++dl_cookie;
961963

962964
lockdep_assert_held(&cpuset_mutex);
963965
lockdep_assert_cpus_held();
964966
lockdep_assert_held(&sched_domains_mutex);
965967

966968
rcu_read_lock();
967969

968-
/*
969-
* Clear default root domain DL accounting, it will be computed again
970-
* if a task belongs to it.
971-
*/
972-
dl_clear_root_domain(&def_root_domain);
970+
for_each_possible_cpu(cpu) {
971+
if (dl_bw_visited(cpu, cookie))
972+
continue;
973+
974+
dl_clear_root_domain_cpu(cpu);
975+
}
973976

974977
cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
975978

@@ -996,7 +999,6 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
996999
{
9971000
sched_domains_mutex_lock();
9981001
partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
999-
dl_rebuild_rd_accounting();
10001002
sched_domains_mutex_unlock();
10011003
}
10021004

@@ -1083,6 +1085,13 @@ void rebuild_sched_domains(void)
10831085
cpus_read_unlock();
10841086
}
10851087

1088+
void cpuset_reset_sched_domains(void)
1089+
{
1090+
mutex_lock(&cpuset_mutex);
1091+
partition_sched_domains(1, NULL, NULL);
1092+
mutex_unlock(&cpuset_mutex);
1093+
}
1094+
10861095
/**
10871096
* cpuset_update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
10881097
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed

kernel/sched/core.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8229,7 +8229,7 @@ static void cpuset_cpu_active(void)
82298229
* operation in the resume sequence, just build a single sched
82308230
* domain, ignoring cpusets.
82318231
*/
8232-
partition_sched_domains(1, NULL, NULL);
8232+
cpuset_reset_sched_domains();
82338233
if (--num_cpus_frozen)
82348234
return;
82358235
/*
@@ -8248,7 +8248,7 @@ static void cpuset_cpu_inactive(unsigned int cpu)
82488248
cpuset_update_active_cpus();
82498249
} else {
82508250
num_cpus_frozen++;
8251-
partition_sched_domains(1, NULL, NULL);
8251+
cpuset_reset_sched_domains();
82528252
}
82538253
}
82548254

kernel/sched/deadline.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ static inline unsigned long dl_bw_capacity(int i)
166166
}
167167
}
168168

169-
static inline bool dl_bw_visited(int cpu, u64 cookie)
169+
bool dl_bw_visited(int cpu, u64 cookie)
170170
{
171171
struct root_domain *rd = cpu_rq(cpu)->rd;
172172

@@ -207,7 +207,7 @@ static inline unsigned long dl_bw_capacity(int i)
207207
return SCHED_CAPACITY_SCALE;
208208
}
209209

210-
static inline bool dl_bw_visited(int cpu, u64 cookie)
210+
bool dl_bw_visited(int cpu, u64 cookie)
211211
{
212212
return false;
213213
}
@@ -2981,18 +2981,22 @@ void dl_clear_root_domain(struct root_domain *rd)
29812981
rd->dl_bw.total_bw = 0;
29822982

29832983
/*
2984-
* dl_server bandwidth is only restored when CPUs are attached to root
2985-
* domains (after domains are created or CPUs moved back to the
2986-
* default root doamin).
2984+
* dl_servers are not tasks. Since dl_add_task_root_domain ignores
2985+
* them, we need to account for them here explicitly.
29872986
*/
29882987
for_each_cpu(i, rd->span) {
29892988
struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
29902989

29912990
if (dl_server(dl_se) && cpu_active(i))
2992-
rd->dl_bw.total_bw += dl_se->dl_bw;
2991+
__dl_add(&rd->dl_bw, dl_se->dl_bw, dl_bw_cpus(i));
29932992
}
29942993
}
29952994

2995+
void dl_clear_root_domain_cpu(int cpu)
2996+
{
2997+
dl_clear_root_domain(cpu_rq(cpu)->rd);
2998+
}
2999+
29963000
#endif /* CONFIG_SMP */
29973001

29983002
static void switched_from_dl(struct rq *rq, struct task_struct *p)

kernel/sched/topology.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2791,6 +2791,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
27912791
ndoms_cur = ndoms_new;
27922792

27932793
update_sched_domain_debugfs();
2794+
dl_rebuild_rd_accounting();
27942795
}
27952796

27962797
/*

0 commit comments

Comments
 (0)