Commit e5ed055

vingu-linaro authored and Peter Zijlstra committed
sched/fair: unlink misfit task from cpu overutilized
By taking into account uclamp_min, the 1:1 relation between task misfit and cpu overutilized no longer holds: a task with a small util_avg may not fit a high-capacity cpu because of the uclamp_min constraint.

Add a new state in util_fits_cpu() to reflect the case where a task would fit a CPU except for the uclamp_min hint, which is a performance requirement.

Use -1 to reflect that a CPU doesn't fit only because of uclamp_min, so we can use this new value to take additional action and select the best CPU that doesn't match the uclamp_min hint.

When util_fits_cpu() returns -1, we will continue to look for a possible CPU with better performance. This replaces Capacity Inversion detection with capacity_orig_of() - thermal_load_avg to detect a capacity inversion.

Signed-off-by: Vincent Guittot <[email protected]>
Reviewed-and-tested-by: Qais Yousef <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Dietmar Eggemann <[email protected]>
Tested-by: Kajetan Puchalski <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
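For readers skimming the change, here is a minimal userspace sketch of the tri-state contract this commit introduces. It is an illustration only, not the kernel's util_fits_cpu() (which also handles uclamp_max capping and capacity margins); the function name util_fits_cpu_model and all capacity values are assumptions for the example.

#include <stdio.h>

/*
 * Illustrative model of the new tri-state convention:
 *    1 -> utilization and both uclamp hints fit this CPU
 *    0 -> the utilization itself doesn't fit
 *   -1 -> utilization fits, but the uclamp_min performance hint
 *         cannot be met (e.g. because of thermal pressure)
 */
static int util_fits_cpu_model(unsigned long util,
                               unsigned long uclamp_min,
                               unsigned long uclamp_max,
                               unsigned long capacity_orig,
                               unsigned long capacity_orig_thermal)
{
        int fits = util < capacity_orig;        /* raw utilization check */

        /* handle the case uclamp_min > uclamp_max, as the kernel does */
        if (uclamp_min > uclamp_max)
                uclamp_min = uclamp_max;

        /* fits on raw util, but the min performance hint is not met */
        if (fits && (util < uclamp_min) && (uclamp_min > capacity_orig_thermal))
                return -1;

        return fits;
}

int main(void)
{
        /* hypothetical little CPU: capacity 512, no thermal pressure */
        printf("%d\n", util_fits_cpu_model(100,   0, 1024, 512, 512)); /*  1 */
        printf("%d\n", util_fits_cpu_model(100, 768, 1024, 512, 512)); /* -1 */
        printf("%d\n", util_fits_cpu_model(600,   0, 1024, 512, 512)); /*  0 */
        return 0;
}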
1 parent 443ed4c commit e5ed055

File tree

1 file changed (+82, -23 lines)


kernel/sched/fair.c

Lines changed: 82 additions & 23 deletions
@@ -4561,8 +4561,8 @@ static inline int util_fits_cpu(unsigned long util,
         * handle the case uclamp_min > uclamp_max.
         */
        uclamp_min = min(uclamp_min, uclamp_max);
-       if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE)
-               fits = fits && (uclamp_min <= capacity_orig_thermal);
+       if (fits && (util < uclamp_min) && (uclamp_min > capacity_orig_thermal))
+               return -1;

        return fits;
 }
@@ -4572,7 +4572,11 @@ static inline int task_fits_cpu(struct task_struct *p, int cpu)
        unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN);
        unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX);
        unsigned long util = task_util_est(p);
-       return util_fits_cpu(util, uclamp_min, uclamp_max, cpu);
+       /*
+        * Return true only if the cpu fully fits the task requirements, which
+        * include the utilization but also the performance hints.
+        */
+       return (util_fits_cpu(util, uclamp_min, uclamp_max, cpu) > 0);
 }

 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
@@ -6138,6 +6142,7 @@ static inline bool cpu_overutilized(int cpu)
        unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
        unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);

+       /* Return true only if the utilization doesn't fit CPU's capacity */
        return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
 }

@@ -6931,6 +6936,7 @@ static int
 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 {
        unsigned long task_util, util_min, util_max, best_cap = 0;
+       int fits, best_fits = 0;
        int cpu, best_cpu = -1;
        struct cpumask *cpus;

@@ -6946,12 +6952,28 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)

                if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
                        continue;
-               if (util_fits_cpu(task_util, util_min, util_max, cpu))
+
+               fits = util_fits_cpu(task_util, util_min, util_max, cpu);
+
+               /* This CPU fits with all requirements */
+               if (fits > 0)
                        return cpu;
+               /*
+                * Only the min performance hint (i.e. uclamp_min) doesn't fit.
+                * Look for the CPU with best capacity.
+                */
+               else if (fits < 0)
+                       cpu_cap = capacity_orig_of(cpu) - thermal_load_avg(cpu_rq(cpu));

-               if (cpu_cap > best_cap) {
+               /*
+                * First, select CPU which fits better (-1 being better than 0).
+                * Then, select the one with best capacity at same level.
+                */
+               if ((fits < best_fits) ||
+                   ((fits == best_fits) && (cpu_cap > best_cap))) {
                        best_cap = cpu_cap;
                        best_cpu = cpu;
+                       best_fits = fits;
                }
        }

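The two-key preference above (a -1 "fits except uclamp_min" CPU beats a 0 "doesn't fit" CPU, with ties broken by thermally adjusted capacity) can be restated as a standalone comparator. This is a sketch for clarity, not kernel code; the helper name better_candidate is an assumption.

/*
 * Sketch of the candidate ordering used above: a lower fits value wins
 * (-1 beats 0; a CPU returning 1 is taken immediately and never gets
 * here), and equal fits values are broken by larger capacity.
 * Returns nonzero if (fits, cap) is a better candidate than
 * (best_fits, best_cap).
 */
static int better_candidate(int fits, unsigned long cap,
                            int best_fits, unsigned long best_cap)
{
        return (fits < best_fits) ||
               ((fits == best_fits) && (cap > best_cap));
}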
@@ -6964,7 +6986,11 @@ static inline bool asym_fits_cpu(unsigned long util,
                                 int cpu)
 {
        if (sched_asym_cpucap_active())
-               return util_fits_cpu(util, util_min, util_max, cpu);
+               /*
+                * Return true only if the cpu fully fits the task requirements
+                * which include the utilization and the performance hints.
+                */
+               return (util_fits_cpu(util, util_min, util_max, cpu) > 0);

        return true;
 }
@@ -7331,6 +7357,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
        unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
        struct root_domain *rd = this_rq()->rd;
        int cpu, best_energy_cpu, target = -1;
+       int prev_fits = -1, best_fits = -1;
+       unsigned long best_thermal_cap = 0;
+       unsigned long prev_thermal_cap = 0;
        struct sched_domain *sd;
        struct perf_domain *pd;
        struct energy_env eenv;
@@ -7366,6 +7395,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                unsigned long prev_spare_cap = 0;
                int max_spare_cap_cpu = -1;
                unsigned long base_energy;
+               int fits, max_fits = -1;

                cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);

@@ -7415,22 +7445,27 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                                util_min = max(rq_util_min, p_util_min);
                                util_max = max(rq_util_max, p_util_max);
                        }
-                       if (!util_fits_cpu(util, util_min, util_max, cpu))
+
+                       fits = util_fits_cpu(util, util_min, util_max, cpu);
+                       if (!fits)
                                continue;

                        lsub_positive(&cpu_cap, util);

                        if (cpu == prev_cpu) {
                                /* Always use prev_cpu as a candidate. */
                                prev_spare_cap = cpu_cap;
-                       } else if (cpu_cap > max_spare_cap) {
+                               prev_fits = fits;
+                       } else if ((fits > max_fits) ||
+                                  ((fits == max_fits) && (cpu_cap > max_spare_cap))) {
                                /*
                                 * Find the CPU with the maximum spare capacity
                                 * among the remaining CPUs in the performance
                                 * domain.
                                 */
                                max_spare_cap = cpu_cap;
                                max_spare_cap_cpu = cpu;
+                               max_fits = fits;
                        }
                }

@@ -7449,26 +7484,50 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        if (prev_delta < base_energy)
                                goto unlock;
                        prev_delta -= base_energy;
+                       prev_thermal_cap = cpu_thermal_cap;
                        best_delta = min(best_delta, prev_delta);
                }

                /* Evaluate the energy impact of using max_spare_cap_cpu. */
                if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
+                       /* Current best energy cpu fits better */
+                       if (max_fits < best_fits)
+                               continue;
+
+                       /*
+                        * Both don't fit performance hint (i.e. uclamp_min)
+                        * but best energy cpu has better capacity.
+                        */
+                       if ((max_fits < 0) &&
+                           (cpu_thermal_cap <= best_thermal_cap))
+                               continue;
+
                        cur_delta = compute_energy(&eenv, pd, cpus, p,
                                                   max_spare_cap_cpu);
                        /* CPU utilization has changed */
                        if (cur_delta < base_energy)
                                goto unlock;
                        cur_delta -= base_energy;
-                       if (cur_delta < best_delta) {
-                               best_delta = cur_delta;
-                               best_energy_cpu = max_spare_cap_cpu;
-                       }
+
+                       /*
+                        * Both fit for the task but best energy cpu has lower
+                        * energy impact.
+                        */
+                       if ((max_fits > 0) && (best_fits > 0) &&
+                           (cur_delta >= best_delta))
+                               continue;
+
+                       best_delta = cur_delta;
+                       best_energy_cpu = max_spare_cap_cpu;
+                       best_fits = max_fits;
+                       best_thermal_cap = cpu_thermal_cap;
                }
        }
        rcu_read_unlock();

-       if (best_delta < prev_delta)
+       if ((best_fits > prev_fits) ||
+           ((best_fits > 0) && (best_delta < prev_delta)) ||
+           ((best_fits < 0) && (best_thermal_cap > prev_thermal_cap)))
                target = best_energy_cpu;

        return target;
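The final prev_cpu versus best_energy_cpu decision above can likewise be read as a small predicate: prefer the best energy CPU if it fits better than prev_cpu; if both fully fit, pick it only when it saves energy; if neither meets uclamp_min, pick it only when it offers more thermally capped capacity. A sketch with an assumed helper name, not the kernel function:

/*
 * Sketch of the final selection: returns nonzero when best_energy_cpu
 * should replace prev_cpu as the target.
 */
static int prefer_best_energy_cpu(int best_fits, int prev_fits,
                                  unsigned long best_delta,
                                  unsigned long prev_delta,
                                  unsigned long best_thermal_cap,
                                  unsigned long prev_thermal_cap)
{
        return (best_fits > prev_fits) ||
               ((best_fits > 0) && (best_delta < prev_delta)) ||
               ((best_fits < 0) && (best_thermal_cap > prev_thermal_cap));
}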
@@ -10271,24 +10330,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
         */
        update_sd_lb_stats(env, &sds);

-       if (sched_energy_enabled()) {
-               struct root_domain *rd = env->dst_rq->rd;
-
-               if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized))
-                       goto out_balanced;
-       }
-
-       local = &sds.local_stat;
-       busiest = &sds.busiest_stat;
-
        /* There is no busy sibling group to pull tasks from */
        if (!sds.busiest)
                goto out_balanced;

+       busiest = &sds.busiest_stat;
+
        /* Misfit tasks should be dealt with regardless of the avg load */
        if (busiest->group_type == group_misfit_task)
                goto force_balance;

+       if (sched_energy_enabled()) {
+               struct root_domain *rd = env->dst_rq->rd;
+
+               if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized))
+                       goto out_balanced;
+       }
+
        /* ASYM feature bypasses nice load balance check */
        if (busiest->group_type == group_asym_packing)
                goto force_balance;
@@ -10301,6 +10359,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
        if (busiest->group_type == group_imbalanced)
                goto force_balance;

+       local = &sds.local_stat;
        /*
         * If the local group is busier than the selected busiest group
         * don't try and pull any tasks.
