
Commit 60ffd5e

Luca Abeni authored and Peter Zijlstra committed
sched/deadline: Improve admission control for asymmetric CPU capacities
The current SCHED_DEADLINE (DL) admission control ensures that

    sum of reserved CPU bandwidth < x * M

where

    x = /proc/sys/kernel/sched_rt_{runtime,period}_us
    M = # CPUs in the root domain

DL admission control works well for homogeneous systems, where the capacity of every CPU is equal (1024): bounded tardiness for DL tasks and non-starvation of non-DL tasks are guaranteed. But on heterogeneous systems, where CPU capacities differ, it can fail by over-allocating CPU time on the smaller-capacity CPUs. On an Arm big.LITTLE/DynamIQ system, DL tasks can easily starve other tasks, making the system unusable.

Fix this by explicitly considering CPU capacity in the DL admission test: replace M with the root domain's CPU capacity sum.

Signed-off-by: Luca Abeni <[email protected]>
Signed-off-by: Dietmar Eggemann <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Juri Lelli <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent fc9dc69 commit 60ffd5e
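To make the change concrete, here is a small standalone sketch of the old and new admission bounds. It is not kernel code: the constants and the cap_scale() helper mirror kernel/sched definitions, and the big.LITTLE numbers are made up for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10	/* a CPU at full capacity is 1024 */
#define BW_SHIFT		20	/* bandwidth ratios are in 1 << 20 units */

/* Mirrors the kernel's cap_scale(): scale @v by @cap / 1024. */
static uint64_t cap_scale(uint64_t v, unsigned long cap)
{
	return (v * cap) >> SCHED_CAPACITY_SHIFT;
}

/* Old bound: per-CPU bandwidth limit times the number of CPUs. */
static bool dl_overflow_old(uint64_t bw, int cpus, uint64_t req_bw)
{
	return bw * cpus < req_bw;
}

/* New bound: per-CPU bandwidth limit scaled by the capacity sum. */
static bool dl_overflow_new(uint64_t bw, unsigned long cap, uint64_t req_bw)
{
	return cap_scale(bw, cap) < req_bw;
}

int main(void)
{
	uint64_t bw = (95ULL << BW_SHIFT) / 100;  /* default 95% limit */
	int cpus = 8;                             /* 4 big + 4 LITTLE */
	unsigned long cap = 4 * 1024 + 4 * 512;   /* capacity sum: 6144 */
	uint64_t req_bw = 7ULL << BW_SHIFT;       /* 7 CPUs' worth of DL load */

	/* Old test admits (8 * 0.95 = 7.6 > 7), over-allocating the LITTLEs. */
	printf("old test overflows: %d\n", dl_overflow_old(bw, cpus, req_bw));
	/* New test rejects (6144/1024 * 0.95 = 5.7 < 7). */
	printf("new test overflows: %d\n", dl_overflow_new(bw, cap, req_bw));
	return 0;
}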

File tree

2 files changed: +20 -16 lines changed


kernel/sched/deadline.c

Lines changed: 17 additions & 13 deletions
@@ -2590,11 +2590,12 @@ void sched_dl_do_global(void)
 int sched_dl_overflow(struct task_struct *p, int policy,
 		      const struct sched_attr *attr)
 {
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 	u64 period = attr->sched_period ?: attr->sched_deadline;
 	u64 runtime = attr->sched_runtime;
 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-	int cpus, err = -1;
+	int cpus, err = -1, cpu = task_cpu(p);
+	struct dl_bw *dl_b = dl_bw_of(cpu);
+	unsigned long cap;
 
 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
 		return 0;
@@ -2609,15 +2610,17 @@ int sched_dl_overflow(struct task_struct *p, int policy,
 	 * allocated bandwidth of the container.
 	 */
 	raw_spin_lock(&dl_b->lock);
-	cpus = dl_bw_cpus(task_cpu(p));
+	cpus = dl_bw_cpus(cpu);
+	cap = dl_bw_capacity(cpu);
+
 	if (dl_policy(policy) && !task_has_dl_policy(p) &&
-	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+	    !__dl_overflow(dl_b, cap, 0, new_bw)) {
 		if (hrtimer_active(&p->dl.inactive_timer))
 			__dl_sub(dl_b, p->dl.dl_bw, cpus);
 		__dl_add(dl_b, new_bw, cpus);
 		err = 0;
 	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
-		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+		   !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
 		/*
 		 * XXX this is slightly incorrect: when the task
 		 * utilization decreases, we should delay the total
@@ -2772,19 +2775,19 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
 #ifdef CONFIG_SMP
 int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
 {
+	unsigned long flags, cap;
 	unsigned int dest_cpu;
 	struct dl_bw *dl_b;
 	bool overflow;
-	int cpus, ret;
-	unsigned long flags;
+	int ret;
 
 	dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
 
 	rcu_read_lock_sched();
 	dl_b = dl_bw_of(dest_cpu);
 	raw_spin_lock_irqsave(&dl_b->lock, flags);
-	cpus = dl_bw_cpus(dest_cpu);
-	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
+	cap = dl_bw_capacity(dest_cpu);
+	overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
 	if (overflow) {
 		ret = -EBUSY;
 	} else {
@@ -2794,6 +2797,8 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo
 		 * We will free resources in the source root_domain
 		 * later on (see set_cpus_allowed_dl()).
 		 */
+		int cpus = dl_bw_cpus(dest_cpu);
+
 		__dl_add(dl_b, p->dl.dl_bw, cpus);
 		ret = 0;
 	}
@@ -2826,16 +2831,15 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
 
 bool dl_cpu_busy(unsigned int cpu)
 {
-	unsigned long flags;
+	unsigned long flags, cap;
 	struct dl_bw *dl_b;
 	bool overflow;
-	int cpus;
 
 	rcu_read_lock_sched();
 	dl_b = dl_bw_of(cpu);
 	raw_spin_lock_irqsave(&dl_b->lock, flags);
-	cpus = dl_bw_cpus(cpu);
-	overflow = __dl_overflow(dl_b, cpus, 0, 0);
+	cap = dl_bw_capacity(cpu);
+	overflow = __dl_overflow(dl_b, cap, 0, 0);
 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 	rcu_read_unlock_sched();
 
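Note that only the admission check switches to capacities; the per-task accounting (__dl_add()/__dl_sub()) still distributes bandwidth over dl_bw_cpus(), which is why sched_dl_overflow() keeps both cpus and cap. dl_bw_capacity() itself comes from the parent commit (fc9dc69); the following is a simplified sketch of what it computes, with locking assertions elided, so treat it as illustrative rather than the exact in-tree version.

/*
 * Simplified sketch of dl_bw_capacity(): the capacity sum of the
 * root domain that CPU @i belongs to.
 */
static unsigned long dl_bw_capacity(int i)
{
	unsigned long cap = 0;

	/* Fast path: on symmetric systems every CPU contributes 1024. */
	if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
	    capacity_orig_of(i) == SCHED_CAPACITY_SCALE)
		return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;

	/* Otherwise sum the original capacity of each active CPU. */
	for_each_cpu_and(i, cpu_rq(i)->rd->span, cpu_active_mask)
		cap += capacity_orig_of(i);

	return cap;
}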

kernel/sched/sched.h

Lines changed: 3 additions & 3 deletions
@@ -310,11 +310,11 @@ void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
 	__dl_update(dl_b, -((s32)tsk_bw / cpus));
 }
 
-static inline
-bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
+static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
+				 u64 old_bw, u64 new_bw)
 {
 	return dl_b->bw != -1 &&
-	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
+	       cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
 }
 
 extern void init_dl_bw(struct dl_bw *dl_b);
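Since cap_scale(v, s) is ((v) * (s)) >> SCHED_CAPACITY_SHIFT, the new condition reduces exactly to the old dl_b->bw * cpus bound on a symmetric system, where cap = cpus << SCHED_CAPACITY_SHIFT. A standalone check of that equivalence (cap_scale() re-declared here for illustration; the bandwidth value is an arbitrary example):

#include <assert.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT	10

/* Mirror of the kernel's cap_scale() macro. */
#define cap_scale(v, s)		(((v) * (s)) >> SCHED_CAPACITY_SHIFT)

int main(void)
{
	uint64_t bw = 996147;	/* ~95% in 1 << 20 units */
	uint64_t cpus = 8;
	unsigned long cap = cpus << SCHED_CAPACITY_SHIFT;

	/* On a symmetric system the two admission bounds coincide. */
	assert(cap_scale(bw, cap) == bw * cpus);
	return 0;
}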
