Skip to content

Commit 19a1f5e

Browse files
Peter ZijlstraIngo Molnar
authored andcommitted
sched: Fix smp_call_function_single_async() usage for ILB
The recent commit: 90b5363 ("sched: Clean up scheduler_ipi()") got smp_call_function_single_async() subtly wrong. Even though it will return -EBUSY when trying to re-use a csd, that condition is not atomic and still requires external serialization. The change in kick_ilb() got this wrong. While on first reading kick_ilb() has an atomic test-and-set that appears to serialize the use, the matching 'release' is not in the right place to actually guarantee this serialization. Rework the nohz_idle_balance() trigger so that the release is in the IPI callback and thus guarantees the required serialization for the CSD. Fixes: 90b5363 ("sched: Clean up scheduler_ipi()") Reported-by: Qian Cai <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Ingo Molnar <[email protected]> Reviewed-by: Frederic Weisbecker <[email protected]> Cc: [email protected] Link: https://lore.kernel.org/r/[email protected]
1 parent 58ef57b commit 19a1f5e

File tree

3 files changed

+19
-36
lines changed

3 files changed

+19
-36
lines changed

kernel/sched/core.c

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -637,41 +637,25 @@ void wake_up_nohz_cpu(int cpu)
637637
wake_up_idle_cpu(cpu);
638638
}
639639

640-
static inline bool got_nohz_idle_kick(void)
640+
static void nohz_csd_func(void *info)
641641
{
642-
int cpu = smp_processor_id();
643-
644-
if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
645-
return false;
646-
647-
if (idle_cpu(cpu) && !need_resched())
648-
return true;
642+
struct rq *rq = info;
643+
int cpu = cpu_of(rq);
644+
unsigned int flags;
649645

650646
/*
651-
* We can't run Idle Load Balance on this CPU for this time so we
652-
* cancel it and clear NOHZ_BALANCE_KICK
647+
* Release the rq::nohz_csd.
653648
*/
654-
atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
655-
return false;
656-
}
649+
flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
650+
WARN_ON(!(flags & NOHZ_KICK_MASK));
657651

658-
static void nohz_csd_func(void *info)
659-
{
660-
struct rq *rq = info;
661-
662-
if (got_nohz_idle_kick()) {
663-
rq->idle_balance = 1;
652+
rq->idle_balance = idle_cpu(cpu);
653+
if (rq->idle_balance && !need_resched()) {
654+
rq->nohz_idle_balance = flags;
664655
raise_softirq_irqoff(SCHED_SOFTIRQ);
665656
}
666657
}
667658

668-
#else /* CONFIG_NO_HZ_COMMON */
669-
670-
static inline bool got_nohz_idle_kick(void)
671-
{
672-
return false;
673-
}
674-
675659
#endif /* CONFIG_NO_HZ_COMMON */
676660

677661
#ifdef CONFIG_NO_HZ_FULL

kernel/sched/fair.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10024,6 +10024,10 @@ static void kick_ilb(unsigned int flags)
1002410024
if (ilb_cpu >= nr_cpu_ids)
1002510025
return;
1002610026

10027+
/*
10028+
* Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
10029+
* the first flag owns it; cleared by nohz_csd_func().
10030+
*/
1002710031
flags = atomic_fetch_or(flags, nohz_flags(ilb_cpu));
1002810032
if (flags & NOHZ_KICK_MASK)
1002910033
return;
@@ -10371,20 +10375,14 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
1037110375
*/
1037210376
static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
1037310377
{
10374-
int this_cpu = this_rq->cpu;
10375-
unsigned int flags;
10378+
unsigned int flags = this_rq->nohz_idle_balance;
1037610379

10377-
if (!(atomic_read(nohz_flags(this_cpu)) & NOHZ_KICK_MASK))
10380+
if (!flags)
1037810381
return false;
1037910382

10380-
if (idle != CPU_IDLE) {
10381-
atomic_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
10382-
return false;
10383-
}
10383+
this_rq->nohz_idle_balance = 0;
1038410384

10385-
/* could be _relaxed() */
10386-
flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
10387-
if (!(flags & NOHZ_KICK_MASK))
10385+
if (idle != CPU_IDLE)
1038810386
return false;
1038910387

1039010388
_nohz_idle_balance(this_rq, flags, idle);

kernel/sched/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,7 @@ struct rq {
951951

952952
struct callback_head *balance_callback;
953953

954+
unsigned char nohz_idle_balance;
954955
unsigned char idle_balance;
955956

956957
unsigned long misfit_task_load;

0 commit comments

Comments
 (0)