
Commit 91a9a90

Merge tag 'sched_urgent_for_5.8_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Borislav Petkov:

"The most anticipated fix in this pull request is probably the horrible build fix for the RANDSTRUCT fail that didn't make -rc2. Also included is the cleanup that removes those BUILD_BUG_ON()s and replaces them with ugly unions.

Also included is the try_to_wake_up() race fix that was first triggered by Paul's RCU-torture runs, but was independently hit by Dave Chinner's fstest runs as well."

* tag 'sched_urgent_for_5.8_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/cfs: change initial value of runnable_avg
  smp, irq_work: Continue smp_call_function*() and irq_work*() integration
  sched/core: s/WF_ON_RQ/WQ_ON_CPU/
  sched/core: Fix ttwu() race
  sched/core: Fix PI boosting between RT and DEADLINE tasks
  sched/deadline: Initialize ->dl_boosted
  sched/core: Check cpus_mask, not cpus_ptr in __set_cpus_allowed_ptr(), to fix mask corruption
  sched/core: Fix CONFIG_GCC_PLUGIN_RANDSTRUCT build fail
2 parents 098c793 + e21cf43 commit 91a9a90
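
The union cleanup called out above replaces after-the-fact offset assertions with a shared-prefix type embedded through an anonymous union. Below is a minimal, self-contained user-space sketch of that technique; struct node, common_header, and work_item are illustrative stand-ins for llist_node, __call_single_node, and irq_work, not the kernel's definitions.

#include <stddef.h>
#include <stdio.h>

struct node { struct node *next; };	/* stand-in for struct llist_node */

struct common_header {			/* stand-in for struct __call_single_node */
	struct node	llist;
	unsigned int	u_flags;
};

struct work_item {			/* stand-in for struct irq_work */
	union {
		struct common_header node;
		struct {		/* legacy field names keep old code compiling */
			struct node	llnode;
			unsigned int	flags;
		};
	};
	void (*func)(struct work_item *);
};

/* The union makes these hold by construction; no runtime check needed. */
_Static_assert(offsetof(struct work_item, llnode) ==
	       offsetof(struct work_item, node.llist), "prefix layout");
_Static_assert(offsetof(struct work_item, flags) ==
	       offsetof(struct work_item, node.u_flags), "prefix layout");

int main(void)
{
	struct work_item w = { .flags = 0x30 };

	/* Both names alias the same storage: */
	printf("%#x\n", w.node.u_flags);	/* prints 0x30 */
	return 0;
}

Because both views are members of one union, the overlap is guaranteed by the language itself, which is what lets the BUILD_BUG_ON() checks in kernel/smp.c (the last file below) go away.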

File tree

9 files changed: +120 additions, −65 deletions

include/linux/irq_work.h

Lines changed: 8 additions & 18 deletions
@@ -2,7 +2,7 @@
 #ifndef _LINUX_IRQ_WORK_H
 #define _LINUX_IRQ_WORK_H
 
-#include <linux/llist.h>
+#include <linux/smp_types.h>
 
 /*
  * An entry can be in one of four states:
@@ -13,24 +13,14 @@
  * busy    NULL, 2 -> {free, claimed} : callback in progress, can be claimed
  */
 
-/* flags share CSD_FLAG_ space */
-
-#define IRQ_WORK_PENDING	BIT(0)
-#define IRQ_WORK_BUSY		BIT(1)
-
-/* Doesn't want IPI, wait for tick: */
-#define IRQ_WORK_LAZY		BIT(2)
-/* Run hard IRQ context, even on RT */
-#define IRQ_WORK_HARD_IRQ	BIT(3)
-
-#define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
-
-/*
- * structure shares layout with single_call_data_t.
- */
 struct irq_work {
-	struct llist_node llnode;
-	atomic_t flags;
+	union {
+		struct __call_single_node node;
+		struct {
+			struct llist_node llnode;
+			atomic_t flags;
+		};
+	};
 	void (*func)(struct irq_work *);
 };

include/linux/sched.h

Lines changed: 1 addition & 2 deletions
@@ -654,9 +654,8 @@ struct task_struct {
 	unsigned int			ptrace;
 
 #ifdef CONFIG_SMP
-	struct llist_node		wake_entry;
-	unsigned int			wake_entry_type;
 	int				on_cpu;
+	struct __call_single_node	wake_entry;
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/* Current CPU: */
 	unsigned int			cpu;

include/linux/smp.h

Lines changed: 8 additions & 15 deletions
@@ -12,29 +12,22 @@
 #include <linux/list.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
-#include <linux/llist.h>
+#include <linux/smp_types.h>
 
 typedef void (*smp_call_func_t)(void *info);
 typedef bool (*smp_cond_func_t)(int cpu, void *info);
 
-enum {
-	CSD_FLAG_LOCK		= 0x01,
-
-	/* IRQ_WORK_flags */
-
-	CSD_TYPE_ASYNC		= 0x00,
-	CSD_TYPE_SYNC		= 0x10,
-	CSD_TYPE_IRQ_WORK	= 0x20,
-	CSD_TYPE_TTWU		= 0x30,
-	CSD_FLAG_TYPE_MASK	= 0xF0,
-};
-
 /*
  * structure shares (partial) layout with struct irq_work
  */
 struct __call_single_data {
-	struct llist_node llist;
-	unsigned int flags;
+	union {
+		struct __call_single_node node;
+		struct {
+			struct llist_node llist;
+			unsigned int flags;
+		};
+	};
 	smp_call_func_t func;
 	void *info;
 };

include/linux/smp_types.h

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_SMP_TYPES_H
+#define __LINUX_SMP_TYPES_H
+
+#include <linux/llist.h>
+
+enum {
+	CSD_FLAG_LOCK		= 0x01,
+
+	IRQ_WORK_PENDING	= 0x01,
+	IRQ_WORK_BUSY		= 0x02,
+	IRQ_WORK_LAZY		= 0x04, /* No IPI, wait for tick */
+	IRQ_WORK_HARD_IRQ	= 0x08, /* IRQ context on PREEMPT_RT */
+
+	IRQ_WORK_CLAIMED	= (IRQ_WORK_PENDING | IRQ_WORK_BUSY),
+
+	CSD_TYPE_ASYNC		= 0x00,
+	CSD_TYPE_SYNC		= 0x10,
+	CSD_TYPE_IRQ_WORK	= 0x20,
+	CSD_TYPE_TTWU		= 0x30,
+
+	CSD_FLAG_TYPE_MASK	= 0xF0,
+};
+
+/*
+ * struct __call_single_node is the primary type on
+ * smp.c:call_single_queue.
+ *
+ * flush_smp_call_function_queue() only reads the type from
+ * __call_single_node::u_flags as a regular load, the above
+ * (anonymous) enum defines all the bits of this word.
+ *
+ * Other bits are not modified until the type is known.
+ *
+ * CSD_TYPE_SYNC/ASYNC:
+ *	struct {
+ *		struct llist_node node;
+ *		unsigned int flags;
+ *		smp_call_func_t func;
+ *		void *info;
+ *	};
+ *
+ * CSD_TYPE_IRQ_WORK:
+ *	struct {
+ *		struct llist_node node;
+ *		atomic_t flags;
+ *		void (*func)(struct irq_work *);
+ *	};
+ *
+ * CSD_TYPE_TTWU:
+ *	struct {
+ *		struct llist_node node;
+ *		unsigned int flags;
+ *	};
+ *
+ */
+
+struct __call_single_node {
+	struct llist_node	llist;
+	union {
+		unsigned int	u_flags;
+		atomic_t	a_flags;
+	};
+};
+
+#endif /* __LINUX_SMP_TYPES_H */
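
The comment block in this new header describes how three different structures travel over the same call_single_queue, distinguished only by type bits in a shared flags word. A hedged sketch of that dispatch idea in user-space C follows; dispatch() is a hypothetical stand-in for the kernel's flush_smp_call_function_queue(), with simplified types.

#include <stdio.h>

struct llnode { struct llnode *next; };

struct call_single_node {		/* simplified shared header */
	struct llnode	llist;
	unsigned int	u_flags;
};

enum {					/* type bits, as in the enum above */
	CSD_TYPE_ASYNC		= 0x00,
	CSD_TYPE_SYNC		= 0x10,
	CSD_TYPE_IRQ_WORK	= 0x20,
	CSD_TYPE_TTWU		= 0x30,
	CSD_FLAG_TYPE_MASK	= 0xF0,
};

/* Hypothetical dispatcher: inspect only the type bits before any downcast. */
static void dispatch(struct call_single_node *node)
{
	switch (node->u_flags & CSD_FLAG_TYPE_MASK) {
	case CSD_TYPE_TTWU:
		puts("remote wakeup (task_struct entry)");
		break;
	case CSD_TYPE_IRQ_WORK:
		puts("irq_work entry");
		break;
	default:			/* CSD_TYPE_SYNC / CSD_TYPE_ASYNC */
		puts("smp function call entry");
		break;
	}
}

int main(void)
{
	struct call_single_node n = { .u_flags = CSD_TYPE_TTWU };

	dispatch(&n);			/* prints "remote wakeup ..." */
	return 0;
}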

kernel/sched/core.c

Lines changed: 34 additions & 10 deletions
@@ -1637,7 +1637,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 		goto out;
 	}
 
-	if (cpumask_equal(p->cpus_ptr, new_mask))
+	if (cpumask_equal(&p->cpus_mask, new_mask))
 		goto out;
 
 	/*
@@ -2293,8 +2293,15 @@ void sched_ttwu_pending(void *arg)
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 
-	llist_for_each_entry_safe(p, t, llist, wake_entry)
+	llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
+		if (WARN_ON_ONCE(p->on_cpu))
+			smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+		if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
+			set_task_cpu(p, cpu_of(rq));
+
 		ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
+	}
 
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -2322,7 +2329,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
 	p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
 
 	WRITE_ONCE(rq->ttwu_pending, 1);
-	__smp_call_single_queue(cpu, &p->wake_entry);
+	__smp_call_single_queue(cpu, &p->wake_entry.llist);
 }
 
 void wake_up_if_idle(int cpu)
@@ -2369,7 +2376,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
 	 * the soon-to-be-idle CPU as the current CPU is likely busy.
 	 * nr_running is checked to avoid unnecessary task stacking.
 	 */
-	if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
+	if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
 		return true;
 
 	return false;
@@ -2378,6 +2385,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
 static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
 	if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+		if (WARN_ON_ONCE(cpu == smp_processor_id()))
+			return false;
+
 		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
 		__ttwu_queue_wakelist(p, cpu, wake_flags);
 		return true;
@@ -2528,7 +2538,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		goto out;
 
 	success = 1;
-	cpu = task_cpu(p);
 	trace_sched_waking(p);
 	p->state = TASK_RUNNING;
 	trace_sched_wakeup(p);
@@ -2550,7 +2559,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
 	/* We're going to change ->state: */
 	success = 1;
-	cpu = task_cpu(p);
 
 	/*
 	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
@@ -2614,8 +2622,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * which potentially sends an IPI instead of spinning on p->on_cpu to
 	 * let the waker make forward progress. This is safe because IRQs are
 	 * disabled and the IPI will deliver after on_cpu is cleared.
+	 *
+	 * Ensure we load task_cpu(p) after p->on_cpu:
+	 *
+	 * set_task_cpu(p, cpu);
+	 *   STORE p->cpu = @cpu
+	 *
+	 * __schedule() (switch to task 'p')
+	 *   LOCK rq->lock
+	 *   smp_mb__after_spin_lock()	smp_cond_load_acquire(&p->on_cpu)
+	 *   STORE p->on_cpu = 1	LOAD p->cpu
+	 *
+	 * to ensure we observe the correct CPU on which the task is currently
+	 * scheduling.
 	 */
-	if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
+	if (smp_load_acquire(&p->on_cpu) &&
+	    ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
 		goto unlock;
 
 	/*
@@ -2635,14 +2656,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		psi_ttwu_dequeue(p);
 		set_task_cpu(p, cpu);
 	}
+#else
+	cpu = task_cpu(p);
 #endif /* CONFIG_SMP */
 
 	ttwu_queue(p, cpu, wake_flags);
 unlock:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 out:
 	if (success)
-		ttwu_stat(p, cpu, wake_flags);
+		ttwu_stat(p, task_cpu(p), wake_flags);
 	preempt_enable();
 
 	return success;
@@ -2763,7 +2786,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 	init_numa_balancing(clone_flags, p);
 #ifdef CONFIG_SMP
-	p->wake_entry_type = CSD_TYPE_TTWU;
+	p->wake_entry.u_flags = CSD_TYPE_TTWU;
 #endif
 }
@@ -4533,7 +4556,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 	 */
 	if (dl_prio(prio)) {
 		if (!dl_prio(p->normal_prio) ||
-		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
+		    (pi_task && dl_prio(pi_task->prio) &&
+		     dl_entity_preempt(&pi_task->dl, &p->dl))) {
 			p->dl.dl_boosted = 1;
 			queue_flag |= ENQUEUE_REPLENISH;
 		} else
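
The new comment in try_to_wake_up() documents why the plain READ_ONCE() became smp_load_acquire(): the waker must not read task_cpu(p) before it observes p->on_cpu. A user-space C11 analogue of that release/acquire pairing follows, as a sketch only; schedule_in() and waker_target_cpu() are illustrative names, and the release store stands in for the ordering the scheduler gets from its lock/store sequence.

#include <stdatomic.h>
#include <stdio.h>

struct task {
	_Atomic int	on_cpu;
	int		cpu;		/* plain field, published via on_cpu */
};

/* Scheduler side: set the CPU first, then release-store on_cpu. */
static void schedule_in(struct task *p, int cpu)
{
	p->cpu = cpu;					/* STORE p->cpu */
	atomic_store_explicit(&p->on_cpu, 1,
			      memory_order_release);	/* STORE p->on_cpu = 1 */
}

/* Waker side: acquire-load on_cpu; the cpu read is ordered after it. */
static int waker_target_cpu(struct task *p)
{
	if (atomic_load_explicit(&p->on_cpu, memory_order_acquire))
		return p->cpu;				/* LOAD p->cpu */
	return -1;					/* task not running */
}

int main(void)
{
	struct task t = { .cpu = -1 };

	schedule_in(&t, 3);
	printf("%d\n", waker_target_cpu(&t));		/* prints 3 */
	return 0;
}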

kernel/sched/deadline.c

Lines changed: 1 addition & 0 deletions
@@ -2692,6 +2692,7 @@ void __dl_clear_params(struct task_struct *p)
 	dl_se->dl_bw			= 0;
 	dl_se->dl_density		= 0;
 
+	dl_se->dl_boosted		= 0;
 	dl_se->dl_throttled		= 0;
 	dl_se->dl_yielded		= 0;
 	dl_se->dl_non_contending	= 0;

kernel/sched/fair.c

Lines changed: 1 addition & 1 deletion
@@ -806,7 +806,7 @@ void post_init_entity_util_avg(struct task_struct *p)
 		}
 	}
 
-	sa->runnable_avg = cpu_scale;
+	sa->runnable_avg = sa->util_avg;
 
 	if (p->sched_class != &fair_sched_class) {
 		/*

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion
@@ -1682,7 +1682,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 #define WF_SYNC			0x01 /* Waker goes to sleep after wakeup */
 #define WF_FORK			0x02 /* Child wakeup after fork */
 #define WF_MIGRATED		0x04 /* Internal use, task got migrated */
-#define WF_ON_RQ		0x08 /* Wakee is on_rq */
+#define WF_ON_CPU		0x08 /* Wakee is on_cpu */
 
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution

kernel/smp.c

Lines changed: 0 additions & 18 deletions
@@ -669,24 +669,6 @@ void __init smp_init(void)
 {
 	int num_nodes, num_cpus;
 
-	/*
-	 * Ensure struct irq_work layout matches so that
-	 * flush_smp_call_function_queue() can do horrible things.
-	 */
-	BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
-		     offsetof(struct __call_single_data, llist));
-	BUILD_BUG_ON(offsetof(struct irq_work, func) !=
-		     offsetof(struct __call_single_data, func));
-	BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
-		     offsetof(struct __call_single_data, flags));
-
-	/*
-	 * Assert the CSD_TYPE_TTWU layout is similar enough
-	 * for task_struct to be on the @call_single_queue.
-	 */
-	BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
-		     offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
-
 	idle_threads_init();
 	cpuhp_threads_init();
