Skip to content

Commit f4b936f

Browse files
committed
Merge tag 'sched-urgent-2020-11-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Thomas Gleixner:
 "A couple of scheduler fixes:

   - Make the conditional update of the overutilized state work
     correctly by caching the relevant flags state before overwriting
     them and checking them afterwards.

   - Fix a data race in the wakeup path which caused loadavg on ARM64
     platforms to become a random number generator.

   - Fix the ordering of the iowaiter accounting operations so it can't
     be decremented before it is incremented.

   - Fix a bug in the deadline scheduler vs. priority inheritance when a
     non-deadline task A has inherited the parameters of a deadline task
     B and then blocks on a non-deadline task C.

     The second inheritance step used the static deadline parameters of
     task A, which are usually 0, instead of further propagating task
     B's parameters. The zero-initialized parameters trigger a bug in
     the deadline scheduler"

* tag 'sched-urgent-2020-11-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/deadline: Fix priority inheritance with multiple scheduling classes
  sched: Fix rq->nr_iowait ordering
  sched: Fix data-race in wakeup
  sched/fair: Fix overutilized update in enqueue_task_fair()
2 parents 48da330 + 2279f54 commit f4b936f

File tree

4 files changed

+95
-57
lines changed

4 files changed

+95
-57
lines changed

include/linux/sched.h

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -552,7 +552,6 @@ struct sched_dl_entity {
552552
* overruns.
553553
*/
554554
unsigned int dl_throttled : 1;
555-
unsigned int dl_boosted : 1;
556555
unsigned int dl_yielded : 1;
557556
unsigned int dl_non_contending : 1;
558557
unsigned int dl_overrun : 1;
@@ -571,6 +570,15 @@ struct sched_dl_entity {
571570
* time.
572571
*/
573572
struct hrtimer inactive_timer;
573+
574+
#ifdef CONFIG_RT_MUTEXES
575+
/*
576+
* Priority Inheritance. When a DEADLINE scheduling entity is boosted
577+
* pi_se points to the donor; otherwise it points to the dl_se it belongs
578+
* to (the original one/itself).
579+
*/
580+
struct sched_dl_entity *pi_se;
581+
#endif
574582
};
575583

576584
#ifdef CONFIG_UCLAMP_TASK
@@ -770,7 +778,6 @@ struct task_struct {
770778
unsigned sched_reset_on_fork:1;
771779
unsigned sched_contributes_to_load:1;
772780
unsigned sched_migrated:1;
773-
unsigned sched_remote_wakeup:1;
774781
#ifdef CONFIG_PSI
775782
unsigned sched_psi_wake_requeue:1;
776783
#endif
@@ -780,6 +787,21 @@ struct task_struct {
780787

781788
/* Unserialized, strictly 'current' */
782789

790+
/*
791+
* This field must not be in the scheduler word above due to wakelist
792+
* queueing no longer being serialized by p->on_cpu. However:
793+
*
794+
* p->XXX = X; ttwu()
795+
* schedule() if (p->on_rq && ..) // false
796+
* smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true
797+
* deactivate_task() ttwu_queue_wakelist())
798+
* p->on_rq = 0; p->sched_remote_wakeup = Y;
799+
*
800+
* guarantees all stores of 'current' are visible before
801+
* ->sched_remote_wakeup gets used, so it can be in this word.
802+
*/
803+
unsigned sched_remote_wakeup:1;
804+
783805
/* Bit to tell LSMs we're in execve(): */
784806
unsigned in_execve:1;
785807
unsigned in_iowait:1;

kernel/sched/core.c

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2501,7 +2501,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
25012501
#ifdef CONFIG_SMP
25022502
if (wake_flags & WF_MIGRATED)
25032503
en_flags |= ENQUEUE_MIGRATED;
2504+
else
25042505
#endif
2506+
if (p->in_iowait) {
2507+
delayacct_blkio_end(p);
2508+
atomic_dec(&task_rq(p)->nr_iowait);
2509+
}
25052510

25062511
activate_task(rq, p, en_flags);
25072512
ttwu_do_wakeup(rq, p, wake_flags, rf);
@@ -2888,11 +2893,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
28882893
if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
28892894
goto unlock;
28902895

2891-
if (p->in_iowait) {
2892-
delayacct_blkio_end(p);
2893-
atomic_dec(&task_rq(p)->nr_iowait);
2894-
}
2895-
28962896
#ifdef CONFIG_SMP
28972897
/*
28982898
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
@@ -2963,6 +2963,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
29632963

29642964
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
29652965
if (task_cpu(p) != cpu) {
2966+
if (p->in_iowait) {
2967+
delayacct_blkio_end(p);
2968+
atomic_dec(&task_rq(p)->nr_iowait);
2969+
}
2970+
29662971
wake_flags |= WF_MIGRATED;
29672972
psi_ttwu_dequeue(p);
29682973
set_task_cpu(p, cpu);
@@ -4907,20 +4912,21 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
49074912
if (!dl_prio(p->normal_prio) ||
49084913
(pi_task && dl_prio(pi_task->prio) &&
49094914
dl_entity_preempt(&pi_task->dl, &p->dl))) {
4910-
p->dl.dl_boosted = 1;
4915+
p->dl.pi_se = pi_task->dl.pi_se;
49114916
queue_flag |= ENQUEUE_REPLENISH;
4912-
} else
4913-
p->dl.dl_boosted = 0;
4917+
} else {
4918+
p->dl.pi_se = &p->dl;
4919+
}
49144920
p->sched_class = &dl_sched_class;
49154921
} else if (rt_prio(prio)) {
49164922
if (dl_prio(oldprio))
4917-
p->dl.dl_boosted = 0;
4923+
p->dl.pi_se = &p->dl;
49184924
if (oldprio < prio)
49194925
queue_flag |= ENQUEUE_HEAD;
49204926
p->sched_class = &rt_sched_class;
49214927
} else {
49224928
if (dl_prio(oldprio))
4923-
p->dl.dl_boosted = 0;
4929+
p->dl.pi_se = &p->dl;
49244930
if (rt_prio(oldprio))
49254931
p->rt.timeout = 0;
49264932
p->sched_class = &fair_sched_class;

kernel/sched/deadline.c

Lines changed: 53 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
4343
return !RB_EMPTY_NODE(&dl_se->rb_node);
4444
}
4545

46+
#ifdef CONFIG_RT_MUTEXES
47+
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
48+
{
49+
return dl_se->pi_se;
50+
}
51+
52+
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
53+
{
54+
return pi_of(dl_se) != dl_se;
55+
}
56+
#else
57+
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
58+
{
59+
return dl_se;
60+
}
61+
62+
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
63+
{
64+
return false;
65+
}
66+
#endif
67+
4668
#ifdef CONFIG_SMP
4769
static inline struct dl_bw *dl_bw_of(int i)
4870
{
@@ -698,7 +720,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
698720
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
699721
struct rq *rq = rq_of_dl_rq(dl_rq);
700722

701-
WARN_ON(dl_se->dl_boosted);
723+
WARN_ON(is_dl_boosted(dl_se));
702724
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
703725

704726
/*
@@ -736,21 +758,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
736758
* could happen are, typically, an entity voluntarily trying to overcome its
737759
* runtime, or it just underestimated it during sched_setattr().
738760
*/
739-
static void replenish_dl_entity(struct sched_dl_entity *dl_se,
740-
struct sched_dl_entity *pi_se)
761+
static void replenish_dl_entity(struct sched_dl_entity *dl_se)
741762
{
742763
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
743764
struct rq *rq = rq_of_dl_rq(dl_rq);
744765

745-
BUG_ON(pi_se->dl_runtime <= 0);
766+
BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
746767

747768
/*
748769
* This could be the case for a !-dl task that is boosted.
749770
* Just go with full inherited parameters.
750771
*/
751772
if (dl_se->dl_deadline == 0) {
752-
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
753-
dl_se->runtime = pi_se->dl_runtime;
773+
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
774+
dl_se->runtime = pi_of(dl_se)->dl_runtime;
754775
}
755776

756777
if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -763,8 +784,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
763784
* arbitrarily large.
764785
*/
765786
while (dl_se->runtime <= 0) {
766-
dl_se->deadline += pi_se->dl_period;
767-
dl_se->runtime += pi_se->dl_runtime;
787+
dl_se->deadline += pi_of(dl_se)->dl_period;
788+
dl_se->runtime += pi_of(dl_se)->dl_runtime;
768789
}
769790

770791
/*
@@ -778,8 +799,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
778799
*/
779800
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
780801
printk_deferred_once("sched: DL replenish lagged too much\n");
781-
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
782-
dl_se->runtime = pi_se->dl_runtime;
802+
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
803+
dl_se->runtime = pi_of(dl_se)->dl_runtime;
783804
}
784805

785806
if (dl_se->dl_yielded)
@@ -812,8 +833,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
812833
* task with deadline equal to period this is the same as using
813834
* dl_period instead of dl_deadline in the equation above.
814835
*/
815-
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
816-
struct sched_dl_entity *pi_se, u64 t)
836+
static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
817837
{
818838
u64 left, right;
819839

@@ -835,9 +855,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
835855
* of anything below microseconds resolution is actually fiction
836856
* (but still we want to give the user that illusion >;).
837857
*/
838-
left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
858+
left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
839859
right = ((dl_se->deadline - t) >> DL_SCALE) *
840-
(pi_se->dl_runtime >> DL_SCALE);
860+
(pi_of(dl_se)->dl_runtime >> DL_SCALE);
841861

842862
return dl_time_before(right, left);
843863
}
@@ -922,24 +942,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
922942
* Please refer to the comments update_dl_revised_wakeup() function to find
923943
* more about the Revised CBS rule.
924944
*/
925-
static void update_dl_entity(struct sched_dl_entity *dl_se,
926-
struct sched_dl_entity *pi_se)
945+
static void update_dl_entity(struct sched_dl_entity *dl_se)
927946
{
928947
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
929948
struct rq *rq = rq_of_dl_rq(dl_rq);
930949

931950
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
932-
dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
951+
dl_entity_overflow(dl_se, rq_clock(rq))) {
933952

934953
if (unlikely(!dl_is_implicit(dl_se) &&
935954
!dl_time_before(dl_se->deadline, rq_clock(rq)) &&
936-
!dl_se->dl_boosted)){
955+
!is_dl_boosted(dl_se))) {
937956
update_dl_revised_wakeup(dl_se, rq);
938957
return;
939958
}
940959

941-
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
942-
dl_se->runtime = pi_se->dl_runtime;
960+
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
961+
dl_se->runtime = pi_of(dl_se)->dl_runtime;
943962
}
944963
}
945964

@@ -1038,7 +1057,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
10381057
* The task might have been boosted by someone else and might be in the
10391058
* boosting/deboosting path, it's not throttled.
10401059
*/
1041-
if (dl_se->dl_boosted)
1060+
if (is_dl_boosted(dl_se))
10421061
goto unlock;
10431062

10441063
/*
@@ -1066,7 +1085,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
10661085
* but do not enqueue -- wait for our wakeup to do that.
10671086
*/
10681087
if (!task_on_rq_queued(p)) {
1069-
replenish_dl_entity(dl_se, dl_se);
1088+
replenish_dl_entity(dl_se);
10701089
goto unlock;
10711090
}
10721091

@@ -1156,7 +1175,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
11561175

11571176
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
11581177
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
1159-
if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
1178+
if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
11601179
return;
11611180
dl_se->dl_throttled = 1;
11621181
if (dl_se->runtime > 0)
@@ -1287,7 +1306,7 @@ static void update_curr_dl(struct rq *rq)
12871306
dl_se->dl_overrun = 1;
12881307

12891308
__dequeue_task_dl(rq, curr, 0);
1290-
if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
1309+
if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
12911310
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
12921311

12931312
if (!is_leftmost(curr, &rq->dl))
@@ -1481,8 +1500,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
14811500
}
14821501

14831502
static void
1484-
enqueue_dl_entity(struct sched_dl_entity *dl_se,
1485-
struct sched_dl_entity *pi_se, int flags)
1503+
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
14861504
{
14871505
BUG_ON(on_dl_rq(dl_se));
14881506

@@ -1493,9 +1511,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
14931511
*/
14941512
if (flags & ENQUEUE_WAKEUP) {
14951513
task_contending(dl_se, flags);
1496-
update_dl_entity(dl_se, pi_se);
1514+
update_dl_entity(dl_se);
14971515
} else if (flags & ENQUEUE_REPLENISH) {
1498-
replenish_dl_entity(dl_se, pi_se);
1516+
replenish_dl_entity(dl_se);
14991517
} else if ((flags & ENQUEUE_RESTORE) &&
15001518
dl_time_before(dl_se->deadline,
15011519
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1512,19 +1530,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
15121530

15131531
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
15141532
{
1515-
struct task_struct *pi_task = rt_mutex_get_top_task(p);
1516-
struct sched_dl_entity *pi_se = &p->dl;
1517-
1518-
/*
1519-
* Use the scheduling parameters of the top pi-waiter task if:
1520-
* - we have a top pi-waiter which is a SCHED_DEADLINE task AND
1521-
* - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
1522-
* smaller than our deadline OR we are a !SCHED_DEADLINE task getting
1523-
* boosted due to a SCHED_DEADLINE pi-waiter).
1524-
* Otherwise we keep our runtime and deadline.
1525-
*/
1526-
if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
1527-
pi_se = &pi_task->dl;
1533+
if (is_dl_boosted(&p->dl)) {
15281534
/*
15291535
* Because of delays in the detection of the overrun of a
15301536
* thread's runtime, it might be the case that a thread
@@ -1557,7 +1563,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
15571563
* the throttle.
15581564
*/
15591565
p->dl.dl_throttled = 0;
1560-
BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
1566+
BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
15611567
return;
15621568
}
15631569

@@ -1594,7 +1600,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
15941600
return;
15951601
}
15961602

1597-
enqueue_dl_entity(&p->dl, pi_se, flags);
1603+
enqueue_dl_entity(&p->dl, flags);
15981604

15991605
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
16001606
enqueue_pushable_dl_task(rq, p);
@@ -2787,11 +2793,14 @@ void __dl_clear_params(struct task_struct *p)
27872793
dl_se->dl_bw = 0;
27882794
dl_se->dl_density = 0;
27892795

2790-
dl_se->dl_boosted = 0;
27912796
dl_se->dl_throttled = 0;
27922797
dl_se->dl_yielded = 0;
27932798
dl_se->dl_non_contending = 0;
27942799
dl_se->dl_overrun = 0;
2800+
2801+
#ifdef CONFIG_RT_MUTEXES
2802+
dl_se->pi_se = dl_se;
2803+
#endif
27952804
}
27962805

27972806
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)

kernel/sched/fair.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5477,6 +5477,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
54775477
struct cfs_rq *cfs_rq;
54785478
struct sched_entity *se = &p->se;
54795479
int idle_h_nr_running = task_has_idle_policy(p);
5480+
int task_new = !(flags & ENQUEUE_WAKEUP);
54805481

54815482
/*
54825483
* The code below (indirectly) updates schedutil which looks at
@@ -5549,7 +5550,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
55495550
* into account, but that is not straightforward to implement,
55505551
* and the following generally works well enough in practice.
55515552
*/
5552-
if (flags & ENQUEUE_WAKEUP)
5553+
if (!task_new)
55535554
update_overutilized_status(rq);
55545555

55555556
enqueue_throttle:

0 commit comments

Comments
 (0)