Skip to content

Commit 81388c2

Browse files
committed
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Thomas Gleixner:
"Two fixes for scheduler regressions:
 - Plug a subtle race condition which was introduced with the rework of the next task selection functionality. Changes to task properties became unprotected, so they could be observed in an inconsistent state, causing state corruption.
 - A trivial compile fix for CONFIG_CGROUPS=n"
* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix pick_next_task() vs 'change' pattern race
  sched/core: Fix compilation error when cgroup not selected
2 parents b584a17 + 6e2df05 commit 81388c2

File tree

7 files changed

+113
-59
lines changed

7 files changed

+113
-59
lines changed

kernel/sched/core.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,6 +1073,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
10731073
task_rq_unlock(rq, p, &rf);
10741074
}
10751075

1076+
#ifdef CONFIG_UCLAMP_TASK_GROUP
10761077
static inline void
10771078
uclamp_update_active_tasks(struct cgroup_subsys_state *css,
10781079
unsigned int clamps)
@@ -1091,7 +1092,6 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css,
10911092
css_task_iter_end(&it);
10921093
}
10931094

1094-
#ifdef CONFIG_UCLAMP_TASK_GROUP
10951095
static void cpu_util_update_eff(struct cgroup_subsys_state *css);
10961096
static void uclamp_update_root_tg(void)
10971097
{
@@ -3929,13 +3929,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
39293929
}
39303930

39313931
restart:
3932+
#ifdef CONFIG_SMP
39323933
/*
3933-
* Ensure that we put DL/RT tasks before the pick loop, such that they
3934-
* can PULL higher prio tasks when we lower the RQ 'priority'.
3934+
* We must do the balancing pass before put_prev_task(), such
3935+
* that when we release the rq->lock the task is in the same
3936+
* state as before we took rq->lock.
3937+
*
3938+
* We can terminate the balance pass as soon as we know there is
3939+
* a runnable task of @class priority or higher.
39353940
*/
3936-
prev->sched_class->put_prev_task(rq, prev, rf);
3937-
if (!rq->nr_running)
3938-
newidle_balance(rq, rf);
3941+
for_class_range(class, prev->sched_class, &idle_sched_class) {
3942+
if (class->balance(rq, prev, rf))
3943+
break;
3944+
}
3945+
#endif
3946+
3947+
put_prev_task(rq, prev);
39393948

39403949
for_each_class(class) {
39413950
p = class->pick_next_task(rq, NULL, NULL);
@@ -6201,7 +6210,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
62016210
for_each_class(class) {
62026211
next = class->pick_next_task(rq, NULL, NULL);
62036212
if (next) {
6204-
next->sched_class->put_prev_task(rq, next, NULL);
6213+
next->sched_class->put_prev_task(rq, next);
62056214
return next;
62066215
}
62076216
}

kernel/sched/deadline.c

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,6 +1691,22 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
16911691
resched_curr(rq);
16921692
}
16931693

1694+
static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1695+
{
1696+
if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
1697+
/*
1698+
* This is OK, because current is on_cpu, which avoids it being
1699+
* picked for load-balance and preemption/IRQs are still
1700+
* disabled avoiding further scheduler activity on it and we've
1701+
* not yet started the picking loop.
1702+
*/
1703+
rq_unpin_lock(rq, rf);
1704+
pull_dl_task(rq);
1705+
rq_repin_lock(rq, rf);
1706+
}
1707+
1708+
return sched_stop_runnable(rq) || sched_dl_runnable(rq);
1709+
}
16941710
#endif /* CONFIG_SMP */
16951711

16961712
/*
@@ -1758,45 +1774,28 @@ static struct task_struct *
17581774
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
17591775
{
17601776
struct sched_dl_entity *dl_se;
1777+
struct dl_rq *dl_rq = &rq->dl;
17611778
struct task_struct *p;
1762-
struct dl_rq *dl_rq;
17631779

17641780
WARN_ON_ONCE(prev || rf);
17651781

1766-
dl_rq = &rq->dl;
1767-
1768-
if (unlikely(!dl_rq->dl_nr_running))
1782+
if (!sched_dl_runnable(rq))
17691783
return NULL;
17701784

17711785
dl_se = pick_next_dl_entity(rq, dl_rq);
17721786
BUG_ON(!dl_se);
1773-
17741787
p = dl_task_of(dl_se);
1775-
17761788
set_next_task_dl(rq, p);
1777-
17781789
return p;
17791790
}
17801791

1781-
static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1792+
static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
17821793
{
17831794
update_curr_dl(rq);
17841795

17851796
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
17861797
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
17871798
enqueue_pushable_dl_task(rq, p);
1788-
1789-
if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
1790-
/*
1791-
* This is OK, because current is on_cpu, which avoids it being
1792-
* picked for load-balance and preemption/IRQs are still
1793-
* disabled avoiding further scheduler activity on it and we've
1794-
* not yet started the picking loop.
1795-
*/
1796-
rq_unpin_lock(rq, rf);
1797-
pull_dl_task(rq);
1798-
rq_repin_lock(rq, rf);
1799-
}
18001799
}
18011800

18021801
/*
@@ -2442,6 +2441,7 @@ const struct sched_class dl_sched_class = {
24422441
.set_next_task = set_next_task_dl,
24432442

24442443
#ifdef CONFIG_SMP
2444+
.balance = balance_dl,
24452445
.select_task_rq = select_task_rq_dl,
24462446
.migrate_task_rq = migrate_task_rq_dl,
24472447
.set_cpus_allowed = set_cpus_allowed_dl,

kernel/sched/fair.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6570,6 +6570,15 @@ static void task_dead_fair(struct task_struct *p)
65706570
{
65716571
remove_entity_load_avg(&p->se);
65726572
}
6573+
6574+
static int
6575+
balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
6576+
{
6577+
if (rq->nr_running)
6578+
return 1;
6579+
6580+
return newidle_balance(rq, rf) != 0;
6581+
}
65736582
#endif /* CONFIG_SMP */
65746583

65756584
static unsigned long wakeup_gran(struct sched_entity *se)
@@ -6746,7 +6755,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
67466755
int new_tasks;
67476756

67486757
again:
6749-
if (!cfs_rq->nr_running)
6758+
if (!sched_fair_runnable(rq))
67506759
goto idle;
67516760

67526761
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6884,7 +6893,7 @@ done: __maybe_unused;
68846893
/*
68856894
* Account for a descheduled task:
68866895
*/
6887-
static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
6896+
static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
68886897
{
68896898
struct sched_entity *se = &prev->se;
68906899
struct cfs_rq *cfs_rq;
@@ -10414,11 +10423,11 @@ const struct sched_class fair_sched_class = {
1041410423
.check_preempt_curr = check_preempt_wakeup,
1041510424

1041610425
.pick_next_task = pick_next_task_fair,
10417-
1041810426
.put_prev_task = put_prev_task_fair,
1041910427
.set_next_task = set_next_task_fair,
1042010428

1042110429
#ifdef CONFIG_SMP
10430+
.balance = balance_fair,
1042210431
.select_task_rq = select_task_rq_fair,
1042310432
.migrate_task_rq = migrate_task_rq_fair,
1042410433

kernel/sched/idle.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,12 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
365365
{
366366
return task_cpu(p); /* IDLE tasks are never migrated */
367367
}
368+
369+
static int
370+
balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
371+
{
372+
return WARN_ON_ONCE(1);
373+
}
368374
#endif
369375

370376
/*
@@ -375,7 +381,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
375381
resched_curr(rq);
376382
}
377383

378-
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
384+
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
379385
{
380386
}
381387

@@ -460,6 +466,7 @@ const struct sched_class idle_sched_class = {
460466
.set_next_task = set_next_task_idle,
461467

462468
#ifdef CONFIG_SMP
469+
.balance = balance_idle,
463470
.select_task_rq = select_task_rq_idle,
464471
.set_cpus_allowed = set_cpus_allowed_common,
465472
#endif

kernel/sched/rt.c

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,6 +1469,22 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
14691469
resched_curr(rq);
14701470
}
14711471

1472+
static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1473+
{
1474+
if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
1475+
/*
1476+
* This is OK, because current is on_cpu, which avoids it being
1477+
* picked for load-balance and preemption/IRQs are still
1478+
* disabled avoiding further scheduler activity on it and we've
1479+
* not yet started the picking loop.
1480+
*/
1481+
rq_unpin_lock(rq, rf);
1482+
pull_rt_task(rq);
1483+
rq_repin_lock(rq, rf);
1484+
}
1485+
1486+
return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
1487+
}
14721488
#endif /* CONFIG_SMP */
14731489

14741490
/*
@@ -1552,21 +1568,18 @@ static struct task_struct *
15521568
pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
15531569
{
15541570
struct task_struct *p;
1555-
struct rt_rq *rt_rq = &rq->rt;
15561571

15571572
WARN_ON_ONCE(prev || rf);
15581573

1559-
if (!rt_rq->rt_queued)
1574+
if (!sched_rt_runnable(rq))
15601575
return NULL;
15611576

15621577
p = _pick_next_task_rt(rq);
1563-
15641578
set_next_task_rt(rq, p);
1565-
15661579
return p;
15671580
}
15681581

1569-
static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1582+
static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
15701583
{
15711584
update_curr_rt(rq);
15721585

@@ -1578,18 +1591,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_fla
15781591
*/
15791592
if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
15801593
enqueue_pushable_task(rq, p);
1581-
1582-
if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
1583-
/*
1584-
* This is OK, because current is on_cpu, which avoids it being
1585-
* picked for load-balance and preemption/IRQs are still
1586-
* disabled avoiding further scheduler activity on it and we've
1587-
* not yet started the picking loop.
1588-
*/
1589-
rq_unpin_lock(rq, rf);
1590-
pull_rt_task(rq);
1591-
rq_repin_lock(rq, rf);
1592-
}
15931594
}
15941595

15951596
#ifdef CONFIG_SMP
@@ -2366,8 +2367,8 @@ const struct sched_class rt_sched_class = {
23662367
.set_next_task = set_next_task_rt,
23672368

23682369
#ifdef CONFIG_SMP
2370+
.balance = balance_rt,
23692371
.select_task_rq = select_task_rq_rt,
2370-
23712372
.set_cpus_allowed = set_cpus_allowed_common,
23722373
.rq_online = rq_online_rt,
23732374
.rq_offline = rq_offline_rt,

kernel/sched/sched.h

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,10 +1727,11 @@ struct sched_class {
17271727
struct task_struct * (*pick_next_task)(struct rq *rq,
17281728
struct task_struct *prev,
17291729
struct rq_flags *rf);
1730-
void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
1730+
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
17311731
void (*set_next_task)(struct rq *rq, struct task_struct *p);
17321732

17331733
#ifdef CONFIG_SMP
1734+
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
17341735
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
17351736
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
17361737

@@ -1773,7 +1774,7 @@ struct sched_class {
17731774
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
17741775
{
17751776
WARN_ON_ONCE(rq->curr != prev);
1776-
prev->sched_class->put_prev_task(rq, prev, NULL);
1777+
prev->sched_class->put_prev_task(rq, prev);
17771778
}
17781779

17791780
static inline void set_next_task(struct rq *rq, struct task_struct *next)
@@ -1787,15 +1788,38 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
17871788
#else
17881789
#define sched_class_highest (&dl_sched_class)
17891790
#endif
1791+
1792+
#define for_class_range(class, _from, _to) \
1793+
for (class = (_from); class != (_to); class = class->next)
1794+
17901795
#define for_each_class(class) \
1791-
for (class = sched_class_highest; class; class = class->next)
1796+
for_class_range(class, sched_class_highest, NULL)
17921797

17931798
extern const struct sched_class stop_sched_class;
17941799
extern const struct sched_class dl_sched_class;
17951800
extern const struct sched_class rt_sched_class;
17961801
extern const struct sched_class fair_sched_class;
17971802
extern const struct sched_class idle_sched_class;
17981803

1804+
static inline bool sched_stop_runnable(struct rq *rq)
1805+
{
1806+
return rq->stop && task_on_rq_queued(rq->stop);
1807+
}
1808+
1809+
static inline bool sched_dl_runnable(struct rq *rq)
1810+
{
1811+
return rq->dl.dl_nr_running > 0;
1812+
}
1813+
1814+
static inline bool sched_rt_runnable(struct rq *rq)
1815+
{
1816+
return rq->rt.rt_queued > 0;
1817+
}
1818+
1819+
static inline bool sched_fair_runnable(struct rq *rq)
1820+
{
1821+
return rq->cfs.nr_running > 0;
1822+
}
17991823

18001824
#ifdef CONFIG_SMP
18011825

kernel/sched/stop_task.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
1515
{
1616
return task_cpu(p); /* stop tasks are never migrated */
1717
}
18+
19+
static int
20+
balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
21+
{
22+
return sched_stop_runnable(rq);
23+
}
1824
#endif /* CONFIG_SMP */
1925

2026
static void
@@ -31,16 +37,13 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
3137
static struct task_struct *
3238
pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
3339
{
34-
struct task_struct *stop = rq->stop;
35-
3640
WARN_ON_ONCE(prev || rf);
3741

38-
if (!stop || !task_on_rq_queued(stop))
42+
if (!sched_stop_runnable(rq))
3943
return NULL;
4044

41-
set_next_task_stop(rq, stop);
42-
43-
return stop;
45+
set_next_task_stop(rq, rq->stop);
46+
return rq->stop;
4447
}
4548

4649
static void
@@ -60,7 +63,7 @@ static void yield_task_stop(struct rq *rq)
6063
BUG(); /* the stop task should never yield, it's pointless. */
6164
}
6265

63-
static void put_prev_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
66+
static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
6467
{
6568
struct task_struct *curr = rq->curr;
6669
u64 delta_exec;
@@ -129,6 +132,7 @@ const struct sched_class stop_sched_class = {
129132
.set_next_task = set_next_task_stop,
130133

131134
#ifdef CONFIG_SMP
135+
.balance = balance_stop,
132136
.select_task_rq = select_task_rq_stop,
133137
.set_cpus_allowed = set_cpus_allowed_common,
134138
#endif

0 commit comments

Comments
 (0)